Groups | Blog | Home
all groups > c# > may 2004 >

c# : Cannot parse an en-US date on a non-en-US system!



Jon Davis
5/17/2004 11:45:28 PM
I have put my users through so much crap with this bug it is an absolute
shame.

I have a product that reads/writes RSS 2.0 documents, among other things.
The RSS 2.0 spec mandates an en-US style of date formatting (RFC 822). I
have been using a variation of RFC 1123 (just change the time zone to an
offset, i.e. "-0800"). It seems to be writing okay, but it's failing to
parse.

I've tried changing the regional & language settings in my Windows XP
control panel so that I could test, but after getting it to work that way, a
guy from Pakistan (I think?) is still getting date parsing problems.

Here's what I've got. See ParseDateTime() below.



/// <summary>
/// Currently, this method simply calls DateToRFC1123String(),
/// but then changes the time zone to "+/-####", i.e. "-0700".
/// </summary>
public static string DateToRFC822String(DateTime dt, bool
fromLocalTimeZone) {
string dts = DateToRFC1123String(dt, false);
if (fromLocalTimeZone) {
string offset = MiscUtil.SysDTOffset.ToString();
offset = offset.Replace(":", "").Replace(" ", "");
if (offset.Substring(0, 1) != "-") {
if (offset.Substring(0, 1) == "+") {
offset = offset.Substring(0, 5);
} else {
offset = "+" + offset.Substring(0, 4);
}
} else {
offset = offset.Substring(0, 5);
}
dts = dts.Replace("GMT", offset);
}
return dts;
}

public static TimeSpan SysDTOffset {
get {
return System.TimeZone.CurrentTimeZone.GetUtcOffset(DateTime.Now);
}
}

/// <summary>
/// Converts a time zone (e.g. "PST") to an offset string (e.g. "-0700").
/// </summary>
/// <param name="tz">The time zone to convert.</param>
/// <returns>The offset string (e.g. "-0700").</returns>
public static string TimeZoneToOffset(string tz) {
tz = tz.ToUpper().Trim()
.Replace("PACIFIC", "PST")
.Replace("MOUNTAIN", "MST")
.Replace("CENTRAL", "CST")
.Replace("EASTERN", "EST");
for (int i=0; i<TimeZones.Length; i++) {
if (((string)((string[])TimeZones.GetValue(i)).GetValue(0)) == tz) {
return ((string)((string[])TimeZones.GetValue(i)).GetValue(1));
}
}
return
System.TimeZone.CurrentTimeZone.GetUtcOffset(DateTime.Now).ToString()
.Replace(":", "").Substring(0, 5);
}

public static string OffsetToTimeZone(string offset) {
foreach (string[] tz in TimeZones) {
if (((string)tz.GetValue(1)) == offset) {
return (string)tz.GetValue(0);
}
}
return "GMT";
}

public static string OffsetToTimeZone(string offset, bool
isDaylightSavings) {
foreach (string[] tz in TimeZones) {
if (((string)tz.GetValue(1)) == offset) {
string tzs = (string)tz.GetValue(0);
if (isDaylightSavings) {
switch (tzs) {
case "PDT":
return tzs;
case "MDT":
return tzs;
case "CDT":
return tzs;
case "EDT":
return tzs;
case "MST":
return "PDT";
case "CST":
return "MDT";
case "EST":
return "CDT";
default:
return tzs;
}
} else {
return tzs;
}
//return;
//return (string)tz.GetValue(0);
}
}
return "GMT";
}

public static string DateToRFC1123String(DateTime dt, bool
fromLocalTimeZone) {
if (fromLocalTimeZone) {
dt = dt.ToUniversalTime();
}
System.Globalization.CultureInfo ci = new
System.Globalization.CultureInfo("en-US", false);
string ret = dt.ToString(ci.DateTimeFormat.RFC1123Pattern, ci);
return ret;
}


/// <summary>
/// Parses dates with time zones in the following formats:
/// "Thu, 17 Jul 2003 12:35:18 PST",
/// "Thu, 17 Jul 2003 12:35:18 -0700".
/// Converts the time to the local time zone.
/// </summary>
/// <param name="dateTime">The date/time to parse.</param>
/// <returns>A DateTime object</returns>
public static DateTime ParseDateTime(string dateTime) {
System.Globalization.CultureInfo ci = null;
try {
ci = new System.Globalization.CultureInfo("en-US", false);
return DateTime.Parse(dateTime,
ci,
System.Globalization.DateTimeStyles.AllowWhiteSpaces);
} catch (FormatException fex0) {
try {
fex0=fex0; // ignore
return DateTime.Parse(dateTime,
System.Globalization.DateTimeFormatInfo.InvariantInfo,
System.Globalization.DateTimeStyles.AllowWhiteSpaces);

} catch (FormatException fex) {
try {
try {
string iso8601_date = dateTime;
ci = new System.Globalization.CultureInfo("en-US", false);
return DateTime.ParseExact(
iso8601_date,
ci.DateTimeFormat.SortableDateTimePattern,
ci.DateTimeFormat);
} catch {}
string loc = fex.Message.Substring(fex.Message.LastIndexOf(" "));
loc = loc.Substring(0, loc.LastIndexOf("."));
string tz = "";
if (loc.Trim() == "DateTime") {
tz = dateTime.Substring(dateTime.LastIndexOf(" ")).Trim();
dateTime = dateTime.Substring(0, dateTime.Length - tz.Length);
} else {
try {
int iLoc = int.Parse(loc);
tz = dateTime.Substring(iLoc);
tz = TimeZoneToOffset(tz);
dateTime = dateTime.Substring(0, iLoc);
} catch {
}
}
ci = new System.Globalization.CultureInfo("en-US", false);
DateTime ret = DateTime.Parse(dateTime,
ci,
System.Globalization.DateTimeStyles.AllowWhiteSpaces);

// offset for time zone
if (tz.Length > 0) {
try {
if (tz.Length == 4 && tz.Substring(0, 1) != "-") {
try {
int.Parse(tz.Substring(0, 1));
tz = "+" + tz;
} catch {
}
}
if (tz.Length == 5 && tz.Substring(0, 1) == "-" ||
tz.Length == 5 && tz.Substring(0, 1) == "+") {
try {
int h = int.Parse(tz.Substring(1, 2));
int m = int.Parse(tz.Substring(3, 2));
if (tz.Substring(0, 1) == "-") {
ret = ret.AddHours((h * -1) - SysDTOffset.Hours);
ret = ret.AddMinutes((m * -1) - SysDTOffset.Minutes);
} else {
ret = ret.AddHours(h - SysDTOffset.Hours);
ret = ret.AddMinutes(m - SysDTOffset.Minutes);
}
} catch {
}
}
} catch {}
}

return ret;
} catch {
return new DateTime(0);
}
}
}
}

/// <summary>
/// An array of time zones
/// (e.g. new string[] {"PST", "-0700", "(US) Pacific Standard"}).
/// </summary>
public static string[][] TimeZones = new string[][] {
new string[] {"ACDT", "+1030", "Australian Central Daylight"},
new string[] {"ACST", "+0930", "Australian Central Standard"},
new string[] {"ADT", "-0300", "(US) Atlantic Daylight"},
Jon Davis
5/18/2004 2:22:12 AM
I already said that I use a variation of RFC 1123 to get RFC 822
ToString("R") (RFC 1123), but regarding the time zone, .NET's 1123 output
always says "GMT", which is utterly useless. And rather than go with a time
zone, I decided to force a reference to the offset value rather than a time
zone. But for those RSS files (NOT files produced by my app) that my app
attempts to read that has a time zone, I have to read the time zone because
the .NET Framework DOES NOT understand time zones nor offsets.

Being neither here nor there, I'm trying to parse the date, not just produce
the date which as I said already works (using .NET's RFC 1123 formatting as
you suggest).

Jon


"Ed Courtenay" <replace-this-with-my-first-name@edcourtenay.co.uk> wrote in
message news:uvZT$bLPEHA.624@TK2MSFTNGP11.phx.gbl...
[quoted text, click to view]

Jon Davis
5/18/2004 2:24:31 AM
[quoted text, click to view]

By the way, no it's not a web app, it's a Windows app. That's the point.
People all over the world are downloading my software. Everyone's got a
different language config on their system, so I have to force it to "en-US"
in accordance to RFC 822.

The app is http://www.powerblog.net/ if you want to further explore
assumptions as to what the scenario is.

Jon


"Ed Courtenay" <replace-this-with-my-first-name@edcourtenay.co.uk> wrote in
message news:uvZT$bLPEHA.624@TK2MSFTNGP11.phx.gbl...
[quoted text, click to view]

Jon Davis
5/18/2004 4:11:54 AM
Guys, we are completely off-topic here. I truncate (remove) the time zone /
offset before parsing the date if .NET can't parse the date automatically.
Hence I won't belabor the time zone subject any further.

The problem is that even with time zone / offset removed, while the
date/time is being parsed fine on my computer, it is not parsing on a
foreigner's computer. It is a Pakistan, I believe. Might as well be Chinese,
it doesn't matter, it *should work* if I specify "en-US", so why does it not
work?

Jon



"Ed Courtenay" <replace-this-with-my-first-name@edcourtenay.co.uk> wrote in
message news:%2388SoJMPEHA.624@TK2MSFTNGP11.phx.gbl...
[quoted text, click to view]

Jon Davis
5/18/2004 4:37:27 AM
The date string in a test file for one of my guinea pig users is "Thu, 13
May 2004 15:54:25 +0200", which is what my app came up with in the
DateToRFC822String() method (see first post in thread).

Sorry, there is no stack trace. I can and may have to rebuild it with the
removal of a quiet try..catch, and have my guinea pig re-download and
re-test, but I have burdened him with like five rebuilds of my app for this
bug and I'm so embarrassed..

... If that's the only way, it's the only way *sigh*.

Jon


"Ed Courtenay" <replace-this-with-my-first-name@edcourtenay.co.uk> wrote in
message news:u3yffpMPEHA.308@TK2MSFTNGP11.phx.gbl...
[quoted text, click to view]

Jon Davis
5/18/2004 5:34:56 AM
is InvariantCulture based on en-US?

"Ed Courtenay" <replace-this-with-my-first-name@edcourtenay.co.uk> wrote in
message news:%23caVIDNPEHA.624@TK2MSFTNGP11.phx.gbl...
[quoted text, click to view]

Jon Davis
5/18/2004 6:00:02 AM
The exception you describe would be because parseCulture is null. Try making
it en-US?

I'm going to make some subtle changes based on what you experimented ..

Jon

"Ed Courtenay" <replace-this-with-my-first-name@edcourtenay.co.uk> wrote in
message news:%23caVIDNPEHA.624@TK2MSFTNGP11.phx.gbl...
[quoted text, click to view]

Ed Courtenay
5/18/2004 10:03:26 AM
[quoted text, click to view]

It looks to me like you're making a rod for your own back here; why are
you interested in the various time zones you're handling? I'm assuming
that the application you're writing is Web based, and if so just write
it from the point of view of the web server.

If you want to get an RFC 1123 (RFC 822) format date string, simply use:

DateTime dateTime = DateTime.Now;
string dateString = dateTime.ToString("R");

--

Ed Courtenay
[MCP, MCSD]
Jon Davis
5/18/2004 11:01:28 AM
Thanks, Ed!

The major change to be made was to pass the dateFormat param (with GMT'
changed to zzz) to ParseExact(), which you describe below, and my guinea pig
user reports that this change works (just before I got your e-mail). So
thanks very much, glad to see confirmation.

Jon



"Ed Courtenay" <replace-this-with-my-first-name@edcourtenay.co.uk> wrote in
message news:eBXtiNOPEHA.2128@TK2MSFTNGP11.phx.gbl...
[quoted text, click to view]

Ed Courtenay
5/18/2004 11:25:09 AM
[quoted text, click to view]

I don't understand where 'forcing en-US' comes into this; times
specified in RFC 822 (and in 1123 and 2822) are based on UT/GMT.

Granted, .NET's DateTime parser will not handle timezone specifiers like
PST, CDT etc., which I have to admit is a major oversight.

When writing dates, I simply would not bother trying to convert to a
local timezone - just use ToString("R"). Parsing is anpther matter
however; the following might help:

string dateFormat =
CultureInfo.InvariantCulture.DateTimeFormat.RFC1123Pattern + " zzz";
string dateString = "Tue, 27 Apr 2004 08:16:13 GMT +08:00";
Console.WriteLine("{0:R}", DateTime.ParseExact(dateString, dateFormat,
null));

So, if you parse off the timezone specifier in your input string and
replace it with the timezone offset as 'GMT [+|-]hh:mm', it should work.

Hope this helps


[quoted text, click to view]

--

Ed Courtenay
[MCP, MCSD]
Ed Courtenay
5/18/2004 12:22:10 PM
[quoted text, click to view]

Sorry, I obviously misunderstood your problem; apologies for the wild
goose chase!

I don't suppose you have a stack trace of the error generated, or a copy
of the string that your parser is actually trying to work on do you?

--

Ed Courtenay
[MCP, MCSD]
Ed Courtenay
5/18/2004 1:08:03 PM
[quoted text, click to view]

Well, I hope this will help... ;)

Thread.CurrentThread.CurrentCulture = new CultureInfo("ur-PK");
string dateFormat =
CultureInfo.InvariantCulture.DateTimeFormat.RFC1123Pattern.Replace("'GMT'",
"zzz");
string dateString = "Thu, 13 May 2004 15:54:25 +0200";
CultureInfo parseCulture = null;
parseCulture = CultureInfo.InvariantCulture;
DateTime dateTime = DateTime.ParseExact(dateString.Replace("GMT",
"+0000"), dateFormat, parseCulture);
Console.WriteLine("{0:R}", dateTime);

If you comment out "parseCulture = CultureInfo.InvariantCulture;" you'll
get an exception, otherwise this will work as expected.


--

Ed Courtenay
[MCP, MCSD]
Ed Courtenay
5/18/2004 2:02:41 PM
[quoted text, click to view]

From MSDN:

"The CultureInfo.InvariantCulture property is neither a neutral nor a
specific culture. It is a third type of culture that is
culture-insensitive. It is associated with the English language but not
with a country or region. You can use InvariantCulture in almost any
method in the System.Globalization namespace that requires a culture.
However, you should use the invariant culture only for processes that
require culture-independent results, such as system services. In other
cases, it produces results that might be linguistically incorrect or
culturally inappropriate."

--

Ed Courtenay
[MCP, MCSD]
Ed Courtenay
5/18/2004 2:38:17 PM
[quoted text, click to view]

Jon,

I've just looked through the code you provided, and I think I can see
where there's a potential problem.

You're parsing the error message provided by the FormatException to
determine the type of error thrown. In particular, you're assuming that
the message is going to be something like:

"The string was not recognized as a valid DateTime. There is a unknown
word starting at index 26."

or

"The string was not recognized as a valid DateTime."

and handling the last 'word' in the message; you cannot guarantee what
the format of the message will be, and bear in mind that the message
will change depending on the locale of the system. In other words, you
cannot guarantee that the exception is in English!

I'm fairly certain that this is where your problem lies.

--

Ed Courtenay
[MCP, MCSD]
Ed Courtenay
5/18/2004 3:21:13 PM
[quoted text, click to view]

I've re-written your ParseDateTime function so that it doesn't rely on
the format of the FormatException message; feel free to use/abuse it!

I hope this helps...

public static DateTime ParseDateTime(string dateTime)
{
System.Globalization.CultureInfo ci = CultureInfo.InvariantCulture;

try
{
return DateTime.Parse(dateTime, ci, DateTimeStyles.AllowWhiteSpaces);
}
catch (FormatException)
{
try
{
return DateTime.ParseExact(dateTime, new string[]
{ci.DateTimeFormat.SortableDateTimePattern,
ci.DateTimeFormat.RFC1123Pattern}, ci, DateTimeStyles.AllowWhiteSpaces);
}
catch (FormatException)
{
string dateFormat =
ci.DateTimeFormat.RFC1123Pattern.Replace("'GMT'", "zzz");
foreach(string[] entry in TimeZones)
{
if (dateTime.EndsWith(entry[0]))
{
dateTime = String.Format("{0}{1}", dateTime.Substring(0,
dateTime.Length - entry[0].Length), entry[1]);
break; }
}
try
{
return DateTime.ParseExact(dateTime, dateFormat, ci,
DateTimeStyles.AllowWhiteSpaces);
}
catch (FormatException)
{
return DateTime.Now;
}
}
}
}


--

Ed Courtenay
[MCP, MCSD]
AddThis Social Bookmark Button