bug-cvs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Bug-gnulib] getdate.y question


From: Paul Eggert
Subject: Re: [Bug-gnulib] getdate.y question
Date: Thu, 28 Oct 2004 13:47:37 -0700
User-agent: Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux)

Derek Robert Price <derek@ximbiot.com> writes:

> The fact that I couldn't find the boundary led me to believe that
> there is something here that I do not understand, however.

Yes, unfortunately the code (like most code having to do with time) is
way too tricky.

Rather than worry about fixing this one problem, could you please try
the following patch, which fixes all the getdate problems that I know
of and have time to fix right now?  If it works I can check it into
gnulib with a proper changelog entry and documentation.  This change
also adds a TZ="xxx" feature requested by RMS.  Thanks.

--- ../gnulib/lib/getdate.y     2004-10-24 22:39:03 -0700
+++ lib/getdate.y       2004-10-28 13:45:46 -0700
@@ -22,22 +22,19 @@
    <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990.
 
    Modified by Paul Eggert <eggert@twinsun.com> in August 1999 to do
-   the right thing about local DST, and in February 2004 to support
-   nanosecond-resolution time stamps.  Unlike previous versions, this
-   version is reentrant.  */
+   the right thing about local DST, in February 2004 to support
+   nanosecond-resolution time stamps, and in October 2004 to support
+   TZ values in dates.  */
 
 /* FIXME: Check for arithmetic overflow in all cases, not just
-   some of them.
-
-   FIXME: The current code uses 'int' to count seconds; it should use
-   something like 'intmax_t' to support time stamps that don't fit in
-   32 bits.  */
+   some of them.  */
 
 #ifdef HAVE_CONFIG_H
 # include <config.h>
 #endif
 
 #include "getdate.h"
+#include "xalloc.h"
 
 #include <alloca.h>
 
@@ -53,6 +50,7 @@
 
 #include <ctype.h>
 #include <limits.h>
+#include <stdlib.h>
 
 #if STDC_HEADERS || (! defined isascii && ! HAVE_ISASCII)
 # define IN_CTYPE_DOMAIN(c) 1
@@ -97,6 +95,15 @@ typedef struct
   size_t digits;
 } textint;
 
+/* A TZ value.  BASE points to a '"'-terminated string.  SIZE counts
+   the number of bytes in the represented string, including the
+   terminating null, but not including escaping '\'s.  */
+typedef struct
+{
+  char const *base;
+  size_t size;
+} tztext;
+
 /* An entry in the lexical lookup table.  */
 typedef struct
 {
@@ -128,6 +135,10 @@ typedef struct
   /* Time zone, in minutes east of UTC.  */
   long int time_zone;
 
+  /* Value of TZ environment variable, to be used when parsing the
+     string.  */
+  tztext tz;
+
   /* Style used for time.  */
   int meridian;
 
@@ -155,6 +166,7 @@ typedef struct
   size_t local_zones_seen;
   size_t rels_seen;
   size_t times_seen;
+  size_t tzs_seen;
   size_t zones_seen;
 
   /* Table of local time zone abbrevations, terminated by a null entry.  */
@@ -167,7 +179,8 @@ static int yyerror (parser_control *, ch
 
 %}
 
-/* We want a reentrant parser.  */
+/* We want a reentrant parser, even if the TZ manipulation and the calls to
+   localtime and gmtime are not reentrant.  */
 %pure-parser
 %parse-param { parser_control *pc }
 %lex-param { parser_control *pc }
@@ -498,6 +511,8 @@ o_merid:
 
 %%
 
+static char const TZ[] = "TZ";
+
 static table const meridian_table[] =
 {
   { "AM",   tMERIDIAN, MERam },
@@ -990,6 +1005,51 @@ yyerror (parser_control *pc ATTRIBUTE_UN
   return 0;
 }
 
+/* If *TM0 is the old and *TM1 is the new value of a struct tm after passing it
+   to mktime, return true if it's OK that mktime returned T.  *TM0 must be an
+   absolute time stamp with in-range members.  */
+
+static bool
+mktime_ok (struct tm const *tm0, struct tm const *tm1, time_t t)
+{
+  if (t == (time_t) -1)
+    {
+      /* Guard against falsely reporting an error when parsing a time stamp
+        equal to 1969-12-31 23:59:59 UTC, on a host that supports such a
+        time stamp.  */
+      tm1 = localtime (&t);
+      if (!tm1)
+       return false;
+    }
+
+  return ! ((tm0->tm_sec ^ tm1->tm_sec)
+           | (tm0->tm_min ^ tm1->tm_min)
+           | (tm0->tm_hour ^ tm1->tm_hour)
+           | (tm0->tm_mday ^ tm1->tm_mday)
+           | (tm0->tm_mon ^ tm1->tm_mon)
+           | (tm0->tm_year ^ tm1->tm_year));
+}
+
+/* A reasonable upper bound for the size of ordinary TZ strings.
+   Use heap allocation if TZ's length exceeds this.  */
+enum { TZBUFSIZE = 100 };
+
+/* Return a copy of TZ, stored in TZBUF if it fits, and heap-allocated
+   otherwise.  */
+static char *
+get_tz (char tzbuf[TZBUFSIZE])
+{
+  char *tz = getenv (TZ);
+  if (tz)
+    {
+      size_t tzsize = strlen (tz) + 1;
+      tz = (tzsize <= TZBUFSIZE
+           ? memcpy (tzbuf, tz, tzsize)
+           : xmemdup (tz, tzsize));
+    }
+  return tz;
+}
+
 /* Parse a date/time string, storing the resulting time value into *RESULT.
    The string itself is pointed to by P.  Return true if successful.
    P can be an incomplete or relative time specification; if so, use
@@ -1004,6 +1064,11 @@ get_date (struct timespec *result, char 
   struct tm tm0;
   parser_control pc;
   struct timespec gettime_buffer;
+  unsigned char c;
+  bool tz_was_altered = false;
+  char *tz0 = NULL;
+  char tz0buf[TZBUFSIZE];
+  bool ok = false;
 
   if (! now)
     {
@@ -1019,6 +1084,44 @@ get_date (struct timespec *result, char 
   if (! tmp)
     return false;
 
+  while (c = *p, ISSPACE (c))
+    p++;
+
+  if (p[0] == 'T' && p[1] == 'Z' && p[2] == '=' && p[3] == '"')
+    {
+      char const *tzbase = p + 4;
+      size_t tzsize = 1;
+      char const *s;
+      
+      for (s = tzbase; *s; s++, tzsize++)
+       if (*s == '\\')
+         {
+           s++;
+           if (! (*s == '\\' || *s == '"'))
+             break;
+         }
+       else if (*s == '"')
+         {
+           char *z;
+           char *tz1;
+           char tz1buf[TZBUFSIZE];
+           bool large_tz = TZBUFSIZE < tzsize;
+           bool setenv_ok;
+           tz0 = get_tz (tz0buf);
+           z = tz1 = large_tz ? xmalloc (tzsize) : tz1buf;
+           for (s = tzbase; *s != '"'; s++)
+             *z++ = *(s += *s == '\\');
+           *z = '\0';
+           setenv_ok = setenv (TZ, tz1, 1) == 0;
+           if (large_tz)
+             free (tz1);
+           if (!setenv_ok)
+             goto fail;
+           tz_was_altered = true;
+           p = s + 1;
+         }
+    }
+
   pc.input = p;
   pc.year.value = tmp->tm_year;
   pc.year.value += TM_YEAR_BASE;
@@ -1106,142 +1209,171 @@ get_date (struct timespec *result, char 
     }
 
   if (yyparse (&pc) != 0)
-    return false;
+    goto fail;
 
   if (pc.timespec_seen)
-    {
-      *result = pc.seconds;
-      return true;
-    }
-
-  if (1 < pc.times_seen || 1 < pc.dates_seen || 1 < pc.days_seen
-      || 1 < (pc.local_zones_seen + pc.zones_seen)
-      || (pc.local_zones_seen && 1 < pc.local_isdst))
-    return false;
-
-  tm.tm_year = to_year (pc.year) - TM_YEAR_BASE + pc.rel_year;
-  tm.tm_mon = pc.month - 1 + pc.rel_month;
-  tm.tm_mday = pc.day + pc.rel_day;
-  if (pc.times_seen || (pc.rels_seen && ! pc.dates_seen && ! pc.days_seen))
-    {
-      tm.tm_hour = to_hour (pc.hour, pc.meridian);
-      if (tm.tm_hour < 0)
-       return false;
-      tm.tm_min = pc.minutes;
-      tm.tm_sec = pc.seconds.tv_sec;
-    }
+    *result = pc.seconds;
   else
     {
-      tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
-      pc.seconds.tv_nsec = 0;
-    }
-
-  /* Let mktime deduce tm_isdst if we have an absolute time stamp,
-     or if the relative time stamp mentions days, months, or years.  */
-  if (pc.dates_seen | pc.days_seen | pc.times_seen | pc.rel_day
-      | pc.rel_month | pc.rel_year)
-    tm.tm_isdst = -1;
-
-  /* But if the input explicitly specifies local time with or without
-     DST, give mktime that information.  */
-  if (pc.local_zones_seen)
-    tm.tm_isdst = pc.local_isdst;
+      if (1 < pc.times_seen || 1 < pc.dates_seen || 1 < pc.days_seen
+         || 1 < (pc.local_zones_seen + pc.zones_seen)
+         || (pc.local_zones_seen && 1 < pc.local_isdst))
+       goto fail;
+
+      tm.tm_year = to_year (pc.year) - TM_YEAR_BASE;
+      tm.tm_mon = pc.month - 1;
+      tm.tm_mday = pc.day;
+      if (pc.times_seen || (pc.rels_seen && ! pc.dates_seen && ! pc.days_seen))
+       {
+         tm.tm_hour = to_hour (pc.hour, pc.meridian);
+         if (tm.tm_hour < 0)
+           goto fail;
+         tm.tm_min = pc.minutes;
+         tm.tm_sec = pc.seconds.tv_sec;
+       }
+      else
+       {
+         tm.tm_hour = tm.tm_min = tm.tm_sec = 0;
+         pc.seconds.tv_nsec = 0;
+       }
 
-  tm0 = tm;
+      /* Let mktime deduce tm_isdst if we have an absolute time stamp.  */
+      if (pc.dates_seen | pc.days_seen | pc.times_seen)
+       tm.tm_isdst = -1;
+
+      /* But if the input explicitly specifies local time with or without
+        DST, give mktime that information.  */
+      if (pc.local_zones_seen)
+       tm.tm_isdst = pc.local_isdst;
 
-  Start = mktime (&tm);
+      tm0 = tm;
 
-  if (Start == (time_t) -1)
-    {
+      Start = mktime (&tm);
 
-      /* Guard against falsely reporting errors near the time_t boundaries
-        when parsing times in other time zones.  For example, if the min
-        time_t value is 1970-01-01 00:00:00 UTC and we are 8 hours ahead
-        of UTC, then the min localtime value is 1970-01-01 08:00:00; if
-        we apply mktime to 1970-01-01 00:00:00 we will get an error, so
-        we apply mktime to 1970-01-02 08:00:00 instead and adjust the time
-        zone by 24 hours to compensate.  This algorithm assumes that
-        there is no DST transition within a day of the time_t boundaries.  */
-      if (pc.zones_seen)
+      if (! mktime_ok (&tm0, &tm, Start))
        {
-         tm = tm0;
-         if (tm.tm_year <= EPOCH_YEAR - TM_YEAR_BASE)
-           {
-             tm.tm_mday++;
-             pc.time_zone += 24 * 60;
-           }
+         if (! pc.zones_seen)
+           goto fail;
          else
            {
-             tm.tm_mday--;
-             pc.time_zone -= 24 * 60;
+             /* Guard against falsely reporting errors near the time_t
+                boundaries when parsing times in other time zones.  For
+                example, suppose the input string "1969-12-31 23:00:00 -0100",
+                the current time zone is 8 hours ahead of UTC, and the min
+                time_t value is 1970-01-01 00:00:00 UTC.  Then the min
+                localtime value is 1970-01-01 08:00:00, and mktime will
+                therefore fail on 1969-12-31 23:00:00.  To work around the
+                problem, set the time zone to 1 hour behind UTC temporarily
+                by setting TZ="XXX1:00" and try mktime again.  */
+
+             long int time_zone = pc.time_zone;
+             long int abs_time_zone = time_zone < 0 ? - time_zone : time_zone;
+             long int abs_time_zone_hour = abs_time_zone / 60;
+             int abs_time_zone_min = abs_time_zone % 60;
+             char tz1buf[sizeof "XXX+0:00"
+                         + sizeof pc.time_zone * CHAR_BIT / 3];
+             if (!tz_was_altered)
+               tz0 = get_tz (tz0buf);
+             sprintf (tz1buf, "XXX%s%ld:%02d", "-" + (time_zone < 0),
+                      abs_time_zone_hour, abs_time_zone_min);
+             if (setenv (TZ, tz1buf, 1) != 0)
+               goto fail;
+             tz_was_altered = true;
+             tm = tm0;
+             Start = mktime (&tm);
+             if (! mktime_ok (&tm0, &tm, Start))
+               goto fail;
            }
-         Start = mktime (&tm);
        }
 
-      if (Start == (time_t) -1)
-       return false;
-    }
-
-  if (pc.days_seen && ! pc.dates_seen)
-    {
-      tm.tm_mday += ((pc.day_number - tm.tm_wday + 7) % 7
-                    + 7 * (pc.day_ordinal - (0 < pc.day_ordinal)));
-      tm.tm_isdst = -1;
-      Start = mktime (&tm);
-      if (Start == (time_t) -1)
-       return false;
-    }
+      if (pc.days_seen && ! pc.dates_seen)
+       {
+         tm.tm_mday += ((pc.day_number - tm.tm_wday + 7) % 7
+                        + 7 * (pc.day_ordinal - (0 < pc.day_ordinal)));
+         tm.tm_isdst = -1;
+         Start = mktime (&tm);
+         if (Start == (time_t) -1)
+           goto fail;
+       }
 
-  if (pc.zones_seen)
-    {
-      long int delta = pc.time_zone * 60;
-      time_t t1;
+      if (pc.zones_seen)
+       {
+         long int delta = pc.time_zone * 60;
+         time_t t1;
 #ifdef HAVE_TM_GMTOFF
-      delta -= tm.tm_gmtoff;
+         delta -= tm.tm_gmtoff;
 #else
-      time_t t = Start;
-      struct tm const *gmt = gmtime (&t);
-      if (! gmt)
-       return false;
-      delta -= tm_diff (&tm, gmt);
+         time_t t = Start;
+         struct tm const *gmt = gmtime (&t);
+         if (! gmt)
+           goto fail;
+         delta -= tm_diff (&tm, gmt);
 #endif
-      t1 = Start - delta;
-      if ((Start < t1) != (delta < 0))
-       return false;   /* time_t overflow */
-      Start = t1;
+         t1 = Start - delta;
+         if ((Start < t1) != (delta < 0))
+           goto fail;  /* time_t overflow */
+         Start = t1;
+       }
+
+      /* Add relative date.  */
+      if (pc.rel_year | pc.rel_month | pc.rel_day)
+       {
+         int year = tm.tm_year + pc.rel_year;
+         int month = tm.tm_mon + pc.rel_month;
+         int day = tm.tm_mday + pc.rel_day;
+         if (((year < tm.tm_year) ^ (pc.rel_year < 0))
+             | (month < tm.tm_mon) ^ (pc.rel_month < 0)
+             | (day < tm.tm_mday) ^ (pc.rel_day < 0))
+           goto fail;
+         tm.tm_year = year;
+         tm.tm_mon = month;
+         tm.tm_mday = day;
+         Start = mktime (&tm);
+         if (Start == (time_t) -1)
+           goto fail;
+       }
+
+      /* Add relative hours, minutes, and seconds.  Ignore leap seconds;
+        i.e. "+ 10 minutes" means 600 seconds, even if one of them is a
+        leap second.  Typically this is not what the user wants, but it's
+        too hard to do it the other way, because the time zone indicator
+        must be applied before relative times, and if mktime is applied
+        again the time zone will be lost.  */
+      {
+       long int sum_ns = pc.seconds.tv_nsec + pc.rel_ns;
+       long int normalized_ns = (sum_ns % BILLION + BILLION) % BILLION;
+       time_t t0 = Start;
+       long int d1 = 60 * 60 * pc.rel_hour;
+       time_t t1 = t0 + d1;
+       long int d2 = 60 * pc.rel_minutes;
+       time_t t2 = t1 + d2;
+       long int d3 = pc.rel_seconds;
+       time_t t3 = t2 + d3;
+       long int d4 = (sum_ns - normalized_ns) / BILLION;
+       time_t t4 = t3 + d4;
+
+       if ((d1 / (60 * 60) ^ pc.rel_hour)
+           | (d2 / 60 ^ pc.rel_minutes)
+           | ((t1 < t0) ^ (d1 < 0))
+           | ((t2 < t1) ^ (d2 < 0))
+           | ((t3 < t2) ^ (d3 < 0))
+           | ((t4 < t3) ^ (d4 < 0)))
+         goto fail;
+
+       result->tv_sec = t4;
+       result->tv_nsec = normalized_ns;
+      }
     }
 
-  /* Add relative hours, minutes, and seconds.  Ignore leap seconds;
-     i.e. "+ 10 minutes" means 600 seconds, even if one of them is a
-     leap second.  Typically this is not what the user wants, but it's
-     too hard to do it the other way, because the time zone indicator
-     must be applied before relative times, and if mktime is applied
-     again the time zone will be lost.  */
-  {
-    long int sum_ns = pc.seconds.tv_nsec + pc.rel_ns;
-    long int normalized_ns = (sum_ns % BILLION + BILLION) % BILLION;
-    time_t t0 = Start;
-    long int d1 = 60 * 60 * pc.rel_hour;
-    time_t t1 = t0 + d1;
-    long int d2 = 60 * pc.rel_minutes;
-    time_t t2 = t1 + d2;
-    long int d3 = pc.rel_seconds;
-    time_t t3 = t2 + d3;
-    long int d4 = (sum_ns - normalized_ns) / BILLION;
-    time_t t4 = t3 + d4;
-
-    if ((d1 / (60 * 60) ^ pc.rel_hour)
-       | (d2 / 60 ^ pc.rel_minutes)
-       | ((t1 < t0) ^ (d1 < 0))
-       | ((t2 < t1) ^ (d2 < 0))
-       | ((t3 < t2) ^ (d3 < 0))
-       | ((t4 < t3) ^ (d4 < 0)))
-      return false;
-
-    result->tv_sec = t4;
-    result->tv_nsec = normalized_ns;
-    return true;
-  }
+  ok = true;
+
+ fail:
+  if (tz_was_altered)
+    ok &= (tz0 ? setenv (TZ, tz0, 1) : unsetenv (TZ)) == 0;
+  if (tz0 != tz0buf)
+    free (tz0);
+
+  return ok;
 }
 
 #if TEST




reply via email to

[Prev in Thread] Current Thread [Next in Thread]