bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Bug-gnulib] proposed getline / getndelim2 cleanup for gnulib


From: Paul Eggert
Subject: [Bug-gnulib] proposed getline / getndelim2 cleanup for gnulib
Date: Wed, 12 May 2004 00:26:28 -0700
User-agent: Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux)

Following up on Derek's proposal to clean up getndelim2, which Jim
liked, I'm proposing the following patch.  This adds one extra
wrinkle: it uses EOF rather than 0 to indicate no delimiter, which
removes an ambiguity when one wants '\0' to be a delimiter.  It also
fixes some more potential integer-overflow bugs.

tar, coreutils, and CVS will be affected by the API change, but their
maintainers have signed off on it.  Are any other programs affected?

2004-05-11  Derek Price  <address@hidden>
            Paul Eggert  <address@hidden>

        getline cleanup.  This changes the getndelim2 API: both the order
        of arguments and the meaning of delim2 (which now uses EOF, not 0,
        to indicate no delimiter).
        
        * lib/getline.c: Don't include stddef.h or stdio.h, since our
        interface does that.
        (getline): Always use getdelim, so that we don't have two
        copies of this code.
        * lib/getndelim2.c: Include <limits.h>, <inttypes.h>, <stdint.h>
        if available.
        (PTRDIFF_MAX, SIZE_MAX, SSIZE_MAX): Define if not defined.
        (GETNDELIM2_MAXIMUM): New macro.
        (getndelim2): Reorder arguments.  A delimiter equal to EOF now means
        no delimiter, instead of the old practice of delim2==0.  All callers
        changed.
        Return -1 on overflow, instead of returning junk.
        Do not set *linesize unless allocation succeeds.
        * lib/getndelim2.h: Do not include stddef.h; no longer needed, now
        that we include sys/types.h.
        * lib/getnline.h: Likewise.
        * lib/getndelim2.h (GETNLINE_NO_LIMIT): New macro.
        (getndelim2): Reorder arguments.
        * lib/getnline.c (getnline, getndelim):
        Don't discard the NMAX argument.
        (getnline): Invoke getndelim, to avoid code duplication.
        * lib/getnline.h (GETNLINE_NO_LIMIT): New macro, used instead
        of (size_t) -1 by callers of the getnline family.

Index: lib/getline.c
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/getline.c,v
retrieving revision 1.16
diff -p -u -r1.16 getline.c
--- lib/getline.c       14 Aug 2003 21:50:36 -0000      1.16
+++ lib/getline.c       12 May 2004 07:17:50 -0000
@@ -1,7 +1,7 @@
 /* getline.c -- Replacement for GNU C library function getline
 
-   Copyright (C) 1993, 1996, 1997, 1998, 2000, 2003 Free Software
-   Foundation, Inc.
+   Copyright (C) 1993, 1996, 1997, 1998, 2000, 2003, 2004 Free
+   Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -23,36 +23,22 @@
 # include <config.h>
 #endif
 
-/* Specification.  */
 #include "getline.h"
 
-#include <stddef.h>
-#include <stdio.h>
-
-/* Get ssize_t.  */
-#include <sys/types.h>
-
-#if defined __GNU_LIBRARY__ && HAVE_GETDELIM
-
-ssize_t
-getline (char **lineptr, size_t *linesize, FILE *stream)
-{
-  return getdelim (lineptr, linesize, '\n', stream);
-}
-
-#else /* ! have getdelim */
+#if ! (defined __GNU_LIBRARY__ && HAVE_GETDELIM)
 
 # include "getndelim2.h"
 
 ssize_t
-getline (char **lineptr, size_t *linesize, FILE *stream)
+getdelim (char **lineptr, size_t *linesize, int delimiter, FILE *stream)
 {
-  return getndelim2 (lineptr, linesize, (size_t)(-1), stream, '\n', 0, 0);
+  return getndelim2 (lineptr, linesize, 0, GETNLINE_NO_LIMIT, delimiter, EOF,
+                     stream);
 }
+#endif
 
 ssize_t
-getdelim (char **lineptr, size_t *linesize, int delimiter, FILE *stream)
+getline (char **lineptr, size_t *linesize, FILE *stream)
 {
-  return getndelim2 (lineptr, linesize, (size_t)(-1), stream, delimiter, 0, 0);
+  return getdelim (lineptr, linesize, '\n', stream);
 }
-#endif
Index: lib/getndelim2.c
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/getndelim2.c,v
retrieving revision 1.6
diff -p -u -r1.6 getndelim2.c
--- lib/getndelim2.c    25 Apr 2004 14:12:22 -0000      1.6
+++ lib/getndelim2.c    12 May 2004 07:17:50 -0000
@@ -24,82 +24,106 @@
 # include <config.h>
 #endif
 
-/* Specification.  */
 #include "getndelim2.h"
 
 #include <stdlib.h>
 
 #include "unlocked-io.h"
 
-/* Always add at least this many bytes when extending the buffer.  */
+#include <limits.h>
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#if HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifndef PTRDIFF_MAX
+# define PTRDIFF_MAX ((ptrdiff_t) (SIZE_MAX / 2))
+#endif
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+#ifndef SSIZE_MAX
+# define SSIZE_MAX ((ssize_t) (SIZE_MAX / 2))
+#endif
+
+/* The maximum value that getndelim2 can return without suffering from
+   overflow problems, either internally (because of pointer
+   subtraction overflow) or due to the API (because of ssize_t).  */
+#define GETNDELIM2_MAXIMUM (PTRDIFF_MAX < SSIZE_MAX ? PTRDIFF_MAX : SSIZE_MAX)
+
+/* Try to add at least this many bytes when extending the buffer.
+   MIN_CHUNK must be no greater than GETNDELIM2_MAXIMUM.  */
 #define MIN_CHUNK 64
 
 ssize_t
-getndelim2 (char **lineptr, size_t *linesize, size_t nmax,
-           FILE *stream, int delim1, int delim2, size_t offset)
+getndelim2 (char **lineptr, size_t *linesize, size_t offset, size_t nmax,
+            int delim1, int delim2, FILE *stream)
 {
-  size_t nbytes_avail;         /* Allocated but unused chars in *LINEPTR.  */
+  size_t nbytes_avail;         /* Allocated but unused bytes in *LINEPTR.  */
   char *read_pos;              /* Where we're reading into *LINEPTR. */
+  ssize_t bytes_stored = -1;
+  char *ptr = *lineptr;
+  size_t size = *linesize;
 
-  if (!lineptr || !linesize || !nmax || !stream)
-    return -1;
-
-  if (!*lineptr)
+  if (!ptr)
     {
-      size_t newlinesize = MIN_CHUNK;
-
-      if (newlinesize > nmax)
-       newlinesize = nmax;
-
-      *linesize = newlinesize;
-      *lineptr = malloc (*linesize);
-      if (!*lineptr)
+      size = nmax < MIN_CHUNK ? nmax : MIN_CHUNK;
+      ptr = malloc (size);
+      if (!ptr)
        return -1;
     }
 
-  if (*linesize < offset)
-    return -1;
+  if (size < offset)
+    goto done;
 
-  nbytes_avail = *linesize - offset;
-  read_pos = *lineptr + offset;
+  nbytes_avail = size - offset;
+  read_pos = ptr + offset;
 
-  if (nbytes_avail == 0 && *linesize >= nmax)
-    return -1;
+  if (nbytes_avail == 0 && nmax <= size)
+    goto done;
 
   for (;;)
     {
-      /* Here always *lineptr + *linesize == read_pos + nbytes_avail.  */
+      /* Here always ptr + size == read_pos + nbytes_avail.  */
 
-      register int c;
+      int c;
 
-      /* We always want at least one char left in the buffer, since we
-        always (unless we get an error while reading the first char)
+      /* We always want at least one byte left in the buffer, since we
+        always (unless we get an error while reading the first byte)
         NUL-terminate the line buffer.  */
 
-      if (nbytes_avail < 2 && *linesize < nmax)
+      if (nbytes_avail < 2 && size < nmax)
        {
-         size_t newlinesize =
-           (*linesize > MIN_CHUNK ? 2 * *linesize : *linesize + MIN_CHUNK);
-         char *p;
-
-         if (! (*linesize < newlinesize && newlinesize <= nmax))
-           newlinesize = nmax;
-
-         *linesize = newlinesize;
-         nbytes_avail = *linesize + *lineptr - read_pos;
-         p = realloc (*lineptr, *linesize);
-         if (!p)
-           return -1;
-         *lineptr = p;
-         read_pos = *linesize - nbytes_avail + *lineptr;
+         size_t newsize = size < MIN_CHUNK ? size + MIN_CHUNK : 2 * size;
+         char *newptr;
+
+         if (! (size < newsize && newsize <= nmax))
+           newsize = nmax;
+
+         if (GETNDELIM2_MAXIMUM < newsize - offset)
+           {
+             size_t newsizemax = offset + GETNDELIM2_MAXIMUM + 1;
+             if (size == newsizemax)
+               goto done;
+             newsize = newsizemax;
+           }
+
+         nbytes_avail = newsize - (read_pos - ptr);
+         newptr = realloc (ptr, newsize);
+         if (!newptr)
+           goto done;
+         ptr = newptr;
+         size = newsize;
+         read_pos = size - nbytes_avail + ptr;
        }
 
       c = getc (stream);
       if (c == EOF)
        {
          /* Return partial line, if any.  */
-         if (read_pos == *lineptr)
-           return -1;
+         if (read_pos == ptr)
+           goto done;
          else
            break;
        }
@@ -110,14 +134,19 @@ getndelim2 (char **lineptr, size_t *line
          nbytes_avail--;
        }
 
-      if (c == delim1 || (delim2 && c == delim2))
+      if (c == delim1 || c == delim2)
        /* Return the line.  */
        break;
     }
 
-  /* Done - NUL terminate and return the number of chars read.
+  /* Done - NUL terminate and return the number of bytes read.
      At this point we know that nbytes_avail >= 1.  */
   *read_pos = '\0';
 
-  return read_pos - (*lineptr + offset);
+  bytes_stored = read_pos - (ptr + offset);
+
+ done:
+  *lineptr = ptr;
+  *linesize = size;
+  return bytes_stored;
 }
Index: lib/getndelim2.h
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/getndelim2.h,v
retrieving revision 1.1
diff -p -u -r1.1 getndelim2.h
--- lib/getndelim2.h    18 Jul 2003 16:58:06 -0000      1.1
+++ lib/getndelim2.h    12 May 2004 07:17:50 -0000
@@ -1,7 +1,7 @@
 /* getndelim2 - Read a line from a stream, stopping at one of 2 delimiters,
    with bounded memory allocation.
 
-   Copyright (C) 2003 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -20,23 +20,24 @@
 #ifndef GETNDELIM2_H
 #define GETNDELIM2_H 1
 
-#include <stddef.h>
 #include <stdio.h>
-
-/* Get ssize_t.  */
 #include <sys/types.h>
 
-/* Read up to (and including) a delimiter DELIM1 from STREAM into *LINEPTR
-   + OFFSET (and NUL-terminate it).  If DELIM2 is non-zero, then read up
-   and including the first occurrence of DELIM1 or DELIM2.  *LINEPTR is
-   a pointer returned from malloc (or NULL), pointing to *LINESIZE bytes of
-   space.  It is realloc'd as necessary.  Reallocation is limited to
-   NMAX bytes; if the line is longer than that, the extra bytes are read but
-   thrown away.
+#define GETNLINE_NO_LIMIT ((size_t) -1)
+
+/* Read into a buffer *LINEPTR returned from malloc (or NULL),
+   pointing to *LINESIZE bytes of space.  Store the input bytes
+   starting at *LINEPTR + OFFSET, and null-terminate them.  Reallocate
+   the buffer as necessary, but if NMAX is not GETNLINE_NO_LIMIT
+   then do not allocate more than NMAX bytes; if the line is longer
+   than that, read and discard the extra bytes.  Stop reading after
+   the first occurrence of DELIM1 or DELIM2, whichever comes
+   first; a delimiter equal to EOF stands for no delimiter.  Read the
+   input bytes from STREAM.
    Return the number of bytes read and stored at *LINEPTR + OFFSET (not
    including the NUL terminator), or -1 on error or EOF.  */
-extern ssize_t getndelim2 (char **lineptr, size_t *linesize, size_t nmax,
-                          FILE *stream, int delim1, int delim2,
-                          size_t offset);
+extern ssize_t getndelim2 (char **lineptr, size_t *linesize, size_t offset,
+                           size_t nmax, int delim1, int delim2,
+                           FILE *stream);
 
 #endif /* GETNDELIM2_H */
Index: lib/getnline.c
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/getnline.c,v
retrieving revision 1.2
diff -p -u -r1.2 getnline.c
--- lib/getnline.c      18 Jul 2003 16:58:06 -0000      1.2
+++ lib/getnline.c      12 May 2004 07:17:50 -0000
@@ -1,6 +1,6 @@
 /* getnline - Read a line from a stream, with bounded memory allocation.
 
-   Copyright (C) 2003 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -26,14 +26,14 @@
 #include "getndelim2.h"
 
 ssize_t
-getnline (char **lineptr, size_t *linesize, size_t nmax, FILE *stream)
+getndelim (char **lineptr, size_t *linesize, size_t nmax,
+          int delimiter, FILE *stream)
 {
-  return getndelim2 (lineptr, linesize, (size_t)(-1), stream, '\n', 0, 0);
+  return getndelim2 (lineptr, linesize, 0, nmax, delimiter, EOF, stream);
 }
 
 ssize_t
-getndelim (char **lineptr, size_t *linesize, size_t nmax,
-          int delimiter, FILE *stream)
+getnline (char **lineptr, size_t *linesize, size_t nmax, FILE *stream)
 {
-  return getndelim2 (lineptr, linesize, (size_t)(-1), stream, delimiter, 0, 0);
+  return getndelim (lineptr, linesize, nmax, '\n', stream);
 }
Index: lib/getnline.h
===================================================================
RCS file: /cvsroot/gnulib/gnulib/lib/getnline.h,v
retrieving revision 1.1
diff -p -u -r1.1 getnline.h
--- lib/getnline.h      17 Jul 2003 16:23:52 -0000      1.1
+++ lib/getnline.h      12 May 2004 07:17:50 -0000
@@ -1,6 +1,6 @@
 /* getnline - Read a line from a stream, with bounded memory allocation.
 
-   Copyright (C) 2003 Free Software Foundation, Inc.
+   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -19,11 +19,10 @@
 #ifndef GETNLINE_H
 #define GETNLINE_H 1
 
-#include <stddef.h>
 #include <stdio.h>
-
-/* Get ssize_t.  */
 #include <sys/types.h>
+
+#define GETNLINE_NO_LIMIT ((size_t) -1)
 
 /* Read a line, up to the next newline, from STREAM, and store it in *LINEPTR.
    *LINEPTR is a pointer returned from malloc (or NULL), pointing to *LINESIZE




reply via email to

[Prev in Thread] Current Thread [Next in Thread]