bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: speed up getndelim2


From: Bruno Haible
Subject: Re: speed up getndelim2
Date: Thu, 1 May 2008 16:48:58 +0200
User-agent: KMail/1.5.4

Eric Blake wrote:
> $ time (~/coreutils/src/cut -f 2-3 < data > data2)
> 
> real  0m4.168s
> user  0m3.952s
> sys   0m0.109s
> 
> $ time (/bin/cut -f 2-3 < data > data1)
> 
> real  0m6.194s
> user  0m6.109s
> sys   0m0.108s

Impressing!!

However, I have to follow up with a fix for the reallocation: You want
to ensure that after a reallocation of the line buffer,
nbytes_avail >= buffer_len + 1. Not only size >= buffer_len. That makes a
difference. Furthermore when you set
    newsize = buffer_len + size;
you may get less than proportional growth, leading to O(n^2) running time.

Also, there is no need to test the 'done' flag at the beginning of the loop,
only at the end of the loop. Also, this flag is a bit misnamed when there
is also a label 'done' and the two are not equivalent.


2008-05-01  Bruno Haible  <address@hidden>

        * lib/getndelim2.c (getndelim2): Fix newsize computation during
        reallocation. Rename 'done' to 'found_delimiter'.

*** lib/getndelim2.c.orig       2008-05-01 16:45:00.000000000 +0200
--- lib/getndelim2.c    2008-05-01 16:44:28.000000000 +0200
***************
*** 76,82 ****
    ssize_t bytes_stored = -1;
    char *ptr = *lineptr;
    size_t size = *linesize;
!   bool done = false;
  
    if (!ptr)
      {
--- 76,82 ----
    ssize_t bytes_stored = -1;
    char *ptr = *lineptr;
    size_t size = *linesize;
!   bool found_delimiter;
  
    if (!ptr)
      {
***************
*** 103,109 ****
  
    flockfile (stream);
  
!   while (!done)
      {
        /* Here always ptr + size == read_pos + nbytes_avail.
         Also nbytes_avail > 0 || size < nmax.  */
--- 103,110 ----
  
    flockfile (stream);
  
!   found_delimiter = false;
!   do
      {
        /* Here always ptr + size == read_pos + nbytes_avail.
         Also nbytes_avail > 0 || size < nmax.  */
***************
*** 121,127 ****
              if (end)
                {
                  buffer_len = end - buffer + 1;
!                 done = true;
                }
            }
        }
--- 122,128 ----
              if (end)
                {
                  buffer_len = end - buffer + 1;
!                 found_delimiter = true;
                }
            }
        }
***************
*** 137,143 ****
                break;
            }
          if (c == delim1 || c == delim2)
!           done = true;
          buffer_len = 1;
        }
  
--- 138,144 ----
                break;
            }
          if (c == delim1 || c == delim2)
!           found_delimiter = true;
          buffer_len = 1;
        }
  
***************
*** 145,157 ****
         always (unless we get an error while reading the first byte)
         NUL-terminate the line buffer.  */
  
!       if (nbytes_avail < 1 + buffer_len && size < nmax)
        {
          size_t newsize = size < MIN_CHUNK ? size + MIN_CHUNK : 2 * size;
          char *newptr;
  
!         if (newsize < buffer_len)
!           newsize = buffer_len + size;
          if (! (size < newsize && newsize <= nmax))
            newsize = nmax;
  
--- 146,163 ----
         always (unless we get an error while reading the first byte)
         NUL-terminate the line buffer.  */
  
!       if (nbytes_avail < buffer_len + 1 && size < nmax)
        {
+         /* Grow size proportionally, not linearly, to avoid O(n^2)
+            running time.  */
          size_t newsize = size < MIN_CHUNK ? size + MIN_CHUNK : 2 * size;
          char *newptr;
  
!         /* Increase newsize so that it becomes
!            >= (read_pos - ptr) + buffer_len.  */
!         if (newsize - (read_pos - ptr) < buffer_len + 1)
!           newsize = (read_pos - ptr) + buffer_len + 1;
!         /* Respect nmax.  This handles possible integer overflow.  */
          if (! (size < newsize && newsize <= nmax))
            newsize = nmax;
  
***************
*** 193,198 ****
--- 199,205 ----
        if (buffer && freadseek (stream, buffer_len))
        goto unlock_done;
      }
+   while (!found_delimiter);
  
    /* Done - NUL terminate and return the number of bytes read.
       At this point we know that nbytes_avail >= 1.  */





reply via email to

[Prev in Thread] Current Thread [Next in Thread]