bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

fts.c: fix bug in du


From: Jim Meyering
Subject: fts.c: fix bug in du
Date: Sun, 12 Nov 2006 18:34:39 +0100

This is a pretty big change for what looks like a little bug,
but it comes with a nice performance gain, too.
I've seen over 20% in some cases.  It will often eliminate half
of the openat and fstat calls.

I don't particularly like the current i-ring name or implementation.
I may end up changing the fts_open API, by adding a new parameter to
specify how many file descriptors to cache via this ring, rather than
hard-coding that number at 4.  Actually, I'm inclined to keep the API
and something like the current default, and to provide an interface to
change the ring size after fts_open has been called.

I've tried to be careful not to introduce file descriptor leaks,
but haven't done extensive testing.

The bug that prompted this was reported by Mike Frysinger, in
  http://article.gmane.org/gmane.comp.gnu.core-utils.bugs/8831
I'll check in the new coreutils test case shortly.

        Make fts (in FTS_CWDFD mode) more efficient by caching a few open
        file descriptors.  This also averts a failure on systems with
        native openat support when a traversed directory lacks "x" access.

        * lib/fts_.h: Include "i-ring.h"
        (struct FTS) [fts_fd_ring]: New member.
        * lib/fts.c (RESTORE_INITIAL_CWD): Also call fd_ring_clear.
        (FCHDIR): Add parentheses.
        (fd_ring_check, fd_ring_print) [!FTS_DEBUG]: Define away.
        (cwd_advance_fd): Add a 3rd parameter.  Adjust all callers.
        When descending, rather than simply closing the previous
        fts_cwd_fd value, push that file descriptor onto the ring.
        (same_fd, fd_ring_print, fd_ring_check) [FTS_DEBUG]: New functions.
        (fts_open): Initialize the new fd_ring member.
        (fts_close): Clear the ring.
        (fts_safe_changedir): When possible, use our new fd_ring to skip
        the diropen and fstat and dev/ino comparison that would normally
        accompany a virtual `chdir ("..")'.

        * modules/fts (Depends-on): Add i-ring.
        * modules/i-ring: New module.
        * lib/i-ring.c, lib/i-ring.h, lib/i-ring-test.c: New files.
        * m4/i-ring.m4: New file.

Index: lib/fts_.h
===================================================================
RCS file: /sources/gnulib/gnulib/lib/fts_.h,v
retrieving revision 1.7
diff -u -r1.7 fts_.h
--- lib/fts_.h  12 Oct 2006 10:32:32 -0000      1.7
+++ lib/fts_.h  12 Nov 2006 17:18:58 -0000
@@ -65,6 +65,7 @@
 # include <stddef.h>
 # include <sys/types.h>
 # include <sys/stat.h>
+# include "i-ring.h"

 typedef struct {
        struct _ftsent *fts_cur;        /* current node */
@@ -163,7 +164,12 @@
                   but it's not appropriate for programs like du.  */
                struct cycle_check_state *state;
        } fts_cycle;
+
 # endif
+       /* A stack of the file descriptors corresponding to the
+          most-recently traversed parent directories.
+          Currently used only in FTS_CWDFD mode.  */
+       I_ring fts_fd_ring;
 } FTS;

 typedef struct _ftsent {
Index: lib/fts.c
===================================================================
RCS file: /sources/gnulib/gnulib/lib/fts.c,v
retrieving revision 1.26
diff -u -r1.26 fts.c
--- lib/fts.c   10 Nov 2006 22:22:31 -0000      1.26
+++ lib/fts.c   12 Nov 2006 17:18:58 -0000
@@ -74,6 +74,7 @@
 # include "lstat.h"
 # include "openat.h"
 # include "unistd--.h"
+# include "same-inode.h"
 #endif

 #include <dirent.h>
@@ -177,17 +178,21 @@
 #define ISSET(opt)     (sp->fts_options & (opt))
 #define SET(opt)       (sp->fts_options |= (opt))

-#define RESTORE_INITIAL_CWD(sp)        FCHDIR (sp, (ISSET (FTS_CWDFD) \
-                                            ? AT_FDCWD        \
-                                            : sp->fts_rfd))
-
-#define FCHDIR(sp, fd) (!ISSET(FTS_NOCHDIR)                \
-                        && (ISSET(FTS_CWDFD)               \
-                             ? (cwd_advance_fd (sp, fd), 0) \
-                            : fchdir(fd)))
+/* FIXME: make this a function */
+#define RESTORE_INITIAL_CWD(sp)                        \
+  (fd_ring_clear (&((sp)->fts_fd_ring)),       \
+   FCHDIR ((sp), (ISSET (FTS_CWDFD) ? AT_FDCWD : (sp)->fts_rfd)))
+
+/* FIXME: FTS_NOCHDIR is now misnamed.
+   Call it FTS_USE_FULL_RELATIVE_FILE_NAMES instead. */
+#define FCHDIR(sp, fd)                                 \
+  (!ISSET(FTS_NOCHDIR) && (ISSET(FTS_CWDFD)            \
+                          ? (cwd_advance_fd ((sp), (fd), true), 0) \
+                          : fchdir (fd)))


 /* fts_build flags */
+/* FIXME: make this an enum */
 #define BCHILD         1               /* fts_children */
 #define BNAMES         2               /* fts_children, names only */
 #define BREAD          3               /* fts_read */
@@ -196,10 +201,13 @@
 # include <inttypes.h>
 # include <stdint.h>
 # include <stdio.h>
+# include "getcwdat.h"
 bool fts_debug = false;
-# define Dprintf(x) do { if (fts_debug) printf x; } while (0)
+# define Dprintf(x) do { if (fts_debug) printf x; } while (false)
 #else
 # define Dprintf(x)
+# define fd_ring_check(x)
+# define fd_ring_print(a, b, c)
 #endif

 #define LEAVE_DIR(Fts, Ent, Tag)                               \
@@ -207,9 +215,21 @@
     {                                                          \
       Dprintf (("  %s-leaving: %s\n", Tag, (Ent)->fts_path));  \
       leave_dir (Fts, Ent);                                    \
+      fd_ring_check (Fts);                                     \
     }                                                          \
   while (false)

+static void
+fd_ring_clear (I_ring *fd_ring)
+{
+  while ( ! i_ring_empty (fd_ring))
+    {
+      int fd = i_ring_pop (fd_ring);
+      if (0 <= fd)
+       close (fd);
+    }
+}
+
 /* Overload the fts_statp->st_size member (otherwise unused, when
    fts_info is FTS_NSOK) to indicate whether fts_read should stat
    this entry or not.  */
@@ -244,19 +264,35 @@
   return dirp;
 }

-/* Virtual fchdir.  Advance SP's working directory
-   file descriptor, SP->fts_cwd_fd, to FD, and close
-   the previous one, ignoring any error.  */
+/* Virtual fchdir.  Advance SP's working directory file descriptor,
+   SP->fts_cwd_fd, to FD, and push the previous value onto the fd_ring.
+   CHDIR_DOWN_ONE is true if FD corresponds to an entry in the directory
+   open on sp->fts_cwd_fd; i.e., to move the working directory one level
+   down.  */
 static void
 internal_function
-cwd_advance_fd (FTS *sp, int fd)
+cwd_advance_fd (FTS *sp, int fd, bool chdir_down_one)
 {
   int old = sp->fts_cwd_fd;
   if (old == fd && old != AT_FDCWD)
     abort ();
+
+  if (chdir_down_one)
+    {
+      /* Push "old" onto the ring.
+        If the displaced file descriptor is non-negative, close it.  */
+      int prev_fd_in_slot = i_ring_push (&sp->fts_fd_ring, old);
+      fd_ring_print (sp, stderr, "post-push");
+      if (0 <= prev_fd_in_slot)
+       close (prev_fd_in_slot); /* ignore any close failure */
+    }
+  else if ( ! ISSET (FTS_NOCHDIR))
+    {
+      if (0 <= old)
+       close (old); /* ignore any close failure */
+    }
+
   sp->fts_cwd_fd = fd;
-  if (0 <= old)
-    close (old); /* ignore any close failure */
 }

 /* Open the directory DIR if possible, and return a file
@@ -448,6 +484,7 @@
            && (sp->fts_rfd = diropen (sp, ".")) < 0)
                SET(FTS_NOCHDIR);

+       i_ring_init (&sp->fts_fd_ring, -1);
        return (sp);

 mem3:  fts_lfree(root);
@@ -521,6 +558,7 @@
            close(sp->fts_rfd);
          }

+       fd_ring_clear (&sp->fts_fd_ring);
        free_dir (sp);

        /* Free up the stream pointer. */
@@ -850,7 +888,7 @@
        sp->fts_child = fts_build(sp, instr);
        if (ISSET(FTS_CWDFD))
          {
-           cwd_advance_fd (sp, fd);
+           cwd_advance_fd (sp, fd, true);
          }
        else
          {
@@ -1228,6 +1266,87 @@
        }
     }
 }
+
+static bool
+same_fd (int fd1, int fd2)
+{
+  struct stat sb1, sb2;
+  return (fstat (fd1, &sb1) == 0
+         && fstat (fd2, &sb2) == 0
+         && SAME_INODE (sb1, sb2));
+}
+
+static void
+fd_ring_print (FTS const *sp, FILE *stream, char const *msg)
+{
+  I_ring const *fd_ring = &sp->fts_fd_ring;
+  unsigned int i = fd_ring->fts_front;
+  char *cwd = getcwdat (sp->fts_cwd_fd, NULL, 0);
+  fprintf (stream, "=== %s ========== %s\n", msg, cwd);
+  free (cwd);
+  if (i_ring_empty (fd_ring))
+    return;
+
+  while (true)
+    {
+      int fd = fd_ring->fts_fd_ring[i];
+      if (fd < 0)
+       fprintf (stream, "%d: %d:\n", i, fd);
+      else
+       {
+         char *wd = getcwdat (fd, NULL, 0);
+         fprintf (stream, "%d: %d: %s\n", i, fd, wd);
+         free (wd);
+       }
+      if (i == fd_ring->fts_back)
+       break;
+      i = (i + I_RING_SIZE - 1) % I_RING_SIZE;
+    }
+}
+
+/* Ensure that each file descriptor on the fd_ring matches a
+   parent, grandparent, etc. of the current working directory.  */
+static void
+fd_ring_check (FTS const *sp)
+{
+  if (!fts_debug)
+    return;
+
+  /* Make a writable copy.  */
+  I_ring fd_w = sp->fts_fd_ring;
+
+  int cwd_fd = sp->fts_cwd_fd;
+  cwd_fd = dup (cwd_fd);
+  char *dot = getcwdat (cwd_fd, NULL, 0);
+  error (0, 0, "===== check ===== cwd: %s", dot);
+  free (dot);
+  while ( ! i_ring_empty (&fd_w))
+    {
+      int fd = i_ring_pop (&fd_w);
+      if (0 <= fd)
+       {
+         int parent_fd = openat (cwd_fd, "..", O_RDONLY);
+         if (parent_fd < 0)
+           {
+             // Warn?
+             break;
+           }
+         if (!same_fd (fd, parent_fd))
+           {
+             char *cwd = getcwdat (fd, NULL, 0);
+             error (0, errno, "ring  : %s", cwd);
+             char *c2 = getcwdat (parent_fd, NULL, 0);
+             error (0, errno, "parent: %s", c2);
+             free (cwd);
+             free (c2);
+             abort ();
+           }
+         close (cwd_fd);
+         cwd_fd = parent_fd;
+       }
+    }
+  close (cwd_fd);
+}
 #endif

 static unsigned short int
@@ -1503,28 +1622,51 @@
 fts_safe_changedir (FTS *sp, FTSENT *p, int fd, char const *dir)
 {
        int ret;
-
-       int newfd = fd;
+       bool is_dotdot = dir && STREQ (dir, "..");
+       int newfd;

        /* This clause handles the unusual case in which FTS_NOCHDIR
           is specified, along with FTS_CWDFD.  In that case, there is
           no need to change even the virtual cwd file descriptor.
           However, if FD is non-negative, we do close it here.  */
-       if (ISSET(FTS_NOCHDIR)) {
-               if (ISSET(FTS_CWDFD) && 0 <= fd)
-                       close (fd);
-               return (0);
-       }
+       if (ISSET (FTS_NOCHDIR))
+         {
+           if (ISSET (FTS_CWDFD) && 0 <= fd)
+             close (fd);
+           return 0;
+         }
+
+       if (fd < 0 && is_dotdot && ISSET (FTS_CWDFD))
+         {
+           /* When possible, skip the diropen and subsequent fstat+dev/ino
+              comparison.  I.e., when changing to parent directory
+              (chdir ("..")), use a file descriptor from the ring and
+              save the overhead of diropen+fstat, as well as avoiding
+              failure when we lack "x" access to the virtual cwd.  */
+           if ( ! i_ring_empty (&sp->fts_fd_ring))
+             {
+               fd_ring_print (sp, stderr, "pre-pop");
+               int parent_fd = i_ring_pop (&sp->fts_fd_ring);
+               is_dotdot = true;
+               if (0 <= parent_fd)
+                 {
+                   fd = parent_fd;
+                   dir = NULL;
+                 }
+             }
+         }
+
+       newfd = fd;
        if (fd < 0 && (newfd = diropen (sp, dir)) < 0)
-               return (-1);
+         return -1;

-       /* The following dev/inode check is necessary if we're doing
-          a `logical' traversal (through symlinks, a la chown -L),
-          if the system lacks O_NOFOLLOW support, or if we're changing
-          to "..".  In the latter case, O_NOFOLLOW can't help.  In
-          general (when the target is not ".."), diropen's use of
-          O_NOFOLLOW ensures we don't mistakenly follow a symlink,
-          so we can avoid the expense of this fstat.  */
+       /* The following dev/inode check is necessary if we're doing a
+          `logical' traversal (through symlinks, a la chown -L), if the
+          system lacks O_NOFOLLOW support, or if we're changing to ".."
+          (but not via a popped file descriptor).  When changing to the
+          name "..", O_NOFOLLOW can't help.  In general, when the target is
+          not "..", diropen's use of O_NOFOLLOW ensures we don't mistakenly
+          follow a symlink, so we can avoid the expense of this fstat.  */
        if (ISSET(FTS_LOGICAL) || ! HAVE_WORKING_O_NOFOLLOW
            || (dir && STREQ (dir, "..")))
          {
@@ -1545,7 +1687,7 @@

        if (ISSET(FTS_CWDFD))
          {
-           cwd_advance_fd (sp, newfd);
+           cwd_advance_fd (sp, newfd, ! is_dotdot);
            return 0;
          }

@@ -1557,5 +1699,5 @@
            (void)close(newfd);
            __set_errno (oerrno);
          }
-       return (ret);
+       return ret;
 }
Index: lib/i-ring.c
===================================================================
RCS file: lib/i-ring.c
diff -N lib/i-ring.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lib/i-ring.c        12 Nov 2006 17:18:58 -0000
@@ -0,0 +1,69 @@
+/* a simple ring buffer
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* written by Jim Meyering */
+
+#include <config.h>
+#include "i-ring.h"
+
+#include <stdlib.h>
+
+void
+i_ring_init (I_ring *ir, int default_val)
+{
+  int i;
+  ir->ir_empty = true;
+  ir->ir_front = 0;
+  ir->ir_back = 0;
+  for (i = 0; i < I_RING_SIZE; i++)
+    ir->ir_data[i] = default_val;
+  ir->ir_default_val = default_val;
+}
+
+bool
+i_ring_empty (I_ring const *ir)
+{
+  return ir->ir_empty;
+}
+
+int
+i_ring_push (I_ring *ir, int val)
+{
+  unsigned int dest_idx = (ir->ir_front + !ir->ir_empty) % I_RING_SIZE;
+  int old_val = ir->ir_data[dest_idx];
+  ir->ir_data[dest_idx] = val;
+  ir->ir_front = dest_idx;
+  if (dest_idx == ir->ir_back)
+    ir->ir_back = (ir->ir_back + !ir->ir_empty) % I_RING_SIZE;
+  ir->ir_empty = false;
+  return old_val;
+}
+
+int
+i_ring_pop (I_ring *ir)
+{
+  int top_val;
+  if (i_ring_empty (ir))
+    abort ();
+  top_val = ir->ir_data[ir->ir_front];
+  ir->ir_data[ir->ir_front] = ir->ir_default_val;
+  if (ir->ir_front == ir->ir_back)
+    ir->ir_empty = true;
+  else
+    ir->ir_front = ((ir->ir_front + I_RING_SIZE - 1) % I_RING_SIZE);
+  return top_val;
+}
Index: lib/i-ring.h
===================================================================
RCS file: lib/i-ring.h
diff -N lib/i-ring.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lib/i-ring.h        12 Nov 2006 17:18:58 -0000
@@ -0,0 +1,45 @@
+/* definitions for a simple ring buffer
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+#include <stdbool.h>
+#include "verify.h"
+
+enum { I_RING_SIZE = 4 };
+verify (1 <= I_RING_SIZE);
+
+/* When ir_empty is true, the ring is empty.
+   Otherwise, ir_data[B..F] are defined, where B..F is the contiguous
+   range of indices, modulo I_RING_SIZE, from back to front, inclusive.
+   Undefined elements of ir_data are always set to ir_default_val.
+   Popping from an empty ring aborts.
+   Pushing onto a full ring returns the displaced value.
+   An empty ring has F==B and ir_empty == true.
+   A ring with one entry still has F==B, but now ir_empty == false.  */
+struct I_ring
+{
+  int ir_data[I_RING_SIZE];
+  int ir_default_val;
+  unsigned int ir_front;
+  unsigned int ir_back;
+  bool ir_empty;
+};
+typedef struct I_ring I_ring;
+
+void i_ring_init (I_ring *ir, int ir_default_val);
+int i_ring_push (I_ring *ir, int val);
+int i_ring_pop (I_ring *ir);
+bool i_ring_empty (I_ring const *ir);
Index: lib/i-ring-test.c
===================================================================
RCS file: lib/i-ring-test.c
diff -N lib/i-ring-test.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lib/i-ring-test.c   12 Nov 2006 17:18:58 -0000
@@ -0,0 +1,44 @@
+#include <assert.h>
+#include "i-ring.h"
+/* Test with this:
+   gcc -I. -Wall -W -O i-ring-test.c i-ring.c -L. -lcoreutils && ./a.out
+*/
+
+int
+main ()
+{
+  I_ring ir;
+  i_ring_init (&ir, -1);
+  int o = i_ring_push (&ir, 1);
+  assert (o == -1);
+  o = i_ring_push (&ir, 2);
+  assert (o == -1);
+  o = i_ring_push (&ir, 3);
+  assert (o == -1);
+  o = i_ring_push (&ir, 4);
+  assert (o == -1);
+  o = i_ring_push (&ir, 5);
+  assert (o == 1);
+  o = i_ring_push (&ir, 6);
+  assert (o == 2);
+  o = i_ring_push (&ir, 7);
+  assert (o == 3);
+
+  o = i_ring_pop (&ir);
+  assert (o == 7);
+  o = i_ring_pop (&ir);
+  assert (o == 6);
+  o = i_ring_pop (&ir);
+  assert (o == 5);
+  o = i_ring_pop (&ir);
+  assert (o == 4);
+  assert (i_ring_empty (&ir));
+
+  o = i_ring_push (&ir, 8);
+  assert (o == -1);
+  o = i_ring_pop (&ir);
+  assert (o == 8);
+  assert (i_ring_empty (&ir));
+
+  return 0;
+}
Index: modules/fts
===================================================================
RCS file: /sources/gnulib/gnulib/modules/fts,v
retrieving revision 1.10
diff -u -r1.10 fts
--- modules/fts 13 Oct 2006 12:40:23 -0000      1.10
+++ modules/fts 12 Nov 2006 17:18:58 -0000
@@ -14,6 +14,7 @@
 fcntl
 fcntl-safer
 hash
+i-ring
 lstat
 openat
 stdbool
Index: modules/i-ring
===================================================================
RCS file: modules/i-ring
diff -N modules/i-ring
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ modules/i-ring      12 Nov 2006 17:18:58 -0000
@@ -0,0 +1,23 @@
+Description:
+a simple ring buffer
+
+Files:
+lib/i-ring.h
+lib/i-ring.c
+m4/i-ring.m4
+
+Depends-on:
+
+configure.ac:
+gl_I_RING
+
+Makefile.am:
+
+Include:
+"i-ring.h"
+
+License:
+GPL
+
+Maintainer:
+Jim Meyering
Index: m4/i-ring.m4
===================================================================
RCS file: m4/i-ring.m4
diff -N m4/i-ring.m4
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ m4/i-ring.m4        12 Nov 2006 17:18:58 -0000
@@ -0,0 +1,10 @@
+# serial 1
+dnl Copyright (C) 2006 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_I_RING],
+[
+  AC_LIBOBJ([i-ring])
+])




reply via email to

[Prev in Thread] Current Thread [Next in Thread]