bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

fts: avoid O(n^2) ext3/ext4 performance hit on million+-entry dirs


From: Jim Meyering
Subject: fts: avoid O(n^2) ext3/ext4 performance hit on million+-entry dirs
Date: Thu, 25 Sep 2008 18:16:58 +0200

Hello,

Here is a patch that makes it so tools using fts,
like chmod, chown, chgrp, chcon, du, and find are no
longer susceptible to an O(n^2) performance penalty when
processing very large directory-entry counts (as in millions).
I first noticed the problem on ext3 and ext4 file systems,
but the patch also improves performance on reiserfs and xfs,
but not on tmpfs.

I've been using this on multiple systems via coreutils,
and of course, it passes all of the tests there.
I expect to push this tomorrow.

>From 81cd6073dfc09d295f9cf0910cc29138f017f58d Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Tue, 16 Sep 2008 10:05:47 +0200
Subject: [PATCH] fts: sort dirent entries on inode number before traversing

This avoids a quadratic, seek-related performance penalty when
operating on a directory containing many entries (measurable at 10k,
3.5 hours at 2 million entries with a cold cache) on certain types
of file systems, including ext3 and ext4, but not tmpfs.
* lib/fts.c (DT_MUST_BE, NOT_AN_INODE_NUMBER, D_INO): Define.
(FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD): Define if not defined.
(S_MAGIC_TMPFS, S_MAGIC_NFS): Define.
(fs_handles_readdir_ordered_dirents_efficiently): New function.
(dirent_inode_sort_may_be_useful, fts_compare_ino): Likewise.
(fts_build): Set the stat.st_ino member from D_INO.
If it is likely to be useful, sort dirent entries on inode number.
* m4/fts.m4 (gl_FUNC_FTS_CORE): Check for fstatfs, sys/vfs.h,
and the struct statfs.f_type member.
* modules/fts (Depends-on): Add d-ino.
---
 lib/fts.c   |   90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 m4/fts.m4   |    9 +++--
 modules/fts |    1 +
 3 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/lib/fts.c b/lib/fts.c
index 7956f72..cd14ec4 100644
--- a/lib/fts.c
+++ b/lib/fts.c
@@ -91,6 +91,29 @@ static char sccsid[] = "@(#)fts.c    8.6 (Berkeley) 8/14/94";
 # define DT_MUST_BE(d, t) false
 #endif

+enum
+{
+  NOT_AN_INODE_NUMBER = 0
+};
+
+#ifdef D_INO_IN_DIRENT
+# define D_INO(dp) (dp)->d_ino
+#else
+/* Some systems don't have inodes, so fake them to avoid lots of ifdefs.  */
+# define D_INO(dp) NOT_AN_INODE_NUMBER
+#endif
+
+/* If there are more than this many entries in a directory,
+   and the conditions mentioned below are satisfied, then sort
+   the entries on inode number before any further processing.  */
+#ifndef FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
+# define FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD 10000
+#endif
+enum
+{
+  _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD = FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
+};
+
 enum Fts_stat
 {
   FTS_NO_STAT_REQUIRED = 1,
@@ -911,6 +934,58 @@ fts_children (register FTS *sp, int instr)
        return (sp->fts_child);
 }

+#if defined HAVE_SYS_VFS_H && HAVE_FSTATFS && HAVE_STRUCT_STATFS_F_TYPE
+# include <sys/statfs.h>
+/* FIXME: what about when f_type is not an integral type?
+   deal with that if/when it's encountered.  */
+static bool
+fs_handles_readdir_ordered_dirents_efficiently (uintmax_t fs_type)
+{
+/* From coreutils' src/fs.h */
+#define S_MAGIC_TMPFS 0x1021994
+#define S_MAGIC_NFS 0x6969
+  switch (fs_type)
+    {
+    case S_MAGIC_TMPFS:
+    case S_MAGIC_NFS:
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* Return true if it is easy to determine the file system type of the
+   current directory, and sorting dirents on inode numbers is known to
+   improve traversal performance with that type of file system.  */
+static bool
+dirent_inode_sort_may_be_useful (FTS const *sp)
+{
+  struct statfs fs_buf;
+  /* Skip the sort only if we can determine efficiently
+     that it's the right thing to do.  */
+  bool skip = (ISSET (FTS_CWDFD)
+              && fstatfs (sp->fts_cwd_fd, &fs_buf) == 0
+              && fs_handles_readdir_ordered_dirents_efficiently
+                   (fs_buf.f_type));
+  return !skip;
+}
+#else
+static bool dirent_inode_sort_may_be_useful (FTS const *sp) { return true; }
+#endif
+
+/* A comparison function to sort on increasing inode number.
+   For some file system types, sorting either way makes a huge
+   performance difference for a directory with very many entries,
+   but sorting on increasing values is slightly better than sorting
+   on decreasing values.  The difference is in the 5% range.  */
+static int
+fts_compare_ino (struct _ftsent const **a, struct _ftsent const **b)
+{
+  int diff = (b[0]->fts_statp->st_ino
+             - a[0]->fts_statp->st_ino);
+  return diff;
+}
+
 /*
  * This is the tricky part -- do not casually change *anything* in here.  The
  * idea is to build the linked list of entries that are used by fts_children
@@ -1111,6 +1186,9 @@ mem1:                             saved_errno = errno;
                if (dp->d_type == DT_WHT)
                        p->fts_flags |= FTS_ISW;
 #endif
+               /* Store dirent.d_ino, in case we need to sort
+                  entries before processing them.  */
+               p->fts_statp->st_ino = D_INO (dp);

                /* Build a file name for fts_stat to stat. */
                if (ISSET(FTS_NOCHDIR)) {
@@ -1206,6 +1284,18 @@ mem1:                            saved_errno = errno;
                return (NULL);
        }

+       /* If there are many entries, no sorting function has been specified,
+          and this file system is of a type that may be slow with a large
+          number of entries, then sort the directory entries on increasing
+          inode numbers.  */
+       if (nitems > _FTS_INODE_SORT_DIR_ENTRIES_THRESHOLD
+           && !sp->fts_compar
+           && dirent_inode_sort_may_be_useful (sp)) {
+               sp->fts_compar = fts_compare_ino;
+               head = fts_sort (sp, head, nitems);
+               sp->fts_compar = NULL;
+       }
+
        /* Sort the entries. */
        if (sp->fts_compar && nitems > 1)
                head = fts_sort(sp, head, nitems);
diff --git a/m4/fts.m4 b/m4/fts.m4
index cceb48f..8693db9 100644
--- a/m4/fts.m4
+++ b/m4/fts.m4
@@ -1,5 +1,5 @@
-#serial 13
-dnl Copyright (C) 2005-2007 Free Software Foundation, Inc.
+#serial 14
+dnl Copyright (C) 2005-2008 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
 dnl with or without modifications, as long as this notice is preserved.
@@ -23,6 +23,7 @@ AC_DEFUN([gl_FUNC_FTS_CORE],
   dnl Prerequisites of lib/fts.c.
   gl_FUNC_OPENAT

-  # Checks for header files.
-  AC_CHECK_HEADERS_ONCE([sys/param.h])dnl
+  AC_CHECK_FUNCS_ONCE([fstatfs])
+  AC_CHECK_HEADERS_ONCE([sys/param.h sys/vfs])dnl
+  AC_CHECK_MEMBERS([struct statfs.f_type])
 ])
diff --git a/modules/fts b/modules/fts
index b45f026..a412d0a 100644
--- a/modules/fts
+++ b/modules/fts
@@ -9,6 +9,7 @@ m4/fts.m4

 Depends-on:
 cycle-check
+d-ino
 d-type
 dirfd
 fchdir
--
1.6.0.2.307.gc427




reply via email to

[Prev in Thread] Current Thread [Next in Thread]