bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [bug-gnulib] new modules chdir-long, openat; also, save-cwd improvem


From: Jim Meyering
Subject: Re: [bug-gnulib] new modules chdir-long, openat; also, save-cwd improvements
Date: Wed, 19 Jan 2005 00:04:54 +0100

Paul Eggert <address@hidden> wrote:
> I installed the following patch, merged from coreutils.  It removes
> PATH_MAX-related limitations on the length of the working directory.

Thanks for handling that.

> One interesting thing about this change is that we have a cycle of
> module dependencies: chdir-long depends on open-at which depends on
> save-cwd which depends on chdir-long.  Is this a bad thing?

IMHO, it's not a problem.
A few weeks ago I wrote rough replacements for fdopendir and fstatat so
that I could make getcwd.c more robust even on systems lacking openat
and those other two functions.[*]  Then, to test things out, I made
getcwd.c include the updated openat.h and tweaked pwd.c so that all it
does is call xgetcwd and print the result.  Works fine.

Testing on Linux, the only problem that arose came when running pwd from a
directory that was neither readable nor writable and whose absolute name
was longer than PATH_MAX.  In that case, it hit the recursive call loop:

  xgetcwd, getcwd, openat, save_cwd, xgetcwd, ...


[*] With these three replacement functions we might be able to remove
(or at least avoid using) getcwd's glibc-based O(N^2) code altogether
in favor of Paul's openat-based version.

For single-threaded applications, this change breaks the cycle:

Index: save-cwd.c
===================================================================
RCS file: /fetish/cu/lib/save-cwd.c,v
retrieving revision 1.23
diff -u -p -r1.23 save-cwd.c
--- save-cwd.c  3 Dec 2004 06:25:03 -0000       1.23
+++ save-cwd.c  3 Jan 2005 16:53:39 -0000
@@ -86,7 +86,18 @@ save_cwd (struct saved_cwd *cwd)
          cwd->desc = open (".", O_WRONLY);
          if (cwd->desc < 0)
            {
+             /* The following code protects against recursion via the
+                robust implementation of getcwd that calls openat, whose
+                replacement implementation in turn uses save_cwd.  Without
+                this test for recursion, calling save_cwd from a directory
+                that is neither readable nor writable would result in this
+                code calling xgetcwd, getcwd, openat, save_cwd, xgetcwd, ...
+                and so on.  */
+             static unsigned int recurse;
+             if (1 < recurse++)
+               return -1;
              cwd->name = xgetcwd ();
+             --recurse;
              return cwd->name ? 0 : -1;
            }
        }


I haven't spent time on any of the following changes
for a couple of weeks, but your message reminded me of them.
I haven't even written ChangeLog entries yet.
The chdir-long.c one is by far the biggest.  It removes the final
FIXME in that code by changing the parameter to be a non-const string.

Comments welcome:

Index: chdir-long.c
===================================================================
RCS file: /fetish/cu/lib/chdir-long.c,v
retrieving revision 1.3
diff -u -p -r1.3 chdir-long.c
--- chdir-long.c        11 Dec 2004 11:19:27 -0000      1.3
+++ chdir-long.c        3 Jan 2005 16:53:39 -0000
@@ -30,76 +30,47 @@
 #include <assert.h>
 #include <limits.h>
 
-#include "mempcpy.h"
 #include "openat.h"
 
 #ifndef O_DIRECTORY
 # define O_DIRECTORY 0
 #endif
 
-#ifndef MIN
-# define MIN(a, b) ((a) < (b) ? (a) : (b))
-#endif
-
 #ifndef PATH_MAX
 # error "compile this file only if your system defines PATH_MAX"
 #endif
 
-/* FIXME: this use of `MIN' is our sole concession to arbitrary limitations.
-   If, for some system, PATH_MAX is larger than 8191 and you call
-   chdir_long with a directory name that is longer than PATH_MAX,
-   yet that contains a single component that is more than 8191 bytes
-   long, then this function will fail.  */
-#define MAX_COMPONENT_LENGTH MIN (PATH_MAX - 1, 8 * 1024)
-
 struct cd_buf
 {
-  /* FIXME maybe allocate this via malloc, rather than using the stack.
-     But that would be the sole use of malloc.  Is it worth it to
-     let chdir_long fail due to a low-memory condition?
-     But when using malloc, and assuming we remove the `concession'
-     above, we'll still have to avoid allocating 2^31 bytes on
-     systems that define PATH_MAX to very large number.
-     Ideally, we'd allocate enough to deal with most names, and
-     dynamically increase the buffer size only when necessary.  */
-  char buffer[MAX_COMPONENT_LENGTH + 1];
-  char *avail;
   int fd;
 };
 
-/* Like memchr, but return the number of bytes from MEM
-   to the first occurrence of C thereafter.  Search only
-   LEN bytes.  Return LEN if C is not found.  */
-static inline size_t
-memchrcspn (char const *mem, int c, size_t len)
-{
-  char const *found = memchr (mem, c, len);
-  if (!found)
-    return len;
-
-  len = found - mem;
-  return len;
-}
-
-static void
+static inline void
 cdb_init (struct cd_buf *cdb)
 {
-  cdb->avail = cdb->buffer;
   cdb->fd = AT_FDCWD;
 }
 
-static inline bool
-cdb_empty (struct cd_buf const *cdb)
-{
-  return cdb->avail == cdb->buffer;
-}
-
 static inline int
 cdb_fchdir (struct cd_buf const *cdb)
 {
   return fchdir (cdb->fd);
 }
 
+static inline void
+cdb_free (struct cd_buf const *cdb)
+{
+  if (0 <= cdb->fd)
+    {
+      bool close_fail = close (cdb->fd);
+      assert (! close_fail);
+    }
+}
+
+/* Given a file descriptor of an open directory (or AT_FDCWD), CDB->fd,
+   try to open the CDB->fd-relative directory, DIR.  If the open succeeds,
+   update CDB->fd with the resulting descriptor, close the incoming file
+   descriptor, and return zero.  Upon failure, return -1 and set errno.  */
 static int
 cdb_advance_fd (struct cd_buf *cdb, char const *dir)
 {
@@ -111,85 +82,37 @@ cdb_advance_fd (struct cd_buf *cdb, char
        return -1;
     }
 
-  if (cdb->fd != AT_FDCWD)
-    close (cdb->fd);
+  cdb_free (cdb);
   cdb->fd = new_fd;
 
   return 0;
 }
 
-static int
-cdb_flush (struct cd_buf *cdb)
-{
-  if (cdb_empty (cdb))
-    return 0;
-
-  cdb->avail[0] = '\0';
-  if (cdb_advance_fd (cdb, cdb->buffer) != 0)
-    return -1;
-
-  cdb->avail = cdb->buffer;
-
-  return 0;
-}
-
-static void
-cdb_free (struct cd_buf *cdb)
-{
-  if (0 <= cdb->fd && close (cdb->fd) != 0)
-    abort ();
-}
-
-static int
-cdb_append (struct cd_buf *cdb, char const *s, size_t len)
-{
-  char const *end = cdb->buffer + sizeof cdb->buffer;
-
-  /* Insert a slash separator if there is a preceding byte
-     and it's not a slash.  */
-  bool need_slash = (cdb->buffer < cdb->avail && cdb->avail[-1] != '/');
-  size_t n_free;
-
-  if (sizeof cdb->buffer < len + 1)
-    {
-      /* This single component is too long.  */
-      errno = ENAMETOOLONG;
-      return -1;
-    }
-
-  /* See if there's enough room for the `/', the new component and
-     a trailing NUL.  */
-  n_free = end - cdb->avail;
-  if (n_free < need_slash + len + 1)
-    {
-      if (cdb_flush (cdb) != 0)
-       return -1;
-      need_slash = false;
-    }
-
-  if (need_slash)
-    *(cdb->avail)++ = '/';
-
-  cdb->avail = mempcpy (cdb->avail, s, len);
-  return 0;
-}
-
-/* This is a wrapper around chdir that works even on PATH_MAX-limited
-   systems.  It handles an arbitrarily long directory name by extracting
-   and processing manageable portions of the name.  On systems without
-   the openat syscall, this means changing the working directory to
-   more and more `distant' points along the long directory name and
-   then restoring the working directory.
-   If any of those attempts to change or restore the working directory
-   fails, this function exits nonzero.
-
-   Note that this function may still fail with errno == ENAMETOOLONG,
-   but only if the specified directory name contains a component that
-   is long enough to provoke such a failure all by itself (e.g. if the
-   component is longer than PATH_MAX on systems that define PATH_MAX).  */
+/* Return a pointer to the first non-slash in S.  */
+static inline char *
+find_non_slash (char const *s)
+{
+  size_t n_slash = strspn (s, "/");
+  return (char *) s + n_slash;
+}
+
+/* This is a function much like chdir, but without the PATH_MAX limitation
+   on the length of the directory name.  A significant difference is that
+   it must be able to modify (albeit only temporarily) the directory
+   name.  It handles an arbitrarily long directory name by operating
+   on manageable portions of the name.  On systems without the openat
+   syscall, this means changing the working directory to more and more
+   `distant' points along the long directory name and then restoring
+   the working directory.  If any of those attempts to save or restore
+   the working directory fails, this function exits nonzero.
+
+   Note that this function may still fail with errno == ENAMETOOLONG, but
+   only if the specified directory name contains a component that is long
+   enough to provoke such a failure all by itself (e.g. if the component
+   has length PATH_MAX or greater on systems that define PATH_MAX).  */
 
 int
-chdir_long (char const *dir)
+chdir_long (char *dir)
 {
   int e = chdir (dir);
   if (e == 0 || errno != ENAMETOOLONG)
@@ -197,70 +120,76 @@ chdir_long (char const *dir)
 
   {
     size_t len = strlen (dir);
-    char const *dir_end = dir + len;
-    char const *d;
+    char *dir_end = dir + len;
     struct cd_buf cdb;
+    size_t n_leading_slash;
 
     cdb_init (&cdb);
 
     /* If DIR is the empty string, then the chdir above
        must have failed and set errno to ENOENT.  */
     assert (0 < len);
+    assert (PATH_MAX <= len);
 
-    if (*dir == '/')
+    /* Count leading slashes.  */
+    n_leading_slash = strspn (dir, "/");
+
+    /* Handle any leading slashes as well as any name that matches
+       the regular expression, m!^//hostname[/]*! .  */
+    if (n_leading_slash == 2)
       {
-       /* Names starting with exactly two slashes followed by at least
-          one non-slash are special --
-          for example, in some environments //Hostname/file may
-          denote a file on a different host.
-          Preserve those two leading slashes.  Treat all other
-          sequences of slashes like a single one.  */
-       if (3 <= len && dir[1] == '/' && dir[2] != '/')
-         {
-           size_t name_len = 1 + strcspn (dir + 3, "/");
-           if (cdb_append (&cdb, dir, 2 + name_len) != 0)
-             goto Fail;
-           /* Advance D to next slash or to end of string. */
-           d = dir + 2 + name_len;
-           assert (*d == '/' || *d == '\0');
-         }
-       else
+       int err;
+       /* Find next slash.
+          We already know that dir[2] is neither a slash nor '\0'.  */
+       char *slash = memchr (dir + 3, '/', dir_end - (dir + 3));
+       if (slash == NULL)
          {
-           if (cdb_append (&cdb, "/", 1) != 0)
-             goto Fail;
-           d = dir + 1;
+           errno = ENAMETOOLONG;
+           return -1;
          }
+       *slash = '\0';
+       err = cdb_advance_fd (&cdb, dir);
+       *slash = '/';
+       if (err != 0)
+         goto Fail;
+       dir = find_non_slash (slash + 1);
       }
-    else
+    else if (n_leading_slash)
       {
-       d = dir;
+       if (cdb_advance_fd (&cdb, "/") != 0)
+         goto Fail;
+       dir += n_leading_slash;
       }
 
-    while (1)
+    assert (*dir != '/');
+    assert (dir <= dir_end);
+
+    while (PATH_MAX <= dir_end - dir)
       {
-       /* Skip any slashes to find start of next component --
-          or the end of DIR. */
-       char const *start = d + strspn (d, "/");
-       if (*start == '\0')
-         {
-           if (cdb_flush (&cdb) != 0)
-             goto Fail;
-           break;
-         }
-       /* If the remaining portion is no longer than PATH_MAX, then
-          flush anything that is buffered and do the rest in one chunk.  */
-       if (dir_end - start <= PATH_MAX)
+       int err;
+       /* Find a slash that is PATH_MAX or fewer bytes away from dir.
+          I.e. see if there is a slash that will give us a name of
+          length PATH_MAX-1 or less.  */
+       char *slash = memrchr (dir, '/', PATH_MAX);
+       if (slash == NULL)
          {
-           if (cdb_flush (&cdb) != 0
-               || cdb_advance_fd (&cdb, start) != 0)
-             goto Fail;
-           break;
+           errno = ENAMETOOLONG;
+           return -1;
          }
 
-       len = memchrcspn (start, '/', dir_end - start);
-       assert (len == strcspn (start, "/"));
-       d = start + len;
-       if (cdb_append (&cdb, start, len) != 0)
+       *slash = '\0';
+       assert (slash - dir < PATH_MAX);
+       err = cdb_advance_fd (&cdb, dir);
+       *slash = '/';
+       if (err != 0)
+         goto Fail;
+
+       dir = find_non_slash (slash + 1);
+      }
+
+    if (dir < dir_end)
+      {
+       if (cdb_advance_fd (&cdb, dir) != 0)
          goto Fail;
       }
 
@@ -318,16 +247,19 @@ main (int argc, char *argv[])
     error (EXIT_FAILURE, errno,
           "chdir_long failed: %s", line);
 
-  {
-    /* Using `pwd' here makes sense only if it is a robust implementation,
-       like the one in coreutils after the 2004-04-19 changes.  */
-    char const *cmd = "pwd";
-    execlp (cmd, (char *) NULL);
-    error (EXIT_FAILURE, errno, "%s", cmd);
-  }
+  if (argc <= 1)
+    {
+      /* Using `pwd' here makes sense only if it is a robust implementation,
+        like the one in coreutils after the 2004-04-19 changes.  */
+      char const *cmd = "pwd";
+      execlp (cmd, (char *) NULL);
+      error (EXIT_FAILURE, errno, "%s", cmd);
+    }
+
+  fclose (stdin);
+  fclose (stderr);
 
-  /* not reached */
-  abort ();
+  exit (EXIT_SUCCESS);
 }
 #endif
 
Index: chdir-long.h
===================================================================
RCS file: /fetish/cu/lib/chdir-long.h,v
retrieving revision 1.2
diff -u -p -r1.2 chdir-long.h
--- chdir-long.h        30 Nov 2004 14:45:28 -0000      1.2
+++ chdir-long.h        3 Jan 2005 16:53:39 -0000
@@ -31,5 +31,5 @@
 #ifndef PATH_MAX
 # define chdir_long(Dir) chdir (Dir)
 #else
-int chdir_long (char const *dir);
+int chdir_long (char *dir);
 #endif
Index: openat.c
===================================================================
RCS file: /fetish/cu/lib/openat.c,v
retrieving revision 1.5
diff -u -p -r1.5 openat.c
--- openat.c    31 Dec 2004 10:06:43 -0000      1.5
+++ openat.c    3 Jan 2005 16:53:39 -0000
@@ -89,3 +89,88 @@ rpl_openat (int fd, char const *filename
   errno = saved_errno;
   return new_fd;
 }
+
+/* Replacement for Solaris' fdopendir function.
+   FIXME: find better URL
+   <http://www.google.com/search?q=openat+site:docs.sun.com>
+   Simulate it by doing save_cwd/fchdir/opendir(".")/restore_cwd.
+   If either the save_cwd or the restore_cwd fails (relatively unlikely,
+   and usually indicative of a problem that deserves close attention),
+   then give a diagnostic and exit nonzero.
+   Otherwise, upon failure, set errno and return NULL, as fdopendir does.
+   Upon successful completion, return a DIR* pointer.  */
+DIR *
+fdopendir (int fd)
+{
+  struct saved_cwd saved_cwd;
+  int saved_errno;
+  DIR *dir;
+
+  if (fd == AT_FDCWD)
+    return opendir (".");
+
+  if (save_cwd (&saved_cwd) != 0)
+    error (exit_failure, errno,
+          _("fdopendir: unable to record current working directory"));
+
+  if (fchdir (fd) != 0)
+    {
+      saved_errno = errno;
+      free_cwd (&saved_cwd);
+      errno = saved_errno;
+      return NULL;
+    }
+
+  dir = opendir (".");
+  saved_errno = errno;
+
+  if (restore_cwd (&saved_cwd) != 0)
+    error (exit_failure, errno,
+          _("fdopendir: unable to restore working directory"));
+
+  free_cwd (&saved_cwd);
+
+  errno = saved_errno;
+  return dir;
+}
+
+/* FIXME: is `flag' of the right type?
+   FIXME: use same variable name Solaris uses. */
+int
+fstatat (int fd, char const *filename, struct stat *st, int flag)
+{
+  struct saved_cwd saved_cwd;
+  int saved_errno;
+  int err;
+
+  if (fd == AT_FDCWD)
+    return (flag == AT_SYMLINK_NOFOLLOW
+           ? lstat (filename, st)
+           : stat (filename, st));
+
+  if (save_cwd (&saved_cwd) != 0)
+    error (exit_failure, errno,
+          _("fstatat: unable to record current working directory"));
+
+  if (fchdir (fd) != 0)
+    {
+      saved_errno = errno;
+      free_cwd (&saved_cwd);
+      errno = saved_errno;
+      return -1;
+    }
+
+  err = (flag == AT_SYMLINK_NOFOLLOW
+        ? lstat (filename, st)
+        : stat (filename, st));
+  saved_errno = errno;
+
+  if (restore_cwd (&saved_cwd) != 0)
+    error (exit_failure, errno,
+          _("fstatat: unable to restore working directory"));
+
+  free_cwd (&saved_cwd);
+
+  errno = saved_errno;
+  return err;
+}
Index: openat.h
===================================================================
RCS file: /fetish/cu/lib/openat.h,v
retrieving revision 1.3
diff -u -p -r1.3 openat.h
--- openat.h    3 Dec 2004 06:44:30 -0000       1.3
+++ openat.h    3 Jan 2005 16:53:39 -0000
@@ -21,9 +21,20 @@
 # include <fcntl.h>
 #endif
 
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <unistd.h>
+
 #ifndef AT_FDCWD
 # define AT_FDCWD (-3041965) /* same value as Solaris 9 */
 
+enum
+{
+  /* FIXME: use same value Solaris uses */
+  AT_SYMLINK_NOFOLLOW = 100
+};
+
 # ifdef __OPENAT_PREFIX
 #  undef openat
 #  define __OPENAT_CONCAT(x, y) x ## y
@@ -31,5 +42,10 @@
 #  define __OPENAT_ID(y) __OPENAT_XCONCAT (__OPENAT_PREFIX, y)
 #  define openat __OPENAT_ID (openat)
 int openat (int fd, char const *filename, int flags, /* mode_t mode */ ...);
+#  define fdopendir __OPENAT_ID (fdopendir)
+DIR *fdopendir (int fd);
+#  define fstatat __OPENAT_ID (fstatat)
+int fstatat (int fd, char const *filename, struct stat *st, int flag);
 # endif
+
 #endif




reply via email to

[Prev in Thread] Current Thread [Next in Thread]