bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: fnmatch has exponential running time


From: Jakub Jelinek
Subject: Re: fnmatch has exponential running time
Date: Tue, 27 Mar 2007 12:06:28 +0200
User-agent: Mutt/1.4.2.2i

On Thu, Mar 22, 2007 at 09:21:37PM +0100, Bruno Haible wrote:
> fnmatch() has a worst-case complexity O(m*n) where m is the size of the
> pattern and n is the size of the sample string. Unfortunately glibc has
> chosen an implementation with exponential running time.
> 
> $ mkdir foo
> $ cd foo
> $ touch 
> aaaabbbbccccddddeeeeffffgggghhhhiiiijjjjkkkkllllmmmmnnnnooooppppqqqqrrrrssssttttuuuuvvvvwwwwxxxxyyyy
> $ time find . -name 'a*b*d*e*f*g*h*i*j*k*l*m*n*o*p*q*z*'
> real    4m50.007s
> user    4m44.002s
> sys     0m0.056s

The following patch ought to fix it.
When recursing for * handling it will stop when it hits next normal *,
tell the caller what the current pattern and string pointers were
and return to the caller which will restart from that place.
I have also removed no_leading_period2 variables, since there is no need
to preserve no_leading_period variable.  Either the code will always
return without ever looking at that var again (that was the only case
before this patch), or it will reinitialize it from what the recursive
call passed up.

2007-03-27  Jakub Jelinek  <address@hidden>

        * posix/fnmatch.c (STRUCT): Define.
        (fnmatch): Pass NULL as last argument to internal_fn{,w}match.
        * posix/fnmatch_loop.c (struct STRUCT): New type.
        (FCT): Add ends argument.  If ends != NULL and normal * is
        seen in the pattern, store current pattern and string pointers
        and return.  Adjust recursive calls.
        (EXT): Adjust FCT callers.
        (STRUCT): Undef at the end of the file.
        * posix/Makefile (tests): Add tst-fnmatch2.
        * posix/tst-fnmatch2.c: New test.

--- libc/posix/fnmatch.c.jj     2005-03-30 06:17:47.000000000 +0200
+++ libc/posix/fnmatch.c        2007-03-27 10:31:25.000000000 +0200
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003
+/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2007
        Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
@@ -209,6 +209,7 @@ __wcschrnul (s, c)
 # define FCT   internal_fnmatch
 # define EXT   ext_match
 # define END   end_pattern
+# define STRUCT        fnmatch_struct
 # define L(CS) CS
 # ifdef _LIBC
 #  define BTOWC(C)     __btowc (C)
@@ -235,7 +236,8 @@ __wcschrnul (s, c)
 #  define INT  wint_t
 #  define FCT  internal_fnwmatch
 #  define EXT  ext_wmatch
-# define END   end_wpattern
+#  define END  end_wpattern
+#  define STRUCT fnwmatch_struct
 #  define L(CS)        L##CS
 #  define BTOWC(C)     (C)
 #  define STRLEN(S) __wcslen (S)
@@ -397,12 +399,12 @@ fnmatch (pattern, string, flags)
        }
 
       return internal_fnwmatch (wpattern, wstring, wstring + n,
-                               flags & FNM_PERIOD, flags);
+                               flags & FNM_PERIOD, flags, NULL);
     }
 # endif  /* mbstate_t and mbsrtowcs or _LIBC.  */
 
   return internal_fnmatch (pattern, string, string + strlen (string),
-                          flags & FNM_PERIOD, flags);
+                          flags & FNM_PERIOD, flags, NULL);
 }
 
 # ifdef _LIBC
--- libc/posix/fnmatch_loop.c.jj        2005-10-15 00:44:42.000000000 +0200
+++ libc/posix/fnmatch_loop.c   2007-03-27 11:47:12.000000000 +0200
@@ -1,5 +1,5 @@
-/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005
-   Free Software Foundation, Inc.
+/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005,
+   2007 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -17,10 +17,18 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+struct STRUCT
+{
+  const CHAR *pattern;
+  const CHAR *string;
+  int no_leading_period;
+};
+
 /* Match STRING against the filename pattern PATTERN, returning zero if
    it matches, nonzero if not.  */
 static int FCT (const CHAR *pattern, const CHAR *string,
-               const CHAR *string_end, int no_leading_period, int flags)
+               const CHAR *string_end, int no_leading_period, int flags,
+               struct STRUCT *ends)
      internal_function;
 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
                const CHAR *string_end, int no_leading_period, int flags)
@@ -29,12 +37,13 @@ static const CHAR *END (const CHAR *patt
 
 static int
 internal_function
-FCT (pattern, string, string_end, no_leading_period, flags)
+FCT (pattern, string, string_end, no_leading_period, flags, ends)
      const CHAR *pattern;
      const CHAR *string;
      const CHAR *string_end;
      int no_leading_period;
      int flags;
+     struct STRUCT *ends;
 {
   register const CHAR *p = pattern, *n = string;
   register UCHAR c;
@@ -97,6 +106,13 @@ FCT (pattern, string, string_end, no_lea
              if (res != -1)
                return res;
            }
+         else if (ends != NULL)
+           {
+             ends->pattern = p - 1;
+             ends->string = n;
+             ends->no_leading_period = no_leading_period;
+             return 0;
+           }
 
          if (n != string_end && *n == L('.') && no_leading_period)
            return FNM_NOMATCH;
@@ -157,7 +173,9 @@ FCT (pattern, string, string_end, no_lea
          else
            {
              const CHAR *endp;
+             struct STRUCT end;
 
+             end.pattern = NULL;
              endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
                             string_end - n);
              if (endp == NULL)
@@ -170,36 +188,46 @@ FCT (pattern, string, string_end, no_lea
                {
                  int flags2 = ((flags & FNM_FILE_NAME)
                                ? flags : (flags & ~FNM_PERIOD));
-                 int no_leading_period2 = no_leading_period;
 
-                 for (--p; n < endp; ++n, no_leading_period2 = 0)
-                   if (FCT (p, n, string_end, no_leading_period2, flags2)
-                       == 0)
-                     return 0;
+                 for (--p; n < endp; ++n, no_leading_period = 0)
+                   if (FCT (p, n, string_end, no_leading_period, flags2,
+                            &end) == 0)
+                     goto found;
                }
              else if (c == L('/') && (flags & FNM_FILE_NAME))
                {
                  while (n < string_end && *n != L('/'))
                    ++n;
                  if (n < string_end && *n == L('/')
-                     && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
-                         == 0))
+                     && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
+                              NULL) == 0))
                    return 0;
                }
              else
                {
                  int flags2 = ((flags & FNM_FILE_NAME)
                                ? flags : (flags & ~FNM_PERIOD));
-                 int no_leading_period2 = no_leading_period;
 
                  if (c == L('\\') && !(flags & FNM_NOESCAPE))
                    c = *p;
                  c = FOLD (c);
-                 for (--p; n < endp; ++n, no_leading_period2 = 0)
+                 for (--p; n < endp; ++n, no_leading_period = 0)
                    if (FOLD ((UCHAR) *n) == c
-                       && (FCT (p, n, string_end, no_leading_period2, flags2)
-                           == 0))
-                     return 0;
+                       && (FCT (p, n, string_end, no_leading_period, flags2,
+                                &end) == 0))
+                     {
+                     found:
+                       if (end.pattern == NULL)
+                         return 0;
+                       break;
+                     }
+                 if (end.pattern != NULL)
+                   {
+                     p = end.pattern;
+                     n = end.string;
+                     no_leading_period = end.no_leading_period;
+                     continue;
+                   }
                }
            }
 
@@ -1098,7 +1126,7 @@ EXT (INT opt, const CHAR *pattern, const
   switch (opt)
     {
     case L('*'):
-      if (FCT (p, string, string_end, no_leading_period, flags) == 0)
+      if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
        return 0;
       /* FALLTHROUGH */
 
@@ -1109,7 +1137,8 @@ EXT (INT opt, const CHAR *pattern, const
            /* First match the prefix with the current pattern with the
               current pattern.  */
            if (FCT (list->str, string, rs, no_leading_period,
-                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
+                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+                    NULL) == 0
                /* This was successful.  Now match the rest with the rest
                   of the pattern.  */
                && (FCT (p, rs, string_end,
@@ -1117,7 +1146,7 @@ EXT (INT opt, const CHAR *pattern, const
                         ? no_leading_period
                         : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
                         flags & FNM_FILE_NAME
-                        ? flags : flags & ~FNM_PERIOD) == 0
+                        ? flags : flags & ~FNM_PERIOD, NULL) == 0
                    /* This didn't work.  Try the whole pattern.  */
                    || (rs != string
                        && FCT (pattern - 1, rs, string_end,
@@ -1126,7 +1155,7 @@ EXT (INT opt, const CHAR *pattern, const
                                : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
                                   ? 1 : 0),
                                flags & FNM_FILE_NAME
-                               ? flags : flags & ~FNM_PERIOD) == 0)))
+                               ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
              /* It worked.  Signal success.  */
              return 0;
        }
@@ -1136,7 +1165,7 @@ EXT (INT opt, const CHAR *pattern, const
       return FNM_NOMATCH;
 
     case L('?'):
-      if (FCT (p, string, string_end, no_leading_period, flags) == 0)
+      if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
        return 0;
       /* FALLTHROUGH */
 
@@ -1148,7 +1177,8 @@ EXT (INT opt, const CHAR *pattern, const
           pattern list.  */
        if (FCT (STRCAT (list->str, p), string, string_end,
                 no_leading_period,
-                flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
+                flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+                NULL) == 0)
          /* It worked.  Signal success.  */
          return 0;
       while ((list = list->next) != NULL);
@@ -1163,7 +1193,8 @@ EXT (INT opt, const CHAR *pattern, const
 
          for (runp = list; runp != NULL; runp = runp->next)
            if (FCT (runp->str, string, rs,  no_leading_period,
-                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
+                    flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+                    NULL) == 0)
              break;
 
          /* If none of the patterns matched see whether the rest does.  */
@@ -1172,8 +1203,8 @@ EXT (INT opt, const CHAR *pattern, const
                       rs == string
                       ? no_leading_period
                       : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
-                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
-                 == 0))
+                      flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
+                      NULL) == 0))
            /* This is successful.  */
            return 0;
        }
@@ -1198,6 +1229,7 @@ EXT (INT opt, const CHAR *pattern, const
 #undef FCT
 #undef EXT
 #undef END
+#undef STRUCT
 #undef MEMPCPY
 #undef MEMCHR
 #undef STRCOLL
--- libc/posix/Makefile.jj      2007-02-26 18:13:45.000000000 +0100
+++ libc/posix/Makefile 2007-03-27 11:07:05.000000000 +0200
@@ -90,7 +90,7 @@ tests         := tstgetopt testfnm runtests run
                   tst-execv1 tst-execv2 tst-execl1 tst-execl2 \
                   tst-execve1 tst-execve2 tst-execle1 tst-execle2 \
                   tst-execvp3 tst-execvp4 tst-rfc3484 tst-rfc3484-2 \
-                  tst-getaddrinfo3
+                  tst-getaddrinfo3 tst-fnmatch2
 xtests         := bug-ga2
 ifeq (yes,$(build-shared))
 test-srcs      := globtest
--- libc/posix/tst-fnmatch2.c.jj        2007-03-27 11:06:37.000000000 +0200
+++ libc/posix/tst-fnmatch2.c   2007-03-27 11:55:15.000000000 +0200
@@ -0,0 +1,35 @@
+#include <fnmatch.h>
+#include <stdio.h>
+
+int
+do_test (void)
+{
+  char pattern[] = "a*b*c*d*e*f*g*h*i*j*k*l*m*n*o*p*q*r*s*t*u*v*w*x*y*z*";
+  const char *string = "aaaabbbbccccddddeeeeffffgggghhhhiiiijjjjkkkkllllmmmm"
+                      "nnnnooooppppqqqqrrrrssssttttuuuuvvvvwwwwxxxxyyyy";
+  if (fnmatch (pattern, string, 0) != FNM_NOMATCH)
+    {
+      puts ("First fnmatch didn't return FNM_NOMATCH");
+      return 1;
+    }
+  pattern[(sizeof pattern) - 3] = '*';
+  if (fnmatch (pattern, string, 0) != 0)
+    {
+      puts ("Second fnmatch didn't return 0");
+      return 1;
+    }
+  if (fnmatch ("a*b/*", "abbb/.x", FNM_PATHNAME | FNM_PERIOD) != FNM_NOMATCH)
+    {
+      puts ("Third fnmatch didn't return FNM_NOMATCH");
+      return 1;
+    }
+  if (fnmatch ("a*b/*", "abbb/xy", FNM_PATHNAME | FNM_PERIOD) != 0)
+    {
+      puts ("Fourth fnmatch didn't return 0");
+      return 1;
+    }
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"

        Jakub




reply via email to

[Prev in Thread] Current Thread [Next in Thread]