bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 4/4] gnulib-tool: faster string handling for Posix shells.


From: Bruno Haible
Subject: Re: [PATCH 4/4] gnulib-tool: faster string handling for Posix shells.
Date: Fri, 2 Jan 2009 02:26:52 +0100
User-agent: KMail/1.9.9

Hello Ralf,

20% is a good speedup that you achieve here by use of shell built-ins instead of
subprocesses and sed invocations.

> * gnulib-tool (func_strip): New function.

You only ever need to strip the prefix _or_ the suffix, never both
simultaneously. Therefore it will be faster to define 2 functions.

> +# func_strip name prefix suffix

This function is insufficiently documented: What restrictions apply to
'name', 'prefix', 'suffix'?

> +if ( eval 'foo=bar; test "${foo%r}" = ba' ) >/dev/null 2>&1; then
> +  func_strip ()
> +  {
> +    stripped=$1
> +    stripped=${stripped#$2}
> +    stripped=${stripped%$3}
> +  }

I don't much like functions which assign to a particular variable: it makes
it hard to remember how to use the function.

> @@ -1495,11 +1516,15 @@ func_get_automake_snippet ()
>          | sed -n -e "$sed_extract_mentioned_files" | sed -e 's/#.*//'`
>        func_get_filelist "$mymodule"
>        all_files=$module_files
> -      lib_files=`for f in $all_files; do \
> -                   case $f in \
> -                     lib/*) echo $f ;; \
> -                   esac; \
> -                 done | sed -e 's,^lib/,,'`
> +      lib_files=
> +      for f in $all_files; do
> +        case $f in
> +          lib/*)
> +            func_strip "$f" lib/ ''
> +            func_append lib_files "$nl$stripped"
> +            ;;
> +        esac
> +      done
>        # Remove $already_mentioned_files from $lib_files.
>        echo "$lib_files" | LC_ALL=C sort -u > "$tmp"/lib-files
>        extra_files=`func_reset_sigpipe; \

This introduces an extra blank line in "$tmp"/lib-files. It doesn't matter
in the end, but nevertheless.

> @@ -1524,36 +1549,47 @@ func_get_automake_snippet ()
>        case "$mymodule" in
>          relocatable-prog-wrapper) ;;
>          *)
> -          sed_extract_c_files='/\.c$/p'
> -          extra_files=`echo "$extra_files" | sed -n -e 
> "$sed_extract_c_files"`
> -          if test -n "$extra_files"; then
> -            set x $extra_files
> +           extra_c_files=
> +           for f in $extra_files; do
> +             case $f in
> +               *.c)
> +                 func_append extra_c_files " $f" ;;
> +             esac
> +           done
> +          if test -n "$extra_c_files"; then
> +            set x $extra_c_files
>              shift

This hunk is not well indented. Also, I don't find this code easier to 
understand:
it uses string manipulations instead of a filtering 'sed' command, and consumes
more lines of code.

Despite the general objections against string processing in bash that I listed
in the other mail, I'm applying this modified patch. It reduces the total
complexity of the code to an amount that will hopefully increase maintainability
instead of reducing it.

The speedup I saw (on the m4 1.4 branch, like you did):
before:

real    0m32.802s
user    0m11.053s
sys     0m21.693s

real    0m33.114s
user    0m11.149s
sys     0m21.773s

real    0m32.823s
user    0m11.145s
sys     0m21.657s

after:

real    0m29.970s
user    0m10.469s
sys     0m19.457s

real    0m29.715s
user    0m10.573s
sys     0m18.941s



2009-01-01  Ralf Wildenhues  <address@hidden>
            Bruno Haible  <address@hidden>

        Speed up gnulib-tool by doing more string processing through shell
        built-ins.
        * gnulib-tool (fast_func_append): New variable.
        (func_remove_prefix, func_remove_suffix): New functions.
        (fast_func_remove_prefix, fast_func_remove_suffix): New variables.
        (func_filter_filelist): New function.
        (func_get_dependencies): Use func_remove_suffix instead of sed.
        (func_get_automake_snippet): Use func_filter_filelist instead of a
        subshell and sed invocation.

--- gnulib-tool.orig    2009-01-02 02:19:59.000000000 +0100
+++ gnulib-tool 2009-01-02 01:32:56.000000000 +0100
@@ -398,11 +398,57 @@
   {
     eval "$1+=\"\$2\""
   }
+  fast_func_append=true
 else
   func_append ()
   {
     eval "$1=\"\$$1\$2\""
   }
+  fast_func_append=false
+fi
+
+# func_remove_prefix var prefix
+# removes the given prefix from the value of the shell variable var.
+# var should be the name of a shell variable.
+# Its value should not contain a newline and not start or end with whitespace.
+# prefix should not contain the characters "$`\{}|.
+if ( foo=bar; eval 'test "${foo#b}" = ar' ) >/dev/null 2>&1; then
+  func_remove_prefix ()
+  {
+    eval "$1=\${$1#\$2}"
+  }
+  fast_func_remove_prefix=true
+else
+  func_remove_prefix ()
+  {
+    eval "value=\"\$$1\""
+    prefix="$2"
+    value=`echo "$value" | sed -e "s|^${prefix}||"`
+    eval "$1=\"\$value\""
+  }
+  fast_func_remove_prefix=false
+fi
+
+# func_remove_suffix var suffix
+# removes the given suffix from the value of the shell variable var.
+# var should be the name of a shell variable.
+# Its value should not contain a newline and not start or end with whitespace.
+# suffix should not contain the characters "$`\{}|.
+if ( foo=bar; eval 'test "${foo%r}" = ba' ) >/dev/null 2>&1; then
+  func_remove_suffix ()
+  {
+    eval "$1=\${$1%\$2}"
+  }
+  fast_func_remove_suffix=true
+else
+  func_remove_suffix ()
+  {
+    eval "value=\"\$$1\""
+    suffix="$2"
+    value=`echo "$value" | sed -e "s|${suffix}\$||"`
+    eval "$1=\"\$value\""
+  }
+  fast_func_remove_suffix=false
 fi
 
 # func_fatal_error message
@@ -1328,13 +1374,63 @@
   esac
 }
 
+# func_filter_filelist outputvar separator filelist prefix suffix 
removed_prefix removed_suffix [added_prefix [added_suffix]]
+# stores in outputvar the filtered and processed filelist. Filtering: Only the
+# elements starting with prefix and ending with suffix are considered.
+# Processing: removed_prefix and removed_suffix are removed from each element,
+# added_prefix and added_suffix are added to each element.
+# removed_prefix, removed_suffix should not contain the characters "$`\{}|.
+# added_prefix, added_suffix should not contain the characters \|.
+func_filter_filelist ()
+{
+  if test "$2" != "$nl" \
+     || { $fast_func_append \
+          && { test -z "$6" || $fast_func_remove_prefix; } \
+          && { test -z "$7" || $fast_func_remove_suffix; }; \
+        }; then
+    ffflist=
+    for fff in $3; do
+      case "$fff" in
+        "$4"*"$5")
+          if test -n "$6"; then
+            func_remove_prefix fff "$6"
+          fi
+          if test -n "$7"; then
+            func_remove_suffix fff "$7"
+          fi
+          fff="$8${fff}$9"
+          if test -z "$ffflist"; then
+            ffflist="${fff}"
+          else
+            func_append ffflist "$2${fff}"
+          fi
+          ;;
+      esac
+    done
+  else
+    sed_fff_filter="s|^$6\(.*\)$7\\$|$8\\1$9|"
+    ffflist=`for fff in $3; do
+               case "$fff" in
+                 "$4"*"$5") echo "$fff" ;;
+               esac
+             done | sed -e "$sed_fff_filter"`
+  fi
+  eval "$1=\"\$ffflist\""
+}
+
 # func_get_dependencies module
 # Input:
 # - local_gnulib_dir  from --local-dir
 func_get_dependencies ()
 {
   # ${module}-tests always implicitly depends on ${module}.
-  echo "$1" | sed -n -e 's/-tests$//p'
+  case "$1" in
+    *-tests)
+      fgd1="$1"
+      func_remove_suffix fgd1 '-tests'
+      echo "$fgd1"
+      ;;
+  esac
   # Then the explicit dependencies listed in the module description.
   func_lookup_file "modules/$1"
   sed -n -e "/^Depends-on$sed_extract_prog" < "$lookedup_file"
@@ -1370,11 +1466,7 @@
       # *-tests module live in tests/, not lib/.
       # Synthesize an EXTRA_DIST augmentation.
       all_files=`func_get_filelist $1`
-      tests_files=`for f in $all_files; do \
-                     case $f in \
-                       tests/*) echo $f ;; \
-                     esac; \
-                   done | sed -e 's,^tests/,,'`
+      func_filter_filelist tests_files " " "$all_files" 'tests/' '' 'tests/' ''
       extra_files="$tests_files"
       if test -n "$extra_files"; then
         echo "EXTRA_DIST +=" $extra_files
@@ -1396,11 +1488,7 @@
         | sed -e "$sed_combine_lines" \
         | sed -n -e "$sed_extract_mentioned_files" | sed -e 's/#.*//'`
       all_files=`func_get_filelist $1`
-      lib_files=`for f in $all_files; do \
-                   case $f in \
-                     lib/*) echo $f ;; \
-                   esac; \
-                 done | sed -e 's,^lib/,,'`
+      func_filter_filelist lib_files "$nl" "$all_files" 'lib/' '' 'lib/' ''
       # Remove $already_mentioned_files from $lib_files.
       echo "$lib_files" | LC_ALL=C sort -u > "$tmp"/lib-files
       extra_files=`func_reset_sigpipe; \
@@ -1424,8 +1512,7 @@
       case "$1" in
         relocatable-prog-wrapper) ;;
         *)
-          sed_extract_c_files='/\.c$/p'
-          extra_files=`echo "$extra_files" | sed -n -e "$sed_extract_c_files"`
+          func_filter_filelist extra_files "$nl" "$extra_files" '' '.c' '' ''
           if test -n "$extra_files"; then
             echo "EXTRA_lib_SOURCES +=" $extra_files
             echo
@@ -1433,22 +1520,14 @@
           ;;
       esac
       # Synthesize an EXTRA_DIST augmentation also for the files in build-aux/.
-      buildaux_files=`for f in $all_files; do \
-                        case $f in \
-                          build-aux/*) echo $f ;; \
-                        esac; \
-                      done | sed -e 's,^build-aux/,,'`
+      func_filter_filelist buildaux_files "$nl" "$all_files" 'build-aux/' '' 
'build-aux/' ''
       if test -n "$buildaux_files"; then
         sed_prepend_auxdir='s,^,$(top_srcdir)/'"$auxdir"'/,'
         echo "EXTRA_DIST += "`echo "$buildaux_files" | sed -e 
"$sed_prepend_auxdir"`
         echo
       fi
       # Synthesize an EXTRA_DIST augmentation also for the files from top/.
-      top_files=`for f in $all_files; do \
-                   case $f in \
-                     top/*) echo $f ;; \
-                   esac; \
-                 done | sed -e 's,^top/,,'`
+      func_filter_filelist top_files "$nl" "$all_files" 'top/' '' 'top/' ''
       if test -n "$top_files"; then
         sed_prepend_topdir='s,^,$(top_srcdir)/,'
         echo "EXTRA_DIST += "`echo "$top_files" | sed -e "$sed_prepend_topdir"`




reply via email to

[Prev in Thread] Current Thread [Next in Thread]