>From 72ac023526e84e7847cc67788be9afca2f3204fb Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 7 Aug 2022 17:02:21 +0200 Subject: [PATCH 11/19] gnulib-tool.py: Implement option --find. * pygnulib/GLModuleSystem.py (GLModuleSystem.file_is_module): New method. (GLModuleSystem.list): Filter the listing in memory; don't use a 'sed' subprocess. * gnulib-tool.py (main): Handle mode 'find'. --- ChangeLog | 7 +++++ gnulib-tool.py | 60 +++++++++++++++++++++++++++++++++++++- gnulib-tool.py.TODO | 5 +++- pygnulib/GLModuleSystem.py | 59 +++++++++++++++++-------------------- 4 files changed, 97 insertions(+), 34 deletions(-) diff --git a/ChangeLog b/ChangeLog index e259a09333..acb82e903c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2022-08-07 Bruno Haible + gnulib-tool.py: Implement option --find. + * pygnulib/GLModuleSystem.py (GLModuleSystem.file_is_module): New + method. + (GLModuleSystem.list): Filter the listing in memory; don't use a 'sed' + subprocess. + * gnulib-tool.py (main): Handle mode 'find'. + gnulib-tool: Fix option --find in combination with option --local-dir. * gnulib-tool (func_prefixed_modules_in_dir): New function. (find): Use it, and filter the directory names away after the 'grep' diff --git a/gnulib-tool.py b/gnulib-tool.py index bb763a2245..032deee6e0 100755 --- a/gnulib-tool.py +++ b/gnulib-tool.py @@ -43,15 +43,18 @@ import codecs import random import argparse import subprocess as sp +import shlex from tempfile import mktemp from pygnulib import constants from pygnulib import classes +from pygnulib import GLError #=============================================================================== # Define global constants #=============================================================================== APP = constants.APP +DIRS = constants.DIRS ENCS = constants.ENCS UTILS = constants.UTILS MODES = constants.MODES @@ -422,6 +425,7 @@ def main(): # Determine when user tries to combine modes. 
args = [ cmdargs.mode_list, + cmdargs.mode_find, cmdargs.mode_import, cmdargs.mode_add_import, cmdargs.mode_remove_import, @@ -460,6 +464,9 @@ def main(): files = None if cmdargs.mode_list != None: mode = 'list' + if cmdargs.mode_find != None: + mode = 'find' + files = list(cmdargs.non_option_arguments) if cmdargs.mode_import != None: mode = 'import' modules = list(cmdargs.non_option_arguments) @@ -701,13 +708,64 @@ def main(): ) # Work in the given mode. - if mode in ['list']: + if mode == 'list': modulesystem = classes.GLModuleSystem(config) listing = modulesystem.list() result = '\n'.join(listing) os.rmdir(config['tempdir']) print(result) + elif mode == 'find': + # Prepare GLModuleSystem.find to throw an exception. + config.setErrors(True) + modulesystem = classes.GLModuleSystem(config) + for filename in files: + if (isfile(joinpath(DIRS['root'], filename)) + or (localpath != None + and any([ isfile(joinpath(localdir, filename)) + for localdir in localpath ]))): + # Convert the file name to a POSIX basic regex. + # Needs to handle . [ \ * ^ $. + filename_regex = filename.replace('\\', '\\\\').replace('[', '\\[').replace('^', '\\^') + filename_regex = re.compile('([.*$])').sub('[\\1]', filename_regex) + filename_line_regex = '^' + filename_regex + '$' + # Read module candidates from gnulib root directory. + command = "find modules -type f -print | xargs -n 100 grep -l %s /dev/null | sed -e 's,^modules/,,'" % shlex.quote(filename_line_regex) + os.chdir(constants.DIRS['root']) + with sp.Popen(command, shell=True, stdout=sp.PIPE) as proc: + result = proc.stdout.read().decode("UTF-8") + os.chdir(DIRS['cwd']) + # Read module candidates from local directories. 
+ if localpath != None and len(localpath) > 0: + command = "find modules -type f -print | xargs -n 100 grep -l %s /dev/null | sed -e 's,^modules/,,' -e 's,\\.diff$,,'" % shlex.quote(filename_line_regex) + for localdir in localpath: + os.chdir(localdir) + with sp.Popen(command, shell=True, stdout=sp.PIPE) as proc: + result += proc.stdout.read().decode("UTF-8") + os.chdir(DIRS['cwd']) + listing = [ line + for line in result.split('\n') + if line.strip() ] + # Remove modules/ prefix from each file name. + pattern = re.compile('^modules/') + listing = [ pattern.sub('', line) + for line in listing ] + # Filter out undesired file names. + listing = [ line + for line in listing + if modulesystem.file_is_module(line) ] + module_candidates = sorted(set(listing)) + for module in module_candidates: + try: + if filename in modulesystem.find(module).getFiles(): + print(module) + except GLError: + # Ignore module candidates that don't actually exist. + pass + else: + message = '%s: warning: file %s does not exist\n' % (constants.APP['name'], filename) + sys.stderr.write(message) + elif mode in ['import', 'add-import', 'remove-import', 'update']: mode = MODES[mode] if not destdir: diff --git a/gnulib-tool.py.TODO b/gnulib-tool.py.TODO index 53915662b8..a46da5e2ad 100644 --- a/gnulib-tool.py.TODO +++ b/gnulib-tool.py.TODO @@ -20,7 +20,6 @@ Inline all 'sed' invocations. -------------------------------------------------------------------------------- Implement the options: - --find --extract-recursive-dependencies --extract-recursive-link-directive --extract-tests-module @@ -37,6 +36,10 @@ Implement the options: Remove exit() in GLImport.py. +Optimize: + - GLModuleSystem: Parse each module description only once. + - os.chdir around subprocess creation -> cwd=... argument instead. 
+ -------------------------------------------------------------------------------- commit 76c7703cb2e9e0e803d1296618d8ab9e86e13d6c diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py index 75316515b8..3383b22b28 100644 --- a/pygnulib/GLModuleSystem.py +++ b/pygnulib/GLModuleSystem.py @@ -43,6 +43,7 @@ DIRS = constants.DIRS ENCS = constants.ENCS TESTS = constants.TESTS joinpath = constants.joinpath +subend = constants.subend isdir = os.path.isdir isfile = os.path.isfile filter_filelist = constants.filter_filelist @@ -113,6 +114,18 @@ class GLModuleSystem(object): sys.stderr.write('gnulib-tool: warning: ') sys.stderr.write('file %s does not exist\n' % str(module)) + def file_is_module(self, filename): + '''Given the name of a file in the modules/ directory, return true + if it should be viewed as a module description file.''' + return not (filename == 'ChangeLog' or filename.endswith('/ChangeLog') + or filename == 'COPYING' or filename.endswith('/COPYING') + or filename == 'README' or filename.endswith('/README') + or filename == 'TEMPLATE' + or filename == 'TEMPLATE-EXTENDED' + or filename == 'TEMPLATE-TESTS' + or filename.startswith('.') + or filename.endswith('~')) + def list(self): '''GLModuleSystem.list() -> list @@ -123,23 +136,6 @@ class GLModuleSystem(object): listing = list() localpath = self.config['localpath'] find_args = ['find', 'modules', '-type', 'f', '-print'] - sed_args = \ - [ - 'sed', - '-e', r's,^modules/,,', - '-e', r'/^ChangeLog$/d', - '-e', r'/\/ChangeLog$/d', - '-e', r'/^COPYING$/d', - '-e', r'/\/COPYING$/d', - '-e', r'/^README$/d', - '-e', r'/\/README$/d', - '-e', r'/^TEMPLATE$/d', - '-e', r'/^TEMPLATE-EXTENDED$/d', - '-e', r'/^TEMPLATE-TESTS$/d', - '-e', r'/^\..*/d', - '-e', r'/~$/d', - '-e', r'/-tests$/d', - ] # Read modules from gnulib root directory.
os.chdir(constants.DIRS['root']) @@ -154,24 +150,23 @@ class GLModuleSystem(object): find = sp.Popen(find_args, stdout=sp.PIPE) result += find.stdout.read().decode("UTF-8") os.chdir(DIRS['cwd']) - sed_args += ['-e', r's,\.diff$,,'] - - # Save the list of the modules to file. - path = joinpath(self.config['tempdir'], 'list') - with codecs.open(path, 'wb', 'UTF-8') as file: - file.write(result) - - # Filter the list of the modules. - stdin = codecs.open(path, 'rb', 'UTF-8') - sed = sp.Popen(sed_args, stdin=stdin, stdout=sp.PIPE) - result = sed.stdout.read().decode("UTF-8") - stdin.close() - os.remove(path) + listing = [ line for line in result.split('\n') if line.strip() ] - listing = sorted(set(listing)) - return listing + if len(localpath) > 0: + listing = [ subend('.diff', '', line) + for line in listing ] + # Remove modules/ prefix from each file name. + pattern = re.compile('^modules/') + listing = [ pattern.sub('', line) + for line in listing ] + # Filter out undesired file names. + listing = [ line + for line in listing + if self.file_is_module(line) and not line.endswith('-tests') ] + modules = sorted(set(listing)) + return modules #=============================================================================== -- 2.34.1