>From 81b8c4d5565dbbea10eb3561063d2e8da52148d7 Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Sun, 7 Aug 2022 22:53:32 +0200 Subject: [PATCH 15/19] gnulib-tool.py: Fix section extraction from module descriptions. The code with self.content.split(section)[-1] was broken because it recognizes an indented section label. Similar code with ('\n' + self.content).split('\n' + section)[-1] would still be broken because it recognizes an indented section label in the first line of the file. The code with section_label_regex was broken because sometimes it returns the second-to-last section with the given label, not the last one. Also, whitespace after the colon was not ignored. * pygnulib/GLModuleSystem.py (GLModule.__init__): Dissect the module description's contents immediately, once only, in a reliable way. (GLModule.getDescription, GLModule.getComment): Simplify. (GLModule.getStatus): Simplify. Return a string. (GLModule.getStatuses): New function. Return a list. (GLModule.getNotice, GLModule.getApplicability, GLModule.getFiles, GLModule.getDependencies, GLModule.getAutoconfSnippet_Early, GLModule.getAutoconfSnippet, GLModule.getAutomakeSnippet_Conditional, GLModule.getInclude, GLModule.getLink, GLModule.getLicense_Raw): Simplify. (GLModule.getLicense): Remove whitespace after calling getLicense_Raw. (GLModule.getMaintainer): Simplify. (GLModuleTable.transitive_closure): Call getStatuses() instead of getStatus(). * pygnulib/GLEmiter.py: Likewise. * gnulib-tool.py (main): For --extract-description, --extract-comment, --extract-status, --extract-notice, --extract-autoconf-snippet, --extract-automake-snippet, --extract-include-directive, --extract-link-directive, --extract-maintainer, don't add an extra newline after the snippet. 
--- ChangeLog | 31 +++ gnulib-tool.py | 19 +- gnulib-tool.py.TODO | 1 - pygnulib/GLEmiter.py | 4 +- pygnulib/GLModuleSystem.py | 387 ++++++++++--------------------------- 5 files changed, 140 insertions(+), 302 deletions(-) diff --git a/ChangeLog b/ChangeLog index 36962c2dd2..305d02be51 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,36 @@ 2022-08-07 Bruno Haible + gnulib-tool.py: Fix section extraction from module descriptions. + The code with self.content.split(section)[-1] + was broken because it recognizes an indented section label. + Similar code with ('\n' + self.content).split('\n' + section)[-1] + would still be broken because it recognizes an indented section label + in the first line of the file. + The code with section_label_regex + was broken because sometimes it returns the second-to-last section with + the given label, not the last one. + Also, whitespace after the colon was not ignored. + * pygnulib/GLModuleSystem.py (GLModule.__init__): Dissect the module + description's contents immediately, once only, in a reliable way. + (GLModule.getDescription, GLModule.getComment): Simplify. + (GLModule.getStatus): Simplify. Return a string. + (GLModule.getStatuses): New function. Return a list. + (GLModule.getNotice, GLModule.getApplicability, GLModule.getFiles, + GLModule.getDependencies, GLModules.getAutoconfSnippet_Early, + GLModules.getAutoconfSnippet, GLModule.getAutomakeSnippet_Conditional, + GLModule.getInclude, GLModule.getLink, GLModule.getLicense_Raw): + Simplify. + (GLModule.getLicense): Remove whitespace after calling getLicense_Raw. + (GLModule.getMaintainer): Simplify. + (GLModuleTable.transitive_closure): Call getStatuses() instead of + getStatus(). + * pygnulib/GLEmiter.py: Likewise. 
+ * gnulib-tool.py (main): For --extract-description, --extract-comment, + --extract-status, --extract-notice, --extract-autoconf-snippet, + --extract-automake-snippet, --extract-include-directive, + --extract-link-directive, --extract-maintainer, don't add an extra + newline after the snippet. + gnulib-tool.py: Improve field naming. * pygnulib/GLModuleSystem.py (GLModule): Rename field 'module' to 'path'. Fix a typo in a TypeError message. diff --git a/gnulib-tool.py b/gnulib-tool.py index 032deee6e0..0e888e6fd8 100755 --- a/gnulib-tool.py +++ b/gnulib-tool.py @@ -974,29 +974,28 @@ def main(): modules = [ modulesystem.find(module) for module in modules ] for module in modules: - print(module.getDescription()) + sys.stdout.write(module.getDescription()) elif mode == 'extract-comment': modulesystem = classes.GLModuleSystem(config) modules = [ modulesystem.find(module) for module in modules ] for module in modules: - print(module.getComment()) + sys.stdout.write(module.getComment()) elif mode == 'extract-status': modulesystem = classes.GLModuleSystem(config) modules = [ modulesystem.find(module) for module in modules ] for module in modules: - status = module.getStatus() - print('\n'.join(status)) + sys.stdout.write(module.getStatus()) elif mode == 'extract-notice': modulesystem = classes.GLModuleSystem(config) modules = [ modulesystem.find(module) for module in modules ] for module in modules: - print(module.getNotice()) + sys.stdout.write(module.getNotice()) elif mode == 'extract-applicability': modulesystem = classes.GLModuleSystem(config) @@ -1039,28 +1038,28 @@ def main(): modules = [ modulesystem.find(module) for module in modules ] for module in modules: - print(module.getAutoconfSnippet()) + sys.stdout.write(module.getAutoconfSnippet()) elif mode == 'extract-automake-snippet': modulesystem = classes.GLModuleSystem(config) modules = [ modulesystem.find(module) for module in modules ] for module in modules: - print(module.getAutomakeSnippet()) + 
sys.stdout.write(module.getAutomakeSnippet()) elif mode == 'extract-include-directive': modulesystem = classes.GLModuleSystem(config) modules = [ modulesystem.find(module) for module in modules ] for module in modules: - print(module.getInclude()) + sys.stdout.write(module.getInclude()) elif mode == 'extract-link-directive': modulesystem = classes.GLModuleSystem(config) modules = [ modulesystem.find(module) for module in modules ] for module in modules: - print(module.getLink()) + sys.stdout.write(module.getLink()) elif mode == 'extract-license': modulesystem = classes.GLModuleSystem(config) @@ -1074,7 +1073,7 @@ def main(): modules = [ modulesystem.find(module) for module in modules ] for module in modules: - print(module.getMaintainer()) + sys.stdout.write(module.getMaintainer()) elif mode == 'extract-tests-module': modulesystem = classes.GLModuleSystem(config) diff --git a/gnulib-tool.py.TODO b/gnulib-tool.py.TODO index a46da5e2ad..9efcda510d 100644 --- a/gnulib-tool.py.TODO +++ b/gnulib-tool.py.TODO @@ -37,7 +37,6 @@ Implement the options: Remove exit() in GLImport.py. Optimize: - - GLModuleSystem: Parse each module description only once. - os.chdir around subprocess creation -> cwd=... argument instead. -------------------------------------------------------------------------------- diff --git a/pygnulib/GLEmiter.py b/pygnulib/GLEmiter.py index e0164ed7cc..61cabc92fa 100644 --- a/pygnulib/GLEmiter.py +++ b/pygnulib/GLEmiter.py @@ -974,9 +974,9 @@ AC_DEFUN([%V1%_LIBSOURCES], [ # Skip the contents if it's entirely empty. if snippet.strip(): # Check status of the module. 
- status = module.getStatus() + statuses = module.getStatuses() islongrun = False - for word in status: + for word in statuses: if word == 'longrunning-test': islongrun = True break diff --git a/pygnulib/GLModuleSystem.py b/pygnulib/GLModuleSystem.py index ec2ff0c35c..32a133009a 100644 --- a/pygnulib/GLModuleSystem.py +++ b/pygnulib/GLModuleSystem.py @@ -177,14 +177,11 @@ class GLModule(object): path. GLModule can get all information about module, get its dependencies, files, etc.''' - section_label_regex = '(?:Description:|Comment:|Status:|Notice:|Applicability:|\ -Files:|Depends-on:|configure\\.ac-early:|configure\\.ac:|Makefile\\.am:|\ -Include:|Link:|License:|Maintainer:)' - section_label_pattern = \ re.compile('^(Description|Comment|Status|Notice|Applicability|' + 'Files|Depends-on|configure\\.ac-early|configure\\.ac|' - + 'Makefile\\.am|Include|Link|License|Maintainer):$') + + 'Makefile\\.am|Include|Link|License|Maintainer):$', + re.M) def __init__(self, config, path, patched=False): '''GLModule.__init__(config, path[, patched]) -> GLModule @@ -209,8 +206,20 @@ Include:|Link:|License:|Maintainer:)' self.config = config self.filesystem = GLFileSystem(self.config) self.modulesystem = GLModuleSystem(self.config) + # Read the module description file into memory. with codecs.open(path, 'rb', 'UTF-8') as file: self.content = file.read().replace('\r\n', '\n') + # Dissect it into sections. 
+ self.sections = dict() + last_section_label = None + last_section_start = 0 + for match in GLModule.section_label_pattern.finditer(self.content): + if last_section_label != None: + self.sections[last_section_label] = self.content[last_section_start : match.start()] + last_section_label = match.group(1) + last_section_start = match.end() + 1 + if last_section_label != None: + self.sections[last_section_label] = self.content[last_section_start:] def __eq__(self, module): '''x.__eq__(y) <==> x==y''' @@ -378,117 +387,51 @@ Include:|Link:|License:|Maintainer:)' '''GLModule.getDescription() -> str Return description of the module.''' - section = 'Description:' - if 'description' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - pattern = '^%s[\t ]*(.*?)%s' % (section, GLModule.section_label_regex) - pattern = re.compile(pattern, re.S | re.M) - result = pattern.findall(self.content) - if type(result) is list: - if not result: - result = '' - else: # if result - result = result[-1] - result = result.strip() - self.cache['description'] = result - return self.cache['description'] + return self.sections.get('Description', '') def getComment(self): '''GLModule.getComment() -> str Return comment to module.''' - section = 'Comment:' - if 'comment' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - pattern = '^%s[\t ]*(.*?)%s' % (section, GLModule.section_label_regex) - pattern = re.compile(pattern, re.S | re.M) - result = pattern.findall(self.content) - if type(result) is list: - if not result: - result = '' - else: # if result - result = result[-1] - result = result.strip() - self.cache['comment'] = result - return self.cache['comment'] + return self.sections.get('Comment', '') def getStatus(self): '''GLModule.getStatus() -> str Return module status.''' - section = 'Status:' - if 'status' not in self.cache: - if section not in self.content: - result = '' - else: # if 
section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - result = [ part.strip() - for part in parts - if part.strip() ] - self.cache['status'] = list(result) - return list(self.cache['status']) + return self.sections.get('Status', '') + + def getStatuses(self): + '''GLModule.getStatuses() -> list + + Return module status.''' + if 'statuses' not in self.cache: + snippet = self.getStatus() + result = [ line.strip() + for line in snippet.split('\n') + if line.strip() ] + self.cache['statuses'] = result + return self.cache['statuses'] def getNotice(self): '''GLModule.getNotice() -> str Return notice to module.''' - section = 'Notice:' - if 'notice' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - result = ''.join(parts) - self.cache['notice'] = result - return self.cache['notice'] + return self.sections.get('Notice', '') def getApplicability(self): '''GLModule.getApplicability() -> str Return applicability of module.''' - section = 'Applicability:' if 'applicability' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - parts = [ part.strip() - for part in parts ] - result = ''.join(parts) - if not result.strip(): - if self.getName().endswith('-tests'): + result = 
self.sections.get('Applicability', '') + result = result.strip() + if not result: + # The default is 'main' or 'tests', depending on the module's name. + if self.isTests(): result = 'tests' - else: # if not self.getName().endswith('-tests') + else: result = 'main' - result = result.strip() self.cache['applicability'] = result return self.cache['applicability'] @@ -497,115 +440,56 @@ Include:|Link:|License:|Maintainer:)' Return list of files. GLConfig: ac_version.''' - ac_version = self.config['ac_version'] - section = 'Files:' - result = list() if 'files' not in self.cache: - if section not in self.content: - result = list() - else: # if section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - result = [ part.strip() - for part in parts - if part.strip() ] - result += [joinpath('m4', '00gnulib.m4')] - result += [joinpath('m4', 'zzgnulib.m4')] - result += [joinpath('m4', 'gnulib-common.m4')] - self.cache['files'] = list(result) - return list(self.cache['files']) + snippet = self.sections.get('Files', '') + result = [ line.strip() + for line in snippet.split('\n') + if line.strip() ] + result.append(joinpath('m4', '00gnulib.m4')) + result.append(joinpath('m4', 'zzgnulib.m4')) + result.append(joinpath('m4', 'gnulib-common.m4')) + self.cache['files'] = result + return self.cache['files'] def getDependencies(self): '''GLModule.getDependencies() -> list Return list of dependencies. 
GLConfig: localpath.''' - result = list() - section = 'Depends-on:' if 'dependencies' not in self.cache: - if section not in self.content: - depmodules = list() - else: # if section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - modules = ''.join(parts) - modules = [ line - for line in modules.split('\n') - if line.strip() ] - modules = [ module - for module in modules - if not module.startswith('#') ] - for line in modules: - split = [ part - for part in line.split(' ') - if part.strip() ] - if len(split) == 1: - module = line.strip() - condition = None - else: # if len(split) != 1 - module = split[0] - condition = split[1] - result += [tuple([self.modulesystem.find(module), condition])] + snippet = self.sections.get('Depends-on', '') + modules = [ line.strip() + for line in snippet.split('\n') + if line.strip() ] + modules = [ module + for module in modules + if not module.startswith('#') ] + result = list() + for line in modules: + split = [ part + for part in line.split(' ') + if part.strip() ] + if len(split) == 1: + module = line.strip() + condition = None + else: # if len(split) != 1 + module = split[0] + condition = split[1] + result += [tuple([self.modulesystem.find(module), condition])] self.cache['dependencies'] = result - return list(self.cache['dependencies']) + return self.cache['dependencies'] def getAutoconfSnippet_Early(self): '''GLModule.getAutoconfSnippet_Early() -> str Return autoconf-early snippet.''' - section = 'configure.ac-early:' - if 'autoconf-early' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = 
GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - result = ''.join(parts) - self.cache['autoconf-early'] = result - return self.cache['autoconf-early'] + return self.sections.get('configure.ac-early', '') def getAutoconfSnippet(self): '''GLModule.getAutoconfSnippet() -> str Return autoconf snippet.''' - section = 'configure.ac:' - if 'autoconf' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - result = ''.join(parts) - self.cache['autoconf'] = result - return self.cache['autoconf'] + return self.sections.get('configure.ac', '') def getAutomakeSnippet(self): '''getAutomakeSnippet() -> str @@ -625,23 +509,7 @@ Include:|Link:|License:|Maintainer:)' '''GLModule.getAutomakeSnippet_Conditional() -> str Return conditional automake snippet.''' - section = 'Makefile.am:' - if 'makefile-conditional' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - result = ''.join(parts) - self.cache['makefile-conditional'] = result - return self.cache['makefile-conditional'] + return self.sections.get('Makefile.am', '') def getAutomakeSnippet_Unconditional(self): '''GLModule.getAutomakeSnippet_Unconditional() -> str @@ -718,24 +586,10 @@ Include:|Link:|License:|Maintainer:)' '''GLModule.getInclude() -> str Return include directive.''' - section = 'Include:' if 'include' not in self.cache: - if section not in self.content: - result = '' - else: # if section in 
self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - result = ''.join(parts) - result = result.strip() - pattern = re.compile('^(["<].*[>"])', re.M) - result = pattern.sub('#include \\1', result) + snippet = self.sections.get('Include', '') + pattern = re.compile('^(["<])', re.M) + result = pattern.sub('#include \\1', snippet) self.cache['include'] = result return self.cache['include'] @@ -743,64 +597,36 @@ Include:|Link:|License:|Maintainer:)' '''GLModule.getLink() -> str Return link directive.''' - section = 'Link:' - if 'link' not in self.cache: - parts = list() - if section in self.content: - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - parts = [ part.strip() - for part in parts - if part.strip() ] - # result = ' '.join(parts) - self.cache['link'] = parts - return self.cache['link'] - - def getLicense(self): - '''GLModule.getLicense(self) -> str - - Get license and warn user if module lacks a license.''' - if str(self) == 'parse-datetime': - # This module is under a weaker license only for the purpose of some - # users who hand-edit it and don't use gnulib-tool. For the regular - # gnulib users they are under a stricter license. 
- return 'GPL' - else: - license = self.getLicense_Raw() - if not self.isTests(): - if not license: - if self.config['errors']: - raise GLError(18, str(self)) - else: # if not self.config['errors'] - sys.stderr.write('gnulib-tool: warning: module %s lacks a license\n' % str(self)) - if not license: - license = 'GPL' - return license + return self.sections.get('Link', '') def getLicense_Raw(self): '''GLModule.getLicense_Raw() -> str Return module license.''' - section = 'License:' + return self.sections.get('License', '') + + def getLicense(self): + '''GLModule.getLicense(self) -> str + + Get license and warn user if module lacks a license.''' if 'license' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - pattern = '^%s[\t ]*(.*?)%s' % (section, GLModule.section_label_regex) - pattern = re.compile(pattern, re.S | re.M) - result = pattern.findall(self.content) - if type(result) is list: - if not result: - result = '' - else: # if result - result = result[-1] - result = result.strip() + result = None + if str(self) == 'parse-datetime': + # This module is under a weaker license only for the purpose of some + # users who hand-edit it and don't use gnulib-tool. For the regular + # gnulib users they are under a stricter license. 
+ result = 'GPL' + else: + license = self.getLicense_Raw().strip() + if not self.isTests(): + if not license: + if self.config['errors']: + raise GLError(18, str(self)) + else: # if not self.config['errors'] + sys.stderr.write('gnulib-tool: warning: module %s lacks a license\n' % str(self)) + if not license: + license = 'GPL' + result = license self.cache['license'] = result return self.cache['license'] @@ -808,24 +634,7 @@ Include:|Link:|License:|Maintainer:)' '''GLModule.getMaintainer() -> str Return maintainer directive.''' - section = 'Maintainer:' - if 'maintainer' not in self.cache: - if section not in self.content: - result = '' - else: # if section in self.content - snippet = self.content.split(section)[-1] - lines = [ '%s\n' % line - for line in snippet.split('\n') ] - parts = list() - for line in lines: - findflag = GLModule.section_label_pattern.findall(line) - if findflag: - break - parts += [line] - result = ''.join(parts) - result = result.strip() - self.cache['maintainer'] = result - return self.cache['maintainer'] + return self.sections.get('Maintainer', '') #=============================================================================== @@ -1002,8 +811,8 @@ class GLModuleTable(object): conditions += [None] for depmodule in depmodules: include = True - status = depmodule.getStatus() - for word in status: + statuses = depmodule.getStatuses() + for word in statuses: if word == 'obsolete': if not self.config.checkInclTestCategory(TESTS['obsolete']): include = False -- 2.34.1