bug-texinfo
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/3] Re-add simple_parser


From: Gavin Smith
Subject: [PATCH 1/3] Re-add simple_parser
Date: Sat, 25 Nov 2023 17:31:56 +0000

* tp/Texinfo/ParserNonXS.pm, tp/Texinfo/XS/parsetexi/Parsetexi.pm
(parser, simple_parser): Re-add simple_parser, used for document
translations.  The only difference from before is that we
do not share the indices between parsers.
* tp/Texinfo/Translations.pm (replace_convert_substrings):
Call simple_parser again.

This reverses the change on 2023-08-10.
---
 ChangeLog                            |  13 +++
 tp/Texinfo/ParserNonXS.pm            | 114 +++++++++++++++++++++------
 tp/Texinfo/Translations.pm           |   2 +-
 tp/Texinfo/XS/parsetexi/Parsetexi.pm |   4 +
 4 files changed, 107 insertions(+), 26 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 318e84f2e5..b50d9056e8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2023-11-25  Gavin Smith <gavinsmith0123@gmail.com>
+
+       Re-add simple_parser
+
+       * tp/Texinfo/ParserNonXS.pm, tp/Texinfo/XS/parsetexi/Parsetexi.pm
+       (parser, simple_parser): Re-add simple_parser, used for document
+       translations.  The only difference from before is that we
+       do not share the indices between parsers.
+       * tp/Texinfo/Translations.pm (replace_convert_substrings):
+       Call simple_parser again.
+
+       This reverses the change on 2023-08-10.
+
 2023-11-24  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Common.pm (parse_node_manual), tp/Texinfo/Convert/HTML.pm
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index 85b9a165bd..6a1a7ac2df 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -572,31 +572,8 @@ sub parser(;$$)
   my $parser = dclone(\%parser_default_configuration);
   bless $parser;
 
-  $parser->{'set'} = {};
-  if (defined($conf)) {
-    foreach my $key (keys(%$conf)) {
-      if (exists($parser_settable_configuration{$key})) {
-        # we keep registrar instead of copying on purpose, to reuse the object
-        if ($key ne 'values' and $key ne 'registrar' and ref($conf->{$key})) {
-          $parser->{$key} = dclone($conf->{$key});
-        } else {
-          $parser->{$key} = $conf->{$key};
-        }
-        if ($initialization_overrides{$key}) {
-          $parser->{'set'}->{$key} = $parser->{$key};
-        }
-      } else {
-        warn "ignoring parser configuration value \"$key\"\n";
-      }
-    }
-  }
-  # restrict variables found by get_conf, and set the values to the
-  # parser initialization values only.  What is found in the document
-  # has no effect.
-  foreach my $key (keys(%Texinfo::Common::default_parser_customization_values)) {
-    $parser->{'conf'}->{$key} = $parser->{$key};
-  }
-
+  _setup_conf($parser, $conf);
+  # This is not very useful in perl, but mimics the XS parser
   print STDERR "!!!!!!!!!!!!!!!! RESETTING THE PARSER !!!!!!!!!!!!!!!!!!!!!\n"
     if ($parser->{'DEBUG'});
 
@@ -611,6 +588,7 @@ sub parser(;$$)
   $parser->{'close_paragraph_commands'} = {%default_close_paragraph_commands};
   $parser->{'close_preformatted_commands'} = {%close_preformatted_commands};
 
+  # following is common with simple_parser
   # other initializations
   $parser->{'definfoenclose'} = {};
   $parser->{'source_mark_counters'} = {};
@@ -637,6 +615,66 @@ sub parser(;$$)
   return $parser;
 }
 
+# simple parser initialization.  The only difference with a regular parser
+# is that the dynamic @-command groups and command index references are
+# initialized once for all and shared among simple parsers; index names are
+# still copied for each parser.  It is used in gdt() and this has a sizable
+# effect on performance.
+my $simple_parser_line_commands = dclone(\%line_commands);
+my $simple_parser_brace_commands = dclone(\%brace_commands);
+my $simple_parser_valid_nestings = dclone(\%default_valid_nestings);
+my $simple_parser_no_paragraph_commands = {%default_no_paragraph_commands};
+my $simple_parser_index_names = dclone(\%index_names);
+my $simple_parser_command_index = {%command_index};
+my $simple_parser_close_paragraph_commands = {%default_close_paragraph_commands};
+my $simple_parser_close_preformatted_commands = {%close_preformatted_commands};
+sub simple_parser(;$)
+{
+  my $conf = shift;
+
+  my $parser = dclone(\%parser_default_configuration);
+  bless $parser;
+
+  _setup_conf($parser, $conf);
+  # This is not very useful in perl, but mimics the XS parser
+  print STDERR "!!!!!!!!!!!!!!!! RESETTING THE PARSER !!!!!!!!!!!!!!!!!!!!!\n"
+    if ($parser->{'DEBUG'});
+
+  $parser->{'line_commands'} = $simple_parser_line_commands;
+  $parser->{'brace_commands'} = $simple_parser_brace_commands;
+  $parser->{'valid_nestings'} = $simple_parser_valid_nestings;
+  $parser->{'no_paragraph_commands'} = $simple_parser_no_paragraph_commands;
+  #$parser->{'index_names'} = $simple_parser_index_names;
+  $parser->{'index_names'} = dclone(\%index_names);
+  $parser->{'command_index'} = $simple_parser_command_index;
+  $parser->{'close_paragraph_commands'} = $simple_parser_close_paragraph_commands;
+  $parser->{'close_preformatted_commands'} = $simple_parser_close_preformatted_commands;
+
+  # other initializations
+  $parser->{'definfoenclose'} = {};
+  $parser->{'source_mark_counters'} = {};
+  $parser->{'nesting_context'} = {%nesting_context_init};
+  $parser->{'nesting_context'}->{'basic_inline_stack'} = [];
+  $parser->{'nesting_context'}->{'basic_inline_stack_on_line'} = [];
+  $parser->{'nesting_context'}->{'basic_inline_stack_block'} = [];
+  $parser->{'nesting_context'}->{'regions_stack'} = [];
+  $parser->{'basic_inline_commands'} = {%default_basic_inline_commands};
+
+  $parser->_init_context_stack();
+
+  # turn the array to a hash for speed.  Not sure it really matters for such
+  # a small array.
+  foreach my $expanded_format(@{$parser->{'EXPANDED_FORMATS'}}) {
+    $parser->{'expanded_formats_hash'}->{$expanded_format} = 1;
+  }
+
+  if (not defined($parser->{'registrar'})) {
+    $parser->{'registrar'} = Texinfo::Report::new();
+  }
+
+  return $parser;
+}
+
 sub get_conf($$)
 {
   my ($self, $var) = @_;
@@ -935,6 +973,32 @@ sub registered_errors($)
 
 sub _setup_conf($$)
 {
+  my ($parser, $conf) = @_;
+
+  $parser->{'set'} = {};
+  if (defined($conf)) {
+    foreach my $key (keys(%$conf)) {
+      if (exists($parser_settable_configuration{$key})) {
+        # we keep registrar instead of copying on purpose, to reuse the object
+        if ($key ne 'values' and $key ne 'registrar' and ref($conf->{$key})) {
+          $parser->{$key} = dclone($conf->{$key});
+        } else {
+          $parser->{$key} = $conf->{$key};
+        }
+        if ($initialization_overrides{$key}) {
+          $parser->{'set'}->{$key} = $parser->{$key};
+        }
+      } else {
+        warn "ignoring parser configuration value \"$key\"\n";
+      }
+    }
+  }
+  # restrict variables found by get_conf, and set the values to the
+  # parser initialization values only.  What is found in the document
+  # has no effect.
+  foreach my $key (keys(%Texinfo::Common::default_parser_customization_values)) {
+    $parser->{'conf'}->{$key} = $parser->{$key};
+  }
 }
 
 # Following are the internal parsing subroutines.  The most important are
diff --git a/tp/Texinfo/Translations.pm b/tp/Texinfo/Translations.pm
index f9329499c0..c91689a82c 100644
--- a/tp/Texinfo/Translations.pm
+++ b/tp/Texinfo/Translations.pm
@@ -398,7 +398,7 @@ sub replace_convert_substrings($$;$)
     #  }
     #}
   }
-  my $parser = Texinfo::Parser::parser($parser_conf);
+  my $parser = Texinfo::Parser::simple_parser($parser_conf);
 
   if ($customization_information->get_conf('DEBUG')) {
     print STDERR "IN TR PARSER '$texinfo_line'\n";
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.pm b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
index 1ced996e7a..7e3d9fe23b 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.pm
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
@@ -76,6 +76,10 @@ sub get_conf($$)
   return $self->{'conf'}->{$var};
 }
 
+sub simple_parser {
+  goto &parser;
+}
+
 # Initialize the parser
 sub parser (;$$)
 {
-- 
2.34.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]