texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Mon, 6 Mar 2023 12:44:23 -0500 (EST)

branch: master
commit a357baf0efe054564ab04b199c05630dfc1c7a0c
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Mon Mar 6 17:34:49 2023 +0100

    * tp/Texinfo/ParserNonXS.pm (_parse_texi_regex)
    (_process_remaining_on_line), tp/Texinfo/XS/MiscXS.xs
    (xs_parse_texi_regex), tp/Texinfo/XS/misc.c (xs_parse_texi_regex),
    tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line),
    tp/Texinfo/XS/parsetexi/separator.c (handle_separator): handle
    separately the different separators (except for menu only separators)
    instead of grouping them, directly in process_remaining_on_line.
    Remove the handle_separator function.
---
 ChangeLog                           | 11 ++++++
 tp/Texinfo/ParserNonXS.pm           | 75 +++++++++++++++++++++----------------
 tp/Texinfo/XS/MiscXS.xs             | 33 +++++++++++-----
 tp/Texinfo/XS/misc.c                | 44 ++++++++++++----------
 tp/Texinfo/XS/miscxs.h              |  3 ++
 tp/Texinfo/XS/parsetexi/parser.c    | 63 ++++++++++++++++++++++++-------
 tp/Texinfo/XS/parsetexi/parser.h    |  5 ++-
 tp/Texinfo/XS/parsetexi/separator.c | 50 -------------------------
 8 files changed, 156 insertions(+), 128 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 45a352cc09..b80224f020 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2023-03-06  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/ParserNonXS.pm (_parse_texi_regex)
+       (_process_remaining_on_line), tp/Texinfo/XS/MiscXS.xs
+       (xs_parse_texi_regex), tp/Texinfo/XS/misc.c (xs_parse_texi_regex),
+       tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line),
+       tp/Texinfo/XS/parsetexi/separator.c (handle_separator): handle
+       separately the different separators (except for menu only separators)
+       instead of grouping them, directly in process_remaining_on_line.
+       Remove the handle_separator function.
+
 2023-03-06  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Common.pm (_copy_tree, _substitute_references_in_array)
diff --git a/tp/Texinfo/ParserNonXS.pm b/tp/Texinfo/ParserNonXS.pm
index f476013d4d..e3b9018e5b 100644
--- a/tp/Texinfo/ParserNonXS.pm
+++ b/tp/Texinfo/ParserNonXS.pm
@@ -4471,27 +4471,28 @@ sub _parse_texi_regex {
   my ($line) = @_;
 
   # REMACRO
-  my ($at_command, $open_brace, $asterisk, $single_letter_command,
-      $separator_match, $menu_separator, $misc_text)
+  my ($at_command, $open_brace, $close_brace, $comma,
+      $asterisk, $single_letter_command, $arobase,
+      $form_feed, $menu_only_separator, $misc_text)
     = ($line =~ /^\@([[:alnum:]][[:alnum:]-]*)
                 |^(\{)
+                |^(\})
+                |^(,)
                 |^(\*)
                 |^\@(["'~\@&\}\{,\.!\? \t\n\*\-\^`=:\|\/\\])
-                |^([{}@,\f])
+                |^(@)
+                |^(\f)
                 |^([:\t.])
                 |^([^{}@,:\t.\n\f]+)
                 /x);
 
-  if ($open_brace) {
-    $separator_match = $open_brace;
-  } elsif ($asterisk) {
+  if ($asterisk) {
     ($misc_text) = ($line =~ /^([^{}@,:\t.\n\f]+)/);
-  } elsif ($separator_match and $separator_match eq ',') {
-    $menu_separator = $separator_match;
   }
 
-  return ($at_command, $open_brace, $asterisk, $single_letter_command,
-    $separator_match, $menu_separator, $misc_text);
+  return ($at_command, $open_brace, $close_brace, $comma,
+    $asterisk, $single_letter_command, $arobase,
+    $form_feed, $menu_only_separator, $misc_text);
 }
 
 sub _check_line_directive {
@@ -5797,8 +5798,11 @@ sub _process_remaining_on_line($$$$)
 
   my $at_command_length;
   my @line_parsing = _parse_texi_regex($line);
-  my ($at_command, $open_brace, $asterisk, $single_letter_command,
-      $separator_match, $menu_separator, $misc_text) = @line_parsing;
+  my ($at_command, $open_brace, $close_brace, $comma, $asterisk,
+      $single_letter_command, $arobase, $form_feed, $menu_only_separator,
+      $misc_text) = @line_parsing;
+  my $menu_separator = $comma;
+  $menu_separator = $menu_only_separator if (!$comma);
   print STDERR "PARSED: "
     .join(', ',map {!defined($_) ? 'UNDEF' : "'$_'"} @line_parsing)."\n"
        if ($self->{'DEBUG'} and $self->{'DEBUG'} > 3);
@@ -6058,7 +6062,8 @@ sub _process_remaining_on_line($$$$)
       $current = $current->{'parent'};
     }
   } elsif (_handle_menu_entry_separators($self, \@current_array_for_ref,
-                        \$line, $source_info, $asterisk, $menu_separator)) {
+                                         \$line, $source_info, $asterisk,
+                                         $menu_separator)) {
     $current = $current_array_for_ref[0];
   # Any other @-command.
   } elsif ($command) {
@@ -6199,15 +6204,8 @@ sub _process_remaining_on_line($$$$)
       $command_element->{'info'} = {} if (!$command_element->{'info'});
       $command_element->{'info'}->{'alias_of'} = $from_alias;
     }
-  } elsif ($separator_match) {
-    my $separator = $separator_match;
+  } elsif ($open_brace) {
     substr ($line, 0, 1) = '';
-    print STDERR "SEPARATOR: $separator\n" if ($self->{'DEBUG'});
-    if ($separator eq '@') {
-      # this may happen with a @ at the very end of a file, therefore
-      # not followed by anything.
-      $self->_line_error(__("unexpected \@"), $source_info);
-    } elsif ($separator eq '{') {
       # handle_open_brace in XS parser
       if ($current->{'cmdname'}
            and defined($self->{'brace_commands'}->{$current->{'cmdname'}})) {
@@ -6357,7 +6355,8 @@ sub _process_remaining_on_line($$$$)
         $self->_line_error(sprintf(__("misplaced {")), $source_info);
       }
 
-    } elsif ($separator eq '}') {
+  } elsif ($close_brace) {
+    substr ($line, 0, 1) = '';
       # handle_close_brace in XS parser
 
       # For footnote and caption closing, when there is a paragraph inside.
@@ -6609,9 +6608,10 @@ sub _process_remaining_on_line($$$$)
       } else {
         $self->_line_error(sprintf(__("misplaced }")), $source_info);
       }
-    } elsif ($separator eq ','
-             and $current->{'parent'}
-             and $current->{'parent'}->{'remaining_args'}) {
+  } elsif ($comma) {
+    substr ($line, 0, 1) = '';
+    if ($current->{'parent'}
+        and $current->{'parent'}->{'remaining_args'}) {
       # handle_comma in XS parser
       _abort_empty_line($self, $current);
       _isolate_last_space($self, $current);
@@ -6772,28 +6772,37 @@ sub _process_remaining_on_line($$$$)
                           'extra' => {'spaces_associated_command' => $current}
                          };
       push @{$current->{'contents'}}, $space_before;
-    } elsif ($separator eq ',' and $current->{'type'}
+    } elsif ($current->{'type'}
              and $current->{'type'} eq 'line_arg'
              and $current->{'parent'}->{'cmdname'}
              and $current->{'parent'}->{'cmdname'} eq 'node') {
       $self->_line_warn(__("superfluous arguments for node"), $source_info);
-    } elsif ($separator eq "\f" and $current->{'type'}
-             and $current->{'type'} eq 'paragraph') {
+    } else {
+      $current = _merge_text($self, $current, $comma);
+    }
+  } elsif ($form_feed) {
+    substr ($line, 0, 1) = '';
+    if ($current->{'type'}
+        and $current->{'type'} eq 'paragraph') {
       # A form feed stops and restart a paragraph.
       $current = _end_paragraph($self, $current, $source_info);
-      my $line_feed = {'type' => 'empty_line', 'text' => $separator,
+      my $line_feed = {'type' => 'empty_line', 'text' => $form_feed,
                        'parent' => $current };
       push @{$current->{'contents'}}, $line_feed;
       my $empty_line = { 'type' => 'empty_line', 'text' => '',
                          'parent' => $current };
       push @{$current->{'contents'}}, $empty_line;
     } else {
-      $current = _merge_text($self, $current, $separator);
+      $current = _merge_text($self, $current, $form_feed);
     }
-  # need to be after as , is in common with separators
-  } elsif ($menu_separator) {
+  } elsif ($arobase) {
+    substr ($line, 0, 1) = '';
+    # this may happen with a @ at the very end of a file, therefore
+    # not followed by anything.
+    $self->_line_error(__("unexpected \@"), $source_info);
+  } elsif ($menu_only_separator) {
     substr ($line, 0, 1) = '';
-    $current = _merge_text($self, $current, $menu_separator);
+    $current = _merge_text($self, $current, $menu_only_separator);
   # Misc text except end of line
   } elsif (defined $misc_text) {
     print STDERR "MISC TEXT: $misc_text\n" if ($self->{'DEBUG'});
diff --git a/tp/Texinfo/XS/MiscXS.xs b/tp/Texinfo/XS/MiscXS.xs
index 4223eec88b..a8c58cc539 100644
--- a/tp/Texinfo/XS/MiscXS.xs
+++ b/tp/Texinfo/XS/MiscXS.xs
@@ -105,16 +105,20 @@ xs_parse_texi_regex (text)
   PREINIT:
      char *at_command;
      char *open_brace;
+     char *close_brace;
+     char *comma;
      char *asterisk;
      char *single_letter_command;
      char *separator_match;
-     char *menu_separator;
+     char *arobase;
+     char *form_feed;
+     char *menu_only_separator;
      char *new_text;
   PPCODE:
-     xs_parse_texi_regex(text, &at_command, &open_brace, &asterisk, 
-                         &single_letter_command, &separator_match,
-                         &menu_separator, &new_text);
-     EXTEND(SP,6);
+     xs_parse_texi_regex(text, &at_command, &open_brace, &close_brace,
+                         &comma, &asterisk, &single_letter_command,
+                         &arobase, &form_feed, &menu_only_separator, &new_text);
+     EXTEND(SP,9);
      PUSHs(sv_newmortal());
      sv_setpv((SV*)ST(0), at_command);
      SvUTF8_on(ST(0));
@@ -122,20 +126,29 @@ xs_parse_texi_regex (text)
      sv_setpv((SV*)ST(1), open_brace);
      SvUTF8_on(ST(1));
      PUSHs(sv_newmortal());
-     sv_setpv((SV*)ST(2), asterisk);
+     sv_setpv((SV*)ST(2), close_brace);
      SvUTF8_on(ST(2));
      PUSHs(sv_newmortal());
-     sv_setpv((SV*)ST(3), single_letter_command);
+     sv_setpv((SV*)ST(3), comma);
      SvUTF8_on(ST(3));
      PUSHs(sv_newmortal());
-     sv_setpv((SV*)ST(4), separator_match);
+     sv_setpv((SV*)ST(4), asterisk);
      SvUTF8_on(ST(4));
      PUSHs(sv_newmortal());
-     sv_setpv((SV*)ST(5), menu_separator);
+     sv_setpv((SV*)ST(5), single_letter_command);
      SvUTF8_on(ST(5));
      PUSHs(sv_newmortal());
-     sv_setpv((SV*)ST(6), new_text);
+     sv_setpv((SV*)ST(6), arobase);
      SvUTF8_on(ST(6));
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(7), form_feed);
+     SvUTF8_on(ST(7));
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(8), menu_only_separator);
+     SvUTF8_on(ST(8));
+     PUSHs(sv_newmortal());
+     sv_setpv((SV*)ST(9), new_text);
+     SvUTF8_on(ST(9));
 
 SV *
 xs_default_format_protect_text (self, text_in)
diff --git a/tp/Texinfo/XS/misc.c b/tp/Texinfo/XS/misc.c
index de7fc5cb30..f582a81ee2 100644
--- a/tp/Texinfo/XS/misc.c
+++ b/tp/Texinfo/XS/misc.c
@@ -281,15 +281,17 @@ xs_entity_text (char *text)
   return new;
 }
 
-/* Return list ($at_command, $open_brace, $asterisk, $single_letter_command,
-       $separator_match) */
+/* Return list ($at_command, $open_brace, ....) */
 void xs_parse_texi_regex (SV *text_in,
                           char **at_command,
                           char **open_brace,
+                          char **close_brace,
+                          char **comma,
                           char **asterisk,
                           char **single_letter_command,
-                          char **separator_match,
-                          char **menu_separator,
+                          char **arobase,
+                          char **form_feed,
+                          char **menu_only_separator,
                           char **new_text)
 {
   char *text;
@@ -301,8 +303,9 @@ void xs_parse_texi_regex (SV *text_in,
     sv_utf8_upgrade (text_in);
   text = SvPV_nolen (text_in);
 
-  *at_command = *open_brace = *asterisk = *single_letter_command
-          = *separator_match = *menu_separator = *new_text = 0;
+  *at_command = *open_brace = *close_brace = *comma = *asterisk
+     = *single_letter_command = *arobase = *form_feed
+          = *menu_only_separator = *new_text = 0;
 
   if (*text == '@' && isalnum(text[1]))
     {
@@ -324,9 +327,16 @@ void xs_parse_texi_regex (SV *text_in,
       if (*text == '{')
         {
           *open_brace = "{";
-          *separator_match = "{";
+        }
+      else if (*text == '}')
+        {
+          *close_brace = "}";
         }
 
+      else if (*text == ',')
+        {
+          *comma = ",";
+        }
       else if (*text == '@'
                  && text[1] && strchr ("([\"'~@&}{,.!?"
                                        " \t\n"
@@ -338,25 +348,21 @@ void xs_parse_texi_regex (SV *text_in,
           a[0] = text[1];
           a[1] = '\0';
         }
-
-      else if (strchr ("{}@,\f", *text))
+      else if (strchr (":\t.", *text))
         {
           static char a[2];
-          *separator_match = a;
-          if (*text == ',')
-            *menu_separator = a;
+          *menu_only_separator = a;
           a[0] = *text;
           a[1] = '\0';
         }
-
-      else if (strchr (":\t.", *text))
+      else if (*text == '\f')
         {
-          static char a[2];
-          *menu_separator = a;
-          a[0] = *text;
-          a[1] = '\0';
+          *form_feed = "\f";
+        }
+      else if (*text == '@')
+        {
+          *arobase = "@";
         }
-
       else
         {
           char *p;
diff --git a/tp/Texinfo/XS/miscxs.h b/tp/Texinfo/XS/miscxs.h
index 8dea52bffa..696c61e304 100644
--- a/tp/Texinfo/XS/miscxs.h
+++ b/tp/Texinfo/XS/miscxs.h
@@ -2,6 +2,9 @@ char *xs_unicode_text (char *, int);
 char *xs_entity_text (char *);
 char *xs_process_text (char *text);
 void xs_parse_texi_regex (SV *text,
+                          char **,
+                          char **,
+                          char **,
                           char **,
                           char **,
                           char **,
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index ea37eefc6f..55c166f02c 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -1618,7 +1618,9 @@ process_remaining_on_line (ELEMENT **current_inout, char **line_inout)
   /* There are cases when we need more input, but we don't want to
      get it in the top-level loop in parse_texi - this is mostly
      (always?) when we don't want to start a new, empty line, and
-     need to get more from the current, incomplete line of input. */
+     need to get more from the current, incomplete line of input.
+     Also, this ensures that the line cannot be empty in parsing below
+   */
   while (*line == '\0')
     {
       static char *allocated_text;
@@ -1949,7 +1951,7 @@ process_remaining_on_line (ELEMENT **current_inout, char **line_inout)
     /* special case for accent commands, use following character except @
      * as argument */
       else if ((command_flags(current) & CF_accent)
-               && *line != '\0' && *line != '@')
+               && *line != '@')
         {
           ELEMENT *e, *e2;
           debug ("ACCENT following_arg");
@@ -2219,30 +2221,65 @@ process_remaining_on_line (ELEMENT **current_inout, char **line_inout)
                                command_name (from_alias));
         }
     }
-  /* "Separator" character */
-  else if (*line != '\0' && strchr ("{}@,\f", *line))
+  /* "Separator" characters */
+  else if (*line == '{')
+    {
+      char separator = *line++;
+      current = handle_open_brace (current, &line);
+    }
+  else if (*line == '}')
+    {
+      char separator = *line++;
+      current = handle_close_brace (current, &line);
+    }
+  else if (*line == ',')
     {
       char separator = *line++;
-      debug ("SEPARATOR: %c", separator);
-      if (separator == '@')
-        line_error ("unexpected @");
+      /* comma as a command argument separator */
+      if (counter_value (&count_remaining_args, current->parent) > 0)
+        current = handle_comma (current, &line);
+      else if (current->type == ET_line_arg && current->parent->cmd == CM_node)
+        line_warn ("superfluous arguments for node");
       else
-        current = handle_separator (current, separator, &line);
+        current = merge_text (current, ",", 0);
     }
-  else if (*line != '\0' && strchr (":\t.", *line))
+  else if (strchr (":\t.", *line))
     {
-      /* merge menu separator (other than comma, done with other separators) */
+      /* merge menu separator (other than comma) */
       char separator = *line++;
       char t[2];
       t[0] = separator;
       t[1] = '\0';
       current = merge_text (current, t, 0);
     }
+  else if (*line == '@')
+    {
+      char separator = *line++;
+      line_error ("unexpected @");
+    }
+  else if (*line == '\f')
+    {
+      char separator = *line++;
+      if (current->type == ET_paragraph)
+        {
+          ELEMENT *e;
+
+          /* A form feed stops and restarts a paragraph. */
+          current = end_paragraph (current, 0, 0);
+          e = new_element (ET_empty_line);
+          text_append_n (&e->text, "\f", 1);
+          add_to_element_contents (current, e);
+          e = new_element (ET_empty_line);
+          add_to_element_contents (current, e);
+        }
+      else
+       current = merge_text (current, "\f", 0);
+    }
   /* "Misc text except end of line." */
-  else if (*line && *line != '\n')
+  else if (*line != '\n')
     {
       size_t len;
-      
+
       /* Output until next command, separator or newline. */
       {
         char saved; /* TODO: Have a length argument to merge_text? */
@@ -2253,8 +2290,6 @@ process_remaining_on_line (ELEMENT **current_inout, char **line_inout)
         line += len;
         *line = saved;
       }
-
-      goto funexit;
     }
   else /*  End of line */
     {
diff --git a/tp/Texinfo/XS/parsetexi/parser.h b/tp/Texinfo/XS/parsetexi/parser.h
index c2751a7601..e6a63ef179 100644
--- a/tp/Texinfo/XS/parsetexi/parser.h
+++ b/tp/Texinfo/XS/parsetexi/parser.h
@@ -141,8 +141,9 @@ extern size_t floats_space;
 
 
 /* In separator.c */
-ELEMENT *handle_separator (ELEMENT *current, char separator,
-                           char **line_inout);
+ELEMENT * handle_open_brace (ELEMENT *current, char **line_inout);
+ELEMENT * handle_close_brace (ELEMENT *current, char **line_inout);
+ELEMENT * handle_comma (ELEMENT *current, char **line_inout);
 
 /* In parser.c */
 typedef struct {
diff --git a/tp/Texinfo/XS/parsetexi/separator.c b/tp/Texinfo/XS/parsetexi/separator.c
index 738295dde3..47fc2d7080 100644
--- a/tp/Texinfo/XS/parsetexi/separator.c
+++ b/tp/Texinfo/XS/parsetexi/separator.c
@@ -747,53 +747,3 @@ funexit:
   return current;
 }
 
-/* Actions to be taken when a special character appears in the input. */
-ELEMENT *
-handle_separator (ELEMENT *current, char separator, char **line_inout)
-{
-  char *line = *line_inout;
-
-  if (separator == '{')
-    {
-      current = handle_open_brace (current, &line);
-    }
-  else if (separator == '}')
-    {
-      current = handle_close_brace (current, &line);
-    }
-  /* If a comma is seen after all the arguments for the command have been
-     read, it is included in the last argument. */
-  else if (separator == ','
-           && counter_value (&count_remaining_args, current->parent) > 0)
-    {
-      current = handle_comma (current, &line);
-    }
-  else if (separator == ',' && current->type == ET_line_arg
-           && current->parent->cmd == CM_node)
-    {
-      line_warn ("superfluous arguments for node");
-    }
-  else if (separator == '\f' && current->type == ET_paragraph)
-    {
-      ELEMENT *e;
-
-      /* A form feed stops and restarts a paragraph. */
-      current = end_paragraph (current, 0, 0);
-      e = new_element (ET_empty_line);
-      text_append_n (&e->text, "\f", 1);
-      add_to_element_contents (current, e);
-      e = new_element (ET_empty_line);
-      add_to_element_contents (current, e);
-    }
-  else
-    {
-      /* Default - merge the character as usual. */
-      char t[2];
-      t[0] = separator;
-      t[1] = '\0';
-      current = merge_text (current, t, 0);
-    }
-
-  *line_inout = line;
-  return current;
-}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]