texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: Protect spaces in space attributes in XML specific code


From: Patrice Dumas
Subject: branch master updated: Protect spaces in space attributes in XML specific code
Date: Thu, 01 Dec 2022 04:58:39 -0500

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 8eaa609444 Protect spaces in space attributes in XML specific code
8eaa609444 is described below

commit 8eaa609444dec0b2b5f2fd1fa82f65b57824ce02
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Thu Dec 1 10:58:30 2022 +0100

    Protect spaces in space attributes in XML specific code
    
    * tp/Texinfo/Convert/TexinfoMarkup.pm,
    tp/Texinfo/Convert/TexinfoXML.pm (_xml_attributes): protect spaces
    in space attributes in format specific code.
---
 ChangeLog                           |  8 ++++++++
 tp/Texinfo/Convert/TexinfoMarkup.pm | 31 +++++--------------------------
 tp/Texinfo/Convert/TexinfoSXML.pm   |  2 +-
 tp/Texinfo/Convert/TexinfoXML.pm    | 13 +++++++++++--
 4 files changed, 25 insertions(+), 29 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index e2eef9c62e..e7d2f57c5f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2022-12-01  Patrice Dumas  <pertusus@free.fr>
+
+       Protect spaces in space attributes in XML specific code
+
+       * tp/Texinfo/Convert/TexinfoMarkup.pm,
+       tp/Texinfo/Convert/TexinfoXML.pm (_xml_attributes): protect spaces
+       in space attributes in format specific code.
+
 2022-11-30  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Convert/TexinfoMarkup.pm (_convert),
diff --git a/tp/Texinfo/Convert/TexinfoMarkup.pm b/tp/Texinfo/Convert/TexinfoMarkup.pm
index 715a635e51..ba8852be24 100644
--- a/tp/Texinfo/Convert/TexinfoMarkup.pm
+++ b/tp/Texinfo/Convert/TexinfoMarkup.pm
@@ -446,29 +446,12 @@ sub convert_tree($$)
   return $self->_convert($root);
 }
 
-# FIXME that function is markup format specific, it only works if \ is not
-# special in the markup language
-sub _protect_in_spaces_attribute_text($)
-{
-  my $text = shift;
-  $text =~ s/\n/\\n/g;
-  # protect formfeed in space attributes.  It is necessary for XML 1.0
-  # (and most likely XML 1.1) and probably a good thing in other formats.
-  $text =~ s/\f/\\f/g;
-  # \v does not match U+000B vertical tab, but matches diverse vertical spaces.
-  # We nevertheless use \v here to represent ^K as is customarily done in other
-  # contexts.
-  $text =~ s/\N{U+000B}/\\v/g;
-  return $text;
-}
-
 sub _leading_spaces_arg($)
 {
   my $element = shift;
   if ($element->{'info'} and $element->{'info'}->{'spaces_before_argument'}
       and $element->{'info'}->{'spaces_before_argument'} ne '') {
-    return ['spaces', _protect_in_spaces_attribute_text(
-                            $element->{'info'}->{'spaces_before_argument'})];
+    return ['spaces', $element->{'info'}->{'spaces_before_argument'}];
   } else {
     return ();
   }
@@ -518,7 +501,7 @@ sub _trailing_spaces_arg($)
     my $spaces = $element->{'info'}->{'spaces_after_argument'};
     chomp($spaces);
     if ($spaces ne '') {
-      return ['trailingspaces', _protect_in_spaces_attribute_text($spaces)];
+      return ['trailingspaces', $spaces];
     }
   }
   return ();
@@ -638,8 +621,7 @@ sub _convert($$;$)
           if ($element->{'info'}
               and $element->{'info'}->{'spaces_after_cmd_before_arg'}) {
             push @$attributes, ['spaces',
-               _protect_in_spaces_attribute_text(
-                    $element->{'info'}->{'spaces_after_cmd_before_arg'})];
+                    $element->{'info'}->{'spaces_after_cmd_before_arg'}];
           }
           if ($element->{'args'}->[0]->{'type'} eq 'following_arg') {
              push @$attributes, ['bracketed', 'off'];
@@ -758,7 +740,6 @@ sub _convert($$;$)
         my $attribute;
         if ($line_command_line_attributes{$cmdname}) {
           if ($element->{'extra'} and defined($element->{'extra'}->{'text_arg'})) {
-            # FIXME use _protect_in_spaces_attribute_text?
             push @$attribute, [$line_command_line_attributes{$cmdname},
                   $element->{'extra'}->{'text_arg'}];
           }
@@ -1046,8 +1027,7 @@ sub _convert($$;$)
       if ($element->{'info'}
           and $element->{'info'}->{'spaces_after_cmd_before_arg'}) {
         $space_after_command_attribute = ['spacesaftercmd',
-              _protect_in_spaces_attribute_text(
-                $element->{'info'}->{'spaces_after_cmd_before_arg'})];
+                $element->{'info'}->{'spaces_after_cmd_before_arg'}];
       }
 
       my @format_elements;
@@ -1481,8 +1461,7 @@ sub _convert($$;$)
           my $leading_spaces = $element->{'info'}->{'spaces_before_argument'};
           # may happen without any argument, remove as a \n is added below
           $leading_spaces =~ s/\n//;
-          $leading_spaces_attribute_spec = [['spaces',
-                          _protect_in_spaces_attribute_text($leading_spaces)]]
+          $leading_spaces_attribute_spec = [['spaces', $leading_spaces]]
             if ($leading_spaces ne '');
         }
         $result .= $self->txi_markup_open_element($element->{'cmdname'},
diff --git a/tp/Texinfo/Convert/TexinfoSXML.pm b/tp/Texinfo/Convert/TexinfoSXML.pm
index 36b43bce43..4b921b386d 100644
--- a/tp/Texinfo/Convert/TexinfoSXML.pm
+++ b/tp/Texinfo/Convert/TexinfoSXML.pm
@@ -50,7 +50,7 @@ sub converter_defaults($$)
   return %defaults;
 }
 
-
+# TODO protect foormfeeds, end of lines and other special spaces as in TexinfoXML?
 sub txi_markup_protect_text($$)
 {
   my $self = shift;
diff --git a/tp/Texinfo/Convert/TexinfoXML.pm b/tp/Texinfo/Convert/TexinfoXML.pm
index e9932c12a3..610174a506 100644
--- a/tp/Texinfo/Convert/TexinfoXML.pm
+++ b/tp/Texinfo/Convert/TexinfoXML.pm
@@ -84,11 +84,11 @@ my %special_xml_attributes = (
 #                      |  "'" ([^<&'] | Reference)* "'"
 # Reference as https://www.w3.org/TR/REC-xml/#NT-Reference
 # Reference       ::=          EntityRef | CharRef
-# Next CharRef is defined as https://www.w3.org/TR/REC-xml/#NT-CharRef
+# CharRef is defined as https://www.w3.org/TR/REC-xml/#NT-CharRef
 # CharRef         ::=          '&#' [0-9]+ ';'
 #                      | '&#x' [0-9a-fA-F]+ ';'
 # With the additional constraint that
-# Characters referred to using character references MUST match the production for Char.
+#   Characters referred to using character references MUST match the production for Char.
 # Which means that numerical entities used in attributes should correspond to
 # characters in the range of acceptable characters.  For example form feed is not
 # in that range, such that both \f and &#12; are invalid.
@@ -119,6 +119,15 @@ sub _xml_attributes($$)
       $text =~ s/\N{U+000B}/&attrverticaltab;/g;
       # &attrformfeed; and similar resolves to \f and similar so \ are doubled.
       $text =~ s/\\/\\\\/g;
+    } else {
+      $text =~ s/\n/\\n/g;
+      # protect formfeed in space attributes.  It is necessary for XML 1.0
+      # (and most likely XML 1.1).
+      $text =~ s/\f/\\f/g;
+      # \v does not match U+000B vertical tab, but matches diverse vertical
+      # spaces in  perl.  We nevertheless use \v here to represent ^K as
+      # is customarily done in other contexts.
+      $text =~ s/\N{U+000B}/\\v/g;
     }
     my $attribute_name = $attribute_spec->[0];
     if ($special_xml_attributes{$format_element}



reply via email to

[Prev in Thread] Current Thread [Next in Thread]