texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Wed, 15 Nov 2023 03:37:20 -0500 (EST)

branch: master
commit ad3205d85fd183bb1ddf8205d7e48ae5465bbde6
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Tue Nov 14 23:12:52 2023 +0100

    * tp/Texinfo/Convert/HTML.pm (%XS_overrides, %XS_conversion_overrides)
    (import): use hashes for association of overridden sub with XS sub.
    
    * tp/Texinfo/Convert/HTML.pm (_XS_format_init, import),
    tp/Texinfo/XS/convert/ConvertXS.xs (html_format_init),
    tp/Texinfo/XS/convert/convert_html.c (html_converter_initialize)
    (html_format_init): add html_format_init function, to be called once to
    set up C data that does not need any information on customization,
    with code from html_converter_initialize.  Set up XS interface.
    
    * tp/Texinfo/XS/convert/convert_html.c (convert_to_html_internal):
    use current_commands_conversion and not commands_conversion and
    current_types_conversion and not types_conversion.
    
    * tp/Texinfo/XS/convert/convert_html.c
    (html_default_format_protect_text)
    (default_css_string_format_protect_text): implement.
    
    * tp/Texinfo/XS/convert/converter.c
    (xml_format_text_with_numeric_entities, xml_protect_text): implement.
---
 ChangeLog                            |  23 ++++++++
 tp/Texinfo/Convert/Converter.pm      |  12 ++--
 tp/Texinfo/Convert/HTML.pm           | 109 +++++++++++++++++------------------
 tp/Texinfo/XS/convert/ConvertXS.xs   |   3 +
 tp/Texinfo/XS/convert/convert_html.c | 105 +++++++++++++++++++++++++++------
 tp/Texinfo/XS/convert/convert_html.h |   2 +
 tp/Texinfo/XS/convert/converter.c    |  92 +++++++++++++++++++++++++++++
 tp/Texinfo/XS/convert/converter.h    |   3 +
 8 files changed, 271 insertions(+), 78 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b1f0d0e300..0a7210931f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -8,6 +8,29 @@
 
        Report from Ihor Radchenko <yantar92@posteo.net> for Org mode manual.
 
+2023-11-14  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/Convert/HTML.pm (%XS_overrides, %XS_conversion_overrides)
+       (import): use hashes for association of overridden sub with XS sub.
+
+       * tp/Texinfo/Convert/HTML.pm (_XS_format_init, import),
+       tp/Texinfo/XS/convert/ConvertXS.xs (html_format_init),
+       tp/Texinfo/XS/convert/convert_html.c (html_converter_initialize)
+       (html_format_init): add html_format_init function, to be called once to
+       set up C data that does not need any information on customization,
+       with code from html_converter_initialize.  Set up XS interface.
+
+       * tp/Texinfo/XS/convert/convert_html.c (convert_to_html_internal):
+       use current_commands_conversion and not commands_conversion and
+       current_types_conversion and not types_conversion.
+
+       * tp/Texinfo/XS/convert/convert_html.c
+       (html_default_format_protect_text)
+       (default_css_string_format_protect_text): implement.
+
+       * tp/Texinfo/XS/convert/converter.c
+       (xml_format_text_with_numeric_entities, xml_protect_text): implement.
+
 2023-11-14  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/Translations.pm (import), tp/Texinfo/XS/Makefile.am,
diff --git a/tp/Texinfo/Convert/Converter.pm b/tp/Texinfo/Convert/Converter.pm
index 166886a411..2a6e6d2403 100644
--- a/tp/Texinfo/Convert/Converter.pm
+++ b/tp/Texinfo/Convert/Converter.pm
@@ -1570,12 +1570,12 @@ sub get_output_files_XS_unclosed_streams($)
 # XML related methods and variables that may be used in different
 # XML Converters.
 
-my $xml_numeric_entity_mdash = '&#'.hex('2014').';';
-my $xml_numeric_entity_ndash = '&#'.hex('2013').';';
-my $xml_numeric_entity_ldquo = '&#'.hex('201C').';';
-my $xml_numeric_entity_rdquo = '&#'.hex('201D').';';
-my $xml_numeric_entity_lsquo = '&#'.hex('2018').';';
-my $xml_numeric_entity_rsquo = '&#'.hex('2019').';';
+my $xml_numeric_entity_mdash = '&#'.hex('2014').';'; #8212
+my $xml_numeric_entity_ndash = '&#'.hex('2013').';'; #8211
+my $xml_numeric_entity_ldquo = '&#'.hex('201C').';'; #8220
+my $xml_numeric_entity_rdquo = '&#'.hex('201D').';'; #8221
+my $xml_numeric_entity_lsquo = '&#'.hex('2018').';'; #8216
+my $xml_numeric_entity_rsquo = '&#'.hex('2019').';'; #8217
 
 sub xml_format_text_with_numeric_entities($$)
 {
diff --git a/tp/Texinfo/Convert/HTML.pm b/tp/Texinfo/Convert/HTML.pm
index 0684ac6a30..d4801b32ef 100644
--- a/tp/Texinfo/Convert/HTML.pm
+++ b/tp/Texinfo/Convert/HTML.pm
@@ -92,73 +92,72 @@ my $XS_convert = 0;
 $XS_convert = 1 if (defined $ENV{TEXINFO_XS_CONVERT}
                     and $ENV{TEXINFO_XS_CONVERT} eq '1');
 
+my %XS_overrides = (
+  "Texinfo::Convert::HTML::_default_format_protect_text"
+    => "Texinfo::MiscXS::default_format_protect_text",
+  "Texinfo::Convert::HTML::_entity_text"
+    => "Texinfo::MiscXS::entity_text",
+);
+
+my %XS_conversion_overrides = (
+  "Texinfo::Convert::HTML::_XS_format_init"
+   => "Texinfo::Convert::ConvertXS::html_format_init",
+  "Texinfo::Convert::HTML::_XS_converter_initialize"
+   => "Texinfo::Convert::ConvertXS::html_converter_initialize_sv",
+  "Texinfo::Convert::HTML::_XS_initialize_output_state"
+   => "Texinfo::Convert::ConvertXS::html_initialize_output_state",
+  "Texinfo::Convert::HTML::_finalize_output_state"
+   => "Texinfo::Convert::ConvertXS::html_finalize_output_state",
+  "Texinfo::Convert::HTML::_new_document_context"
+   => "Texinfo::Convert::ConvertXS::html_new_document_context",
+  "Texinfo::Convert::HTML::_pop_document_context"
+   => "Texinfo::Convert::ConvertXS::html_pop_document_context",
+  "Texinfo::Convert::HTML::_XS_get_index_entries_sorted_by_letter"
+   => "Texinfo::Convert::ConvertXS::get_index_entries_sorted_by_letter",
+  "Texinfo::Convert::HTML::_XS_html_merge_index_entries"
+   => "Texinfo::Convert::ConvertXS::html_merge_index_entries",
+  "Texinfo::Convert::HTML::_prepare_conversion_units"
+   => "Texinfo::Convert::ConvertXS::html_prepare_conversion_units",
+  "Texinfo::Convert::HTML::_prepare_units_directions_files"
+   => "Texinfo::Convert::ConvertXS::html_prepare_units_directions_files",
+  "Texinfo::Convert::HTML::_prepare_output_units_global_targets"
+   => "Texinfo::Convert::ConvertXS::html_prepare_output_units_global_targets",
+  "Texinfo::Convert::HTML::_translate_names"
+   => "Texinfo::Convert::ConvertXS::html_translate_names",
+  "Texinfo::Convert::HTML::_prepare_title_titlepage"
+   => "Texinfo::Convert::ConvertXS::html_prepare_title_titlepage",
+  "Texinfo::Convert::HTML::_html_convert_convert"
+   => "Texinfo::Convert::ConvertXS::html_convert_convert",
+  "Texinfo::Convert::HTML::_html_convert_output"
+   => "Texinfo::Convert::ConvertXS::html_convert_output",
+  #"Texinfo::Convert::HTML::_XS_html_convert_tree"
+  # => "Texinfo::Convert::ConvertXS::html_convert_tree",
+);
+
+# XS function does initialization independent of customization
+sub _XS_format_init()
+{
+}
+
 our $module_loaded = 0;
 sub import {
   if (!$module_loaded) {
-    Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_default_format_protect_text",
-      "Texinfo::MiscXS::default_format_protect_text");
-    Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_entity_text",
-      "Texinfo::MiscXS::entity_text");
+    foreach my $sub (keys %XS_overrides) {
+      Texinfo::XSLoader::override ($sub, $XS_overrides{$sub});
+    }
 
     if ($XS_convert) {
-
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_XS_converter_initialize",
-      "Texinfo::Convert::ConvertXS::html_converter_initialize_sv");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_XS_initialize_output_state",
-      "Texinfo::Convert::ConvertXS::html_initialize_output_state");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_finalize_output_state",
-      "Texinfo::Convert::ConvertXS::html_finalize_output_state");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_new_document_context",
-      "Texinfo::Convert::ConvertXS::html_new_document_context");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_pop_document_context",
-      "Texinfo::Convert::ConvertXS::html_pop_document_context");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_XS_get_index_entries_sorted_by_letter",
-      "Texinfo::Convert::ConvertXS::get_index_entries_sorted_by_letter");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_XS_html_merge_index_entries",
-      "Texinfo::Convert::ConvertXS::html_merge_index_entries");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_prepare_conversion_units",
-      "Texinfo::Convert::ConvertXS::html_prepare_conversion_units");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_prepare_units_directions_files",
-      "Texinfo::Convert::ConvertXS::html_prepare_units_directions_files");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_prepare_output_units_global_targets",
-      "Texinfo::Convert::ConvertXS::html_prepare_output_units_global_targets");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_translate_names",
-      "Texinfo::Convert::ConvertXS::html_translate_names");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_prepare_title_titlepage",
-      "Texinfo::Convert::ConvertXS::html_prepare_title_titlepage");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_html_convert_convert",
-      "Texinfo::Convert::ConvertXS::html_convert_convert");
-      Texinfo::XSLoader::override(
-      "Texinfo::Convert::HTML::_html_convert_output",
-      "Texinfo::Convert::ConvertXS::html_convert_output");
-      #Texinfo::XSLoader::override(
-      #"Texinfo::Convert::HTML::_XS_html_convert_tree",
-      #"Texinfo::Convert::ConvertXS::html_convert_tree");
+      foreach my $sub (keys %XS_conversion_overrides) {
+        Texinfo::XSLoader::override ($sub, $XS_conversion_overrides{$sub});
+      }
+      _XS_format_init();
     }
-
     $module_loaded = 1;
   }
   # The usual import method
   goto &Exporter::import;
 }
 
-
-
 my %nobrace_commands = %Texinfo::Commands::nobrace_commands;
 my %line_commands = %Texinfo::Commands::line_commands;
 my %nobrace_symbol_text = %Texinfo::Common::nobrace_symbol_text;
diff --git a/tp/Texinfo/XS/convert/ConvertXS.xs 
b/tp/Texinfo/XS/convert/ConvertXS.xs
index d61bd0b537..aeec850a6f 100644
--- a/tp/Texinfo/XS/convert/ConvertXS.xs
+++ b/tp/Texinfo/XS/convert/ConvertXS.xs
@@ -175,6 +175,9 @@ text_convert_tree (SV *text_options_in, SV *tree_in, 
unused=0)
 
 # HTML
 
+void
+html_format_init ()
+
 int
 html_converter_initialize_sv (SV *converter_in, SV 
*default_formatting_references, SV *default_css_string_formatting_references, 
SV *default_commands_open, SV *default_commands_conversion, SV 
*default_css_string_commands_conversion, SV *default_types_open, SV 
*default_types_conversion, SV *default_css_string_types_conversion, SV 
*default_output_units_conversion)
 
diff --git a/tp/Texinfo/XS/convert/convert_html.c 
b/tp/Texinfo/XS/convert/convert_html.c
index e261f91b95..3f750f7881 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -2219,6 +2219,75 @@ html_prepare_units_directions_files (CONVERTER *self,
   return files_source_info;
 }
 
+
+#define ADDN(str,nr) text_append_n (result, str, nr)
+void
+html_default_format_protect_text (const char *text, TEXT *result)
+{
+  const char *p = text;
+
+  while (*p)
+    {
+      int before_sep_nr = strcspn (p, "<>&\"\f");
+      if (before_sep_nr)
+        {
+          text_append_n (result, p, before_sep_nr);
+          p += before_sep_nr;
+        }
+      if (!*p)
+        break;
+      switch (*p)
+        {
+        case '<':
+          ADDN("&lt;", 4);
+          break;
+        case '>':
+          ADDN("&gt;", 4);
+          break;
+        case '&':
+          ADDN("&amp;", 5);
+          break;
+        case '"':
+          ADDN("&quot;", 6);
+          break;
+        case '\f':
+          ADDN("&#12;", 5);
+          break;
+        }
+      p++;
+    }
+}
+
+void
+default_css_string_format_protect_text (const char *text, TEXT *result)
+{
+  const char *p = text;
+
+  while (*p)
+    {
+      int before_sep_nr = strcspn (p, "\\'");
+      if (before_sep_nr)
+        {
+          text_append_n (result, p, before_sep_nr);
+          p += before_sep_nr;
+        }
+      if (!*p)
+        break;
+      switch (*p)
+        {
+        case '\\':
+          ADDN("\\\\", 2);
+          break;
+        case '\'':
+          ADDN("\\'", 2);
+          break;
+        }
+      p++;
+    }
+}
+
+#undef ADDN
+
 static char *
 command_conversion (CONVERTER *self, enum command_id cmd,
                     const ELEMENT *element, HTML_ARGS_FORMATTED 
*args_formatted,
@@ -2278,7 +2347,6 @@ type_open (CONVERTER *self, enum element_type type, const 
ELEMENT *element)
   return 0;
 }
 
-
 static void
 push_html_formatting_context (HTML_FORMATTING_CONTEXT_STACK *stack,
                               char *context_name)
@@ -2404,22 +2472,15 @@ reset_translated_special_unit_info_tree (CONVERTER 
*self)
     }
 }
 
-/* most of the initialization is done by html_converter_initialize_sv
-   in get_perl_info, the initialization that do not require information
-   from perl is done here.  This is called after information from perl
-   has been gathered  */
+/* set information that is independent of customization, only called once */
 void
-html_converter_initialize (CONVERTER *self)
+html_format_init ()
 {
   int i;
-  int nr_special_units;
   int nr_default_commands
     = sizeof (default_commands_args) / sizeof (default_commands_args[0]);
   int max_args = MAX_COMMAND_ARGS_NR;
 
-  /* first set information that is fully independent from information
-     coming from perl */
-
   for (i = 0; i < nr_default_commands; i++)
     {
       /* we file the status for specified commands, to distinguish them
@@ -2477,7 +2538,17 @@ html_converter_initialize (CONVERTER *self)
   html_commands_data[CM_float].flags |= HF_composition_context;
 
   html_commands_data[CM_sc].flags |= HF_upper_case;
+}
 
+/* most of the initialization is done by html_converter_initialize_sv
+   in get_perl_info, the initialization that do not require information
+   directly from perl data is done here.  This is called after information
+   from perl has been gathered  */
+void
+html_converter_initialize (CONVERTER *self)
+{
+  int i;
+  int nr_special_units;
   /* initialization needing some information from perl */
 
   nr_special_units = self->special_unit_varieties.number;
@@ -3174,9 +3245,9 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT 
*element,
     }
 
   if ((element->type
-       && self->types_conversion[element->type].status == FRS_status_ignored)
+       && self->current_types_conversion[element->type].status == 
FRS_status_ignored)
       || (cmd
-          && self->commands_conversion[cmd].status == FRS_status_ignored))
+          && self->current_commands_conversion[cmd].status == 
FRS_status_ignored))
     {
       if (self->conf->DEBUG > 0)
         {
@@ -3248,7 +3319,7 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT 
*element,
           self->modified_state |= HMSF_current_root;
         }
 
-      if (self->commands_conversion[cmd].status)
+      if (self->current_commands_conversion[cmd].status)
         {
           int convert_to_latex = 0;
           HTML_ARGS_FORMATTED *args_formatted = 0;
@@ -3662,7 +3733,7 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT 
*element,
             }
 
           /* args are formatted, now format the command itself */
-          if (self->commands_conversion[cmd].status)
+          if (self->current_commands_conversion[cmd].status)
             {
               char *conv_str = command_conversion (self, cmd,
                                                    element, args_formatted,
@@ -3786,7 +3857,7 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT 
*element,
             }
         }
 
-      if (self->types_conversion[type].status)
+      if (self->current_types_conversion[type].status)
         {
           char *conversion_result
                     = type_conversion (self, type, element,
@@ -3864,8 +3935,8 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT 
*element,
     {
       if (self->conf->DEBUG > 0)
         fprintf (stderr, "UNNAMED empty\n");
-      if (self->types_conversion[0].status
-          && self->types_conversion[0].status != FRS_status_ignored)
+      if (self->current_types_conversion[0].status
+          && self->current_types_conversion[0].status != FRS_status_ignored)
         {
           char *conversion_result
                     = type_conversion (self, 0, element, "");
diff --git a/tp/Texinfo/XS/convert/convert_html.h 
b/tp/Texinfo/XS/convert/convert_html.h
index 85d39a3648..289fd3fbba 100644
--- a/tp/Texinfo/XS/convert/convert_html.h
+++ b/tp/Texinfo/XS/convert/convert_html.h
@@ -14,6 +14,8 @@ extern char *html_formatting_reference_names[];
 extern TRANSLATED_SUI_ASSOCIATION translated_special_unit_info[];
 extern const char *special_unit_info_type_names[SUI_type_heading + 1];
 
+void html_format_init (void);
+
 void html_converter_initialize (CONVERTER *self);
 
 void html_initialize_output_state (CONVERTER *self, char *context);
diff --git a/tp/Texinfo/XS/convert/converter.c 
b/tp/Texinfo/XS/convert/converter.c
index b413d807ba..dfe58b34f8 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -580,3 +580,95 @@ free_generic_converter (CONVERTER *self)
   free_output_files_information (&self->output_files_information);
   free_output_unit_files (&self->output_unit_files);
 }
+
+
+/* XML conversion functions */
+
+#define ADD(x) text_append_n (result, "&#" #x ";", 7)
+void
+xml_format_text_with_numeric_entities (const char *text, TEXT *result)
+{
+  const char *p;
+  int str_len;
+
+  p = text;
+  while (*p)
+    {
+      int before_sep_nr = strcspn (p, "-'`");
+      if (before_sep_nr)
+        {
+          text_append_n (result, p, before_sep_nr);
+          p += before_sep_nr;
+        }
+      if (!*p)
+        break;
+      str_len = strlen (p);
+      if ((str_len > 1) && (!strncmp (p, "``", 2)))
+        {
+          ADD(8220);
+          p += 2;
+        }
+      else if ((str_len > 1) && (!strncmp (p, "''", 2)))
+        {
+          ADD(8221);
+          p += 2;
+        }
+      else if ((str_len > 2) && !strncmp (p, "---", 3))
+        {
+          ADD(8212);
+          p += 3;
+        }
+      else if ((str_len > 1) && !strncmp (p, "--", 2))
+        {
+          ADD(8211);
+          p += 2;
+        }
+      else
+        {
+          if (*p == '\'')
+            ADD(8217);
+          else if (*p == '`')
+            ADD(8216);
+          p++;
+        }
+    }
+}
+#undef ADD
+
+#define ADDN(str,nr) text_append_n (result, str, nr)
+void
+xml_protect_text (const char *text, TEXT *result)
+{
+  const char *p;
+
+  p = text;
+
+  while (*p)
+    {
+      int before_sep_nr = strcspn (p, "<>&\"\f");
+      if (before_sep_nr)
+        {
+          text_append_n (result, p, before_sep_nr);
+          p += before_sep_nr;
+        }
+      if (!*p)
+        break;
+      switch (*p)
+        {
+        case '<':
+          ADDN("&lt;", 4);
+          break;
+        case '>':
+          ADDN("&gt;", 4);
+          break;
+        case '&':
+          ADDN("&amp;", 5);
+          break;
+        case '"':
+          ADDN("&quot;", 6);
+          break;
+        }
+      p++;
+    }
+}
+#undef ADDN
diff --git a/tp/Texinfo/XS/convert/converter.h 
b/tp/Texinfo/XS/convert/converter.h
index 00102ab73a..c864784f47 100644
--- a/tp/Texinfo/XS/convert/converter.h
+++ b/tp/Texinfo/XS/convert/converter.h
@@ -39,4 +39,7 @@ void clear_output_unit_files (FILE_NAME_PATH_COUNTER_LIST 
*output_unit_files);
 void free_output_unit_files (FILE_NAME_PATH_COUNTER_LIST *output_unit_files);
 
 void free_generic_converter (CONVERTER *self);
+
+
+void xml_format_text_with_numeric_entities (const char *text, TEXT *result);
 #endif



reply via email to

[Prev in Thread] Current Thread [Next in Thread]