texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: HTML argument formatting context for url, with conversion to UTF-8


From: Patrice Dumas
Subject: branch master updated: HTML argument formatting context for url, with conversion to UTF-8
Date: Sun, 07 Aug 2022 09:08:31 -0400

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 9aa105f16f HTML argument formatting context for url, with conversion 
to UTF-8
9aa105f16f is described below

commit 9aa105f16fe7bd2165943f86962736c33a372cd0
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sun Aug 7 15:08:20 2022 +0200

    HTML argument formatting context for url, with conversion to UTF-8
    
    * tp/Texinfo/Convert/HTML.pm (%default_commands_args)
    (_convert_email_command, _convert_uref_command, _convert):
    add a new argument formatting, 'url', that always uses UTF-8
    as encoding for plain text, and is otherwise the same as
    monospacetext.  It is better for percent encoded urls.
---
 ChangeLog                  | 10 ++++++++++
 doc/customization_api.texi |  6 ++++++
 tp/Texinfo/Convert/HTML.pm | 44 ++++++++++++++++++++++++--------------------
 3 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index d623b2157a..8046f3ffdf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2022-08-07  Patrice Dumas  <pertusus@free.fr>
+
+       HTML argument formatting context for url, with conversion to UTF-8
+
+       * tp/Texinfo/Convert/HTML.pm (%default_commands_args)
+       (_convert_email_command, _convert_uref_command, _convert):
+       add a new argument formatting, 'url', that always uses UTF-8
+       as encoding for plain text, and is otherwise the same as
+       monospacetext.  It is better for percent encoded urls.
+
 2022-08-06  Gavin Smith  <gavinsmith0123@gmail.com>
 
        Do not protect ] in LaTeX
diff --git a/doc/customization_api.texi b/doc/customization_api.texi
index d4236cd479..df14b16220 100644
--- a/doc/customization_api.texi
+++ b/doc/customization_api.texi
@@ -1959,6 +1959,12 @@ In string context. @xref{Init File Expansion Contexts}.
 The Texinfo tree element corresponding to the argument.
 @xref{Texinfo Tree Elements in User Defined Functions}.
 
+@item url
+Similar to monospacetext.  The difference is that UTF-8 encoding is always
+used for the conversion of accented and special insertion @@-commands to plain
+text.  This is best for percent encoding of urls, which should always be
+produced from UTF-8 encoded strings.
+
 @end table
 
 The formatted arguments contexts depend on the @@-command, there could be none,
diff --git a/tp/Texinfo/Convert/HTML.pm b/tp/Texinfo/Convert/HTML.pm
index 0e09431e2d..24ebd5a25e 100644
--- a/tp/Texinfo/Convert/HTML.pm
+++ b/tp/Texinfo/Convert/HTML.pm
@@ -272,6 +272,14 @@ my @image_files_extensions = ('.png', '.jpg', '.jpeg', 
'.gif');
 # this allows init files to get the location of the image files
 # which cannot be determined from the result, as the file
 # location is not used in the element output.
+# FIXME use monospacetext or url?  url is always UTF-8 encoded
+# to fit with percent encoding, monospacetext uses the output
+# encoding.  As a file name, monospacetext could make sense,
+# although the underlying character obtained with utf-8 may also
+# make sense.  It is also used as the path part of a url.
+# In practice, the user should check that the output encoding
+# and the commands used in file names match, so url or
+# monospacetext should be the same.
 sub html_image_file_location_name($$$$)
 {
   my $self = shift;
@@ -2135,17 +2143,17 @@ my %default_code_types = (
 # specification of arguments formatting
 my %default_commands_args = (
   'anchor' => [['monospacestring']],
-  'email' => [['monospacetext', 'monospacestring'], ['normal']],
+  'email' => [['url', 'monospacestring'], ['normal']],
   'footnote' => [[]],
   'printindex' => [[]],
-  'uref' => [['monospacetext', 'monospacestring'], ['normal'], ['normal']],
-  'url' => [['monospacetext', 'monospacestring'], ['normal'], ['normal']],
+  'uref' => [['url', 'monospacestring'], ['normal'], ['normal']],
+  'url' => [['url', 'monospacestring'], ['normal'], ['normal']],
   'sp' => [[]],
   'inforef' => [['monospace'],['normal'],['monospacetext']],
   'xref' => [['monospace'],['normal'],['normal'],['monospacetext'],['normal']],
   'pxref' => 
[['monospace'],['normal'],['normal'],['monospacetext'],['normal']],
   'ref' => [['monospace'],['normal'],['normal'],['monospacetext'],['normal']],
-  'image' => [['monospacetext', 
'monospacestring'],['monospacetext'],['monospacetext'],['string', 
'normal'],['monospacetext']],
+  'image' => [['url', 'monospacetext', 
'monospacestring'],['monospacetext'],['monospacetext'],['string', 
'normal'],['monospacetext']],
   # FIXME shouldn't it better not to convert if later ignored?
   'inlinefmt' => [['monospacetext'],['normal']],
   'inlinefmtifelse' => [['monospacetext'],['normal'],['normal']],
@@ -2629,7 +2637,7 @@ sub _convert_email_command($$$$)
   my $mail = '';
   my $mail_string;
   if (defined($mail_arg)) {
-    $mail = $mail_arg->{'monospacetext'};
+    $mail = $mail_arg->{'url'};
     $mail_string = $mail_arg->{'monospacestring'};
   }
   my $text = '';
@@ -2642,7 +2650,7 @@ sub _convert_email_command($$$$)
     return "$mail_string ($text)";
   } else {
     return $self->html_attribute_class('a', [$cmdname])
-    .' 
href="'.$self->url_protect_url_text("mailto:$mail_string")."\">$text</a>";
+    .' href="'.$self->url_protect_url_text("mailto:$mail")."\">$text</a>";
   }
 }
 
@@ -2847,7 +2855,7 @@ sub _convert_uref_command($$$$)
 
   my ($url, $url_string, $text, $replacement);
   if (defined($url_arg)) {
-    $url = $url_arg->{'monospacetext'};
+    $url = $url_arg->{'url'};
     $url_string = $url_arg->{'monospacestring'};
   }
   $text = $text_arg->{'normal'} if defined($text_arg);
@@ -2858,18 +2866,6 @@ sub _convert_uref_command($$$$)
   return $text if (!defined($url) or $url eq '');
   return "$text ($url_string)" if ($self->in_string());
 
-  # Convert again, but this time with encoding set to UTF-8
-  # to have a normalized percent encoded file name not dependent
-  # on the encoding, and representing better the underlying characters
-  my $output_encoding = $self->get_conf('OUTPUT_ENCODING_NAME');
-  if (not defined($output_encoding) or $output_encoding ne 'utf-8') {
-    my $text_conversion_options = {'code' => 1,
-      Texinfo::Convert::Text::copy_options_for_convert_text($self, 1)};
-    $text_conversion_options->{'enabled_encoding'} = 'utf-8';
-    $url
-      = Texinfo::Convert::Text::convert_to_text($url_arg->{'tree'},
-                                               $text_conversion_options);
-  }
   return $self->html_attribute_class('a', [$cmdname])
            .' href="'.$self->url_protect_url_text($url)."\">$text</a>";
 }
@@ -2890,7 +2886,6 @@ sub _convert_image_command($$$$)
     $basefile_string = $args->[0]->{'monospacestring'}
         if (defined($args->[0]->{'monospacestring'}));
     return $basefile_string if ($self->in_string());
-    my $basefile = $args->[0]->{'monospacetext'};
     my ($image_file, $image_basefile, $image_extension, $image_path)
       = $self->html_image_file_location_name($cmdname, $command, $args);
     if (not defined($image_path)) {
@@ -10130,6 +10125,15 @@ sub _convert($$;$)
                 $arg_formatted->{$arg_type}
                   = Texinfo::Convert::Text::convert_to_text($arg, {'code' => 1,
                      
Texinfo::Convert::Text::copy_options_for_convert_text($self, 1)});
+              } elsif ($arg_type eq 'url') {
+                # set the encoding to UTF-8 to always have a string that is 
suitable
+                # for percent encoding.
+                my $text_conversion_options = {'code' => 1,
+                  Texinfo::Convert::Text::copy_options_for_convert_text($self, 
1)};
+                $text_conversion_options->{'enabled_encoding'} = 'utf-8';
+                $arg_formatted->{$arg_type}
+                   = Texinfo::Convert::Text::convert_to_text($arg,
+                                                   $text_conversion_options);
               } elsif ($arg_type eq 'raw') {
                 $self->{'document_context'}->[-1]->{'raw'}++;
                 $arg_formatted->{$arg_type} = $self->_convert($arg, 
$explanation);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]