[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Mon, 8 Aug 2022 18:42:27 -0400 (EDT) |
branch: master
commit dbb7a98c9d2bfffacf4acf3d578d33009e12c4ad
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Tue Aug 9 00:37:40 2022 +0200
Reorganization of latex2html.pm code
* tp/ext/latex2html.pm (l2h_to_latex, l2h_process): reorganize
l2h_process and l2h_to_latex to write to the latex file only
in l2h_to_latex, and only if some conversion is needed.
Integrate l2h_finish_to_latex in l2h_process.
* tp/ext/latex2html.pm (l2h_convert_command): rename l2h_do_tex
as l2h_convert_command.
* tp/ext/latex2html.pm (l2h_process, l2h_convert_command, l2h_finish):
instead of setting a global status, use counters and difference
between counters to detect that a stage failed.
* tp/ext/latex2html.pm: avoid global commands if not needed.
Other code cleanups.
---
ChangeLog | 19 +
tp/TODO | 2 -
tp/ext/latex2html.pm | 543 +++++++++++----------
.../encod\303\251/tex_complex_l2h.tex" | 2 +-
.../encod\303\251/tex_encod\303\251_utf8_l2h.tex" | 2 +-
.../tex_l2h_res/tex_complex_l2h.tex | 2 +-
tp/tests/many_input_files/tex_l2h_res/tex_l2h.tex | 2 +-
.../res_parser/block_EOL_tex/block_EOL_l2h.tex | 2 +-
.../res_parser/formatting_singular/sing_l2h.tex | 2 +-
.../math_not_closed/math_not_closed_l2h.tex | 2 +-
tp/tests/tex_html/res_parser/tex/tex_l2h.tex | 2 +-
.../res_parser/tex_accents_l2h/tex_accents_l2h.tex | 2 +-
.../res_parser/tex_complex_l2h/tex_complex_l2h.tex | 2 +-
.../tex_encode_latin1_l2h.tex | 2 +-
.../tex_encod\303\251_utf8_l2h.tex" | 2 +-
.../res_parser/tex_eqalign_l2h/tex_eqalign_l2h.tex | 2 +-
.../res_parser/tex_gdef_l2h/tex_gdef_l2h.tex | 2 +-
.../tex_in_copying/tex_in_copying_l2h.tex | 2 +-
.../tex_not_closed/tex_not_closed_l2h.tex | 2 +-
19 files changed, 312 insertions(+), 284 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 98caf9b9ea..b763203e40 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -25,6 +25,25 @@
are automatically within a group. Wrap more of the header
with \makeatletter and \makeatother.
+2022-08-08 Patrice Dumas <pertusus@free.fr>
+
+ Reorganization of latex2html.pm code
+
+ * tp/ext/latex2html.pm (l2h_to_latex, l2h_process): reorganize
+ l2h_process and l2h_to_latex to write to the latex file only
+ in l2h_to_latex, and only if some conversion is needed.
+ Integrate l2h_finish_to_latex in l2h_process.
+
+ * tp/ext/latex2html.pm (l2h_convert_command): rename l2h_do_tex
+ as l2h_convert_command.
+
+ * tp/ext/latex2html.pm (l2h_process, l2h_convert_command, l2h_finish):
+ instead of setting a global status, use counters and difference
+ between counters to detect that a stage failed.
+
+ * tp/ext/latex2html.pm: avoid global commands if not needed.
+ Other code cleanups.
+
2022-08-08 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Convert/HTML.pm (run_stage_handlers):
diff --git a/tp/TODO b/tp/TODO
index 44dffb4892..60f02094aa 100644
--- a/tp/TODO
+++ b/tp/TODO
@@ -17,8 +17,6 @@ xterm &
Before next release
===================
-LaTeX nested @example and similar, combine indentation if possible.
-
Bugs
====
diff --git a/tp/ext/latex2html.pm b/tp/ext/latex2html.pm
index 5da885b0b6..bace9441f6 100644
--- a/tp/ext/latex2html.pm
+++ b/tp/ext/latex2html.pm
@@ -40,9 +40,9 @@ use Texinfo::Convert::Texinfo;
texinfo_register_handler('structure', \&l2h_process);
texinfo_register_handler('finish', \&l2h_finish);
-texinfo_register_command_formatting('math', \&l2h_do_tex);
-texinfo_register_command_formatting('tex', \&l2h_do_tex);
-texinfo_register_command_formatting('displaymath', \&l2h_do_tex);
+texinfo_register_command_formatting('math', \&l2h_convert_command);
+texinfo_register_command_formatting('tex', \&l2h_convert_command);
+texinfo_register_command_formatting('displaymath', \&l2h_convert_command);
# name/location of latex2html program
texinfo_set_from_init_file('L2H_L2H', 'latex2html');
@@ -70,22 +70,15 @@ texinfo_set_from_init_file('L2H_CLEAN', 1);
# latex2html conversions consist of 2 stages:
# 1) l2h_process
# to latex: Put "latex" code into a latex file
-# (l2h_to_latex, l2h_finish_to_latex)
+# (l2h_to_latex)
# to html: Use latex2html to generate corresponding html code and images
# (l2h_to_html)
# from html: Extract generated code and images from latex2html run
-# (l2h_init_from_html)
-# 2) l2h_do_tex called each time a @tex or @math command is encountered
-# in the output tree.
+# (l2h_retrieve_from_html)
+# 2) l2h_convert_command called each time an handled @-command (@math, ...) is
+# encountered in the tree conversion.
-# init l2h defaults for files and names
-
-my ($l2h_name, $l2h_latex_path_name, $l2h_latex_path_string, $l2h_cache_path_name,
- $l2h_html_path_name, $l2h_html_path_string, $l2h_prefix, $l2h_prefix_string);
-
-# holds the status of latex2html operations. If > 0 it means that there was
-# an error, if < 0, means that there is nothing to do
-my $status;
+my ($l2h_name, $l2h_cache_path_name);
my $debug;
my $verbose;
@@ -94,86 +87,106 @@ my $destination_directory_string;
my $docu_name;
my %commands_counters;
+my %commands_text_index;
-# init_from_html
my $extract_error_count;
-my $invalid_counter_count;
+my $invalid_text_index_count;
# change_image_file_names
-my %l2h_img; # associate src file to destination file
- # such that files are not copied twice
+my %l2h_img;
my $image_count;
-# do_tex
-my $html_output_count = 0; # html text outputed in html result file
+my $html_output_count;
##########################
#
# First stage: Generation of Latex file
-# Initialize with: init
-# Add content with: l2h_to_latex ($text) --> HTML placeholder comment
-# Finish with: finish_to_latex
-#
-my $l2h_latex_preamble = <<EOT;
-% This document was automatically generated by the l2h extenstion of texi2html
+sub l2h_to_latex($$$$$)
+{
+ my $self = shift;
+ my $l2h_latex_path_string = shift;
+ my $l2h_latex_path_name = shift;
+ my $latex_text_indices_to_convert = shift;
+ my $latex_texts = shift;
+
+ unless (open(L2H_LATEX, ">$l2h_latex_path_string")) {
+ #$self->document_error($self, sprintf(__(
+ $self->document_warn($self, sprintf(__(
+ "l2h: could not open latex file %s for writing: %s"),
+ $l2h_latex_path_name, $!));
+ return 0;
+ }
+ # according to the .log file latex2html is expecting utf-8 if no information
+ # is provided
+ binmode(L2H_LATEX, ':utf8');
+ warn "# l2h: use $l2h_latex_path_string as latex file\n" if ($verbose);
+ print L2H_LATEX <<EOT;
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\\documentclass{article}
\\usepackage{html}
\\begin{document}
EOT
-my $l2h_latex_closing = <<EOT;
-\\end{document}
-EOT
+ foreach my $latex_text_index (@$latex_text_indices_to_convert) {
+ print L2H_LATEX "\\begin{rawhtml}\n\n";
+ print L2H_LATEX "<!-- l2h_begin $l2h_name $latex_text_index -->\n";
+ print L2H_LATEX "\\end{rawhtml}\n";
-my %l2h_to_latex = (); # associate a latex text with the index in the
- # html result array.
-my @l2h_to_latex = (); # array used to associate the index with
- # the original latex text.
-my $latex_count = 0; # number of latex texts really stored
-my $latex_converted_count = 0; # number of latex texts passed through latex2html
-my $to_latex_count = 0; # total number of latex texts processed
-my $cached_count = 0; # number of cached latex texts
-# need to be our, as in the do that loads the cache, the file lexicals
-# are not visible
-our %l2h_cache = (); # the cache hash. Associate latex text with
- # html from the previous run
-my @l2h_from_html; # array of resulting html
+ print L2H_LATEX "$latex_texts->[$latex_text_index]\n";
+
+ print L2H_LATEX "\\begin{rawhtml}\n";
+ print L2H_LATEX "<!-- l2h_end $l2h_name $latex_text_index -->\n\n";
+ print L2H_LATEX "\\end{rawhtml}\n";
+ }
+ # print closing into latex file and close it
+ print L2H_LATEX '\end{document}'."\n";
+ # FIXME error condition not checked
+ close (L2H_LATEX);
+ return 1;
+}
+
+my $latex_commands_count;
+my @latex_texts;
+my $latex_texts_count;
+my $latex_to_convert_count;
+my $latex_converted_count;
+my $html_converted_count;
-my %global_count = (); # associate a command name and the
- # corresponding counter to the index in the
- # html result array
+# our because the file lexicals are not visible is the do loading the cache
+our %l2h_cache;
+my @l2h_from_html;
-# set $status to 0, if l2h could be initalized properly
sub l2h_process($$)
{
my $self = shift;
my $document_root = shift;
- %l2h_to_latex = (); # associate a latex text with the index in the
- # html result array.
- @l2h_to_latex = (); # array used to associate the index with
- # the original latex text.
- $latex_count = 0; # number of latex texts really stored
- $latex_converted_count = 0; # number of latex texts passed through latex2html
- $to_latex_count = 0; # total number of latex texts processed
- $cached_count = 0; # number of cached latex texts
- %l2h_cache = (); # the cache hash. Associate latex text with
- # html from the previous run
- @l2h_from_html = (); # array of resulting html
-
- %global_count = (); # associate a command name and the
- # corresponding counter to the index in the
- # html result array
- %commands_counters = ();
- $extract_error_count = 0;
- $invalid_counter_count = 0;
+ @latex_texts = (); # array used to associate the index with
+ # a latex text.
+ $latex_commands_count = 0; # number of latex Texinfo commands collected
+ $latex_texts_count = 0; # number of latex texts stored, two same text
+ # are only stored once
+ $latex_to_convert_count = 0; # number of latex texts that should pass through
+ # latex2html
+ $latex_converted_count = 0; # number of latex texts passed through latex2html
+ $html_converted_count = 0; # number of html texts retrieved
+ %l2h_cache = (); # the cache hash. Associate latex text with
+ # html from the previous run
+ @l2h_from_html = (); # array of resulting html
+
+ %commands_counters = (); # associate an element to the global counter
+ # of processed elements
+ %commands_text_index = (); # associate an element to the index of latex text,
+ # also the index in the HTML results array
+ $extract_error_count = 0; # number of fragments that cannot be retrieved
+ # when @-commands are converted
+ $invalid_text_index_count = 0;
%l2h_img = (); # associate src file to destination file
# such that files are not copied twice
$image_count = 1;
$html_output_count = 0; # html text outputed in html result file
- $status = 1;
return -1 if (defined($self->get_conf('OUTFILE'))
and $Texinfo::Common::null_device_file{$self->get_conf('OUTFILE')});
@@ -190,55 +203,41 @@ sub l2h_process($$)
= $self->encoded_output_file_name($dir);
$l2h_name = "${docu_name}_l2h";
- my $l2h_latex_file_name = "${l2h_name}.tex";
- $l2h_latex_path_name = File::Spec->catfile($destination_directory,
- "${l2h_name}.tex");
+ my $l2h_latex_file_name = "${l2h_name}.tex";
+ my $l2h_latex_path_name = File::Spec->catfile($destination_directory,
+ $l2h_latex_file_name);
# we use utf-8 encoding irrespective of what is used in texi2any
# because latex2html use the file name in the resulting file and
# it needs to be utf-8
my $encoded_l2h_latex_file_name = encode('UTF-8', $l2h_latex_file_name);
- $l2h_latex_path_string = File::Spec->catfile($destination_directory_string,
- $encoded_l2h_latex_file_name);
+ my $l2h_latex_path_string = File::Spec->catfile($destination_directory_string,
+ $encoded_l2h_latex_file_name);
$l2h_cache_path_name = File::Spec->catfile($destination_directory,
- "${docu_name}-l2h_cache.pm");
+ "${docu_name}-l2h_cache.pm");
+ # set consistently with $l2h_latex_file_name to ensure that
+ # latex2html will create a file with this name.
my $l2h_html_file_name = "${l2h_name}.html";
- $l2h_html_path_name = File::Spec->catfile($destination_directory,
- "${l2h_name}.html");
- my $encoded_l2h_html_file_name = encode('UTF-8', $l2h_html_file_name);
- $l2h_html_path_string = File::Spec->catfile($destination_directory_string,
- $encoded_l2h_html_file_name);
- $l2h_prefix = "${l2h_name}_";
- $l2h_prefix_string = encode('UTF-8', $l2h_prefix);
$debug = $self->get_conf('DEBUG');
$verbose = $self->get_conf('VERBOSE');
- unless ($self->get_conf('L2H_SKIP')) {
- unless (open(L2H_LATEX, ">$l2h_latex_path_string")) {
- $self->document_error($self, sprintf(__(
- "l2h: could not open latex file %s for writing: %s"),
- $l2h_latex_path_name, $!));
- $status = 1;
- return 1;
- }
- # according to the .log file latex2html is expecting utf-8 if no information
- # is provided
- binmode(L2H_LATEX, ':utf8');
- warn "# l2h: use $l2h_latex_path_string as latex file\n" if ($verbose);
- print L2H_LATEX $l2h_latex_preamble;
- }
# open the database that holds cached text
l2h_init_cache($self) if (!defined($self->get_conf('L2H_SKIP'))
- or $self->get_conf('L2H_SKIP'));
+ or $self->get_conf('L2H_SKIP'));
my @replaced_commands = ('tex', 'math', 'displaymath');
my $collected_commands = Texinfo::Common::collect_commands_list_in_tree(
$document_root, \@replaced_commands);
- my $counter = 0;
+ my $texinfo_command_index = 0; # index of latex elements/commands processed
+ my @latex_text_indices_to_convert; # indices of latex texts that should be converted
+ my %latex_text_indices; # associate a latex text with the index in the
+ # html result array. Allows to do each text
+ # only once
+ my $cached_count = 0; # number of cached latex texts
if (scalar(@{$collected_commands})) {
foreach my $element (@{$collected_commands}) {
+ $texinfo_command_index++;
my $command = $element->{'cmdname'};
- $counter++;
my $tree;
if ($command eq 'math') {
$tree = $element->{'args'}->[0];
@@ -254,89 +253,90 @@ sub l2h_process($$)
pop @{$tree->{'contents'}};
}
}
- my $text = Texinfo::Convert::Texinfo::convert_to_texinfo($tree);
- l2h_to_latex($self, $command, $text, $counter);
- $commands_counters{$element} = $counter;
+ my $texinfo_text = Texinfo::Convert::Texinfo::convert_to_texinfo($tree);
+ # print $texinfo_text into latex file (if not already there nor in cache)
+ # which can be later on replaced by the latex2html generated text.
+ my $latex_text = $texinfo_text;
+ if ($command eq 'tex') {
+ $latex_text .= ' ';
+ } elsif ($command eq 'math') {
+ $latex_text = "\$".$latex_text."\$";
+ } elsif ($command eq 'displaymath') {
+ $latex_text = "\$\$".$latex_text."\$\$";
+ }
+ $latex_text =~ s/(\s*)$//;
+ # try whether we have text already on things to do
+ my $latex_text_index = $latex_text_indices{$latex_text};
+ unless ($latex_text_index) {
+ $latex_texts_count++;
+ $latex_text_index = $latex_texts_count;
+ # try whether we can get it from cache
+ my $cached_text = l2h_from_cache($self, $latex_text);
+ if (defined($cached_text)) {
+ $cached_count++;
+ # put the cached result in the html result array
+ $l2h_from_html[$latex_text_index] = $cached_text;
+ } else {
+ # the text indexed by $latex_text_index should be converted
+ push @latex_text_indices_to_convert, $latex_text_index;
+ }
+ $latex_texts[$latex_text_index] = $latex_text;
+ $latex_text_indices{$latex_text} = $latex_text_index;
+ }
+ $commands_counters{$element} = $texinfo_command_index;
+ $commands_text_index{$element} = $latex_text_index;
}
- }
- $status = l2h_finish_to_latex($self);
- if ($status == 0) {
- $status = l2h_to_html($self);
- }
- if ($status == 0) {
- $status = l2h_init_from_html($self);
- }
- if ($status <= 0) {
- return 0;
} else {
- return -$status;
- }
-}
-
-
-# print text (2nd arg) into latex file (if not already there nor in cache)
-# which can be later on replaced by the latex2html generated text.
-sub l2h_to_latex($$$$)
-{
- my $self = shift;
- my $command = shift;
- my $text = shift;
- my $counter = shift;
-
- if ($command eq 'tex') {
- $text .= ' ';
- } elsif ($command eq 'math') {
- $text = "\$".$text."\$";
- } elsif ($command eq 'displaymath') {
- $text = "\$\$".$text."\$\$";
+ # no handled command, nothing to do
+ warn "# l2h: no handled commands\n" if ($verbose);
+ return 0;
}
- $to_latex_count++;
- $text =~ s/(\s*)$//;
- # try whether we have text already on things to do
- my $count = $l2h_to_latex{$text};
- unless ($count) {
- $latex_count++;
- $count = $latex_count;
- # try whether we can get it from cache
- my $cached_text = l2h_from_cache($self, $text);
- if (defined($cached_text)) {
- $cached_count++;
- # put the cached result in the html result array
- $l2h_from_html[$count] = $cached_text;
- } else {
- $latex_converted_count++;
- unless ($self->get_conf('L2H_SKIP')) {
- print L2H_LATEX "\\begin{rawhtml}\n\n";
- print L2H_LATEX "<!-- l2h_begin $l2h_name $count -->\n";
- print L2H_LATEX "\\end{rawhtml}\n";
-
- print L2H_LATEX "$text\n";
+ $latex_to_convert_count = scalar(@latex_text_indices_to_convert);
+
+ $latex_commands_count = $texinfo_command_index;
+ my $reused = $latex_commands_count - $latex_to_convert_count - $cached_count;
+ warn "# l2h: to latex ($cached_count cached, $reused reused, $latex_to_convert_count to process)\n" if ($verbose);
+
+ # when there are tex constructs to convert (not everything
+ # comes from the cache)
+ if ($latex_to_convert_count > 0) {
+ unless ($self->get_conf('L2H_SKIP')) {
+ my $l2h_to_latex_status
+ = l2h_to_latex($self, $l2h_latex_path_string, $l2h_latex_path_name,
+ \@latex_text_indices_to_convert, \@latex_texts);
+ return 1 unless ($l2h_to_latex_status);
+
+ # the non equality of $latex_converted_count and $latex_to_convert_count
+ # is the preferred indicator of skipping this stage (and possible failure
+ # but in case of failure, the converter normally aborts).
+ $latex_converted_count = $latex_to_convert_count;
+ }
- print L2H_LATEX "\\begin{rawhtml}\n";
- print L2H_LATEX "<!-- l2h_end $l2h_name $count -->\n\n";
- print L2H_LATEX "\\end{rawhtml}\n";
+ if ($latex_converted_count > 0) {
+ my $l2h_to_html_status = l2h_to_html($self, $l2h_latex_path_string,
+ $l2h_latex_path_name);
+ return 1 unless($l2h_to_html_status);
+
+ my @html_retrieved_text_indices = l2h_retrieve_from_html($self,
+ $l2h_html_file_name);
+ $html_converted_count = scalar(@html_retrieved_text_indices);
+ # Not the same number of converted elements and retrieved elements.
+ if ($latex_converted_count != $html_converted_count) {
+ # unless latex2html somewhat mangles the output this cannot
+ # actually happen, so it could also be presented as a bug.
+ $self->document_warn($self, sprintf(__(
+ "latex2html.pm: processing produced %d items in HTML; expected %d"),
+ $html_converted_count, $latex_converted_count));
}
+ # It could be checked, in addition, that @html_retrieved_text_indices
+ # contains the same indices as @latex_text_indices_to_convert.
+ warn "# l2h: retrieved converted $html_converted_count of $latex_texts_count html contents\n"
+ if ($verbose);
+ } else {
+ warn "# l2h: skipping latex2html run\n" if ($verbose);
}
- $l2h_to_latex[$count] = $text;
- $l2h_to_latex{$text} = $count;
- }
- $global_count{"${command}_$counter"} = $count;
-}
-
-# print closing into latex file and close it
-sub l2h_finish_to_latex($)
-{
- my $self = shift;
- my $reused = $to_latex_count - $latex_converted_count - $cached_count;
- unless ($self->get_conf('L2H_SKIP')) {
- print L2H_LATEX $l2h_latex_closing;
- # FIXME error condition not checked
- close (L2H_LATEX);
- }
- warn "# l2h: finished to latex ($cached_count cached, $reused reused, $latex_converted_count to process)\n" if ($verbose);
- if ($latex_count == 0) {
- # no @tex nor @math at all, nothing to do
- return -1;
+ } else {
+ warn "# l2h: no latex2html run needed\n" if ($verbose);
}
return 0;
}
@@ -344,22 +344,21 @@ sub l2h_finish_to_latex($)
###################################
# Use latex2html to generate corresponding html code and images
#
-# to_html():
+# l2h_to_html():
# Call latex2html on $l2h_latex_path_string
-# Put images (prefixed with $l2h_name."_") and html file(s) in $l2h_html_dir
+# Put images (prefixed with $l2h_name."_") and html file(s) in $destination_directory_string
# Return 1, on success
# 0, otherwise
#
-sub l2h_to_html($)
+sub l2h_to_html($$$)
{
my $self = shift;
+ my $l2h_latex_path_string = shift;
+ my $l2h_latex_path_name = shift;
+
+ my $l2h_prefix = "${l2h_name}_";
+
my $dotbug;
- # when there are no tex constructs to convert (happens in case everything
- # comes from the cache), there is no latex2html run
- if ($self->get_conf('L2H_SKIP') or ($latex_converted_count == 0)) {
- warn "# l2h: skipping latex2html run\n" if ($verbose);
- return 0;
- }
# Check for dot in directory where dvips will work
if ($self->get_conf('L2H_TMP')) {
if ($self->get_conf('L2H_TMP') =~ /\./) {
@@ -374,13 +373,13 @@ sub l2h_to_html($)
$dotbug = 1;
}
}
- #return 1 if ($dotbug);
my $latex2html_command = $self->get_conf('L2H_L2H');
if (not defined($latex2html_command) or $latex2html_command !~ /\S/) {
$self->document_error($self, __("l2h: command not set"));
- return 1;
+ return 0;
}
+
# the final call is obtained by concatenating $call_start encoded
# and strings based on already encoded file paths.
my $call_start = $latex2html_command;
@@ -422,30 +421,30 @@ sub l2h_to_html($)
} else {
$encoded_call_start = $call_start;
}
+ my $l2h_prefix_string = encode('UTF-8', $l2h_prefix);
# concatenante strings containing already encoded file paths
my $encoded_call = $encoded_call_start . $encoded_destination_dir_option
." -prefix $l2h_prefix_string $l2h_latex_path_string";
my $call = $call_start . $destination_dir_option
." -prefix $l2h_prefix $l2h_latex_path_name";
+
warn "# l2h: executing '$encoded_call'\n" if ($verbose);
if (system($encoded_call)) {
$self->document_error($self, sprintf(__("l2h: command did not succeed: %s"),
$call));
- return 1;
- } else {
- warn "# l2h: latex2html finished successfully\n" if ($verbose);
return 0;
+ } else {
+ warn "# l2h: latex2html terminated successfully\n" if ($verbose);
+ return 1;
}
}
##########################
# Third stage: Extract generated contents from latex2html run
-# Initialize with: init_from_html
+# Retrieve with: l2h_retrieve_from_html
# open $l2h_html_path_string for reading
-# reads in contents into array indexed by numbers
-# return 1, on success -- 0, otherwise
-# Finish with: finish
-# closes $l2h_html_dir/$l2h_name.".$docu_ext"
+# reads in contents into array indexed by text indices
+# return the indices of retrieved fragments
# the images generated by latex2html have names like ${docu_name}_l2h_img?.png
@@ -500,7 +499,7 @@ sub l2h_change_image_file_names($$)
my $file_dest
= File::Spec->catfile($destination_directory, $dest);
my $encoded_dest = Encode::encode('UTF-8', $dest);
- my $encoded_file_dest = File::Spec->catfile($destination_directory_string,
+ my $encoded_file_dest = File::Spec->catfile($destination_directory_string,
$encoded_dest);
if ($debug) {
copy($encoded_file_src, $encoded_file_dest);
@@ -518,47 +517,47 @@ sub l2h_change_image_file_names($$)
return $content;
}
-sub l2h_init_from_html($)
+sub l2h_retrieve_from_html($$)
{
my $self = shift;
- # when there are no tex constructs to convert (happens in case everything
- # comes from the cache), the html file that was generated by previous
- # latex2html runs isn't reused.
- if ($latex_converted_count == 0) {
- return 0;
- }
+ my $l2h_html_file_name = shift;
+
+ my @html_retrieved_text_indices; # the text indices retrieved
+
+ my $l2h_html_path_name = File::Spec->catfile($destination_directory,
+ $l2h_html_file_name);
+ my $encoded_l2h_html_file_name = encode('UTF-8', $l2h_html_file_name);
+ my $l2h_html_path_string = File::Spec->catfile($destination_directory_string,
+ $encoded_l2h_html_file_name);
if (! open(L2H_HTML, "<$l2h_html_path_string")) {
$self->document_warn($self,
sprintf(__("l2h: could not open %s: %s"),
$l2h_html_path_name, $!));
- return 1;
+ return @html_retrieved_text_indices;
}
# the file content is UTF-8 encoded
binmode(L2H_HTML, ':utf8');
warn "# l2h: use $l2h_html_path_string as html file\n" if ($verbose);
- my $html_converted_count = 0; # number of html resulting texts
- # retrieved in the file
-
- my ($count, $h_line);
+ my ($latex_text_index, $h_line);
while ($h_line = <L2H_HTML>) {
if ($h_line =~ /!-- l2h_begin $l2h_name ([0-9]+) --/) {
- $count = $1;
+ $latex_text_index = $1;
my $h_content = '';
my $h_end_found = 0;
while ($h_line = <L2H_HTML>) {
- if ($h_line =~ /!-- l2h_end $l2h_name $count --/) {
+ if ($h_line =~ /!-- l2h_end $l2h_name $latex_text_index --/) {
$h_end_found = 1;
chomp $h_content;
chomp $h_content;
- $html_converted_count++;
+ push @html_retrieved_text_indices, $latex_text_index;
# transform image file names and copy image files
$h_content = l2h_change_image_file_names($self, $h_content);
# store result in the html result array
- $l2h_from_html[$count] = $h_content;
+ $l2h_from_html[$latex_text_index] = $h_content;
# also add the result in cache hash
- $l2h_cache{$l2h_to_latex[$count]} = $h_content;
+ $l2h_cache{$latex_texts[$latex_text_index]} = $h_content;
last;
}
$h_content = $h_content.$h_line;
@@ -566,33 +565,20 @@ sub l2h_init_from_html($)
unless ($h_end_found) {
# couldn't found the closing comment. Should be a bug.
$self->document_warn($self,
- sprintf(__("latex2html.pm: end of \@%s item %d not found"),
- $l2h_name, $count));
- close(L2H_HTML);
- return 1;
+ sprintf(__("latex2html.pm: end of \@%s text %d not found"),
+ $l2h_name, $latex_text_index));
+ last;
}
}
}
-
- # Not the same number of converted elements and retrieved elements
- if ($latex_converted_count != $html_converted_count) {
- $self->document_warn($self, sprintf(__(
- "latex2html.pm: processing produced %d items in HTML; expected %d, the number of items found in the document"),
- $html_converted_count, $latex_converted_count));
- }
-
- warn "# l2h: Got $html_converted_count of $latex_count html contents\n"
- if ($verbose);
-
+ # FIXME error/warning if close fails
close(L2H_HTML);
- return 0;
+ return @html_retrieved_text_indices;
}
-# $html_output_count = 0; # html text outputed in html result file
-
-# called each time a construct handled by latex2html is encountered, should
+# called each time an element handled by latex2html is encountered, should
# output the corresponding html
-sub l2h_do_tex($$$;$$)
+sub l2h_convert_command($$$;$$)
{
my $self = shift;
my $cmdname = shift;;
@@ -600,24 +586,26 @@ sub l2h_do_tex($$$;$$)
my $args = shift;
my $content = shift;
- my $counter = $commands_counters{$command};
- return '' unless ($status == 0);
- my $count = $global_count{"${cmdname}_$counter"};
+ my $command_count = $commands_counters{$command};
+ my $latex_text_index = $commands_text_index{$command};
################################## begin debug section (incorrect counts)
- if (!defined($count)) {
+ if (!defined($command_count)) {
+ $self->present_bug_message("l2h: conversion of ${cmdname}, undef command_count");
+ $command_count = -1;
+ }
+ if (!defined($latex_text_index)) {
# counter is undefined
- $invalid_counter_count++;
- $self->document_warn($self,
- sprintf(__("l2h: could not determine the fragment %d for \@%s"),
- $counter, $cmdname));
- return ("<!-- l2h: ". __LINE__ . " undef count for ${cmdname}_$counter -->")
+ $invalid_text_index_count++;
+ $self->present_bug_message(
+ "l2h: could not determine the fragment $command_count, for \@$cmdname");
+ return ("<!-- l2h: ". __LINE__ . " undef count for ${cmdname} $command_count -->")
if ($debug);
return '';
- } elsif(($count <= 0) or ($count > $latex_count)) {
+ } elsif(($latex_text_index <= 0) or ($latex_text_index > $latex_texts_count)) {
# counter out of range
- $invalid_counter_count++;
- $self->present_bug_message("l2h: request of $count out of range [0,$latex_count]");
- return ("<!-- l2h: ". __LINE__ . " out of range count $count -->")
+ $invalid_text_index_count++;
+ $self->present_bug_message("l2h: request of $latex_text_index out of range [0,$latex_texts_count]");
+ return ("<!-- l2h: ". __LINE__ . " out of range index $latex_text_index -->")
if ($debug);
return '';
}
@@ -625,23 +613,34 @@ sub l2h_do_tex($$$;$$)
# this seems to be a valid counter
my $result = '';
- $result = "<!-- l2h_begin $l2h_name $count -->" if ($debug);
- if (defined($l2h_from_html[$count])) {
+ $result = "<!-- l2h_begin $l2h_name $latex_text_index -->" if ($debug);
+ if (defined($l2h_from_html[$latex_text_index])) {
$html_output_count++;
- $result .= $l2h_from_html[$count];
+ $result .= $l2h_from_html[$latex_text_index];
$result .= "\n" if ($cmdname eq 'tex');
} else {
- # if the result is not in @l2h_from_html, there is an error somewhere.
+ # if the result is not in @l2h_from_html, it should in general mean that
+ # the conversion was skipped, as failures in general cause the converter
+ # to abort. It could also happen if latex2html somehow mangled the output.
$extract_error_count++;
- $self->document_warn($self, sprintf(__(
- "l2h: could not extract the fragment %d for \@%s with output counter %d from HTML"),
- $counter, $cmdname, $count));
- # try simple (ordinary) substitution (without l2h)
+ # Expected error if the conversion to html failed or was skipped,
+ # additional warning only if the conversion seems to have proceeded normally.
+ if ($latex_converted_count == $latex_to_convert_count
+ and $latex_converted_count == $html_converted_count) {
+ # it could also probably be marked as a bug as there is no situation
+ # in which this could happen given the check on succeeding conversion.
+ $self->document_warn($self, sprintf(__(
+ "l2h: could not extract the fragment %d for \@%s, text %d, from HTML"),
+ $command_count, $cmdname, $latex_text_index));
+ } elsif ($verbose) {
+ warn "# l2h: incomplete l2h. No conversion command $command_count \@$cmdname, text index $latex_text_index\n";
+ }
+ # simple (ordinary) substitution (without l2h)
$result .= "<!-- l2h: ". __LINE__ . " use default -->" if ($debug);
$result .= &{$self->default_command_conversion($cmdname)}($self,
$cmdname, $command, $args,
$content);
}
- $result .= "<!-- l2h_end $l2h_name $count -->" if ($debug);
+ $result .= "<!-- l2h_end $l2h_name $latex_text_index -->" if ($debug);
return $result;
}
@@ -649,24 +648,33 @@ sub l2h_do_tex($$$;$$)
sub l2h_finish($)
{
my $self = shift;
- # return immediately if nothing to do or an error
- return 0 unless ($status == 0);
if ($verbose) {
- if ($extract_error_count + $invalid_counter_count) {
- warn "# l2h: finished from html ($extract_error_count extract and $invalid_counter_count invalid counter errors)\n";
+ if ($extract_error_count + $invalid_text_index_count) {
+ warn "# l2h: finish ($extract_error_count extract errors, $invalid_text_index_count invalid index errors)\n";
} else {
- warn "# l2h: finished from html (no error)\n";
+ warn "# l2h: finish (no error)\n";
}
if ($html_output_count != $latex_converted_count) {
+ # this happens if texts are reused, or cache is used,
+ # and if commands are not expanded later.
+ warn "# l2h: $html_output_count html outputed for $latex_converted_count converted\n";
+ }
+ if ($html_output_count != $latex_commands_count) {
# this may happen if @-commands are collected at some places
# but @-command at those places are not expanded later. For
- # example @math on @multitable lines.
- warn "# l2h: $html_output_count html outputed for $latex_converted_count converted\n";
+ # example @math on @multitable lines, or in @copying.
+ warn "# l2h: $html_output_count html outputed for $latex_commands_count collected\n";
}
}
+
+ # return in case of error or, more likely, skipped run, as the
+ # errors cause the converter to abort.
+ return 0 if ($latex_converted_count != $latex_to_convert_count
+ or $latex_converted_count != $html_converted_count);
+
l2h_store_cache($self);
- if ($self->get_conf('L2H_CLEAN')) {
+ if ($self->get_conf('L2H_CLEAN') and $latex_converted_count > 0) {
warn "# l2h: removing temporary files generated by l2h extension\n"
if ($verbose);
my $quoted_l2h_name = quotemeta($l2h_name);
@@ -683,14 +691,14 @@ sub l2h_finish($)
}
}
}
- warn "# l2h: Finished\n" if $verbose;
+ warn "# l2h: end\n" if $verbose;
return 0;
}
##############################
# stuff for l2h caching
#
-# FIXME it is clear that l2h stuff takes very long compared with texi2any
+# FIXME it is clear that l2h stuff can take very long compared with texi2any
# which is already quite long. However this also adds some complexity
# It was originally tried with a dbm data base, but it did not store all
@@ -727,14 +735,15 @@ sub l2h_init_cache($)
}
}
}
- warn "# l2h: Cached: ".join('|', sort(keys(%l2h_cache)))."\n" if $verbose;
+ warn "# l2h: Cached: ".join('|', sort(keys(%l2h_cache)))."\n"
+ if ($verbose and scalar(keys(%l2h_cache)));
}
# store all the text obtained through latex2html
sub l2h_store_cache($)
{
my $self = shift;
- return unless $latex_count;
+ return unless $latex_texts_count;
my ($key, $value);
my ($encoded_l2h_cache_path_name, $l2h_cache_path_encoding)
= $self->encoded_output_file_name($l2h_cache_path_name);
@@ -763,8 +772,10 @@ sub l2h_store_cache($)
print FH "\n\$l2h_cache_key = q/$key/;\n";
print FH "\$l2h_cache{\$l2h_cache_key} = q|$value|;\n";
}
- print FH 'warn "# cache: Cached: ".join("|", sort(keys(%l2h_cache)))."\n"'.";\n"
- if ($verbose);
+ # this can be used when debugging, but otherwise this is not such a
+ # good idea, as it will be read by the next run
+ #print FH 'warn "# cache: Cached: ".join("|", sort(keys(%l2h_cache)))."\n"'.";\n"
+ # if ($verbose);
print FH "\n1;\n";
# FIXME error condition not checked
close(FH);
diff --git "a/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii_res/encod\303\251/tex_complex_l2h.tex" "b/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii_res/encod\303\251/tex_complex_l2h.tex"
index 8431099b54..800f0d5287 100644
--- "a/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii_res/encod\303\251/tex_complex_l2h.tex"
+++ "b/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii_res/encod\303\251/tex_complex_l2h.tex"
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git "a/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii_res/encod\303\251/tex_encod\303\251_utf8_l2h.tex" "b/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii_res/encod\303\251/tex_encod\303\251_utf8_l2h.tex"
index 62abee15c8..a0b99e8e8c 100644
--- "a/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii_res/encod\303\251/tex_encod\303\251_utf8_l2h.tex"
+++ "b/tp/tests/many_input_files/tex_l2h_output_dir_non_ascii_res/encod\303\251/tex_encod\303\251_utf8_l2h.tex"
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/many_input_files/tex_l2h_res/tex_complex_l2h.tex b/tp/tests/many_input_files/tex_l2h_res/tex_complex_l2h.tex
index 8431099b54..800f0d5287 100644
--- a/tp/tests/many_input_files/tex_l2h_res/tex_complex_l2h.tex
+++ b/tp/tests/many_input_files/tex_l2h_res/tex_complex_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/many_input_files/tex_l2h_res/tex_l2h.tex b/tp/tests/many_input_files/tex_l2h_res/tex_l2h.tex
index 347f95d2a5..646a0d5bc2 100644
--- a/tp/tests/many_input_files/tex_l2h_res/tex_l2h.tex
+++ b/tp/tests/many_input_files/tex_l2h_res/tex_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/block_EOL_tex/block_EOL_l2h.tex b/tp/tests/tex_html/res_parser/block_EOL_tex/block_EOL_l2h.tex
index b745d7f0fb..4d1102e89b 100644
--- a/tp/tests/tex_html/res_parser/block_EOL_tex/block_EOL_l2h.tex
+++ b/tp/tests/tex_html/res_parser/block_EOL_tex/block_EOL_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/formatting_singular/sing_l2h.tex b/tp/tests/tex_html/res_parser/formatting_singular/sing_l2h.tex
index 5bd2dd76e6..e4b6fb9525 100644
--- a/tp/tests/tex_html/res_parser/formatting_singular/sing_l2h.tex
+++ b/tp/tests/tex_html/res_parser/formatting_singular/sing_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/math_not_closed/math_not_closed_l2h.tex b/tp/tests/tex_html/res_parser/math_not_closed/math_not_closed_l2h.tex
index b038ffce4e..d0f4b4b566 100644
--- a/tp/tests/tex_html/res_parser/math_not_closed/math_not_closed_l2h.tex
+++ b/tp/tests/tex_html/res_parser/math_not_closed/math_not_closed_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/tex/tex_l2h.tex b/tp/tests/tex_html/res_parser/tex/tex_l2h.tex
index 347f95d2a5..646a0d5bc2 100644
--- a/tp/tests/tex_html/res_parser/tex/tex_l2h.tex
+++ b/tp/tests/tex_html/res_parser/tex/tex_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/tex_accents_l2h/tex_accents_l2h.tex b/tp/tests/tex_html/res_parser/tex_accents_l2h/tex_accents_l2h.tex
index e1de1c60be..4b18e17c14 100644
--- a/tp/tests/tex_html/res_parser/tex_accents_l2h/tex_accents_l2h.tex
+++ b/tp/tests/tex_html/res_parser/tex_accents_l2h/tex_accents_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/tex_complex_l2h/tex_complex_l2h.tex b/tp/tests/tex_html/res_parser/tex_complex_l2h/tex_complex_l2h.tex
index 8431099b54..800f0d5287 100644
--- a/tp/tests/tex_html/res_parser/tex_complex_l2h/tex_complex_l2h.tex
+++ b/tp/tests/tex_html/res_parser/tex_complex_l2h/tex_complex_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/tex_encoded_latin1_l2h/tex_encode_latin1_l2h.tex b/tp/tests/tex_html/res_parser/tex_encoded_latin1_l2h/tex_encode_latin1_l2h.tex
index 4ff4ba6c72..1e8dbf886f 100644
--- a/tp/tests/tex_html/res_parser/tex_encoded_latin1_l2h/tex_encode_latin1_l2h.tex
+++ b/tp/tests/tex_html/res_parser/tex_encoded_latin1_l2h/tex_encode_latin1_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git "a/tp/tests/tex_html/res_parser/tex_encoded_utf8_l2h/tex_encod\303\251_utf8_l2h.tex" "b/tp/tests/tex_html/res_parser/tex_encoded_utf8_l2h/tex_encod\303\251_utf8_l2h.tex"
index 62abee15c8..a0b99e8e8c 100644
--- "a/tp/tests/tex_html/res_parser/tex_encoded_utf8_l2h/tex_encod\303\251_utf8_l2h.tex"
+++ "b/tp/tests/tex_html/res_parser/tex_encoded_utf8_l2h/tex_encod\303\251_utf8_l2h.tex"
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/tex_eqalign_l2h/tex_eqalign_l2h.tex b/tp/tests/tex_html/res_parser/tex_eqalign_l2h/tex_eqalign_l2h.tex
index 2136817a7e..8d70120033 100644
--- a/tp/tests/tex_html/res_parser/tex_eqalign_l2h/tex_eqalign_l2h.tex
+++ b/tp/tests/tex_html/res_parser/tex_eqalign_l2h/tex_eqalign_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/tex_gdef_l2h/tex_gdef_l2h.tex b/tp/tests/tex_html/res_parser/tex_gdef_l2h/tex_gdef_l2h.tex
index 8e1f858157..4f61f5c32f 100644
--- a/tp/tests/tex_html/res_parser/tex_gdef_l2h/tex_gdef_l2h.tex
+++ b/tp/tests/tex_html/res_parser/tex_gdef_l2h/tex_gdef_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/tex_in_copying/tex_in_copying_l2h.tex b/tp/tests/tex_html/res_parser/tex_in_copying/tex_in_copying_l2h.tex
index 673333b6fe..0f21e3b80d 100644
--- a/tp/tests/tex_html/res_parser/tex_in_copying/tex_in_copying_l2h.tex
+++ b/tp/tests/tex_html/res_parser/tex_in_copying/tex_in_copying_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}
diff --git a/tp/tests/tex_html/res_parser/tex_not_closed/tex_not_closed_l2h.tex b/tp/tests/tex_html/res_parser/tex_not_closed/tex_not_closed_l2h.tex
index 5a848b6cd8..01d9e28173 100644
--- a/tp/tests/tex_html/res_parser/tex_not_closed/tex_not_closed_l2h.tex
+++ b/tp/tests/tex_html/res_parser/tex_not_closed/tex_not_closed_l2h.tex
@@ -1,4 +1,4 @@
-% This document was automatically generated by the l2h extenstion of texi2html
+% Automatically generated by Texinfo HTML l2h extension
% DO NOT EDIT !!!
\documentclass{article}
\usepackage{html}