[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[groff] 03/05: Allow utf8 in web links, allows IDN links.
From: Deri James
Subject: [groff] 03/05: Allow utf8 in web links, allows IDN links.
Date: Mon, 28 Aug 2023 13:50:06 -0400 (EDT)
deri pushed a commit to branch deri-gropdf-ng
in repository groff.
commit fddc79906cc8f9eba71fc17ff985a90275bdca90
Author: Deri James <deri@chuzzlewit.myzen.co.uk>
AuthorDate: Mon Aug 28 15:17:53 2023 +0100
Allow utf8 in web links, allows IDN links.
Fix for 'pdfhref-whack.groff' in #64576
---
src/devices/gropdf/gropdf.pl | 41 ++++++++++++++++++++++++++++++++++-------
1 file changed, 34 insertions(+), 7 deletions(-)
diff --git a/src/devices/gropdf/gropdf.pl b/src/devices/gropdf/gropdf.pl
index 37f13d20d..52e0953d4 100644
--- a/src/devices/gropdf/gropdf.pl
+++ b/src/devices/gropdf/gropdf.pl
@@ -1499,6 +1499,7 @@ sub do_x
FixRect($annot->{DATA}->{Rect}); # Y origin to ll
FixPDFColour($annot->{DATA});
$annot->{DATA}->{Dest}=UTFName($annot->{DATA}->{Dest}) if exists($annot->{DATA}->{Dest});
+ $annot->{DATA}->{A}->{URI}=URIName($annot->{DATA}->{A}->{URI}) if exists($annot->{DATA}->{A}->{URI});
push(@PageAnnots,$annotno);
}
elsif ($pdfmark=~m/(.+) \/OUT\s*$/)
@@ -1966,7 +1967,20 @@ sub do_x
}
}
-sub utf16
+sub URIName
+{
+ my $s=shift;
+
+ $s=Clean($s);
+ $s=~s/\\\[u((?i)D[89AB]\p{AHex}{2})\] # High surrogate in range 0xD800–0xDBFF
+ \\\[u((?i)D[CDEF]\p{AHex}{2})\] # Low surrogate in range 0xDC00–0xDFFF
+ /chr( ((hex($1) - 0xD800) * 0x400) + (hex($2) - 0xDC00) + 0x10000 )/xge;
+ $s=~s/\\\[u(\p{AHex}{4})]/chr hex $1/ge;
+
+ return(join '', map {(m/[-\w.~_]/)?chr($_):'%'.sprintf("%02X", $_)} unpack "C*", encode('utf8',$s));
+}
+
+sub Clean
{
my $p=shift;
@@ -1986,6 +2000,13 @@ sub utf16
$p=~s/\\\((\w\w)/\\\[$1\]/g; # convert \(xx to \[xx]
+ return $p;
+}
+
+sub utf16
+{
+ my $p=Clean(shift);
+
$p=~s/\\\[(.*?)\]/FindChr($1,0)/eg;
$p=~s/\\C($parcln)/FindChr($1,1)/eg;
# $p=~s/\\\((..)/FindChr($1)/eg;
@@ -2111,6 +2132,7 @@ sub PutHotSpot
FixPDFColour($annot->{DATA});
FixRect($annot->{DATA}->{Rect}); # Y origin to ll
$annot->{DATA}->{Dest}=UTFName($annot->{DATA}->{Dest}) if exists($annot->{DATA}->{Dest});
+ $annot->{DATA}->{A}->{URI}=URIName($annot->{DATA}->{A}->{URI}) if exists($annot->{DATA}->{A});
push(@PageAnnots,$annotno);
}
@@ -2696,16 +2718,21 @@ sub nextwd
if ($wd=~m/^(.*?)(<<|>>|(?:(?<!\\)\[|\]))(.*)/)
{
- if (defined($1) and length($1))
+ my ($p1,$p2,$p3)=($1,$2,$3);
+
+ if (defined($p1) and length($p1))
{
- unshift(@{$pdfwds},$3) if defined($3) and length($3);
- unshift(@{$pdfwds},$2);
- $wd=$1;
+ if (!($p2 eq ']' and $p1=~m/\[/))
+ {
+ unshift(@{$pdfwds},$p3) if defined($p3) and length($p3);
+ unshift(@{$pdfwds},$p2);
+ $wd=$p1;
+ }
}
else
{
- unshift(@{$pdfwds},$3) if defined($3) and length($3);
- $wd=$2;
+ unshift(@{$pdfwds},$p3) if defined($p3) and length($p3);
+ $wd=$p2;
}
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [groff] 03/05: Allow utf8 in web links, allows IDN links.,
Deri James <=