groff-commit
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[groff] 03/05: Allow utf8 in web links, allows IDN links.


From: Deri James
Subject: [groff] 03/05: Allow utf8 in web links, allows IDN links.
Date: Mon, 28 Aug 2023 13:50:06 -0400 (EDT)

deri pushed a commit to branch deri-gropdf-ng
in repository groff.

commit fddc79906cc8f9eba71fc17ff985a90275bdca90
Author: Deri James <deri@chuzzlewit.myzen.co.uk>
AuthorDate: Mon Aug 28 15:17:53 2023 +0100

    Allow utf8 in web links, allows IDN links.
    
    Fix for 'pdfhref-whack.groff' in #64576
---
 src/devices/gropdf/gropdf.pl | 41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/src/devices/gropdf/gropdf.pl b/src/devices/gropdf/gropdf.pl
index 37f13d20d..52e0953d4 100644
--- a/src/devices/gropdf/gropdf.pl
+++ b/src/devices/gropdf/gropdf.pl
@@ -1499,6 +1499,7 @@ sub do_x
                     FixRect($annot->{DATA}->{Rect}); # Y origin to ll
                     FixPDFColour($annot->{DATA});
                    $annot->{DATA}->{Dest}=UTFName($annot->{DATA}->{Dest}) if exists($annot->{DATA}->{Dest});
+                   $annot->{DATA}->{A}->{URI}=URIName($annot->{DATA}->{A}->{URI}) if exists($annot->{DATA}->{A}->{URI});
                     push(@PageAnnots,$annotno);
                 }
                 elsif ($pdfmark=~m/(.+) \/OUT\s*$/)
@@ -1966,7 +1967,20 @@ sub do_x
     }
 }
 
-sub utf16
+sub URIName # Return URI $s (may hold groff \[uXXXX] escapes) as ASCII: its UTF-8 bytes are %-encoded.
+{
+    my $s=Clean(shift);
+    # Bytes kept verbatim: RFC 3986 unreserved and reserved characters, plus '%' (existing %XX escapes).
+    my $keep=qr/^[-A-Za-z0-9._~:\/?#\[\]\@!\$&'()*+,;=%]\z/;
+    $s=~s/\\\[u((?i)D[89AB]\p{AHex}{2})\] # High surrogate in range 0xD800–0xDBFF
+              \\\[u((?i)D[CDEF]\p{AHex}{2})\] #  Low surrogate in range 0xDC00–0xDFFF
+             /chr( ((hex($1) - 0xD800) * 0x400) + (hex($2) - 0xDC00) + 0x10000 )/xge;
+    $s=~s/\\\[u(\p{AHex}{4})]/chr hex $1/ge;
+    # Test the byte's character, not $_ itself: $_ is a number whose decimal digits always match \w.
+    return(join '', map {(chr($_)=~$keep)?chr($_):'%'.sprintf("%02X", $_)} unpack "C*", encode('utf8',$s));
+}
+
+sub Clean
 {
     my $p=shift;
 
@@ -1986,6 +2000,13 @@ sub utf16
 
     $p=~s/\\\((\w\w)/\\\[$1\]/g;        # convert \(xx to \[xx]
 
+    return $p;
+}
+
+sub utf16
+{
+    my $p=Clean(shift);
+
     $p=~s/\\\[(.*?)\]/FindChr($1,0)/eg;
     $p=~s/\\C($parcln)/FindChr($1,1)/eg;
 #     $p=~s/\\\((..)/FindChr($1)/eg;
@@ -2111,6 +2132,7 @@ sub PutHotSpot
     FixPDFColour($annot->{DATA});
     FixRect($annot->{DATA}->{Rect}); # Y origin to ll
     $annot->{DATA}->{Dest}=UTFName($annot->{DATA}->{Dest}) if exists($annot->{DATA}->{Dest});
+    $annot->{DATA}->{A}->{URI}=URIName($annot->{DATA}->{A}->{URI}) if exists($annot->{DATA}->{A});
     push(@PageAnnots,$annotno);
 }
 
@@ -2696,16 +2718,21 @@ sub nextwd
 
     if ($wd=~m/^(.*?)(<<|>>|(?:(?<!\\)\[|\]))(.*)/)
     {
-        if (defined($1) and length($1))
+        my ($p1,$p2,$p3)=($1,$2,$3);
+
+        if (defined($p1) and length($p1))
         {
-            unshift(@{$pdfwds},$3) if defined($3) and length($3);
-            unshift(@{$pdfwds},$2);
-            $wd=$1;
+            if (!($p2 eq ']' and $p1=~m/\[/))
+            {
+                unshift(@{$pdfwds},$p3) if defined($p3) and length($p3);
+                unshift(@{$pdfwds},$p2);
+                $wd=$p1;
+            }
         }
         else
         {
-            unshift(@{$pdfwds},$3) if defined($3) and length($3);
-            $wd=$2;
+            unshift(@{$pdfwds},$p3) if defined($p3) and length($p3);
+            $wd=$p2;
         }
     }
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]