guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

01/02: utils: Allow text substitution even in the presence of NUL charac


From: guix-commits
Subject: 01/02: utils: Allow text substitution even in the presence of NUL characters.
Date: Fri, 8 Jan 2021 14:11:07 -0500 (EST)

apteryx pushed a commit to branch core-updates
in repository guix.

commit 485ac28235bf8775914b905060266b09629c1bac
Author: Mark H Weaver <mhw@netris.org>
AuthorDate: Thu Jan 7 15:15:24 2021 -0500

    utils: Allow text substitution even in the presence of NUL characters.
    
    Fixes <https://issues.guix.gnu.org/30116>.
    
    Before this change, the presence of a NUL character on a line meant that
    the (glibc) regexp engine used by Guile would either 1. stop scanning the
    string or 2. crash with the error "string contains #\\nul character",
    depending on the locale used.
    
    This change works around this limitation by first replacing the NUL 
character
    by an unused Unicode code point, doing the substitution, then reverting the
    replacement.
    
    * guix/build/utils.scm (unused-private-use-code-point)
    (replace-char): New procedures.
    (substitute): Make use of the above procedures to work around the NUL
    character regexp engine limitation.
    * tests/build-utils.scm: Add tests.
    
    Co-authored-by: Maxim Cournoyer <maxim.cournoyer@gmail.com>
    Signed-off-by: Maxim Cournoyer <maxim.cournoyer@gmail.com>
---
 guix/build/utils.scm  | 57 +++++++++++++++++++++++++++++++++++++++++----------
 tests/build-utils.scm | 24 +++++++++++++++++++++-
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/guix/build/utils.scm b/guix/build/utils.scm
index f0ea830..76180e6 100644
--- a/guix/build/utils.scm
+++ b/guix/build/utils.scm
@@ -2,7 +2,7 @@
 ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic 
Courtès <ludo@gnu.org>
 ;;; Copyright © 2013 Andreas Enge <andreas@enge.fr>
 ;;; Copyright © 2013 Nikita Karetnikov <nikita@karetnikov.org>
-;;; Copyright © 2015, 2018 Mark H Weaver <mhw@netris.org>
+;;; Copyright © 2015, 2018, 2021 Mark H Weaver <mhw@netris.org>
 ;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net>
 ;;; Copyright © 2018, 2019 Ricardo Wurmus <rekado@elephly.net>
 ;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il>
@@ -784,6 +784,31 @@ PROC's result is returned."
       (lambda (key . args)
         (false-if-exception (delete-file template))))))
 
+(define (unused-private-use-code-point s)
+  "Find a code point within a Unicode Private Use Area that is not
+present in S, and return the corresponding character object.  If one
+cannot be found, return false."
+  (define (scan lo hi)
+    (and (<= lo hi)
+         (let ((c (integer->char lo)))
+           (if (string-index s c)
+               (scan (+ lo 1) hi)
+               c))))
+  (or (scan   #xE000   #xF8FF)
+      (scan  #xF0000  #xFFFFD)
+      (scan #x100000 #x10FFFD)))
+
+(define (replace-char c1 c2 s)
+  "Return a string which is equal to S except with all instances of C1
+replaced by C2.  If C1 and C2 are equal, return S."
+  (if (char=? c1 c2)
+      s
+      (string-map (lambda (c)
+                    (if (char=? c c1)
+                        c2
+                        c))
+                  s)))
+
 (define (substitute file pattern+procs)
   "PATTERN+PROCS is a list of regexp/two-argument-procedure pairs.  For each
 line of FILE, and for each PATTERN that it matches, call the corresponding
@@ -802,16 +827,26 @@ end of a line; by itself it won't match the terminating 
newline of a line."
         (let loop ((line (read-line in 'concat)))
           (if (eof-object? line)
               #t
-              (let ((line (fold (lambda (r+p line)
-                                  (match r+p
-                                    ((regexp . proc)
-                                     (match (list-matches regexp line)
-                                       ((and m+ (_ _ ...))
-                                        (proc line m+))
-                                       (_ line)))))
-                                line
-                                rx+proc)))
-                (display line out)
+              ;; Work around the fact that Guile's regexp-exec does not handle
+              ;; NUL characters (a limitation of the underlying GNU libc's
+              ;; regexec) by temporarily replacing them by an unused private
+              ;; Unicode code point.
+              ;; TODO: Use SRFI-115 instead, once available in Guile.
+              (let* ((nul* (or (and (string-index line #\nul)
+                                    (unused-private-use-code-point line))
+                               #\nul))
+                     (line* (replace-char #\nul nul* line))
+                     (line1* (fold (lambda (r+p line)
+                                     (match r+p
+                                       ((regexp . proc)
+                                        (match (list-matches regexp line)
+                                          ((and m+ (_ _ ...))
+                                           (proc line m+))
+                                          (_ line)))))
+                                   line*
+                                   rx+proc))
+                     (line1 (replace-char nul* #\nul line1*)))
+                (display line1 out)
                 (loop (read-line in 'concat)))))))))
 
 
diff --git a/tests/build-utils.scm b/tests/build-utils.scm
index 654b480..31be7ff 100644
--- a/tests/build-utils.scm
+++ b/tests/build-utils.scm
@@ -1,6 +1,7 @@
 ;;; GNU Guix --- Functional package management for GNU
 ;;; Copyright © 2012, 2015, 2016, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
 ;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net>
+;;; Copyright © 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -18,7 +19,7 @@
 ;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.
 
 
-(define-module (test-build-utils)
+(define-module (test build-utils)
   #:use-module (guix tests)
   #:use-module (guix build utils)
   #:use-module ((guix utils)
@@ -241,4 +242,25 @@ print('hello world')"))
                                            "/some/other/path")))
          #f)))))
 
+(test-equal "substitute*, text contains a NUL byte, UTF-8"
+  "c\0d"
+  (with-fluids ((%default-port-encoding "UTF-8")
+                (%default-port-conversion-strategy 'error))
+    ;; The GNU libc is locale sensitive.  Depending on the value of LANG, the
+    ;; test could fail with "string contains #\\nul character: ~S" or "cannot
+    ;; convert wide string to output locale".
+    (setlocale LC_ALL "en_US.UTF-8")
+    (call-with-temporary-output-file
+     (lambda (file port)
+       (format port "a\0b")
+       (flush-output-port port)
+
+       (substitute* file
+         (("a") "c")
+         (("b") "d"))
+
+       (with-input-from-file file
+         (lambda _
+           (get-string-all (current-input-port))))))))
+
 (test-end)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]