guile-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Fix `get-string-n!' &i/o-decoding exception behavior


From: Andreas Rottmann
Subject: [PATCH] Fix `get-string-n!' &i/o-decoding exception behavior
Date: Thu, 8 Nov 2012 00:51:48 +0100

Previously, `get-string-n!' from `(rnrs io ports)' would not throw the
exception required by R6RS, and could not easily do so due to being
implemented entirely in C.

This change fixes this by introducing a corresponding internal C
function reporting errors by return value and reimplementing the
`get-string-n!' in Scheme on top of that.  Along with `get-string-n!',
`get-string-n' gets fixed, inheriting the correct behavior.

* libguile/ports.c (scm_i_getc): New function, a version of `scm_getc'
  not using exceptions.
  (scm_getc): Implemented using `scm_i_getc'.
* libguile/ports.h (scm_i_getc): Add prototype marked SCM_INTERNAL.

* libguile/r6rs-ports.c (scm_i_get_string_n_x): Exception-free version
  of `get-string-n!', making use of `scm_i_getc'.
  (scm_get_string_n_x): Removed, now implemented in Scheme.

* module/ice-9/binary-ports.scm (get-string-n!): Removed from export
  list, it doesn't fit the module module purpose anyway.
* module/rnrs/io/ports.scm (%get-string-n): Newly defined by internal
  reference to `(ice-9 binary-ports)'.
  (get-string-n!): Implemented in Scheme on top of `%get-string-n!'.

* test-suite/tests/r6rs-ports.test ("8.2.9 Textual
  input")["read-error"]: Activate commented-out exception-behavior tests
  of `get-string-n!'.
  ["decoding error"]: New test prefix with tests for `get-char',
  `get-string-n!' and `get-string-n' and `get-line'.
---
 libguile/ports.c                 |   20 ++++++++++++++++----
 libguile/ports.h                 |    1 +
 libguile/r6rs-ports.c            |   21 +++++++++++----------
 module/ice-9/binary-ports.scm    |    6 +++---
 module/rnrs/io/ports.scm         |   14 ++++++++++++++
 test-suite/tests/r6rs-ports.test |   24 ++++++++++++++++++++----
 6 files changed, 65 insertions(+), 21 deletions(-)

diff --git a/libguile/ports.c b/libguile/ports.c
index 55808e2..b653af4 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1392,12 +1392,10 @@ scm_t_wchar
 scm_getc (SCM port)
 #define FUNC_NAME "scm_getc"
 {
-  int err;
-  size_t len;
+  int err = 0;
   scm_t_wchar codepoint;
-  char buf[SCM_MBCHAR_BUF_SIZE];
 
-  err = get_codepoint (port, &codepoint, buf, &len);
+  codepoint = scm_i_getc (port, &err);
   if (SCM_UNLIKELY (err != 0))
     /* At this point PORT should point past the invalid encoding, as per
        R6RS-lib Section 8.2.4.  */
@@ -1407,6 +1405,20 @@ scm_getc (SCM port)
 }
 #undef FUNC_NAME
 
+/* Read a codepoint from PORT and return it.  This version reports
+   errors via the ERROR argument instead of via exceptions. */
+scm_t_wchar
+scm_i_getc (SCM port, int *error)
+{
+  size_t len;
+  scm_t_wchar codepoint;
+  char buf[SCM_MBCHAR_BUF_SIZE];
+
+  *error = get_codepoint (port, &codepoint, buf, &len);
+
+  return codepoint;
+}
+
 /* this should only be called when the read buffer is empty.  it
    tries to refill the read buffer.  it returns the first char from
    the port, which is either EOF or *(pt->read_pos).  */
diff --git a/libguile/ports.h b/libguile/ports.h
index d4d59b7..2f70056 100644
--- a/libguile/ports.h
+++ b/libguile/ports.h
@@ -281,6 +281,7 @@ SCM_API SCM scm_force_output (SCM port);
 SCM_API SCM scm_flush_all_ports (void);
 SCM_API SCM scm_read_char (SCM port);
 SCM_API scm_t_wchar scm_getc (SCM port);
+SCM_INTERNAL scm_t_wchar scm_i_getc (SCM port, int *error);
 SCM_API size_t scm_c_read (SCM port, void *buffer, size_t size);
 SCM_API void scm_c_write (SCM port, const void *buffer, size_t size);
 SCM_API void scm_lfwrite (const char *ptr, size_t size, SCM port);
diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c
index e867429..bd10081 100644
--- a/libguile/r6rs-ports.c
+++ b/libguile/r6rs-ports.c
@@ -1242,18 +1242,17 @@ SCM_DEFINE (scm_i_make_transcoded_port,
 
 /* Textual I/O */
 
-SCM_DEFINE (scm_get_string_n_x,
-            "get-string-n!", 4, 0, 0,
+SCM_DEFINE (scm_i_get_string_n_x,
+            "%get-string-n!", 4, 0, 0,
             (SCM port, SCM str, SCM start, SCM count),
-            "Read up to @var{count} characters from @var{port} into "
-            "@var{str}, starting at @var{start}.  If no characters "
-            "can be read before the end of file is encountered, the end "
-            "of file object is returned.  Otherwise, the number of "
-            "characters read is returned.")
-#define FUNC_NAME s_scm_get_string_n_x
+            "Read up to @var{count} characters from @var{port} into @var{str}, 
"
+            "starting at @var{start}. Returns the end of file object, a list "
+            "containing an errno value, or the number of characters read.")
+#define FUNC_NAME s_scm_i_get_string_n_x
 {
   size_t c_start, c_count, c_len, c_end, j;
   scm_t_wchar c;
+  int err;
 
   SCM_VALIDATE_OPINPORT (1, port);
   SCM_VALIDATE_STRING (2, str);
@@ -1267,8 +1266,10 @@ SCM_DEFINE (scm_get_string_n_x,
 
   for (j = c_start; j < c_end; j++)
     {
-      c = scm_getc (port);
-      if (c == EOF)
+      c = scm_i_getc (port, &err);
+      if (SCM_UNLIKELY (err != 0))
+        return scm_list_1 (scm_from_int (err));
+      else if (SCM_UNLIKELY (c == EOF))
         {
           size_t chars_read = j - c_start;
           return chars_read == 0 ? SCM_EOF_VAL : scm_from_size_t (chars_read);
diff --git a/module/ice-9/binary-ports.scm b/module/ice-9/binary-ports.scm
index c07900b..3f7b9e6 100644
--- a/module/ice-9/binary-ports.scm
+++ b/module/ice-9/binary-ports.scm
@@ -37,14 +37,14 @@
             get-bytevector-n!
             get-bytevector-some
             get-bytevector-all
-            get-string-n!
             put-u8
             put-bytevector
             open-bytevector-output-port
             make-custom-binary-output-port))
 
-;; Note that this extension also defines %make-transcoded-port, which is
-;; not exported but is used by (rnrs io ports).
+;; Note that this extension also defines `%make-transcoded-port' and
+;; `%get-string-n!', which are not exported but used by `(rnrs io
+;; ports)'.
 
 (load-extension (string-append "libguile-" (effective-version))
                 "scm_init_r6rs_ports")
diff --git a/module/rnrs/io/ports.scm b/module/rnrs/io/ports.scm
index fddb491..fb8c795 100644
--- a/module/rnrs/io/ports.scm
+++ b/module/rnrs/io/ports.scm
@@ -445,6 +445,20 @@ return the characters accumulated in that port."
 (define (get-string-all port)
   (with-textual-input-conditions port (read-delimited "" port 'concat)))
 
+(define %get-string-n! (@@ (ice-9 binary-ports) %get-string-n!))
+
+(define (get-string-n! port str start count)
+  "Read up to @var{count} characters from @var{port} into @var{str},
+starting at @var{start}.  If no characters can be read before the end of
+file is encountered, the end of file object is returned.  Otherwise, the
+number of characters read is returned."
+  (with-i/o-port-error port make-i/o-read-error
+    (lambda ()
+      (let ((result (%get-string-n! port str start count)))
+        (if (pair? result)
+            (raise (make-i/o-decoding-error port))
+            result)))))
+
 (define (get-string-n port count)
   "Read up to @var{count} characters from @var{port}.
 If no characters could be read before encountering the end of file,
diff --git a/test-suite/tests/r6rs-ports.test b/test-suite/tests/r6rs-ports.test
index 46da67f..b1d0f25 100644
--- a/test-suite/tests/r6rs-ports.test
+++ b/test-suite/tests/r6rs-ports.test
@@ -633,6 +633,16 @@
       (binary-port? (standard-error-port)))))
 
 
+;; Create a textual input port in UTF-8 encoding reading from a byte
+;; vector containing an invalid UTF-8 sequence. Thus any read from that
+;; port should result in an &i/o-decoding-error condition being raised.
+(define (make-invalid-utf8-port)
+  (transcoded-port
+   (open-bytevector-input-port '#vu8(#xFE #xFF))
+   (make-transcoder (utf-8-codec)
+                    (native-eol-style)
+                    (error-handling-mode raise))))
+
 (with-test-prefix "8.2.6  Input and output ports"
 
   (pass-if "transcoded-port [output]"
@@ -724,19 +734,25 @@
       (get-char (make-failing-port)))
     (pass-if-condition "lookahead-char" i/o-read-error?
       (lookahead-char (make-failing-port)))
-    ;; FIXME: these are not yet exception-correct
-    #|
     (pass-if-condition "get-string-n" i/o-read-error?
       (get-string-n (make-failing-port) 5))
     (pass-if-condition "get-string-n!" i/o-read-error?
       (get-string-n! (make-failing-port) (make-string 5) 0 5))
-    |#
     (pass-if-condition "get-string-all" i/o-read-error?
       (get-string-all (make-failing-port 100)))
     (pass-if-condition "get-line" i/o-read-error?
       (get-line (make-failing-port)))
     (pass-if-condition "get-datum" i/o-read-error?
-      (get-datum (make-failing-port)))))
+      (get-datum (make-failing-port))))
+  (with-test-prefix "decoding error"
+    (pass-if-condition "get-char" i/o-decoding-error?
+      (get-char (make-invalid-utf8-port)))
+    (pass-if-condition "get-string-n" i/o-decoding-error?
+      (get-string-n (make-invalid-utf8-port) 2))
+    (pass-if-condition "get-string-n!" i/o-decoding-error?
+      (get-string-n! (make-invalid-utf8-port) (make-string 5) 0 5))
+    (pass-if-condition "get-line" i/o-decoding-error?
+      (get-line (make-invalid-utf8-port)))))
 
 (define (encoding-error-predicate char)
   (lambda (c)
-- 
1.7.10.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]