skribilo-bugs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#54705: [PATCH] reader: Add Gemtext reader.


From: Arun Isaac
Subject: bug#54705: [PATCH] reader: Add Gemtext reader.
Date: Mon, 4 Apr 2022 14:57:42 +0530

* src/guile/skribilo/reader/gemtext.scm: New file.
* src/guile/Makefile.am (readers): Register it.
* doc/user/syntax.skb (The Gemtext Syntax): New section.
* tests/readers/gemtext.test: New file.
* tests/Makefile.am (TESTS): Add readers/gemtext.test.
---
 doc/user/syntax.skb                   |  21 ++-
 src/guile/Makefile.am                 |   3 +-
 src/guile/skribilo/reader/gemtext.scm | 231 ++++++++++++++++++++++++++
 tests/Makefile.am                     |   3 +-
 tests/readers/gemtext.test            | 133 +++++++++++++++
 5 files changed, 388 insertions(+), 3 deletions(-)
 create mode 100644 src/guile/skribilo/reader/gemtext.scm
 create mode 100644 tests/readers/gemtext.test

diff --git a/doc/user/syntax.skb b/doc/user/syntax.skb
index 9a4070c..2de7cbd 100644
--- a/doc/user/syntax.skb
+++ b/doc/user/syntax.skb
@@ -211,7 +211,26 @@ documents that can be output in variety of formats (see 
,(numref :text
 [Chapter] :ident "engines")).  The downside is that, being a very simple
 markup-less document format, there are many things that cannot be done
 using it, most notably tables, bibliographies, and cross-references.]))
-   
+
+   (section :title [The Gemtext Syntax] :ident "gemtext-syntax"
+     (p [,(ref
+:url "https://gemini.circumlunar.space/docs/gemtext.gmi";
+:text "Gemtext"), the lightweight markup language used by the ,(ref
+:url "https://gemini.circumlunar.space"; :text "Gemini protocol"), is
+supported as an input syntax. To use it, just pass ,(tt
+[--reader=gemtext]) to the compiler. When used programmatically, the
+Gemtext reader can be customized using the following options.])
+
+     (doc-markup 'make-gemtext-reader
+                 '((:join-lines? [If ,(code "#t"), lines which are not
+separated by a blank line are joined into a single paragraph. This is
+a relaxation of the Gemtext standard, and is not done by default.])
+                   (:section-numbers? [If ,(code "#t"), sections are
+numbered. Else, they are not.]))
+                 :common-args '()
+                 :source "skribilo/reader/gemtext.scm"
+                :idx *function-index*))
+
    (section :title [The RSS 2.0 Syntax]
             :ident "rss2-syntax"
       
diff --git a/src/guile/Makefile.am b/src/guile/Makefile.am
index 98f2873..0a66a88 100644
--- a/src/guile/Makefile.am
+++ b/src/guile/Makefile.am
@@ -47,7 +47,8 @@ SOURCES =                                                     
        \
 SOURCES += $(readers) $(packages) $(engines)
 
 readers =                                                      \
-  skribilo/reader/skribe.scm skribilo/reader/outline.scm
+  skribilo/reader/skribe.scm skribilo/reader/outline.scm       \
+  skribilo/reader/gemtext.scm
 
 if BUILD_RSS2_READER
 
diff --git a/src/guile/skribilo/reader/gemtext.scm 
b/src/guile/skribilo/reader/gemtext.scm
new file mode 100644
index 0000000..4ae403c
--- /dev/null
+++ b/src/guile/skribilo/reader/gemtext.scm
@@ -0,0 +1,231 @@
+;;; gemtext.scm  --  A reader for the Gemini protocol's Gemtext markup
+;;;
+;;; Copyright © 2022 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;;
+;;; This file is part of Skribilo.
+;;;
+;;; Skribilo is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; Skribilo is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with Skribilo.  If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (skribilo reader gemtext)
+  #:use-module (rnrs io ports)
+  #:use-module (srfi srfi-26)
+  #:use-module (srfi srfi-71)
+  #:use-module (srfi srfi-171)
+  #:use-module (ice-9 match)
+  #:use-module ((ice-9 textual-ports) #:select (unget-char unget-string))
+  #:use-module (skribilo reader)
+  #:use-module (skribilo utils syntax)
+  #:export (reader-specification
+            make-gemtext-reader))
+
+(skribilo-module-syntax)
+
+;;; Author: Arun Isaac
+;;;
+;;; Commentary:
+;;;
+;;; A reader for gemtext, the lightweight markup language used by the
+;;; Gemini protocol
+;;;
+;;; Code:
+
+(define %join-lines?
+  (make-parameter #f))
+
+(define %section-numbers?
+  (make-parameter #f))
+
+(define (string-blank? str)
+  "Return #t if STR contains only whitespace characters.  Else, return
+#f."
+  (string-every char-set:whitespace str))
+
+(define (string-remove-prefix prefix str)
+  "Return STR with PREFIX removed.  If PREFIX is not a prefix of STR,
+return #f."
+  (and (string-prefix? prefix str)
+       (substring str (string-length prefix))))
+
+(define (string-partition str char-pred)
+  "Return the part of STR before and after the first occurrence of
+CHAR-PRED as two values."
+  (let ((partition-index (string-index str char-pred)))
+    (if partition-index
+        (values (substring str 0 partition-index)
+                (substring str partition-index))
+        (values str #f))))
+
+(define (unget-line port line)
+  "Place the string LINE in PORT so that subsequent read operations
+will read LINE followed by a newline character."
+  (unget-char port #\newline)
+  (unget-string port line))
+
+(define (read-preformatted-text in out)
+  "Read preformatted text from port IN and write it to port OUT."
+  (let ((line (get-line in)))
+    (unless (or (eof-object? line)
+                (string-prefix? "```" line))
+      (put-string out line)
+      (newline out)
+      (read-preformatted-text in out))))
+
+(define (heading-level line)
+  "Return the level of the heading in LINE. If LINE is not a heading,
+return #f."
+  (cond
+   ((string-prefix? "### " line) 3)
+   ((string-prefix? "## " line) 2)
+   ((string-prefix? "# " line) 1)
+   (else #f)))
+
+(define (read-section-children level port)
+  "Read section elements of LEVEL from PORT. Return as a list."
+  (let ((line (get-line port)))
+    (cond
+     ;; End of file
+     ((eof-object? line) (list))
+     ;; If another heading of same or higher level begins, unget line
+     ;; and end section.
+     ((let ((heading-level (heading-level line)))
+        (and heading-level
+             (<= heading-level level)))
+      (unget-line port line)
+      (list))
+     ;; If blank line, continue.
+     ((string-blank? line)
+      (read-section-children level port))
+     ;; Else, add element and continue.
+     (else
+      (unget-line port line)
+      (cons (read-gemtext-element port)
+            (read-section-children level port))))))
+
+(define (paragraph-line? line)
+  "Return #t if LINE is a paragraph line. Else, return #f."
+  (not (or (string-blank? line)
+           (heading-level line)
+           (string-prefix? "* " line)
+           (string-prefix? ">" line)
+           (string-prefix? "=>" line)
+           (string-prefix? "```" line))))
+
+(define (link-line->ref line)
+  "Convert link LINE to a skribilo ref expression."
+  (let* ((trimmed-line (string-trim (string-remove-prefix "=>" line)))
+         (url text (string-partition trimmed-line (char-set #\space #\tab))))
+    (if text
+        `(ref #:url ,url #:text ,(string-trim text))
+        `(ref #:url ,url))))
+
+(define (retf-unget-line port result line)
+  "Unget LINE to PORT and return RESULT. This function is used as an
+argument to ttake-while."
+  (unget-line port line)
+  result)
+
+(define (read-gemtext-element port)
+  "Read next gemtext element from PORT."
+  (let ((line (get-line port)))
+    (cond
+     ;; End of file
+     ((eof-object? line) line)
+     ;; Section
+     ((heading-level line)
+      => (lambda (level)
+           `(,(case level
+                ((1) 'section)
+                ((2) 'subsection)
+                ((3) 'subsubsection))
+             #:title ,(substring line (1+ level))
+             #:number ,(%section-numbers?)
+             ,@(read-section-children level port))))
+     ;; List
+     ((string-remove-prefix "* " line)
+      => (lambda (first-item)
+           `(itemize
+             ,@(port-transduce (compose (ttake-while (cut string-prefix? "* " 
<>)
+                                                     (cut retf-unget-line port 
<> <>))
+                                        (tmap (lambda (line)
+                                                `(item ,(string-remove-prefix 
"* " line)))))
+                               rcons
+                               (list `(item ,first-item))
+                               get-line
+                               port))))
+     ;; Blockquote
+     ((string-remove-prefix ">" line)
+      => (lambda (first-line)
+           (list 'blockquote
+                 (if (%join-lines?)
+                     (string-join
+                      (port-transduce (compose (ttake-while (cut 
string-prefix? ">" <>)
+                                                            (cut 
retf-unget-line port <> <>))
+                                               (tmap (cut string-remove-prefix 
">" <>)))
+                                      rcons
+                                      (list first-line)
+                                      get-line
+                                      port)
+                      " ")
+                     line))))
+     ;; Link
+     ((string-prefix? "=>" line)
+      (cons 'paragraph
+            (port-transduce (compose (ttake-while (cut string-prefix? "=>" <>)
+                                                  (cut retf-unget-line port <> 
<>))
+                                     (tmap link-line->ref))
+                            rcons
+                            (list (link-line->ref line))
+                            get-line
+                            port)))
+     ;; Preformatted text
+     ((string-remove-prefix "```" line)
+      => (lambda (alt-text)
+           ;; We don't use the alt text.
+           `(pre ,(call-with-output-string
+                    (cut read-preformatted-text port <>)))))
+     ;; Ignore blank lines.
+     ((string-blank? line) (read-gemtext-element port))
+     ;; Paragraph
+     (else
+      (list 'paragraph
+            (if (%join-lines?)
+                (string-join
+                 (port-transduce (ttake-while paragraph-line?
+                                              (cut retf-unget-line port <> <>))
+                                 rcons
+                                 (list line)
+                                 get-line
+                                 port)
+                 " ")
+                line))))))
+
+(define* (make-gemtext-reader :key join-lines? section-numbers?)
+  "Return a gemtext reader.
+
+If JOIN-LINES? is #t, lines which are not separated by a blank line
+are joined into a single paragraph.
+
+If SECTION-NUMBERS? is #t, sections are numbered. Else, they are not."
+  (lambda (port)
+    (parameterize ((%join-lines? join-lines?)
+                   (%section-numbers? section-numbers?))
+      (match (port-transduce (tmap identity)
+                             rcons
+                             read-gemtext-element
+                             port)
+        (() (eof-object))
+        (elements `(document ,@elements))))))
+
+(define-reader gemtext "0.1" make-gemtext-reader)
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 8ba7637..16478a9 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -13,7 +13,8 @@ TESTS =                                               \
   ast.test                                     \
   resolve.test                                 \
   engines/info.test                            \
-  location.test
+  location.test                                        \
+  readers/gemtext.test
 
 if BUILD_RSS2_READER
 
diff --git a/tests/readers/gemtext.test b/tests/readers/gemtext.test
new file mode 100644
index 0000000..99891c8
--- /dev/null
+++ b/tests/readers/gemtext.test
@@ -0,0 +1,133 @@
+;;; Exercise Gemtext reader.                  -*- Scheme -*-
+;;;
+;;; Copyright © 2022 Arun Isaac <arunisaac@systemreboot.net>
+;;;
+;;;
+;;; This file is part of Skribilo.
+;;;
+;;; Skribilo is free software: you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation, either version 3 of the License, or
+;;; (at your option) any later version.
+;;;
+;;; Skribilo is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with Skribilo.  If not, see <http://www.gnu.org/licenses/>.
+
+(define-module (tests gemtext)
+  #:use-module (srfi srfi-64)
+  #:use-module (ice-9 match)
+  #:use-module (skribilo reader))
+
+(define make-gemtext-reader
+  (reader:make (lookup-reader 'gemtext)))
+
+(define-syntax-rule (match? exp pattern)
+  (match exp
+    (pattern #t)
+    (_ #f)))
+
+
+
+(test-begin "gemtext")
+
+(test-assert "basic gemtext document"
+  (match? (call-with-input-string "# Heading
+* Mercury
+* Gemini
+* Apollo
+## Subheading
+
+### Subsubheading
+
+> I contend that text-based websites should not exceed in size the major works 
of Russian literature.
+
+# Links
+
+=>https://example.com A cool website
+=>gopher://example.com      An even cooler gopherhole
+=> gemini://example.com A supremely cool Gemini capsule
+=>   sftp://example.com
+
+```
+This is a preformatted block.
+```
+
+```alt
+This is a preformatted block with \"alt text\".
+```"
+            (make-gemtext-reader))
+          `(document
+            (section #:title "Heading" #:number #f
+                     (itemize (item "Mercury")
+                              (item "Gemini")
+                              (item "Apollo"))
+                     (subsection #:title "Subheading" #:number #f
+                                 (subsubsection #:title "Subsubheading" 
#:number #f
+                                                (blockquote "> I contend that 
text-based websites should not exceed in size the major works of Russian 
literature."))))
+            (section #:title "Links" #:number #f
+                     (paragraph (ref #:url "https://example.com"; #:text "A 
cool website")
+                                (ref #:url "gopher://example.com"; #:text "An 
even cooler gopherhole")
+                                (ref #:url "gemini://example.com" #:text "A 
supremely cool Gemini capsule")
+                                (ref #:url "sftp://example.com";))
+                     (pre "This is a preformatted block.\n")
+                     (pre "This is a preformatted block with \"alt 
text\".\n")))))
+
+(test-assert "do not join short lines into paragraph"
+  (match? (call-with-input-string "Foo
+Bar"
+            (make-gemtext-reader))
+          `(document
+            (paragraph "Foo")
+            (paragraph "Bar"))))
+
+(test-assert "join short lines into paragraphs"
+  (match? (call-with-input-string "Foo
+Bar"
+            (make-gemtext-reader #:join-lines? #t))
+          `(document
+            (paragraph "Foo Bar"))))
+
+(test-assert "do not number sections"
+  (match? (call-with-input-string "# Foo
+## Bar"
+            (make-gemtext-reader))
+          `(document
+            (section #:title "Foo" #:number #f
+                     (subsection #:title "Bar" #:number #f)))))
+
+(test-assert "number sections"
+  (match? (call-with-input-string "# Foo
+## Bar"
+            (make-gemtext-reader #:section-numbers? #t))
+          `(document
+            (section #:title "Foo" #:number #t
+                     (subsection #:title "Bar" #:number #t)))))
+
+(test-assert "break up links separated by blank lines into paragraphs"
+  (match? (call-with-input-string "=>https://example.com A cool website
+=>gopher://example.com      An even cooler gopherhole
+
+=> gemini://example.com A supremely cool Gemini capsule
+=>   sftp://example.com";
+            (make-gemtext-reader))
+          `(document
+            (paragraph (ref #:url "https://example.com"; #:text "A cool 
website")
+                       (ref #:url "gopher://example.com"; #:text "An even 
cooler gopherhole"))
+            (paragraph (ref #:url "gemini://example.com" #:text "A supremely 
cool Gemini capsule")
+                       (ref #:url "sftp://example.com";)))))
+
+(test-assert "ignore blank lines that have a non-zero number of whitespace 
characters"
+  (match? (call-with-input-string "Foo
+  
+Bar"
+            (make-gemtext-reader))
+          `(document
+            (paragraph "Foo")
+            (paragraph "Bar"))))
+
+(test-end "gemtext")
-- 
2.34.0






reply via email to

[Prev in Thread] Current Thread [Next in Thread]