;;; combinatorix
;;;
;;; Copyright © 2018 Amirouche Boubekki
;;;
;;; This module is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Lesser General Public License
;;; as published by the Free Software Foundation; either version 3 of
;;; the License, or (at your option) any later version.
;;;
;;; This module is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;; Lesser General Public License for more details.
;;;
;;; You should have received a copy of the GNU Lesser General Public
;;; License along with this module. If not, see
;;; .
;;; Commentary:
;;
;; Parser combinators.
;;
;; TODO:
;;
;; - improve error handling
;;
;; Also see:
;;
;; - https://epsil.github.io/gll/
;; - https://docs.racket-lang.org/parsack/index.html
;; - https://docs.racket-lang.org/megaparsack/
;; - https://git.dthompson.us/guile-parser-combinators.git
;; - https://gitlab.com/tampe/stis-parser
;;
;;; Code:
(define-module (neon combinatorix))
(use-modules ((srfi srfi-9)))
(use-modules ((srfi srfi-9 gnu)))
(use-modules ((ice-9 match)))
(use-modules ((neon streams)))
;; test macro
(define-syntax-rule (test-check test-name expr expected)
(when (getenv "DEBUG")
(format #t "* ~a: " test-name)
(let ((expr* expr)
(expected* expected))
(if (equal? expr* expected*)
(format #t "PASS :)\n")
(begin
(format #t "FAILED :(\n")
(format #t "** expected: ~s\n" expected*)
(format #t "** found: ~s\n" expr*))))))
(define-record-type
(make-result value stream)
result?
(value result-value)
(stream result-stream))
(define-record-type
(make-error value parser args)
error?
(value error-value)
(parser error-parser)
(args error-args))
(define continue make-result)
(define (fail stream parser args)
(make-error (stream-car stream) parser args))
(define-record-type
(make-xchar char line column offset)
xchar?
(char xchar-char)
(line xchar-line)
(column xchar-column)
(offset xchar-offset))
(define-public (lift proc parser)
"Apply PROC to the result of PARSER"
(lambda (stream)
(match (parser stream)
(($ value stream) (continue (proc value) stream))
(else else))))
(export xchar-char)
(define (xchar-format xchar port)
(format port ""
(xchar-char xchar)
(xchar-line xchar)
(xchar-column xchar)
(xchar-offset xchar)))
(set-record-type-printer! xchar-format)
(define (string->xchar-stream string)
;; TODO: optimize
(let loop ((chars (string->list string))
(line 1)
(column 1)
(offset 0)
(out '()))
(if (null? chars)
(list->stream (reverse! out))
(if (eq? (car chars) #\newline)
(loop (cdr chars)
(+ 1 line)
1
(+ 1 offset)
(cons (make-xchar #\newline line column offset) out))
(loop (cdr chars)
line
(+ 1 column)
(+ 1 offset)
(cons (make-xchar (car chars) line column offset) out))))))
(define-public (parse parser string)
(match (parser (string->xchar-stream string))
(($ value (? stream-null? stream)) value)
(else (throw 'combinatorix else))))
(define-public (parse-xchar char)
(lambda (stream)
(call-with-values stream
(lambda (value next)
(if next
(if (char=? (xchar-char value) char)
(continue value next)
(fail stream parse-xchar char))
(fail stream parse-xchar char))))))
(test-check "parse-xchar"
(xchar-char (parse (parse-xchar #\c) "c"))
#\c)
(define (either2 one two)
(lambda (stream)
(let ((result (one stream)))
(if (result? result)
result
(two stream)))))
(test-check "either2 az 1"
(xchar-char (parse (either2 (parse-xchar #\a) (parse-xchar #\z)) "a"))
#\a)
(test-check "either2 az 2"
(xchar-char (parse (either2 (parse-xchar #\a) (parse-xchar #\z)) "z"))
#\z)
(define (each2 one two)
(lambda (stream)
(match (one stream)
(($ a next0)
(match (two next0)
(($ b next1)
(continue (cons a b) next1))
(else else)))
(else else))))
(test-check "each2 az"
((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b))))
(parse (each2 (parse-xchar #\a) (parse-xchar #\z)) "az"))
(cons #\a #\z))
(test-check "each2+either2 ae"
((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b))))
(parse (each2 (either2 (parse-xchar #\a) (parse-xchar #\z))
(parse-xchar #\e))
"ae"))
(cons #\a #\e))
(test-check "each2+either2 ze"
((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b))))
(parse (each2 (either2 (parse-xchar #\a) (parse-xchar #\z))
(parse-xchar #\e))
"ze"))
(cons #\z #\e))
(define (%either . parsers)
(lambda (stream)
(let loop ((parsers parsers))
(if (null? parsers)
(fail stream %either (map (lambda (x) (x)) parsers))
(let ((continue (((car parsers)) stream)))
(if (result? continue)
continue
(loop (cdr parsers))))))))
(define-syntax-rule (either parser ...)
(%either (lambda () parser) ...))
(export either)
(test-check "either abc 1"
(xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c))
"a"))
#\a)
(test-check "either abc 2"
(xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c))
"b"))
#\b)
(test-check "either abc 3"
(xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c))
"c"))
#\c)
(define (%each . parsers)
(lambda (stream)
(let loop ((parsers parsers)
(stream stream)
(out '()))
(if (null? parsers)
(continue (reverse! out) stream)
(match (((car parsers)) stream)
(($ value stream) (loop (cdr parsers) stream (cons value out)))
(else else))))))
(define-syntax-rule (each parser ...)
(%each (lambda () parser) ...))
(export each)
(test-check "each abc"
(list->string (map xchar-char (parse (each (parse-xchar #\a)
(parse-xchar #\b)
(parse-xchar #\c)
(parse-xchar #\d))
"abcd")))
"abcd")
(define-public (zero-or-more parser)
(lambda (stream)
(let loop ((stream stream)
(out '()))
(match (parser stream)
(($ value next)
(loop next (cons value out)))
(else (continue (reverse! out) stream))))))
(test-check "zero or more 1"
(list->string (map xchar-char (parse (zero-or-more (parse-xchar #\a)) "aaa")))
"aaa")
(test-check "zero or more 2"
(list->string (map xchar-char (parse (zero-or-more (parse-xchar #\a)) "")))
"")
(define-public (one-or-more parser)
(lift (lambda (x) (apply cons x)) (each parser (zero-or-more parser))))
(test-check "one or more"
(list->string (map xchar-char (parse (one-or-more (parse-xchar #\a)) "aaa")))
"aaa")
(define-public (otherwise predicate parser)
(lambda (stream)
(if (error? (predicate stream))
(parser stream)
(fail stream predicate parser))))
(test-check "recursive lift..."
(letrec ((recursive (lift (lambda (a) (if (pair? a) a (list a)))
(either
(lift (lambda (a) (apply cons* a))
(each (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c) recursive))
(parse-xchar #\x)))))
(list->string (map xchar-char (parse recursive "abcabcx"))))
"abcabcx")
(define-public (parse-char-set char-set)
(lambda (stream)
(call-with-values stream
(lambda (value next)
(if next
(if (char-set-contains? char-set (xchar-char value))
(continue value next)
(fail stream parse-char-set char-set))
(fail stream parse-char-set char-set))))))
(define-public any
(lambda (stream)
(call-with-values stream
(lambda (value next)
(if next
(continue value next)
(fail stream any '()))))))