;;; combinatorix ;;; ;;; Copyright © 2018 Amirouche Boubekki ;;; ;;; This module is free software; you can redistribute it and/or ;;; modify it under the terms of the GNU Lesser General Public License ;;; as published by the Free Software Foundation; either version 3 of ;;; the License, or (at your option) any later version. ;;; ;;; This module is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;;; Lesser General Public License for more details. ;;; ;;; You should have received a copy of the GNU Lesser General Public ;;; License along with this module. If not, see ;;; . ;;; Commentary: ;; ;; Parser combinators. ;; ;; TODO: ;; ;; - improve error handling ;; ;; Also see: ;; ;; - https://epsil.github.io/gll/ ;; - https://docs.racket-lang.org/parsack/index.html ;; - https://docs.racket-lang.org/megaparsack/ ;; - https://git.dthompson.us/guile-parser-combinators.git ;; - https://gitlab.com/tampe/stis-parser ;; ;;; Code: (define-module (neon combinatorix)) (use-modules ((srfi srfi-9))) (use-modules ((srfi srfi-9 gnu))) (use-modules ((ice-9 match))) (use-modules ((neon streams))) ;; test macro (define-syntax-rule (test-check test-name expr expected) (when (getenv "DEBUG") (format #t "* ~a: " test-name) (let ((expr* expr) (expected* expected)) (if (equal? expr* expected*) (format #t "PASS :)\n") (begin (format #t "FAILED :(\n") (format #t "** expected: ~s\n" expected*) (format #t "** found: ~s\n" expr*)))))) (define-record-type (make-result value stream) result? (value result-value) (stream result-stream)) (define-record-type (make-error value parser args) error? (value error-value) (parser error-parser) (args error-args)) (define continue make-result) (define (fail stream parser args) (make-error (stream-car stream) parser args)) (define-record-type (make-xchar char line column offset) xchar? (char xchar-char) (line xchar-line) (column xchar-column) (offset xchar-offset)) (define-public (lift proc parser) "Apply PROC to the result of PARSER" (lambda (stream) (match (parser stream) (($ value stream) (continue (proc value) stream)) (else else)))) (export xchar-char) (define (xchar-format xchar port) (format port "" (xchar-char xchar) (xchar-line xchar) (xchar-column xchar) (xchar-offset xchar))) (set-record-type-printer! xchar-format) (define (string->xchar-stream string) ;; TODO: optimize (let loop ((chars (string->list string)) (line 1) (column 1) (offset 0) (out '())) (if (null? chars) (list->stream (reverse! out)) (if (eq? (car chars) #\newline) (loop (cdr chars) (+ 1 line) 1 (+ 1 offset) (cons (make-xchar #\newline line column offset) out)) (loop (cdr chars) line (+ 1 column) (+ 1 offset) (cons (make-xchar (car chars) line column offset) out)))))) (define-public (parse parser string) (match (parser (string->xchar-stream string)) (($ value (? stream-null? stream)) value) (else (throw 'combinatorix else)))) (define-public (parse-xchar char) (lambda (stream) (call-with-values stream (lambda (value next) (if next (if (char=? (xchar-char value) char) (continue value next) (fail stream parse-xchar char)) (fail stream parse-xchar char)))))) (test-check "parse-xchar" (xchar-char (parse (parse-xchar #\c) "c")) #\c) (define (either2 one two) (lambda (stream) (let ((result (one stream))) (if (result? result) result (two stream))))) (test-check "either2 az 1" (xchar-char (parse (either2 (parse-xchar #\a) (parse-xchar #\z)) "a")) #\a) (test-check "either2 az 2" (xchar-char (parse (either2 (parse-xchar #\a) (parse-xchar #\z)) "z")) #\z) (define (each2 one two) (lambda (stream) (match (one stream) (($ a next0) (match (two next0) (($ b next1) (continue (cons a b) next1)) (else else))) (else else)))) (test-check "each2 az" ((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b)))) (parse (each2 (parse-xchar #\a) (parse-xchar #\z)) "az")) (cons #\a #\z)) (test-check "each2+either2 ae" ((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b)))) (parse (each2 (either2 (parse-xchar #\a) (parse-xchar #\z)) (parse-xchar #\e)) "ae")) (cons #\a #\e)) (test-check "each2+either2 ze" ((match-lambda ((a . b) (cons (xchar-char a) (xchar-char b)))) (parse (each2 (either2 (parse-xchar #\a) (parse-xchar #\z)) (parse-xchar #\e)) "ze")) (cons #\z #\e)) (define (%either . parsers) (lambda (stream) (let loop ((parsers parsers)) (if (null? parsers) (fail stream %either (map (lambda (x) (x)) parsers)) (let ((continue (((car parsers)) stream))) (if (result? continue) continue (loop (cdr parsers)))))))) (define-syntax-rule (either parser ...) (%either (lambda () parser) ...)) (export either) (test-check "either abc 1" (xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c)) "a")) #\a) (test-check "either abc 2" (xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c)) "b")) #\b) (test-check "either abc 3" (xchar-char (parse (either (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c)) "c")) #\c) (define (%each . parsers) (lambda (stream) (let loop ((parsers parsers) (stream stream) (out '())) (if (null? parsers) (continue (reverse! out) stream) (match (((car parsers)) stream) (($ value stream) (loop (cdr parsers) stream (cons value out))) (else else)))))) (define-syntax-rule (each parser ...) (%each (lambda () parser) ...)) (export each) (test-check "each abc" (list->string (map xchar-char (parse (each (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c) (parse-xchar #\d)) "abcd"))) "abcd") (define-public (zero-or-more parser) (lambda (stream) (let loop ((stream stream) (out '())) (match (parser stream) (($ value next) (loop next (cons value out))) (else (continue (reverse! out) stream)))))) (test-check "zero or more 1" (list->string (map xchar-char (parse (zero-or-more (parse-xchar #\a)) "aaa"))) "aaa") (test-check "zero or more 2" (list->string (map xchar-char (parse (zero-or-more (parse-xchar #\a)) ""))) "") (define-public (one-or-more parser) (lift (lambda (x) (apply cons x)) (each parser (zero-or-more parser)))) (test-check "one or more" (list->string (map xchar-char (parse (one-or-more (parse-xchar #\a)) "aaa"))) "aaa") (define-public (otherwise predicate parser) (lambda (stream) (if (error? (predicate stream)) (parser stream) (fail stream predicate parser)))) (test-check "recursive lift..." (letrec ((recursive (lift (lambda (a) (if (pair? a) a (list a))) (either (lift (lambda (a) (apply cons* a)) (each (parse-xchar #\a) (parse-xchar #\b) (parse-xchar #\c) recursive)) (parse-xchar #\x))))) (list->string (map xchar-char (parse recursive "abcabcx")))) "abcabcx") (define-public (parse-char-set char-set) (lambda (stream) (call-with-values stream (lambda (value next) (if next (if (char-set-contains? char-set (xchar-char value)) (continue value next) (fail stream parse-char-set char-set)) (fail stream parse-char-set char-set)))))) (define-public any (lambda (stream) (call-with-values stream (lambda (value next) (if next (continue value next) (fail stream any '()))))))