From 5d804dfac41897aa069399516af2846e73d04f2f Mon Sep 17 00:00:00 2001 From: Eric Bavier Date: Mon, 15 Feb 2016 19:36:09 -0600 Subject: [PATCH] Add compiler for the Joy language. * module/language/joy/lexer.scm, module/language/joy/parser.scm, module/language/joy/compile-tree-il.scm, module/language/joy/spec.scm, module/language/joy/primitives.scm, module/language/joy/eval.scm, module/language/joy/base.joy, module/language/joy/tests/inicheck.joy, module/language/joy/tests/test-base.joy, module/language/joy/joy-mode.el: New files. --- module/language/joy/base.joy | 269 ++++++++++++++++++++++++++++++++ module/language/joy/compile-tree-il.scm | 81 ++++++++++ module/language/joy/eval.scm | 29 ++++ module/language/joy/joy-mode.el | 74 +++++++++ module/language/joy/lexer.scm | 255 ++++++++++++++++++++++++++++++ module/language/joy/parser.scm | 148 ++++++++++++++++++ module/language/joy/primitives.scm | 148 ++++++++++++++++++ module/language/joy/spec.scm | 18 +++ module/language/joy/tests/inicheck.joy | 52 ++++++ module/language/joy/tests/test-base.joy | 197 +++++++++++++++++++++++ 10 files changed, 1271 insertions(+) create mode 100644 module/language/joy/base.joy create mode 100644 module/language/joy/compile-tree-il.scm create mode 100644 module/language/joy/eval.scm create mode 100644 module/language/joy/joy-mode.el create mode 100644 module/language/joy/lexer.scm create mode 100644 module/language/joy/parser.scm create mode 100644 module/language/joy/primitives.scm create mode 100644 module/language/joy/spec.scm create mode 100644 module/language/joy/tests/inicheck.joy create mode 100644 module/language/joy/tests/test-base.joy diff --git a/module/language/joy/base.joy b/module/language/joy/base.joy new file mode 100644 index 0000000..2260993 --- /dev/null +++ b/module/language/joy/base.joy @@ -0,0 +1,269 @@ +(* base.joy -- basic operators and combinators for Joy. + + Various useful operators and combinators written in terms of Joy + primitives. + + Copyright © 2016 Eric Bavier + + This source is released under the terms of the GNU General Public + License, version 3, or (at your option) any later version. +*) + +DEFINE + +(* ===== Stack manipulation operators ===== *) + + newstack == [] unstack ; + dup2 == dup [[dup] dip swap] dip ; + rollup == swap [swap] dip ; # [S | x y z] => [S | z x y] + rolldown == [swap] dip swap ; # [S | x y z] => [S | y z x] + rotate == rollup swap ; # [S | x y z] => [S | z y x] + popd == [pop] dip ; + dupd == [dup] dip ; + swapd == [swap] dip ; + pop2 == pop pop ; + popop == pop2 ; + +(* ===== General combinators ===== *) + (* We could use 'dup dip pop' to define i as exmplained in + "Mathematical Foundation of Joy", but it is not efficient + as the straighforward definition. *) + i == stack cdr swap infra unstack ; + dip == stack cddr swap infra cons unstack ; + dipd == [dip] cons dip ; + dipdd == [dipd] cons dip ; + nullary == stack cdr swap infra car ; + unary == stack cdr swap infra car popd ; + branch == choice i ; + ifte == [[stack] dip infra car] dipd branch ; + app1 == i ; + app2 == dup rollup i [i] dip ; + app3 == dup rollup i [app2] dip ; + i2 == [dip] dip i ; + shunt == [swons] step ; # See literature for description + (* The definition 'b == concat i' is elegant, but it is also + costly (I think? TODO: check). *) + b == [i] dip i ; + cleave == [nullary] dip swap [nullary] dip swap ; + k == [pop] dip i ; + w == [dup] dip i ; + c == [swap] dip i ; + + (* [S | L [P]] : Step through the list L, unconsing the first + element, placing it on the top S and executing the quoted + program P. *) + step == + [pop null] + [pop pop] + [[uncons] dip dup dipd] + tailrec ; + + (* [S | I [P]] :: Execute quoted program P I times. *) + times == + swap + [0 <=] + [pop pop] + [pred [dup dip] dip] + tailrec ; + +(* ===== List operators ===== *) + car == uncons pop ; + cdr == unswons pop ; + cddr == cdr cdr ; + cadr == cdr car ; + caddr == cddr car ; + first == car ; + second == cadr ; + third == caddr ; + rest == cdr ; + leaf == list not ; + quote == [] cons ; + unpair == uncons uncons pop ; + pairlist == [] cons cons ; + concat == swap swoncat ; + swoncat == reverse shunt ; + swons == swap cons ; + unswons == uncons swap ; + null == [list] [[] =] [0 =] ifte ; + nulld == [null] dip ; + consd == [cons] dip ; + swonsd == [swons] dip ; + unconsd == [uncons] dip ; + unswonsd == [unswons] dip unswons swapd ; + null2 == nulld null or ; + cons2 == swapd cons consd ; + uncons2 == unconsd uncons swapd ; + swons2 == swapd swons swonsd ; + zip == + [null2] + [pop pop []] + [uncons2] + [[pairlist] dip cons] + linrec ; + sum == 0 swap [+ ] step ; + product == 1 swap [* ] step ; + size == 0 swap [pop succ] step ; + size2 == 0 swap [size + ] step ; # two levels of nesting + + (* reverse the aggregate on top of the stack *) + reverse == [] swap [swons] step ; + + (* [S | L V O] => [S | V'], where L is a list, V is an initial + value, and O is a quoted binary operator. *) + fold == swapd step ; + + (* [S | L P] => [S | B], where B is true if applying the predicate + P to each element of L produces true, otherwise false. It does + not short-circuit. *) + every == [i and] cons true fold ; + all == every ; # reference name + + (* [S | L P] => [S | B], where B is true if applying the predicate + P to any element of L produces true, otherwise false. It does + not short-circuit. *) + any == [i or] cons false fold ; + some == any ; # reference name + + (* Treat each element of an aggregate as a new stack, and apply + the given unary operator to it, resulting in a new aggregate + of the results *) + map == + [] # initialize accumulator + [pop pop null] + [rollup pop pop] + [[unswons [] cons] dipd # pull out first and create new list + dupd [infra] dipd # exec copy of quotation on this + rolldown car swons] # add it to accumulator + tailrec + reverse ; + + (* [S | L L' O] => [S | L''] where L'' is the list resulting from + applying the binary operator O to respective pairs of elements + from L and L'. L'' is the same length as the shortest of L and + L'. *) + map2 == + [] # initialize accumulator + [pop pop null2] + [[pop pop pop] dip] # Remove operator, L, and L' + [[[unswons] dipd swapd # pull out first of L + [unswons [] cons] dipd # pull out first of L' + swonsd # make a list of the two + dup [infra] dip] # exec copy of quotation on this + dip + rolldown car swons] # add it to accumulator + tailrec + reverse ; + + (* [S | L L'] => [S | B], where B is true if every element of list + L compares equal to each respective element of L', otherwise + false. *) + equal == + [[size] app2 =] + [ true [[[list] app2] [equal] [=] ifte] fold ] + [false] + ifte ; + + (* [S | L I] -> [S | L'] where L' is L with I elements removed + from the front. *) + drop == + [0 <=] + [pop] + [pred [cdr] dip] + tailrec ; + (* [S | L I] -> [S | L'] where L' is the first I items of L. *) + take == + [] rollup # initialize accumulator + [0 <=] + [pop pop reverse] + [pred [uncons] dip [swons] dipd] + tailrec ; + + (* fold == swapd step ; *) + at == drop car ; + of == swap at ; + + +(* ===== Boolean and Mathematic operators ===== *) + pred == 1 - ; + succ == 1 + ; + true == [true] car ; + false == [false] car ; + >= == dup2 > [=] dip or ; + <= == dup2 < [=] dip or ; + != == = not ; + or == [pop true] [] branch ; + and == [] [pop false] branch ; + not == false true choice ; + xor == dup2 or rollup and not and ; + max == dup2 > rollup choice ; + min == dup2 < rollup choice ; + sign == [0 >] [1] [[0 <] [-1] [0] ifte] ifte ; + (* [S | Y X] -> [S | D M] where Y = D*X + M *) + divmod == + [0] rollup # initialize marker list + [<] # When Y < X + [pop swap] # Remove X, bring markers to front + [dup [-] dip # Recurse with Y<-Y-X ... + [1 swons] dipd] # and mark + [[+] infra] # Accumulate division markers + linrec # [S | M [D]] + car swap ; # [S | D M] + / == divmod pop ; + % == divmod swap pop ; + * == # WARNING: Only for positive integers + dup2 min [max] dip # Put the larger number on top + [0 =] + [pop pop 0] + [pred dupd] + [+] + linrec ; + exp == + [0 =] + [pop pop 1] + [pred dupd] + [*] + linrec ; + sum-up-to == [0 =] [pop 0] [dup 1 -] [+] linrec ; + fact == [0 =] [pop 1] [dup 1 -] [*] linrec ; + +(* ===== Recursion combinators ===== *) + (* [S | [I} [T] [E1] [E2]] - Like the ifte combinator it executes + I, and if that yields true it executes T. Otherwise it + executes E1, then it recurses with all 4 parts, and finally it + executes E2. *) + # For example: + # fact == + # [0 =] + # [pop 1] + # [dup 1 -] + # [*] + # linrec . + # becomes: + # fact == + # [ [pop 0 =] + # [pop pop 1] + # [ [dup 1 -] dip + # dup i + # * ] + # ifte ] + # dup i . + make-linrec == + [[[pop] car swons] app2] dipd # [[E2] [E1] [pop T] [pop I] | S] + [i] car swons [dup] car swons [dip] car swons cons + [ifte] cons cons cons # [[ifte [[E1] dip dup i E2] [pop T] [pop I] | S] + ; + linrec == make-linrec dup i ; + + make-tailrec == + [[[pop] car swons] app2] dip + [dip dup i] cons + [ifte] cons cons cons ; + tailrec == make-tailrec dup i ; + + +(* ===== IO operators ===== *) + newline == '\n putch ; + putchars == [putch] step ; + putstrings == [putchars] step ; + +END \ No newline at end of file diff --git a/module/language/joy/compile-tree-il.scm b/module/language/joy/compile-tree-il.scm new file mode 100644 index 0000000..a89e0ec --- /dev/null +++ b/module/language/joy/compile-tree-il.scm @@ -0,0 +1,81 @@ +;;; compile-tree-il.scm -- compile Joy to tree-il. +;;; +;;; Copyright © 2016 Eric Bavier +;;; +;;; This source is released under the terms of the GNU General Public +;;; License, version 3, or (at your option) any later version. + +(define-module (language joy compile-tree-il) + #:use-module (language tree-il) + #:use-module (system base pmatch) + #:use-module (srfi srfi-1) + #:export (compile-tree-il)) + +(define (location x) + (and (pair? x) + (let ((props (source-properties x))) + (and (not (null? props)) + props)))) + +(define *eval* '(language joy eval)) + +(define (compile-factor fact) + (let ((loc (location fact))) + (cond + ((list? fact) + (make-application loc + (make-primitive-ref loc 'list) + (map compile-factor fact))) + (else + (make-const loc fact))))) + +(define (compile-term term) + (let ((loc (location term))) + (pmatch term + ((term ,factors) + (make-application loc + (make-primitive-ref loc 'list) + (map compile-factor factors)))))) + +(define (compile-definition def) + (let ((loc (location def))) + (pmatch def + ((public ,name ,term) + (make-toplevel-define loc + name + (compile-term term))) + ((private ,name ,term) ;TODO: make private! + (make-toplevel-define loc + name + (compile-term term)))))) + +(define (compile-expr expr) + (let ((loc (location expr))) + (pmatch expr + ;; Literals + ((term ,factors) + (make-application loc + (make-module-ref loc '(srfi srfi-1) + 'fold #t) + (list + (make-module-ref loc *eval* 'eval #f) + ;; Toplevel terms are executed with an empty + ;; stack. This behavior deviates from the + ;; reference implementation, but I don't + ;; believe it strays from the spirit of Joy + ;; or is particularly burdensome. + (make-const #f '()) + (compile-term expr)))) + ((definitions . ,defs) + (make-sequence loc (map compile-definition defs)))))) + +(define (process-options! opts) + #t) + +(define (compile-tree-il expr env opts) + (values + (begin + (process-options! opts) + (compile-expr expr)) + env + env)) diff --git a/module/language/joy/eval.scm b/module/language/joy/eval.scm new file mode 100644 index 0000000..256f987 --- /dev/null +++ b/module/language/joy/eval.scm @@ -0,0 +1,29 @@ +;;; eval.scm -- Joy runtime. +;;; +;;; Copyright © 2016 Eric Bavier +;;; +;;; This source is released under the terms of the GNU General Public +;;; License, version 3, or (at your option) any later version. + +(define-module (language joy eval) + #:use-module (srfi srfi-1) + #:replace (eval)) + +(define *primitives* '(language joy primitives)) + +(define (eval x S) + "Evaluate joy term X with the stack S." + (cond + ((symbol? x) + (let ((v (or (and=> (module-variable + (resolve-interface *primitives*) x) + variable-ref) + (module-ref (current-module) x)))) + (if (procedure? v) + ;; Joy primitives are procedures that must be applied + (apply v S) + ;; Variables from the 'DEFINE ...' syntax are lists of factors + ;; to be evaluated. + (fold eval S v)))) + (else + (cons x S)))) diff --git a/module/language/joy/joy-mode.el b/module/language/joy/joy-mode.el new file mode 100644 index 0000000..e471a5b --- /dev/null +++ b/module/language/joy/joy-mode.el @@ -0,0 +1,74 @@ +;;; joy-mode.el --- major mode for editing Joy source. +;;; +;;; Copyright © 2016 Eric Bavier +;;; +;;; This source is released under the terms of the GNU General Public +;;; License, version 3, or (at your option) any later version. + +;;; Commentary: + +;; - provides syntax highlighting for the small number of primitive +;; operators in Joy, as well as for the fundamental datatypes. +;; +;; TODO: +;; - comment insertion/deletion +;; - indentation +;; - function documentation lookup +;; - function template insertion + +;;; Code: + +(setq joy-keywords '("DEFINE" "PUBLIC" "PRIVATE" "LIBRA" "HIDE" "END" "IN")) +(setq joy-primitives '("unstack" "display" "include" "logical" "integer" + "string" "choice" "uncons" "infra" "stack" "putch" + "swap" "cons" "char" "list" "dup" "pop")) + +(setq joy-keywords-regexp (regexp-opt joy-keywords 'words)) +(setq joy-primitives-regexp (regexp-opt joy-primitives 'words)) + +(setq joy-font-lock-keywords + `("==" ";" + ("\\(\\.\\)[^[:digit:]]" 1) + ("\\([[:graph:]]+\\)[[:blank:]]*==" 1 font-lock-function-name-face) + ("'\\\\?[[:alnum:]]\\([[:digit:]][[:digit:]]\\)?" . font-lock-type-face) + (,joy-keywords-regexp . font-lock-keyword-face) + (,joy-primitives-regexp . font-lock-builtin-face))) + +(defvar joy-syntax-table nil "Syntax table for `joy-mode'.") +(setq joy-syntax-table + (let ((synTable (make-syntax-table))) + ;; bash style comment: "# ..." + (modify-syntax-entry ?# "< b" synTable) + (modify-syntax-entry ?\n "> b" synTable) + + ;; Mathematic style comment: "(* ... *)" + (modify-syntax-entry ?\( ". 1" synTable) + (modify-syntax-entry ?\) ". 4" synTable) + (modify-syntax-entry ?* ". 23" synTable) + + synTable)) + +;;;###autoload +(define-derived-mode joy-mode fundamental-mode + "Joy mode" + "Major mode for editing the purely functional, + concatenative programming language Joy." + :syntax-table joy-syntax-table + + (setq font-lock-defaults '(joy-font-lock-keywords)) + (setq mode-name "joy")) + +;; clear memory. no longer needed +(setq joy-keywords nil) +(setq joy-primitives nil) +(setq joy-keywords-regexp nil) +(setq joy-primitives-regexp nil) + +;; add the mode to the `features' list +(provide 'joy-mode) + +;; Local Variables: +;; coding: utf-8 +;; End: + +;;; joy-mode.el ends here diff --git a/module/language/joy/lexer.scm b/module/language/joy/lexer.scm new file mode 100644 index 0000000..494f71b --- /dev/null +++ b/module/language/joy/lexer.scm @@ -0,0 +1,255 @@ +;;; lexer.scm -- lexer for Joy. +;;; +;;; Copyright © 2016 Eric Bavier +;;; +;;; This source is released under the terms of the GNU General Public +;;; License, version 3, or (at your option) any later version. + +;;; Code: + +(define-module (language joy lexer) + #:use-module (ice-9 rdelim) + #:export (get-lexer)) + +;;; See j09imp.html for a more thorough description of that prototype of +;;; a Joy interpreter. +;;; +;;; There it says that joy interpreter supports lines starting with '$', +;;; which are processed by the command shell. Interesting. + +(define *keywords* + '(("==" . ==) + ("MODULE" . module) + ("PRIVATE" . private) + ("PUBLIC" . public) + ("DEFINE" . define) + ("END" . end))) + +(define integer-regex (make-regexp "^[+-]?[0-9]+$")) + +(define float-regex + (make-regexp + "^[+-]?([0-9]+\\.?[0-9]*|[0-9]*\\.?[0-9]+)(e[+-]?[0-9]+)?$")) + +(define symbol-allowed-characters + (char-set-difference + ;; We allow #\. because it is handled elsewhere + char-set:graphic (string->char-set "[]{};"))) + +(define (get-symbol-or-number port) + (let iterate ((result-chars '()) + (non-numeric? #f)) + (let* ((c (read-char port)) + (finish (lambda () + (let ((result (list->string + (reverse result-chars)))) + (values + (cond + ((regexp-exec integer-regex result) + 'integer) + ((regexp-exec float-regex result) + 'float) + (else 'symbol)) + result)))) + (allowed? (lambda (c) + (char-set-contains? + symbol-allowed-characters c)))) + (cond + ((eof-object? c) (finish)) + ((char=? c #\\) + (error "character escapes not allowed in symbols")) + ((char=? c #\.) + ;; If we've encountered non-numeric characters up until now, + ;; interpret the #\. as END, otherwise, assume it's part of a + ;; float. + (if non-numeric? + (begin + (unread-char c port) + (finish)) + (iterate (cons c result-chars) #f))) + ((allowed? c) + (iterate (cons c result-chars) + (or non-numeric? + (not (or (char-numeric? c) + (char=? c #\+) + (char=? c #\-)))))) + (else + (unread-char c port) + (finish)))))) + +(define (char-hex? c) + (and (not (eof-object? c)) + (or (char-numeric? c) + (memv c '(#\a #\b #\c #\d #\e #\f)) + (memv c '(#\A #\B #\C #\D #\E #\F))))) + +(define (digit->number c) + (- (char->integer c) (char->integer #\0))) + +(define (hex->number c) + (if (char-numeric? c) + (digit->number c) + (+ 10 (- (char->integer (char-downcase c)) (char->integer #\a))))) + +(define (read-escape port) + (let ((c (read-char port))) + (case c + ((#\' #\" #\\) c) + ((#\b) #\bs) + ((#\f) #\np) + ((#\n) #\nl) + ((#\r) #\cr) + ((#\t) #\tab) + ((#\v) #\vt) + ((#\0) + (let ((next (peek-char port))) + (cond + ((eof-object? next) #\nul) + ((char-numeric? next) + (error "octal escape sequences are not supported")) + (else #\nul)))) + ((#\x) + (let* ((a (read-char port)) + (b (read-char port))) + (cond + ((and (char-hex? a) (char-hex? b)) + (integer->char (+ (* 16 (hex->number a)) (hex->number b)))) + (else + (error "bad hex character escape"))))) + ((#\u) + (let* ((a (read-char port)) + (b (read-char port)) + (c (read-char port)) + (d (read-char port))) + (integer->char (string->number (string a b c d) 16)))) + (else + c)))) + +(define (read-string port) + (let iterate ((chars '())) + (let ((c (read-char port))) + (case c + ((#\") + (list->string (reverse chars))) + ((#\\) + (case (peek-char port) + ((#\newline #\space) + (iterate chars)) + (else + (iterate (cons (read-character port) chars))))) + (else + (iterate (cons c chars))))))) + +(define (read-character port) + (let ((c (read-char port))) + (case c + ((#\\) (read-escape port)) + (else c)))) + + +;;; Main lexer routine which is given a port and looks for the next +;;; token. +(define (lex port) + (let ((return (let ((file (if (file-port? port) + (port-filename port) + #f)) + (line (1+ (port-line port))) + (column (1+ (port-column port)))) + (lambda (token value) + (let ((obj (cons token value))) + (set-source-property! obj 'filename file) + (set-source-property! obj 'line line) + (set-source-property! obj 'column column) + obj)))) + ;; Read afterwards so the source-properties are correct above + ;; and actually point to the very character to be read. + (c (read-char port))) + (cond + ;; End of input must be specially marked to the parser. + ((eof-object? c) (return 'eof c)) + ;; Whitespace, just skip it. + ((char-whitespace? c) (lex port)) + ;; The period character or "END" keyword is used to delimit a + ;; term expression if it is immediately followed by whitespace or + ;; EOF, otherwise it is understood to be a float. + ((and (char=? c #\.) + (let ((c' (peek-char port))) + (or (eof-object? c') + (char-whitespace? c')))) + (return 'end #f)) + (else + (case c + ;; An line comment, skip until end-of-line is found + ((#\#) + (read-line port) + (lex port)) + ((#\') + ;; A literal character + (return 'character (read-character port))) + ((#\") + ;; A literal string. Similar to single characters, except + ;; that escaped newline and space are to be completely + ;; ignored. + (return 'string (read-string port))) + ((#\() + (let ((c (read-char port))) + (case c + ;; Multi-line comment, discard until closing "*)" + ((#\*) + (let iterate () + (let ((c (read-char port))) + (cond + ((eof-object? c) + (error "unexpected end of file in multi-line comment")) + ((char=? c #\*) + (cond + ((char=? (read-char port) #\)) (lex port)) + (else (iterate)))) + (else (iterate)))))) + (else + ;; The #\( could be understood as part of a symbol, but + ;; it seems wiser to reserve it for future use as its + ;; own token. + (unread-char c port) + (return 'paren-open #f))))) + ((#\)) (return 'paren-close #f)) + ((#\[) (return 'square-open #f)) + ((#\]) (return 'square-close #f)) + ((#\{) (return 'bracket-open #f)) + ((#\}) (return 'bracket-close #f)) + ((#\;) (return 'semicolon #f)) + (else + ;; Now only have numeric or symbol input possible. + (unread-char c port) + (call-with-values + (lambda () (get-symbol-or-number port)) + (lambda (type str) + (case type + ((symbol) + ;; str could be empty if the first character is already + ;; something not allowed in a symbol (and not escaped)! + ;; Take care about that, it is an error because that + ;; character should have been handled elsewhere or is + ;; invalid in the input. + (cond + ((zero? (string-length str)) + (begin + ;; Take it out so the REPL might not get into an + ;; infinite loop with further reading attempts. + (read-char port) + (error "invalid character in input" c))) + ((assoc-ref *keywords* str) + => (lambda (kw) (return kw #f))) + (else + (return 'symbol (string->symbol str))))) + ((integer) + (return 'integer (string->number str))) + ((float) + (return 'float (string->number str))) + (else + (error "unexpected numeric/symbol type" type))))))))))) + +;;; Build a lexer thunk for a port. This is the exported routine +;;; which can be used to create a lexer for the parser to use. +(define (get-lexer port) + (lambda () (lex port))) diff --git a/module/language/joy/parser.scm b/module/language/joy/parser.scm new file mode 100644 index 0000000..1d88738 --- /dev/null +++ b/module/language/joy/parser.scm @@ -0,0 +1,148 @@ +;;; parser.scm -- parse lexer tokens for Joy. +;;; +;;; Copyright © 2016 Eric Bavier +;;; +;;; This source is released under the terms of the GNU General Public +;;; License, version 3, or (at your option) any later version. + +(define-module (language joy parser) + #:use-module (language joy lexer) + #:export (read-joy)) + +(define* (parse-error token msg . args) + (apply error + (format #f "address@hidden:~]~d:~d: ~a" + (source-property token 'filename) + (source-property token 'line) + (source-property token 'column) + msg) + args)) + +(define (return result token) + (if (pair? result) + (set-source-properties! + result + (source-properties token))) + result) + +(define (get-symbol lex) + (let* ((token (lex)) + (type (car token))) + (case type + ((symbol) + (return (cdr token) token)) + (else + (parse-error token "expected atom, got" token))))) + +(define (get-quote lex) + (let iterate ((items '())) + (let* ((token (lex)) + (type (car token))) + (case type + ((eof) + (parse-error token "unexpected end of file in quote")) + ((square-close) + (return (reverse items) token)) + ((square-open) + (iterate (cons (get-quote lex) items))) + (else + (iterate (cons (cdr token) items))))))) + +(define (get-term lex) + (let iterate ((items '())) + (let* ((token (lex)) + (type (car token))) + (case type + ((eof) + (parse-error token "unexpected end of file in term")) + ((==) + (parse-error token "'==' outside definition")) + ((square-open) + (iterate (cons (get-quote lex) items))) + ((end) + (return `(term ,(reverse items)) token)) + (else + (iterate (cons (cdr token) items))))))) + +(define (get-definition lex) + (let* ((token (lex)) + (type (car token))) + (case type + ((==) + ;; Similar to get-term, but returns two value: the list of + ;; terms and the type of the token that caused termination. + (let iterate ((items '())) + (let* ((token (lex)) + (type (car token))) + (case type + ((eof) + (parse-error token + "unexpected end of file in definition")) + ((==) + (parse-error token + "unexpected '==' in definition")) + ((square-open) + (iterate (cons (get-quote lex) items))) + ((semicolon end) + (values + (return `(term ,(reverse items)) token) + type)) + (else + (iterate (cons (cdr token) items))))))) + (else + (parse-error token "expecting '==', got" token))))) + +(define (get-definition-sequence lex) + (let iterate ((definitions '())) + (let* ((token (lex)) + (type (car token))) + (case type + ((eof) + (parse-error token "unexpected end of file in definition")) + ((end) + (return (reverse definitions) token)) + ((symbol) + (call-with-values + (lambda () (get-definition lex)) + (lambda (term end-type) + (let ((d `(,(cdr token) ,term))) + (case end-type + ((semicolon) + (iterate (cons d definitions))) + ((end) + (return (reverse (cons d definitions)) + token))))))) + (else + (parse-error token "expecting symbol, got" token)))))) + +(define (get-expression lex) + (let* ((token (lex)) + (type (car token))) + (case type + ((eof) (cdr token)) + ((public private define) + (return `(definitions + ,@(map (lambda (d) + (cons (case type + ((public define) + 'public) + (else 'private)) + d)) + (get-definition-sequence lex))) + token)) + ((==) + (parse-error token "'==' outside definition")) + ((paren-open bracket-open) + (parse-error token "joy sets not implemented")) + ((square-open) + (return `(term (,(get-quote lex) ,@(cadr (get-term lex)))) + token)) + (else + ;; We've probably already read the first factor of a term. + ;; Read the rest, and add this one to the front. + (return `(term (,(cdr token) ,@(cadr (get-term lex)))) + token))))) + +(define (read-joy port) + (let ((lexer (get-lexer port))) + (get-expression lexer))) diff --git a/module/language/joy/primitives.scm b/module/language/joy/primitives.scm new file mode 100644 index 0000000..65f1900 --- /dev/null +++ b/module/language/joy/primitives.scm @@ -0,0 +1,148 @@ +;;; primitives.scm -- primitive operators for Joy. +;;; +;;; Copyright © 2016 Eric Bavier +;;; +;;; This source is released under the terms of the GNU General Public +;;; License, version 3, or (at your option) any later version. + +;;; Commentary: +;;; +;;; All Joy procedures take a stack argument and return a stack +;;; argument. It's convenient for primitives to use Scheme "rest" +;;; arguments to deconstruct the expected number of arguments. + +(define-module (language joy primitives) + #:use-module ((system base compile) + #:select (compiled-file-name compile-file)) + #:use-module ((guile) + #:select (load-compiled string-append and=>)) + #:use-module ((ice-9 safe-r5rs) + #:select (+ - < > eqv? display) + #:renamer (symbol-prefix-proc '%)) + #:use-module ((srfi srfi-1) + #:select (cons fold) + #:renamer (symbol-prefix-proc '%)) + #:replace (cons + - < > =) + #:export (uncons + swap + dup + pop + choice + infra + stack + unstack + + ;; IO + putch + display + include + + ;; datatype inquiry + logical + char + integer + string + list + + exit)) + +;;; Code: + +(define (->truth b) + (if b 'true 'false)) + +;;; TODO: This could be written in base in terms of '='. +(define (logical x . S) + (%cons (->truth (if (eq? x 'true) + #t + (eq? x 'false))) S)) + +(define (char x . S) + (%cons (->truth (char? x)) S)) + +(define (integer x . S) + (%cons (->truth (integer? x)) S)) + +(define (string x . S) + (%cons (->truth (string? x)) S)) + +(define (list x . S) + (%cons (->truth (list? x)) S)) + +(define (cons lst x . S) + (%cons (%cons x lst) S)) + +(define (uncons lst . S) + (%cons (cdr lst) (%cons (car lst) S))) + +(define (swap x y . S) + (%cons y (%cons x S))) + +(define (dup x . S) + (%cons x (%cons x S))) + +(define (pop _ . S) + S) + +(define (+ x y . S) + (%cons (%+ y x) S)) + +(define (- x y . S) + (%cons (%- y x) S)) + +(define (< x y . S) + (%cons (->truth (%< y x)) S)) + +(define (> x y . S) + (%cons (->truth (%> y x)) S)) + +(define (= x y . S) + (%cons (->truth (%eqv? x y)) S)) + +(define (choice y x b . S) + (%cons (if (eq? b 'true) x y) S)) + +(define (infra q lst . S) + (%cons (%fold (@ (language joy eval) eval) lst q) + S)) + +(define (stack . S) + (%cons S S)) + +(define (unstack S . _) + S) + +(define (putch c . S) + (write-char c) + S) + +(define (display x . S) + (%display x) + S) + +;;; TODO: It Would Be Nice™ if the search included both the current +;;; working directory and the directory from where the include is being +;;; issued. +(define (include str . S) + (let* ((std (string-append (%library-dir) "/language/joy")) + (go (and=> (search-path (%cons std %load-path) str '("" ".joy")) + (lambda (f) (compiled-file-name f))))) + (if go + (begin + (compile-file str #:output-file go #:from 'joy) + (load-compiled go)) + (error "could not find file to include:" str))) + S) + +(define (exit status . _) + "Immediately exit the program with STATUS." + (primitive-exit status)) + +;;; For efficiency, having low-level implementations of the following +;;; might be beneficial (though I have yet to prove this in practice): +;;; +;;; i dip dipd popd dupd swapd times divmod / * % <= >= max min true false and +;;; or not null branch ifte +;;; +;;; In particular, it seems the dip and i combinators would be +;;; especially beneficial. Or just dip if we use 'i == dup dip pop'. diff --git a/module/language/joy/spec.scm b/module/language/joy/spec.scm new file mode 100644 index 0000000..97eb492 --- /dev/null +++ b/module/language/joy/spec.scm @@ -0,0 +1,18 @@ +;;; spec.scm -- Guile language specification for Joy. +;;; +;;; Copyright © 2016 Eric Bavier +;;; +;;; This source is released under the terms of the GNU General Public +;;; License, version 3, or (at your option) any later version. + +(define-module (language joy spec) + #:use-module (system base language) + #:use-module (language joy parser) + #:use-module (language joy compile-tree-il) + #:export (joy)) + +(define-language joy + #:title "Joy" + #:reader (lambda (port env) (read-joy port)) + #:compilers `((tree-il . ,compile-tree-il)) + #:printer write) diff --git a/module/language/joy/tests/inicheck.joy b/module/language/joy/tests/inicheck.joy new file mode 100644 index 0000000..9934a7d --- /dev/null +++ b/module/language/joy/tests/inicheck.joy @@ -0,0 +1,52 @@ +(* inicheck.joy -- primitive unit test combinators. + + Test routines that make use of only Joy primitives, so that the + standard library routines may be tested. + + Copyright © Eric Bavier + + This source is released under the terms of the GNU General Public + License, version 3, or at your option any later version. +*) + +DEFINE + (* private *) + newline == '\n putch ; + puts == display newline ; + primitive-check == + display " " display + ["ok" puts] ["fail" puts] + choice infra ; + + (* Use these combinators in the following way: + + "foo" [P] satisfies [P'] ? . + + P is executed, immediately followed by a predicate P', which + should leave true or false on the top of the stack to indicate + success or failure of the test. If succesful, "foo ok" is + printed to stdout, otherwise "foo fail". *) + satisfies == swap display [] swap infra ; + ? == infra uncons pop + [" ok" puts] [" fail" puts] + choice [] swap infra pop ; +END + +(* Before exporting these test routines, do what sanity checking we + can do on our primitives. *) +[] [true] uncons pop "choice" primitive-check . +[] 3 3 = "=" primitive-check . +[] 1 3 + 4 = "+" primitive-check . +[] 4 2 - 2 = "=" primitive-check . +[] 2 4 < "<" primitive-check . +[] 4 2 > ">" primitive-check . +[] 2 dup + 4 = "dup" primitive-check . +[] 2 4 pop 2 = "pop" primitive-check . +[] [3 1] dup [+] infra uncons pop 4 = "dup list" primitive-check . +[] [true] uncons pop logical "logical" primitive-check . +[] [false] uncons pop logical "logical false" primitive-check . +[] 'b char "char" primitive-check . +[] 2 integer "integer" primitive-check . +[] "foo" string "string" primitive-check . +[] [2] list "list" primitive-check . +[] [] null "null" primitive-check . diff --git a/module/language/joy/tests/test-base.joy b/module/language/joy/tests/test-base.joy new file mode 100644 index 0000000..42e6491 --- /dev/null +++ b/module/language/joy/tests/test-base.joy @@ -0,0 +1,197 @@ +(* test-base.joy -- tests for base.joy + + Copyright © Eric Bavier + + This source is released under the terms of the GNU General Public + License, version 3, or at your option any later version. +*) +"base" include . +"tests/inicheck" include . + +DEFINE + test-swons == "swons" [[3] 2 swons car] satisfies [2 =] ? ; + test-unswons == "unswons" [[2 3] unswons] satisfies [2 =] ? ; + + test-car == "car" [[1 2] car] satisfies [1 =] ? ; + test-cdr == "cdr" [[1 2] cdr car] satisfies [2 =] ? ; + test-cadr == "cadr" [[1 2] cadr] satisfies [2 =] ? ; + + test-first == + "first" [[1 2 3] first] satisfies [1 =] ? ; + test-second == + "second" [[1 2 3] second] satisfies [2 =] ? ; + test-third == + "third" [[1 2 3] third] satisfies [3 =] ? ; + + test-booleans == + "true" [true] satisfies [1 0 choice 1 =] ? + "false" [false] satisfies [1 0 choice 0 =] ? ; + + test-leaf == + "numeric leaf" [2 leaf] satisfies [true =] ? + "char leaf" ['b leaf] satisfies [true =] ? + "string leaf" ["foo" leaf] satisfies [true =] ? + "list leaf" [[1 2] leaf] satisfies [false =] ? ; + test-null == + "numeric null(0)" [0 null] satisfies [true =] ? + "numeric non-null" [1 null] satisfies [false =] ? + "null list" [[] null] satisfies [true =] ? + "non-null list" [[1] null] satisfies [false =] ? ; + test-nulld == + "numeric nulld" [0 1 nulld] satisfies [pop true =] ? + "numeric non-nulld" [1 0 nulld] satisfies [pop false =] ? + "list nulld" [[] [1] nulld] satisfies [pop true =] ? + "list non-nulld" [[1] [] nulld] satisfies [pop false =] ? ; + + test-newstack == + "newstack" [newstack] satisfies [stack null] ? ; + + test-i == + "i id" [1 [] i] satisfies [1 =] ? + "i atom" [[1] i] satisfies [1 =] ? + "i pop" [1 2 [pop] i] satisfies [1 =] ? + "i +" [1 2 [+] i] satisfies [3 =] ? + "i2 id" [1 2 [] [] i2] satisfies [2 =] ? + "i2 +" [1 2 [3 +] [2 +] i2] satisfies [=] ? ; + + test-dip == + "dip id" [1 2 [] dip] satisfies [pop 1 =] ? + "dip atom" [2 [1] dip] satisfies [pop 1 =] ? + "dip pop" [1 2 3 [pop] dip] satisfies [3 =] ? + "dip pop 2" [1 2 3 [pop] dip] satisfies [pop 1 =] ? ; + + test-b == + "b +" [1 2 3 [+] [+] b] satisfies [6 =] ? + "b" [4 [2 +] [3 -] b] satisfies [3 =] ? ; + + test-cleave == + "cleave" [2 [1 +] [4 +] cleave] satisfies [[3 =] [6 =] i2 and] ? ; + + test-branch == + "branch true" [true [1] [0] branch] satisfies [1 =] ? + "branch false" [false [1] [0] branch] satisfies [0 =] ? + "ifte true" [1 [0 >] [1] [0] ifte] satisfies [1 =] ? + "ifte false" [0 [0 >] [1] [0] ifte] satisfies [0 =] ? + "ifte restore" [2 [pop true] [2 +] [] ifte] satisfies [4 =] ? ; + + test-logic == + "not true" [true not] satisfies [false =] ? + "not false" [false not]satisfies [true =] ? + "or tt" [true true] satisfies [or] ? + "or tf" [true false] satisfies [or] ? + "or ft" [false true] satisfies [or] ? + "or ff" [false false] satisfies [or not] ? + "and tt" [true true] satisfies [and] ? + "and tf" [true false] satisfies [and not] ? + "and ft" [false true] satisfies [and not] ? + "and ff" [false false] satisfies [and not] ? + "xor tt" [true true] satisfies [xor not] ? + "xor tf" [true false] satisfies [xor] ? + "xor ft" [false true] satisfies [xor] ? + "xor ff" [false false] satisfies [xor not] ? ; + + test-pop == + "pop2" [1 2 3 pop2] satisfies [1 =] ? + "popop" [1 2 3 pop2] satisfies [1 =] ? + "popd" [1 2 3 popd] satisfies [3 =] ? + "popd 2" [1 2 3 popd] satisfies [pop 1 =] ? ; + + test-dup == + "dup2" [2 3 dup2] satisfies [[2 =] dip 3 = and] ? + "dupd" [2 3 dupd] satisfies [[2 =] dip 3 = and] ? ; + + test-roll == + "rollup" [1 2 3 rollup] satisfies [2 = [1 =] dip and + [3 =] dip and] ? + "rolldown" [1 2 3 rolldown] satisfies [1 = [3 =] dip and + [2 =] dip and] ? + "rotate" [1 2 3 rotate] satisfies [1 = [2 =] dip and + [3 =] dip and] ? ; + + test-app == + "app2" [1 3 [1 +] app2] satisfies [4 = [2 =] dip and] ? + "app3" [1 3 5 [2 >] app3] satisfies [and [true =] + [false =] i2] ? ; + + test-maxima == + ">=" [2 3 >= 3 3 >= 4 3 >=] satisfies [and swap not and] ? + "<=" [2 3 <= 3 3 <= 4 3 <=] satisfies [not and and] ? + "!=" [2 3 != 3 3 !=] satisfies [not and] ? + "max" [2 3 max -2 3 max] satisfies [[3 =] app2 and] ? ; + + test-arithmetic == + "*" [3 7 * 0 5 *] satisfies [[21 =] [null] i2 and] ? + "divmod" [11 3 divmod] satisfies [[3 =] [2 =] i2 and] ? + "/" [21 5 / 25 25 /] satisfies [[4 =] [1 =] i2 and] ? + "%" [21 2 % 37 5 %] satisfies [[1 =] [2 =] i2 and] ? + "exp" [2 3 exp 3 0 exp] satisfies [[8 =] [1 =] i2 and] ? + ; + + test-linrec == + (* We test * and divmod here because they are currently + implemented in joy itself using linear recursion. *) + "fact" [0 fact 4 fact] satisfies [[1 =] [24 =] i2 and] ? + "sum-up-to" [6 sum-up-to] satisfies [21 =] ? ; + + test-aggregates == + "step" [[1 2 3] [] step] + satisfies [[2 =] [3 =] i2 and [1 =] dip and] ? + "step +" [[1 3] [5 +] step] satisfies [[6 =] [8 =] i2 and] ? + "reverse" [[1 2] reverse] + satisfies [[[1 =] [2 =] i2 and] infra car] ? + "fold +" [[1 4 5] 0 [+] fold] satisfies [10 =] ? + "fold swons" [[1 4 5] [] [swons] fold] + satisfies [unstack 5 = swap 4 = and [1 =] dip and] ? + "fold or" [[false true false] false [or] fold] + satisfies [true =] ? + "fold and" [[true true false] true [and] fold] + satisfies [false =] ? + "sum" [[1 2 8 9] sum] satisfies [20 =] ? + "product" [[1 2 8 9] product] satisfies [144 =] ? + "size" [[] size [1 4 8 9] size] satisfies [[0 =] [4 =] i2 and] ? + "map id" [[1 2] [] map] + satisfies [[[2 =] [1 =] i2 and] infra car] ? + "map +" [[1 2] [10 +] map] + satisfies [[[12 =] [11 =] i2 and] infra car] ? + "map >" [[3 7] [4 >] map] + satisfies [[[true =] [false =] i2 and] infra car] ? + "drop" [[1 2 3] [1 drop car] [2 drop car] cleave] + satisfies [[2 =] [3 =] i2 and] ? + "take" [[1 2 3] [2 take i] [1 take i] cleave] + satisfies [[2 =] [1 =] i2 and] ? + "at" [[1 2 3 4] [2 at] [1 at] cleave] + satisfies [[3 =] [2 =] i2 and] ? + "of" [[1 2 3 4] [2 swap of] [1 swap of] cleave] + satisfies [[3 =] [2 =] i2 and] ? + ; + + run-base-tests == + test-swons + test-unswons + test-car + test-cdr + test-cadr + test-first + test-second + test-third + test-booleans + test-leaf + test-null + test-nulld + test-newstack + test-i + test-dip + test-b + test-cleave + test-branch + test-logic + test-pop + test-dup + test-roll + test-maxima + test-arithmetic + test-linrec + test-aggregates +END + +run-base-tests . \ No newline at end of file -- 2.6.3