guile-sources
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

gxsed snapshot


From: Thien-Thi Nguyen
Subject: gxsed snapshot
Date: Wed, 08 Sep 2004 15:22:41 +0200

folks,

sed is a nice, mostly stable, design where one can practice simple
compilation techniques, such as parsing (and building trees), basic
blocks threading, backpatching, accomodation of midstream scope
changes, and minor optimization.

please find below a work-in-progress (rife with code-enhancement
opportunities ;-) implementation that has been tested alongside GNU sed
3.02 and GNU sed 4.1 with the `testsed' script:

  #!/bin/sh
  $HOME/local/src/.ttn/build/sed-4.1/sed/sed "$@"
  sed "$@"
  sh gxsed.scm "$@" 

obviously, this script does not support testing gxsed as a stdin filter;
that has to be done separately in an ad-hoc manner (at least, by those
who aren't bourne-shell file descriptor munging wizards...).

to play, comment out the pretty-printing (unless that sort of thing
interests you) and use the script above to watch output/speed of the
respective implementations.  for example:

  sh -x testsed -n \
    '/^[*][*]*$/{;n;n;n;/complete mystery/{;g;p;};};h' \
    ~/local/info/ttn-do.info

this displays program names in the ttn-do package for which the
documentation barely scrapes by (tsk tsk).

see Commentary for specific failure scenarios and the TODO.

the name "gxsed" is because eventually this will be "guile extensible";
that is, you will be able to define new sed commands (bound to a letter
not already used in the normal sed language), written in a parenthetical
computer language, for the engine to "compile" and execute.  in any
case, we continue to scheme towards overthrow of the auto* tools, one
subsystem at a time.... ;->

happy hacking,
thi



_____________________
#!/bin/sh
exec ${GUILE-guile} -e '(ttn-do gxsed)' -s $0 "$@" # -*- scheme -*-
!#
;;; gxsed --- The yin of editors

;; Copyright (C) 2004 Thien-Thi Nguyen
;; This program is part of ttn-do, released under GNU GPL v2 with ABSOLUTELY
;; NO WARRANTY.  See http://www.gnu.org/copyleft/gpl.txt for details.

;;; Commentary:

;; Usage: gxsed [OPTION]... {script-only-if-no-other-script} [INPUT-FILE]...
;;
;;   -n, --quiet, --silent
;;                  suppress automatic printing of pattern space
;;   -e script, --expression=script
;;                  add the script to the commands to be executed
;;   -f script-file, --file=script-file
;;                  add the contents of script-file to the commands to be 
executed
;;       --help     display this help and exit
;;       --version  output version information and exit
;;
;; If no -e, --expression, -f, or --file option is given, then the first
;; non-option argument is taken as the gxsed script to interpret.  All
;; remaining arguments are names of input files; if no input files are
;; specified, then the standard input is read.
;;
;;
;; FAILS (GNU sed 4.1 testsuite):
;;  TEST -- OBSERVATION [-- HYPOTHESIS, NOTES, ETC]
;;  newjis -- extra char remains -- matched text not counted properly?
;;  noeolw -- newline suppressed (does not follow GNU sed peculiarity)
;;  numsub5 -- miscompare -- GNU sed 3.02 does the same thing
;;  0range -- configuration error (-s) -- see TODO
;;  bkslashes -- compilation error (bad `s' flag: #\newline) -- parser 
incomplete?
;;  madding -- compilation error (bad `s' flag: #\newline) -- parser incomplete?
;;  mac-mf -- compilation error (,) -- escaped sep misparsed
;;  xabcx -- hang
;;  (etc)
;;
;;
;; TODO:
;;  check `nl' usage (use `xx' instead)
;;  escape processing
;;  `-s' command-line option (GNU extension)
;;  `s' command sed-syntax -> guile-syntax regexp translation
;;  `s' command flag: `I'

;;; Code:

(define-module (ttn-do gxsed)
  #:autoload (scripts PROGRAM) (HVQC-MAIN)
  #:autoload (scripts slurp) (slurp)
  #:autoload (ice-9 pretty-print) (pretty-print)
  #:use-module ((ice-9 regex) #:select (string-match))
  #:use-module ((ice-9 rdelim) #:select (read-line write-line))
  #:use-module ((ice-9 gap-buffer) #:select (make-gap-buffer
                                             gb->string
                                             gb->port!))
  #:use-module (ttn edit)
  #:export (gxsed))

(define vr vector-ref)
(define v! vector-set!)
(define si string-index)
(define sub make-shared-substring)

(define (transliterate tmap)
  (let* ((s (editing-buffer PATT (buffer-string)))
         (len (string-length s)))
    (do ((i 0 (1+ i)))
        ((= i len))
      (and=> (assq-ref tmap (string-ref s i))
             (lambda (new)
               (string-set! s i new))))
    (editing-buffer PATT
      (erase-buffer)
      (insert s))))

(define *display-unambiguously-map* #f)

(define (make-*display-unambiguously-map*!)
  (let ((v (make-vector 256)))
    (define (octal n)
      (let ((v (number->string n 8)))
        (format #f "\\~A~A"
                (make-string (max 0 (- 3 (string-length v))) #\0)
                v)))
    (define (range-set! x y val)
      (do ((i x (1+ i)))
          ((> i y))
        (vector-set! v i (if (procedure? val) (val i) val))))
    (define (point-set! c val)
      (let ((x (char->integer c)))
        (range-set! x x val)))
    (range-set! 0 255 octal)
    (range-set! 32 176 #f)
    (point-set! #\bs "\\b")
    (point-set! #\ht "\\t")
    (point-set! #\nl "\\n")
    (point-set! #\vt "\\v")
    (point-set! #\np "\\f")
    (point-set! #\cr "\\r")
    (point-set! #\\ "\\\\")
    (set! *display-unambiguously-map* v)))

(define (display-unambiguously wrap)
  (or *display-unambiguously-map* (make-*display-unambiguously-map*!))
  (let* ((s (gb->string PATT))
         (len (string-length s))
         (col 0))
    (do ((i 0 (1+ i)))
        ((= i len))
      (let* ((c (string-ref s i))
             (rep (vector-ref *display-unambiguously-map*
                              (char->integer c))))
        (or (= 0 wrap)
            (let* ((rlen (if (not rep) 1 (string-length rep)))
                   (ncol (+ col rlen)))
              (cond ((< ncol wrap) (set! col ncol))
                    (else (display "\\\n")
                          (set! col rlen)))))
        (display (or rep c))))
    (display "$\n")))

(define (->grx sedrx)
  (let loop ((ls (string->list sedrx)) (acc '()))
    (cond ((null? ls) (list->string (reverse! acc)))
          ((char=? #\\ (car ls))
           (if (null? (cdr ls))
               (error "trailing backslash:" sedrx)
               (case (cadr ls)
                 ((#\() (loop (cddr ls) (cons #\( acc)))
                 ((#\)) (loop (cddr ls) (cons #\) acc)))
                 (else (loop (cdr ls) (cons #\\ acc))))))
          ((char=? #\$ (car ls))
           (if (null? (cdr ls))
               (loop (cdr ls) (cons (car ls) acc))
               (loop (cdr ls) (append (string->list "]$[") acc))))
          (else (loop (cdr ls) (cons (car ls) acc))))))

(define (make-next-foo-proc prefix)
  (let ((serial 0))
    (lambda check
      (cond ((null? check)
             (set! serial (1+ serial))
             (string->symbol (format #f "~A~A" prefix serial)))
            (else serial)))))

(define next-TT (make-next-foo-proc "tt")) ; tail thunk
(define next-RX (make-next-foo-proc "rx")) ; regular expression
(define next-WP (make-next-foo-proc "wp")) ; write port
(define next-GE (make-next-foo-proc "ge")) ; group end

(define (parse auto-print? program)     ; => list of accumulators
  (let ((len (string-length program))
        (outfiles '())
        (groups '())
        (saved-r-ops '())
        (need-rx-check? #f)
        (need-midstream-fill? #f)
        ;; returned accumulators
        (symaddress '())                ; (LABEL . PC)
        (conditions '())                ; (CONDVAR TURN-ON TURN-OFF)
        (globaldefs '())                ; (NAME VALUE TYPE-SPECIFIC-EXTRA)
        (linelpdefs '())                ; (NAME VALUE TYPE-SPECIFIC-EXTRA)
        (insn-group '())                ; (PC . EXP...)
        (explicit-j '()))               ; (PC . (go DEST-LABEL))

    (define (acc-syma! x) (set! symaddress (cons x symaddress)))
    (define (acc-cond! x) (set! conditions (cons x conditions)))
    (define (acc-gdef! x) (set! globaldefs (cons x globaldefs)))
    (define (acc-ldef! x) (set! linelpdefs (cons x linelpdefs)))
    (define (acc-igrp! x) (set! insn-group (cons x insn-group)))
    (define (acc-expj! x) (set! explicit-j (cons x explicit-j)))

    (define (gdef! name value type-specific-extra)
      (acc-gdef! (list name value type-specific-extra)))

    (define (ldef! name value type-specific-extra)
      (acc-ldef! (list name value type-specific-extra)))

    (define (spewer-name filename which)
      (or (assoc-ref outfiles filename)
          (let ((name (next-WP)))
            (set! outfiles (acons filename name outfiles))
            (gdef! name `(make-spew!-proc (open-output-file ,filename)) which)
            name)))

    (define (pr position)
      (string-ref program position))

    (define (sw position)
      (if (memq (pr position) '(#\space #\ht))
          (sw (1+ position))
          position))

    (let loop ((pos 0) (pc 0) (addr #f) (sense #f))

      (define (p n) (+ pos n))

      (define (c0) (pr (p 0)))
      (define (c1) (pr (p 1)))
      (define (nl) (si program #\newline pos))
      (define (sc) (si program #\; pos))
      (define (xx) (let ((n (nl)) (s (sc)))
                     (cond ((not (or n s)) len)
                           ((and n s) (min n s))
                           (s) (n))))

      (define (<-> start end)
        (sub program start end))

      (define (<!> start end)
        (set! pos (1+ end))
        (sub program start end))

      (define (syma! x) (acc-syma! (cons x pc)))
      (define (igrp! x) (acc-igrp! (cons pc x)))
      (define (expj! x) (acc-expj! (cons pc x)))

      (define (amws! . forms)           ; always more work, sigh

        (define (->cond p)
          (let* ((x (p addr))
                 (rv (cond ((number? x)
                            `(= ,x lln))
                           ((string? x)
                            (let* ((name (next-RX))
                                   (form `(rx-check ,name)))
                              (gdef! name `(make-regexp ,x) #:condition-regexp)
                              (set! need-rx-check? #t)
                              form))
                           (else x))))
            (if sense
                `(not ,rv)
                rv)))

        (cond ((and (vector? addr) (= 4 (vector-length addr)))
               (let* ((first (vr addr 1))
                      (step (vr addr 3))
                      (conditional `(let ((norm (- lln ,first)))
                                      (and (<= 0 norm)
                                           (= 0 (modulo norm ,step))))))
                 (igrp! `((cond (,(if sense
                                      `(not ,conditional)
                                      conditional)
                                 ,@forms))))))
              ((vector? addr)
               (let ((conditional `(<= ,(vr addr 0) lln ,(vr addr 1))))
                 (igrp! `((cond (,(if sense
                                      `(not ,conditional)
                                      conditional)
                                 ,@forms))))))
              ((pair? addr)
               (let ((tag (string->symbol (format #f "C~A" pc)))
                     (up (->cond car))
                     (dn (->cond cdr)))
                 (acc-cond! (list tag up dn))
                 (igrp! `((cond (,tag ,@forms))))))
              (addr
               (igrp! `((cond (,(->cond identity) ,@forms)))))
              (sense
               (igrp! `((cond (#f ,@forms)))))
              (else
               (igrp! forms))))

      (define (first-spew-then-amws! form)
        (apply amws! `(,@(if auto-print?                 ;;; gross
                             '((spew!))
                             '())
                       ,form)))

      (define (check-zero-only! c)
        (and addr (error (format #f "`~A' command does not accept any address"
                                 (make-string 1 c)))))

      (define (check-one-only! c)
        (and (or (and (vector? addr) (= 2 (vector-length addr))) (pair? addr))
             (error (format #f "`~A' command accepts at most one address"
                            (make-string 1 c)))))

      (define (read-backslash-lines! last-nl)
        (let line-loop ((acc '()) (last-nl last-nl))
          (let ((end (si program #\newline (1+ last-nl))))
            (if (char=? #\\ (pr (1- end)))
                (line-loop (cons (<-> (if (null? acc)       ;;; blech!
                                          (1+ last-nl)
                                          last-nl)
                                      (1- end))
                                 acc)
                           end)
                (let ((fin-end (1+ end)))
                  (set! pos fin-end)    ; !
                  (apply string-append  ; rv
                         (reverse (cons (<-> last-nl fin-end) acc))))))))

      (define (amws-tail-thunk! form which . extra)
        (let* ((name (next-TT))
               (set-tail `(add-hook! tail-thunks ,name #t))
               (first-form (if (or (null? extra) (not (car extra)))
                               set-tail
                               `(and ,(car extra) ,set-tail)))
               (rest-forms (if (null? extra)
                               '()
                               (cdr extra))))
          (gdef! name form which)
          (apply amws! (cons first-form rest-forms))))

      (define (define-tail-thunk form)
        (let ((name (next-TT)))
          (gdef! name form #:tail-thunk)
          name))

      (define (command-ok! new-pos)
        (loop new-pos (1+ pc) #f #f))

      (define (p2-command-ok!)
        (command-ok! (p 2)))

      (cond

       ;; nothing else to read
       ((= pos len)
        ;; define fill!
        (cond ((and need-rx-check? need-midstream-fill?)
               (gdef! 'patt-string #f #:global-patt-string)
               (gdef! 'fill! '(let ((system:fill! fill!))
                                (lambda args
                                  (let ((v (apply system:fill! args)))
                                    (cond ((eof-object? v) v)
                                          (else
                                           (set! patt-string (gb->string PATT))
                                           v)))))
                      #:fill!-that-sets-patt-string)
               (gdef! 'rx-check '(lambda (rx)
                                   (regexp-exec rx patt-string))
                      #:single-arg-rx-match-against-PATT))
              (need-rx-check?
               (ldef! 'patt-string '(gb->string PATT) #:PATT-as-string)
               (ldef! 'rx-check '(lambda (rx)
                                   (regexp-exec rx patt-string))
                      #:single-arg-rx-match-against-PATT)))
        ;; patch substitution replacement ops (if necessary)
        (and (vector? saved-r-ops)
             (for-each (lambda (r-op+form)
                         (let ((r-op (car r-op+form)))
                           (if (eq? 'begin (car r-op))
                               (set-cdr!
                                (last-pair (cdr r-op+form))
                                `((and status (set! global-status #t))))
                               (let ((r-op-copy (list-copy r-op)))
                                 (set-car! r-op 'begin)
                                 (set-cdr! r-op `((set! global-status #t)
                                                  ,r-op-copy))))))
                       (vr saved-r-ops 0)))
        ;; define line-loop vars (where necessary)
        (and (vector? saved-r-ops)
             (ldef! 'global-status #f #:used-by-the-t-command))
        (or (= 0 (next-TT #:check))
            (ldef! 'tail-thunks '(make-hook 0) #:tail-thunks))
        ;; add some built-in labels
        (acc-syma! '(#:%done-noprint . -1))
        (acc-syma! `(#:%done . ,pc))
        ;; add finishing instruction group
        (amws! #t)
        ;; rv -- NOTE: accumulators unreversed
        (list symaddress
              conditions
              globaldefs
              linelpdefs
              insn-group
              explicit-j))

       ;; newline or comment
       ((memq (c0) '(#\# #\newline))
        (if (char=? #\# (c0))
            (check-zero-only! #\#)
            (and addr (error "missing command for addr:" addr)))
        (loop (1+ (nl)) pc addr sense))

       ;; leading whitespace
       ((memq (c0) '(#\space #\ht))
        (loop (sw (p 1)) pc addr sense))

       ;;; address parsing -- keep pc the same, modify addr and sense

       ;; sense inversion
       ((char=? #\! (c0))
        (loop (p 1) pc addr #t))

       ;; two-address continuation
       ((and addr (not (or (pair? addr) (vector? addr))) (char=? #\, (c0)))
        (loop (p 1) pc addr #f))

       ;; line number --or-- FIRST~STEP (GNU extension)
       ((and (not (pair? addr))
             (char-numeric? (c0)))
        (let line-loop ((end (p 1)))
          (if (char-numeric? (pr end))
              (line-loop (1+ end))
              (let ((n (string->number (<-> (p 0) end))))
                (if (and (not addr)
                         (< end (- len 2))
                         (char=? #\~ (pr end))
                         (char-numeric? (pr (1+ end))))
                    (let step-loop ((step-end (+ 2 end)))
                      (if (char-numeric? (pr step-end))
                          (step-loop (1+ step-end))
                          (let ((step (string->number (<-> (1+ end) step-end))))
                            (loop step-end pc
                                  (vector #:first n #:step step)
                                  #f))))
                    (loop end pc
                          (cond ((not addr) n)
                                ((number? addr) (vector addr n)) ; for speed
                                (else (cons addr n)))
                          #f))))))

       ;; regexp
       ((and (not (pair? addr))
             (si "/\\" (c0)))
        => (lambda (start)
             (set! start (p start))
             (let* ((delim ((if (= (p 0) start) c0 c1)))
                    (end (si program delim (1+ start)))
                    (rxs (<!> (1+ start) end)))
               (loop pos pc (if addr (cons addr rxs) rxs) #f))))

       ;; last line
       ((and (not (pair? addr))
             (not (vector? addr))
             (char=? #\$ (c0)))
        (loop (p 1) pc (if addr (cons addr 'ULT?) 'ULT?) #f))

       (else
        (case (c0)

          ;;; command parsing -- modify pc, set addr and sense to #f

          ;; labels
          ((#\:)
           (check-zero-only! #\:)
           (let ((label (<!> (sw (p 1)) (nl))))
             (syma! (symbol->keyword (string->symbol label)))
             (loop pos pc addr sense)))

          ;; branch
          ((#\b)
           (let* ((dest (<!> (sw (p 1)) (xx)))
                  (j `(go ,(if (string=? "" dest)
                               #:%done
                               (symbol->keyword (string->symbol dest))))))
             (expj! j)
             (amws! j))
           (command-ok! pos))

          ;; print
          ((#\p)
           (amws! '(spew!))
           (p2-command-ok!))

          ;; print until first newline
          ((#\P)
           (amws! '(display (editing-buffer PATT
                              (goto-char (point-min))
                              (end-of-line)
                              (buffer-substring (point-min) (point))))
                  '(newline))
           (p2-command-ok!))

          ;; delete
          ((#\d)
           (let ((j '(go #:%done-noprint)))
             (expj! j)
             ;; no need to actually do anything;
             ;; we get deletion for free from the big loop
             (amws! j))
           (p2-command-ok!))

          ;; next!
          ((#\n)
           (set! need-midstream-fill? #t)
           (first-spew-then-amws! '(if (eof-object? (fill!))
                                       (exit #t)
                                       (set! lln (1+ lln))))
           (p2-command-ok!))

          ;; substitute
          ((#\s)
           (let* ((sep (c1))
                  (src-end (si program sep (p 2)))
                  (dst-end (si program sep (1+ src-end)))
                  (src (<-> (p 2) src-end))
                  (dst (<!> (1+ src-end) dst-end))
                  (flag-end ((if (char=? #\; sep) nl xx)))
                  (flags (let floop ((fp (1+ dst-end)) (acc '()))
                           (if (= flag-end fp)
                               acc
                               (let ((fc (pr fp)))
                                 '(and (< flag-end fp)
                                      (error "flag badness!"
                                             (list flag-end fp fc)))
                                 (case fc
                                   ((#\p) (floop (1+ fp) (cons #:p acc)))
                                   ((#\g) (floop
                                           (1+ fp)
                                           ;; ignore `g' if regexp ends in "$"
                                           (if (char=? #\$ (pr (1- src-end)))
                                               acc
                                               (cons #:g acc))))
                                   ((#\w) (acons #:write
                                                 (<-> (sw (1+ fp)) flag-end)
                                                 acc))
                                   ;; TODO: handle `I' flag
                                   (else (if (char-numeric? fc)
                                             (let nloop ((num-end (1+ fp)))
                                               (if (char-numeric? (pr num-end))
                                                   (nloop (1+ num-end))
                                                   (floop num-end
                                                          (acons #:number
                                                                 (string->number
                                                                  (<-> fp 
num-end))
                                                                 acc))))
                                             (error "bad `s' flag:"
                                                    (list len flag-end fp fc
                                                          )))))))))
                  (compiled-src-name (next-RX))
                  (write-op (and=> (assq-ref flags #:write)
                                   (lambda (filename)
                                     (let ((spew (spewer-name
                                                  filename
                                                  #:substitution-filestream)))
                                       `((,spew))))))
                  (status-copy-op (if (vector? saved-r-ops)
                                      '((set! global-status #t))
                                      '()))
                  (post-op (cond (write-op
                                  `(cond (status ,@status-copy-op
                                                 ,@write-op
                                                 ,@(if (memq #:p flags)
                                                       '((spew!))
                                                       '()))))
                                 ((memq #:p flags)
                                  `(and status ,@status-copy-op (spew!)))
                                 ((pair? status-copy-op)
                                  `(and status ,(car status-copy-op)))
                                 (else
                                  #f)))
                  (s-op `(re-search-forward
                          ,compiled-src-name
                          (point-max)
                          #t
                          ,@(or (and=> (assq-ref flags #:number) list)
                                '())))
                  (r-op (let* ((len (string-length dst))
                               (bs? (and=> (string-index dst #\\)
                                           (lambda (bs)
                                             (and (< bs (- len 2))
                                                  (string-index
                                                   "0123456789&"
                                                   (string-ref
                                                    dst (1+ bs)))))))
                               (replace `(replace-match
                                          ,dst
                                          ,@(if bs? '() (list #t)))))
                          (if post-op
                              `(begin (set! status #t) ,replace)
                              replace)))
                  (op `(editing-buffer PATT
                         (goto-char (point-min))
                         ,(cond ((and (memq #:g flags)
                                      (assq-ref flags #:number))
                                 => (lambda (n-first)
                                      (let ((adjusted (1- n-first))
                                            (pre (list-copy s-op)))
                                        (set-car! (last-pair pre) adjusted)
                                        (set-cdr! (list-tail s-op 3) '())
                                        `(and ,pre (while ,s-op ,r-op)))))
                                ((memq #:g flags)
                                 `(while ,s-op ,r-op))
                                (else
                                 `(and ,s-op ,r-op)))))
                  (form (if post-op
                            `(let ((status #f)) ,op ,post-op)
                            op)))
             (gdef! compiled-src-name `(make-regexp ,(->grx src))
                    #:substitution-regexp)
             (amws! form)
             (or (vector? saved-r-ops)
                 (set! saved-r-ops (acons r-op form saved-r-ops)))
             (command-ok! (1+ flag-end))))

          ;; transliterate
          ((#\y)
           (let* ((sep (c1))
                  (src-end (si program sep (p 2)))
                  (dst-end (si program sep (1+ src-end)))
                  (src (<-> (p 2) src-end))             ;;; TODO: unescaping
                  (dst (<-> (1+ src-end) dst-end)))     ;;; TODO: unescaping
             (or (= (string-length src) (string-length dst))
                 (error "for `y', src and dst not the same length"))
             (amws! `(transliterate ',(map cons
                                           (string->list src)
                                           (string->list dst))))
             (command-ok! (+ 2 dst-end))))

          ;; HOLD <- PATT
          ((#\h)
           (amws! '(editing-buffer HOLD
                     (erase-buffer)
                     (insert PATT)))
           (p2-command-ok!))

          ;; HOLD <- HOLD \n PATT
          ((#\H)
           (amws! '(editing-buffer HOLD
                     (goto-char (point-max))
                     (insert #\newline)
                     (insert PATT)))
           (p2-command-ok!))

          ;; PATT <- HOLD
          ((#\g)
           (amws! '(editing-buffer PATT
                     (erase-buffer)
                     (insert HOLD)))
           (p2-command-ok!))

          ;; PATT <- PATT \n HOLD
          ((#\G)
           (amws! '(editing-buffer PATT
                     (goto-char (point-max))
                     (insert #\newline)
                     (insert HOLD)))
           (p2-command-ok!))

          ;; PATT <-> HOLD
          ((#\x)
           (amws! '(let ((tmp HOLD))
                     (set! HOLD PATT)
                     (set! PATT tmp)))
           (p2-command-ok!))

          ;; display line number
          ((#\=)
           (amws! '(write-line lln))
           (p2-command-ok!))

          ;; delete until first newline, repeating
          ((#\D)
           (let ((j '(go #:%done-noprint)))
             (expj! j)
             (amws! `(if (editing-buffer PATT
                           (goto-char (point-min))
                           (search-forward "\n" (point-max) 1)
                           (delete-region (point-min) (point))
                           (= (point-min) (point-max)))
                         ,j
                         (go 0))))
           (p2-command-ok!))

          ;; add \n and fill
          ((#\N)
           (set! need-midstream-fill? #t)
           (amws! '(editing-buffer PATT
                     (goto-char (point-max))
                     (insert #\newline)
                     (if (eof-object? (fill! #t))
                         (exit #t)
                         (set! lln (1+ lln)))))
           (p2-command-ok!))

          ;; quit
          ((#\q)
           (check-one-only! #\q)
           (first-spew-then-amws! '(exit #t))
           (p2-command-ok!))

          ;; append
          ((#\a)
           (check-one-only! #\a)
           (or (char=? #\\ (c1))
               (error "expecting #\\\\ but got:" (c1)))
           (let ((text (read-backslash-lines! (nl))))
             (amws-tail-thunk! `(lambda () (display ,text)) #:append)
             (command-ok! pos)))

          ;; insert
          ((#\i)
           (check-one-only! #\i)
           (or (char=? #\\ (c1))
               (error "expecting #\\\\ but got:" (c1)))
           (let ((text (read-backslash-lines! (nl))))
             (amws! `(display ,text))
             (command-ok! pos)))

          ;; read file
          ((#\r)
           (check-one-only! #\r)
           (let ((filename (<!> (sw (p 1)) (nl))))
             (amws-tail-thunk!
              `(let ((text (false-if-exception (slurp ,filename))))
                 (if text
                     (lambda () (display text))
                     (lambda () #t)))
              #:read-file)
             (command-ok! pos)))

          ;; write file (stream)
          ((#\w)
           (let* ((filename (<!> (sw (p 1)) (nl)))
                  (spew (spewer-name filename #:output-filestream)))
             (amws! `(,spew))
             (command-ok! pos)))

          ;; begin group
          ((#\{)
           ;; sidestep flow analysis hair associated w/ proper-nesting model;
           ;; do "COND => BLOCK" as "(NOT COND) => (SKIP BLOCK)" straightaway
           (set! sense (not sense))
           (let* ((name (next-GE))
                  (j `(go ,(symbol->keyword name))))
             (set! groups (cons name groups))
             (expj! j)
             (amws! j))
           (p2-command-ok!))

          ;; end group
          ((#\})
           (check-zero-only! #\})
           (let ((name (if (null? groups)
                           (error "unexpected }")
                           (car groups))))
             (syma! (symbol->keyword name))
             (set! groups (cdr groups)))
           (loop (p 2) pc #f #f))

          ;; concatenate
          ((#\c)
           (or (char=? #\\ (c1))
               (error "expecting #\\\\ but got:" (c1)))
           (let ((text (read-backslash-lines! (nl)))
                 (j '(go #:%done-noprint)))
             (expj! j)
             (amws-tail-thunk! `(lambda () (display ,text)) #:concat
                               (cond ((vector? addr)
                                      `(= ,(vr addr 1) lln))
                                     ((pair? addr)
                                      (string->symbol
                                       (format #f "C~A-zonk?" pc)))
                                     (else #f))
                               j)
             (command-ok! pos)))

          ;; display unambiguously
          ((#\l)
           (amws! `(display-unambiguously
                    ,(or (string->number (<!> (sw (p 1)) (xx)))
                         0)))
           (command-ok! pos))

          ;; branch conditionally ("if true")
          ((#\t)
           (let* ((dest (<!> (sw (p 1)) (xx)))
                  (j `(go ,(if (string=? "" dest)
                               #:%done
                               (symbol->keyword (string->symbol dest))))))
             (expj! j)
             (amws! `(cond (global-status ,j))))
           (set! saved-r-ops (vector saved-r-ops))
           (command-ok! pos))

          ;;; commands specific to GNU sed

          ;; quit silently
          ((#\Q)
           (check-one-only! #\Q)
           (amws! '(exit #t))
           (p2-command-ok!))

          (else (error "unknown command:" (c0)))))))))

(define (thread symaddress conditions globaldefs linelpdefs insn-group 
explicit-j)
  ;; first patch jumps
  (for-each (lambda (go)
              (let ((jdest (cadr go)))
                (or (number? jdest)
                    (set-cdr! go (list (assq-ref symaddress jdest))))))
            (map cdr explicit-j))
  ;; then do the threading
  (let loop ((ls (cdr insn-group))
             (bb (let ((all (sort       ; basic block beginnings
                             (append
                              (map cdr symaddress)
                              (map 1+ (map car explicit-j))
                              (list 0 -1))
                             >)))
                   ;; eliminate dups
                   (let dup ((ls all))
                     (cond ((null? (cdr ls))
                            ;; CAR is #:%done pc
                            (cdr all))
                           ((= (car ls) (cadr ls))
                            (set-cdr! ls (cddr ls))
                            (dup ls))
                           (else
                            (dup (cdr ls)))))))
             (core (list #f (let ((done (car insn-group)))
                              `((,(car done)) ,@(cdr done))))))
    (if (null? ls)
        (list conditions                ; rv
              globaldefs
              linelpdefs
              `(let go ((pc 0))
                 (case pc
                   ((-1) #f)
                   ,@(cdr core))))
        (let* ((head (car ls))
               (pc (car head))
               (forms (cdr head))
               (bbb (car bb)))
          ;; augment partial block unconditionally
          (set-car! core
                    (cond ((car core)
                           => (lambda (prev)
                                `(,@forms ,@prev)))
                          ((not (assq pc explicit-j))
                           `(,@forms (go ,(1+ pc))))
                          ((let ((last (car (last-pair forms))))
                             (and (pair? last)
                                  (eq? 'cond (car last))
                                  (last-pair last)))
                           => (lambda (graft)
                                (set-cdr! graft `((else (go ,(1+ pc)))))
                                forms))
                          (else forms)))
          ;; for basic block beginning,
          (if (= pc bbb)
              ;; ... cap partial w/ `(PC)' and make space
              (let ((next-bb (cdr bb)))
                (set-car! core `((,pc) ,@(car core)))
                (loop (cdr ls) next-bb (cons #f core)))
              ;; ... otherwise keep looking
              (loop (cdr ls) bb core))))))

(define (elaborate do+spew conditions globaldefs linelpdefs edits)

  (define (cond->forms model)
    (map (lambda (spec)
           (apply model spec))
         conditions))

  (define (zonkname condvar)
    (symbol-append condvar '-zonk?))

  ;; rv -- the final form of the program
  `(lambda ()
     ;; global definitions must be done in order
     (let* ,(map (lambda (def)
                   `(,(car def) ,(cadr def)))
                 (reverse globaldefs))
       ;; loop over each line -- initially condition vars are false
       (let loop ((lln 1) ,@(cond->forms
                             (lambda (v u d)
                               `(,v #f))))
         (or (eof-object? (fill!))
             (let* (;; line-loop defs must be done in order
                    ,@(map (lambda (def)
                             `(,(car def) ,(cadr def)))
                           (reverse linelpdefs))
                    ;; condition var check: falling edge
                    ,@(cond->forms
                       (lambda (v u d)
                         `(,(zonkname v) (and ,v ,d)))))
               ;; condition var check plus change: rising edge
               ,@(cond->forms
                  (lambda (v u d)
                    `(or ,v (set! ,v ,u))))
               ;; execute the program, display pattern space if auto-print
               ,(do+spew edits)
               ;; tail thunks
               ,@(if (assq 'tail-thunks linelpdefs)
                     '((run-hook tail-thunks))
                     '())
               ;; loop, updating condition vars for falling edge
               (loop (1+ lln) ,@(cond->forms
                                 (lambda (v u d)
                                   `(if ,(zonkname v) #f ,v))))))))))

(define PATT (make-gap-buffer))         ; set by `fill!'
(define EOL? #f)                        ; set by `fill!'
(define ULT? #f)                        ; set by `fill!'
(define HOLD (make-gap-buffer))

(define (make-spew!-proc outp)
  (lambda ()
    (gb->port! PATT outp)
    (and EOL? (newline outp))))

(define fill! #f)
(define spew! (make-spew!-proc (current-output-port)))

(define *gxsed-module* (current-module))

(define (compile auto-print? program)
  (let* ((bighair (apply elaborate
                         (if auto-print?
                             (lambda (do-form) `(and ,do-form (spew!)))
                             identity)
                         (apply thread (parse auto-print? program))))
         (big-pic (eval-in-module bighair *gxsed-module*)))
    (begin
      (pretty-print bighair #:escape-strings? #t)
      (display "3...") (force-output) (sleep 1)
      (display "2...") (force-output) (sleep 1)
      (display "1...") (force-output) (sleep 1)
      (display "0\n") (force-output))
    (lambda (getline)
      (set! fill! (lambda noerase
                    (let* ((pair (getline))
                           (line (car pair))
                           (eolc (cdr pair)))
                      (if (eof-object? line)
                          line
                          (begin
                            (set! EOL? (char? eolc))
                            (editing-buffer PATT
                              (and (null? noerase) (erase-buffer))
                              (insert line)))))))
      (big-pic))))

(define (getl<-filenames filenames)
  (let ((ports (if (null? filenames)
                   (list (current-input-port))
                   (map open-input-file filenames)))
        (ring (let ((ls (list #f #f)))
                (set-cdr! (cdr ls) ls)
                ls))
        (init? #t)
        (eof #f))
    (define (next)
      (or eof (let ((v (read-line (car ports) 'split)))
                (cond ((eof-object? (car v))
                       (set! ports (cdr ports))
                       (if (null? ports)
                           (begin (set! eof v) v)
                           (next)))
                      (else v)))))
    (define (getl)
      (cond (init? (set-car! (cdr ring) (next))
                   (set! init? #f)))
      (set-car! ring (next))
      (and (eof-object? (car (car ring)))
           (set! ULT? #t))
      (set! ring (cdr ring))
      (car ring))
    ;; rv
    getl))

(define (gxsed/qop qop)
  (let* ((take-first? (not (or (qop 'expression) (qop 'file))))
         (inspecs ((if take-first? cdr identity) (qop '()))))
    ((compile (not (qop 'quiet))
              (if take-first?
                  (string-append (car (qop '())) "\n")
                  (let ((cmp (if (null? inspecs) "" (car inspecs))))
                    (let loop ((acc '()) (ls (cdr (qop #:full-args))))
                      (define (collect p)
                        (loop (cons (string-append (p (cadr ls)) "\n") acc)
                              (cddr ls)))
                      (if (or (null? ls) (string=? cmp (car ls)))
                          (apply string-append (reverse acc))
                          (case (string->symbol (car ls))
                            ((-e --expression) (collect identity))
                            ((-f --file) (collect slurp))
                            ((-n --quiet) (loop acc (cdr ls)))
                            (else (error "unrecognized option:"
                                         (car ls)))))))))
     (getl<-filenames inspecs))))

(define (main args)
  (HVQC-MAIN args gxsed/qop
             '(usage . commentary)
             '(package . "ttn-do")
             '(version . "0.0")
             `(option-spec (quiet (single-char #\n))
                           (expression (single-char #\e) (value #t)
                                       (merge-multiple? #t))
                           (file (single-char #\f) (value #t)
                                 (predicate ,file-exists?)
                                 (merge-multiple? #t)))))

;;; gxsed ends here




reply via email to

[Prev in Thread] Current Thread [Next in Thread]