[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
read-text-outline 1.3
From: |
Thien-Thi Nguyen |
Subject: |
read-text-outline 1.3 |
Date: |
Fri, 05 Apr 2002 17:59:12 -0800 |
generalized from 1.2. (guile TODO summarization now a SMOP away... :-)
thi
_____________________________________________
#!/bin/sh
# aside from this initial boilerplate, this is actually -*- scheme -*- code
main='(module-ref (resolve-module '\''(scripts read-text-outline)) '\'main')'
exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@"
!#
;;; read-text-outline --- Read a text outline and display it as a sexp
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2, or
;; (at your option) any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING. If not, write to
;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
;; Boston, MA 02111-1307 USA
;;; Author: Thien-Thi Nguyen <address@hidden>
;;; Commentary:
;; Usage: read-text-outline OUTLINE
;;
;; Scan OUTLINE file and display a list of trees, the structure of
;; each reflecting the "levels" in OUTLINE. The recognized outline
;; format (used to indicate outline headings) is zero or more pairs of
;; leading spaces followed by "-". Something like:
;;
;; - a 0
;; - b 1
;; - c 2
;; - d 1
;; - e 0
;; - f 1
;; - g 2
;; - h 1
;;
;; In this example the levels are shown to the right. The output for
;; such a file would be the single line:
;;
;; (("a" ("b" "c") "d") ("e" ("f" "g") "h"))
;;
;; Basically, anything at the beginning of a list is a parent, and the
;; remaining elements of that list are its children.
;;
;;
;; Usage from a Scheme program: These two procs are exported:
;;
;; (read-text-outline . args) ; only first arg is used
;; (read-text-outline-silently port)
;; (make-text-outline-reader re specs)
;;
;; `make-text-outline-reader' returns a proc that reads from PORT and
;; returns a list of trees (similar to `read-text-outline-silently').
;;
;; RE is a regular expression (string) that is used to identify a header
;; line of the outline (as opposed to a whitespace line or intervening
;; text). RE must begin w/ a sub-expression to match the "level prefix"
;; of the line. You can use `level-submatch-number' in SPECS (explained
;; below) to specify a number other than 1, the default.
;;
;; Normally, the level of the line is taken directly as the length of
;; its level prefix. This often results in adjacent levels not mapping
;; to adjacent numbers, which confuses the tree-building portion of the
;; program, which expects top-level to be 0, first sub-level to be 1,
;; etc. You can use `level-substring-divisor' or `compute-level' in
;; SPECS to specify a constant scaling factor or specify a completely
;; alternative procedure, respectively.
;;
;; SPECS is an alist which may contain the following key/value pairs:
;;
;; - level-submatch-number NUMBER
;; - level-substring-divisor NUMBER
;; - compute-level PROC
;; - body-submatch-number NUMBER
;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...)
;;
;; The PROC value associated with key `compute-level' should take a
;; Scheme match structure (as returned by `regexp-exec') and return a
;; number, the normalized level for that line. If this is specified,
;; it takes precedence over other level-computation methods.
;;
;; Use `body-submatch-number' if RE specifies the whole body, or if you
;; want to make use of the extra fields parsing. The `extra-fields'
;; value is a sub-alist, whose keys name additional fields that are to
;; be recognized. These fields along with `level' are set as object
;; properties of the final string ("body") that is consed into the tree.
;; If a field name ends in "?" the field value is set to be #t if there
;; is a match and the result is not an empty string, and #f otherwise.
;;
;;
;; Bugs and caveats:
;;
;; (1) Only the first file specified on the command line is scanned.
;; (2) TAB characters at the beginnings of lines are not recognized.
;; (3) Outlines that "skip" levels signal an error. In other words,
;; this will fail:
;;
;; - a 0
;; - b 1
;; - c 3 <-- skipped 2 -- error!
;; - d 1
;;
;;
;; TODO: Determine what's the right thing to do for skips.
;; Handle TABs.
;; Make line format customizable via longopts.
;;; Code:
(define-module (scripts read-text-outline)
:export (read-text-outline
read-text-outline-silently
make-text-outline-reader)
:use-module (ice-9 regex)
:autoload (ice-9 rdelim) (read-line)
:autoload (ice-9 getopt-long) (getopt-long))
(define (?? symbol)
(let ((name (symbol->string symbol)))
(string=? "?" (substring name (1- (string-length name))))))
(define (msub n)
(lambda (m)
(match:substring m n)))
(define (??-predicates pair)
(cons (car pair)
(if (?? (car pair))
(lambda (m)
(not (string=? "" (match:substring m (cdr pair)))))
(msub (cdr pair)))))
(define (make-line-parser re specs)
(let* ((rx (let ((fc (substring re 0 1)))
(make-regexp (if (string=? "^" fc)
re
(string-append "^" re)))))
(check (lambda (key)
(assq-ref specs key)))
(level-substring (msub (or (check 'level-submatch-number) 1)))
(extract-level (cond ((check 'compute-level)
=> (lambda (proc)
(lambda (m)
(proc m))))
((check 'level-substring-divisor)
=> (lambda (n)
(lambda (m)
(/ (string-length (level-substring m))
n))))
(else
(lambda (m)
(string-length (level-substring m))))))
(extract-body (cond ((check 'body-submatch-number)
=> msub)
(else
(lambda (m) (match:suffix m)))))
(misc-props! (cond ((check 'extra-fields)
=> (lambda (alist)
(let ((new (map ??-predicates alist)))
(lambda (obj m)
(for-each
(lambda (pair)
(set-object-property!
obj (car pair)
((cdr pair) m)))
new)))))
(else
(lambda (obj m) #t)))))
;; retval
(lambda (line)
(cond ((regexp-exec rx line)
=> (lambda (m)
(let ((level (extract-level m))
(body (extract-body m)))
(set-object-property! body 'level level)
(misc-props! body m)
body)))
(else #f)))))
(define (make-text-outline-reader re specs)
(let ((parse-line (make-line-parser re specs)))
;; retval
(lambda (port)
(let* ((all '(start))
(pchain (list))) ; parents chain
(let loop ((line (read-line port))
(prev-level -1) ; how this relates to the first input
; level determines whether or not we
; start in "sibling" or "child" mode.
; in the end, `start' is ignored and
; it's much easier to ignore parents
; than siblings (sometimes). this is
; not to encourage ignorance, however.
(tp all)) ; tail pointer
(or (eof-object? line)
(cond ((parse-line line)
=> (lambda (w)
(let* ((words (list w))
(level (object-property w 'level))
(diff (- level prev-level)))
(cond
;; sibling
((zero? diff)
;; just extend the chain
(set-cdr! tp words))
;; child
((positive? diff)
(or (= 1 diff)
(error "unhandled diff not 1:" diff line))
;; parent may be contacted by uncle later (kids
;; these days!) so save its level
(set-object-property! tp 'level prev-level)
(set! pchain (cons tp pchain))
;; "push down" car into hierarchy
(set-car! tp (cons (car tp) words)))
;; uncle
((negative? diff)
;; prune back to where levels match
(do ((p pchain (cdr p)))
((= level (object-property (car p) 'level))
(set! pchain p)))
;; resume at this level
(set-cdr! (car pchain) words)
(set! pchain (cdr pchain))))
(loop (read-line port) level words))))
(else (loop (read-line port) prev-level tp)))))
(set! all (car all))
(if (eq? 'start all)
'() ; wasteland
(cdr all))))))
(define read-text-outline-silently
(make-text-outline-reader "(([ ][ ])*)- *"
'((level-substring-divisor . 2))))
(define (read-text-outline . args)
(write (read-text-outline-silently (open-file (car args) "r")))
(newline)
#t) ; exit val
(define main read-text-outline)
;;; read-text-outline ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- read-text-outline 1.3,
Thien-Thi Nguyen <=