[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
updated module: (database fcookie)
From: |
Thien-Thi Nguyen |
Subject: |
updated module: (database fcookie) |
Date: |
Tue, 30 Nov 2004 16:10:28 +0100 |
folks,
this will appear in Guile 1.4.1.101. here is the ChangeLog entry:
2004-11-30 Thien-Thi Nguyen <address@hidden>
* fcookie.scm: Use module (srfi srfi-56).
No longer use module (database binconv).
(read-word-proc): Rewrite.
(write-word-proc): Likewise.
let's hope srfi 56 finalizes w/ `read-network-uint32' and
`write-network-uint32', otherwise an update will be required.
happy fortune-cookie munching,
thi
_______________________________________________________________________
;;; fcookie.scm --- cookie file read, cookie-index file read/write
;; Copyright (C) 2004 Free Software Foundation, Inc.
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this software; see the file COPYING. If not, write to
;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
;; Boston, MA 02111-1307 USA
;;
;; As a special exception, the Free Software Foundation gives permission
;; for additional uses of the text contained in its release of GUILE.
;;
;; The exception is that, if you link the GUILE library with other files
;; to produce an executable, this does not by itself cause the
;; resulting executable to be covered by the GNU General Public License.
;; Your use of that executable is in no way restricted on account of
;; linking the GUILE library code into it.
;;
;; This exception does not however invalidate any other reasons why
;; the executable file might be covered by the GNU General Public License.
;;
;; This exception applies only to the code released by the
;; Free Software Foundation under the name GUILE. If you copy
;; code from other Free Software Foundation releases into a copy of
;; GUILE, as the General Public License permits, the exception does
;; not apply to the code that you add in this way. To avoid misleading
;; anyone as to the status of such modified files, you must delete
;; this exception notice from them.
;;
;; If you write modifications of your own for GUILE, it is your choice
;; whether to permit this exception to apply to your modifications.
;; If you do not wish that, delete this exception notice.
;;; Commentary:
;; This module provides two procs:
;; (create-index-file! out-name in-name delim flags) => #t for success
;; (fortune-cookie cookie-filename [dat-filename]) => string
;;
;; A fortune cookie file is a text file containing groups of lines separated
;; by "delim lines", a specially chosen character (traditionally the percent
;; `%' character) on a line by itself. The first and last lines must also be
;; delim lines. Each group of lines thus delimited is a "cookie". Thus, a
;; file with N cookies has N+1 delim lines.
;;
;; A fortune cookie index file is a binary file consisting of a six-word
;; header followed by the offset table (each entry a word). A word is four
;; bytes in network (big-endian) order. The header is:
;;
;; version -- typically 1 for old files and 2 for newer ones
;; count -- number of cookies
;; longest -- number of bytes of longest cookie
;; shortest -- number of bytes of shortest cookie
;; flags -- logior of #x1 (random)
;; #x2 (ordered)
;; #x4 (rotated)
;; delim-char -- this is shifted to the MSB position
;; (the remaining bytes are #\nul)
;;
;; Traditionally, for cookie file foo, the index file is named foo.dat, but
;; that is not required.
;;; Code:
(define-module (database fcookie)
#:use-module ((ice-9 rdelim) #:select (read-line))
#:use-module ((srfi srfi-56) #:select (read-network-uint32
write-network-uint32))
#:use-module ((scripts slurp) #:select (slurp slurp-file!))
#:export (create-index-file!
fortune-cookie))
(define sub make-shared-substring)
(define (k name)
(case name
((#:format-version) 2) ; hmmm
((#:sizeof-word) 4)
((#:all-flags) '(#:random #:ordered #:rotated))
((#:random) #x1)
((#:ordered) #x2)
((#:rotated) #x4)
((#:bits-per-byte) 8) ; you never know...
((#:delim+newline) 2)
((#:all-headers) '(#:version #:count #:longest #:shortest
#:flags #:delim #:filler))))
(define (read-word-proc port)
(lambda () (read-network-uint32 port)))
(define (write-word-proc port)
(lambda (n) (write-network-uint32 n port)))
(define (words<-cookie-file filename delim flags)
(let* ((p (open-input-file filename))
(next (lambda () (read-line p 'concat)))
(count -1) (shortest #f) (longest #f)
(box (list #f))
(tp box)
(order? (memq #:ordered flags)))
;; If `order?', accumulate (OFFSET . COOKIE-TRIMMED-TO-FIRST-ALPHANUMERIC),
;; then sort ascending. Otherwise, accumulate OFFSET only.
(let loop ((line (next)) (acc '()))
(cond ((eof-object? line)
(close-port p)
(list* ; rv
;; header
(k #:format-version)
count
longest
shortest
(apply logior (map (lambda (flag)
(if (memq flag flags)
(k flag)
0))
(k #:all-flags)))
(ash (char->integer delim)
(* (1- (k #:sizeof-word))
(k #:bits-per-byte)))
;; offset table
(if order?
(map car (sort (cdr box) (lambda (a b)
(string<? (cdr a) (cdr b)))))
(cdr box))))
((and (= (k #:delim+newline) (string-length line))
(char=? delim (string-ref line 0)))
(let ((ofs (seek p 0 SEEK_CUR))
(prev (car tp)))
(and prev
(let ((len (- ofs
(if order?
(car prev)
prev)
(k #:delim+newline))))
(cond ((not shortest)
(set! shortest len)
(set! longest len))
(else
(set! shortest (min shortest len))
(set! longest (max longest len))))))
(set-cdr! tp (list
(if order?
(cons ofs
(let* ((full (apply string-append
(reverse acc)))
(len (string-length full)))
(do ((i 0 (1+ i)))
((or (= len i)
(let ((c (string-ref full i)))
(or (char-alphabetic? c)
(char-numeric? c))))
(sub full i)))))
ofs))))
(set! tp (cdr tp))
(set! count (1+ count))
(loop (next) '()))
(else
(loop (next) (cons line acc)))))))
;; Create index file @var{out-name} from cookie file @var{in-name}, separating
;; cookies by looking for char @var{delim} on a line by itself. Optional
;; @var{flags} are keywords:
;;
;; @table @code
;; @item #:random
;; Set bit 0 (corresponding to a mask of #x1) in the flags word in the header,
;; but do nothing else at the moment (FIXME).
;;
;; @item #:ordered
;; Set bit 1 (corresponding to a mask of #x2) in the flags word in the header,
;; and order the offsets by sorting the cookies with @code{string<?}, ignoring
;; non-alphanumeric leading characters.
;;
;; @item #:rotated
;; Set bit 2 (corresponding to a mask of #x4) in the flags word in the header,
;; to note that the cookies are @dfn{ROT13}.
;; @end table
;;
;; Return #t on success.
;;
(define (create-index-file! out-name in-name delim . flags)
(let* ((words (words<-cookie-file in-name delim flags))
(outp (open-output-file out-name))
(ww (write-word-proc outp)))
(for-each ww words)
(close-port outp)))
(define (grok-header readc readw)
(let ((info (map cons
(k #:all-headers)
(list (readw)
(readw)
(readw)
(readw)
(let ((w (readw)))
(let loop ((ls (k #:all-flags)) (acc '()))
(if (null? ls)
acc
(let ((flag (car ls)))
(loop (cdr ls)
(if (= 0 (logand (k flag) w))
acc
(cons flag acc)))))))
(readc)
(map (lambda ignored
(readc))
(iota (1- (k #:sizeof-word))))))))
;; rv
(lambda (name)
(assq-ref info name))))
(define (get-cookie cookie-file dat-file)
(let* ((port (open-input-file dat-file))
(readw (read-word-proc port))
(qh (grok-header (lambda () (read-char port)) readw))
(selection (random (qh #:count)))
(start (do ((i 0 (1+ i)))
((= i selection) (readw))
(readw)))
(need-scan? (memq #:ordered (qh #:flags)))
(len (if need-scan?
(min (qh #:longest)
(- (stat:size (stat cookie-file)) start))
(- (readw) start (k #:delim+newline))))
(cookie (make-string len)))
(close-port port)
(slurp-file! cookie cookie-file start len 0)
(and need-scan?
(let ((delim (qh #:delim))
(cookie-char=? (lambda (c n)
(char=? c (string-ref cookie n)))))
(let loop ((nl (string-index cookie #\nl 0)))
(and nl (if (and (<= (+ nl (k #:delim+newline)) len)
(cookie-char=? delim (1+ nl))
(cookie-char=? #\nl (+ nl 2)))
(set! cookie (sub cookie 0 (1+ nl)))
(loop (string-index cookie #\nl (1+ nl))))))))
(if (memq #:rotated (qh #:flags))
(list->string
(let ((a-n (char->integer #\a))
(A-n (char->integer #\A))
(rot (lambda (base n)
(integer->char (+ base (modulo (+ (- n base) 13) 26))))))
(map (lambda (c)
(let ((n (char->integer c)))
(cond ((char<=? #\a c #\z) (rot a-n n))
((char<=? #\A c #\Z) (rot A-n n))
(else c))))
(string->list cookie))))
cookie)))
;; Return a randomly-chosen string extracted from @var{cookie-file},
;; using the index file named by appending @file{.dat} to @var{cookie-file}.
;; Optional arg @var{dat-file} specifies the index file to use instead of the
;; default.
;;
;;-sig: (cookie-file [dat-file])
;;
(define (fortune-cookie cookie-file . dat-file)
(get-cookie cookie-file (if (null? dat-file)
(string-append cookie-file ".dat")
(car dat-file))))
;; Notes
;;
;; This module is actually a practice run (fodder for factoring) for some
;; future `(database isam)' module, since it does in fact implement indexed
;; sequential access methods --- not very sophisticated but it's a start.
;; And who can resist a fortune cookie?
;;; fcookie.scm ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- updated module: (database fcookie),
Thien-Thi Nguyen <=