guile-user
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: regexp character classes not supported?


From: Marco Maggi
Subject: Re: regexp character classes not supported?
Date: Sun, 30 Dec 2012 12:38:44 +0100

Limbo Peng wrote:

> On Sat, Dec 29, 2012 at 1:22 AM, Mark H Weaver <address@hidden> wrote:

>     Regular expression syntax is not standardized, and there are
>     several
>     different variants.  The "\d" syntax for character classes is a
>     non-standard perl extension, and is not supported by Guile.


> Thx...seems that I've been taking such syntax for granted for a long
> time :(

  Shameless plug: if  you do not mind  installing stuff, you
can try the  regexp library re2[1] (written  in C++) through
its C wrapper CRE2[2][3].

[1] <http://code.google.com/p/re2/>
[2] <http://github.com/marcomaggi/cre2/>
[3] <http://code.google.com/p/cre2/downloads/list>

  Here  is a  Guile program  making  use of  it through  the
foreign  functions interface  (sorry for  the R6RS  code, it
also needs to be polished here and there):

;; guile-cre2.sps --
;;
;; Show off CRE2 with Guile.

#!r6rs
(import (rnrs)
  (system foreign)
  (ice-9 format))

(define-syntax begin0
  (syntax-rules ()
    ((_ ?expr0 ?expr ...)
     (call-with-values
         (lambda () ?expr0)
       (lambda args ?expr ... (apply values args))))))

(define-syntax unwind-protect
  ;;Not general, but enough.
  (syntax-rules ()
    ((_ ?body ?cleanup0 ?cleanup ...)
     (let ((cleanup (lambda () ?cleanup0 ?cleanup ...)))
       (with-exception-handler
           (lambda (E) (cleanup) (raise E))
         (lambda () (begin0 ?body (cleanup))))))))

(define (main)
  (let* ((ptn     "(ciao) (hello)")
         (ptn.str (string->pointer ptn))
         (ptn.len (string-length ptn))
         (opt     (cre2_opt_new))
         (rex     (cre2_new ptn.str ptn.len opt)))
    (unwind-protect
        (let* ((txt     "ciao hello")
               (txt.str (string->pointer txt))
               (txt.len (string-length txt))
               (nmatch  3)
               (matches (make-cre2_string_t nmatch))
               (ranges  (make-cre2_range_t  nmatch)))
          (let ((rv (cre2_match rex
                                txt.str txt.len 0 txt.len
                                CRE2_UNANCHORED matches nmatch)))
            (when (positive? rv)
              (cre2_strings_to_ranges txt.str ranges matches nmatch)
              (let ((R (parse-cre2_range_t ranges nmatch)))
                (print "Full match: ~s\n"
                       (substring txt (list-ref R 0) (list-ref R 1)))
                (print "1st submatch: ~s\n"
                       (substring txt (list-ref R 2) (list-ref R 3)))
                (print "2nd submatch: ~s\n"
                       (substring txt (list-ref R 4) (list-ref R 5)))
                ))))
      (cre2_delete rex)
      (cre2_opt_delete opt))))

(define cre2
  (dynamic-link "libcre2.so"))

(define cre2_new
  (let* ((ptr     (dynamic-func "cre2_new" cre2))
         (callout (pointer->procedure '* ptr (list '* int '*))))
    (lambda (ptn.str ptn.len options)
      (callout ptn.str ptn.len options))))

(define cre2_delete
  (let* ((ptr     (dynamic-func "cre2_delete" cre2))
         (callout (pointer->procedure void ptr (list '*))))
    (lambda (rex)
      (callout rex))))

(define cre2_opt_new
  (let* ((ptr     (dynamic-func "cre2_opt_new" cre2))
         (callout (pointer->procedure '* ptr '())))
    (lambda ()
      (callout))))

(define cre2_opt_delete
  (let* ((ptr     (dynamic-func "cre2_opt_delete" cre2))
         (callout (pointer->procedure void ptr (list '*))))
    (lambda (options)
      (callout options))))

(define cre2_match
  (let* ((ptr     (dynamic-func "cre2_match" cre2))
         (callout (pointer->procedure
                   int ptr (list '* '* int
                                 int int int '* int))))
    (lambda (rex txt.str txt.len txt.start txt.end anchor match nmatch)
      (callout rex
               txt.str txt.len txt.start txt.end
               anchor match nmatch))))

(define cre2_strings_to_ranges
  (let* ((ptr     (dynamic-func "cre2_strings_to_ranges" cre2))
         (callout (pointer->procedure
                   void ptr (list '* '* '* int))))
    (lambda (txt.str ranges strings nmatch)
      (callout txt.str ranges strings nmatch))))

(define CRE2_UNANCHORED 1)

(define (make-cre2_string_t nmatch)
  (do ((i 0 (+ 1 i))
       (T '() (append (list '* int) T))
       (V '() (append (list %null-pointer 0) V)))
      ((= i nmatch)
       (make-c-struct T V))))

(define (make-cre2_range_t nmatch)
  (do ((i 0 (+ 1 i))
       (T '() (append (list long long) T))
       (V '() (append '(0 0) V)))
      ((= i nmatch)
       (make-c-struct T V))))

(define (parse-cre2_string_t S nmatch)
  (do ((i 0 (+ 1 i))
       (T '() (append (list '* int) T)))
      ((= i nmatch)
       (parse-c-struct S T))))

(define (parse-cre2_range_t S nmatch)
  (do ((i 0 (+ 1 i))
       (T '() (append (list long long) T)))
      ((= i nmatch)
       (parse-c-struct S T))))

(define (print template . args)
  (apply format (current-output-port) template args))

(main)

;;; end of file

-- 
Marco Maggi



reply via email to

[Prev in Thread] Current Thread [Next in Thread]