>From b5b7d9232d5144a4296c3c9c60034628f8146eec Mon Sep 17 00:00:00 2001 From: Florian Pelz Date: Wed, 7 Aug 2019 23:48:59 +0200 Subject: [PATCH 2/6] website: Add custom xgettext implementation that extracts from nested sexps. * website/scripts/sexp-xgettext.scm: New file for generating a PO file. * website/sexp-xgettext.scm: New file with module for looking up translations. * website/i18n-howto: New file with usage instructions. --- website/i18n-howto.txt | 63 +++ website/scripts/sexp-xgettext.scm | 814 ++++++++++++++++++++++++++++++ website/sexp-xgettext.scm | 454 +++++++++++++++++ 3 files changed, 1331 insertions(+) create mode 100644 website/i18n-howto.txt create mode 100644 website/scripts/sexp-xgettext.scm create mode 100644 website/sexp-xgettext.scm diff --git a/website/i18n-howto.txt b/website/i18n-howto.txt new file mode 100644 index 0000000..66d19d0 --- /dev/null +++ b/website/i18n-howto.txt @@ -0,0 +1,63 @@ +With sexp-xgettext, arbitrary s-expressions can be marked for +translations (not only strings like with normal xgettext). + +S-expressions can be marked with G_ (simple marking for translation), +N_ (“complex” marking with different forms depending on number like +ngettext), C_ (“complex” marking distinguished from other markings by +a msgctxt like pgettext) or NC_ (mix of both). + +Marking a string for translation behaves like normal gettext. Marking +a parenthesized expression (i.e. a list or procedure call) extracts +each string from the parenthesized expression. If a symbol, keyword +or other parenthesized expression occurs between the strings, it is +extracted as an XML element. Expressions before or after all strings +are not extracted. If strings from a parenthesized sub-expression +shall be extracted too, the sub-expression must again be marked with +G_ unless it is the only sub-expression or it follows a quote, +unquote, quasiquote or unquote-splicing. The order of XML elements +can be changed in the translation to produce a different ordering +inside a parenthesized expression. If a string shall not be extracted +from a marked expression, it must be wrapped, for example by a call to +the identity procedure. Be careful when marking non-SHTML content +such as procedure calls for translation: Additional strings will be +inserted between non-string elements. + +Known issues: + +* Line numbers are sometimes off. + +* Some less important other TODOs in the comments. + +===== + +To create a pot file: + +guile scripts/sexp-xgettext.scm -f po/POTFILES -o po/guix-website.pot --from-code=UTF-8 --copyright-holder="Ludovic Courtès" --package-name="guix-website" --msgid-bugs-address="address@hidden" --keyword=G_ --keyword=N_:1,2 --keyword=C_:1c,2 --keyword=NC_:1c,2,3 + +To create a po file from a pot file, do the usual: + +cd po +msginit -l de --no-translator + +To merge an existing po file with a new pot file: + +cd po +msgmerge -U de.po guix-website.pot + +To update mo files: + +mkdir -p de/LC_MESSAGES +cd po +msgfmt de.po +cd .. +mv po/messages.mo de/LC_MESSAGES/guix-website.mo + +To test: + +guix environment --ad-hoc haunt +GUILE_LOAD_PATH=$(guix build guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH GUIX_WEB_SITE_LOCAL=yes haunt build +GUILE_LOAD_PATH=$(guix build guile-syntax-highlight)/share/guile/site/2.2:$GUILE_LOAD_PATH haunt serve + +For checking for errors / debugging newly marked files e.g.: + +GUILE_LOAD_PATH=.:$(guix build haunt)/share/guile/site/2.2:$GUILE_LOAD_PATH guile apps/base/templates/about.scm diff --git a/website/scripts/sexp-xgettext.scm b/website/scripts/sexp-xgettext.scm new file mode 100644 index 0000000..634c716 --- /dev/null +++ b/website/scripts/sexp-xgettext.scm @@ -0,0 +1,814 @@ +;;; GNU Guix web site +;;; Copyright © 2019 Florian Pelz +;;; +;;; This file is part of the GNU Guix web site. +;;; +;;; The GNU Guix web site is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU Affero General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; The GNU Guix web site is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU Affero General Public License for more details. +;;; +;;; You should have received a copy of the GNU Affero General Public License +;;; along with the GNU Guix web site. If not, see . + +(use-modules (ice-9 getopt-long) + (ice-9 match) + (ice-9 peg) + (ice-9 receive) + (ice-9 regex) + (ice-9 textual-ports) + (srfi srfi-1) ;lists + (srfi srfi-9) ;records + (srfi srfi-19) ;date + (srfi srfi-26)) ;cut + +;;; This script imitates xgettext, but combines nested s-expressions +;;; in the input Scheme files to a single msgstr in the PO file. It +;;; works by first reading the keywords specified on the command-line, +;;; then dealing with the remaining options using (ice-9 getopt-long). +;;; Then, it parses each Scheme file in the POTFILES file specified +;;; with --files-from and constructs po entries from it. For parsing, +;;; a PEG is used instead of Scheme’s read, because we can extract +;;; comments with it. The po entries are written to the PO file +;;; specified with the --output option. Scheme code can then use the +;;; (sexp-xgettext) module to deconstruct the msgids looked up in the +;;; PO file via gettext. + +(define-record-type + (make-keyword-spec id sg pl c total xcomment) + keyword-spec? + (id keyword-spec-id) ;identifier + (sg keyword-spec-sg) ;arg with singular + (pl keyword-spec-pl) ;arg with plural + (c keyword-spec-c) ;arg with msgctxt or 'mixed if sg is mixed msgctxt|singular + (total keyword-spec-total) ;total number of args + (xcomment keyword-spec-xcomment)) + +(define (complex-keyword-spec? keyword-spec) + (match keyword-spec + (($ _ _ #f #f _ #f) #f) + (else #t))) + +(define %keyword-specs + ;; List of valid xgettext keyword options. + ;; Read keywords from command-line options. + (let loop ((opts (cdr (command-line)));command-line options from + ;which to extract --keyword + ;options + (remaining-opts '()) ;unhandled opts + (specs '())) + (define (string->integer str) + (if (string-match "[0-9]+" str) + (string->number str) + (error "Not a decimal integer."))) + (define* (argnums->spec id #:optional (argnums '())) + (let loop ((sg #f) + (pl #f) + (c #f) + (total #f) + (xcomment #f) + (argnums argnums)) + (match argnums + (() (make-keyword-spec id + (if sg sg 1) + pl + c + total + xcomment)) + ((arg . argnums) + (cond + ((string-suffix? "c" arg) + (cond (c (error "c suffix clashes")) + (else + (let* ((number-str (string-drop-right arg 1)) + (number (string->integer number-str))) + (loop sg pl number total xcomment argnums))))) + ((string-suffix? "g" arg) + (cond + (sg (error "Only first argnum can have g suffix.")) + (c (error "g suffix clashes.")) + (else + (let* ((number-str (string-drop-right arg 1)) + (number (string->integer number-str))) + (loop number #f 'mixed total xcomment argnums))))) + ((string-suffix? "t" arg) + (cond (total (error "t suffix clashes")) + (else + (let* ((number-str (string-drop-right arg 1)) + (number (string->integer number-str))) + (loop sg pl c number xcomment argnums))))) + ((string-suffix? "\"" arg) + (cond (xcomment (error "xcomment clashes")) + (else + (let* ((comment (substring arg + 1 + (- (string-length arg) 1)))) + (loop sg pl c total comment argnums))))) + (else + (let* ((number (string->integer arg))) + (if sg + (if pl + (error "Too many argnums.") + (loop sg number c total xcomment argnums)) + (loop number #f c total xcomment argnums))))))))) + + (define (string->spec str) ;see `info xgettext` + (match (string-split str #\:) + ((id) (argnums->spec id)) + ((id argnums) + (argnums->spec id (string-split argnums #\,))))) + (match opts + (() (begin + ;; remove recognized --keyword command-line options: + (set-program-arguments (cons (car (command-line)) + (reverse remaining-opts))) + specs)) + ((current-opt . rest) + (cond + ((string=? "--" current-opt) specs) + ((string-prefix? "--keyword=" current-opt) + (let ((keyword (string-drop current-opt (string-length "--keyword=")))) + (loop rest remaining-opts (cons (string->spec keyword) specs)))) + ((or (string=? "--keyword" current-opt) + (string=? "-k" current-opt)) + (let ((next-opt (car rest))) + (loop (cdr rest) + remaining-opts + (cons (string->spec next-opt) specs)))) + (else (loop rest (cons current-opt remaining-opts) specs))))))) + +;;; Other options are not repeated, so we can use getopt-long: + +(define %options ;; Corresponds to what is documented at `info xgettext`. + (let ((option-spec + `((files (single-char #\f) (value #t)) + (directory (single-char #\D) (value #t)) + (default-domain (single-char #\d) (value #t)) + (output (single-char #\o) (value #t)) + (output-dir (single-char #\p) (value #t)) + (from-code (value #t)) + (join-existing (single-char #\j) (value #f)) + (exclude-file (single-char #\x) (value #t)) + (add-comments (single-char #\c) (value #t)) + + ;; Because getopt-long does not support repeated options, + ;; we took care of --keyword options further up. + ;; (keyword (single-char #\k) (value #t)) + + (flag (value #t)) + (force-po (value #f)) + (indent (single-char #\i) (value #f)) + (no-location (value #f)) + (add-location (single-char #\n) (value #t)) + (width (single-char #\w) (value #t)) + (no-wrap (value #f)) + (sort-output (single-char #\s) (value #f)) + (sort-by-file (single-char #\F) (value #f)) + (omit-header (value #f)) + (copyright-holder (value #t)) + (foreign-user (value #f)) + (package-name (value #t)) + (package-version (value #t)) + (msgid-bugs-address (value #t)) + (msgstr-prefix (single-char #\m) (value #t)) + (msgstr-suffix (single-char #\m) (value #t)) + (help (value #f)) + (pack (value #f))))) + (getopt-long (command-line) option-spec))) + + +(define parse-scheme-file + ;; This procedure parses FILE and returns a parse tree. + (let () + ;;TODO: Optionally ignore case. + (define-peg-pattern NL all "\n") + (define-peg-pattern comment all (and ";" + (* (and peg-any + (not-followed-by NL))) + (and peg-any (followed-by NL)))) + (define-peg-pattern empty none (or " " "\t")) + (define-peg-pattern whitespace body (or empty NL)) + (define-peg-pattern quotation body (or "'" "`" "," ",@")) + ;TODO: Allow user to specify + ;other quote reader macros to + ;be ignored and also ignore + ;quote spelled out without + ;reader macro. + (define-peg-pattern open body (and (? quotation) + (or "(" "[" "{"))) + (define-peg-pattern close body (or ")" "]" "}")) + (define-peg-pattern string body (and (followed-by "\"") + (* (or "\\\"" + (and (or NL peg-any) + (not-followed-by "\"")))) + (and (or NL peg-any) + (followed-by "\"")) + "\"")) + (define-peg-pattern token all (or string + (and + (not-followed-by open) + (not-followed-by close) + (not-followed-by comment) + (* (and peg-any + (not-followed-by open) + (not-followed-by close) + (not-followed-by comment) + (not-followed-by string) + (not-followed-by whitespace))) + (or + (and peg-any (followed-by open)) + (and peg-any (followed-by close)) + (and peg-any (followed-by comment)) + (and peg-any (followed-by string)) + (and peg-any (followed-by whitespace)) + (not-followed-by peg-any))))) + (define-peg-pattern list all (or (and (? quotation) "(" program ")") + (and (? quotation) "[" program "]") + (and (? quotation) "{" program "}"))) + (define-peg-pattern t-or-s body (or token list)) + (define-peg-pattern program all (* (or whitespace + comment + t-or-s))) + (lambda (file) + (call-with-input-file file + (lambda (port) + ;; It would be nice to match port directly without + ;; converting to a string first, but apparently guile cannot + ;; do that yet. + (let ((string (get-string-all port))) + (peg:tree (match-pattern program string)))))))) + + +(define-record-type + (make-po-entry ecomments ref flags ctxt id idpl) + po-entry? +;;; irrelevant: (tcomments po-entry-tcomments) ;translator-comments + (ecomments po-entry-ecomments) ;extracted-comments + (ref po-entry-ref) ;reference + (flags po-entry-flags) +;;; irrelevant: (prevctxt po-entry-prevctxt) ;previous-ctxt +;;; irrelevant: (prev po-entry-prev) ;previous-translation + (ctxt po-entry-ctxt) ;msgctxt + (id po-entry-id) ;msgid + (idpl po-entry-idpl) ;msgid-plural +;;; irrelevant: (str po-entry-str) ;msgstr string or association list +;;; ;integer to string + ) + +(define (po-equal? po1 po2) + "Returns whether PO1 and PO2 have equal ctxt, id and idpl." + (and (equal? (po-entry-ctxt po1) (po-entry-ctxt po2)) + (equal? (po-entry-id po1) (po-entry-id po2)) + (equal? (po-entry-idpl po1) (po-entry-idpl po2)))) + +(define (combine-duplicate-po-entries list) + "Returns LIST with duplicate po entries replaced by a single PO +entry with both refs." + (let loop ((remaining list)) + (match remaining + (() '()) + ((head . tail) + (receive (before from) + (break (cut po-equal? head <>) tail) + (cond + ((null? from) (cons head (loop tail))) + (else + (loop + (cons + (match head + (($ ecomments1 ref1 flags ctxt id idpl) + (match (car from) + (($ ecomments2 ref2 _ _ _ _) + (let ((ecomments (if (or ecomments1 ecomments2) + (append (or ecomments1 '()) + (or ecomments2 '())) + #f)) + (ref (if (or ref1 ref2) + (string-join + (cons + (or ref1 "") + (cons + (or ref2 "") + '()))) + #f))) + (make-po-entry ecomments ref flags ctxt id idpl)))))) + (append before (cdr from))))))))))) + +(define (write-po-entry po-entry) + (define (prepare-text text) + "If TEXT is false, returns #f. Otherwise corrects the formatting +of TEXT by escaping backslashes and newlines and enclosing TEXT in +quotes. Note that Scheme’s write is insufficient because it would +escape far more. TODO: Strings should be wrappable to a maximum line +width." + (and text + (string-append "\"" + (with-output-to-string + (lambda () + (call-with-input-string text + (lambda (port) + (let loop ((c (get-char port))) + (unless (eof-object? c) + (case c + ((#\\) (display "\\")) + ((#\newline) (display "\\n")) + (else (write-char c))) + (loop (get-char port)))))))) + "\""))) + (define (write-component c prefix) + (when c + (begin (display prefix) + (display " ") + (display c) + (newline)))) + (match po-entry + (($ ecomments ref flags ctxt id idpl) + (let ((prepared-ctxt (prepare-text ctxt)) + (prepared-id (prepare-text id)) + (prepared-idpl (prepare-text idpl))) + (when ecomments + (for-each + (lambda (line) + (write-component line "#.")) + (reverse ecomments))) + (write-component ref "#:") + (write-component (and flags (string-join flags ", ")) "#,") + (write-component prepared-ctxt "msgctxt") + (write-component prepared-id "msgid") + (write-component prepared-idpl "msgid_plural") + (display "msgstr \"\"") + (newline))))) + +(define %comments-line + (make-parameter #f)) + +(define %ecomments-string + (make-parameter #f)) + +(define (update-ecomments-string! str) + "Sets the value of the parameter object %ecomments-string if str is +an ecomments string. An ecomments string is extracted from a comment +because it starts with TRANSLATORS or a key specified with +--add-comments." ;TODO: Support for other keys is missing. + (cond + ((not str) (%ecomments-string #f)) + ((= (1+ (or (%comments-line) -42)) (or (%line-number) 0)) + (let ((m (string-match ";+[ \t]*(.*)" str))) + (when m + (%comments-line (%line-number)) + (%ecomments-string + (if (%ecomments-string) + (cons (match:substring m 1) (%ecomments-string)) + (list (match:substring m 1))))))) + (else + (let ((m (string-match ";+[ \t]*(TRANSLATORS:.*)" str))) + (if m + (begin + (%comments-line (%line-number)) + (%ecomments-string + (if (%ecomments-string) + (cons (match:substring m 1) (%ecomments-string)) + (list (match:substring m 1))))) + (%ecomments-string '#f)))))) + +(define %file-name + (make-parameter #f)) + +(define (update-file-name! name) + "Sets the value of the parameter object %file-name to NAME." + (%file-name name)) + +(define %old-line-number + (make-parameter #f)) + +(define (update-old-line-number! number) + "Sets the value of the parameter object %old-line-number to NUMBER." + (%old-line-number number)) + +(define %line-number + (make-parameter #f)) + +(define (update-line-number! number) + "Sets the value of the parameter object %line-number to NUMBER." + (%line-number number)) + +(define (incr-line-number!) + "Increments the value of the parameter object %line-number by 1." + (%line-number (1+ (%line-number)))) + +(define (incr-line-number-for-each-nl! list) + "Increments %line-number once for each NL recursively in LIST. Does +nothing if LIST is no list but e.g. an empty 'program." + (when (list? list) + (for-each + (lambda (part) + (match part + ('NL (incr-line-number!)) + ((? list?) (incr-line-number-for-each-nl! part)) + (else #f))) + list))) + +(define (current-ref) + "Returns the location field for a PO entry." + (let ((add (option-ref %options 'add-location 'full))) + (cond + ((option-ref %options 'no-location #f) #f) + ((eq? add 'full) + (string-append (%file-name) ":" (number->string (%line-number)))) + ((eq? add 'file) + (%file-name)) + ((eq? add 'never) + #f)))) + +(define (make-simple-po-entry msgid) + (let ((po (make-po-entry + (%ecomments-string) + (current-ref) + #f ;TODO: Use scheme-format for format strings? + #f ;no ctxt + msgid + #f))) + (update-ecomments-string! #f) + po)) + + +(define (matching-keyword id) + "Returns the keyword-spec whose identifier is the same as ID, or #f +if ID is no string or no such keyword-spec exists." + (and (symbol? id) + (let ((found (member (symbol->string id) + %keyword-specs + (lambda (id spec) + (string=? id (keyword-spec-id spec)))))) + (and found (car found))))) + +(define (nth-exp program n) + "Returns the Nth 'token or 'list inside the PROGRAM parse tree or #f +if no tokens or lists exist." + (let loop ((i 0) + (rest program)) + (define (on-hit exp) + (if (= i n) exp + ;; else: + (loop (1+ i) (cdr rest)))) + (match rest + (() #f) + ((('token . _) . _) (on-hit (car rest))) + ((('list open-paren exp close-paren) . _) (on-hit (car rest))) + ((_ . _) (loop i (cdr rest))) + (else #f)))) + +(define (more-than-one-exp? program) + "Returns true if PROGRAM consiste of more than one expression." + (if (matching-keyword (token->string-symbol-or-keyw (nth-exp program 0))) + (nth-exp program 2) ;if there is third element, keyword does not count + (nth-exp program 1))) + +(define (token->string-symbol-or-keyw tok) + "For a parse tree TOK, if it is a 'token parse tree, returns its +value as a string, symbol or #:-keyword, otherwise returns #f." + (match tok + (('token (parts ...) . remaining) + ;; This is a string with line breaks in it. + (with-input-from-string + (string-append + (apply string-append + (map-in-order + (lambda (part) + (match part + (('NL _) + (begin (incr-line-number!) + "\n")) + (else part))) + parts)) + (car remaining)) + (lambda () + (read)))) + (('token exp) + (with-input-from-string exp + (lambda () + (read)))) + (else #f))) + +(define (complex-marked-list->po-entries parse-tree) + "Checks if PARSE-TREE is marked by a keyword. If yes, for a complex +keyword spec, returns a list of po-entries for it. For a simple +keyword spec, returns the argument number of its singular form. +Otherwise returns #f." + (let* ((first (nth-exp parse-tree 0)) + (spec (matching-keyword (token->string-symbol-or-keyw first)))) + (if spec + (if ;if the identifier of a complex keyword occurs first + (complex-keyword-spec? spec) + ;; then make po entries for it + (match spec + (($ id sg pl c total xcomment) + (if (eq? c 'mixed) ; if msgctxt and singular msgid are in one string + (let* ((exp (nth-exp parse-tree sg)) + (val (token->string-symbol-or-keyw exp)) + (idx (if (string? val) (string-rindex val #\|)))) + (list + (let ((po (make-po-entry + (%ecomments-string) + (current-ref) + #f ;TODO: Use scheme-format for format strings? + (string-take val idx) + (string-drop val (1+ idx)) + #f))) ;plural forms are unsupported here + (update-ecomments-string! #f) + po))) + ;; else construct msgids + (receive (pl-id pl-entries) + (match pl + (#t (construct-msgid-and-po-entries + (nth-exp parse-tree pl))) + (#f (values #f '()))) + (receive (sg-id sg-entries) + (construct-msgid-and-po-entries + (nth-exp parse-tree sg)) + (cons + (let ((po (make-po-entry + (%ecomments-string) + (current-ref) + #f ;TODO: Use scheme-format for format strings? + (and c (token->string-symbol-or-keyw + (nth-exp parse-tree c))) + sg-id + pl-id))) + (update-ecomments-string! #f) + po) + (append sg-entries pl-entries))))))) + ;; else if it is a simple keyword, return the argnum: + (keyword-spec-sg spec)) + ;; if no keyword occurs, then false + #f))) + +(define (construct-po-entries parse-tree) + "Converts a PARSE-TREE resulting from a call to parse-scheme-file to +a list of po-entry records. Unlike construct-msgid-and-po-entries, +strings are not collected to a msgid. The list of po-entry records is +the return value." + (let ((entries (complex-marked-list->po-entries parse-tree))) + (cond + ((list? entries) entries) + ((number? entries) ;parse-tree yields a single, simple po entry + (update-old-line-number! (%line-number)) + (receive (id entries) + (construct-msgid-and-po-entries + (nth-exp parse-tree entries)) + (update-line-number! (%old-line-number)) + (let ((po (make-simple-po-entry id))) + (incr-line-number-for-each-nl! parse-tree) + (cons po entries)))) + (else ;search for marked translations in parse-tree + (match parse-tree + (() '()) + (('comment str) (begin + (update-ecomments-string! str) + '())) + (('NL _) (begin (incr-line-number!) '())) + (('token . _) (begin (incr-line-number-for-each-nl! parse-tree) '())) + (('list open-paren program close-paren) + (construct-po-entries program)) + (('program . components) + (append-map construct-po-entries components)) + ;; Note: PEG compresses empty programs to non-lists: + ('program + '())))))) + +(define* (tag counter prefix #:key (flavor 'start)) + "Formats the number COUNTER as a tag according to FLAVOR, which is +either 'start, 'end or 'empty for a start, end or empty tag, +respectively." + (string-append "<" + (if (eq? flavor 'end) "/" "") + prefix + (number->string counter) + (if (eq? flavor 'empty) "/" "") + ">")) + +(define-record-type + (make-construct-fold-state msgid-string maybe-part counter po-entries) + construct-fold-state? + ;; msgid constructed so far: + (msgid-string construct-fold-state-msgid-string) + ;; only append this if string follows: + (maybe-part construct-fold-state-maybe-part) + ;; counter for next tag: + (counter construct-fold-state-counter) + ;; complete po entries from marked sub-expressions: + (po-entries construct-fold-state-po-entries)) + +(define* (construct-msgid-and-po-entries parse-tree + #:optional + (prefix "")) + "Like construct-po-entries, but with two return values. The first +is an accumulated msgid constructed from all components in PARSE-TREE +for use in make-po-entry. Non-strings are replaced by tags containing +PREFIX. The second return value is a list of po entries for +sub-expressions marked with a complex keyword spec." + (match parse-tree + (() (values "" '())) + ;; Note: PEG compresses empty programs to non-lists: + ('program (values "" '())) + (('comment str) (begin + (update-ecomments-string! str) + (values "" '()))) + (('NL _) (begin (incr-line-number!) + (error "Program consists only of line break." + `(,(%file-name) ,(%line-number))))) + (('token . _) + (let ((maybe-string (token->string-symbol-or-keyw parse-tree))) + (if (string? maybe-string) + (values maybe-string '()) + (error "Single symbol marked for translation." + `(,maybe-string ,(%file-name) ,(%line-number)))))) + (('list open-paren program close-paren) + ;; parse program instead + (construct-msgid-and-po-entries program prefix)) + (('program (? matching-keyword)) + (error "Double-marked for translation." + `(,parse-tree ,(%file-name) ,(%line-number)))) + (('program . components) + ;; Concatenate strings in parse-tree to a new msgid and add an + ;; tag for each list in between. + (match + (fold + (lambda (component prev-state) + (match prev-state + (($ msgid-string maybe-part + counter po-entries) + (match component + (('comment str) (begin (update-ecomments-string! str) + prev-state)) + (('NL _) (begin (incr-line-number!) + prev-state)) + (('token . _) + (let ((maybe-string (token->string-symbol-or-keyw component))) + (cond + ((string? maybe-string) + ;; if string, append maybe-string to previous msgid + (make-construct-fold-state + (string-append msgid-string maybe-part maybe-string) + "" + counter + po-entries)) + ((and (more-than-one-exp? components) ;not the only symbol + (or (string-null? msgid-string) ;no string so far + (string-suffix? ">" msgid-string))) ;tag before + prev-state) ;then ignore + (else ;append tag representing the token + (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter) + po-entries))))) + (('list open-paren program close-paren) + (let ((first (nth-exp program 0))) + (incr-line-number-for-each-nl! list) + (match (complex-marked-list->po-entries program) + ((? list? result) + (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter) + (append result po-entries))) + (result + (cond + ((number? result) + (receive (id entries) + (construct-msgid-and-po-entries + program + (string-append prefix + (number->string counter) + ".")) + (make-construct-fold-state + (string-append msgid-string + maybe-part + (tag counter prefix + #:flavor 'start) + id + (tag counter prefix + #:flavor 'end)) + "" + (1+ counter) + (append entries po-entries)))) + ((not (more-than-one-exp? components)) + ;; Singletons do not need to be marked. + (receive (id entries) + (construct-msgid-and-po-entries + program + prefix) + (make-construct-fold-state + id + "" + counter + (append entries po-entries)))) + (else ;unmarked list + (if (string-null? msgid-string) + ;; then ignore + prev-state + ;; else: + (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter) + po-entries)))))))))))) + (make-construct-fold-state "" "" 1 '()) + components) + (($ msgid-string maybe-part counter po-entries) + (values msgid-string po-entries)))))) + +(define scheme-file->po-entries + (compose construct-po-entries + parse-scheme-file)) + +(define %files-from-port + (let ((files-from (option-ref %options 'files #f))) + (if files-from + (open-input-file files-from) + (current-input-port)))) + +(define %source-files + (let loop ((line (get-line %files-from-port)) + (source-files '())) + (if (eof-object? line) + (begin + (close-port %files-from-port) + source-files) + ;; else read file names before comment + (let ((before-comment (car (string-split line #\#)))) + (loop (get-line %files-from-port) + (append + (map match:substring (list-matches "[^ \t]+" line)) + source-files)))))) + +(define %output-po-entries + (fold (lambda (scheme-file po-entries) + (begin + (update-file-name! scheme-file) + (update-line-number! 1) + (update-old-line-number! #f) + (%comments-line #f) + (append (scheme-file->po-entries scheme-file) + po-entries))) + '() + %source-files)) + +(define %output-port + (let ((output (option-ref %options 'output #f)) + (domain (option-ref %options 'default-domain #f))) + (cond + (output (open-output-file output)) + (domain (open-output-file (string-append domain ".po"))) + (else (open-output-file "messages.po"))))) + +(with-output-to-port %output-port + (lambda () + (let ((copyright (option-ref %options 'copyright-holder + "THE PACKAGE'S COPYRIGHT HOLDER")) + (package (option-ref %options 'package-name "PACKAGE")) + (version (option-ref %options 'package-version #f)) + (bugs-email (option-ref %options 'msgid-bugs-address ""))) + (display "# SOME DESCRIPTIVE TITLE.\n") + (display (string-append "# Copyright (C) YEAR " copyright "\n")) + (display (string-append "# This file is distributed under the same \ +license as the " package " package.\n")) + (display "# FIRST AUTHOR , YEAR.\n") + (display "#\n") + (write-po-entry (make-po-entry #f #f '("fuzzy") #f "" #f)) + (display (string-append "\"Project-Id-Version: " + package + (if version + (string-append " " version) + "") + "\\n\"\n")) + (display (string-append "\"Report-Msgid-Bugs-To: " + bugs-email + "\\n\"\n")) + (display (string-append "\"POT-Creation-Date: " + (date->string (current-date) "~1 ~H:~M~z") + "\\n\"\n")) + (display "\"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n\"\n") + (display "\"Last-Translator: FULL NAME \\n\"\n") + (display "\"Language-Team: LANGUAGE \\n\"\n") + (display "\"Language: \\n\"\n") + (display "\"MIME-Version: 1.0\\n\"\n") + (display "\"Content-Type: text/plain; charset=UTF-8\\n\"\n") + (display "\"Content-Transfer-Encoding: 8bit\\n\"\n") + (for-each (lambda (po-entry) + (begin + (newline) + (write-po-entry po-entry))) + (combine-duplicate-po-entries %output-po-entries))))) diff --git a/website/sexp-xgettext.scm b/website/sexp-xgettext.scm new file mode 100644 index 0000000..45ee3df --- /dev/null +++ b/website/sexp-xgettext.scm @@ -0,0 +1,454 @@ +;;; GNU Guix web site +;;; Copyright © 2019 Florian Pelz +;;; +;;; This file is part of the GNU Guix web site. +;;; +;;; The GNU Guix web site is free software; you can redistribute it and/or modify it +;;; under the terms of the GNU Affero General Public License as published by +;;; the Free Software Foundation; either version 3 of the License, or (at +;;; your option) any later version. +;;; +;;; The GNU Guix web site is distributed in the hope that it will be useful, but +;;; WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU Affero General Public License for more details. +;;; +;;; You should have received a copy of the GNU Affero General Public License +;;; along with the GNU Guix web site. If not, see . + +(define-module (sexp-xgettext) + #:use-module (ice-9 local-eval) + #:use-module (ice-9 match) + #:use-module (ice-9 receive) + #:use-module (ice-9 regex) + #:use-module (srfi srfi-1) ;lists + #:use-module (srfi srfi-9) ;records + #:export (set-complex-keywords! + set-simple-keywords! + sgettext + sngettext + spgettext + snpgettext)) + +(define %complex-keywords + ;; Use set-complex-keywords! to change this to a list of keywords + ;; for sexp-xgettext functions other than sgettext. + (make-parameter '())) + +(define (set-complex-keywords! kw) + (%complex-keywords kw)) + +(define %simple-keywords + ;; Use set-simple-keywords! to change this to a list of keywords + ;; for sgettext. + (make-parameter '())) + +(define (set-simple-keywords! kw) + (%simple-keywords kw)) + +(define (gettext-keyword? id) + (or (member id (%complex-keywords)) + (member id (%simple-keywords)))) + +;;COPIED FROM scripts/sexp-xgettext.scm: +(define* (tag counter prefix #:key (flavor 'start)) + "Formats the number COUNTER as a tag according to FLAVOR, which is +either 'start, 'end or 'empty for a start, end or empty tag, +respectively." + (string-append "<" + (if (eq? flavor 'end) "/" "") + prefix + (number->string counter) + (if (eq? flavor 'empty) "/" "") + ">")) +;;END COPIED FROM scripts/sexp-xgettext.scm + +;;SEMI-COPIED FROM scripts/sexp-xgettext.scm +(define-record-type + (make-construct-fold-state msgid-string maybe-part counter) + construct-fold-state? + ;; msgid constructed so far + (msgid-string construct-fold-state-msgid-string) + ;; only append this if string follows: + (maybe-part construct-fold-state-maybe-part) + ;; counter for next tag: + (counter construct-fold-state-counter)) +;;END SEMI-COPIED FROM scripts/sexp-xgettext.scm + +(define (sexp->msgid exp) + "Return the msgid as constructed by construct-msgid-and-po-entries +in scripts/sexp-xgettext.scm from the expression EXP." + (let loop ((exp exp) + (prefix "")) + (match exp + (() "") + ((or ('quote inner-exp) + ('quasiquote inner-exp) + ('unquote inner-exp) + ('unquote-splicing inner-exp)) + (loop inner-exp prefix)) + ((first-component . components) + (cond + ((gettext-keyword? first-component) + (error "Double-marked for translation." exp)) + (else + (construct-fold-state-msgid-string + (fold + (lambda (component prev-state) + (match prev-state + (($ msgid-string maybe-part counter) + (let inner-loop ((exp component)) + (match exp + ((or (? symbol?) (? keyword?)) + (if (string-null? msgid-string) + ;; ignore symbols at the beginning + prev-state + ;; else make a tag for the symbol + (make-construct-fold-state + msgid-string + (string-append maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter)))) + ((? string?) + (make-construct-fold-state + (string-append msgid-string maybe-part exp) "" counter)) + ((? list?) + (match exp + (() ;ignore empty list + prev-state) + ((or (singleton) + ('quote singleton) + ('quasiquote singleton) + ('unquote singleton) + ('unquote-splicing singleton)) + (inner-loop singleton)) + ((components ...) + (cond + ((and (not (null? components)) + (member (car components) (%simple-keywords))) + ;; if marked for translation, insert inside tag + (make-construct-fold-state + (string-append msgid-string + maybe-part + (tag counter prefix #:flavor 'start) + (loop (cadr components) + (string-append + prefix + (number->string counter) + ".")) + (tag counter prefix #:flavor 'end)) + "" + (1+ counter))) + ;; else ignore if first + ((string-null? msgid-string) + prev-state) + ;; else make empty tag + (else (make-construct-fold-state + msgid-string + (string-append + maybe-part + (tag counter prefix #:flavor 'empty)) + (1+ counter)))))))))))) + (make-construct-fold-state "" "" 1) + exp))))) + ((? string?) exp) + (else (error "Single symbol marked for translation." exp))))) + +(define-record-type + (make-deconstruct-fold-state tagged maybe-tagged counter) + deconstruct-fold-state? + ;; XML-tagged expressions as an association list name->expression: + (tagged deconstruct-fold-state-tagged) + ;; associate this not-yet-tagged expression with pre if string + ;; follows, with post if not: + (maybe-tagged deconstruct-fold-state-maybe-tagged) + ;; counter for next tag: + (counter deconstruct-fold-state-counter)) + +(define (deconstruct exp msgstr) + "Return an s-expression like EXP, but filled with the content from +MSGSTR." + (define (find-empty-element msgstr name) + "Returns the regex match structure for the empty tag for XML +element of type NAME inside MSGSTR. If the element does not exist or +is more than the empty tag, #f is returned." + (string-match (string-append "<" (regexp-quote name) "/>") msgstr)) + (define (find-element-with-content msgstr name) + "Returns the regex match structure for the non-empty XML element +of type NAME inside MSGSTR. Submatch 1 is its content. If the +element does not exist or is just the empty tag, #f is returned." + (string-match (string-append "<" (regexp-quote name) ">" + "(.*)" + "") + msgstr)) + (define (get-first-element-name prefix msgstr) + "Returns the name of the first XML element in MSGSTR whose name +begins with PREFIX, or #f if there is none." + (let ((m (string-match + (string-append "<(" (regexp-quote prefix) "[^>/.]+)/?>") msgstr))) + (and m (match:substring m 1)))) + (define (prefix+counter prefix counter) + "Returns PREFIX with the number COUNTER appended." + (string-append prefix (number->string counter))) + (let loop ((exp exp) + (msgstr msgstr) + (prefix "")) + (define (unwrap-marked-expression exp) + "Returns two values for an expression EXP containing a (possibly +quoted/unquoted) marking for translation with a simple keyword at its +root. The first return value is a list with the inner expression, the +second is a procedure to wrap the processed inner expression in the +same quotes or unquotes again." + (match exp + (('quote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'quote (quotation res)))))) + (('quasiquote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'quasiquote (quotation res)))))) + (('unquote inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'unquote (quotation res)))))) + (('unquote-splicing inner-exp) + (receive (unwrapped quotation) + (unwrap-marked-expression inner-exp) + (values unwrapped + (lambda (res) + (list 'unquote-splicing (quotation res)))))) + ((marking . rest) ;list with marking as car + ;; assume arg to translate is first argument to marking: + (values (list-ref rest 0) identity)))) + (define (assemble-parenthesized-expression prefix tagged) + "Returns a parenthesized expression deconstructed from MSGSTR +with the meaning of XML elements taken from the name->expression +association list TAGGED. The special tags [prefix]pre and +[prefix]post are associated with a list of expressions before or after +all others in the parenthesized expression with the prefix, +respectively, in reverse order." + (append ;prepend pre elements to what is in msgstr + (reverse (or (assoc-ref tagged (string-append prefix "pre")) '())) + (let assemble ((rest msgstr)) + (let ((name (get-first-element-name prefix rest))) + (cond + ((and name (find-empty-element rest name)) => + ;; first XML element in rest is empty element + (lambda (m) + (cons* + (match:prefix m) ;prepend string before name + (assoc-ref tagged name) ;and expression for name + (assemble (match:suffix m))))) + ((and name (find-element-with-content rest name)) => + ;; first XML element in rest has content + (lambda (m) + (receive (unwrapped quotation) + (unwrap-marked-expression (assoc-ref tagged name)) + (cons* + (match:prefix m) ;prepend string before name + ;; and the deconstructed element with the content as msgstr: + (quotation + (loop + unwrapped + (match:substring m 1) + (string-append name "."))) + (assemble (match:suffix m)))))) + (else + ;; there is no first element + (cons + rest ;return remaining string + (reverse ;and post expressions + (or (assoc-ref tagged (string-append prefix "post")) '()))))))))) + (match exp + (() '()) + (('quote singleton) + (cons 'quote (list (loop singleton msgstr prefix)))) + (('quasiquote singleton) + (cons 'quasiquote (list (loop singleton msgstr prefix)))) + (('unquote singleton) + (cons 'unquote (list (loop singleton msgstr prefix)))) + (('unquote-splicing singleton) + (cons 'unquote-splicing (list (loop singleton msgstr prefix)))) + ((singleton) + (list (loop singleton msgstr prefix))) + ((first-component . components) + (cond + ((gettext-keyword? first-component) + ;; another marking for translation + ;; -> should be an error anyway; just retain exp + exp) + (else + ;; This handles a single level of a parenthesized expression. + ;; assemble-parenthesized-expression will call loop to + ;; recurse to deeper levels. + (let ((tagged-state + (fold + (lambda (component prev-state) + (match prev-state + (($ tagged maybe-tagged counter) + (let inner-loop ((exp component) ;sexp to handle + (quoting identity)) ;for wrapping state + (define (tagged-with-maybes) + "Returns the value of tagged after adding +all maybe-tagged expressions. This should be used as the base value +for tagged when a string or marked expression is seen." + (match counter + (#f + (alist-cons (string-append prefix "pre") + maybe-tagged + tagged)) + ((? number?) + (let accumulate ((prev-counter counter) + (maybes (reverse maybe-tagged))) + (match maybes + (() tagged) + ((head . tail) + (alist-cons + (prefix+counter prefix prev-counter) + head + (accumulate (1+ prev-counter) tail)))))))) + (define (add-maybe exp) + "Returns a deconstruct-fold-state with EXP +added to maybe-tagged. This should be used for expressions that are +neither strings nor marked for translation with a simple keyword." + (make-deconstruct-fold-state + tagged + (cons (quoting exp) maybe-tagged) + counter)) + (define (counter-with-maybes) + "Returns the old counter value incremented +by one for each expression in maybe-tagged. This should be used +together with tagged-with-maybes." + (match counter + ((? number?) + (+ counter (length maybe-tagged))) + (#f + 1))) + (define (add-tagged exp) + "Returns a deconstruct-fold-state with an +added association in tagged from the current counter to EXP. If +MAYBE-TAGGED is not empty, associations for its expressions are added +to pre or their respective counter. This should be used for +expressions marked for translation with a simple keyword." + (let ((c (counter-with-maybes))) + (make-deconstruct-fold-state + (alist-cons + (prefix+counter prefix c) + (quoting exp) + (tagged-with-maybes)) + '() + (1+ c)))) + (match exp + (('quote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'quote res)))) + (('quasiquote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'quasiquote res)))) + (('unquote inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'unquote res)))) + (('unquote-splicing inner-exp) + (inner-loop inner-exp + (lambda (res) + (list 'unquote-splicing res)))) + (((? gettext-keyword?) . rest) + (add-tagged exp)) + ((or (? symbol?) (? keyword?) (? list?)) + (add-maybe exp)) + ((? string?) + ;; elements in maybe-tagged appear between strings + (let ((c (counter-with-maybes))) + (make-deconstruct-fold-state + (tagged-with-maybes) + '() + c)))))))) + (make-deconstruct-fold-state '() '() #f) + exp))) + (match tagged-state + (($ tagged maybe-tagged counter) + (assemble-parenthesized-expression + prefix + (match maybe-tagged + (() tagged) + (else ;associate maybe-tagged with pre or post + (alist-cons + (cond ;if there already is a pre, use post + ((assoc-ref tagged (string-append prefix "pre")) + (string-append prefix "post")) + (else (string-append prefix "pre"))) + maybe-tagged + tagged)))))))))) + ((? string?) msgstr) + (else (error "Single symbol marked for translation." exp))))) + +(define (sgettext x) + "After choosing an identifier for marking s-expressions for +translation, make it usable by defining a macro with it calling +sgettext. If for example the chosen identifier is G_, +use (define-syntax G_ sgettext)." + (syntax-case x () + ((_ exp) + (let* ((msgstr (sexp->msgid (syntax->datum #'exp))) + (new-exp (deconstruct (syntax->datum #'exp) + (gettext msgstr)))) + (datum->syntax #'exp new-exp))))) + +(define (sngettext x) + "After choosing an identifier for behavior similar to ngettext:1,2, +make it usable like (define-syntax N_ sngettext)." + (syntax-case x () + ((_ msgid1 msgid2 n) + (let* ((msgstr1 (sexp->msgid (syntax->datum #'msgid1))) + (msgstr2 (sexp->msgid (syntax->datum #'msgid2))) + (applicable (if (= #'n 1) #'msgid1 #'msgid2)) + (new-exp (deconstruct (syntax->datum applicable) + (ngettext msgstr1 msgstr2 #'n)))) + (datum->syntax #'msgid1 new-exp))))) + +;; gettext’s share/gettext/gettext.h tells us we can prepend a msgctxt +;; and #\eot before a msgid in a gettext call. + +(define (spgettext x) + "After choosing an identifier for behavior similar to pgettext:1c,2, +make it usable like (define-syntax C_ spgettext)." + (syntax-case x () + ((_ msgctxt exp) + (let* ((gettext-context-glue #\eot) ;as defined in gettext.h + (lookup (string-append (syntax->datum #'msgctxt) + (string gettext-context-glue) + (sexp->msgid (syntax->datum #'exp)))) + (msgstr (car (reverse (string-split (gettext lookup) + gettext-context-glue)))) + (new-exp (deconstruct (syntax->datum #'exp) + msgstr))) + (datum->syntax #'exp new-exp))))) + +(define (snpgettext x) + "After choosing an identifier for behavior similar to npgettext:1c,2,3, +make it usable like (define-syntax NC_ snpgettext)." + (syntax-case x () + ((_ msgctxt msgid1 msgid2 n) + (let* ((gettext-context-glue #\eot) ;as defined in gettext.h + (lookup1 (string-append (syntax->datum #'msgctxt) + (string gettext-context-glue) + (sexp->msgid (syntax->datum #'msgid1)))) + ;; gettext.h implementation shows: msgctxt is only part of msgid1. + (lookup2 (sexp->msgid (syntax->datum #'msgid2))) + (msgstr (car (reverse + (string-split (gettext (ngettext lookup1 lookup2 #'n)) + gettext-context-glue)))) + (applicable (if (= #'n 1) #'msgid1 #'msgid2)) + (new-exp (deconstruct (syntax->datum applicable) + msgstr))) + (datum->syntax #'msgid1 new-exp))))) -- 2.22.0