;; gettext.scm -- gettext superset implemented in Scheme
;;
;; Copyright (c) 2003-2007 Alex Shinn.  All rights reserved.
;; BSD-style license: http://synthcode.com/license.txt

;; This is *not* gettext, nor does it use the C gettext library.
;;
;; This is a full gettext superset written in pure Scheme from reading
;; the gettext documentation - I have never looked at the gettext source
;; code, so this may be used under a more liberal BSD-style license as
;; above.
;;
;; This library includes various extensions, including the ability to
;; support multiple domains, locales and search paths; the ability to
;; read both .po and .mo files directly as message catalogs; and a more
;; Schemeish dispatch interface.
;;
;; The multiple domain interface is useful because it allows multiple
;; applications to share message catalogs while still extending their
;; own messages.  Many applications use many of the same messages, such
;; as those for menu names, and these messages can easily be leveraged
;; in Scheme as follows:
;;
;;   (textdomain '("myapp" "gimp"))  ; search 1st myapp, then gimp
;;   (gettext "/File/Close")         ; "Close" from gimp unless overridden
;;
;; Multiple locales can be useful while translations are still in
;; progress.  It is not fair to assume that English (or whatever the
;; native source uses) is the best alternative for a message that has
;; not yet been translated, so the locale may also be a list:
;;
;;   (textdomain "myapp" '("ru" "uk"))  ; search 1st Russian then Ukrainian,
;;   (gettext "Hello, World!")          ; which are somewhat similar
;;
;; Note in both cases the domain and locale may be either a single
;; string (as in the C gettext) or a list of strings in order of
;; decreasing priority.  Also TEXTDOMAIN takes locale as an optional 2nd
;; parameter (to override the Unix environment variable), and in fact
;; the full parameter list is as follows:
;;
;;   (textdomain domain [locale] [dirs] [cdir] [cached?] [lookup-cached?])
;;
;; DOMAIN is a string or list of strings specifying the domain (name of
;; .mo or .po files) as in C gettext.
;;
;; LOCALE is a string or list of strings in the standard Unix format of
;; LANG[_REGION][.ENCODING]
;;
;; DIRS is the search path of directories which should hold the
;; LOCALE/CDIR/ directories which contain the actual message catalogs.
;; This is always appended with the system default, e.g.
;; "/usr/share/locale", and may also inherit from the GETTEXT_PATH
;; colon-delimited environment variable.
;;
;; CDIR is the category directory, defaulting to either the LC_CATEGORY
;; environment variable or the appropriate system default
;; (e.g. LC_MESSAGES).  You generally won't need this.
;;
;; CACHED? means to cache individual messages, and defaults to #t.
;;
;; LOOKUP-CACHED? means to cache the lookup dispatch generated by these
;; parameters, and defaults to #t.
;;
;; TEXTDOMAIN just passes these parameters to the internal MAKE-GETTEXT,
;; and binds the result to the global dispatch used by GETTEXT.  You may
;; build these closures manually for convenience in using multiple
;; separate domains or locales at once (useful for server environments):
;;
;;   (define my-gettext (make-gettext "myapp"))
;;   (define _ (my-gettext 'getter))
;;   (_"Hello, World!")

(require-extension (srfi 1 2 6 9 13 26 69))
(use charconv posix mime)

;; ^^^ Non-SRFI imports:
;;
;;   WITH-INPUT-FROM-ENCODED-FILE, CES-CONVERT and DETECT-FILE-ENCODING
;;     from charconv (Gauche compatible API)
;;   GETENV and FILE-READ-ACCESS? from posix
;;   RFC822-HEADER->LIST from mime (port from Gauche)
;;   LET-OPTIONALS* from Shivers' SRFIs
;;   STRING-SPLIT from Chicken and Gauche
;;   CALL-WITH-INPUT-STRING and WITH-INPUT-FROM-STRING (almost ubiquitous)
;;   CONDITION-CASE from SRFI-12
;;   WARNING (like error, but diagnostic only)
;;   SET-FILE-POSITION! (ftell)
;;   READ-STRING (READ-BLOCK in Gauche, reads N chars)
;;   READ-LINE

;; Other portability issues:
;;   * assumes strings can contain arbitrary binary data
;;   * assumes CHAR->INTEGER and INTEGER->CHAR are ASCII
;;   * uses (EVAL ... (SCHEME-REPORT-ENVIRONMENT 5))
;;     in one place on simple arithmetic expressions

(cond-expand
 ((and chicken compiling)
  (declare
   (export
    ;; standard gettext interface
    gettext textdomain dgettext dcgettext bindtextdomain
    ngettext dngettext dcngettext
    ;; the parameter for the standard interface
    default-gettext-lookup
    ;; more flexible interface for building lookups
    make-gettext
    ;; gfile accessors
    gfile? gfile-filename gfile-locale gfile-encoding
    gfile-properties gfile-type gfile-plural-index
    make-gettext-file
    ;; low-level parsers
    lookup-po-message lookup-mo-message
    )))
 (else
  ))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; this bit isn't portable

;; The NUL character, and a one-character string holding it.  NUL joins
;; msgid with msgid_plural in .mo keys and separates the plural forms
;; inside a single .mo translation string.
(define null-ch (integer->char 0))
(define null-str (string (integer->char 0)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; list utils (from Gauche's util.combinations)

;; LOL is a list of lists.  Returns every list formed by picking one
;; element from each sub-list, in order; the first sub-list varies
;; slowest.  (cartesian-product '()) => (()).
(define (cartesian-product lol)
  (if (null? lol)
      (list '())
      (let ((l (car lol))
            (rest (cartesian-product (cdr lol))))
        (append-map
         (lambda (x)
           (map (lambda (sub-prod) (cons x sub-prod)) rest))
         l))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; mime utils (from hato)

;; Split a "name=value" header parameter into (name . value).
;; NAME is trimmed and downcased.  VALUE is everything after the first
;; `=' with one pair of surrounding double quotes removed; it is "" when
;; nothing follows the `=', and #f when S contains no `=' at all.
(define (mime-split-name+value s)
  ;; strip the leading quote of VAL and, when present, the closing one
  (define (strip-quotes val)
    (let* ((v (string-trim-right val))
           (n (string-length v)))
      (if (and (>= n 2) (eqv? #\" (string-ref v (- n 1))))
          (substring/shared v 1 (- n 1))
          (substring/shared v 1))))
  (let ((i (string-index s #\=)))
    (if i
        (cons (string-downcase (string-trim-both (substring s 0 i)))
              (let ((start (+ i 1)))
                (cond
                 ;; `=' is the last character: empty value.  (Comparing I
                 ;; itself against the length can never succeed.)
                 ((>= start (string-length s)) "")
                 ((eqv? #\" (string-ref s start))
                  (strip-quotes (substring/shared s start)))
                 (else (substring/shared s start)))))
        (cons (string-downcase (string-trim-both s)) #f))))

;; Parse a ";"-separated parameter string into an alist of
;; (name . value) pairs, see MIME-SPLIT-NAME+VALUE.
(define (mime-parse-content-type str)
  (map mime-split-name+value (string-split str ";")))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; binary I/O utils (from SRFI-56)

;; Read one character from the optional port (default: current input)
;; and return its integer code, or the eof object.
(define (read-byte . o)
  (let* ((in (if (pair? o) (car o) (current-input-port)))
         (ch (read-char in)))
    (if (eof-object? ch) ch (char->integer ch))))

;; Read four bytes and combine them as a little-endian unsigned
;; integer.  Returns the eof object if the 4th byte hits end of input.
(define (read-binary-uint32-le . o)
  (let* ((in (if (pair? o) (car o) (current-input-port)))
         (b1 (read-byte in))
         (b2 (read-byte in))
         (b3 (read-byte in))
         (b4 (read-byte in)))
    (if (eof-object? b4)
        b4
        (+ (arithmetic-shift b4 24)
           (arithmetic-shift b3 16)
           (arithmetic-shift b2 8)
           b1))))

;; Big-endian counterpart of READ-BINARY-UINT32-LE.
(define (read-binary-uint32-be . o)
  (let* ((in (if (pair? o) (car o) (current-input-port)))
         (b1 (read-byte in))
         (b2 (read-byte in))
         (b3 (read-byte in))
         (b4 (read-byte in)))
    (if (eof-object? b4)
        b4
        (+ (arithmetic-shift b1 24)
           (arithmetic-shift b2 16)
           (arithmetic-shift b3 8)
           b4))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Customize this to the appropriate value for your system:

(define message-path '("/usr/share/locale"))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; store meta info for gettext files

(define-record-type gfile
  (%make-gfile filename locale encoding properties type plural-index)
  gfile?
  (filename gfile-filename) ;; these are all immutable
  (locale gfile-locale)
  (encoding gfile-encoding)
  (properties gfile-properties)
  (type gfile-type)
  (plural-index gfile-plural-index)
  )

;; Build the gfile record for the catalog FILENAME belonging to LOCALE.
;; The catalog's header entry (the translation of "") is parsed as
;; RFC822 headers; a charset parameter on its content-type overrides the
;; detected encoding, and a plural= parameter on its plural-forms header
;; is compiled into the plural-index procedure (default: always 0).
;; The type is 'mo when FILENAME ends in ".mo", otherwise 'po.
(define (make-gettext-file filename locale)
  (let* ((encoding (detect-file-encoding filename locale))
         (property-msg (lookup-message filename "" #f encoding))
         (properties
          (if property-msg
              (call-with-input-string property-msg rfc822-header->list)
              '()))
         (content-type
          (mime-parse-content-type
           (cond ((assoc "content-type" properties) => cadr)
                 (else ""))))
         (encoding
          (cond ((assoc "charset" content-type) => cdr)
                (else encoding)))
         (plural-index
          (cond
           ((assoc "plural-forms" properties)
            => (lambda (x)
                 (cond
                  ((assoc "plural" (mime-parse-content-type (cadr x)))
                   => (lambda (x) (C->Scheme (cdr x))))
                  (else (lambda (n) 0)))))
           (else (lambda (n) 0)))))
    (%make-gfile filename locale encoding properties
                 (if (string-suffix? ".mo" filename) 'mo 'po)
                 plural-index)))

;; take a list or a single argument which is interpreted as a one
;; element list
(define (listify arg)
  (if (or (pair? arg) (null? arg)) arg (list arg)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; the default gettext lookup

;; Maps a list of domain names to the list of directories bound to it
;; with BINDTEXTDOMAIN.
(define domain-message-paths (make-hash-table))

;; Parameter holding the dispatcher used by GETTEXT and NGETTEXT; it is
;; #f until TEXTDOMAIN is called with arguments.
(define default-gettext-lookup (make-parameter #f))

(define (gettext msgid)
  ((default-gettext-lookup) 'get msgid))
(define (dgettext domain msgid)
  ((make-gettext domain) 'get msgid))
(define (dcgettext domain msgid locale)
  ((make-gettext domain (list locale)) 'get msgid))

;; plural forms
(define (ngettext . opt)
  (apply (default-gettext-lookup) 'nget opt))
(define (dngettext domain . opt)
  (apply (make-gettext domain) 'nget opt))
(define (dcngettext domain msgid locale . opt)
  (apply (make-gettext domain (list locale)) 'nget msgid opt))

;; bind the default domain
;; With arguments, builds a dispatcher via MAKE-GETTEXT, installs it as
;; the default and returns it.  Without arguments, returns the domain
;; list of the current default dispatcher.
(define (textdomain . opt)
  (if (pair? opt)
      (let ((accessor (apply make-gettext opt)))
        (default-gettext-lookup accessor)
        accessor)
      ((default-gettext-lookup) 'domain)))

;; Bind DOMAIN (a string or list of strings) to the directory or list of
;; directories DIRS.  The key is listified because MAKE-GETTEXT looks
;; bindings up by its listified domain; storing a bare string meant the
;; binding was never found.
(define (bindtextdomain domain dirs)
  (hash-table-set! domain-message-paths (listify domain) (listify dirs)))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The gettext .po parser.
;; We sequentially scan all the .po msgstr entries until the one
;; matching the msg string is found.  This is slow but only meant
;; for development, so that you can quickly test your message
;; files without compiling them to .mo files.

;; Returns #f when FILE is unreadable, MSG is not found or an error is
;; raised while reading (the error is printed as a warning).  For a
;; plain entry the result is the msgstr string.  For an entry with
;; msgid_plural the result is a list whose car is the msgid_plural
;; string and whose cdr is an alist of (index . msgstr[index]).
;; MSG2 is accepted for signature parity with LOOKUP-MO-MESSAGE but is
;; not consulted here.
(define (lookup-po-message file msg msg2 encoding)

  ;; resisting jokes about indigent messages...

  ;; grab the 2nd scheme object in a string
  (define (tail-str str)
    (call-with-input-string str
      (lambda (p) (read p) (read p))))

  ;; read a sequence of lines in "" starting w/ an initial string.
  ;; doesn't affect trailing lines.
  (define (read-str default)
    (let reader ((res (list default)))
      (cond
       ((and-let* ((ch (peek-char))
                   ((eqv? ch #\"))
                   (line (string-trim-both (read-line)))
                   (len (string-length line))
                   ((and (>= len 2)
                         (eqv? #\" (string-ref line 0))
                         (eqv? #\" (string-ref line (- len 1))))))
          (call-with-input-string line read))
        => (lambda (str) (reader (cons str res))))
       (else (string-concatenate-reverse res)))))

  ;; read consecutive msgstr[N] "..." lines into (default (N . str) ...)
  (define (read-plural default)
    (let reader ((res (list default)))
      (cond
       ((and-let* (((eqv? (peek-char) #\m))
                   (line (read-line))
                   (len (string-length line))
                   ((>= len 10))
                   ((string-prefix? "msgstr[" line))
                   (i (string-index line #\] 7))
                   ;; "msgstr[" is 7 characters long, so the index digits
                   ;; start at position 7 (starting at 8 skipped them)
                   (n (string->number (substring line 7 i)))
                   (str (call-with-input-string
                            (substring/shared line (+ i 1))
                          read))
                   ((string? str)))
          (cons n str))
        => (lambda (x) (reader (cons x res))))
       (else (reverse res)))))

  ;; read from the file if it exists
  (and
   (file-read-access? file)
   (condition-case
       (with-input-from-encoded-file file encoding
         (lambda ()
           (let search ((line (read-line)))
             (cond
              ((eof-object? line) #f)
              ((string-prefix? "msgid " line)
               (let ((msgid (read-str (tail-str line))))
                 (cond
                  ((string=? msgid msg)
                   (let lp ((line (read-line)))
                     (cond
                      ((eof-object? line) #f)
                      ((string-prefix? "msgid_plural " line)
                       (read-plural (read-str (tail-str line))))
                      ((string-prefix? "msgstr " line)
                       (read-str (tail-str line)))
                      (else (lp (read-line))))))
                  (else (search (read-line))))))
              (else (search (read-line)))))))
     (exn ()
      (print-error-message exn (current-error-port)
                           "Warning: lookup-po-message")
      ;;(print-call-chain (current-error-port))
      #f))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; The gettext binary .mo file parser.
;; The format is well described in the GNU gettext documentation.
;; Essentially it's an index of source strings with offsets to their
;; translation string, and we binary search the index.

;; Returns the translation string for MSG (or for MSG NUL MSG2 when MSG2
;; is given), or #f when FILE is unreadable, has a bad magic number, the
;; key is absent, or an error is raised (printed as a warning).
(define (lookup-mo-message file msg msg2 encoding)
  (and
   (file-read-access? file)
   (condition-case
       (with-input-from-encoded-file file encoding
         (lambda ()
           (define (search read-int)
             ;; the header fields are read strictly in file order, so
             ;; every binding below must stay, used or not
             (let* ((key (if msg2 (string-append msg null-str msg2) msg))
                    (format (read-int))
                    (count (read-int))
                    (src-offset (read-int))
                    (trans-offset (read-int))
                    (hash-size (read-int))
                    (hash-offset (read-int))
                    (diff (- trans-offset src-offset))
                    (end (+ src-offset (* (- count 1) 8))))
               ;; POS addresses an 8-byte (length, offset) descriptor;
               ;; return the string it points at
               (define (string-at pos)
                 (set-file-position! (current-input-port) pos)
                 (let* ((len (read-int))
                        (off (read-int)))
                   (set-file-position! (current-input-port) off)
                   (ces-convert (read-string len) encoding)))
               (cond
                ;; check endpoints
                ((string=? key (string-at src-offset))
                 (string-at (+ src-offset diff)))
                ((and (> end src-offset) (string=? key (string-at end)))
                 (string-at (+ end diff)))
                (else ;; binary search
                 (let loop ((lo 0) (hi (- count 1)))
                   (if (>= lo hi)
                       #f
                       (let* ((mid (+ lo (quotient (- hi lo) 2)))
                              (pos (+ src-offset (* mid 8)))
                              (str (string-at pos)))
                         (cond
                          ;; key sorts before the probe: search lower half
                          ((string<? key str)
                           (if (>= mid hi) #f (loop lo mid)))
                          ;; key sorts after the probe: search upper half
                          ((string>? key str)
                           (if (<= mid lo) #f (loop mid hi)))
                          (else ;; match
                           (string-at (+ pos diff)))))))))))
           (let* ((b1 (read-byte))
                  (b2 (read-byte))
                  (b3 (read-byte))
                  (b4 (read-byte))
                  (magic (list b1 b2 b3 b4)))
             ;; the byte order of the magic number selects the integer
             ;; reader for the rest of the file
             (cond
              ((equal? magic '(#xde #x12 #x04 #x95))
               (search read-binary-uint32-le))
              ((equal? magic '(#x95 #x04 #x12 #xde))
               (search read-binary-uint32-be))
              (else
               (warning "invalid .mo file magic" magic)
               #f)))))
     (exn ()
      (print-error-message exn (current-error-port)
                           "Warning: lookup-mo-message")
      ;;(print-call-chain (current-error-port))
      #f))))

;; Dispatch to the .mo or .po parser.  GFILE is either a gfile record
;; or a filename string (type chosen by the ".mo" suffix).  The optional
;; argument is the encoding; it defaults to the record's encoding, or to
;; 'utf8 for a bare filename.
(define (lookup-message gfile msg msg2 . opt)
  (if (gfile? gfile)
      ((if (eq? (gfile-type gfile) 'mo) lookup-mo-message lookup-po-message)
       (gfile-filename gfile)
       msg
       msg2
       (if (pair? opt) (car opt) (gfile-encoding gfile)))
      ((if (string-suffix? ".mo" gfile) lookup-mo-message lookup-po-message)
       gfile
       msg
       msg2
       (if (pair? opt) (car opt) 'utf8))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; the subset C parser for ngettext plural forms

;; Compile the C expression STR, in the single variable n, into a
;; one-argument Scheme procedure.  C truth values are represented as the
;; integers 1 and 0.
;; NOTE(review): the generated code is passed to EVAL and STR comes from
;; a catalog header; the tokenizer only emits numbers, `n', operators
;; and parentheses, but confirm that is an acceptable trust boundary.
(define (C->Scheme str)

  (define (read-number c)
    (let loop ((ls (list c)))
      (let ((c2 (peek-char)))
        (cond
         ((and (not (eof-object? c2)) (char-numeric? c2))
          (read-char)
          (loop (cons c2 ls)))
         (else
          (string->number (list->string (reverse ls))))))))

  ;; skip a /* ... */ comment; returns #f so the parser asks again
  (define (read-comment)
    (read-char)
    (let loop ((c (read-char)))
      (if (eof-object? c)
          c ;; maybe signal error
          (if (eqv? c #\*)
              (let ((c2 (read-char)))
                (if (eqv? c2 #\/) #f (loop c2)))
              (loop (read-char))))))

  ;; returns a symbol, a number, #f (token to ignore) or the eof object
  (define (next-token)
    (let ((c (read-char)))
      (if (eof-object? c)
          c
          (case c
            ((#\() 'open)
            ((#\)) 'close)
            ((#\/)
             (if (eqv? (peek-char) #\*)
                 (read-comment)
                 '/))
            ((#\- #\+ #\* #\% #\? #\:)
             (string->symbol (string c)))
            ((#\&)
             (if (eqv? (peek-char) c)
                 (begin (read-char) 'and)
                 'logand))
            ((#\|)
             (if (eqv? (peek-char) c)
                 (begin (read-char) 'or)
                 'logior))
            ((#\! #\> #\<)
             (cond ((eqv? (peek-char) #\=)
                    (read-char)
                    (string->symbol (string c #\=)))
                   (else (string->symbol (string c)))))
            ((#\=)
             (cond ((eqv? (peek-char) #\=)
                    (read-char)
                    '==)
                   (else
                    (warning "invalid assignment in C code")
                    #f)))
            ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9)
             (read-number c))
            ((#\n) 'n)
            ((#\space #\newline) (next-token))
            (else
             (warning "invalid character in C code: ~S" c)
             #f)))))

  (define (C-parse str)

    (define (precedence x) ;; lower value is higher precedence
      (case x
        ((**) 10)               ((&) 70)
        ((! ~) 20)              ((^ logand logior) 80)
        ((* / %) 30)            ((and) 90)
        ((+ -) 40)              ((or) 100)
        ((< > <= >=) 50)        ((?) 110)
        ((== != <=>) 60)        (else 120)))

    ;; parse one operand (or hand back one operator token)
    (define (parse1)
      (let ((x (next-token)))
        (cond
         ((not x) (parse1))
         ((eof-object? x) 'eof)
         ((eq? x 'open) (parse-until 'close))
         ((memq x '(! ~)) `(,x ,(parse1)))
         (else x))))

    ;; parse an operator-precedence expression up to the token END
    (define (parse-until end)

      (define (group op left right)
        (cond
         ((or (eq? right end) (eq? right 'eof))
          (warning "expected 2nd argument to" op)
          ;; unquote OP: the form must carry the operator itself, not
          ;; the literal symbol `op'
          `(,op ,left))
         ((eq? op 'and) `(if (zero? ,left) 0 ,right))
         ((eq? op 'or) `(if (zero? ,left) ,right 1))
         (else `(,op ,left ,right))))

      ;; STACK holds alternating pending operators and left operands
      (define (join x stack)
        (if (null? stack)
            x
            (join (group (car stack) (cadr stack) x) (cddr stack))))

      (let ((init (parse1)))
        (if (equal? init end)
            '()
            (let parse ((left init)
                        (op (parse1))
                        (stack '()))
              (cond
               ((eq? op end)
                (join left stack))
               ((eq? op 'eof)
                (warning "unexpected #<eof>")
                (join left stack))
               ((eq? op '?) ;; trinary ? : (right-assoc)
                (let* ((pass (parse-until ':))
                       (fail (parse1))
                       (op2 (parse1)))
                  (cond
                   ((or (eq? op2 end) (eq? op2 'eof))
                    `(if (zero? ,left) ,fail ,pass))
                   ((< (precedence op) (precedence op2))
                    (parse `(if (zero? ,left) ,fail ,pass) op2 stack))
                   (else
                    (join `(if (zero? ,left) ,(parse fail op2 '()) ,pass)
                          stack)))))
               (else ;; assume a (left-assoc) binary operator
                (let* ((right (parse1))
                       (op2 (parse1)))
                  (cond
                   ((or (eq? op2 end) (eq? op2 'eof))
                    (join (group op left right) stack))
                   ((<= (precedence op) (precedence op2))
                    ;; op2 has less than or equal precedence, group
                    (let loop2 ((x (group op left right))
                                (s stack))
                      (if (and (pair? s)
                               (< (precedence (car s)) (precedence op2)))
                          (loop2 (group (car s) (cadr s) x) (cddr s))
                          (parse x op2 s))))
                   (else
                    ;; op2 has higher precedence, push on the stack
                    (parse right op2 (cons op (cons left stack))))))))))))

    (with-input-from-string str
      (lambda () (parse-until 'eof))))

  ;; replace C operator symbols by Scheme procedures or lambda forms
  (define (map-C-names x)
    (cond
     ((symbol? x)
      (case x
        ((/) 'quotient)
        ((%) 'modulo)
        ((**) 'expt)
        ((~) 'lognot)
        ((^) 'logxor)
        ((<<) 'arithmetic-shift)
        ;; C conflates booleans with integers
        ((!) '(lambda (a) (if (zero? a) 1 0)))
        ((>>) '(lambda (a b) (arithmetic-shift a (- b))))
        ((==) '(lambda (a b) (if (eqv? a b) 1 0)))
        ((!=) '(lambda (a b) (if (eqv? a b) 0 1)))
        ((>) '(lambda (a b) (if (> a b) 1 0)))
        ((<) '(lambda (a b) (if (< a b) 1 0)))
        ((>=) '(lambda (a b) (if (>= a b) 1 0)))
        ((<=) '(lambda (a b) (if (<= a b) 1 0)))
        (else x)))
     ((pair? x)
      (cons (map-C-names (car x)) (map-C-names (cdr x))))
     (else x)))

  (let ((body (map-C-names (C-parse str))))
    ;; could build from chained closures w/o using eval but this is
    ;; faster at runtime
    (eval `(lambda (n) ,body) (scheme-report-environment 5))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; internal routines for building/caching files and lookups

;; Returns LANG followed by LANG cut at its first `.' and LANG cut at
;; its first `_' (each only when that character occurs), e.g.
;; "en_US.UTF-8" => ("en_US.UTF-8" "en_US" "en").
(define (split-langs lang)
  (define (split-at ch)
    (cond ((string-index lang ch)
           => (lambda (i) (list (substring lang 0 i))))
          (else '())))
  (cons lang (append (split-at #\.) (split-at #\_))))

;; Build a dispatcher closure over the readable catalogs found at
;; DIR/LOCALE/CDIR/DOMAIN{.mo,.po} for every combination of the given
;; lists.  The dispatcher takes a symbol and arguments:
;;   searcher getter ngetter setter   return the internal procedures
;;   search get nget set!             apply them to the arguments
;;   locale domain dirs files         return the current settings
;;   set-locale! set-domain! set-dirs!  change a setting and rescan
;;   use-cache                        turn message caching on or off
;;   clear                            empty the message cache
(define (make-gettext-internal domain locale dirs cdir cached?)

  (define (make-cache)
    (make-hash-table))

  (define (make-file-list)
    (define suffixes '(".mo" ".po"))
    (reverse
     (fold
      (lambda (x res)
        ;; x is (locale domain dir suffix)
        (let ((path (string-append (caddr x) "/" (car x) "/" cdir "/"
                                   (cadr x) (cadddr x))))
          (if (file-read-access? path)
              (cons (make-gettext-file path (car x)) res)
              res)))
      '()
      (cartesian-product
       (list (append-map split-langs locale) domain dirs suffixes)))))

  (let ((files (make-file-list))
        (cache (make-cache)))

    ;; Returns (translation . gfile) from the first catalog that knows
    ;; MSG, else #f.  When a numeric N is given and the catalog is a
    ;; .mo file, translation is (msgid . ((0 . form0) (1 . form1) ...)).
    (define (search msg . opt)
      (if (and cached? (hash-table-exists? cache msg))
          (hash-table-ref/default cache msg #f)
          (let-optionals* opt ((msg2 #f) (n #f))
            (let ((split? (number? n)))
              (any (lambda (gf)
                     (and-let* ((x0 (lookup-message gf msg msg2))
                                (x (if (and split? (eq? (gfile-type gf) 'mo))
                                       (cons (or msg2 msg)
                                             ;; split on a delimiter
                                             ;; *string*: Chicken's
                                             ;; STRING-SPLIT rejects a
                                             ;; character here
                                             (let ((l (string-split x0 null-str)))
                                               (map cons (iota (length l)) l)))
                                       x0))
                                (res (cons x gf)))
                       (if cached? (hash-table-set! cache msg res))
                       res))
                   files)))))

    ;; singular lookup; falls back to MSG itself
    (define (get msg)
      (let ((res (search msg)))
        (if res
            (if (pair? (car res)) (caar res) (car res))
            msg)))

    ;; plural lookup; falls back to MSG or MSG2 depending on N
    (define (nget msg . opt) ;; [msg2] [n]
      (let ((msg2 #f)
            (n #f))
        ;; option parsing, both optional
        (when (pair? opt)
          (let ((x (car opt)))
            (if (number? x) (set! n x) (set! msg2 x)))
          (when (pair? (cdr opt))
            (let ((x (cadr opt)))
              (if (number? x) (set! n x) (set! msg2 x)))))
        (let ((res (search msg msg2 n)))
          (if (pair? res)
              (let ((entry (car res))
                    (plural-index (gfile-plural-index (cdr res))))
                (if (pair? entry)
                    ;; entry is (plural-msgid . ((index . form) ...))
                    (or (and (procedure? plural-index)
                             (cond ((assv (plural-index (or n 1)) (cdr entry))
                                    => cdr)
                                   (else #f)))
                        (if (eqv? n 1) msg (car entry)))
                    ;; the catalog only has a singular translation (a
                    ;; plain string): return it rather than taking the
                    ;; car/cdr of a string
                    entry))
              (if (or (eqv? n 1) (not msg2)) msg msg2)))))

    (define (set msg val)
      (hash-table-set! cache msg val))

    (define (reset!)
      (set! files (make-file-list))
      (set! cache (make-cache)))

    ;; return the dispatcher
    (lambda (dispatch . args)
      (case dispatch
        ((searcher) search)
        ((getter) get)
        ((ngetter) nget)
        ((setter) set)
        ((search) (apply search args))
        ((get) (apply get args))
        ((nget) (apply nget args))
        ((set!) (apply set args))
        ((locale) locale)
        ((domain) domain)
        ((dirs) dirs)
        ((files) files)
        ((set-locale!) (set! locale (listify (car args))) (reset!))
        ((set-domain!) (set! domain (listify (car args))) (reset!))
        ((set-dirs!) (set! dirs (listify (car args))) (reset!))
        ((use-cache) (set! cached? (car args)))
        ((clear) (set! cache (make-cache)))
        ))))

;; cache the lookups and provide a more friendly interface.  should this
;; take keyword arguments?
;; (make-gettext domain locale dirs cdir gettext-cached? lookup-cached?)
;;
;; Defaults: domain "default"; locale from $LANG, then $LC_ALL, then
;; "C"; dirs from $GETTEXT_PATH; cdir from $LC_CATEGORY, then
;; "LC_MESSAGES".  Returns a dispatcher, see MAKE-GETTEXT-INTERNAL.
(define make-gettext
  (let ((gettext-lookup-cache (make-hash-table)))

    ;; Directories bound to DOMAIN (a list of names): an exact binding
    ;; for the whole list wins, then the bindings of the individual
    ;; domains, and finally the system MESSAGE-PATH.
    (define (domain-dirs domain)
      (or (hash-table-ref/default domain-message-paths domain #f)
          (let ((bound (append-map
                        (lambda (d)
                          (hash-table-ref/default
                           domain-message-paths (list d) '()))
                        domain)))
            (if (null? bound)
                message-path
                (delete-duplicates bound)))))

    (lambda opt
      (let-optionals* opt ((domain0 '("default"))
                           (locale0 #f)
                           (dirs0 #f)
                           (cdir0 #f)
                           (gettext-cached? #t)
                           (lookup-cached? #t))
        (let* ((domain (listify domain0))
               (locale (listify (or locale0
                                    (getenv "LANG")
                                    (getenv "LC_ALL")
                                    "C")))
               (dirs1 (listify (or dirs0
                                   (cond ((getenv "GETTEXT_PATH")
                                          => (cut string-split <> ":"))
                                         (else '())))))
               ;; prepend default dirs based on domain
               (dirs (append (domain-dirs domain) dirs1))
               (cdir (or cdir0 (getenv "LC_CATEGORY") "LC_MESSAGES")))
          ;; optionally lookup from cache
          (if lookup-cached?
              (let* ((key (list domain locale dirs cdir gettext-cached?))
                     (lookup (hash-table-ref/default
                              gettext-lookup-cache key #f)))
                (unless lookup
                  (set! lookup
                        (make-gettext-internal
                         domain locale dirs cdir gettext-cached?))
                  (hash-table-set! gettext-lookup-cache key lookup))
                lookup)
              (make-gettext-internal
               domain locale dirs cdir gettext-cached?)))))))