123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412 |
- (define-module (texinfo string-utils)
- #:use-module (srfi srfi-13)
- #:use-module (srfi srfi-14)
- #:use-module (oop goops)
- #:export (escape-special-chars
- transform-string
- expand-tabs
- center-string
- left-justify-string
- right-justify-string
- collapse-repeated-chars
- make-text-wrapper
- fill-string
- string->wrapped-lines))
- (define* (transform-string str match? replace #:optional (start #f) (end #f))
- "Uses @var{match?} against each character in @var{str}, and performs a
- replacement on each character for which matches are found.
- @var{match?} may either be a function, a character, a string, or
- @code{#t}. If @var{match?} is a function, then it takes a single
- character as input, and should return @samp{#t} for matches.
- @var{match?} is a character, it is compared to each string character
- using @code{char=?}. If @var{match?} is a string, then any character
- in that string will be considered a match. @code{#t} will cause
- every character to be a match.
- If @var{replace} is a function, it is called with the matched
- character as an argument, and the returned value is sent to the output
- string via @samp{display}. If @var{replace} is anything else, it is
- sent through the output string via @samp{display}.
- Note that te replacement for the matched characters does not need to
- be a single character. That is what differentiates this function from
- @samp{string-map}, and what makes it useful for applications such as
- converting @samp{#\\&} to @samp{\"&\"} in web page text. Some other
- functions in this module are just wrappers around common uses of
- @samp{transform-string}. Transformations not possible with this
- function should probably be done with regular expressions.
- If @var{start} and @var{end} are given, they control which portion
- of the string undergoes transformation. The entire input string
- is still output, though. So, if @var{start} is @samp{5}, then the
- first five characters of @var{str} will still appear in the returned
- string.
- @lisp
- ; these two are equivalent...
- (transform-string str #\\space #\\-) ; change all spaces to -'s
- (transform-string str (lambda (c) (char=? #\\space c)) #\\-)
- @end lisp"
-
-
- (let* ((os (open-output-string))
- (matcher (cond ((char? match?)
- (lambda (c) (char=? match? c)))
- ((procedure? match?)
- match?)
- ((string? match?)
- (lambda (c) (string-index match? c)))
- ((boolean? match?)
- (lambda (c) match?))
- (else (throw 'bad-type "expected #t, char, string, or procedure"))))
- (replacer (if (procedure? replace)
- (lambda (c) (display (replace c) os))
- (lambda (c) (display replace os)))))
-
- (if (and start (<= start (string-length str)))
- (display (substring str 0 start) os))
-
- (string-for-each
- (lambda (c)
- (if (matcher c)
-
- (replacer c)
-
- (write-char c os)))
- str
- (or start 0)
- (or end (string-length str)))
-
- (if (and end (< end (string-length str)))
- (display (substring str end) os))
- (get-output-string os)))
- (define* (expand-tabs str #:optional (tab-size 8))
- "Returns a copy of @var{str} with all tabs expanded to spaces. @var{tab-size} defaults to 8.
- Assuming tab size of 8, this is equivalent to: @lisp
- (transform-string str #\\tab \" \")
- @end lisp"
- (transform-string str
- #\tab
- (make-string tab-size #\space)))
- (define (escape-special-chars str special-chars escape-char)
- "Returns a copy of @var{str} with all given special characters preceded
- by the given @var{escape-char}.
- @var{special-chars} can either be a single character, or a string consisting
- of all the special characters.
- @lisp
- ;; make a string regexp-safe...
- (escape-special-chars \"***(Example String)***\"
- \"[]()/*.\"
- #\\\\)
- => \"\\\\*\\\\*\\\\*\\\\(Example String\\\\)\\\\*\\\\*\\\\*\"
- ;; also can escape a singe char...
- (escape-special-chars \"richardt@@vzavenue.net\"
- #\\@@
- #\\@@)
- => \"richardt@@@@vzavenue.net\"
- @end lisp"
- (transform-string str
- (if (char? special-chars)
-
- (lambda (c) (char=? c special-chars))
-
- (lambda (c) (string-index special-chars c)))
-
- (lambda (c) (string escape-char c))))
- (define* (center-string str #:optional (width 80) (chr #\space) (rchr #f))
- "Returns a copy of @var{str} centered in a field of @var{width}
- characters. Any needed padding is done by character @var{chr}, which
- defaults to @samp{#\\space}. If @var{rchr} is provided, then the
- padding to the right will use it instead. See the examples below.
- left and @var{rchr} on the right. The default @var{width} is 80. The
- default @var{chr} and @var{rchr} is @samp{#\\space}. The string is
- never truncated.
- @lisp
- (center-string \"Richard Todd\" 24)
- => \" Richard Todd \"
- (center-string \" Richard Todd \" 24 #\\=)
- => \"===== Richard Todd =====\"
- (center-string \" Richard Todd \" 24 #\\< #\\>)
- => \"<<<<< Richard Todd >>>>>\"
- @end lisp"
- (let* ((len (string-length str))
- (lpad (make-string (max (quotient (- width len) 2) 0) chr))
-
- (right-chr (or rchr chr))
- (rpad (if (char=? right-chr chr)
- lpad
- (make-string (max (quotient (- width len) 2) 0) right-chr))))
- (if (>= len width)
- str
- (string-append lpad str rpad (if (odd? (- width len)) (string right-chr) "")))))
- (define* (left-justify-string str #:optional (width 80) (chr #\space))
- "@code{left-justify-string str [width chr]}.
- Returns a copy of @var{str} padded with @var{chr} such that it is left
- justified in a field of @var{width} characters. The default
- @var{width} is 80. Unlike @samp{string-pad} from srfi-13, the string
- is never truncated."
- (let* ((len (string-length str))
- (pad (make-string (max (- width len) 0) chr)))
- (if (>= len width)
- str
- (string-append str pad))))
- (define* (right-justify-string str #:optional (width 80) (chr #\space))
- "Returns a copy of @var{str} padded with @var{chr} such that it is
- right justified in a field of @var{width} characters. The default
- @var{width} is 80. The default @var{chr} is @samp{#\\space}. Unlike
- @samp{string-pad} from srfi-13, the string is never truncated."
- (let* ((len (string-length str))
- (pad (make-string (max (- width len) 0) chr)))
- (if (>= len width)
- str
- (string-append pad str))))
- (define* (collapse-repeated-chars str #:optional (chr #\space) (num 1))
- "Returns a copy of @var{str} with all repeated instances of
- @var{chr} collapsed down to at most @var{num} instances.
- The default value for @var{chr} is @samp{#\\space}, and
- the default value for @var{num} is 1.
- @lisp
- (collapse-repeated-chars \"H e l l o\")
- => \"H e l l o\"
- (collapse-repeated-chars \"H--e--l--l--o\" #\\-)
- => \"H-e-l-l-o\"
- (collapse-repeated-chars \"H-e--l---l----o\" #\\- 2)
- => \"H-e--l--l--o\"
- @end lisp"
-
-
- (let ((repeat-locator
-
- (let ((prev-chr (if (char=? chr #\space) #\A #\space))
- (match-count 0))
- (lambda (c)
- (if (and (char=? c prev-chr)
- (char=? prev-chr chr))
-
- (begin
- (set! match-count (+ 1 match-count))
- (>= match-count num))
-
- (begin (set! match-count 0)
- (set! prev-chr c)
- #f))))))
-
-
- (transform-string str repeat-locator "")))
- (define (split-by-single-words str)
- (let ((non-wschars (char-set-complement char-set:whitespace)))
- (let loop ((ans '())
- (index 0))
- (let ((next-non-ws (string-index str non-wschars index)))
- (if next-non-ws
-
- (let ((next-ws (string-index str char-set:whitespace next-non-ws)))
- (if next-ws
-
- (loop (cons (substring str index next-ws) ans)
- next-ws)
-
- (reverse (cons (substring str index) ans))))
-
- (reverse ans))))))
- (define (end-of-sentence? str)
- "Return #t when STR likely denotes the end of sentence."
- (let ((len (string-length str)))
- (and (> len 1)
- (eqv? #\. (string-ref str (- len 1)))
- (not (eqv? #\. (string-ref str (- len 2)))))))
- (define* (make-text-wrapper #:key
- (line-width 80)
- (expand-tabs? #t)
- (tab-width 8)
- (collapse-whitespace? #t)
- (subsequent-indent "")
- (initial-indent "")
- (break-long-words? #t))
- "Returns a procedure that will split a string into lines according to the
- given parameters.
- @table @code
- @item #:line-width
- This is the target length used when deciding where to wrap lines.
- Default is 80.
- @item #:expand-tabs?
- Boolean describing whether tabs in the input should be expanded. Default
- is #t.
- @item #:tab-width
- If tabs are expanded, this will be the number of spaces to which they
- expand. Default is 8.
- @item #:collapse-whitespace?
- Boolean describing whether the whitespace inside the existing text
- should be removed or not. Default is #t.
- If text is already well-formatted, and is just being wrapped to fit in a
- different width, then set this to @samp{#f}. This way, many common text
- conventions (such as two spaces between sentences) can be preserved if
- in the original text. If the input text spacing cannot be trusted, then
- leave this setting at the default, and all repeated whitespace will be
- collapsed down to a single space.
- @item #:initial-indent
- Defines a string that will be put in front of the first line of wrapped
- text. Default is the empty string, ``''.
- @item #:subsequent-indent
- Defines a string that will be put in front of all lines of wrapped
- text, except the first one. Default is the empty string, ``''.
- @item #:break-long-words?
- If a single word is too big to fit on a line, this setting tells the
- wrapper what to do. Defaults to #t, which will break up long words.
- When set to #f, the line will be allowed, even though it is longer
- than the defined @code{#:line-width}.
- @end table
- The return value is a procedure of one argument, the input string, which
- returns a list of strings, where each element of the list is one line."
- (lambda (str)
-
- (set! str (transform-string str (lambda (c) (char=? c #\nl)) #\space))
-
- (if expand-tabs?
- (set! str (expand-tabs str tab-width)))
-
- (if collapse-whitespace?
- (set! str (collapse-repeated-chars str)))
-
-
- (set! str (string-trim str))
-
- (let loop ((ans '())
- (words (split-by-single-words str))
- (line initial-indent)
- (count 0))
- (if (null? words)
-
- (reverse (if (> count 0)
- (cons line ans)
- ans))
-
-
- (let ((length-left (- line-width
- (string-length line)))
- (next-word (if (= count 0)
- (string-trim (car words))
- (car words))))
- (cond
-
- ((<= (string-length next-word)
- length-left)
- (loop ans
- (cdr words)
- (if (and collapse-whitespace?
- (end-of-sentence? line))
-
- (string-append line " " next-word)
- (string-append line next-word))
- (+ count 1)))
-
- ((> count 0)
-
- (loop (cons line ans)
- words
- subsequent-indent
- 0))
-
-
-
- (break-long-words?
-
- (loop (cons (string-append line (substring next-word 0 length-left))
- ans)
- (cons (substring next-word length-left)
- (cdr words))
- subsequent-indent
- 0))
-
- (else
- (loop (cons (string-append line next-word)
- ans)
- (cdr words)
- subsequent-indent
- 0))))))))
- (define (string->wrapped-lines str . kwargs)
- "@code{string->wrapped-lines str keywds ...}. Wraps the text given in
- string @var{str} according to the parameters provided in @var{keywds},
- or the default setting if they are not given. Returns a list of strings
- representing the formatted lines. Valid keyword arguments are discussed
- in @code{make-text-wrapper}."
- ((apply make-text-wrapper kwargs) str))
- (define (fill-string str . kwargs)
- "Wraps the text given in string @var{str} according to the parameters
- provided in @var{kwargs}, or the default setting if they are not
- given. Returns a single string with the wrapped text. Valid keyword
- arguments are discussed in @code{make-text-wrapper}."
- (string-join (apply string->wrapped-lines str kwargs)
- "\n"
- 'infix))
|