sregex.el 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608
  1. ;;; sregex.el --- symbolic regular expressions
  2. ;; Copyright (C) 1997-1998, 2000-2012 Free Software Foundation, Inc.
  3. ;; Author: Bob Glickstein <bobg+sregex@zanshin.com>
  4. ;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com>
  5. ;; Keywords: extensions
  6. ;; Obsolete-since: 24.1
  7. ;; This file is part of GNU Emacs.
  8. ;; GNU Emacs is free software: you can redistribute it and/or modify
  9. ;; it under the terms of the GNU General Public License as published by
  10. ;; the Free Software Foundation, either version 3 of the License, or
  11. ;; (at your option) any later version.
  12. ;; GNU Emacs is distributed in the hope that it will be useful,
  13. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;; GNU General Public License for more details.
  16. ;; You should have received a copy of the GNU General Public License
  17. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  18. ;;; Commentary:
  19. ;; This package allows you to write regular expressions using a
  20. ;; totally new, Lisp-like syntax.
  21. ;; A "symbolic regular expression" (sregex for short) is a Lisp form
  22. ;; that, when evaluated, produces the string form of the specified
  23. ;; regular expression. Here's a simple example:
  24. ;; (sregexq (or "Bob" "Robert")) => "Bob\\|Robert"
  25. ;; As you can see, an sregex is specified by placing one or more
  26. ;; special clauses in a call to `sregexq'. The clause in this case is
  27. ;; the `or' of two strings (not to be confused with the Lisp function
  28. ;; `or'). The list of allowable clauses appears below.
  29. ;; With sregex, it is never necessary to "escape" magic characters
  30. ;; that are meant to be taken literally; that happens automatically.
  31. ;; For example:
  32. ;; (sregexq "M*A*S*H") => "M\\*A\\*S\\*H"
  33. ;; It is also unnecessary to "group" parts of the expression together
  34. ;; to overcome operator precedence; that also happens automatically.
  35. ;; For example:
  36. ;; (sregexq (opt (or "Bob" "Robert"))) => "\\(?:Bob\\|Robert\\)?"
  37. ;; It *is* possible to group parts of the expression in order to refer
  38. ;; to them with numbered backreferences:
  39. ;; (sregexq (group (or "Go" "Run"))
  40. ;; ", Spot, "
  41. ;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1"
  42. ;; `sregexq' is a macro. Each time it is used, it constructs a simple
  43. ;; Lisp expression that then invokes a moderately complex engine to
  44. ;; interpret the sregex and render the string form. Because of this,
  45. ;; I don't recommend sprinkling calls to `sregexq' throughout your
  46. ;; code, the way one normally does with string regexes (which are
  47. ;; cheap to evaluate). Instead, it's wiser to precompute the regexes
  48. ;; you need wherever possible instead of repeatedly constructing the
  49. ;; same ones over and over. Example:
  50. ;; (let ((field-regex (sregexq (opt "resent-")
  51. ;; (or "to" "cc" "bcc"))))
  52. ;; ...
  53. ;; (while ...
  54. ;; ...
  55. ;; (re-search-forward field-regex ...)
  56. ;; ...))
  57. ;; The arguments to `sregexq' are automatically quoted, but the
  58. ;; flipside of this is that it is not straightforward to include
  59. ;; computed (i.e., non-constant) values in `sregexq' expressions. So
  60. ;; `sregex' is a function that is like `sregexq' but which does not
  61. ;; automatically quote its values. Literal sregex clauses must be
  62. ;; explicitly quoted like so:
  63. ;; (sregex '(or "Bob" "Robert")) => "Bob\\|Robert"
  64. ;; but computed clauses can be included easily, allowing for the reuse
  65. ;; of common clauses:
  66. ;; (let ((dotstar '(0+ any))
  67. ;; (whitespace '(1+ (syntax ?-)))
  68. ;; (digits '(1+ (char (?0 . ?9)))))
  69. ;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+"
  70. ;; To use this package in a Lisp program, simply (require 'sregex).
  71. ;; Here are the clauses allowed in an `sregex' or `sregexq'
  72. ;; expression:
  73. ;; - a string
  74. ;; This stands for the literal string. If it contains
  75. ;; metacharacters, they will be escaped in the resulting regex
  76. ;; (using `regexp-quote').
  77. ;; - the symbol `any'
  78. ;; This stands for ".", a regex matching any character except
  79. ;; newline.
  80. ;; - the symbol `bol'
  81. ;; Stands for "^", matching the empty string at the beginning of a line
  82. ;; - the symbol `eol'
  83. ;; Stands for "$", matching the empty string at the end of a line
  84. ;; - (group CLAUSE ...)
  85. ;; Groups the given CLAUSEs using "\\(" and "\\)".
  86. ;; - (sequence CLAUSE ...)
  87. ;; Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)".
  88. ;; Clauses grouped by `sequence' do not count for purposes of
  89. ;; numbering backreferences. Use `sequence' in situations like
  90. ;; this:
  91. ;; (sregexq (or "dog" "cat"
  92. ;; (sequence (opt "sea ") "monkey")))
  93. ;; => "dog\\|cat\\|\\(?:sea \\)?monkey"
  94. ;; where a single `or' alternate needs to contain multiple
  95. ;; subclauses.
  96. ;; - (backref N)
  97. ;; Matches the same string previously matched by the Nth "group" in
  98. ;; the same sregex. N is a positive integer.
  99. ;; - (or CLAUSE ...)
  100. ;; Matches any one of the CLAUSEs by separating them with "\\|".
  101. ;; - (0+ CLAUSE ...)
  102. ;; Concatenates the given CLAUSEs and matches zero or more
  103. ;; occurrences by appending "*".
  104. ;; - (1+ CLAUSE ...)
  105. ;; Concatenates the given CLAUSEs and matches one or more
  106. ;; occurrences by appending "+".
  107. ;; - (opt CLAUSE ...)
  108. ;; Concatenates the given CLAUSEs and matches zero or one occurrence
  109. ;; by appending "?".
  110. ;; - (repeat MIN MAX CLAUSE ...)
  111. ;; Concatenates the given CLAUSEs and constructs a regex matching at
  112. ;; least MIN occurrences and at most MAX occurrences. MIN must be a
  113. ;; non-negative integer. MAX must be a non-negative integer greater
  114. ;; than or equal to MIN; or MAX can be nil to mean "infinity."
  115. ;; - (char CHAR-CLAUSE ...)
  116. ;; Creates a "character class" matching one character from the given
  117. ;; set. See below for how to construct a CHAR-CLAUSE.
  118. ;; - (not-char CHAR-CLAUSE ...)
  119. ;; Creates a "character class" matching any one character not in the
  120. ;; given set. See below for how to construct a CHAR-CLAUSE.
  121. ;; - the symbol `bot'
  122. ;; Stands for "\\`", matching the empty string at the beginning of
  123. ;; text (beginning of a string or of a buffer).
  124. ;; - the symbol `eot'
  125. ;; Stands for "\\'", matching the empty string at the end of text.
  126. ;; - the symbol `point'
  127. ;; Stands for "\\=", matching the empty string at point.
  128. ;; - the symbol `word-boundary'
  129. ;; Stands for "\\b", matching the empty string at the beginning or
  130. ;; end of a word.
  131. ;; - the symbol `not-word-boundary'
  132. ;; Stands for "\\B", matching the empty string not at the beginning
  133. ;; or end of a word.
  134. ;; - the symbol `bow'
  135. ;; Stands for "\\<", matching the empty string at the beginning of a
  136. ;; word.
  137. ;; - the symbol `eow'
  138. ;; Stands for "\\>", matching the empty string at the end of a word.
  139. ;; - the symbol `wordchar'
  140. ;; Stands for the regex "\\w", matching a word-constituent character
  141. ;; (as determined by the current syntax table)
  142. ;; - the symbol `not-wordchar'
  143. ;; Stands for the regex "\\W", matching a non-word-constituent
  144. ;; character.
  145. ;; - (syntax CODE)
  146. ;; Stands for the regex "\\sCODE", where CODE is a syntax table code
  147. ;; (a single character). Matches any character with the requested
  148. ;; syntax.
  149. ;; - (not-syntax CODE)
  150. ;; Stands for the regex "\\SCODE", where CODE is a syntax table code
  151. ;; (a single character). Matches any character without the
  152. ;; requested syntax.
  153. ;; - (regex REGEX)
  154. ;; This is a "trapdoor" for including ordinary regular expression
  155. ;; strings in the result. Some regular expressions are clearer when
  156. ;; written the old way: "[a-z]" vs. (sregexq (char (?a . ?z))), for
  157. ;; instance.
  158. ;; Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
  159. ;; has one of the following forms:
  160. ;; - a character
  161. ;; Adds that character to the set.
  162. ;; - a string
  163. ;; Adds all the characters in the string to the set.
  164. ;; - A pair (MIN . MAX)
  165. ;; Where MIN and MAX are characters, adds the range of characters
  166. ;; from MIN through MAX to the set.
  167. ;;; To do:
  168. ;; An earlier version of this package could optionally translate the
  169. ;; symbolic regex into other languages' syntaxes, e.g. Perl. For
  170. ;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would
  171. ;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore
  172. ;; such a facility.
  173. ;; - handle multibyte chars in sregex--char-aux
  174. ;; - add support for character classes ([:blank:], ...)
  175. ;; - add support for non-greedy operators *? and +?
  176. ;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?"
  177. ;;; Bugs:
  178. ;;; Code:
  179. (eval-when-compile (require 'cl))
  180. ;; Compatibility code for when we didn't have shy-groups
  181. (defvar sregex--current-sregex nil)
  182. (defun sregex-info () nil)
  183. (defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms))
  184. (defun sregex-replace-match (r &optional f l str subexp x)
  185. (replace-match r f l str subexp))
  186. (defun sregex-match-string (c &optional i x) (match-string c i))
  187. (defun sregex-match-string-no-properties (count &optional in-string sregex)
  188. (match-string-no-properties count in-string))
  189. (defun sregex-match-beginning (count &optional sregex) (match-beginning count))
  190. (defun sregex-match-end (count &optional sregex) (match-end count))
  191. (defun sregex-match-data (&optional sregex) (match-data))
  192. (defun sregex-backref-num (n &optional sregex) n)
  193. (defun sregex (&rest exps)
  194. "Symbolic regular expression interpreter.
  195. This is exactly like `sregexq' (q.v.) except that it evaluates all its
  196. arguments, so literal sregex clauses must be quoted. For example:
  197. (sregex '(or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\"
  198. An argument-evaluating sregex interpreter lets you reuse sregex
  199. subexpressions:
  200. (let ((dotstar '(0+ any))
  201. (whitespace '(1+ (syntax ?-)))
  202. (digits '(1+ (char (?0 . ?9)))))
  203. (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\""
  204. (sregex--sequence exps nil))
  205. (defmacro sregexq (&rest exps)
  206. "Symbolic regular expression interpreter.
  207. This macro allows you to specify a regular expression (regexp) in
  208. symbolic form, and converts it into the string form required by Emacs's
  209. regex functions such as `re-search-forward' and `looking-at'. Here is
  210. a simple example:
  211. (sregexq (or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\"
  212. As you can see, an sregex is specified by placing one or more special
  213. clauses in a call to `sregexq'. The clause in this case is the `or'
  214. of two strings (not to be confused with the Lisp function `or'). The
  215. list of allowable clauses appears below.
  216. With `sregex', it is never necessary to \"escape\" magic characters
  217. that are meant to be taken literally; that happens automatically.
  218. For example:
  219. (sregexq \"M*A*S*H\") => \"M\\\\*A\\\\*S\\\\*H\"
  220. It is also unnecessary to \"group\" parts of the expression together
  221. to overcome operator precedence; that also happens automatically.
  222. For example:
  223. (sregexq (opt (or \"Bob\" \"Robert\"))) => \"\\\\(Bob\\\\|Robert\\\\)?\"
  224. It *is* possible to group parts of the expression in order to refer
  225. to them with numbered backreferences:
  226. (sregexq (group (or \"Go\" \"Run\"))
  227. \", Spot, \"
  228. (backref 1)) => \"\\\\(Go\\\\|Run\\\\), Spot, \\\\1\"
  229. If `sregexq' needs to introduce its own grouping parentheses, it will
  230. automatically renumber your backreferences:
  231. (sregexq (opt \"resent-\")
  232. (group (or \"to\" \"cc\" \"bcc\"))
  233. \": \"
  234. (backref 1)) => \"\\\\(resent-\\\\)?\\\\(to\\\\|cc\\\\|bcc\\\\): \\\\2\"
  235. `sregexq' is a macro. Each time it is used, it constructs a simple
  236. Lisp expression that then invokes a moderately complex engine to
  237. interpret the sregex and render the string form. Because of this, I
  238. don't recommend sprinkling calls to `sregexq' throughout your code,
  239. the way one normally does with string regexes (which are cheap to
  240. evaluate). Instead, it's wiser to precompute the regexes you need
  241. wherever possible instead of repeatedly constructing the same ones
  242. over and over. Example:
  243. (let ((field-regex (sregexq (opt \"resent-\")
  244. (or \"to\" \"cc\" \"bcc\"))))
  245. ...
  246. (while ...
  247. ...
  248. (re-search-forward field-regex ...)
  249. ...))
  250. The arguments to `sregexq' are automatically quoted, but the
  251. flipside of this is that it is not straightforward to include
  252. computed (i.e., non-constant) values in `sregexq' expressions. So
  253. `sregex' is a function that is like `sregexq' but which does not
  254. automatically quote its values. Literal sregex clauses must be
  255. explicitly quoted like so:
  256. (sregex '(or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\"
  257. but computed clauses can be included easily, allowing for the reuse
  258. of common clauses:
  259. (let ((dotstar '(0+ any))
  260. (whitespace '(1+ (syntax ?-)))
  261. (digits '(1+ (char (?0 . ?9)))))
  262. (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"
  263. Here are the clauses allowed in an `sregex' or `sregexq' expression:
  264. - a string
  265. This stands for the literal string. If it contains
  266. metacharacters, they will be escaped in the resulting regex
  267. (using `regexp-quote').
  268. - the symbol `any'
  269. This stands for \".\", a regex matching any character except
  270. newline.
  271. - the symbol `bol'
  272. Stands for \"^\", matching the empty string at the beginning of a line
  273. - the symbol `eol'
  274. Stands for \"$\", matching the empty string at the end of a line
  275. - (group CLAUSE ...)
  276. Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\".
  277. - (sequence CLAUSE ...)
  278. Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\".
  279. Clauses grouped by `sequence' do not count for purposes of
  280. numbering backreferences. Use `sequence' in situations like
  281. this:
  282. (sregexq (or \"dog\" \"cat\"
  283. (sequence (opt \"sea \") \"monkey\")))
  284. => \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\"
  285. where a single `or' alternate needs to contain multiple
  286. subclauses.
  287. - (backref N)
  288. Matches the same string previously matched by the Nth \"group\" in
  289. the same sregex. N is a positive integer.
  290. - (or CLAUSE ...)
  291. Matches any one of the CLAUSEs by separating them with \"\\\\|\".
  292. - (0+ CLAUSE ...)
  293. Concatenates the given CLAUSEs and matches zero or more
  294. occurrences by appending \"*\".
  295. - (1+ CLAUSE ...)
  296. Concatenates the given CLAUSEs and matches one or more
  297. occurrences by appending \"+\".
  298. - (opt CLAUSE ...)
  299. Concatenates the given CLAUSEs and matches zero or one occurrence
  300. by appending \"?\".
  301. - (repeat MIN MAX CLAUSE ...)
  302. Concatenates the given CLAUSEs and constructs a regex matching at
  303. least MIN occurrences and at most MAX occurrences. MIN must be a
  304. non-negative integer. MAX must be a non-negative integer greater
  305. than or equal to MIN; or MAX can be nil to mean \"infinity.\"
  306. - (char CHAR-CLAUSE ...)
  307. Creates a \"character class\" matching one character from the given
  308. set. See below for how to construct a CHAR-CLAUSE.
  309. - (not-char CHAR-CLAUSE ...)
  310. Creates a \"character class\" matching any one character not in the
  311. given set. See below for how to construct a CHAR-CLAUSE.
  312. - the symbol `bot'
  313. Stands for \"\\\\`\", matching the empty string at the beginning of
  314. text (beginning of a string or of a buffer).
  315. - the symbol `eot'
  316. Stands for \"\\\\'\", matching the empty string at the end of text.
  317. - the symbol `point'
  318. Stands for \"\\\\=\\=\", matching the empty string at point.
  319. - the symbol `word-boundary'
  320. Stands for \"\\\\b\", matching the empty string at the beginning or
  321. end of a word.
  322. - the symbol `not-word-boundary'
  323. Stands for \"\\\\B\", matching the empty string not at the beginning
  324. or end of a word.
  325. - the symbol `bow'
  326. Stands for \"\\\\=\\<\", matching the empty string at the beginning of a
  327. word.
  328. - the symbol `eow'
  329. Stands for \"\\\\=\\>\", matching the empty string at the end of a word.
  330. - the symbol `wordchar'
  331. Stands for the regex \"\\\\w\", matching a word-constituent character
  332. (as determined by the current syntax table)
  333. - the symbol `not-wordchar'
  334. Stands for the regex \"\\\\W\", matching a non-word-constituent
  335. character.
  336. - (syntax CODE)
  337. Stands for the regex \"\\\\sCODE\", where CODE is a syntax table code
  338. (a single character). Matches any character with the requested
  339. syntax.
  340. - (not-syntax CODE)
  341. Stands for the regex \"\\\\SCODE\", where CODE is a syntax table code
  342. (a single character). Matches any character without the
  343. requested syntax.
  344. - (regex REGEX)
  345. This is a \"trapdoor\" for including ordinary regular expression
  346. strings in the result. Some regular expressions are clearer when
  347. written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for
  348. instance.
  349. Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
  350. has one of the following forms:
  351. - a character
  352. Adds that character to the set.
  353. - a string
  354. Adds all the characters in the string to the set.
  355. - A pair (MIN . MAX)
  356. Where MIN and MAX are characters, adds the range of characters
  357. from MIN through MAX to the set."
  358. `(apply 'sregex ',exps))
  359. (defun sregex--engine (exp combine)
  360. (cond
  361. ((stringp exp)
  362. (if (and combine
  363. (eq combine 'suffix)
  364. (/= (length exp) 1))
  365. (concat "\\(?:" (regexp-quote exp) "\\)")
  366. (regexp-quote exp)))
  367. ((symbolp exp)
  368. (ecase exp
  369. (any ".")
  370. (bol "^")
  371. (eol "$")
  372. (wordchar "\\w")
  373. (not-wordchar "\\W")
  374. (bot "\\`")
  375. (eot "\\'")
  376. (point "\\=")
  377. (word-boundary "\\b")
  378. (not-word-boundary "\\B")
  379. (bow "\\<")
  380. (eow "\\>")))
  381. ((consp exp)
  382. (funcall (intern (concat "sregex--"
  383. (symbol-name (car exp))))
  384. (cdr exp)
  385. combine))
  386. (t (error "Invalid expression: %s" exp))))
  387. (defun sregex--sequence (exps combine)
  388. (if (= (length exps) 1) (sregex--engine (car exps) combine)
  389. (let ((re (mapconcat
  390. (lambda (e) (sregex--engine e 'concat))
  391. exps "")))
  392. (if (eq combine 'suffix)
  393. (concat "\\(?:" re "\\)")
  394. re))))
  395. (defun sregex--or (exps combine)
  396. (if (= (length exps) 1) (sregex--engine (car exps) combine)
  397. (let ((re (mapconcat
  398. (lambda (e) (sregex--engine e 'or))
  399. exps "\\|")))
  400. (if (not (eq combine 'or))
  401. (concat "\\(?:" re "\\)")
  402. re))))
  403. (defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)"))
  404. (defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps))))
  405. (defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?"))
  406. (defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*"))
  407. (defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+"))
  408. (defun sregex--char (exps combine) (sregex--char-aux nil exps))
  409. (defun sregex--not-char (exps combine) (sregex--char-aux t exps))
  410. (defun sregex--syntax (exps combine) (format "\\s%c" (car exps)))
  411. (defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps)))
  412. (defun sregex--regex (exps combine)
  413. (if combine (concat "\\(?:" (car exps) "\\)") (car exps)))
  414. (defun sregex--repeat (exps combine)
  415. (let* ((min (or (pop exps) 0))
  416. (minstr (number-to-string min))
  417. (max (pop exps)))
  418. (concat (sregex--sequence exps 'suffix)
  419. (concat "\\{" minstr ","
  420. (when max (number-to-string max)) "\\}"))))
  421. (defun sregex--char-range (start end)
  422. (let ((startc (char-to-string start))
  423. (endc (char-to-string end)))
  424. (cond
  425. ((> end (+ start 2)) (concat startc "-" endc))
  426. ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc))
  427. ((> end start) (concat startc endc))
  428. (t startc))))
  429. (defun sregex--char-aux (complement args)
  430. ;; regex-opt does the same, we should join effort.
  431. (let ((chars (make-bool-vector 256 nil))) ; Yeah, right!
  432. (dolist (arg args)
  433. (cond ((integerp arg) (aset chars arg t))
  434. ((stringp arg) (mapc (lambda (c) (aset chars c t)) arg))
  435. ((consp arg)
  436. (let ((start (car arg))
  437. (end (cdr arg)))
  438. (when (> start end)
  439. (let ((tmp start)) (setq start end) (setq end tmp)))
  440. ;; now start <= end
  441. (let ((i start))
  442. (while (<= i end)
  443. (aset chars i t)
  444. (setq i (1+ i))))))))
  445. ;; now chars is a map of the characters in the class
  446. (let ((caret (aref chars ?^))
  447. (dash (aref chars ?-))
  448. (class (if (aref chars ?\]) "]" "")))
  449. (aset chars ?^ nil)
  450. (aset chars ?- nil)
  451. (aset chars ?\] nil)
  452. (let (start end)
  453. (dotimes (i 256)
  454. (if (aref chars i)
  455. (progn
  456. (unless start (setq start i))
  457. (setq end i)
  458. (aset chars i nil))
  459. (when start
  460. (setq class (concat class (sregex--char-range start end)))
  461. (setq start nil))))
  462. (if start
  463. (setq class (concat class (sregex--char-range start end)))))
  464. (if (> (length class) 0)
  465. (setq class (concat class (if caret "^") (if dash "-")))
  466. (setq class (concat class (if dash "-") (if caret "^"))))
  467. (if (and (not complement) (= (length class) 1))
  468. (regexp-quote class)
  469. (concat "[" (if complement "^") class "]")))))
  470. (provide 'sregex)
  471. ;;; sregex.el ends here