rng-cmpct.el 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941
  1. ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas
  2. ;; Copyright (C) 2003, 2007-2012 Free Software Foundation, Inc.
  3. ;; Author: James Clark
  4. ;; Keywords: XML, RelaxNG
  5. ;; This file is part of GNU Emacs.
  6. ;; GNU Emacs is free software: you can redistribute it and/or modify
  7. ;; it under the terms of the GNU General Public License as published by
  8. ;; the Free Software Foundation, either version 3 of the License, or
  9. ;; (at your option) any later version.
  10. ;; GNU Emacs is distributed in the hope that it will be useful,
  11. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ;; GNU General Public License for more details.
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  16. ;;; Commentary:
  17. ;; This parses a RELAX NG Compact Syntax schema into the form
  18. ;; specified in rng-pttrn.el.
  19. ;;
  20. ;; RELAX NG Compact Syntax is specified by
  21. ;; http://relaxng.org/compact.html
  22. ;;
  23. ;; This file uses the prefix "rng-c-".
  24. ;;; Code:
  25. (require 'nxml-util)
  26. (require 'rng-util)
  27. (require 'rng-uri)
  28. (require 'rng-pttrn)
  29. ;;;###autoload
  30. (defun rng-c-load-schema (filename)
  31. "Load a schema in RELAX NG compact syntax from FILENAME.
  32. Return a pattern."
  33. (rng-c-parse-file filename))
  34. ;;; Error handling
  35. (put 'rng-c-incorrect-schema
  36. 'error-conditions
  37. '(error rng-error nxml-file-parse-error rng-c-incorrect-schema))
  38. (put 'rng-c-incorrect-schema
  39. 'error-message
  40. "Incorrect schema")
  41. (defun rng-c-signal-incorrect-schema (filename pos message)
  42. (nxml-signal-file-parse-error filename
  43. pos
  44. message
  45. 'rng-c-incorrect-schema))
  46. ;;; Lexing
  47. (defconst rng-c-keywords
  48. '("attribute"
  49. "default"
  50. "datatypes"
  51. "div"
  52. "element"
  53. "empty"
  54. "external"
  55. "grammar"
  56. "include"
  57. "inherit"
  58. "list"
  59. "mixed"
  60. "namespace"
  61. "notAllowed"
  62. "parent"
  63. "start"
  64. "string"
  65. "text"
  66. "token")
  67. "List of strings that are keywords in the compact syntax.")
  68. (defconst rng-c-anchored-keyword-re
  69. (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'")
  70. "Regular expression to match a keyword in the compact syntax.")
  71. (defvar rng-c-syntax-table nil
  72. "Syntax table for parsing the compact syntax.")
  73. (if rng-c-syntax-table
  74. ()
  75. (setq rng-c-syntax-table (make-syntax-table))
  76. (modify-syntax-entry ?# "<" rng-c-syntax-table)
  77. (modify-syntax-entry ?\n ">" rng-c-syntax-table)
  78. (modify-syntax-entry ?- "w" rng-c-syntax-table)
  79. (modify-syntax-entry ?. "w" rng-c-syntax-table)
  80. (modify-syntax-entry ?_ "w" rng-c-syntax-table)
  81. (modify-syntax-entry ?: "_" rng-c-syntax-table))
  82. (defconst rng-c-literal-1-re
  83. "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'"
  84. "Regular expression to match a single-quoted literal.")
  85. (defconst rng-c-literal-2-re
  86. (replace-regexp-in-string "'" "\"" rng-c-literal-1-re)
  87. "Regular expression to match a double-quoted literal.")
  88. (defconst rng-c-ncname-re "\\w+")
  89. (defconst rng-c-anchored-ncname-re
  90. (concat "\\`" rng-c-ncname-re "\\'"))
  91. (defconst rng-c-token-re
  92. (concat "[&|]=" "\\|"
  93. "[][()|&,*+?{}~=-]" "\\|"
  94. rng-c-literal-1-re "\\|"
  95. rng-c-literal-2-re "\\|"
  96. rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|"
  97. "\\\\" rng-c-ncname-re "\\|"
  98. ">>")
  99. "Regular expression to match a token in the compact syntax.")
  100. (defun rng-c-init-buffer ()
  101. (setq case-fold-search nil) ; automatically becomes buffer-local when set
  102. (set-buffer-multibyte t)
  103. (set-syntax-table rng-c-syntax-table))
  104. (defvar rng-c-current-token nil)
  105. (make-variable-buffer-local 'rng-c-current-token)
  106. (defun rng-c-advance ()
  107. (cond ((looking-at rng-c-token-re)
  108. (setq rng-c-current-token (match-string 0))
  109. (goto-char (match-end 0))
  110. (forward-comment (point-max)))
  111. ((= (point) (point-max))
  112. (setq rng-c-current-token ""))
  113. (t (rng-c-error "Invalid token"))))
  114. (defconst rng-c-anchored-datatype-name-re
  115. (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'"))
  116. (defsubst rng-c-current-token-keyword-p ()
  117. (string-match rng-c-anchored-keyword-re rng-c-current-token))
  118. (defsubst rng-c-current-token-prefixed-name-p ()
  119. (string-match rng-c-anchored-datatype-name-re rng-c-current-token))
  120. (defsubst rng-c-current-token-literal-p ()
  121. (string-match "\\`['\"]" rng-c-current-token))
  122. (defsubst rng-c-current-token-quoted-identifier-p ()
  123. (string-match "\\`\\\\" rng-c-current-token))
  124. (defsubst rng-c-current-token-ncname-p ()
  125. (string-match rng-c-anchored-ncname-re rng-c-current-token))
  126. (defsubst rng-c-current-token-ns-name-p ()
  127. (let ((len (length rng-c-current-token)))
  128. (and (> len 0)
  129. (= (aref rng-c-current-token (- len 1)) ?*))))
  130. ;;; Namespaces
  131. (defvar rng-c-inherit-namespace nil)
  132. (defvar rng-c-default-namespace nil)
  133. (defvar rng-c-default-namespace-declared nil)
  134. (defvar rng-c-namespace-decls nil
  135. "Alist of namespace declarations.")
  136. (defconst rng-c-no-namespace nil)
  137. (defun rng-c-declare-standard-namespaces ()
  138. (setq rng-c-namespace-decls
  139. (cons (cons "xml" nxml-xml-namespace-uri)
  140. rng-c-namespace-decls))
  141. (when (and (not rng-c-default-namespace-declared)
  142. rng-c-inherit-namespace)
  143. (setq rng-c-default-namespace rng-c-inherit-namespace)))
  144. (defun rng-c-expand-name (prefixed-name)
  145. (let ((i (string-match ":" prefixed-name)))
  146. (rng-make-name (rng-c-lookup-prefix (substring prefixed-name
  147. 0
  148. i))
  149. (substring prefixed-name (+ i 1)))))
  150. (defun rng-c-lookup-prefix (prefix)
  151. (let ((binding (assoc prefix rng-c-namespace-decls)))
  152. (or binding (rng-c-error "Undefined prefix %s" prefix))
  153. (cdr binding)))
  154. (defun rng-c-unqualified-namespace (attribute)
  155. (if attribute
  156. rng-c-no-namespace
  157. rng-c-default-namespace))
  158. (defun rng-c-make-context ()
  159. (cons rng-c-default-namespace rng-c-namespace-decls))
  160. ;;; Datatypes
  161. (defconst rng-string-datatype
  162. (rng-make-datatype rng-builtin-datatypes-uri "string"))
  163. (defconst rng-token-datatype
  164. (rng-make-datatype rng-builtin-datatypes-uri "token"))
  165. (defvar rng-c-datatype-decls nil
  166. "Alist of datatype declarations.
  167. Contains a list of pairs (PREFIX . URI) where PREFIX is a string
  168. and URI is a symbol.")
  169. (defun rng-c-declare-standard-datatypes ()
  170. (setq rng-c-datatype-decls
  171. (cons (cons "xsd" rng-xsd-datatypes-uri)
  172. rng-c-datatype-decls)))
  173. (defun rng-c-lookup-datatype-prefix (prefix)
  174. (let ((binding (assoc prefix rng-c-datatype-decls)))
  175. (or binding (rng-c-error "Undefined prefix %s" prefix))
  176. (cdr binding)))
  177. (defun rng-c-expand-datatype (prefixed-name)
  178. (let ((i (string-match ":" prefixed-name)))
  179. (rng-make-datatype
  180. (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i))
  181. (substring prefixed-name (+ i 1)))))
  182. ;;; Grammars
  183. (defvar rng-c-current-grammar nil)
  184. (defvar rng-c-parent-grammar nil)
  185. (defun rng-c-make-grammar ()
  186. (make-hash-table :test 'equal))
  187. (defconst rng-c-about-override-slot 0)
  188. (defconst rng-c-about-combine-slot 1)
  189. (defun rng-c-lookup-create (name grammar)
  190. "Return a def object for NAME.
  191. A def object is a pair \(ABOUT . REF) where REF is returned by
  192. `rng-make-ref'.
  193. ABOUT is a two-element vector [OVERRIDE COMBINE].
  194. COMBINE is either nil, choice or interleave.
  195. OVERRIDE is either nil, require or t."
  196. (let ((def (gethash name grammar)))
  197. (if def
  198. def
  199. (progn
  200. (setq def (cons (vector nil nil) (rng-make-ref name)))
  201. (puthash name def grammar)
  202. def))))
  203. (defun rng-c-make-ref (name)
  204. (or rng-c-current-grammar
  205. (rng-c-error "Reference not in a grammar"))
  206. (cdr (rng-c-lookup-create name rng-c-current-grammar)))
  207. (defun rng-c-make-parent-ref (name)
  208. (or rng-c-parent-grammar
  209. (rng-c-error "Reference to non-existent parent grammar"))
  210. (cdr (rng-c-lookup-create name rng-c-parent-grammar)))
  211. (defvar rng-c-overrides nil
  212. "Contains a list of (NAME . DEF) pairs.")
  213. (defun rng-c-merge-combine (def combine name)
  214. (let* ((about (car def))
  215. (current-combine (aref about rng-c-about-combine-slot)))
  216. (if combine
  217. (if current-combine
  218. (or (eq combine current-combine)
  219. (rng-c-error "Inconsistent combine for %s" name))
  220. (aset about rng-c-about-combine-slot combine))
  221. current-combine)))
  222. (defun rng-c-prepare-define (name combine in-include)
  223. (let* ((def (rng-c-lookup-create name rng-c-current-grammar))
  224. (about (car def))
  225. (overridden (aref about rng-c-about-override-slot)))
  226. (and in-include
  227. (setq rng-c-overrides (cons (cons name def) rng-c-overrides)))
  228. (cond (overridden (and (eq overridden 'require)
  229. (aset about rng-c-about-override-slot t))
  230. nil)
  231. (t (setq combine (rng-c-merge-combine def combine name))
  232. (and (rng-ref-get (cdr def))
  233. (not combine)
  234. (rng-c-error "Duplicate definition of %s" name))
  235. def))))
  236. (defun rng-c-start-include (overrides)
  237. (mapcar (lambda (name-def)
  238. (let* ((def (cdr name-def))
  239. (about (car def))
  240. (save (aref about rng-c-about-override-slot)))
  241. (aset about rng-c-about-override-slot 'require)
  242. (cons save name-def)))
  243. overrides))
  244. (defun rng-c-end-include (overrides)
  245. (mapcar (lambda (o)
  246. (let* ((saved (car o))
  247. (name-def (cdr o))
  248. (name (car name-def))
  249. (def (cdr name-def))
  250. (about (car def)))
  251. (and (eq (aref about rng-c-about-override-slot) 'require)
  252. (rng-c-error "Definition of %s in include did not override definition in included file" name))
  253. (aset about rng-c-about-override-slot saved)))
  254. overrides))
  255. (defun rng-c-define (def value)
  256. (and def
  257. (let ((current-value (rng-ref-get (cdr def))))
  258. (rng-ref-set (cdr def)
  259. (if current-value
  260. (if (eq (aref (car def) rng-c-about-combine-slot)
  261. 'choice)
  262. (rng-make-choice (list current-value value))
  263. (rng-make-interleave (list current-value value)))
  264. value)))))
  265. (defun rng-c-finish-grammar ()
  266. (maphash (lambda (key def)
  267. (or (rng-ref-get (cdr def))
  268. (rng-c-error "Reference to undefined pattern %s" key)))
  269. rng-c-current-grammar)
  270. (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar)
  271. (rng-c-error "No definition of start")))))
  272. ;;; Parsing
  273. (defvar rng-c-escape-positions nil)
  274. (make-variable-buffer-local 'rng-c-escape-positions)
  275. (defvar rng-c-file-name nil)
  276. (make-variable-buffer-local 'rng-c-file-name)
  277. (defvar rng-c-file-index nil)
  278. (defun rng-c-parse-file (filename &optional context)
  279. (with-current-buffer (get-buffer-create (rng-c-buffer-name context))
  280. (erase-buffer)
  281. (rng-c-init-buffer)
  282. (setq rng-c-file-name
  283. (car (insert-file-contents filename)))
  284. (setq rng-c-escape-positions nil)
  285. (rng-c-process-escapes)
  286. (rng-c-parse-top-level context)))
  287. (defun rng-c-buffer-name (context)
  288. (concat " *RNC Input"
  289. (if context
  290. (concat "<"
  291. (number-to-string (setq rng-c-file-index
  292. (1+ rng-c-file-index)))
  293. ">*")
  294. (setq rng-c-file-index 1)
  295. "*")))
  296. (defun rng-c-process-escapes ()
  297. ;; Check for any nuls, since we will use nul chars
  298. ;; for internal purposes.
  299. (let ((pos (search-forward "\C-@" nil t)))
  300. (and pos
  301. (rng-c-error "Nul character found (binary file?)")))
  302. (let ((offset 0))
  303. (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}"
  304. (point-max)
  305. t)
  306. (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16))))
  307. (if (and ch (> ch 0))
  308. (let ((begin (match-beginning 0))
  309. (end (match-end 0)))
  310. (delete-region begin end)
  311. ;; Represent an escaped newline by nul, so
  312. ;; that we can distinguish it from a literal newline.
  313. ;; We will translate it back into a real newline later.
  314. (insert (if (eq ch ?\n) 0 ch))
  315. (setq offset (+ offset (- end begin 1)))
  316. (setq rng-c-escape-positions
  317. (cons (cons (point) offset)
  318. rng-c-escape-positions)))
  319. (rng-c-error "Invalid character escape")))))
  320. (goto-char 1))
  321. (defun rng-c-translate-position (pos)
  322. (let ((tem rng-c-escape-positions))
  323. (while (and tem
  324. (> (caar tem) pos))
  325. (setq tem (cdr tem)))
  326. (if tem
  327. (+ pos (cdar tem))
  328. pos)))
  329. (defun rng-c-error (&rest args)
  330. (rng-c-signal-incorrect-schema rng-c-file-name
  331. (rng-c-translate-position (point))
  332. (apply 'format args)))
  333. (defun rng-c-parse-top-level (context)
  334. (let ((rng-c-namespace-decls nil)
  335. (rng-c-default-namespace nil)
  336. (rng-c-datatype-decls nil))
  337. (goto-char (point-min))
  338. (forward-comment (point-max))
  339. (rng-c-advance)
  340. (rng-c-parse-decls)
  341. (let ((p (if (eq context 'include)
  342. (if (rng-c-implicit-grammar-p)
  343. (rng-c-parse-grammar-body "")
  344. (rng-c-parse-included-grammar))
  345. (if (rng-c-implicit-grammar-p)
  346. (rng-c-parse-implicit-grammar)
  347. (rng-c-parse-pattern)))))
  348. (or (string-equal rng-c-current-token "")
  349. (rng-c-error "Unexpected characters after pattern"))
  350. p)))
  351. (defun rng-c-parse-included-grammar ()
  352. (or (string-equal rng-c-current-token "grammar")
  353. (rng-c-error "Included schema is not a grammar"))
  354. (rng-c-advance)
  355. (rng-c-expect "{")
  356. (rng-c-parse-grammar-body "}"))
  357. (defun rng-c-implicit-grammar-p ()
  358. (or (and (or (rng-c-current-token-prefixed-name-p)
  359. (rng-c-current-token-quoted-identifier-p)
  360. (and (rng-c-current-token-ncname-p)
  361. (not (rng-c-current-token-keyword-p))))
  362. (looking-at "\\["))
  363. (and (string-equal rng-c-current-token "[")
  364. (rng-c-parse-lead-annotation)
  365. nil)
  366. (member rng-c-current-token '("div" "include" ""))
  367. (looking-at "[|&]?=")))
  368. (defun rng-c-parse-decls ()
  369. (setq rng-c-default-namespace-declared nil)
  370. (while (progn
  371. (let ((binding
  372. (assoc rng-c-current-token
  373. '(("namespace" . rng-c-parse-namespace)
  374. ("datatypes" . rng-c-parse-datatypes)
  375. ("default" . rng-c-parse-default)))))
  376. (if binding
  377. (progn
  378. (rng-c-advance)
  379. (funcall (cdr binding))
  380. t)
  381. nil))))
  382. (rng-c-declare-standard-datatypes)
  383. (rng-c-declare-standard-namespaces))
  384. (defun rng-c-parse-datatypes ()
  385. (let ((prefix (rng-c-parse-identifier-or-keyword)))
  386. (or (not (assoc prefix rng-c-datatype-decls))
  387. (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix))
  388. (rng-c-expect "=")
  389. (setq rng-c-datatype-decls
  390. (cons (cons prefix
  391. (rng-make-datatypes-uri (rng-c-parse-literal)))
  392. rng-c-datatype-decls))))
  393. (defun rng-c-parse-namespace ()
  394. (rng-c-declare-namespace nil
  395. (rng-c-parse-identifier-or-keyword)))
  396. (defun rng-c-parse-default ()
  397. (rng-c-expect "namespace")
  398. (rng-c-declare-namespace t
  399. (if (string-equal rng-c-current-token "=")
  400. nil
  401. (rng-c-parse-identifier-or-keyword))))
  402. (defun rng-c-declare-namespace (declare-default prefix)
  403. (rng-c-expect "=")
  404. (let ((ns (cond ((string-equal rng-c-current-token "inherit")
  405. (rng-c-advance)
  406. rng-c-inherit-namespace)
  407. (t
  408. (nxml-make-namespace (rng-c-parse-literal))))))
  409. (and prefix
  410. (or (not (assoc prefix rng-c-namespace-decls))
  411. (rng-c-error "Duplicate namespace declaration for prefix %s"
  412. prefix))
  413. (setq rng-c-namespace-decls
  414. (cons (cons prefix ns) rng-c-namespace-decls)))
  415. (and declare-default
  416. (or (not rng-c-default-namespace-declared)
  417. (rng-c-error "Duplicate default namespace declaration"))
  418. (setq rng-c-default-namespace-declared t)
  419. (setq rng-c-default-namespace ns))))
  420. (defun rng-c-parse-implicit-grammar ()
  421. (let* ((rng-c-parent-grammar rng-c-current-grammar)
  422. (rng-c-current-grammar (rng-c-make-grammar)))
  423. (rng-c-parse-grammar-body "")
  424. (rng-c-finish-grammar)))
  425. (defun rng-c-parse-grammar-body (close-token &optional in-include)
  426. (while (not (string-equal rng-c-current-token close-token))
  427. (cond ((rng-c-current-token-keyword-p)
  428. (let ((kw (intern rng-c-current-token)))
  429. (cond ((eq kw 'start)
  430. (rng-c-parse-define 'start in-include))
  431. ((eq kw 'div)
  432. (rng-c-advance)
  433. (rng-c-parse-div in-include))
  434. ((eq kw 'include)
  435. (and in-include
  436. (rng-c-error "Nested include"))
  437. (rng-c-advance)
  438. (rng-c-parse-include))
  439. (t (rng-c-error "Invalid grammar keyword")))))
  440. ((rng-c-current-token-ncname-p)
  441. (if (looking-at "\\[")
  442. (rng-c-parse-annotation-element)
  443. (rng-c-parse-define rng-c-current-token
  444. in-include)))
  445. ((rng-c-current-token-quoted-identifier-p)
  446. (if (looking-at "\\[")
  447. (rng-c-parse-annotation-element)
  448. (rng-c-parse-define (substring rng-c-current-token 1)
  449. in-include)))
  450. ((rng-c-current-token-prefixed-name-p)
  451. (rng-c-parse-annotation-element))
  452. ((string-equal rng-c-current-token "[")
  453. (rng-c-parse-lead-annotation)
  454. (and (string-equal rng-c-current-token close-token)
  455. (rng-c-error "Missing annotation subject"))
  456. (and (looking-at "\\[")
  457. (rng-c-error "Leading annotation applied to annotation")))
  458. (t (rng-c-error "Invalid grammar content"))))
  459. (or (string-equal rng-c-current-token "")
  460. (rng-c-advance)))
  461. (defun rng-c-parse-div (in-include)
  462. (rng-c-expect "{")
  463. (rng-c-parse-grammar-body "}" in-include))
  464. (defun rng-c-parse-include ()
  465. (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
  466. (rng-c-inherit-namespace (rng-c-parse-opt-inherit))
  467. overrides)
  468. (cond ((string-equal rng-c-current-token "{")
  469. (rng-c-advance)
  470. (let ((rng-c-overrides nil))
  471. (rng-c-parse-grammar-body "}" t)
  472. (setq overrides rng-c-overrides))
  473. (setq overrides (rng-c-start-include overrides))
  474. (rng-c-parse-file filename 'include)
  475. (rng-c-end-include overrides))
  476. (t (rng-c-parse-file filename 'include)))))
  477. (defun rng-c-parse-define (name in-include)
  478. (rng-c-advance)
  479. (let ((assign (assoc rng-c-current-token
  480. '(("=" . nil)
  481. ("|=" . choice)
  482. ("&=" . interleave)))))
  483. (or assign
  484. (rng-c-error "Expected assignment operator"))
  485. (rng-c-advance)
  486. (let ((ref (rng-c-prepare-define name (cdr assign) in-include)))
  487. (rng-c-define ref (rng-c-parse-pattern)))))
  488. (defvar rng-c-had-except nil)
  489. (defun rng-c-parse-pattern ()
  490. (let* ((rng-c-had-except nil)
  491. (p (rng-c-parse-repeated))
  492. (op (assoc rng-c-current-token
  493. '(("|" . rng-make-choice)
  494. ("," . rng-make-group)
  495. ("&" . rng-make-interleave)))))
  496. (if op
  497. (if rng-c-had-except
  498. (rng-c-error "Parentheses required around pattern using -")
  499. (let* ((patterns (cons p nil))
  500. (tail patterns)
  501. (connector rng-c-current-token))
  502. (while (progn
  503. (rng-c-advance)
  504. (let ((newcdr (cons (rng-c-parse-repeated) nil)))
  505. (setcdr tail newcdr)
  506. (setq tail newcdr))
  507. (string-equal rng-c-current-token connector)))
  508. (funcall (cdr op) patterns)))
  509. p)))
  510. (defun rng-c-parse-repeated ()
  511. (let ((p (rng-c-parse-follow-annotations
  512. (rng-c-parse-primary)))
  513. (op (assoc rng-c-current-token
  514. '(("*" . rng-make-zero-or-more)
  515. ("+" . rng-make-one-or-more)
  516. ("?" . rng-make-optional)))))
  517. (if op
  518. (if rng-c-had-except
  519. (rng-c-error "Parentheses required around pattern using -")
  520. (rng-c-parse-follow-annotations
  521. (progn
  522. (rng-c-advance)
  523. (funcall (cdr op) p))))
  524. p)))
  525. (defun rng-c-parse-primary ()
  526. "Parse a primary expression.
  527. The current token must be the first token of the expression.
  528. After parsing the current token should be the token following
  529. the primary expression."
  530. (cond ((rng-c-current-token-keyword-p)
  531. (let ((parse-function (get (intern rng-c-current-token)
  532. 'rng-c-pattern)))
  533. (or parse-function
  534. (rng-c-error "Keyword %s does not introduce a pattern"
  535. rng-c-current-token))
  536. (rng-c-advance)
  537. (funcall parse-function)))
  538. ((rng-c-current-token-ncname-p)
  539. (rng-c-advance-with (rng-c-make-ref rng-c-current-token)))
  540. ((string-equal rng-c-current-token "(")
  541. (rng-c-advance)
  542. (let ((p (rng-c-parse-pattern)))
  543. (rng-c-expect ")")
  544. p))
  545. ((rng-c-current-token-prefixed-name-p)
  546. (let ((name (rng-c-expand-datatype rng-c-current-token)))
  547. (rng-c-advance)
  548. (rng-c-parse-data name)))
  549. ((rng-c-current-token-literal-p)
  550. (rng-make-value rng-token-datatype (rng-c-parse-literal) nil))
  551. ((rng-c-current-token-quoted-identifier-p)
  552. (rng-c-advance-with
  553. (rng-c-make-ref (substring rng-c-current-token 1))))
  554. ((string-equal rng-c-current-token "[")
  555. (rng-c-parse-lead-annotation)
  556. (rng-c-parse-primary))
  557. (t (rng-c-error "Invalid pattern"))))
  558. (defun rng-c-parse-parent ()
  559. (and (rng-c-current-token-keyword-p)
  560. (rng-c-error "Keyword following parent was not quoted"
  561. rng-c-current-token))
  562. (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword)))
  563. (defun rng-c-parse-literal ()
  564. (rng-c-fix-escaped-newlines
  565. (apply 'concat (rng-c-parse-literal-segments))))
  566. (defun rng-c-parse-literal-segments ()
  567. (let ((str (rng-c-parse-literal-segment)))
  568. (cons str
  569. (cond ((string-equal rng-c-current-token "~")
  570. (rng-c-advance)
  571. (rng-c-parse-literal-segments))
  572. (t nil)))))
  573. (defun rng-c-parse-literal-segment ()
  574. (or (rng-c-current-token-literal-p)
  575. (rng-c-error "Expected a literal"))
  576. (rng-c-advance-with
  577. (let ((n (if (and (>= (length rng-c-current-token) 6)
  578. (eq (aref rng-c-current-token 0)
  579. (aref rng-c-current-token 1)))
  580. 3
  581. 1)))
  582. (substring rng-c-current-token n (- n)))))
  583. (defun rng-c-fix-escaped-newlines (str)
  584. (let ((pos 0))
  585. (while (progn
  586. (let ((n (string-match "\C-@" str pos)))
  587. (and n
  588. (aset str n ?\n)
  589. (setq pos (1+ n)))))))
  590. str)
  591. (defun rng-c-parse-identifier-or-keyword ()
  592. (cond ((rng-c-current-token-ncname-p)
  593. (rng-c-advance-with rng-c-current-token))
  594. ((rng-c-current-token-quoted-identifier-p)
  595. (rng-c-advance-with (substring rng-c-current-token 1)))
  596. (t (rng-c-error "Expected identifier or keyword"))))
  597. (put 'string 'rng-c-pattern 'rng-c-parse-string)
  598. (put 'token 'rng-c-pattern 'rng-c-parse-token)
  599. (put 'element 'rng-c-pattern 'rng-c-parse-element)
  600. (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute)
  601. (put 'list 'rng-c-pattern 'rng-c-parse-list)
  602. (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed)
  603. (put 'text 'rng-c-pattern 'rng-c-parse-text)
  604. (put 'empty 'rng-c-pattern 'rng-c-parse-empty)
  605. (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed)
  606. (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar)
  607. (put 'parent 'rng-c-pattern 'rng-c-parse-parent)
  608. (put 'external 'rng-c-pattern 'rng-c-parse-external)
  609. (defun rng-c-parse-element ()
  610. (let ((name-class (rng-c-parse-name-class nil)))
  611. (rng-c-expect "{")
  612. (let ((pattern (rng-c-parse-pattern)))
  613. (rng-c-expect "}")
  614. (rng-make-element name-class pattern))))
  615. (defun rng-c-parse-attribute ()
  616. (let ((name-class (rng-c-parse-name-class 'attribute)))
  617. (rng-c-expect "{")
  618. (let ((pattern (rng-c-parse-pattern)))
  619. (rng-c-expect "}")
  620. (rng-make-attribute name-class pattern))))
  621. (defun rng-c-parse-name-class (attribute)
  622. (let* ((rng-c-had-except nil)
  623. (name-class
  624. (rng-c-parse-follow-annotations
  625. (rng-c-parse-primary-name-class attribute))))
  626. (if (string-equal rng-c-current-token "|")
  627. (let* ((name-classes (cons name-class nil))
  628. (tail name-classes))
  629. (or (not rng-c-had-except)
  630. (rng-c-error "Parentheses required around name-class using - operator"))
  631. (while (progn
  632. (rng-c-advance)
  633. (let ((newcdr
  634. (cons (rng-c-parse-follow-annotations
  635. (rng-c-parse-primary-name-class attribute))
  636. nil)))
  637. (setcdr tail newcdr)
  638. (setq tail newcdr))
  639. (string-equal rng-c-current-token "|")))
  640. (rng-make-choice-name-class name-classes))
  641. name-class)))
  642. (defun rng-c-parse-primary-name-class (attribute)
  643. (cond ((rng-c-current-token-ncname-p)
  644. (rng-c-advance-with
  645. (rng-make-name-name-class
  646. (rng-make-name (rng-c-unqualified-namespace attribute)
  647. rng-c-current-token))))
  648. ((rng-c-current-token-prefixed-name-p)
  649. (rng-c-advance-with
  650. (rng-make-name-name-class
  651. (rng-c-expand-name rng-c-current-token))))
  652. ((string-equal rng-c-current-token "*")
  653. (let ((except (rng-c-parse-opt-except-name-class attribute)))
  654. (if except
  655. (rng-make-any-name-except-name-class except)
  656. (rng-make-any-name-name-class))))
  657. ((rng-c-current-token-ns-name-p)
  658. (let* ((ns
  659. (rng-c-lookup-prefix (substring rng-c-current-token
  660. 0
  661. -2)))
  662. (except (rng-c-parse-opt-except-name-class attribute)))
  663. (if except
  664. (rng-make-ns-name-except-name-class ns except)
  665. (rng-make-ns-name-name-class ns))))
  666. ((string-equal rng-c-current-token "(")
  667. (rng-c-advance)
  668. (let ((name-class (rng-c-parse-name-class attribute)))
  669. (rng-c-expect ")")
  670. name-class))
  671. ((rng-c-current-token-quoted-identifier-p)
  672. (rng-c-advance-with
  673. (rng-make-name-name-class
  674. (rng-make-name (rng-c-unqualified-namespace attribute)
  675. (substring rng-c-current-token 1)))))
  676. ((string-equal rng-c-current-token "[")
  677. (rng-c-parse-lead-annotation)
  678. (rng-c-parse-primary-name-class attribute))
  679. (t (rng-c-error "Bad name class"))))
  680. (defun rng-c-parse-opt-except-name-class (attribute)
  681. (rng-c-advance)
  682. (and (string-equal rng-c-current-token "-")
  683. (or (not rng-c-had-except)
  684. (rng-c-error "Parentheses required around name-class using - operator"))
  685. (setq rng-c-had-except t)
  686. (progn
  687. (rng-c-advance)
  688. (rng-c-parse-primary-name-class attribute))))
  689. (defun rng-c-parse-mixed ()
  690. (rng-c-expect "{")
  691. (let ((pattern (rng-make-mixed (rng-c-parse-pattern))))
  692. (rng-c-expect "}")
  693. pattern))
  694. (defun rng-c-parse-list ()
  695. (rng-c-expect "{")
  696. (let ((pattern (rng-make-list (rng-c-parse-pattern))))
  697. (rng-c-expect "}")
  698. pattern))
  699. (defun rng-c-parse-text ()
  700. (rng-make-text))
  701. (defun rng-c-parse-empty ()
  702. (rng-make-empty))
  703. (defun rng-c-parse-not-allowed ()
  704. (rng-make-not-allowed))
  705. (defun rng-c-parse-string ()
  706. (rng-c-parse-data rng-string-datatype))
  707. (defun rng-c-parse-token ()
  708. (rng-c-parse-data rng-token-datatype))
  709. (defun rng-c-parse-data (name)
  710. (if (rng-c-current-token-literal-p)
  711. (rng-make-value name
  712. (rng-c-parse-literal)
  713. (and (car name)
  714. (rng-c-make-context)))
  715. (let ((params (rng-c-parse-optional-params)))
  716. (if (string-equal rng-c-current-token "-")
  717. (progn
  718. (if rng-c-had-except
  719. (rng-c-error "Parentheses required around pattern using -")
  720. (setq rng-c-had-except t))
  721. (rng-c-advance)
  722. (rng-make-data-except name
  723. params
  724. (rng-c-parse-primary)))
  725. (rng-make-data name params)))))
  726. (defun rng-c-parse-optional-params ()
  727. (and (string-equal rng-c-current-token "{")
  728. (let* ((head (cons nil nil))
  729. (tail head))
  730. (rng-c-advance)
  731. (while (not (string-equal rng-c-current-token "}"))
  732. (and (string-equal rng-c-current-token "[")
  733. (rng-c-parse-lead-annotation))
  734. (let ((name (rng-c-parse-identifier-or-keyword)))
  735. (rng-c-expect "=")
  736. (let ((newcdr (cons (cons (intern name)
  737. (rng-c-parse-literal))
  738. nil)))
  739. (setcdr tail newcdr)
  740. (setq tail newcdr))))
  741. (rng-c-advance)
  742. (cdr head))))
  743. (defun rng-c-parse-external ()
  744. (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
  745. (rng-c-inherit-namespace (rng-c-parse-opt-inherit)))
  746. (rng-c-parse-file filename 'external)))
  747. (defun rng-c-expand-file (uri)
  748. (condition-case err
  749. (rng-uri-file-name (rng-uri-resolve uri
  750. (rng-file-name-uri rng-c-file-name)))
  751. (rng-uri-error
  752. (rng-c-error (cadr err)))))
  753. (defun rng-c-parse-opt-inherit ()
  754. (cond ((string-equal rng-c-current-token "inherit")
  755. (rng-c-advance)
  756. (rng-c-expect "=")
  757. (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword)))
  758. (t rng-c-default-namespace)))
  759. (defun rng-c-parse-grammar ()
  760. (rng-c-expect "{")
  761. (let* ((rng-c-parent-grammar rng-c-current-grammar)
  762. (rng-c-current-grammar (rng-c-make-grammar)))
  763. (rng-c-parse-grammar-body "}")
  764. (rng-c-finish-grammar)))
  765. (defun rng-c-parse-lead-annotation ()
  766. (rng-c-parse-annotation-body)
  767. (and (string-equal rng-c-current-token "[")
  768. (rng-c-error "Multiple leading annotations")))
  769. (defun rng-c-parse-follow-annotations (obj)
  770. (while (string-equal rng-c-current-token ">>")
  771. (rng-c-advance)
  772. (if (rng-c-current-token-prefixed-name-p)
  773. (rng-c-advance)
  774. (rng-c-parse-identifier-or-keyword))
  775. (rng-c-parse-annotation-body t))
  776. obj)
  777. (defun rng-c-parse-annotation-element ()
  778. (rng-c-advance)
  779. (rng-c-parse-annotation-body t))
  780. ;; XXX need stricter checking of attribute names
  781. ;; XXX don't allow attributes after text
  782. (defun rng-c-parse-annotation-body (&optional allow-text)
  783. "Current token is [. Parse up to matching ].
  784. Current token after parse is token following ]."
  785. (or (string-equal rng-c-current-token "[")
  786. (rng-c-error "Expected ["))
  787. (rng-c-advance)
  788. (while (not (string-equal rng-c-current-token "]"))
  789. (cond ((rng-c-current-token-literal-p)
  790. (or allow-text
  791. (rng-c-error "Out of place text within annotation"))
  792. (rng-c-parse-literal))
  793. (t
  794. (if (rng-c-current-token-prefixed-name-p)
  795. (rng-c-advance)
  796. (rng-c-parse-identifier-or-keyword))
  797. (cond ((string-equal rng-c-current-token "[")
  798. (rng-c-parse-annotation-body t))
  799. ((string-equal rng-c-current-token "=")
  800. (rng-c-advance)
  801. (rng-c-parse-literal))
  802. (t (rng-c-error "Expected = or ["))))))
  803. (rng-c-advance))
  804. (defun rng-c-advance-with (pattern)
  805. (rng-c-advance)
  806. pattern)
  807. (defun rng-c-expect (str)
  808. (or (string-equal rng-c-current-token str)
  809. (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token))
  810. (rng-c-advance))
  811. (provide 'rng-cmpct)
  812. ;;; rng-cmpct.el