wisent.el 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. ;;; semantic/wisent.el --- Wisent - Semantic gateway
  2. ;; Copyright (C) 2001-2007, 2009-2012 Free Software Foundation, Inc.
  3. ;; Author: David Ponce <david@dponce.com>
  4. ;; Maintainer: David Ponce <david@dponce.com>
  5. ;; Created: 30 Aug 2001
  6. ;; Keywords: syntax
  7. ;; This file is part of GNU Emacs.
  8. ;; GNU Emacs is free software: you can redistribute it and/or modify
  9. ;; it under the terms of the GNU General Public License as published by
  10. ;; the Free Software Foundation, either version 3 of the License, or
  11. ;; (at your option) any later version.
  12. ;; GNU Emacs is distributed in the hope that it will be useful,
  13. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;; GNU General Public License for more details.
  16. ;; You should have received a copy of the GNU General Public License
  17. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  18. ;;; Commentary:
  19. ;;
  20. ;; Here are functions necessary to use the Wisent LALR parser from
  21. ;; Semantic environment.
  22. ;;; History:
  23. ;;
  24. ;;; Code:
  25. (require 'semantic)
  26. (require 'semantic/wisent/wisent)
  27. ;;; Lexical analysis
  28. ;;
  29. (defvar wisent-lex-istream nil
  30. "Input stream of `semantic-lex' syntactic tokens.")
  31. (defvar wisent-lex-lookahead nil
  32. "Extra lookahead token.
  33. When non-nil it is directly returned by `wisent-lex-function'.")
  34. ;; Maintain this alias for compatibility until all WY grammars have
  35. ;; been translated again to Elisp code.
  36. (semantic-alias-obsolete 'wisent-lex-make-token-table
  37. 'semantic-lex-make-type-table "23.2")
  38. (defmacro wisent-lex-eoi ()
  39. "Return an End-Of-Input lexical token.
  40. The EOI token is like this: ($EOI \"\" POINT-MAX . POINT-MAX)."
  41. `(cons ',wisent-eoi-term
  42. (cons ""
  43. (cons (point-max) (point-max)))))
  44. (defmacro define-wisent-lexer (name doc &rest body)
  45. "Create a new lexical analyzer with NAME.
  46. DOC is a documentation string describing this analyzer.
  47. When a token is available in `wisent-lex-istream', eval BODY forms
  48. sequentially. BODY must return a lexical token for the LALR parser.
  49. Each token in input was produced by `semantic-lex', it is a list:
  50. (TOKSYM START . END)
  51. TOKSYM is a terminal symbol used in the grammar.
  52. START and END mark boundary in the current buffer of that token's
  53. value.
  54. Returned tokens must have the form:
  55. (TOKSYM VALUE START . END)
  56. where VALUE is the buffer substring between START and END positions."
  57. `(defun
  58. ,name () ,doc
  59. (cond
  60. (wisent-lex-lookahead
  61. (prog1 wisent-lex-lookahead
  62. (setq wisent-lex-lookahead nil)))
  63. (wisent-lex-istream
  64. ,@body)
  65. ((wisent-lex-eoi)))))
  66. (define-wisent-lexer wisent-lex
  67. "Return the next available lexical token in Wisent's form.
  68. The variable `wisent-lex-istream' contains the list of lexical tokens
  69. produced by `semantic-lex'. Pop the next token available and convert
  70. it to a form suitable for the Wisent's parser."
  71. (let* ((tk (car wisent-lex-istream)))
  72. ;; Eat input stream
  73. (setq wisent-lex-istream (cdr wisent-lex-istream))
  74. (cons (semantic-lex-token-class tk)
  75. (cons (semantic-lex-token-text tk)
  76. (semantic-lex-token-bounds tk)))))
  77. ;;; Syntax analysis
  78. ;;
  79. (defvar wisent-error-function nil
  80. "Function used to report parse error.
  81. By default use the function `wisent-message'.")
  82. (make-variable-buffer-local 'wisent-error-function)
  83. (defvar wisent-lexer-function 'wisent-lex
  84. "Function used to obtain the next lexical token in input.
  85. Should be a lexical analyzer created with `define-wisent-lexer'.")
  86. (make-variable-buffer-local 'wisent-lexer-function)
  87. ;; Tag production
  88. ;;
  89. (defsubst wisent-raw-tag (semantic-tag)
  90. "Return raw form of given Semantic tag SEMANTIC-TAG.
  91. Should be used in semantic actions, in grammars, to build a Semantic
  92. parse tree."
  93. (nconc semantic-tag
  94. (if (or $region
  95. (setq $region (nthcdr 2 wisent-input)))
  96. (list (car $region) (cdr $region))
  97. (list (point-max) (point-max)))))
  98. (defsubst wisent-cook-tag (raw-tag)
  99. "From raw form of Semantic tag RAW-TAG, return a list of cooked tags.
  100. Should be used in semantic actions, in grammars, to build a Semantic
  101. parse tree."
  102. (let* ((cooked (semantic--tag-expand raw-tag))
  103. (l cooked))
  104. (while l
  105. (semantic--tag-put-property (car l) 'reparse-symbol $nterm)
  106. (setq l (cdr l)))
  107. cooked))
  108. ;; Unmatched syntax collector
  109. ;;
  110. (defun wisent-collect-unmatched-syntax (nomatch)
  111. "Add lexical token NOMATCH to the cache of unmatched tokens.
  112. See also the variable `semantic-unmatched-syntax-cache'.
  113. NOMATCH is in Wisent's form: (SYMBOL VALUE START . END)
  114. and will be collected in `semantic-lex' form: (SYMBOL START . END)."
  115. (let ((region (cddr nomatch)))
  116. (and (number-or-marker-p (car region))
  117. (number-or-marker-p (cdr region))
  118. (setq semantic-unmatched-syntax-cache
  119. (cons (cons (car nomatch) region)
  120. semantic-unmatched-syntax-cache)))))
  121. ;; Parser plug-ins
  122. ;;
  123. ;; The following functions permit to plug the Wisent LALR parser in
  124. ;; Semantic toolkit. They use the standard API provided by Semantic
  125. ;; to plug parsers in.
  126. ;;
  127. ;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME:
  128. ;;
  129. ;; - `wisent-parse-stream' designed to override the standard function
  130. ;; `semantic-parse-stream'.
  131. ;;
  132. ;; - `wisent-parse-region' designed to override the standard function
  133. ;; `semantic-parse-region'.
  134. ;;
  135. ;; Maybe the latter is faster because it eliminates a lot of function
  136. ;; call.
  137. ;;
  138. (defun wisent-parse-stream (stream goal)
  139. "Parse STREAM using the Wisent LALR parser.
  140. GOAL is a nonterminal symbol to start parsing at.
  141. Return the list (STREAM SEMANTIC-STREAM) where STREAM are those
  142. elements of STREAM that have not been used. SEMANTIC-STREAM is the
  143. list of semantic tags found.
  144. The LALR parser automaton must be available in buffer local variable
  145. `semantic--parse-table'.
  146. Must be installed by `semantic-install-function-overrides' to override
  147. the standard function `semantic-parse-stream'."
  148. (let (wisent-lex-istream wisent-lex-lookahead la-elt cache)
  149. ;; IMPLEMENTATION NOTES:
  150. ;; `wisent-parse' returns a lookahead token when it stopped
  151. ;; parsing before encountering the end of input. To re-enter the
  152. ;; parser it is necessary to push back in the lexical input stream
  153. ;; the last lookahead token issued. Because the format of
  154. ;; lookahead tokens and tokens in STREAM can be different the
  155. ;; lookahead token is put in the variable `wisent-lex-lookahead'
  156. ;; before calling `wisent-parse'. Wisent's lexers always pop the
  157. ;; next lexical token from that variable when non nil, then from
  158. ;; the lexical input stream.
  159. ;;
  160. ;; The first element of STREAM is used to keep lookahead tokens
  161. ;; across successive calls to `wisent-parse-stream'. In fact
  162. ;; what is kept is a stack of lookaheads encountered so far. It
  163. ;; is cleared when `wisent-parse' returns a valid semantic tag,
  164. ;; or twice the same lookahead token! The latter indicates that
  165. ;; there is a syntax error on that token. If so, tokens currently
  166. ;; in the lookahead stack have not been used, and are moved into
  167. ;; `semantic-unmatched-syntax-cache'. When the parser will be
  168. ;; re-entered, a new lexical token will be read from STREAM.
  169. ;;
  170. ;; The first element of STREAM that contains the lookahead stack
  171. ;; has this format (compatible with the format of `semantic-lex'
  172. ;; tokens):
  173. ;;
  174. ;; (LOOKAHEAD-STACK START . END)
  175. ;;
  176. ;; where LOOKAHEAD-STACK is a list of lookahead tokens. And
  177. ;; START/END are the bounds of the lookahead at top of stack.
  178. ;; Retrieve lookahead token from stack
  179. (setq la-elt (car stream))
  180. (if (consp (car la-elt))
  181. ;; The first elt of STREAM contains a lookahead stack
  182. (setq wisent-lex-lookahead (caar la-elt)
  183. stream (cdr stream))
  184. (setq la-elt nil))
  185. ;; Parse
  186. (setq wisent-lex-istream stream
  187. cache (semantic-safe "wisent-parse-stream: %s"
  188. (condition-case error-to-filter
  189. (wisent-parse semantic--parse-table
  190. wisent-lexer-function
  191. wisent-error-function
  192. goal)
  193. (args-out-of-range
  194. (if (and (not debug-on-error)
  195. (= wisent-parse-max-stack-size
  196. (nth 2 error-to-filter)))
  197. (progn
  198. (message "wisent-parse-stream: %s"
  199. (error-message-string error-to-filter))
  200. (message "wisent-parse-max-stack-size \
  201. might need to be increased"))
  202. (apply 'signal error-to-filter))))))
  203. ;; Manage returned lookahead token
  204. (if wisent-lookahead
  205. (if (eq (caar la-elt) wisent-lookahead)
  206. ;; It is already at top of lookahead stack
  207. (progn
  208. (setq cache nil
  209. la-elt (car la-elt))
  210. (while la-elt
  211. ;; Collect unmatched tokens from the stack
  212. (run-hook-with-args
  213. 'wisent-discarding-token-functions (car la-elt))
  214. (setq la-elt (cdr la-elt))))
  215. ;; New lookahead token
  216. (if (or (consp cache) ;; Clear the stack if parse succeeded
  217. (null la-elt))
  218. (setq la-elt (cons nil nil)))
  219. ;; Push it into the stack
  220. (setcar la-elt (cons wisent-lookahead (car la-elt)))
  221. ;; Update START/END
  222. (setcdr la-elt (cddr wisent-lookahead))
  223. ;; Push (LOOKAHEAD-STACK START . END) in STREAM
  224. (setq wisent-lex-istream (cons la-elt wisent-lex-istream))))
  225. ;; Return (STREAM SEMANTIC-STREAM)
  226. (list wisent-lex-istream
  227. (if (consp cache) cache '(nil))
  228. )))
  229. (defun wisent-parse-region (start end &optional goal depth returnonerror)
  230. "Parse the area between START and END using the Wisent LALR parser.
  231. Return the list of semantic tags found.
  232. Optional arguments GOAL is a nonterminal symbol to start parsing at,
  233. DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to
  234. stop parsing on syntax error, when non-nil.
  235. The LALR parser automaton must be available in buffer local variable
  236. `semantic--parse-table'.
  237. Must be installed by `semantic-install-function-overrides' to override
  238. the standard function `semantic-parse-region'."
  239. (if (or (< start (point-min)) (> end (point-max)) (< end start))
  240. (error "Invalid bounds [%s %s] passed to `wisent-parse-region'"
  241. start end))
  242. (let* ((case-fold-search semantic-case-fold)
  243. (wisent-lex-istream (semantic-lex start end depth))
  244. ptree tag cooked lstack wisent-lex-lookahead)
  245. ;; Loop while there are lexical tokens available
  246. (while wisent-lex-istream
  247. ;; Parse
  248. (setq wisent-lex-lookahead (car lstack)
  249. tag (semantic-safe "wisent-parse-region: %s"
  250. (wisent-parse semantic--parse-table
  251. wisent-lexer-function
  252. wisent-error-function
  253. goal)))
  254. ;; Manage returned lookahead token
  255. (if wisent-lookahead
  256. (if (eq (car lstack) wisent-lookahead)
  257. ;; It is already at top of lookahead stack
  258. (progn
  259. (setq tag nil)
  260. (while lstack
  261. ;; Collect unmatched tokens from lookahead stack
  262. (run-hook-with-args
  263. 'wisent-discarding-token-functions (car lstack))
  264. (setq lstack (cdr lstack))))
  265. ;; Push new lookahead token into the stack
  266. (setq lstack (cons wisent-lookahead lstack))))
  267. ;; Manage the parser result
  268. (cond
  269. ;; Parse succeeded, cook result
  270. ((consp tag)
  271. (setq lstack nil ;; Clear the lookahead stack
  272. cooked (semantic--tag-expand tag)
  273. ptree (append cooked ptree))
  274. (while cooked
  275. (setq tag (car cooked)
  276. cooked (cdr cooked))
  277. (or (semantic--tag-get-property tag 'reparse-symbol)
  278. (semantic--tag-put-property tag 'reparse-symbol goal)))
  279. )
  280. ;; Return on error if requested
  281. (returnonerror
  282. (setq wisent-lex-istream nil)
  283. ))
  284. ;; Work in progress...
  285. (if wisent-lex-istream
  286. (and (eq semantic-working-type 'percent)
  287. (boundp 'semantic--progress-reporter)
  288. semantic--progress-reporter
  289. (progress-reporter-update
  290. semantic--progress-reporter
  291. (/ (* 100 (semantic-lex-token-start
  292. (car wisent-lex-istream)))
  293. (point-max))))))
  294. ;; Return parse tree
  295. (nreverse ptree)))
  296. ;;; Interfacing with edebug
  297. ;;
  298. (add-hook
  299. 'edebug-setup-hook
  300. #'(lambda ()
  301. (def-edebug-spec define-wisent-lexer
  302. (&define name stringp def-body)
  303. )
  304. ))
  305. (provide 'semantic/wisent)
  306. ;;; semantic/wisent.el ends here