lex.el 80 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046
  1. ;;; semantic/lex.el --- Lexical Analyzer builder
  2. ;; Copyright (C) 1999-2012 Free Software Foundation, Inc.
  3. ;; Author: Eric M. Ludlam <zappo@gnu.org>
  4. ;; This file is part of GNU Emacs.
  5. ;; GNU Emacs is free software: you can redistribute it and/or modify
  6. ;; it under the terms of the GNU General Public License as published by
  7. ;; the Free Software Foundation, either version 3 of the License, or
  8. ;; (at your option) any later version.
  9. ;; GNU Emacs is distributed in the hope that it will be useful,
  10. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. ;; GNU General Public License for more details.
  13. ;; You should have received a copy of the GNU General Public License
  14. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  15. ;;; Commentary:
  16. ;;
  17. ;; This file handles the creation of lexical analyzers for different
  18. ;; languages in Emacs Lisp. The purpose of a lexical analyzer is to
  19. ;; convert a buffer into a list of lexical tokens. Each token
  20. ;; contains the token class (such as 'number, 'symbol, 'IF, etc) and
  21. ;; the location in the buffer it was found. Optionally, a token also
  22. ;; contains a string representing what is at the designated buffer
  23. ;; location.
  24. ;;
  25. ;; Tokens are pushed onto a token stream, which is basically a list of
  26. ;; all the lexical tokens from the analyzed region. The token stream
  27. ;; is then handed to the grammar which parses the file.
  28. ;;
  29. ;;; How it works
  30. ;;
  31. ;; Each analyzer specifies a condition and forms. These conditions
  32. ;; and forms are assembled into a function by `define-lex' that does
  33. ;; the lexical analysis.
  34. ;;
  35. ;; In the lexical analyzer created with `define-lex', each condition
  36. ;; is tested for a given point. When the condition is true, the forms
  37. ;; run.
  38. ;;
  39. ;; The forms can push a lexical token onto the token stream. The
  40. ;; analyzer forms also must move the current analyzer point. If the
  41. ;; analyzer point is moved without pushing a token, then the matched
  42. ;; syntax is effectively ignored, or skipped.
  43. ;;
  44. ;; Thus, starting at the beginning of a region to be analyzed, each
  45. ;; condition is tested. One will match, and a lexical token might be
  46. ;; pushed, and the point is moved to the end of the lexical token
  47. ;; identified. At the new position, the process occurs again until
  48. ;; the end of the specified region is reached.
  49. ;;
  50. ;;; How to use semantic-lex
  51. ;;
  52. ;; To create a lexer for a language, use the `define-lex' macro.
  53. ;;
  54. ;; The `define-lex' macro accepts a list of lexical analyzers. Each
  55. ;; analyzer is created with `define-lex-analyzer', or one of the
  56. ;; derivative macros. A single analyzer defines a regular expression
  57. ;; to match text in a buffer, and a short segment of code to create
  58. ;; one lexical token.
  59. ;;
  60. ;; Each analyzer has a NAME, DOC, a CONDITION, and possibly some
  61. ;; FORMS. The NAME is the name used in `define-lex'. The DOC
  62. ;; describes what the analyzer should do.
  63. ;;
  64. ;; The CONDITION evaluates the text at the current point in the
  65. ;; current buffer. If CONDITION is true, then the FORMS will be
  66. ;; executed.
  67. ;;
  68. ;; The purpose of the FORMS is to push new lexical tokens onto the
  69. ;; list of tokens for the current buffer, and to move point after the
  70. ;; matched text.
  71. ;;
  72. ;; Some macros for creating one analyzer are:
  73. ;;
  74. ;; define-lex-analyzer - A generic analyzer associating any style of
  75. ;; condition to forms.
  76. ;; define-lex-regex-analyzer - Matches a regular expression.
  77. ;; define-lex-simple-regex-analyzer - Matches a regular expressions,
  78. ;; and pushes the match.
  79. ;; define-lex-block-analyzer - Matches list syntax, and defines
  80. ;; handles open/close delimiters.
  81. ;;
  82. ;; These macros are used by the grammar compiler when lexical
  83. ;; information is specified in a grammar:
  84. ;; define-lex- * -type-analyzer - Matches syntax specified in
  85. ;; a grammar, and pushes one token for it. The * would
  86. ;; be `sexp' for things like lists or strings, and
  87. ;; `string' for things that need to match some special
  88. ;; string, such as "\\." where a literal match is needed.
  89. ;;
  90. ;;; Lexical Tables
  91. ;;
  92. ;; There are tables of different symbols managed in semantic-lex.el.
  93. ;; They are:
  94. ;;
  95. ;; Lexical keyword table - A Table of symbols declared in a grammar
  96. ;; file with the %keyword declaration.
  97. ;; Keywords are used by `semantic-lex-symbol-or-keyword'
  98. ;; to create lexical tokens based on the keyword.
  99. ;;
  100. ;; Lexical type table - A table of symbols declared in a grammar
  101. ;; file with the %type declaration.
  102. ;; The grammar compiler uses the type table to create new
  103. ;; lexical analyzers. These analyzers are then used to when
  104. ;; a new lexical analyzer is made for a language.
  105. ;;
  106. ;;; Lexical Types
  107. ;;
  108. ;; A lexical type defines a kind of lexical analyzer that will be
  109. ;; automatically generated from a grammar file based on some
  110. ;; predetermined attributes. For now these two attributes are
  111. ;; recognized :
  112. ;;
  113. ;; * matchdatatype : define the kind of lexical analyzer. That is :
  114. ;;
  115. ;; - regexp : define a regexp analyzer (see
  116. ;; `define-lex-regex-type-analyzer')
  117. ;;
  118. ;; - string : define a string analyzer (see
  119. ;; `define-lex-string-type-analyzer')
  120. ;;
  121. ;; - block : define a block type analyzer (see
  122. ;; `define-lex-block-type-analyzer')
  123. ;;
  124. ;; - sexp : define a sexp analyzer (see
  125. ;; `define-lex-sexp-type-analyzer')
  126. ;;
  127. ;; - keyword : define a keyword analyzer (see
  128. ;; `define-lex-keyword-type-analyzer')
  129. ;;
  130. ;; * syntax : define the syntax that matches a syntactic
  131. ;; expression. When syntax is matched the corresponding type
  132. ;; analyzer is entered and the resulting match data will be
  133. ;; interpreted based on the kind of analyzer (see matchdatatype
  134. ;; above).
  135. ;;
  136. ;; The following lexical types are predefined :
  137. ;;
  138. ;; +-------------+---------------+--------------------------------+
  139. ;; | type | matchdatatype | syntax |
  140. ;; +-------------+---------------+--------------------------------+
  141. ;; | punctuation | string | "\\(\\s.\\|\\s$\\|\\s'\\)+" |
  142. ;; | keyword | keyword | "\\(\\sw\\|\\s_\\)+" |
  143. ;; | symbol | regexp | "\\(\\sw\\|\\s_\\)+" |
  144. ;; | string | sexp | "\\s\"" |
  145. ;; | number | regexp | semantic-lex-number-expression |
  146. ;; | block | block | "\\s(\\|\\s)" |
  147. ;; +-------------+---------------+--------------------------------+
  148. ;;
  149. ;; In a grammar you must use a %type expression to automatically generate
  150. ;; the corresponding analyzers of that type.
  151. ;;
  152. ;; Here is an example to auto-generate punctuation analyzers
  153. ;; with 'matchdatatype and 'syntax predefined (see table above)
  154. ;;
  155. ;; %type <punctuation> ;; will auto-generate this kind of analyzers
  156. ;;
  157. ;; It is equivalent to write :
  158. ;;
  159. ;; %type <punctuation> syntax "\\(\\s.\\|\\s$\\|\\s'\\)+" matchdatatype string
  160. ;;
  161. ;; ;; Some punctuation based on the type defines above
  162. ;;
  163. ;; %token <punctuation> NOT "!"
  164. ;; %token <punctuation> NOTEQ "!="
  165. ;; %token <punctuation> MOD "%"
  166. ;; %token <punctuation> MODEQ "%="
  167. ;;
  168. ;;; On the Semantic 1.x lexer
  169. ;;
  170. ;; In semantic 1.x, the lexical analyzer was an all purpose routine.
  171. ;; To boost efficiency, the analyzer is now a series of routines that
  172. ;; are constructed at build time into a single routine. This will
  173. ;; eliminate unneeded if statements to speed the lexer.
  174. (require 'semantic/fw)
  175. ;;; Code:
  176. ;;; Semantic 2.x lexical analysis
  177. ;;
  178. (defun semantic-lex-map-symbols (fun table &optional property)
  179. "Call function FUN on every symbol in TABLE.
  180. If optional PROPERTY is non-nil, call FUN only on every symbol which
  181. as a PROPERTY value. FUN receives a symbol as argument."
  182. (if (arrayp table)
  183. (mapatoms
  184. #'(lambda (symbol)
  185. (if (or (null property) (get symbol property))
  186. (funcall fun symbol)))
  187. table)))
  188. ;;; Lexical keyword table handling.
  189. ;;
  190. ;; These keywords are keywords defined for using in a grammar with the
  191. ;; %keyword declaration, and are not keywords used in Emacs Lisp.
  192. (defvar semantic-flex-keywords-obarray nil
  193. "Buffer local keyword obarray for the lexical analyzer.
  194. These keywords are matched explicitly, and converted into special symbols.")
  195. (make-variable-buffer-local 'semantic-flex-keywords-obarray)
  196. (defmacro semantic-lex-keyword-invalid (name)
  197. "Signal that NAME is an invalid keyword name."
  198. `(signal 'wrong-type-argument '(semantic-lex-keyword-p ,name)))
  199. (defsubst semantic-lex-keyword-symbol (name)
  200. "Return keyword symbol with NAME or nil if not found."
  201. (and (arrayp semantic-flex-keywords-obarray)
  202. (stringp name)
  203. (intern-soft name semantic-flex-keywords-obarray)))
  204. (defsubst semantic-lex-keyword-p (name)
  205. "Return non-nil if a keyword with NAME exists in the keyword table.
  206. Return nil otherwise."
  207. (and (setq name (semantic-lex-keyword-symbol name))
  208. (symbol-value name)))
  209. (defsubst semantic-lex-keyword-set (name value)
  210. "Set value of keyword with NAME to VALUE and return VALUE."
  211. (set (intern name semantic-flex-keywords-obarray) value))
  212. (defsubst semantic-lex-keyword-value (name)
  213. "Return value of keyword with NAME.
  214. Signal an error if a keyword with NAME does not exist."
  215. (let ((keyword (semantic-lex-keyword-symbol name)))
  216. (if keyword
  217. (symbol-value keyword)
  218. (semantic-lex-keyword-invalid name))))
  219. (defsubst semantic-lex-keyword-put (name property value)
  220. "For keyword with NAME, set its PROPERTY to VALUE."
  221. (let ((keyword (semantic-lex-keyword-symbol name)))
  222. (if keyword
  223. (put keyword property value)
  224. (semantic-lex-keyword-invalid name))))
  225. (defsubst semantic-lex-keyword-get (name property)
  226. "For keyword with NAME, return its PROPERTY value."
  227. (let ((keyword (semantic-lex-keyword-symbol name)))
  228. (if keyword
  229. (get keyword property)
  230. (semantic-lex-keyword-invalid name))))
  231. (defun semantic-lex-make-keyword-table (specs &optional propspecs)
  232. "Convert keyword SPECS into an obarray and return it.
  233. SPECS must be a list of (NAME . TOKSYM) elements, where:
  234. NAME is the name of the keyword symbol to define.
  235. TOKSYM is the lexical token symbol of that keyword.
  236. If optional argument PROPSPECS is non nil, then interpret it, and
  237. apply those properties.
  238. PROPSPECS must be a list of (NAME PROPERTY VALUE) elements."
  239. ;; Create the symbol hash table
  240. (let ((semantic-flex-keywords-obarray (make-vector 13 0))
  241. spec)
  242. ;; fill it with stuff
  243. (while specs
  244. (setq spec (car specs)
  245. specs (cdr specs))
  246. (semantic-lex-keyword-set (car spec) (cdr spec)))
  247. ;; Apply all properties
  248. (while propspecs
  249. (setq spec (car propspecs)
  250. propspecs (cdr propspecs))
  251. (semantic-lex-keyword-put (car spec) (nth 1 spec) (nth 2 spec)))
  252. semantic-flex-keywords-obarray))
  253. (defsubst semantic-lex-map-keywords (fun &optional property)
  254. "Call function FUN on every lexical keyword.
  255. If optional PROPERTY is non-nil, call FUN only on every keyword which
  256. as a PROPERTY value. FUN receives a lexical keyword as argument."
  257. (semantic-lex-map-symbols
  258. fun semantic-flex-keywords-obarray property))
  259. (defun semantic-lex-keywords (&optional property)
  260. "Return a list of lexical keywords.
  261. If optional PROPERTY is non-nil, return only keywords which have a
  262. PROPERTY set."
  263. (let (keywords)
  264. (semantic-lex-map-keywords
  265. #'(lambda (symbol) (setq keywords (cons symbol keywords)))
  266. property)
  267. keywords))
  268. ;;; Inline functions:
  269. (defvar semantic-lex-unterminated-syntax-end-function)
  270. (defvar semantic-lex-analysis-bounds)
  271. (defvar semantic-lex-end-point)
  272. (defsubst semantic-lex-token-bounds (token)
  273. "Fetch the start and end locations of the lexical token TOKEN.
  274. Return a pair (START . END)."
  275. (if (not (numberp (car (cdr token))))
  276. (cdr (cdr token))
  277. (cdr token)))
  278. (defsubst semantic-lex-token-start (token)
  279. "Fetch the start position of the lexical token TOKEN.
  280. See also the function `semantic-lex-token'."
  281. (car (semantic-lex-token-bounds token)))
  282. (defsubst semantic-lex-token-end (token)
  283. "Fetch the end position of the lexical token TOKEN.
  284. See also the function `semantic-lex-token'."
  285. (cdr (semantic-lex-token-bounds token)))
  286. (defsubst semantic-lex-unterminated-syntax-detected (syntax)
  287. "Inside a lexical analyzer, use this when unterminated syntax was found.
  288. Argument SYNTAX indicates the type of syntax that is unterminated.
  289. The job of this function is to move (point) to a new logical location
  290. so that analysis can continue, if possible."
  291. (goto-char
  292. (funcall semantic-lex-unterminated-syntax-end-function
  293. syntax
  294. (car semantic-lex-analysis-bounds)
  295. (cdr semantic-lex-analysis-bounds)
  296. ))
  297. (setq semantic-lex-end-point (point)))
  298. ;;; Type table handling.
  299. ;;
  300. ;; The lexical type table manages types that occur in a grammar file
  301. ;; with the %type declaration. Types represent different syntaxes.
  302. ;; See code for `semantic-lex-preset-default-types' for the classic
  303. ;; types of syntax.
  304. (defvar semantic-lex-types-obarray nil
  305. "Buffer local types obarray for the lexical analyzer.")
  306. (make-variable-buffer-local 'semantic-lex-types-obarray)
  307. (defmacro semantic-lex-type-invalid (type)
  308. "Signal that TYPE is an invalid lexical type name."
  309. `(signal 'wrong-type-argument '(semantic-lex-type-p ,type)))
  310. (defsubst semantic-lex-type-symbol (type)
  311. "Return symbol with TYPE or nil if not found."
  312. (and (arrayp semantic-lex-types-obarray)
  313. (stringp type)
  314. (intern-soft type semantic-lex-types-obarray)))
  315. (defsubst semantic-lex-type-p (type)
  316. "Return non-nil if a symbol with TYPE name exists."
  317. (and (setq type (semantic-lex-type-symbol type))
  318. (symbol-value type)))
  319. (defsubst semantic-lex-type-set (type value)
  320. "Set value of symbol with TYPE name to VALUE and return VALUE."
  321. (set (intern type semantic-lex-types-obarray) value))
  322. (defsubst semantic-lex-type-value (type &optional noerror)
  323. "Return value of symbol with TYPE name.
  324. If optional argument NOERROR is non-nil return nil if a symbol with
  325. TYPE name does not exist. Otherwise signal an error."
  326. (let ((sym (semantic-lex-type-symbol type)))
  327. (if sym
  328. (symbol-value sym)
  329. (unless noerror
  330. (semantic-lex-type-invalid type)))))
  331. (defsubst semantic-lex-type-put (type property value &optional add)
  332. "For symbol with TYPE name, set its PROPERTY to VALUE.
  333. If optional argument ADD is non-nil, create a new symbol with TYPE
  334. name if it does not already exist. Otherwise signal an error."
  335. (let ((sym (semantic-lex-type-symbol type)))
  336. (unless sym
  337. (or add (semantic-lex-type-invalid type))
  338. (semantic-lex-type-set type nil)
  339. (setq sym (semantic-lex-type-symbol type)))
  340. (put sym property value)))
  341. (defsubst semantic-lex-type-get (type property &optional noerror)
  342. "For symbol with TYPE name, return its PROPERTY value.
  343. If optional argument NOERROR is non-nil return nil if a symbol with
  344. TYPE name does not exist. Otherwise signal an error."
  345. (let ((sym (semantic-lex-type-symbol type)))
  346. (if sym
  347. (get sym property)
  348. (unless noerror
  349. (semantic-lex-type-invalid type)))))
  350. (defun semantic-lex-preset-default-types ()
  351. "Install useful default properties for well known types."
  352. (semantic-lex-type-put "punctuation" 'matchdatatype 'string t)
  353. (semantic-lex-type-put "punctuation" 'syntax "\\(\\s.\\|\\s$\\|\\s'\\)+")
  354. (semantic-lex-type-put "keyword" 'matchdatatype 'keyword t)
  355. (semantic-lex-type-put "keyword" 'syntax "\\(\\sw\\|\\s_\\)+")
  356. (semantic-lex-type-put "symbol" 'matchdatatype 'regexp t)
  357. (semantic-lex-type-put "symbol" 'syntax "\\(\\sw\\|\\s_\\)+")
  358. (semantic-lex-type-put "string" 'matchdatatype 'sexp t)
  359. (semantic-lex-type-put "string" 'syntax "\\s\"")
  360. (semantic-lex-type-put "number" 'matchdatatype 'regexp t)
  361. (semantic-lex-type-put "number" 'syntax 'semantic-lex-number-expression)
  362. (semantic-lex-type-put "block" 'matchdatatype 'block t)
  363. (semantic-lex-type-put "block" 'syntax "\\s(\\|\\s)")
  364. )
  365. (defun semantic-lex-make-type-table (specs &optional propspecs)
  366. "Convert type SPECS into an obarray and return it.
  367. SPECS must be a list of (TYPE . TOKENS) elements, where:
  368. TYPE is the name of the type symbol to define.
  369. TOKENS is an list of (TOKSYM . MATCHER) elements, where:
  370. TOKSYM is any lexical token symbol.
  371. MATCHER is a string or regexp a text must match to be a such
  372. lexical token.
  373. If optional argument PROPSPECS is non nil, then interpret it, and
  374. apply those properties.
  375. PROPSPECS must be a list of (TYPE PROPERTY VALUE)."
  376. ;; Create the symbol hash table
  377. (let* ((semantic-lex-types-obarray (make-vector 13 0))
  378. spec type tokens token alist default)
  379. ;; fill it with stuff
  380. (while specs
  381. (setq spec (car specs)
  382. specs (cdr specs)
  383. type (car spec)
  384. tokens (cdr spec)
  385. default nil
  386. alist nil)
  387. (while tokens
  388. (setq token (car tokens)
  389. tokens (cdr tokens))
  390. (if (cdr token)
  391. (setq alist (cons token alist))
  392. (setq token (car token))
  393. (if default
  394. (message
  395. "*Warning* default value of <%s> tokens changed to %S, was %S"
  396. type default token))
  397. (setq default token)))
  398. ;; Ensure the default matching spec is the first one.
  399. (semantic-lex-type-set type (cons default (nreverse alist))))
  400. ;; Install useful default types & properties
  401. (semantic-lex-preset-default-types)
  402. ;; Apply all properties
  403. (while propspecs
  404. (setq spec (car propspecs)
  405. propspecs (cdr propspecs))
  406. ;; Create the type if necessary.
  407. (semantic-lex-type-put (car spec) (nth 1 spec) (nth 2 spec) t))
  408. semantic-lex-types-obarray))
  409. (defsubst semantic-lex-map-types (fun &optional property)
  410. "Call function FUN on every lexical type.
  411. If optional PROPERTY is non-nil, call FUN only on every type symbol
  412. which as a PROPERTY value. FUN receives a type symbol as argument."
  413. (semantic-lex-map-symbols
  414. fun semantic-lex-types-obarray property))
  415. (defun semantic-lex-types (&optional property)
  416. "Return a list of lexical type symbols.
  417. If optional PROPERTY is non-nil, return only type symbols which have
  418. PROPERTY set."
  419. (let (types)
  420. (semantic-lex-map-types
  421. #'(lambda (symbol) (setq types (cons symbol types)))
  422. property)
  423. types))
  424. ;;; Lexical Analyzer framework settings
  425. ;;
  426. (defvar semantic-lex-analyzer 'semantic-flex
  427. "The lexical analyzer used for a given buffer.
  428. See `semantic-lex' for documentation.
  429. For compatibility with Semantic 1.x it defaults to `semantic-flex'.")
  430. (make-variable-buffer-local 'semantic-lex-analyzer)
  431. (defvar semantic-lex-tokens
  432. '(
  433. (bol)
  434. (charquote)
  435. (close-paren)
  436. (comment)
  437. (newline)
  438. (open-paren)
  439. (punctuation)
  440. (semantic-list)
  441. (string)
  442. (symbol)
  443. (whitespace)
  444. )
  445. "An alist of semantic token types.
  446. As of December 2001 (semantic 1.4beta13), this variable is not used in
  447. any code. The only use is to refer to the doc-string from elsewhere.
  448. The key to this alist is the symbol representing token type that
  449. \\[semantic-flex] returns. These are
  450. - bol: Empty string matching a beginning of line.
  451. This token is produced with
  452. `semantic-lex-beginning-of-line'.
  453. - charquote: String sequences that match `\\s\\+' regexp.
  454. This token is produced with `semantic-lex-charquote'.
  455. - close-paren: Characters that match `\\s)' regexp.
  456. These are typically `)', `}', `]', etc.
  457. This token is produced with
  458. `semantic-lex-close-paren'.
  459. - comment: A comment chunk. These token types are not
  460. produced by default.
  461. This token is produced with `semantic-lex-comments'.
  462. Comments are ignored with `semantic-lex-ignore-comments'.
  463. Comments are treated as whitespace with
  464. `semantic-lex-comments-as-whitespace'.
  465. - newline Characters matching `\\s-*\\(\n\\|\\s>\\)' regexp.
  466. This token is produced with `semantic-lex-newline'.
  467. - open-paren: Characters that match `\\s(' regexp.
  468. These are typically `(', `{', `[', etc.
  469. If `semantic-lex-paren-or-list' is used,
  470. then `open-paren' is not usually generated unless
  471. the `depth' argument to \\[semantic-lex] is
  472. greater than 0.
  473. This token is always produced if the analyzer
  474. `semantic-lex-open-paren' is used.
  475. - punctuation: Characters matching `{\\(\\s.\\|\\s$\\|\\s'\\)'
  476. regexp.
  477. This token is produced with `semantic-lex-punctuation'.
  478. Always specify this analyzer after the comment
  479. analyzer.
  480. - semantic-list: String delimited by matching parenthesis, braces,
  481. etc. that the lexer skipped over, because the
  482. `depth' parameter to \\[semantic-flex] was not high
  483. enough.
  484. This token is produced with `semantic-lex-paren-or-list'.
  485. - string: Quoted strings, i.e., string sequences that start
  486. and end with characters matching `\\s\"'
  487. regexp. The lexer relies on @code{forward-sexp} to
  488. find the matching end.
  489. This token is produced with `semantic-lex-string'.
  490. - symbol: String sequences that match `\\(\\sw\\|\\s_\\)+'
  491. regexp.
  492. This token is produced with
  493. `semantic-lex-symbol-or-keyword'. Always add this analyzer
  494. after `semantic-lex-number', or other analyzers that
  495. match its regular expression.
  496. - whitespace: Characters that match `\\s-+' regexp.
  497. This token is produced with `semantic-lex-whitespace'.")
  498. (defvar semantic-lex-syntax-modifications nil
  499. "Changes to the syntax table for this buffer.
  500. These changes are active only while the buffer is being flexed.
  501. This is a list where each element has the form:
  502. (CHAR CLASS)
  503. CHAR is the char passed to `modify-syntax-entry',
  504. and CLASS is the string also passed to `modify-syntax-entry' to define
  505. what syntax class CHAR has.")
  506. (make-variable-buffer-local 'semantic-lex-syntax-modifications)
  507. (defvar semantic-lex-syntax-table nil
  508. "Syntax table used by lexical analysis.
  509. See also `semantic-lex-syntax-modifications'.")
  510. (make-variable-buffer-local 'semantic-lex-syntax-table)
  511. (defvar semantic-lex-comment-regex nil
  512. "Regular expression for identifying comment start during lexical analysis.
  513. This may be automatically set when semantic initializes in a mode, but
  514. may need to be overridden for some special languages.")
  515. (make-variable-buffer-local 'semantic-lex-comment-regex)
(defvar semantic-lex-number-expression
  ;; This expression was written by David Ponce for Java, and copied
  ;; here for C and any other similar language.
  (eval-when-compile
    (concat "\\("
            "\\<[0-9]+[.][0-9]+\\([eE][-+]?[0-9]+\\)?[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[.][eE][-+]?[0-9]+[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[.][fFdD]\\>"
            "\\|"
            "\\<[0-9]+[.]"
            "\\|"
            "[.][0-9]+\\([eE][-+]?[0-9]+\\)?[fFdD]?\\>"
            "\\|"
            "\\<[0-9]+[eE][-+]?[0-9]+[fFdD]?\\>"
            "\\|"
            "\\<0[xX][0-9a-fA-F]+[lL]?\\>"
            "\\|"
            "\\<[0-9]+[lLfFdD]?\\>"
            "\\)"
            ))
  "Regular expression for matching a number.
If this value is nil, no number extraction is done during lex.
This expression tries to match C and Java like numbers.
DECIMAL_LITERAL:
    [1-9][0-9]*
  ;
HEX_LITERAL:
    0[xX][0-9a-fA-F]+
  ;
OCTAL_LITERAL:
    0[0-7]*
  ;
INTEGER_LITERAL:
    <DECIMAL_LITERAL>[lL]?
  | <HEX_LITERAL>[lL]?
  | <OCTAL_LITERAL>[lL]?
  ;
EXPONENT:
    [eE][+-]?[0-9]+
  ;
FLOATING_POINT_LITERAL:
    [0-9]+[.][0-9]*<EXPONENT>?[fFdD]?
  | [.][0-9]+<EXPONENT>?[fFdD]?
  | [0-9]+<EXPONENT>[fFdD]?
  | [0-9]+<EXPONENT>?[fFdD]
  ;")
(make-variable-buffer-local 'semantic-lex-number-expression)
(defvar semantic-lex-depth 0
  "Default lexing depth.
This specifies how many lists to create tokens in.
Used by `define-lex' when the caller supplies no DEPTH argument.")
(make-variable-buffer-local 'semantic-lex-depth)
(defvar semantic-lex-unterminated-syntax-end-function
  ;; Default: give up on the broken construct and lex to the end.
  (lambda (syntax syntax-start lex-end) lex-end)
  "Function called when unterminated syntax is encountered.
This should be set to one function.  That function should take three
parameters.  The SYNTAX, or type of syntax which is unterminated.
SYNTAX-START where the broken syntax begins.
LEX-END is where the lexical analysis was asked to end.
This function can be used for languages that can intelligently fix up
broken syntax, or to exit lexical analysis via `throw' or `signal'
when finding unterminated syntax.")
  579. ;;; Interactive testing commands
  580. (declare-function semantic-elapsed-time "semantic")
  581. (defun semantic-lex-test (arg)
  582. "Test the semantic lexer in the current buffer.
  583. If universal argument ARG, then try the whole buffer."
  584. (interactive "P")
  585. (require 'semantic)
  586. (let* ((start (current-time))
  587. (result (semantic-lex
  588. (if arg (point-min) (point))
  589. (point-max)))
  590. (end (current-time)))
  591. (message "Elapsed Time: %.2f seconds."
  592. (semantic-elapsed-time start end))
  593. (pop-to-buffer "*Lexer Output*")
  594. (require 'pp)
  595. (erase-buffer)
  596. (insert (pp-to-string result))
  597. (goto-char (point-min))
  598. ))
;; NOTE: the variable and the command below deliberately share the name
;; `semantic-lex-debug'; Emacs Lisp keeps function and variable cells
;; separate.
(defvar semantic-lex-debug nil
  "When non-nil, debug the local lexical analyzer.")
(defun semantic-lex-debug (arg)
  "Debug the semantic lexer in the current buffer.
Argument ARG specifies whether to analyze the whole buffer, or start
at point.
While engaged, each token identified by the lexer will be highlighted
in the target buffer.  A description of the current token will be
displayed in the minibuffer.  Press SPC to move to the next lexical token."
  (interactive "P")
  (require 'semantic/debug)
  ;; Bind the debug flag only for the duration of this test run.
  (let ((semantic-lex-debug t))
    (semantic-lex-test arg)))
  611. (defun semantic-lex-highlight-token (token)
  612. "Highlight the lexical TOKEN.
  613. TOKEN is a lexical token with a START And END position.
  614. Return the overlay."
  615. (let ((o (semantic-make-overlay (semantic-lex-token-start token)
  616. (semantic-lex-token-end token))))
  617. (semantic-overlay-put o 'face 'highlight)
  618. o))
(defsubst semantic-lex-debug-break (token)
  "Break during lexical analysis at TOKEN.
No-op unless `semantic-lex-debug' is non-nil.  Highlights TOKEN,
waits for an input event, and always removes the highlight overlay
afterwards (even on quit), via `unwind-protect'."
  (when semantic-lex-debug
    (let ((o nil))
      (unwind-protect
          (progn
            (when token
              (setq o (semantic-lex-highlight-token token)))
            ;; Block until the user presses a key (SPC to continue).
            (semantic-read-event
             (format "%S :: SPC - continue" token)))
        ;; Cleanup clause: drop the overlay no matter how we exit.
        (when o
          (semantic-overlay-delete o))))))
  632. ;;; Lexical analyzer creation
  633. ;;
  634. ;; Code for creating a lex function from lists of analyzers.
  635. ;;
  636. ;; A lexical analyzer is created from a list of individual analyzers.
  637. ;; Each individual analyzer specifies a single match, and code that
  638. ;; goes with it.
  639. ;;
  640. ;; Creation of an analyzer assembles these analyzers into a new function
  641. ;; with the behaviors of all the individual analyzers.
  642. ;;
  643. (defmacro semantic-lex-one-token (analyzers)
  644. "Calculate one token from the current buffer at point.
  645. Uses locally bound variables from `define-lex'.
  646. Argument ANALYZERS is the list of analyzers being used."
  647. (cons 'cond (mapcar #'symbol-value analyzers)))
;; Dynamic state shared between a generated lexer (`define-lex') and
;; the individual analyzers it calls.  Each variable is let-bound for
;; the duration of one lexing pass.
(defvar semantic-lex-end-point nil
  "The end point as tracked through lexical functions.")

(defvar semantic-lex-current-depth nil
  "The current depth as tracked through lexical functions.")

(defvar semantic-lex-maximum-depth nil
  "The maximum depth of parenthesis as tracked through lexical functions.")

(defvar semantic-lex-token-stream nil
  "The current token stream we are collecting.
Tokens are pushed on in reverse order; `define-lex' reverses the list
before returning it.")

(defvar semantic-lex-analysis-bounds nil
  "The bounds of the current analysis, as a cons (START . END).")

(defvar semantic-lex-block-streams nil
  "Streams of tokens inside collapsed blocks.
This is an alist of (ANCHOR . STREAM) elements where ANCHOR is the
start position of the block, and STREAM is the list of tokens in that
block.")

(defvar semantic-lex-reset-hooks nil
  "Abnormal hook used by major-modes to reset lexical analyzers.
Hook functions are called with START and END values for the
current lexical pass.  Should be set with `add-hook', specifying
a LOCAL option.")

;; Stack of nested blocks.
(defvar semantic-lex-block-stack nil)
;;(defvar semantic-lex-timeout 5
;;  "*Number of sections of lexing before giving up.")
(defmacro define-lex (name doc &rest analyzers)
  "Create a new lexical analyzer with NAME.
DOC is a documentation string describing this analyzer.
ANALYZERS are small code snippets of analyzers to use when
building the new NAMED analyzer.  Only use analyzers which
are written to be used in `define-lex'.
Each analyzer should be an analyzer created with `define-lex-analyzer'.
Note: The order in which analyzers are listed is important.
If two analyzers can match the same text, it is important to order the
analyzers so that the one you want to match first occurs first.  For
example, it is good to put a number analyzer in front of a symbol
analyzer which might mistake a number for a symbol."
  `(defun ,name (start end &optional depth length)
     ,(concat doc "\nSee `semantic-lex' for more information.")
     ;; Make sure the state of block parsing starts over.
     (setq semantic-lex-block-streams nil)
     ;; Allow specialty reset items.
     (run-hook-with-args 'semantic-lex-reset-hooks start end)
     ;; Lexing state.
     (let* (;(starttime (current-time))
            (starting-position (point))
            (semantic-lex-token-stream nil)
            (semantic-lex-block-stack nil)
            (tmp-start start)
            (semantic-lex-end-point start)
            (semantic-lex-current-depth 0)
            ;; Use the default depth when not specified.
            (semantic-lex-maximum-depth
             (or depth semantic-lex-depth))
            ;; Bounds needed for unterminated syntax
            (semantic-lex-analysis-bounds (cons start end))
            ;; This entry prevents text properties from
            ;; confusing our lexical analysis.  See Emacs 22 (CVS)
            ;; version of C++ mode with template hack text properties.
            (parse-sexp-lookup-properties nil)
            )
       ;; Maybe REMOVE THIS LATER.
       ;; Trying to find incremental parser bug.
       (when (> end (point-max))
         (error ,(format "%s: end (%%d) > point-max (%%d)" name)
                end (point-max)))
       (with-syntax-table semantic-lex-syntax-table
         (goto-char start)
         ;; Main loop: produce one token per iteration until END is
         ;; reached or LENGTH tokens have been collected.
         (while (and (< (point) end)
                     (or (not length)
                         (<= (length semantic-lex-token-stream) length)))
           (semantic-lex-one-token ,analyzers)
           ;; If no analyzer advanced `semantic-lex-end-point', we
           ;; would loop forever on the same spot; bail out loudly.
           (when (eq semantic-lex-end-point tmp-start)
             (error ,(format "%s: endless loop at %%d, after %%S" name)
                    tmp-start (car semantic-lex-token-stream)))
           (setq tmp-start semantic-lex-end-point)
           (goto-char semantic-lex-end-point)
           ;;(when (> (semantic-elapsed-time starttime (current-time))
           ;;         semantic-lex-timeout)
           ;;  (error "Timeout during lex at char %d" (point)))
           (semantic-throw-on-input 'lex)
           (semantic-lex-debug-break (car semantic-lex-token-stream))
           ))
       ;; Check that there is no unterminated block.
       (when semantic-lex-block-stack
         (let* ((last (pop semantic-lex-block-stack))
                (blk last))
           (while blk
             (message
              ,(format "%s: `%%s' block from %%S is unterminated" name)
              (car blk) (cadr blk))
             (setq blk (pop semantic-lex-block-stack)))
           (semantic-lex-unterminated-syntax-detected (car last))))
       ;; Return to where we started.
       ;; Do not wrap in protective stuff so that if there is an error
       ;; thrown, the user knows where.
       (goto-char starting-position)
       ;; Return the token stream
       (nreverse semantic-lex-token-stream))))
  746. ;;; Collapsed block tokens delimited by any tokens.
  747. ;;
  748. (defun semantic-lex-start-block (syntax)
  749. "Mark the last read token as the beginning of a SYNTAX block."
  750. (if (or (not semantic-lex-maximum-depth)
  751. (< semantic-lex-current-depth semantic-lex-maximum-depth))
  752. (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
  753. (push (list syntax (car semantic-lex-token-stream))
  754. semantic-lex-block-stack)))
(defun semantic-lex-end-block (syntax)
  "Process the end of a previously marked SYNTAX block.
That is, collapse the tokens inside that block, including the
beginning and end of block tokens, into a high level block token of
class SYNTAX.
The token at beginning of block is the one marked by a previous call
to `semantic-lex-start-block'.  The current token is the end of block.
The collapsed tokens are saved in `semantic-lex-block-streams'."
  (if (null semantic-lex-block-stack)
      ;; No pending collapsed block: just pop one depth level.
      (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
    (let* ((stream semantic-lex-token-stream)
           (blk (pop semantic-lex-block-stack))
           (bstream (cdr blk))
           (first (car bstream))
           (last (pop stream)) ;; The current token marks the EOBLK
           tok)
      (if (not (eq (car blk) syntax))
          ;; SYNTAX doesn't match the syntax of the current block in
          ;; the stack.  So we encountered the end of the SYNTAX block
          ;; before the end of the current one in the stack which is
          ;; signaled unterminated.
          (semantic-lex-unterminated-syntax-detected (car blk))
        ;; Move tokens found inside the block from the main stream
        ;; into a separate block stream.
        (while (and stream (not (eq (setq tok (pop stream)) first)))
          (push tok bstream))
        ;; The token marked as beginning of block was not encountered.
        ;; This should not happen!
        (or (eq tok first)
            (error "Token %S not found at beginning of block `%s'"
                   first syntax))
        ;; Save the block stream for future reuse, to avoid redoing
        ;; the lexical analysis of the block content!
        ;; Anchor the block stream with its start position, so we can
        ;; use: (cdr (assq start semantic-lex-block-streams)) to
        ;; quickly retrieve the lexical stream associated to a block.
        (setcar blk (semantic-lex-token-start first))
        (setcdr blk (nreverse bstream))
        (push blk semantic-lex-block-streams)
        ;; In the main stream, replace the tokens inside the block by
        ;; a high level block token of class SYNTAX.
        (setq semantic-lex-token-stream stream)
        (semantic-lex-push-token
         (semantic-lex-token
          syntax (car blk) (semantic-lex-token-end last)))))))
  801. ;;; Lexical token API
  802. ;;
  803. ;; Functions for accessing parts of a token. Use these functions
  804. ;; instead of accessing the list structure directly because the
  805. ;; contents of the lexical may change.
  806. ;;
(defmacro semantic-lex-token (symbol start end &optional str)
  "Create a lexical token.
SYMBOL is a symbol representing the class of syntax found.
START and END define the bounds of the token in the current buffer.
Optional STR is the string for the token only if the bounds in
the buffer do not cover the string they represent.  (As from
macro expansion.)
The result is (SYMBOL START . END), or (SYMBOL STR START . END)
when STR is given."
  ;; This if statement checks the existence of a STR argument at
  ;; compile time, where STR is some symbol or constant.  If the
  ;; variable STR (runtime) is nil, this will make an incorrect decision.
  ;;
  ;; It is like this to maintain the original speed of the compiled
  ;; code.
  (if str
      `(cons ,symbol (cons ,str (cons ,start ,end)))
    `(cons ,symbol (cons ,start ,end))))
  823. (defun semantic-lex-token-p (thing)
  824. "Return non-nil if THING is a semantic lex token.
  825. This is an exhaustively robust check."
  826. (and (consp thing)
  827. (symbolp (car thing))
  828. (or (and (numberp (nth 1 thing))
  829. (numberp (nthcdr 2 thing)))
  830. (and (stringp (nth 1 thing))
  831. (numberp (nth 2 thing))
  832. (numberp (nthcdr 3 thing)))
  833. ))
  834. )
(defun semantic-lex-token-with-text-p (thing)
  "Return non-nil if THING is a semantic lex token.
This is an exhaustively robust check."
  ;; NOTE(review): `semantic-lex-token' builds with-text tokens as the
  ;; improper list (SYMBOL STR START . END), on which `length' would
  ;; signal an error.  This predicate therefore only accepts the
  ;; proper-list shape (SYMBOL STR START END) -- confirm which shape
  ;; callers actually pass before changing it.
  (and (consp thing)
       (symbolp (car thing))
       (= (length thing) 4)
       (stringp (nth 1 thing))
       (numberp (nth 2 thing))
       (numberp (nth 3 thing)))
  )
(defun semantic-lex-token-without-text-p (thing)
  "Return non-nil if THING is a semantic lex token.
This is an exhaustively robust check."
  ;; NOTE(review): like `semantic-lex-token-with-text-p', this uses
  ;; `length', which signals on the dotted (SYMBOL START . END) shape
  ;; produced by `semantic-lex-token'; it only accepts the proper-list
  ;; shape (SYMBOL START END) -- verify against callers.
  (and (consp thing)
       (symbolp (car thing))
       (= (length thing) 3)
       (numberp (nth 1 thing))
       (numberp (nth 2 thing)))
  )
(eval-and-compile

  (defun semantic-lex-expand-block-specs (specs)
    "Expand block specifications SPECS into a Lisp form.
SPECS is a list of (BLOCK BEGIN END) elements where BLOCK, BEGIN, and
END are token class symbols that indicate to produce one collapsed
BLOCK token from tokens found between BEGIN and END ones.
BLOCK must be a non-nil symbol, and at least one of the BEGIN or END
symbols must be non-nil too.
When BEGIN is non-nil, generate a call to `semantic-lex-start-block'
when a BEGIN token class is encountered.
When END is non-nil, generate a call to `semantic-lex-end-block' when
an END token class is encountered."
    (let ((class (make-symbol "class")) ; uninterned symbol avoids capture
          (form nil))
      (dolist (spec specs)
        (when (car spec)
          (when (nth 1 spec)
            (push `((eq ',(nth 1 spec) ,class)
                    (semantic-lex-start-block ',(car spec)))
                  form))
          (when (nth 2 spec)
            (push `((eq ',(nth 2 spec) ,class)
                    (semantic-lex-end-block ',(car spec)))
                  form))))
      (when form
        ;; Dispatch on the class of the most recently pushed token.
        `((let ((,class (semantic-lex-token-class
                         (car semantic-lex-token-stream))))
            (cond ,@(nreverse form))))
        )))
  )
(defmacro semantic-lex-push-token (token &rest blockspecs)
  "Push TOKEN in the lexical analyzer token stream.
Return the lexical analysis current end point.
If optional arguments BLOCKSPECS is non-nil, it specifies to process
collapsed block tokens.  See `semantic-lex-expand-block-specs' for
more details.
This macro should only be called within the bounds of
`define-lex-analyzer'.  It changes the values of the lexical analyzer
variables `token-stream' and `semantic-lex-end-point'.  If you need to
move `semantic-lex-end-point' somewhere else, just modify this
variable after calling `semantic-lex-push-token'."
  `(progn
     (push ,token semantic-lex-token-stream)
     ;; Optional block-collapsing logic, expanded at compile time.
     ,@(semantic-lex-expand-block-specs blockspecs)
     ;; Advance the end point past the token just pushed (block
     ;; processing above may have replaced it with a collapsed one).
     (setq semantic-lex-end-point
           (semantic-lex-token-end (car semantic-lex-token-stream)))
     ))
  901. (defsubst semantic-lex-token-class (token)
  902. "Fetch the class of the lexical token TOKEN.
  903. See also the function `semantic-lex-token'."
  904. (car token))
  905. (defsubst semantic-lex-token-text (token)
  906. "Fetch the text associated with the lexical token TOKEN.
  907. See also the function `semantic-lex-token'."
  908. (if (stringp (car (cdr token)))
  909. (car (cdr token))
  910. (buffer-substring-no-properties
  911. (semantic-lex-token-start token)
  912. (semantic-lex-token-end token))))
  913. (defun semantic-lex-init ()
  914. "Initialize any lexical state for this buffer."
  915. (unless semantic-lex-comment-regex
  916. (setq semantic-lex-comment-regex
  917. (if comment-start-skip
  918. (concat "\\(\\s<\\|" comment-start-skip "\\)")
  919. "\\(\\s<\\)")))
  920. ;; Setup the lexer syntax-table
  921. (setq semantic-lex-syntax-table (copy-syntax-table (syntax-table)))
  922. (dolist (mod semantic-lex-syntax-modifications)
  923. (modify-syntax-entry
  924. (car mod) (nth 1 mod) semantic-lex-syntax-table)))
;;;###autoload
(define-overloadable-function semantic-lex (start end &optional depth length)
  "Lexically analyze text in the current buffer between START and END.
Optional argument DEPTH indicates at what level to scan over entire
lists.  The last argument, LENGTH specifies that `semantic-lex'
should only return LENGTH tokens.  The return value is a token stream.
Each element is a list of the form
  (symbol start-expression . end-expression)
where SYMBOL denotes the token type.
See `semantic-lex-tokens' variable for details on token types.  END
does not mark the end of the text scanned, only the end of the
beginning of text scanned.  Thus, if a string extends past END, the
end of the return token will be larger than END.  To truly restrict
scanning, use `narrow-to-region'."
  ;; Dispatch to the analyzer the current mode installed.
  (funcall semantic-lex-analyzer start end depth length))
(defsubst semantic-lex-buffer (&optional depth)
  "Lex the current buffer.
Optional argument DEPTH is the depth to scan into lists."
  (semantic-lex (point-min) (point-max) depth))

(defsubst semantic-lex-list (semlist depth)
  "Lex the body of SEMLIST to DEPTH.
SEMLIST is a lexical token; re-lex the buffer region it covers."
  (semantic-lex (semantic-lex-token-start semlist)
                (semantic-lex-token-end semlist)
                depth))
  949. ;;; Analyzer creation macros
  950. ;;
  951. ;; An individual analyzer is a condition and code that goes with it.
  952. ;;
  953. ;; Created analyzers become variables with the code associated with them
  954. ;; as the symbol value. These analyzers are assembled into a lexer
  955. ;; to create new lexical analyzers.
(defcustom semantic-lex-debug-analyzers nil
  "Non-nil means to debug analyzers with syntax protection.
Only in effect if `debug-on-error' is also non-nil."
  :group 'semantic
  :type 'boolean)
(defmacro semantic-lex-unterminated-syntax-protection (syntax &rest forms)
  "For SYNTAX, execute FORMS with protection for unterminated syntax.
If FORMS throws an error, treat this as a syntax problem, and
execute the unterminated syntax code.  FORMS should return a position.
Regardless of an error, the cursor should be moved to the end of
the desired syntax, and a position returned.
If `debug-on-error' is set, errors are not caught, so that you can
debug them.
Avoid using a large FORMS since it is duplicated."
  ;; FORMS is expanded twice below (debug and non-debug paths), hence
  ;; the size warning in the docstring.
  `(if (and debug-on-error semantic-lex-debug-analyzers)
       (progn ,@forms)
     (condition-case nil
         (progn ,@forms)
       (error
        (semantic-lex-unterminated-syntax-detected ,syntax)))))
(put 'semantic-lex-unterminated-syntax-protection
     'lisp-indent-function 1)
(defmacro define-lex-analyzer (name doc condition &rest forms)
  "Create a single lexical analyzer NAME with DOC.
When an analyzer is called, the current buffer and point are
positioned in a buffer at the location to be analyzed.
CONDITION is an expression which returns t if FORMS should be run.
Within the bounds of CONDITION and FORMS, the use of backquote
can be used to evaluate expressions at compile time.
While forms are running, the following variables will be locally bound:
  `semantic-lex-analysis-bounds' - The bounds of the current analysis.
                  of the form (START . END)
  `semantic-lex-maximum-depth' - The maximum depth of semantic-list
                  for the current analysis.
  `semantic-lex-current-depth' - The current depth of `semantic-list' that has
                  been descended.
  `semantic-lex-end-point' - End Point after match.
                   Analyzers should set this to a buffer location if their
                   match string does not represent the end of the matched text.
  `semantic-lex-token-stream' - The token list being collected.
                   Add new lexical tokens to this list.
Proper action in FORMS is to move the value of `semantic-lex-end-point' to
after the location of the analyzed entry, and to add any discovered tokens
at the beginning of `semantic-lex-token-stream'.
This can be done by using `semantic-lex-push-token'."
  `(eval-and-compile
     ;; The analyzer is BOTH a variable (holding the (CONDITION FORMS)
     ;; clause that `define-lex' splices into a `cond') and a function
     ;; (for interactive testing of a single analyzer).
     (defvar ,name nil ,doc)
     (defun ,name nil)
     ;; Do this part separately so that re-evaluation rebuilds this code.
     (setq ,name '(,condition ,@forms))
     ;; Build a single lexical analyzer function, so the doc for
     ;; function help is automatically provided, and perhaps the
     ;; function could be useful for testing and debugging one
     ;; analyzer.
     (fset ',name (lambda () ,doc
                    ;; Bind fresh lexer state so the analyzer can run
                    ;; standalone, outside `define-lex'.
                    (let ((semantic-lex-token-stream nil)
                          (semantic-lex-end-point (point))
                          (semantic-lex-analysis-bounds
                           (cons (point) (point-max)))
                          (semantic-lex-current-depth 0)
                          (semantic-lex-maximum-depth
                           semantic-lex-depth)
                          )
                      (when ,condition ,@forms)
                      semantic-lex-token-stream)))
     ))
(defmacro define-lex-regex-analyzer (name doc regexp &rest forms)
  "Create a lexical analyzer with NAME and DOC that will match REGEXP.
FORMS are evaluated upon a successful match.
See `define-lex-analyzer' for more about analyzers."
  `(define-lex-analyzer ,name
     ,doc
     (looking-at ,regexp)
     ,@forms
     ))

(defmacro define-lex-simple-regex-analyzer (name doc regexp toksym
                                                 &optional index
                                                 &rest forms)
  "Create a lexical analyzer with NAME and DOC that match REGEXP.
TOKSYM is the symbol to use when creating a semantic lexical token.
INDEX is the index into the match that defines the bounds of the token.
Index should be a plain integer, and not specified in the macro as an
expression.
FORMS are evaluated upon a successful match BEFORE the new token is
created.  It is valid to ignore FORMS.
See `define-lex-analyzer' for more about analyzers."
  `(define-lex-analyzer ,name
     ,doc
     (looking-at ,regexp)
     ,@forms
     ;; After FORMS, push a TOKSYM token covering match group INDEX
     ;; (the whole match when INDEX is nil).
     (semantic-lex-push-token
      (semantic-lex-token ,toksym
                          (match-beginning ,(or index 0))
                          (match-end ,(or index 0))))
     ))
(defmacro define-lex-block-analyzer (name doc spec1 &rest specs)
  "Create a lexical analyzer NAME for paired delimiters blocks.
It detects a paired delimiters block or the corresponding open or
close delimiter depending on the value of the variable
`semantic-lex-current-depth'.  DOC is the documentation string of the lexical
analyzer.  SPEC1 and SPECS specify the token symbols and open, close
delimiters used.  Each SPEC has the form:
\(BLOCK-SYM (OPEN-DELIM OPEN-SYM) (CLOSE-DELIM CLOSE-SYM))
where BLOCK-SYM is the symbol returned in a block token.  OPEN-DELIM
and CLOSE-DELIM are respectively the open and close delimiters
identifying a block.  OPEN-SYM and CLOSE-SYM are respectively the
symbols returned in open and close tokens."
  (let ((specs (cons spec1 specs))
        spec open olist clist)
    ;; Precompute (at macro-expansion time) the delimiter lookup
    ;; tables embedded in the generated analyzer below.
    (while specs
      (setq spec (car specs)
            specs (cdr specs)
            open (nth 1 spec)
            ;; build alist ((OPEN-DELIM OPEN-SYM BLOCK-SYM) ...)
            olist (cons (list (car open) (cadr open) (car spec)) olist)
            ;; build alist ((CLOSE-DELIM CLOSE-SYM) ...)
            clist (cons (nth 2 spec) clist)))
    `(define-lex-analyzer ,name
       ,doc
       (and
        ;; Any open- or close-delimiter character.
        (looking-at "\\(\\s(\\|\\s)\\)")
        (let ((text (match-string 0)) match)
          (cond
           ((setq match (assoc text ',olist))
            ;; Open delimiter: either descend (emit OPEN-SYM) or, at
            ;; maximum depth, emit one collapsed BLOCK-SYM token that
            ;; spans the whole balanced expression.
            (if (or (not semantic-lex-maximum-depth)
                    (< semantic-lex-current-depth semantic-lex-maximum-depth))
                (progn
                  (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
                  (semantic-lex-push-token
                   (semantic-lex-token
                    (nth 1 match)
                    (match-beginning 0) (match-end 0))))
              (semantic-lex-push-token
               (semantic-lex-token
                (nth 2 match)
                (match-beginning 0)
                (save-excursion
                  (semantic-lex-unterminated-syntax-protection (nth 2 match)
                    (forward-list 1)
                    (point)))
                ))
              ))
           ((setq match (assoc text ',clist))
            ;; Close delimiter: ascend and emit CLOSE-SYM.
            (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
            (semantic-lex-push-token
             (semantic-lex-token
              (nth 1 match)
              (match-beginning 0) (match-end 0)))))))
       )))
  1105. ;;; Analyzers
  1106. ;;
  1107. ;; Pre-defined common analyzers.
  1108. ;;
;; The condition is the constant t, so any analyzer listed after this
;; one in a `define-lex' can never run.
(define-lex-analyzer semantic-lex-default-action
  "The default action when no other lexical actions match text.
This action will just throw an error."
  t
  (error "Unmatched Text during Lexical Analysis"))

(define-lex-analyzer semantic-lex-beginning-of-line
  "Detect and create a beginning of line token (BOL)."
  (and (bolp)
       ;; Just insert a (bol N . N) token in the token stream,
       ;; without moving the point.  N is the point at the
       ;; beginning of line.
       (semantic-lex-push-token (semantic-lex-token 'bol (point) (point)))
       nil) ;; CONTINUE
  ;; We identify and add the BOL token onto the stream, but since
  ;; semantic-lex-end-point doesn't move, we always fail CONDITION, and have no
  ;; FORMS body.
  nil)
(define-lex-simple-regex-analyzer semantic-lex-newline
  "Detect and create newline tokens."
  ;; Group 1 restricts the token to the newline/comment-end character,
  ;; excluding the leading whitespace the regexp also consumes.
  "\\s-*\\(\n\\|\\s>\\)" 'newline 1)

(define-lex-regex-analyzer semantic-lex-newline-as-whitespace
  "Detect and create newline tokens.
Use this ONLY if newlines are not whitespace characters (such as when
they are comment end characters) AND when you want whitespace tokens."
  "\\s-*\\(\n\\|\\s>\\)"
  ;; Language wants whitespaces.  Create a token for it.
  (if (eq (semantic-lex-token-class (car semantic-lex-token-stream))
          'whitespace)
      ;; Merge whitespace tokens together if they are adjacent.  Two
      ;; whitespace tokens may be separated by a comment which is not in
      ;; the token stream.
      (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
              (match-end 0))
    (semantic-lex-push-token
     (semantic-lex-token
      'whitespace (match-beginning 0) (match-end 0)))))

(define-lex-regex-analyzer semantic-lex-ignore-newline
  "Detect and ignore newline tokens.
Use this ONLY if newlines are not whitespace characters (such as when
they are comment end characters)."
  "\\s-*\\(\n\\|\\s>\\)"
  ;; Skip over the match without producing any token.
  (setq semantic-lex-end-point (match-end 0)))
(define-lex-regex-analyzer semantic-lex-whitespace
  "Detect and create whitespace tokens."
  ;; catch whitespace when needed
  "\\s-+"
  ;; Language wants whitespaces.  Create a token for it.
  (if (eq (semantic-lex-token-class (car semantic-lex-token-stream))
          'whitespace)
      ;; Merge whitespace tokens together if they are adjacent.  Two
      ;; whitespace tokens may be separated by a comment which is not in
      ;; the token stream.
      (progn
        (setq semantic-lex-end-point (match-end 0))
        (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
                semantic-lex-end-point))
    (semantic-lex-push-token
     (semantic-lex-token
      'whitespace (match-beginning 0) (match-end 0)))))

(define-lex-regex-analyzer semantic-lex-ignore-whitespace
  "Detect and skip over whitespace tokens."
  ;; catch whitespace when needed
  "\\s-+"
  ;; Skip over the detected whitespace, do not create a token for it.
  (setq semantic-lex-end-point (match-end 0)))
(define-lex-simple-regex-analyzer semantic-lex-number
  "Detect and create number tokens.
See `semantic-lex-number-expression' for details on matching numbers,
and number formats."
  semantic-lex-number-expression 'number)

(define-lex-regex-analyzer semantic-lex-symbol-or-keyword
  "Detect and create symbol and keyword tokens."
  "\\(\\sw\\|\\s_\\)+"
  (semantic-lex-push-token
   (semantic-lex-token
    ;; Use the keyword's own symbol when the text is a declared
    ;; keyword; otherwise produce a generic `symbol' token.
    (or (semantic-lex-keyword-p (match-string 0)) 'symbol)
    (match-beginning 0) (match-end 0))))

(define-lex-simple-regex-analyzer semantic-lex-charquote
  "Detect and create charquote tokens."
  ;; Character quoting characters (ie, \n as newline)
  "\\s\\+" 'charquote)

(define-lex-simple-regex-analyzer semantic-lex-punctuation
  "Detect and create punctuation tokens."
  "\\(\\s.\\|\\s$\\|\\s'\\)" 'punctuation)
(define-lex-analyzer semantic-lex-punctuation-type
  "Detect and create a punctuation type token.
Recognized punctuation is defined in the current table of lexical
types, as the value of the `punctuation' token type."
  (and (looking-at "\\(\\s.\\|\\s$\\|\\s'\\)+")
       (let* ((key (match-string 0))
              (pos (match-beginning 0))
              (end (match-end 0))
              (len (- end pos))
              (lst (semantic-lex-type-value "punctuation" t))
              (def (car lst)) ;; default lexical symbol or nil
              (lst (cdr lst)) ;; alist of (LEX-SYM . PUNCT-STRING)
              (elt nil))
         (if lst
             ;; Starting with the longest one, search if the
             ;; punctuation string is defined for this language.
             (while (and (> len 0) (not (setq elt (rassoc key lst))))
               (setq len (1- len)
                     key (substring key 0 len))))
         (if elt
             ;; Return the punctuation token found
             (semantic-lex-push-token
              (semantic-lex-token (car elt) pos (+ pos len)))
           (if def
               ;; Return a default generic token
               (semantic-lex-push-token
                (semantic-lex-token def pos end))
             ;; Nothing match
             )))))
(define-lex-regex-analyzer semantic-lex-paren-or-list
  "Detect open parenthesis.
Return either a paren token or a semantic list token depending on
`semantic-lex-current-depth'."
  "\\s("
  (if (or (not semantic-lex-maximum-depth)
          (< semantic-lex-current-depth semantic-lex-maximum-depth))
      ;; Still below the maximum depth: descend into the list and
      ;; return an `open-paren' token for just the delimiter.
      (progn
        (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
        (semantic-lex-push-token
         (semantic-lex-token
          'open-paren (match-beginning 0) (match-end 0))))
    ;; At maximum depth: return the whole balanced list as a single
    ;; `semantic-list' token.  If the list is unterminated, the
    ;; protection macro decides where the token ends.
    (semantic-lex-push-token
     (semantic-lex-token
      'semantic-list (match-beginning 0)
      (save-excursion
        (semantic-lex-unterminated-syntax-protection 'semantic-list
          (forward-list 1)
          (point))
        )))
    ))
;; Side effect: increments `semantic-lex-current-depth'; the matching
;; close-paren analyzer below decrements it.
(define-lex-simple-regex-analyzer semantic-lex-open-paren
  "Detect and create an open parenthesis token.
Also increments `semantic-lex-current-depth'."
  "\\s(" 'open-paren 0  (setq semantic-lex-current-depth (1+ semantic-lex-current-depth)))
;; Side effect: decrements `semantic-lex-current-depth'.
(define-lex-simple-regex-analyzer semantic-lex-close-paren
  "Detect and create a close parenthesis token.
Also decrements `semantic-lex-current-depth'."
  "\\s)" 'close-paren 0  (setq semantic-lex-current-depth (1- semantic-lex-current-depth)))
(define-lex-regex-analyzer semantic-lex-string
  "Detect and create a string token."
  "\\s\""
  ;; Zing to the end of this string.  `forward-sexp' skips the whole
  ;; string; an unterminated string is handed to the protection macro
  ;; to pick the token's end position.
  (semantic-lex-push-token
   (semantic-lex-token
    'string (point)
    (save-excursion
      (semantic-lex-unterminated-syntax-protection 'string
        (forward-sexp 1)
        (point))
      ))))
(define-lex-regex-analyzer semantic-lex-comments
  "Detect and create a comment token."
  semantic-lex-comment-regex
  (save-excursion
    (forward-comment 1)
    ;; Generate newline token if enabled: back up one char so the
    ;; newline analyzer can still see the line end.
    (if (bolp) (backward-char 1))
    (setq semantic-lex-end-point (point))
    ;; If the previous token is also a comment, extend its bounds
    ;; instead of pushing a new token, so adjacent comments are linked
    ;; together into a single `comment' token.
    (if (eq (semantic-lex-token-class (car semantic-lex-token-stream)) 'comment)
        (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
                semantic-lex-end-point)
      (semantic-lex-push-token
       (semantic-lex-token
        'comment (match-beginning 0) semantic-lex-end-point)))))
(define-lex-regex-analyzer semantic-lex-comments-as-whitespace
  "Detect comments and create a whitespace token."
  semantic-lex-comment-regex
  (save-excursion
    (forward-comment 1)
    ;; Generate newline token if enabled: back up one char so the
    ;; newline analyzer can still see the line end.
    (if (bolp) (backward-char 1))
    (setq semantic-lex-end-point (point))
    ;; If the previous token is already whitespace, extend its bounds
    ;; instead of pushing a new token, linking the comment into the
    ;; surrounding `whitespace' token.
    (if (eq (semantic-lex-token-class (car semantic-lex-token-stream)) 'whitespace)
        (setcdr (semantic-lex-token-bounds (car semantic-lex-token-stream))
                semantic-lex-end-point)
      (semantic-lex-push-token
       (semantic-lex-token
        'whitespace (match-beginning 0) semantic-lex-end-point)))))
  1291. (define-lex-regex-analyzer semantic-lex-ignore-comments
  1292. "Detect and create a comment token."
  1293. semantic-lex-comment-regex
  1294. (let ((comment-start-point (point)))
  1295. (forward-comment 1)
  1296. (if (eq (point) comment-start-point)
  1297. ;; In this case our start-skip string failed
  1298. ;; to work properly. Lets try and move over
  1299. ;; whatever white space we matched to begin
  1300. ;; with.
  1301. (skip-syntax-forward "-.'" (point-at-eol))
  1302. ;; We may need to back up so newlines or whitespace is generated.
  1303. (if (bolp)
  1304. (backward-char 1)))
  1305. (if (eq (point) comment-start-point)
  1306. (error "Strange comment syntax prevents lexical analysis"))
  1307. (setq semantic-lex-end-point (point))))
  1308. ;;; Comment lexer
  1309. ;;
  1310. ;; Predefined lexers that could be used instead of creating new
  1311. ;; analyzers.
(define-lex semantic-comment-lexer
  "A simple lexical analyzer that handles comments.
This lexer will only return comment tokens.  It is the default lexer
used by `semantic-find-doc-snarf-comment' to snarf up the comment at
point."
  ;; Analyzers are tried in order; the first one that matches wins.
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-newline
  semantic-lex-comments
  semantic-lex-default-action)
  1321. ;;; Test Lexer
  1322. ;;
(define-lex semantic-simple-lexer
  "A simple lexical analyzer that handles simple buffers.
This lexer ignores comments and whitespace, and will return
syntax as specified by the syntax table."
  ;; Analyzers are tried in order; the first one that matches wins,
  ;; so whitespace/newline skipping must come first and the catch-all
  ;; default action last.
  semantic-lex-ignore-whitespace
  semantic-lex-ignore-newline
  semantic-lex-number
  semantic-lex-symbol-or-keyword
  semantic-lex-charquote
  semantic-lex-paren-or-list
  semantic-lex-close-paren
  semantic-lex-string
  semantic-lex-ignore-comments
  semantic-lex-punctuation
  semantic-lex-default-action)
  1338. ;;; Analyzers generated from grammar.
  1339. ;;
  1340. ;; Some analyzers are hand written. Analyzers created with these
  1341. ;; functions are generated from the grammar files.
  1342. (defmacro define-lex-keyword-type-analyzer (name doc syntax)
  1343. "Define a keyword type analyzer NAME with DOC string.
  1344. SYNTAX is the regexp that matches a keyword syntactic expression."
  1345. (let ((key (make-symbol "key")))
  1346. `(define-lex-analyzer ,name
  1347. ,doc
  1348. (and (looking-at ,syntax)
  1349. (let ((,key (semantic-lex-keyword-p (match-string 0))))
  1350. (when ,key
  1351. (semantic-lex-push-token
  1352. (semantic-lex-token
  1353. ,key (match-beginning 0) (match-end 0)))))))
  1354. ))
(defmacro define-lex-sexp-type-analyzer (name doc syntax token)
  "Define a sexp type analyzer NAME with DOC string.
SYNTAX is the regexp that matches the beginning of the s-expression.
TOKEN is the lexical token returned when SYNTAX matches."
  `(define-lex-regex-analyzer ,name
     ,doc
     ,syntax
     (semantic-lex-push-token
      (semantic-lex-token
       ,token (point)
       ;; Span the whole sexp; if it is unterminated, the protection
       ;; macro decides where the token should end.
       (save-excursion
         (semantic-lex-unterminated-syntax-protection ,token
           (forward-sexp 1)
           (point))))))
  )
  1370. (defmacro define-lex-regex-type-analyzer (name doc syntax matches default)
  1371. "Define a regexp type analyzer NAME with DOC string.
  1372. SYNTAX is the regexp that matches a syntactic expression.
  1373. MATCHES is an alist of lexical elements used to refine the syntactic
  1374. expression.
  1375. DEFAULT is the default lexical token returned when no MATCHES."
  1376. (if matches
  1377. (let* ((val (make-symbol "val"))
  1378. (lst (make-symbol "lst"))
  1379. (elt (make-symbol "elt"))
  1380. (pos (make-symbol "pos"))
  1381. (end (make-symbol "end")))
  1382. `(define-lex-analyzer ,name
  1383. ,doc
  1384. (and (looking-at ,syntax)
  1385. (let* ((,val (match-string 0))
  1386. (,pos (match-beginning 0))
  1387. (,end (match-end 0))
  1388. (,lst ,matches)
  1389. ,elt)
  1390. (while (and ,lst (not ,elt))
  1391. (if (string-match (cdar ,lst) ,val)
  1392. (setq ,elt (caar ,lst))
  1393. (setq ,lst (cdr ,lst))))
  1394. (semantic-lex-push-token
  1395. (semantic-lex-token (or ,elt ,default) ,pos ,end))))
  1396. ))
  1397. `(define-lex-simple-regex-analyzer ,name
  1398. ,doc
  1399. ,syntax ,default)
  1400. ))
  1401. (defmacro define-lex-string-type-analyzer (name doc syntax matches default)
  1402. "Define a string type analyzer NAME with DOC string.
  1403. SYNTAX is the regexp that matches a syntactic expression.
  1404. MATCHES is an alist of lexical elements used to refine the syntactic
  1405. expression.
  1406. DEFAULT is the default lexical token returned when no MATCHES."
  1407. (if matches
  1408. (let* ((val (make-symbol "val"))
  1409. (lst (make-symbol "lst"))
  1410. (elt (make-symbol "elt"))
  1411. (pos (make-symbol "pos"))
  1412. (end (make-symbol "end"))
  1413. (len (make-symbol "len")))
  1414. `(define-lex-analyzer ,name
  1415. ,doc
  1416. (and (looking-at ,syntax)
  1417. (let* ((,val (match-string 0))
  1418. (,pos (match-beginning 0))
  1419. (,end (match-end 0))
  1420. (,len (- ,end ,pos))
  1421. (,lst ,matches)
  1422. ,elt)
  1423. ;; Starting with the longest one, search if a lexical
  1424. ;; value match a token defined for this language.
  1425. (while (and (> ,len 0) (not (setq ,elt (rassoc ,val ,lst))))
  1426. (setq ,len (1- ,len)
  1427. ,val (substring ,val 0 ,len)))
  1428. (when ,elt ;; Adjust token end position.
  1429. (setq ,elt (car ,elt)
  1430. ,end (+ ,pos ,len)))
  1431. (semantic-lex-push-token
  1432. (semantic-lex-token (or ,elt ,default) ,pos ,end))))
  1433. ))
  1434. `(define-lex-simple-regex-analyzer ,name
  1435. ,doc
  1436. ,syntax ,default)
  1437. ))
(defmacro define-lex-block-type-analyzer (name doc syntax matches)
  "Define a block type analyzer NAME with DOC string.

SYNTAX is the regexp that matches block delimiters, typically the
open (`\\\\s(') and close (`\\\\s)') parenthesis syntax classes.

MATCHES is a pair (OPEN-SPECS . CLOSE-SPECS) that defines blocks.

  OPEN-SPECS is a list of (OPEN-DELIM OPEN-TOKEN BLOCK-TOKEN) elements
  where:

    OPEN-DELIM is a string: the block open delimiter character.

    OPEN-TOKEN is the lexical token class associated to the OPEN-DELIM
    delimiter.

    BLOCK-TOKEN is the lexical token class associated to the block
    that starts at the OPEN-DELIM delimiter.

  CLOSE-SPECS is a list of (CLOSE-DELIM CLOSE-TOKEN) elements where:

    CLOSE-DELIM is a string: the block end delimiter character.

    CLOSE-TOKEN is the lexical token class associated to the
    CLOSE-DELIM delimiter.

Each element in OPEN-SPECS must have a corresponding element in
CLOSE-SPECS.

The lexer will return a BLOCK-TOKEN token when the value of
`semantic-lex-current-depth' is greater than or equal to the maximum
depth of parenthesis tracking (see also the function `semantic-lex').
Otherwise it will return OPEN-TOKEN and CLOSE-TOKEN tokens.

TO DO: Put the following in the developer's guide and just put a
reference here.

In the grammar:

The value of a block token must be a string that contains a readable
sexp of the form:

  \"(OPEN-TOKEN CLOSE-TOKEN)\"

OPEN-TOKEN and CLOSE-TOKEN represent the block delimiters, and must be
lexical tokens of respectively `open-paren' and `close-paren' types.
Their value is the corresponding delimiter character as a string.

Here is a small example to analyze a parenthesis block:

  %token <block> PAREN_BLOCK \"(LPAREN RPAREN)\"
  %token <open-paren> LPAREN \"(\"
  %token <close-paren> RPAREN \")\"

When the lexer encounters the open-paren delimiter \"(\":

- If the maximum depth of parenthesis tracking is not reached (that
  is, current depth < max depth), it returns a (LPAREN start . end)
  token, then continue analysis inside the block.  Later, when the
  corresponding close-paren delimiter \")\" will be encountered, it
  will return a (RPAREN start . end) token.

- If the maximum depth of parenthesis tracking is reached (current
  depth >= max depth), it returns the whole parenthesis block as
  a (PAREN_BLOCK start . end) token."
  ;; Uninterned symbols keep the expansion hygienic.
  (let* ((val (make-symbol "val"))
         (lst (make-symbol "lst"))
         (elt (make-symbol "elt")))
    `(define-lex-analyzer ,name
       ,doc
       (and
        (looking-at ,syntax) ;; "\\(\\s(\\|\\s)\\)"
        (let ((,val (match-string 0))
              (,lst ,matches)
              ,elt)
          (cond
           ;; Open delimiter found in OPEN-SPECS?
           ((setq ,elt (assoc ,val (car ,lst)))
            (if (or (not semantic-lex-maximum-depth)
                    (< semantic-lex-current-depth semantic-lex-maximum-depth))
                ;; Below max depth: descend and push the OPEN-TOKEN.
                (progn
                  (setq semantic-lex-current-depth (1+ semantic-lex-current-depth))
                  (semantic-lex-push-token
                   (semantic-lex-token
                    (nth 1 ,elt)
                    (match-beginning 0) (match-end 0))))
              ;; At max depth: push the whole block as one BLOCK-TOKEN.
              (semantic-lex-push-token
               (semantic-lex-token
                (nth 2 ,elt)
                (match-beginning 0)
                (save-excursion
                  (semantic-lex-unterminated-syntax-protection (nth 2 ,elt)
                    (forward-list 1)
                    (point)))))))
           ;; Close delimiter found in CLOSE-SPECS: pop one depth
           ;; level and push the CLOSE-TOKEN.
           ((setq ,elt (assoc ,val (cdr ,lst)))
            (setq semantic-lex-current-depth (1- semantic-lex-current-depth))
            (semantic-lex-push-token
             (semantic-lex-token
              (nth 1 ,elt)
              (match-beginning 0) (match-end 0))))
           ))))
    ))
  1518. ;;; Lexical Safety
  1519. ;;
  1520. ;; The semantic lexers, unlike other lexers, can throw errors on
  1521. ;; unbalanced syntax. Since editing is all about changing text
  1522. ;; we need to provide a convenient way to protect against syntactic
  1523. ;; inequalities.
(defmacro semantic-lex-catch-errors (symbol &rest forms)
  "Using SYMBOL, execute FORMS catching lexical errors.
If FORMS results in a call to the parser that throws a lexical error,
the error will be caught here without the buffer's cache being thrown
out of date.
If there is an error, the syntax that failed is returned.
If there is no error, then the last value of FORMS is returned."
  (let ((ret (make-symbol "ret"))
        (syntax (make-symbol "syntax"))
        (start (make-symbol "start"))
        (end (make-symbol "end")))
    ;; Dynamically rebind the unterminated-syntax hook so any lexical
    ;; failure inside FORMS throws to SYMBOL instead of erroring.
    `(let* ((semantic-lex-unterminated-syntax-end-function
             (lambda (,syntax ,start ,end)
               (throw ',symbol ,syntax)))
            ;; Delete the below when semantic-flex is fully retired.
            (semantic-flex-unterminated-syntax-end-function
             semantic-lex-unterminated-syntax-end-function)
            (,ret (catch ',symbol
                    (save-excursion
                      ,@forms
                      nil))))
       ;; Great Sadness.  Assume that FORMS execute within the
       ;; confines of the current buffer only!  Mark this thing
       ;; unparsable iff the special symbol was thrown.  This
       ;; will prevent future calls from parsing, but will allow
       ;; then to still return the cache.
       (when ,ret
         ;; Leave this message off.  If an APP using this fcn wants
         ;; a message, they can do it themselves.  This cleans up
         ;; problems with the idle scheduler obscuring useful data.
         ;;(message "Buffer not currently parsable (%S)." ,ret)
         (semantic-parse-tree-unparseable))
       ,ret)))
;; Indent FORMS as a body under the SYMBOL argument.
(put 'semantic-lex-catch-errors 'lisp-indent-function 1)
;;; Interfacing with edebug
;;
;; Teach edebug how to instrument the `define-lex*' macros and
;; `semantic-lex-catch-errors' defined in this file.
(add-hook
 'edebug-setup-hook
 #'(lambda ()
     (def-edebug-spec define-lex
       (&define name stringp (&rest symbolp))
       )
     (def-edebug-spec define-lex-analyzer
       (&define name stringp form def-body)
       )
     (def-edebug-spec define-lex-regex-analyzer
       (&define name stringp form def-body)
       )
     (def-edebug-spec define-lex-simple-regex-analyzer
       (&define name stringp form symbolp [ &optional form ] def-body)
       )
     (def-edebug-spec define-lex-block-analyzer
       (&define name stringp form (&rest form))
       )
     (def-edebug-spec semantic-lex-catch-errors
       (symbolp def-body)
       )
     ))
;;; Compatibility with Semantic 1.x lexical analysis
;;
;; NOTE: DELETE THIS SOMEDAY SOON
;;
;; Obsolete aliases mapping the old `semantic-flex-*' names to their
;; `semantic-lex-*' replacements (obsolete since version 23.2).
(semantic-alias-obsolete 'semantic-flex-start 'semantic-lex-token-start "23.2")
(semantic-alias-obsolete 'semantic-flex-end 'semantic-lex-token-end "23.2")
(semantic-alias-obsolete 'semantic-flex-text 'semantic-lex-token-text "23.2")
(semantic-alias-obsolete 'semantic-flex-make-keyword-table 'semantic-lex-make-keyword-table "23.2")
(semantic-alias-obsolete 'semantic-flex-keyword-p 'semantic-lex-keyword-p "23.2")
(semantic-alias-obsolete 'semantic-flex-keyword-put 'semantic-lex-keyword-put "23.2")
(semantic-alias-obsolete 'semantic-flex-keyword-get 'semantic-lex-keyword-get "23.2")
(semantic-alias-obsolete 'semantic-flex-map-keywords 'semantic-lex-map-keywords "23.2")
(semantic-alias-obsolete 'semantic-flex-keywords 'semantic-lex-keywords "23.2")
(semantic-alias-obsolete 'semantic-flex-buffer 'semantic-lex-buffer "23.2")
(semantic-alias-obsolete 'semantic-flex-list 'semantic-lex-list "23.2")
;; This simple scanner uses the syntax table to generate a stream of
;; simple tokens of the form:
;;
;;  (SYMBOL START . END)
;;
;; Where symbol is the type of thing it is.  START and END mark that
;; objects boundary.
;;
;; The variables below configure the obsolete `semantic-flex' scanner.

(defvar semantic-flex-tokens semantic-lex-tokens
  "An alist of semantic token types.
See variable `semantic-lex-tokens'.")

(defvar semantic-flex-unterminated-syntax-end-function
  (lambda (syntax syntax-start flex-end) flex-end)
  "Function called when unterminated syntax is encountered.
This should be set to one function.  That function should take three
parameters.  The SYNTAX, or type of syntax which is unterminated.
SYNTAX-START where the broken syntax begins.
FLEX-END is where the lexical analysis was asked to end.
This function can be used for languages that can intelligently fix up
broken syntax, or the exit lexical analysis via `throw' or `signal'
when finding unterminated syntax.")

(defvar semantic-flex-extensions nil
  "Buffer local extensions to the lexical analyzer.
This should contain an alist with a key of a regex and a data element of
a function.  The function should both move point, and return a lexical
token of the form:
  (TYPE START . END)
nil is also a valid return value.
TYPE can be any type of symbol, as long as it doesn't occur as a
nonterminal in the language definition.")
(make-variable-buffer-local 'semantic-flex-extensions)

(defvar semantic-flex-syntax-modifications nil
  "Changes to the syntax table for this buffer.
These changes are active only while the buffer is being flexed.
This is a list where each element has the form:
  (CHAR CLASS)
CHAR is the char passed to `modify-syntax-entry',
and CLASS is the string also passed to `modify-syntax-entry' to define
what syntax class CHAR has.")
(make-variable-buffer-local 'semantic-flex-syntax-modifications)

(defvar semantic-ignore-comments t
  "Default comment handling.
The value t means to strip comments when flexing; nil means
to keep comments as part of the token stream.")
(make-variable-buffer-local 'semantic-ignore-comments)

(defvar semantic-flex-enable-newlines nil
  "When flexing, report newlines as syntactic elements.
Useful for languages where the newline is a special case terminator.
Only set this on a per mode basis, not globally.")
(make-variable-buffer-local 'semantic-flex-enable-newlines)

(defvar semantic-flex-enable-whitespace nil
  "When flexing, report whitespace as syntactic elements.
Useful for languages where the syntax is whitespace dependent.
Only set this on a per mode basis, not globally.")
(make-variable-buffer-local 'semantic-flex-enable-whitespace)

(defvar semantic-flex-enable-bol nil
  "When flexing, report beginning of lines as syntactic elements.
Useful for languages like python which are indentation sensitive.
Only set this on a per mode basis, not globally.")
(make-variable-buffer-local 'semantic-flex-enable-bol)

(defvar semantic-number-expression semantic-lex-number-expression
  "See variable `semantic-lex-number-expression'.")
(make-variable-buffer-local 'semantic-number-expression)

(defvar semantic-flex-depth 0
  "Default flexing depth.
This specifies how many lists to create tokens in.")
(make-variable-buffer-local 'semantic-flex-depth)
(defun semantic-flex (start end &optional depth length)
  "Using the syntax table, do something roughly equivalent to flex.
Semantically check between START and END.  Optional argument DEPTH
indicates at what level to scan over entire lists.
The return value is a token stream.  Each element is a list of
the form (SYMBOL START-EXPRESSION . END-EXPRESSION) where SYMBOL
denotes the token type.
See `semantic-flex-tokens' variable for details on token types.
END does not mark the end of the text scanned, only the end of the
beginning of text scanned.  Thus, if a string extends past END, the
end of the return token will be larger than END.  To truly restrict
scanning, use `narrow-to-region'.
The last argument, LENGTH specifies that `semantic-flex' should only
return LENGTH tokens."
  (message "`semantic-flex' is an obsolete function.  Use `define-lex' to create lexers.")
  (if (not semantic-flex-keywords-obarray)
      (setq semantic-flex-keywords-obarray [ nil ]))
  (let ((ts nil)                        ; token stream, built in reverse
        (pos (point))                   ; original point, restored at the end
        (ep nil)                        ; explicit end point for the last token
        (curdepth 0)                    ; current list nesting depth
        ;; Regexp matching a comment start, from the syntax table plus
        ;; `comment-start-skip' when the mode defines one.
        (cs (if comment-start-skip
                (concat "\\(\\s<\\|" comment-start-skip "\\)")
              (concat "\\(\\s<\\)")))
        (newsyntax (copy-syntax-table (syntax-table)))
        (mods semantic-flex-syntax-modifications)
        ;; Use the default depth if it is not specified.
        (depth (or depth semantic-flex-depth)))
    ;; Update the syntax table copy with the buffer-local modifications.
    (while mods
      (modify-syntax-entry (car (car mods)) (car (cdr (car mods))) newsyntax)
      (setq mods (cdr mods)))
    (with-syntax-table newsyntax
      (goto-char start)
      (while (and (< (point) end) (or (not length) (<= (length ts) length)))
        ;; The `cond' clauses are ordered by priority; the first
        ;; matching clause consumes the next token.
        (cond
         ;; catch beginning of lines when needed.
         ;; Must be done before catching any other tokens!
         ((and semantic-flex-enable-bol
               (bolp)
               ;; Just insert a (bol N . N) token in the token stream,
               ;; without moving the point.  N is the point at the
               ;; beginning of line.
               (setq ts (cons (cons 'bol (cons (point) (point))) ts))
               nil)) ;; CONTINUE
         ;; special extensions, includes whitespace, nl, etc.
         ((and semantic-flex-extensions
               (let ((fe semantic-flex-extensions)
                     (r nil))
                 (while fe
                   (if (looking-at (car (car fe)))
                       (setq ts (cons (funcall (cdr (car fe))) ts)
                             r t
                             fe nil
                             ep (point)))
                   (setq fe (cdr fe)))
                 ;; An extension may return nil; drop the empty token.
                 (if (and r (not (car ts))) (setq ts (cdr ts)))
                 r)))
         ;; catch newlines when needed
         ((looking-at "\\s-*\\(\n\\|\\s>\\)")
          (if semantic-flex-enable-newlines
              (setq ep (match-end 1)
                    ts (cons (cons 'newline
                                   (cons (match-beginning 1) ep))
                             ts))))
         ;; catch whitespace when needed
         ((looking-at "\\s-+")
          (if semantic-flex-enable-whitespace
              ;; Language wants whitespaces, link them together.
              (if (eq (car (car ts)) 'whitespace)
                  (setcdr (cdr (car ts)) (match-end 0))
                (setq ts (cons (cons 'whitespace
                                     (cons (match-beginning 0)
                                           (match-end 0)))
                               ts)))))
         ;; numbers
         ((and semantic-number-expression
               (looking-at semantic-number-expression))
          (setq ts (cons (cons 'number
                               (cons (match-beginning 0)
                                     (match-end 0)))
                         ts)))
         ;; symbols
         ((looking-at "\\(\\sw\\|\\s_\\)+")
          (setq ts (cons (cons
                          ;; Get info on if this is a keyword or not
                          (or (semantic-lex-keyword-p (match-string 0))
                              'symbol)
                          (cons (match-beginning 0) (match-end 0)))
                         ts)))
         ;; Character quoting characters (ie, \n as newline)
         ((looking-at "\\s\\+")
          (setq ts (cons (cons 'charquote
                               (cons (match-beginning 0) (match-end 0)))
                         ts)))
         ;; Open parens, or semantic-lists.
         ((looking-at "\\s(")
          (if (or (not depth) (< curdepth depth))
              (progn
                (setq curdepth (1+ curdepth))
                (setq ts (cons (cons 'open-paren
                                     (cons (match-beginning 0) (match-end 0)))
                               ts)))
            (setq ts (cons
                      (cons 'semantic-list
                            (cons (match-beginning 0)
                                  (save-excursion
                                    (condition-case nil
                                        (forward-list 1)
                                      ;; This case makes flex robust
                                      ;; to broken lists.
                                      (error
                                       (goto-char
                                        (funcall
                                         semantic-flex-unterminated-syntax-end-function
                                         'semantic-list
                                         start end))))
                                    (setq ep (point)))))
                      ts))))
         ;; Close parens
         ((looking-at "\\s)")
          (setq ts (cons (cons 'close-paren
                               (cons (match-beginning 0) (match-end 0)))
                         ts))
          (setq curdepth (1- curdepth)))
         ;; String initiators
         ((looking-at "\\s\"")
          ;; Zing to the end of this string.
          (setq ts (cons (cons 'string
                               (cons (match-beginning 0)
                                     (save-excursion
                                       (condition-case nil
                                           (forward-sexp 1)
                                         ;; This case makes flex
                                         ;; robust to broken strings.
                                         (error
                                          (goto-char
                                           (funcall
                                            semantic-flex-unterminated-syntax-end-function
                                            'string
                                            start end))))
                                       (setq ep (point)))))
                         ts)))
         ;; comments
         ((looking-at cs)
          (if (and semantic-ignore-comments
                   (not semantic-flex-enable-whitespace))
              ;; If the language doesn't deal with comments nor
              ;; whitespaces, ignore them here.
              (let ((comment-start-point (point)))
                (forward-comment 1)
                (if (eq (point) comment-start-point)
                    ;; In this case our start-skip string failed
                    ;; to work properly.  Lets try and move over
                    ;; whatever white space we matched to begin
                    ;; with.
                    (skip-syntax-forward "-.'" (point-at-eol))
                  ;;(forward-comment 1)
                  ;; Generate newline token if enabled
                  (if (and semantic-flex-enable-newlines
                           (bolp))
                      (backward-char 1)))
                (if (eq (point) comment-start-point)
                    (error "Strange comment syntax prevents lexical analysis"))
                (setq ep (point)))
            (let ((tk (if semantic-ignore-comments 'whitespace 'comment)))
              (save-excursion
                (forward-comment 1)
                ;; Generate newline token if enabled
                (if (and semantic-flex-enable-newlines
                         (bolp))
                    (backward-char 1))
                (setq ep (point)))
              ;; Language wants comments or want them as whitespaces,
              ;; link them together.
              (if (eq (car (car ts)) tk)
                  (setcdr (cdr (car ts)) ep)
                (setq ts (cons (cons tk (cons (match-beginning 0) ep))
                               ts))))))
         ;; punctuation
         ((looking-at "\\(\\s.\\|\\s$\\|\\s'\\)")
          (setq ts (cons (cons 'punctuation
                               (cons (match-beginning 0) (match-end 0)))
                         ts)))
         ;; unknown token
         (t
          (error "What is that?")))
        ;; Advance to the token's end (EP when a clause set it,
        ;; otherwise the end of the last regexp match).
        (goto-char (or ep (match-end 0)))
        (setq ep nil)))
    ;; maybe catch the last beginning of line when needed
    (and semantic-flex-enable-bol
         (= (point) end)
         (bolp)
         (setq ts (cons (cons 'bol (cons (point) (point))) ts)))
    (goto-char pos)
    ;;(message "Flexing muscles...done")
    (nreverse ts)))
  1859. (provide 'semantic/lex)
  1860. ;; Local variables:
  1861. ;; generated-autoload-file: "loaddefs.el"
  1862. ;; generated-autoload-load-name: "semantic/lex"
  1863. ;; End:
  1864. ;;; semantic/lex.el ends here