comp.el 124 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543
  1. ;;; semantic/wisent/comp.el --- GNU Bison for Emacs - Grammar compiler
  2. ;; Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000-2007, 2009-2012
  3. ;; Free Software Foundation, Inc.
  4. ;; Author: David Ponce <david@dponce.com>
  5. ;; Maintainer: David Ponce <david@dponce.com>
  6. ;; Created: 30 January 2002
  7. ;; Keywords: syntax
  8. ;; This file is part of GNU Emacs.
  9. ;; GNU Emacs is free software: you can redistribute it and/or modify
  10. ;; it under the terms of the GNU General Public License as published by
  11. ;; the Free Software Foundation, either version 3 of the License, or
  12. ;; (at your option) any later version.
  13. ;; GNU Emacs is distributed in the hope that it will be useful,
  14. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. ;; GNU General Public License for more details.
  17. ;; You should have received a copy of the GNU General Public License
  18. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  19. ;;; Commentary:
  20. ;;
  21. ;; Grammar compiler that produces Wisent's LALR automatons.
  22. ;;
  23. ;; Wisent (the European Bison ;-) is an Elisp implementation of the
  24. ;; GNU Compiler Compiler Bison. The Elisp code is a port of the C
  25. ;; code of GNU Bison 1.28 & 1.31.
  26. ;;
  27. ;; For more details on the basic concepts for understanding Wisent,
  28. ;; read the Bison manual ;)
  29. ;;
  30. ;; For more details on Wisent itself read the Wisent manual.
  31. ;;; History:
  32. ;;
  33. ;;; Code:
  34. (require 'semantic/wisent)
  35. ;;;; -------------------
  36. ;;;; Misc. useful things
  37. ;;;; -------------------
  38. ;; As much as possible I would like to keep the name of global
  39. ;; variables used in Bison without polluting too much the Elisp global
  40. ;; name space. Elisp dynamic binding allows that ;-)
  41. ;; Here are simple macros to easily define and use set of variables
  42. ;; bound locally, without all these "reference to free variable"
  43. ;; compiler warnings!
  44. (defmacro wisent-context-name (name)
  45. "Return the context name from NAME."
  46. `(if (and ,name (symbolp ,name))
  47. (intern (format "wisent-context-%s" ,name))
  48. (error "Invalid context name: %S" ,name)))
  49. (defmacro wisent-context-bindings (name)
  50. "Return the variables in context NAME."
  51. `(symbol-value (wisent-context-name ,name)))
  52. (defmacro wisent-defcontext (name &rest vars)
  53. "Define a context NAME that will bind variables VARS."
  54. (let* ((context (wisent-context-name name))
  55. (bindings (mapcar #'(lambda (v) (list 'defvar v)) vars)))
  56. `(eval-when-compile
  57. ,@bindings
  58. (defvar ,context ',vars))))
  59. (put 'wisent-defcontext 'lisp-indent-function 1)
  60. (defmacro wisent-with-context (name &rest body)
  61. "Bind variables in context NAME then eval BODY."
  62. `(let* ,(wisent-context-bindings name)
  63. ,@body))
  64. (put 'wisent-with-context 'lisp-indent-function 1)
  65. ;; A naive implementation of data structures! But it suffice here ;-)
  66. (defmacro wisent-struct (name &rest fields)
  67. "Define a simple data structure called NAME.
  68. Which contains data stored in FIELDS. FIELDS is a list of symbols
  69. which are field names or pairs (FIELD INITIAL-VALUE) where
  70. INITIAL-VALUE is a constant used as the initial value of FIELD when
  71. the data structure is created. INITIAL-VALUE defaults to nil.
  72. This defines a `make-NAME' constructor, get-able `NAME-FIELD' and
  73. set-able `set-NAME-FIELD' accessors."
  74. (let ((size (length fields))
  75. (i 0)
  76. accors field sufx fun ivals)
  77. (while (< i size)
  78. (setq field (car fields)
  79. fields (cdr fields))
  80. (if (consp field)
  81. (setq ivals (cons (cadr field) ivals)
  82. field (car field))
  83. (setq ivals (cons nil ivals)))
  84. (setq sufx (format "%s-%s" name field)
  85. fun (intern (format "%s" sufx))
  86. accors (cons `(defmacro ,fun (s)
  87. (list 'aref s ,i))
  88. accors)
  89. fun (intern (format "set-%s" sufx))
  90. accors (cons `(defmacro ,fun (s v)
  91. (list 'aset s ,i v))
  92. accors)
  93. i (1+ i)))
  94. `(progn
  95. (defmacro ,(intern (format "make-%s" name)) ()
  96. (cons 'vector ',(nreverse ivals)))
  97. ,@accors)))
  98. (put 'wisent-struct 'lisp-indent-function 1)
  99. ;; Other utilities
  100. (defsubst wisent-pad-string (s n &optional left)
  101. "Fill string S with spaces.
  102. Return a new string of at least N characters. Insert spaces on right.
  103. If optional LEFT is non-nil insert spaces on left."
  104. (let ((i (length s)))
  105. (if (< i n)
  106. (if left
  107. (concat (make-string (- n i) ?\ ) s)
  108. (concat s (make-string (- n i) ?\ )))
  109. s)))
  110. ;;;; ------------------------
  111. ;;;; Environment dependencies
  112. ;;;; ------------------------
  113. (defconst wisent-BITS-PER-WORD
  114. (let ((i 1))
  115. (while (not (zerop (lsh 1 i)))
  116. (setq i (1+ i)))
  117. i))
  118. (defsubst wisent-WORDSIZE (n)
  119. "(N + BITS-PER-WORD - 1) / BITS-PER-WORD."
  120. (/ (1- (+ n wisent-BITS-PER-WORD)) wisent-BITS-PER-WORD))
  121. (defsubst wisent-SETBIT (x i)
  122. "X[I/BITS-PER-WORD] |= 1 << (I % BITS-PER-WORD)."
  123. (let ((k (/ i wisent-BITS-PER-WORD)))
  124. (aset x k (logior (aref x k)
  125. (lsh 1 (% i wisent-BITS-PER-WORD))))))
  126. (defsubst wisent-RESETBIT (x i)
  127. "X[I/BITS-PER-WORD] &= ~(1 << (I % BITS-PER-WORD))."
  128. (let ((k (/ i wisent-BITS-PER-WORD)))
  129. (aset x k (logand (aref x k)
  130. (lognot (lsh 1 (% i wisent-BITS-PER-WORD)))))))
  131. (defsubst wisent-BITISSET (x i)
  132. "(X[I/BITS-PER-WORD] & (1 << (I % BITS-PER-WORD))) != 0."
  133. (not (zerop (logand (aref x (/ i wisent-BITS-PER-WORD))
  134. (lsh 1 (% i wisent-BITS-PER-WORD))))))
  135. (defsubst wisent-noninteractive ()
  136. "Return non-nil if running without interactive terminal."
  137. (if (featurep 'xemacs)
  138. (noninteractive)
  139. noninteractive))
  140. (defvar wisent-debug-flag nil
  141. "Non-nil means enable some debug stuff.")
  142. ;;;; --------------
  143. ;;;; Logging/Output
  144. ;;;; --------------
  145. (defconst wisent-log-buffer-name "*wisent-log*"
  146. "Name of the log buffer.")
  147. (defvar wisent-new-log-flag nil
  148. "Non-nil means to start a new report.")
  149. (defvar wisent-verbose-flag nil
  150. "*Non-nil means to report verbose information on generated parser.")
  151. (defun wisent-toggle-verbose-flag ()
  152. "Toggle whether to report verbose information on generated parser."
  153. (interactive)
  154. (setq wisent-verbose-flag (not wisent-verbose-flag))
  155. (when (called-interactively-p 'interactive)
  156. (message "Verbose report %sabled"
  157. (if wisent-verbose-flag "en" "dis"))))
  158. (defmacro wisent-log-buffer ()
  159. "Return the log buffer.
  160. Its name is defined in constant `wisent-log-buffer-name'."
  161. `(get-buffer-create wisent-log-buffer-name))
  162. (defmacro wisent-clear-log ()
  163. "Delete the entire contents of the log buffer."
  164. `(with-current-buffer (wisent-log-buffer)
  165. (erase-buffer)))
  166. (defvar byte-compile-current-file)
  167. (defun wisent-source ()
  168. "Return the current source file name or nil."
  169. (let ((source (or (and (boundp 'byte-compile-current-file)
  170. byte-compile-current-file)
  171. load-file-name (buffer-file-name))))
  172. (if source
  173. (file-relative-name source))))
  174. (defun wisent-new-log ()
  175. "Start a new entry into the log buffer."
  176. (setq wisent-new-log-flag nil)
  177. (let ((text (format "\n\n*** Wisent %s - %s\n\n"
  178. (or (wisent-source) (buffer-name))
  179. (format-time-string "%Y-%m-%d %R"))))
  180. (with-current-buffer (wisent-log-buffer)
  181. (goto-char (point-max))
  182. (insert text))))
  183. (defsubst wisent-log (&rest args)
  184. "Insert text into the log buffer.
  185. `format' is applied to ARGS and the result string is inserted into the
  186. log buffer returned by the function `wisent-log-buffer'."
  187. (and wisent-new-log-flag (wisent-new-log))
  188. (with-current-buffer (wisent-log-buffer)
  189. (insert (apply 'format args))))
  190. (defconst wisent-log-file "wisent.output"
  191. "The log file.
  192. Used when running without interactive terminal.")
  193. (defun wisent-append-to-log-file ()
  194. "Append contents of logging buffer to `wisent-log-file'."
  195. (if (get-buffer wisent-log-buffer-name)
  196. (condition-case err
  197. (with-current-buffer (wisent-log-buffer)
  198. (widen)
  199. (if (> (point-max) (point-min))
  200. (write-region (point-min) (point-max)
  201. wisent-log-file t)))
  202. (error
  203. (message "*** %s" (error-message-string err))))))
  204. ;;;; -----------------------------------
  205. ;;;; Representation of the grammar rules
  206. ;;;; -----------------------------------
  207. ;; ntokens is the number of tokens, and nvars is the number of
  208. ;; variables (nonterminals). nsyms is the total number, ntokens +
  209. ;; nvars.
  210. ;; Each symbol (either token or variable) receives a symbol number.
  211. ;; Numbers 0 to ntokens-1 are for tokens, and ntokens to nsyms-1 are
  212. ;; for variables. Symbol number zero is the end-of-input token. This
  213. ;; token is counted in ntokens.
  214. ;; The rules receive rule numbers 1 to nrules in the order they are
  215. ;; written. Actions and guards are accessed via the rule number.
  216. ;; The rules themselves are described by three arrays: rrhs, rlhs and
  217. ;; ritem. rlhs[R] is the symbol number of the left hand side of rule
  218. ;; R. The right hand side is stored as symbol numbers in a portion of
  219. ;; ritem. rrhs[R] contains the index in ritem of the beginning of the
  220. ;; portion for rule R.
  221. ;; The length of the portion is one greater than the number of symbols
  222. ;; in the rule's right hand side. The last element in the portion
  223. ;; contains minus R, which identifies it as the end of a portion and
  224. ;; says which rule it is for.
  225. ;; The portions of ritem come in order of increasing rule number and
  226. ;; are followed by an element which is nil to mark the end. nitems is
  227. ;; the total length of ritem, not counting the final nil. Each
  228. ;; element of ritem is called an "item" and its index in ritem is an
  229. ;; item number.
  230. ;; Item numbers are used in the finite state machine to represent
  231. ;; places that parsing can get to.
  232. ;; The vector rprec contains for each rule, the item number of the
  233. ;; symbol giving its precedence level to this rule. The precedence
  234. ;; level and associativity of each symbol is recorded in respectively
  235. ;; the properties 'wisent--prec and 'wisent--assoc.
  236. ;; Precedence levels are assigned in increasing order starting with 1
  237. ;; so that numerically higher precedence values mean tighter binding
  238. ;; as they ought to. nil as a symbol or rule's precedence means none
  239. ;; is assigned.
  240. (defcustom wisent-state-table-size 1009
  241. "The size of the state table."
  242. :type 'integer
  243. :group 'wisent)
  244. ;; These variables only exist locally in the function
  245. ;; `wisent-compile-grammar' and are shared by all other nested
  246. ;; callees.
  247. (wisent-defcontext compile-grammar
  248. F LA LAruleno accessing-symbol conflicts consistent default-prec
  249. derives err-table fderives final-state first-reduction first-shift
  250. first-state firsts from-state goto-map includes itemset nitemset
  251. kernel-base kernel-end kernel-items last-reduction last-shift
  252. last-state lookaheads lookaheadset lookback maxrhs ngotos nitems
  253. nrules nshifts nstates nsyms ntokens nullable nvars rassoc redset
  254. reduction-table ritem rlhs rprec rrc-count rrc-total rrhs ruseful
  255. rcode ruleset rulesetsize shift-symbol shift-table shiftset
  256. src-count src-total start-table state-table tags this-state to-state
  257. tokensetsize ;; nb of words req. to hold a bit for each rule
  258. varsetsize ;; nb of words req. to hold a bit for each variable
  259. error-token-number start-symbol token-list var-list
  260. N P V V1 nuseless-nonterminals nuseless-productions
  261. ptable ;; symbols & characters properties
  262. )
  263. (defmacro wisent-ISTOKEN (s)
  264. "Return non-nil if item number S defines a token (terminal).
  265. That is if S < `ntokens'."
  266. `(< ,s ntokens))
  267. (defmacro wisent-ISVAR(s)
  268. "Return non-nil if item number S defines a nonterminal.
  269. That is if S >= `ntokens'."
  270. `(>= ,s ntokens))
  271. (defsubst wisent-tag (s)
  272. "Return printable form of item number S."
  273. (wisent-item-to-string (aref tags s)))
  274. ;; Symbol and character properties
  275. (defsubst wisent-put (object propname value)
  276. "Store OBJECT's PROPNAME property with value VALUE.
  277. Use `eq' to locate OBJECT."
  278. (let ((entry (assq object ptable)))
  279. (or entry (setq entry (list object) ptable (cons entry ptable)))
  280. (setcdr entry (plist-put (cdr entry) propname value))))
  281. (defsubst wisent-get (object propname)
  282. "Return the value of OBJECT's PROPNAME property.
  283. Use `eq' to locate OBJECT."
  284. (plist-get (cdr (assq object ptable)) propname))
  285. (defsubst wisent-item-number (x)
  286. "Return the item number of symbol X."
  287. (wisent-get x 'wisent--item-no))
  288. (defsubst wisent-set-item-number (x n)
  289. "Set the item number of symbol X to N."
  290. (wisent-put x 'wisent--item-no n))
  291. (defsubst wisent-assoc (x)
  292. "Return the associativity of symbol X."
  293. (wisent-get x 'wisent--assoc))
  294. (defsubst wisent-set-assoc (x a)
  295. "Set the associativity of symbol X to A."
  296. (wisent-put x 'wisent--assoc a))
  297. (defsubst wisent-prec (x)
  298. "Return the precedence level of symbol X."
  299. (wisent-get x 'wisent--prec))
  300. (defsubst wisent-set-prec (x p)
  301. "Set the precedence level of symbol X to P."
  302. (wisent-put x 'wisent--prec p))
  303. ;;;; ----------------------------------------------------------
  304. ;;;; Type definitions for nondeterministic finite state machine
  305. ;;;; ----------------------------------------------------------
  306. ;; These type definitions are used to represent a nondeterministic
  307. ;; finite state machine that parses the specified grammar. This
  308. ;; information is generated by the function `wisent-generate-states'.
  309. ;; Each state of the machine is described by a set of items --
  310. ;; particular positions in particular rules -- that are the possible
  311. ;; places where parsing could continue when the machine is in this
  312. ;; state. These symbols at these items are the allowable inputs that
  313. ;; can follow now.
  314. ;; A core represents one state. States are numbered in the number
  315. ;; field. When `wisent-generate-states' is finished, the starting
  316. ;; state is state 0 and `nstates' is the number of states. (A
  317. ;; transition to a state whose state number is `nstates' indicates
  318. ;; termination.) All the cores are chained together and `first-state'
  319. ;; points to the first one (state 0).
  320. ;; For each state there is a particular symbol which must have been
  321. ;; the last thing accepted to reach that state. It is the
  322. ;; accessing-symbol of the core.
  323. ;; Each core contains a vector of `nitems' items which are the indices
  324. ;; in the `ritems' vector of the items that are selected in this
  325. ;; state.
  326. ;; The link field is used for chaining buckets that hash states by
  327. ;; their itemsets. This is for recognizing equivalent states and
  328. ;; combining them when the states are generated.
  329. ;; The two types of transitions are shifts (push the lookahead token
  330. ;; and read another) and reductions (combine the last n things on the
  331. ;; stack via a rule, replace them with the symbol that the rule
  332. ;; derives, and leave the lookahead token alone). When the states are
  333. ;; generated, these transitions are represented in two other lists.
  334. ;; Each shifts structure describes the possible shift transitions out
  335. ;; of one state, the state whose number is in the number field. The
  336. ;; shifts structures are linked through next and first-shift points to
  337. ;; them. Each contains a vector of numbers of the states that shift
  338. ;; transitions can go to. The accessing-symbol fields of those
  339. ;; states' cores say what kind of input leads to them.
  340. ;; A shift to state zero should be ignored. Conflict resolution
  341. ;; deletes shifts by changing them to zero.
  342. ;; Each reductions structure describes the possible reductions at the
  343. ;; state whose number is in the number field. The data is a list of
  344. ;; nreds rules, represented by their rule numbers. `first-reduction'
  345. ;; points to the list of these structures.
  346. ;; Conflict resolution can decide that certain tokens in certain
  347. ;; states should explicitly be errors (for implementing %nonassoc).
  348. ;; For each state, the tokens that are errors for this reason are
  349. ;; recorded in an errs structure, which has the state number in its
  350. ;; number field. The rest of the errs structure is full of token
  351. ;; numbers.
  352. ;; There is at least one shift transition present in state zero. It
  353. ;; leads to a next-to-final state whose accessing-symbol is the
  354. ;; grammar's start symbol. The next-to-final state has one shift to
  355. ;; the final state, whose accessing-symbol is zero (end of input).
  356. ;; The final state has one shift, which goes to the termination state
  357. ;; (whose number is `nstates'-1).
  358. ;; The reason for the extra state at the end is to placate the
  359. ;; parser's strategy of making all decisions one token ahead of its
  360. ;; actions.
  361. (wisent-struct core
  362. next ; -> core
  363. link ; -> core
  364. (number 0)
  365. (accessing-symbol 0)
  366. (nitems 0)
  367. (items [0]))
  368. (wisent-struct shifts
  369. next ; -> shifts
  370. (number 0)
  371. (nshifts 0)
  372. (shifts [0]))
  373. (wisent-struct reductions
  374. next ; -> reductions
  375. (number 0)
  376. (nreds 0)
  377. (rules [0]))
  378. (wisent-struct errs
  379. (nerrs 0)
  380. (errs [0]))
  381. ;;;; --------------------------------------------------------
  382. ;;;; Find unreachable terminals, nonterminals and productions
  383. ;;;; --------------------------------------------------------
  384. (defun wisent-bits-equal (L R n)
  385. "Visit L and R and return non-nil if their first N elements are `='.
  386. L and R must be vectors of integers."
  387. (let* ((i (1- n))
  388. (iseq t))
  389. (while (and iseq (natnump i))
  390. (setq iseq (= (aref L i) (aref R i))
  391. i (1- i)))
  392. iseq))
  393. (defun wisent-nbits (i)
  394. "Return number of bits set in integer I."
  395. (let ((count 0))
  396. (while (not (zerop i))
  397. ;; i ^= (i & ((unsigned) (-(int) i)))
  398. (setq i (logxor i (logand i (- i)))
  399. count (1+ count)))
  400. count))
  401. (defun wisent-bits-size (S n)
  402. "In vector S count the total of bits set in first N elements.
  403. S must be a vector of integers."
  404. (let* ((i (1- n))
  405. (count 0))
  406. (while (natnump i)
  407. (setq count (+ count (wisent-nbits (aref S i)))
  408. i (1- i)))
  409. count))
  410. (defun wisent-useful-production (i N0)
  411. "Return non-nil if production I is in useful set N0."
  412. (let* ((useful t)
  413. (r (aref rrhs i))
  414. n)
  415. (while (and useful (> (setq n (aref ritem r)) 0))
  416. (if (wisent-ISVAR n)
  417. (setq useful (wisent-BITISSET N0 (- n ntokens))))
  418. (setq r (1+ r)))
  419. useful))
  420. (defun wisent-useless-nonterminals ()
  421. "Find out which nonterminals are used."
  422. (let (Np Ns i n break)
  423. ;; N is set as built. Np is set being built this iteration. P is
  424. ;; set of all productions which have a RHS all in N.
  425. (setq n (wisent-WORDSIZE nvars)
  426. Np (make-vector n 0))
  427. ;; The set being computed is a set of nonterminals which can
  428. ;; derive the empty string or strings consisting of all
  429. ;; terminals. At each iteration a nonterminal is added to the set
  430. ;; if there is a production with that nonterminal as its LHS for
  431. ;; which all the nonterminals in its RHS are already in the set.
  432. ;; Iterate until the set being computed remains unchanged. Any
  433. ;; nonterminals not in the set at that point are useless in that
  434. ;; they will never be used in deriving a sentence of the language.
  435. ;; This iteration doesn't use any special traversal over the
  436. ;; productions. A set is kept of all productions for which all
  437. ;; the nonterminals in the RHS are in useful. Only productions
  438. ;; not in this set are scanned on each iteration. At the end,
  439. ;; this set is saved to be used when finding useful productions:
  440. ;; only productions in this set will appear in the final grammar.
  441. (while (not break)
  442. (setq i (1- n))
  443. (while (natnump i)
  444. ;; Np[i] = N[i]
  445. (aset Np i (aref N i))
  446. (setq i (1- i)))
  447. (setq i 1)
  448. (while (<= i nrules)
  449. (if (not (wisent-BITISSET P i))
  450. (when (wisent-useful-production i N)
  451. (wisent-SETBIT Np (- (aref rlhs i) ntokens))
  452. (wisent-SETBIT P i)))
  453. (setq i (1+ i)))
  454. (if (wisent-bits-equal N Np n)
  455. (setq break t)
  456. (setq Ns Np
  457. Np N
  458. N Ns)))
  459. (setq N Np)))
  460. (defun wisent-inaccessable-symbols ()
  461. "Find out which productions are reachable and which symbols are used."
  462. ;; Starting with an empty set of productions and a set of symbols
  463. ;; which only has the start symbol in it, iterate over all
  464. ;; productions until the set of productions remains unchanged for an
  465. ;; iteration. For each production which has a LHS in the set of
  466. ;; reachable symbols, add the production to the set of reachable
  467. ;; productions, and add all of the nonterminals in the RHS of the
  468. ;; production to the set of reachable symbols.
  469. ;; Consider only the (partially) reduced grammar which has only
  470. ;; nonterminals in N and productions in P.
  471. ;; The result is the set P of productions in the reduced grammar,
  472. ;; and the set V of symbols in the reduced grammar.
  473. ;; Although this algorithm also computes the set of terminals which
  474. ;; are reachable, no terminal will be deleted from the grammar. Some
  475. ;; terminals might not be in the grammar but might be generated by
  476. ;; semantic routines, and so the user might want them available with
  477. ;; specified numbers. (Is this true?) However, the non reachable
  478. ;; terminals are printed (if running in verbose mode) so that the
  479. ;; user can know.
  480. (let (Vp Vs Pp i tt r n m break)
  481. (setq n (wisent-WORDSIZE nsyms)
  482. m (wisent-WORDSIZE (1+ nrules))
  483. Vp (make-vector n 0)
  484. Pp (make-vector m 0))
  485. ;; If the start symbol isn't useful, then nothing will be useful.
  486. (when (wisent-BITISSET N (- start-symbol ntokens))
  487. (wisent-SETBIT V start-symbol)
  488. (while (not break)
  489. (setq i (1- n))
  490. (while (natnump i)
  491. (aset Vp i (aref V i))
  492. (setq i (1- i)))
  493. (setq i 1)
  494. (while (<= i nrules)
  495. (when (and (not (wisent-BITISSET Pp i))
  496. (wisent-BITISSET P i)
  497. (wisent-BITISSET V (aref rlhs i)))
  498. (setq r (aref rrhs i))
  499. (while (natnump (setq tt (aref ritem r)))
  500. (if (or (wisent-ISTOKEN tt)
  501. (wisent-BITISSET N (- tt ntokens)))
  502. (wisent-SETBIT Vp tt))
  503. (setq r (1+ r)))
  504. (wisent-SETBIT Pp i))
  505. (setq i (1+ i)))
  506. (if (wisent-bits-equal V Vp n)
  507. (setq break t)
  508. (setq Vs Vp
  509. Vp V
  510. V Vs))))
  511. (setq V Vp)
  512. ;; Tokens 0, 1 are internal to Wisent. Consider them useful.
  513. (wisent-SETBIT V 0) ;; end-of-input token
  514. (wisent-SETBIT V 1) ;; error token
  515. (setq P Pp)
  516. (setq nuseless-productions (- nrules (wisent-bits-size P m))
  517. nuseless-nonterminals nvars
  518. i ntokens)
  519. (while (< i nsyms)
  520. (if (wisent-BITISSET V i)
  521. (setq nuseless-nonterminals (1- nuseless-nonterminals)))
  522. (setq i (1+ i)))
  523. ;; A token that was used in %prec should not be warned about.
  524. (setq i 1)
  525. (while (<= i nrules)
  526. (if (aref rprec i)
  527. (wisent-SETBIT V1 (aref rprec i)))
  528. (setq i (1+ i)))
  529. ))
  530. (defun wisent-reduce-grammar-tables ()
  531. "Disable useless productions."
  532. (if (> nuseless-productions 0)
  533. (let ((pn 1))
  534. (while (<= pn nrules)
  535. (aset ruseful pn (wisent-BITISSET P pn))
  536. (setq pn (1+ pn))))))
  537. (defun wisent-nonterminals-reduce ()
  538. "Remove useless nonterminals."
  539. (let (i n r item nontermmap tags-sorted)
  540. ;; Map the nonterminals to their new index: useful first, useless
  541. ;; afterwards. Kept for later report.
  542. (setq nontermmap (make-vector nvars 0)
  543. n ntokens
  544. i ntokens)
  545. (while (< i nsyms)
  546. (when (wisent-BITISSET V i)
  547. (aset nontermmap (- i ntokens) n)
  548. (setq n (1+ n)))
  549. (setq i (1+ i)))
  550. (setq i ntokens)
  551. (while (< i nsyms)
  552. (unless (wisent-BITISSET V i)
  553. (aset nontermmap (- i ntokens) n)
  554. (setq n (1+ n)))
  555. (setq i (1+ i)))
  556. ;; Shuffle elements of tables indexed by symbol number
  557. (setq tags-sorted (make-vector nvars nil)
  558. i ntokens)
  559. (while (< i nsyms)
  560. (setq n (aref nontermmap (- i ntokens)))
  561. (aset tags-sorted (- n ntokens) (aref tags i))
  562. (setq i (1+ i)))
  563. (setq i ntokens)
  564. (while (< i nsyms)
  565. (aset tags i (aref tags-sorted (- i ntokens)))
  566. (setq i (1+ i)))
  567. ;; Replace all symbol numbers in valid data structures.
  568. (setq i 1)
  569. (while (<= i nrules)
  570. (aset rlhs i (aref nontermmap (- (aref rlhs i) ntokens)))
  571. (setq i (1+ i)))
  572. (setq r 0)
  573. (while (setq item (aref ritem r))
  574. (if (wisent-ISVAR item)
  575. (aset ritem r (aref nontermmap (- item ntokens))))
  576. (setq r (1+ r)))
  577. (setq start-symbol (aref nontermmap (- start-symbol ntokens))
  578. nsyms (- nsyms nuseless-nonterminals)
  579. nvars (- nvars nuseless-nonterminals))
  580. ))
  581. (defun wisent-total-useless ()
  582. "Report number of useless nonterminals and productions."
  583. (let* ((src (wisent-source))
  584. (src (if src (concat " in " src) ""))
  585. (msg (format "Grammar%s contains" src)))
  586. (if (> nuseless-nonterminals 0)
  587. (setq msg (format "%s %d useless nonterminal%s"
  588. msg nuseless-nonterminals
  589. (if (> nuseless-nonterminals 0) "s" ""))))
  590. (if (and (> nuseless-nonterminals 0) (> nuseless-productions 0))
  591. (setq msg (format "%s and" msg)))
  592. (if (> nuseless-productions 0)
  593. (setq msg (format "%s %d useless rule%s"
  594. msg nuseless-productions
  595. (if (> nuseless-productions 0) "s" ""))))
  596. (message msg)))
  597. (defun wisent-reduce-grammar ()
  598. "Find unreachable terminals, nonterminals and productions."
  599. ;; Allocate the global sets used to compute the reduced grammar
  600. (setq N (make-vector (wisent-WORDSIZE nvars) 0)
  601. P (make-vector (wisent-WORDSIZE (1+ nrules)) 0)
  602. V (make-vector (wisent-WORDSIZE nsyms) 0)
  603. V1 (make-vector (wisent-WORDSIZE nsyms) 0)
  604. nuseless-nonterminals 0
  605. nuseless-productions 0)
  606. (wisent-useless-nonterminals)
  607. (wisent-inaccessable-symbols)
  608. (when (> (+ nuseless-nonterminals nuseless-productions) 0)
  609. (wisent-total-useless)
  610. (or (wisent-BITISSET N (- start-symbol ntokens))
  611. (error "Start symbol `%s' does not derive any sentence"
  612. (wisent-tag start-symbol)))
  613. (wisent-reduce-grammar-tables)
  614. (if (> nuseless-nonterminals 0)
  615. (wisent-nonterminals-reduce))))
  616. (defun wisent-print-useless ()
  617. "Output the detailed results of the reductions."
  618. (let (i b r)
  619. (when (> nuseless-nonterminals 0)
  620. ;; Useless nonterminals have been moved after useful ones.
  621. (wisent-log "\n\nUseless nonterminals:\n\n")
  622. (setq i 0)
  623. (while (< i nuseless-nonterminals)
  624. (wisent-log " %s\n" (wisent-tag (+ nsyms i)))
  625. (setq i (1+ i))))
  626. (setq b nil
  627. i 0)
  628. (while (< i ntokens)
  629. (unless (or (wisent-BITISSET V i) (wisent-BITISSET V1 i))
  630. (or b
  631. (wisent-log "\n\nTerminals which are not used:\n\n"))
  632. (setq b t)
  633. (wisent-log " %s\n" (wisent-tag i)))
  634. (setq i (1+ i)))
  635. (when (> nuseless-productions 0)
  636. (wisent-log "\n\nUseless rules:\n\n")
  637. (setq i 1)
  638. (while (<= i nrules)
  639. (unless (aref ruseful i)
  640. (wisent-log "#%s " (wisent-pad-string (format "%d" i) 4))
  641. (wisent-log "%s:" (wisent-tag (aref rlhs i)))
  642. (setq r (aref rrhs i))
  643. (while (natnump (aref ritem r))
  644. (wisent-log " %s" (wisent-tag (aref ritem r)))
  645. (setq r (1+ r)))
  646. (wisent-log ";\n"))
  647. (setq i (1+ i))))
  648. (if (or b (> nuseless-nonterminals 0) (> nuseless-productions 0))
  649. (wisent-log "\n\n"))
  650. ))
  651. ;;;; -----------------------------
  652. ;;;; Match rules with nonterminals
  653. ;;;; -----------------------------
  654. (defun wisent-set-derives ()
  655. "Find, for each variable (nonterminal), which rules can derive it.
  656. It sets up the value of DERIVES so that DERIVES[i - NTOKENS] points to
  657. a list of rule numbers, terminated with -1."
  658. (let (i lhs p q dset delts)
  659. (setq dset (make-vector nvars nil)
  660. delts (make-vector (1+ nrules) 0))
  661. (setq p 0 ;; p = delts
  662. i nrules)
  663. (while (> i 0)
  664. (when (aref ruseful i)
  665. (setq lhs (aref rlhs i))
  666. ;; p->next = dset[lhs];
  667. ;; p->value = i;
  668. (aset delts p (cons i (aref dset (- lhs ntokens)))) ;; (value . next)
  669. (aset dset (- lhs ntokens) p) ;; dset[lhs] = p
  670. (setq p (1+ p)) ;; p++
  671. )
  672. (setq i (1- i)))
  673. (setq derives (make-vector nvars nil)
  674. i ntokens)
  675. (while (< i nsyms)
  676. (setq q nil
  677. p (aref dset (- i ntokens))) ;; p = dset[i]
  678. (while p
  679. (setq p (aref delts p)
  680. q (cons (car p) q) ;;q++ = p->value
  681. p (cdr p))) ;; p = p->next
  682. (setq q (nreverse (cons -1 q))) ;; *q++ = -1
  683. (aset derives (- i ntokens) q) ;; derives[i] = q
  684. (setq i (1+ i)))
  685. ))
  686. ;;;; --------------------------------------------------------
  687. ;;;; Find which nonterminals can expand into the null string.
  688. ;;;; --------------------------------------------------------
  689. (defun wisent-print-nullable ()
  690. "Print NULLABLE."
  691. (let (i)
  692. (wisent-log "NULLABLE\n")
  693. (setq i ntokens)
  694. (while (< i nsyms)
  695. (wisent-log "\t%s: %s\n" (wisent-tag i)
  696. (if (aref nullable (- i ntokens))
  697. "yes" : "no"))
  698. (setq i (1+ i)))
  699. (wisent-log "\n\n")))
  700. (defun wisent-set-nullable ()
  701. "Set up NULLABLE.
  702. A vector saying which nonterminals can expand into the null string.
  703. NULLABLE[i - NTOKENS] is nil if symbol I can do so."
  704. (let (ruleno s1 s2 p r squeue rcount rsets relts item any-tokens)
  705. (setq squeue (make-vector nvars 0)
  706. rcount (make-vector (1+ nrules) 0)
  707. rsets (make-vector nvars nil) ;; - ntokens
  708. relts (make-vector (+ nitems nvars 1) nil)
  709. nullable (make-vector nvars nil)) ;; - ntokens
  710. (setq s1 0 s2 0 ;; s1 = s2 = squeue
  711. p 0 ;; p = relts
  712. ruleno 1)
  713. (while (<= ruleno nrules)
  714. (when (aref ruseful ruleno)
  715. (if (> (aref ritem (aref rrhs ruleno)) 0)
  716. (progn
  717. ;; This rule has a non empty RHS.
  718. (setq any-tokens nil
  719. r (aref rrhs ruleno))
  720. (while (> (aref ritem r) 0)
  721. (if (wisent-ISTOKEN (aref ritem r))
  722. (setq any-tokens t))
  723. (setq r (1+ r)))
  724. ;; This rule has only nonterminals: schedule it for the
  725. ;; second pass.
  726. (unless any-tokens
  727. (setq r (aref rrhs ruleno))
  728. (while (> (setq item (aref ritem r)) 0)
  729. (aset rcount ruleno (1+ (aref rcount ruleno)))
  730. ;; p->next = rsets[item];
  731. ;; p->value = ruleno;
  732. (aset relts p (cons ruleno (aref rsets (- item ntokens))))
  733. ;; rsets[item] = p;
  734. (aset rsets (- item ntokens) p)
  735. (setq p (1+ p)
  736. r (1+ r)))))
  737. ;; This rule has an empty RHS.
  738. ;; assert (ritem[rrhs[ruleno]] == -ruleno)
  739. (when (and (aref ruseful ruleno)
  740. (setq item (aref rlhs ruleno))
  741. (not (aref nullable (- item ntokens))))
  742. (aset nullable (- item ntokens) t)
  743. (aset squeue s2 item)
  744. (setq s2 (1+ s2)))
  745. )
  746. )
  747. (setq ruleno (1+ ruleno)))
  748. (while (< s1 s2)
  749. ;; p = rsets[*s1++]
  750. (setq p (aref rsets (- (aref squeue s1) ntokens))
  751. s1 (1+ s1))
  752. (while p
  753. (setq p (aref relts p)
  754. ruleno (car p)
  755. p (cdr p)) ;; p = p->next
  756. ;; if (--rcount[ruleno] == 0)
  757. (when (zerop (aset rcount ruleno (1- (aref rcount ruleno))))
  758. (setq item (aref rlhs ruleno))
  759. (aset nullable (- item ntokens) t)
  760. (aset squeue s2 item)
  761. (setq s2 (1+ s2)))))
  762. (if wisent-debug-flag
  763. (wisent-print-nullable))
  764. ))
  765. ;;;; -----------
  766. ;;;; Subroutines
  767. ;;;; -----------
  768. (defun wisent-print-fderives ()
  769. "Print FDERIVES."
  770. (let (i j rp)
  771. (wisent-log "\n\n\nFDERIVES\n")
  772. (setq i ntokens)
  773. (while (< i nsyms)
  774. (wisent-log "\n\n%s derives\n\n" (wisent-tag i))
  775. (setq rp (aref fderives (- i ntokens))
  776. j 0)
  777. (while (<= j nrules)
  778. (if (wisent-BITISSET rp j)
  779. (wisent-log " %d\n" j))
  780. (setq j (1+ j)))
  781. (setq i (1+ i)))))
  782. (defun wisent-set-fderives ()
  783. "Set up FDERIVES.
  784. An NVARS by NRULES matrix of bits indicating which rules can help
  785. derive the beginning of the data for each nonterminal. For example,
  786. if symbol 5 can be derived as the sequence of symbols 8 3 20, and one
  787. of the rules for deriving symbol 8 is rule 4, then the
  788. \[5 - NTOKENS, 4] bit in FDERIVES is set."
  789. (let (i j k)
  790. (setq fderives (make-vector nvars nil))
  791. (setq i 0)
  792. (while (< i nvars)
  793. (aset fderives i (make-vector rulesetsize 0))
  794. (setq i (1+ i)))
  795. (wisent-set-firsts)
  796. (setq i ntokens)
  797. (while (< i nsyms)
  798. (setq j ntokens)
  799. (while (< j nsyms)
  800. ;; if (BITISSET (FIRSTS (i), j - ntokens))
  801. (when (wisent-BITISSET (aref firsts (- i ntokens)) (- j ntokens))
  802. (setq k (aref derives (- j ntokens)))
  803. (while (> (car k) 0) ;; derives[j][k] > 0
  804. ;; SETBIT (FDERIVES (i), derives[j][k]);
  805. (wisent-SETBIT (aref fderives (- i ntokens)) (car k))
  806. (setq k (cdr k))))
  807. (setq j (1+ j)))
  808. (setq i (1+ i)))
  809. (if wisent-debug-flag
  810. (wisent-print-fderives))
  811. ))
  812. (defun wisent-print-firsts ()
  813. "Print FIRSTS."
  814. (let (i j v)
  815. (wisent-log "\n\n\nFIRSTS\n\n")
  816. (setq i ntokens)
  817. (while (< i nsyms)
  818. (wisent-log "\n\n%s firsts\n\n" (wisent-tag i))
  819. (setq v (aref firsts (- i ntokens))
  820. j 0)
  821. (while (< j nvars)
  822. (if (wisent-BITISSET v j)
  823. (wisent-log "\t\t%d (%s)\n"
  824. (+ j ntokens) (wisent-tag (+ j ntokens))))
  825. (setq j (1+ j)))
  826. (setq i (1+ i)))))
  827. (defun wisent-TC (R n)
  828. "Transitive closure.
  829. Given R an N by N matrix of bits, modify its contents to be the
  830. transitive closure of what was given."
  831. (let (i j k)
  832. ;; R (J, I) && R (I, K) => R (J, K).
  833. ;; I *must* be the outer loop.
  834. (setq i 0)
  835. (while (< i n)
  836. (setq j 0)
  837. (while (< j n)
  838. (when (wisent-BITISSET (aref R j) i)
  839. (setq k 0)
  840. (while (< k n)
  841. (if (wisent-BITISSET (aref R i) k)
  842. (wisent-SETBIT (aref R j) k))
  843. (setq k (1+ k))))
  844. (setq j (1+ j)))
  845. (setq i (1+ i)))))
  846. (defun wisent-RTC (R n)
  847. "Reflexive Transitive Closure.
  848. Same as `wisent-TC' and then set all the bits on the diagonal of R, an
  849. N by N matrix of bits."
  850. (let (i)
  851. (wisent-TC R n)
  852. (setq i 0)
  853. (while (< i n)
  854. (wisent-SETBIT (aref R i) i)
  855. (setq i (1+ i)))))
  856. (defun wisent-set-firsts ()
  857. "Set up FIRSTS.
  858. An NVARS by NVARS bit matrix indicating which items can represent the
  859. beginning of the input corresponding to which other items. For
  860. example, if some rule expands symbol 5 into the sequence of symbols 8
  861. 3 20, the symbol 8 can be the beginning of the data for symbol 5, so
  862. the bit [8 - NTOKENS, 5 - NTOKENS] in FIRSTS is set."
  863. (let (row symbol sp rowsize i)
  864. (setq rowsize (wisent-WORDSIZE nvars)
  865. varsetsize rowsize
  866. firsts (make-vector nvars nil)
  867. i 0)
  868. (while (< i nvars)
  869. (aset firsts i (make-vector rowsize 0))
  870. (setq i (1+ i)))
  871. (setq row 0 ;; row = firsts
  872. i ntokens)
  873. (while (< i nsyms)
  874. (setq sp (aref derives (- i ntokens)))
  875. (while (>= (car sp) 0)
  876. (setq symbol (aref ritem (aref rrhs (car sp)))
  877. sp (cdr sp))
  878. (when (wisent-ISVAR symbol)
  879. (setq symbol (- symbol ntokens))
  880. (wisent-SETBIT (aref firsts row) symbol)
  881. ))
  882. (setq row (1+ row)
  883. i (1+ i)))
  884. (wisent-RTC firsts nvars)
  885. (if wisent-debug-flag
  886. (wisent-print-firsts))
  887. ))
  888. (defun wisent-initialize-closure (n)
  889. "Allocate the ITEMSET and RULESET vectors.
  890. And precompute useful data so that `wisent-closure' can be called.
  891. N is the number of elements to allocate for ITEMSET."
  892. (setq itemset (make-vector n 0)
  893. rulesetsize (wisent-WORDSIZE (1+ nrules))
  894. ruleset (make-vector rulesetsize 0))
  895. (wisent-set-fderives))
  896. (defun wisent-print-closure ()
  897. "Print ITEMSET."
  898. (let (i)
  899. (wisent-log "\n\nclosure n = %d\n\n" nitemset)
  900. (setq i 0) ;; isp = itemset
  901. (while (< i nitemset)
  902. (wisent-log " %d\n" (aref itemset i))
  903. (setq i (1+ i)))))
  904. (defun wisent-closure (core n)
  905. "Set up RULESET and ITEMSET for the transitions out of CORE state.
  906. Given a vector of item numbers items, of length N, set up RULESET and
  907. ITEMSET to indicate what rules could be run and which items could be
  908. accepted when those items are the active ones.
  909. RULESET contains a bit for each rule. `wisent-closure' sets the bits
  910. for all rules which could potentially describe the next input to be
  911. read.
  912. ITEMSET is a vector of item numbers; NITEMSET is the number of items
  913. in ITEMSET. `wisent-closure' places there the indices of all items
  914. which represent units of input that could arrive next."
  915. (let (c r v symbol ruleno itemno)
  916. (if (zerop n)
  917. (progn
  918. (setq r 0
  919. v (aref fderives (- start-symbol ntokens)))
  920. (while (< r rulesetsize)
  921. ;; ruleset[r] = FDERIVES (start-symbol)[r];
  922. (aset ruleset r (aref v r))
  923. (setq r (1+ r)))
  924. )
  925. (fillarray ruleset 0)
  926. (setq c 0)
  927. (while (< c n)
  928. (setq symbol (aref ritem (aref core c)))
  929. (when (wisent-ISVAR symbol)
  930. (setq r 0
  931. v (aref fderives (- symbol ntokens)))
  932. (while (< r rulesetsize)
  933. ;; ruleset[r] |= FDERIVES (ritem[core[c]])[r];
  934. (aset ruleset r (logior (aref ruleset r) (aref v r)))
  935. (setq r (1+ r))))
  936. (setq c (1+ c)))
  937. )
  938. (setq nitemset 0
  939. c 0
  940. ruleno 0
  941. r (* rulesetsize wisent-BITS-PER-WORD))
  942. (while (< ruleno r)
  943. (when (wisent-BITISSET ruleset ruleno)
  944. (setq itemno (aref rrhs ruleno))
  945. (while (and (< c n) (< (aref core c) itemno))
  946. (aset itemset nitemset (aref core c))
  947. (setq nitemset (1+ nitemset)
  948. c (1+ c)))
  949. (aset itemset nitemset itemno)
  950. (setq nitemset (1+ nitemset)))
  951. (setq ruleno (1+ ruleno)))
  952. (while (< c n)
  953. (aset itemset nitemset (aref core c))
  954. (setq nitemset (1+ nitemset)
  955. c (1+ c)))
  956. (if wisent-debug-flag
  957. (wisent-print-closure))
  958. ))
  959. ;;;; --------------------------------------------------
  960. ;;;; Generate the nondeterministic finite state machine
  961. ;;;; --------------------------------------------------
  962. (defun wisent-allocate-itemsets ()
  963. "Allocate storage for itemsets."
  964. (let (symbol i count symbol-count)
  965. ;; Count the number of occurrences of all the symbols in RITEMS.
  966. ;; Note that useless productions (hence useless nonterminals) are
  967. ;; browsed too, hence we need to allocate room for _all_ the
  968. ;; symbols.
  969. (setq count 0
  970. symbol-count (make-vector (+ nsyms nuseless-nonterminals) 0)
  971. i 0)
  972. (while (setq symbol (aref ritem i))
  973. (when (> symbol 0)
  974. (setq count (1+ count))
  975. (aset symbol-count symbol (1+ (aref symbol-count symbol))))
  976. (setq i (1+ i)))
  977. ;; See comments before `wisent-new-itemsets'. All the vectors of
  978. ;; items live inside kernel-items. The number of active items
  979. ;; after some symbol cannot be more than the number of times that
  980. ;; symbol appears as an item, which is symbol-count[symbol]. We
  981. ;; allocate that much space for each symbol.
  982. (setq kernel-base (make-vector nsyms nil)
  983. kernel-items (make-vector count 0)
  984. count 0
  985. i 0)
  986. (while (< i nsyms)
  987. (aset kernel-base i count)
  988. (setq count (+ count (aref symbol-count i))
  989. i (1+ i)))
  990. (setq shift-symbol symbol-count
  991. kernel-end (make-vector nsyms nil))
  992. ))
  993. (defun wisent-allocate-storage ()
  994. "Allocate storage for the state machine."
  995. (wisent-allocate-itemsets)
  996. (setq shiftset (make-vector nsyms 0)
  997. redset (make-vector (1+ nrules) 0)
  998. state-table (make-vector wisent-state-table-size nil)))
  999. (defun wisent-new-itemsets ()
  1000. "Find which symbols can be shifted in the current state.
  1001. And for each one record which items would be active after that shift.
  1002. Uses the contents of ITEMSET. SHIFT-SYMBOL is set to a vector of the
  1003. symbols that can be shifted. For each symbol in the grammar,
  1004. KERNEL-BASE[symbol] points to a vector of item numbers activated if
  1005. that symbol is shifted, and KERNEL-END[symbol] points after the end of
  1006. that vector."
  1007. (let (i shiftcount isp ksp symbol)
  1008. (fillarray kernel-end nil)
  1009. (setq shiftcount 0
  1010. isp 0)
  1011. (while (< isp nitemset)
  1012. (setq i (aref itemset isp)
  1013. isp (1+ isp)
  1014. symbol (aref ritem i))
  1015. (when (> symbol 0)
  1016. (setq ksp (aref kernel-end symbol))
  1017. (when (not ksp)
  1018. ;; shift-symbol[shiftcount++] = symbol;
  1019. (aset shift-symbol shiftcount symbol)
  1020. (setq shiftcount (1+ shiftcount)
  1021. ksp (aref kernel-base symbol)))
  1022. ;; *ksp++ = i + 1;
  1023. (aset kernel-items ksp (1+ i))
  1024. (setq ksp (1+ ksp))
  1025. (aset kernel-end symbol ksp)))
  1026. (setq nshifts shiftcount)))
  1027. (defun wisent-new-state (symbol)
  1028. "Create a new state for those items, if necessary.
  1029. SYMBOL is the core accessing-symbol.
  1030. Subroutine of `wisent-get-state'."
  1031. (let (n p isp1 isp2 iend items)
  1032. (setq isp1 (aref kernel-base symbol)
  1033. iend (aref kernel-end symbol)
  1034. n (- iend isp1)
  1035. p (make-core)
  1036. items (make-vector n 0))
  1037. (set-core-accessing-symbol p symbol)
  1038. (set-core-number p nstates)
  1039. (set-core-nitems p n)
  1040. (set-core-items p items)
  1041. (setq isp2 0) ;; isp2 = p->items
  1042. (while (< isp1 iend)
  1043. ;; *isp2++ = *isp1++;
  1044. (aset items isp2 (aref kernel-items isp1))
  1045. (setq isp1 (1+ isp1)
  1046. isp2 (1+ isp2)))
  1047. (set-core-next last-state p)
  1048. (setq last-state p
  1049. nstates (1+ nstates))
  1050. p))
  1051. (defun wisent-get-state (symbol)
  1052. "Find the state we would get to by shifting SYMBOL.
  1053. Return the state number for the state we would get to (from the
  1054. current state) by shifting SYMBOL. Create a new state if no
  1055. equivalent one exists already. Used by `wisent-append-states'."
  1056. (let (key isp1 isp2 iend sp sp2 found n)
  1057. (setq isp1 (aref kernel-base symbol)
  1058. iend (aref kernel-end symbol)
  1059. n (- iend isp1)
  1060. key 0)
  1061. ;; Add up the target state's active item numbers to get a hash key
  1062. (while (< isp1 iend)
  1063. (setq key (+ key (aref kernel-items isp1))
  1064. isp1 (1+ isp1)))
  1065. (setq key (% key wisent-state-table-size)
  1066. sp (aref state-table key))
  1067. (if sp
  1068. (progn
  1069. (setq found nil)
  1070. (while (not found)
  1071. (when (= (core-nitems sp) n)
  1072. (setq found t
  1073. isp1 (aref kernel-base symbol)
  1074. ;; isp2 = sp->items;
  1075. sp2 (core-items sp)
  1076. isp2 0)
  1077. (while (and found (< isp1 iend))
  1078. ;; if (*isp1++ != *isp2++)
  1079. (if (not (= (aref kernel-items isp1)
  1080. (aref sp2 isp2)))
  1081. (setq found nil))
  1082. (setq isp1 (1+ isp1)
  1083. isp2 (1+ isp2))))
  1084. (if (not found)
  1085. (if (core-link sp)
  1086. (setq sp (core-link sp))
  1087. ;; sp = sp->link = new-state(symbol)
  1088. (setq sp (set-core-link sp (wisent-new-state symbol))
  1089. found t)))))
  1090. ;; bucket is empty
  1091. ;; state-table[key] = sp = new-state(symbol)
  1092. (setq sp (wisent-new-state symbol))
  1093. (aset state-table key sp))
  1094. ;; return (sp->number);
  1095. (core-number sp)))
  1096. (defun wisent-append-states ()
  1097. "Find or create the core structures for states.
  1098. Use the information computed by `wisent-new-itemsets' to find the
  1099. state numbers reached by each shift transition from the current state.
  1100. SHIFTSET is set up as a vector of state numbers of those states."
  1101. (let (i j symbol)
  1102. ;; First sort shift-symbol into increasing order
  1103. (setq i 1)
  1104. (while (< i nshifts)
  1105. (setq symbol (aref shift-symbol i)
  1106. j i)
  1107. (while (and (> j 0) (> (aref shift-symbol (1- j)) symbol))
  1108. (aset shift-symbol j (aref shift-symbol (1- j)))
  1109. (setq j (1- j)))
  1110. (aset shift-symbol j symbol)
  1111. (setq i (1+ i)))
  1112. (setq i 0)
  1113. (while (< i nshifts)
  1114. (setq symbol (aref shift-symbol i))
  1115. (aset shiftset i (wisent-get-state symbol))
  1116. (setq i (1+ i)))
  1117. ))
  1118. (defun wisent-initialize-states ()
  1119. "Initialize states."
  1120. (let ((p (make-core)))
  1121. (setq first-state p
  1122. last-state p
  1123. this-state p
  1124. nstates 1)))
  1125. (defun wisent-save-shifts ()
  1126. "Save the NSHIFTS of SHIFTSET into the current linked list."
  1127. (let (p i shifts)
  1128. (setq p (make-shifts)
  1129. shifts (make-vector nshifts 0)
  1130. i 0)
  1131. (set-shifts-number p (core-number this-state))
  1132. (set-shifts-nshifts p nshifts)
  1133. (set-shifts-shifts p shifts)
  1134. (while (< i nshifts)
  1135. ;; (p->shifts)[i] = shiftset[i];
  1136. (aset shifts i (aref shiftset i))
  1137. (setq i (1+ i)))
  1138. (if last-shift
  1139. (set-shifts-next last-shift p)
  1140. (setq first-shift p))
  1141. (setq last-shift p)))
  1142. (defun wisent-insert-start-shift ()
  1143. "Create the next-to-final state.
  1144. That is the state to which a shift has already been made in the
  1145. initial state. Subroutine of `wisent-augment-automaton'."
  1146. (let (statep sp)
  1147. (setq statep (make-core))
  1148. (set-core-number statep nstates)
  1149. (set-core-accessing-symbol statep start-symbol)
  1150. (set-core-next last-state statep)
  1151. (setq last-state statep)
  1152. ;; Make a shift from this state to (what will be) the final state.
  1153. (setq sp (make-shifts))
  1154. (set-shifts-number sp nstates)
  1155. (setq nstates (1+ nstates))
  1156. (set-shifts-nshifts sp 1)
  1157. (set-shifts-shifts sp (vector nstates))
  1158. (set-shifts-next last-shift sp)
  1159. (setq last-shift sp)))
  1160. (defun wisent-augment-automaton ()
  1161. "Set up initial and final states as parser wants them.
  1162. Make sure that the initial state has a shift that accepts the
  1163. grammar's start symbol and goes to the next-to-final state, which has
  1164. a shift going to the final state, which has a shift to the termination
  1165. state. Create such states and shifts if they don't happen to exist
  1166. already."
  1167. (let (i k statep sp sp2 sp1 shifts)
  1168. (setq sp first-shift)
  1169. (if sp
  1170. (progn
  1171. (if (zerop (shifts-number sp))
  1172. (progn
  1173. (setq k (shifts-nshifts sp)
  1174. statep (core-next first-state))
  1175. ;; The states reached by shifts from first-state are
  1176. ;; numbered 1...K. Look for one reached by
  1177. ;; START-SYMBOL.
  1178. (while (and (< (core-accessing-symbol statep) start-symbol)
  1179. (< (core-number statep) k))
  1180. (setq statep (core-next statep)))
  1181. (if (= (core-accessing-symbol statep) start-symbol)
  1182. (progn
  1183. ;; We already have a next-to-final state. Make
  1184. ;; sure it has a shift to what will be the final
  1185. ;; state.
  1186. (setq k (core-number statep))
  1187. (while (and sp (< (shifts-number sp) k))
  1188. (setq sp1 sp
  1189. sp (shifts-next sp)))
  1190. (if (and sp (= (shifts-number sp) k))
  1191. (progn
  1192. (setq i (shifts-nshifts sp)
  1193. sp2 (make-shifts)
  1194. shifts (make-vector (1+ i) 0))
  1195. (set-shifts-number sp2 k)
  1196. (set-shifts-nshifts sp2 (1+ i))
  1197. (set-shifts-shifts sp2 shifts)
  1198. (aset shifts 0 nstates)
  1199. (while (> i 0)
  1200. ;; sp2->shifts[i] = sp->shifts[i - 1];
  1201. (aset shifts i (aref (shifts-shifts sp) (1- i)))
  1202. (setq i (1- i)))
  1203. ;; Patch sp2 into the chain of shifts in
  1204. ;; place of sp, following sp1.
  1205. (set-shifts-next sp2 (shifts-next sp))
  1206. (set-shifts-next sp1 sp2)
  1207. (if (eq sp last-shift)
  1208. (setq last-shift sp2))
  1209. )
  1210. (setq sp2 (make-shifts))
  1211. (set-shifts-number sp2 k)
  1212. (set-shifts-nshifts sp2 1)
  1213. (set-shifts-shifts sp2 (vector nstates))
  1214. ;; Patch sp2 into the chain of shifts between
  1215. ;; sp1 and sp.
  1216. (set-shifts-next sp2 sp)
  1217. (set-shifts-next sp1 sp2)
  1218. (if (not sp)
  1219. (setq last-shift sp2))
  1220. )
  1221. )
  1222. ;; There is no next-to-final state as yet.
  1223. ;; Add one more shift in FIRST-SHIFT, going to the
  1224. ;; next-to-final state (yet to be made).
  1225. (setq sp first-shift
  1226. sp2 (make-shifts)
  1227. i (shifts-nshifts sp)
  1228. shifts (make-vector (1+ i) 0))
  1229. (set-shifts-nshifts sp2 (1+ i))
  1230. (set-shifts-shifts sp2 shifts)
  1231. ;; Stick this shift into the vector at the proper place.
  1232. (setq statep (core-next first-state)
  1233. k 0
  1234. i 0)
  1235. (while (< i (shifts-nshifts sp))
  1236. (when (and (> (core-accessing-symbol statep) start-symbol)
  1237. (= i k))
  1238. (aset shifts k nstates)
  1239. (setq k (1+ k)))
  1240. (aset shifts k (aref (shifts-shifts sp) i))
  1241. (setq statep (core-next statep))
  1242. (setq i (1+ i)
  1243. k (1+ k)))
  1244. (when (= i k)
  1245. (aset shifts k nstates)
  1246. (setq k (1+ k)))
  1247. ;; Patch sp2 into the chain of shifts in place of
  1248. ;; sp, at the beginning.
  1249. (set-shifts-next sp2 (shifts-next sp))
  1250. (setq first-shift sp2)
  1251. (if (eq last-shift sp)
  1252. (setq last-shift sp2))
  1253. ;; Create the next-to-final state, with shift to
  1254. ;; what will be the final state.
  1255. (wisent-insert-start-shift)))
  1256. ;; The initial state didn't even have any shifts. Give it
  1257. ;; one shift, to the next-to-final state.
  1258. (setq sp (make-shifts))
  1259. (set-shifts-nshifts sp 1)
  1260. (set-shifts-shifts sp (vector nstates))
  1261. ;; Patch sp into the chain of shifts at the beginning.
  1262. (set-shifts-next sp first-shift)
  1263. (setq first-shift sp)
  1264. ;; Create the next-to-final state, with shift to what will
  1265. ;; be the final state.
  1266. (wisent-insert-start-shift)))
  1267. ;; There are no shifts for any state. Make one shift, from the
  1268. ;; initial state to the next-to-final state.
  1269. (setq sp (make-shifts))
  1270. (set-shifts-nshifts sp 1)
  1271. (set-shifts-shifts sp (vector nstates))
  1272. ;; Initialize the chain of shifts with sp.
  1273. (setq first-shift sp
  1274. last-shift sp)
  1275. ;; Create the next-to-final state, with shift to what will be
  1276. ;; the final state.
  1277. (wisent-insert-start-shift))
  1278. ;; Make the final state--the one that follows a shift from the
  1279. ;; next-to-final state. The symbol for that shift is 0
  1280. ;; (end-of-file).
  1281. (setq statep (make-core))
  1282. (set-core-number statep nstates)
  1283. (set-core-next last-state statep)
  1284. (setq last-state statep)
  1285. ;; Make the shift from the final state to the termination state.
  1286. (setq sp (make-shifts))
  1287. (set-shifts-number sp nstates)
  1288. (setq nstates (1+ nstates))
  1289. (set-shifts-nshifts sp 1)
  1290. (set-shifts-shifts sp (vector nstates))
  1291. (set-shifts-next last-shift sp)
  1292. (setq last-shift sp)
  1293. ;; Note that the variable FINAL-STATE refers to what we sometimes
  1294. ;; call the termination state.
  1295. (setq final-state nstates)
  1296. ;; Make the termination state.
  1297. (setq statep (make-core))
  1298. (set-core-number statep nstates)
  1299. (setq nstates (1+ nstates))
  1300. (set-core-next last-state statep)
  1301. (setq last-state statep)))
  1302. (defun wisent-save-reductions ()
  1303. "Make a reductions structure.
  1304. Find which rules can be used for reduction transitions from the
  1305. current state and make a reductions structure for the state to record
  1306. their rule numbers."
  1307. (let (i item count p rules)
  1308. ;; Find and count the active items that represent ends of rules.
  1309. (setq count 0
  1310. i 0)
  1311. (while (< i nitemset)
  1312. (setq item (aref ritem (aref itemset i)))
  1313. (when (< item 0)
  1314. (aset redset count (- item))
  1315. (setq count (1+ count)))
  1316. (setq i (1+ i)))
  1317. ;; Make a reductions structure and copy the data into it.
  1318. (when (> count 0)
  1319. (setq p (make-reductions)
  1320. rules (make-vector count 0))
  1321. (set-reductions-number p (core-number this-state))
  1322. (set-reductions-nreds p count)
  1323. (set-reductions-rules p rules)
  1324. (setq i 0)
  1325. (while (< i count)
  1326. ;; (p->rules)[i] = redset[i]
  1327. (aset rules i (aref redset i))
  1328. (setq i (1+ i)))
  1329. (if last-reduction
  1330. (set-reductions-next last-reduction p)
  1331. (setq first-reduction p))
  1332. (setq last-reduction p))))
  1333. (defun wisent-generate-states ()
  1334. "Compute the nondeterministic finite state machine from the grammar."
  1335. (wisent-allocate-storage)
  1336. (wisent-initialize-closure nitems)
  1337. (wisent-initialize-states)
  1338. (while this-state
  1339. ;; Set up RULESET and ITEMSET for the transitions out of this
  1340. ;; state. RULESET gets a 1 bit for each rule that could reduce
  1341. ;; now. ITEMSET gets a vector of all the items that could be
  1342. ;; accepted next.
  1343. (wisent-closure (core-items this-state) (core-nitems this-state))
  1344. ;; Record the reductions allowed out of this state.
  1345. (wisent-save-reductions)
  1346. ;; Find the itemsets of the states that shifts can reach.
  1347. (wisent-new-itemsets)
  1348. ;; Find or create the core structures for those states.
  1349. (wisent-append-states)
  1350. ;; Create the shifts structures for the shifts to those states,
  1351. ;; now that the state numbers transitioning to are known.
  1352. (if (> nshifts 0)
  1353. (wisent-save-shifts))
  1354. ;; States are queued when they are created; process them all.
  1355. (setq this-state (core-next this-state)))
  1356. ;; Set up initial and final states as parser wants them.
  1357. (wisent-augment-automaton))
  1358. ;;;; ---------------------------
  1359. ;;;; Compute look-ahead criteria
  1360. ;;;; ---------------------------
  1361. ;; Compute how to make the finite state machine deterministic; find
  1362. ;; which rules need lookahead in each state, and which lookahead
  1363. ;; tokens they accept.
  1364. ;; `wisent-lalr', the entry point, builds these data structures:
  1365. ;; GOTO-MAP, FROM-STATE and TO-STATE record each shift transition
  1366. ;; which accepts a variable (a nonterminal). NGOTOS is the number of
  1367. ;; such transitions.
  1368. ;; FROM-STATE[t] is the state number which a transition leads from and
  1369. ;; TO-STATE[t] is the state number it leads to.
  1370. ;; All the transitions that accept a particular variable are grouped
  1371. ;; together and GOTO-MAP[i - NTOKENS] is the index in FROM-STATE and
  1372. ;; TO-STATE of the first of them.
  1373. ;; CONSISTENT[s] is non-nil if no lookahead is needed to decide what
  1374. ;; to do in state s.
  1375. ;; LARULENO is a vector which records the rules that need lookahead in
  1376. ;; various states. The elements of LARULENO that apply to state s are
  1377. ;; those from LOOKAHEADS[s] through LOOKAHEADS[s+1]-1. Each element
  1378. ;; of LARULENO is a rule number.
  1379. ;; If LR is the length of LARULENO, then a number from 0 to LR-1 can
  1380. ;; specify both a rule and a state where the rule might be applied.
  1381. ;; LA is a LR by NTOKENS matrix of bits.
  1382. ;; LA[l, i] is 1 if the rule LARULENO[l] is applicable in the
  1383. ;; appropriate state when the next token is symbol i.
  1384. ;; If LA[l, i] and LA[l, j] are both 1 for i != j, it is a conflict.
  1385. (wisent-defcontext digraph
  1386. INDEX R VERTICES
  1387. infinity top)
  1388. (defun wisent-traverse (i)
  1389. "Traverse I."
  1390. (let (j k height Ri Fi break)
  1391. (setq top (1+ top)
  1392. height top)
  1393. (aset VERTICES top i) ;; VERTICES[++top] = i
  1394. (aset INDEX i top) ;; INDEX[i] = height = top
  1395. (setq Ri (aref R i))
  1396. (when Ri
  1397. (setq j 0)
  1398. (while (>= (aref Ri j) 0)
  1399. (if (zerop (aref INDEX (aref Ri j)))
  1400. (wisent-traverse (aref Ri j)))
  1401. ;; if (INDEX[i] > INDEX[R[i][j]])
  1402. (if (> (aref INDEX i) (aref INDEX (aref Ri j)))
  1403. ;; INDEX[i] = INDEX[R[i][j]];
  1404. (aset INDEX i (aref INDEX (aref Ri j))))
  1405. (setq Fi (aref F i)
  1406. k 0)
  1407. (while (< k tokensetsize)
  1408. ;; F (i)[k] |= F (R[i][j])[k];
  1409. (aset Fi k (logior (aref Fi k)
  1410. (aref (aref F (aref Ri j)) k)))
  1411. (setq k (1+ k)))
  1412. (setq j (1+ j))))
  1413. (when (= (aref INDEX i) height)
  1414. (setq break nil)
  1415. (while (not break)
  1416. (setq j (aref VERTICES top) ;; j = VERTICES[top--]
  1417. top (1- top))
  1418. (aset INDEX j infinity)
  1419. (if (= i j)
  1420. (setq break t)
  1421. (setq k 0)
  1422. (while (< k tokensetsize)
  1423. ;; F (j)[k] = F (i)[k];
  1424. (aset (aref F j) k (aref (aref F i) k))
  1425. (setq k (1+ k))))))
  1426. ))
  1427. (defun wisent-digraph (relation)
  1428. "Digraph RELATION."
  1429. (wisent-with-context digraph
  1430. (setq infinity (+ ngotos 2)
  1431. INDEX (make-vector (1+ ngotos) 0)
  1432. VERTICES (make-vector (1+ ngotos) 0)
  1433. top 0
  1434. R relation)
  1435. (let ((i 0))
  1436. (while (< i ngotos)
  1437. (if (and (= (aref INDEX i) 0) (aref R i))
  1438. (wisent-traverse i))
  1439. (setq i (1+ i))))))
  1440. (defun wisent-set-state-table ()
  1441. "Build state table."
  1442. (let (sp)
  1443. (setq state-table (make-vector nstates nil)
  1444. sp first-state)
  1445. (while sp
  1446. (aset state-table (core-number sp) sp)
  1447. (setq sp (core-next sp)))))
  1448. (defun wisent-set-accessing-symbol ()
  1449. "Build accessing symbol table."
  1450. (let (sp)
  1451. (setq accessing-symbol (make-vector nstates 0)
  1452. sp first-state)
  1453. (while sp
  1454. (aset accessing-symbol (core-number sp) (core-accessing-symbol sp))
  1455. (setq sp (core-next sp)))))
  1456. (defun wisent-set-shift-table ()
  1457. "Build shift table."
  1458. (let (sp)
  1459. (setq shift-table (make-vector nstates nil)
  1460. sp first-shift)
  1461. (while sp
  1462. (aset shift-table (shifts-number sp) sp)
  1463. (setq sp (shifts-next sp)))))
  1464. (defun wisent-set-reduction-table ()
  1465. "Build reduction table."
  1466. (let (rp)
  1467. (setq reduction-table (make-vector nstates nil)
  1468. rp first-reduction)
  1469. (while rp
  1470. (aset reduction-table (reductions-number rp) rp)
  1471. (setq rp (reductions-next rp)))))
  1472. (defun wisent-set-maxrhs ()
  1473. "Setup MAXRHS length."
  1474. (let (i len max)
  1475. (setq len 0
  1476. max 0
  1477. i 0)
  1478. (while (aref ritem i)
  1479. (if (> (aref ritem i) 0)
  1480. (setq len (1+ len))
  1481. (if (> len max)
  1482. (setq max len))
  1483. (setq len 0))
  1484. (setq i (1+ i)))
  1485. (setq maxrhs max)))
  1486. (defun wisent-initialize-LA ()
  1487. "Set up LA."
  1488. (let (i j k count rp sp np v)
  1489. (setq consistent (make-vector nstates nil)
  1490. lookaheads (make-vector (1+ nstates) 0)
  1491. count 0
  1492. i 0)
  1493. (while (< i nstates)
  1494. (aset lookaheads i count)
  1495. (setq rp (aref reduction-table i)
  1496. sp (aref shift-table i))
  1497. ;; if (rp &&
  1498. ;; (rp->nreds > 1
  1499. ;; || (sp && ! ISVAR(accessing-symbol[sp->shifts[0]]))))
  1500. (if (and rp
  1501. (or (> (reductions-nreds rp) 1)
  1502. (and sp
  1503. (not (wisent-ISVAR
  1504. (aref accessing-symbol
  1505. (aref (shifts-shifts sp) 0)))))))
  1506. (setq count (+ count (reductions-nreds rp)))
  1507. (aset consistent i t))
  1508. (when sp
  1509. (setq k 0
  1510. j (shifts-nshifts sp)
  1511. v (shifts-shifts sp))
  1512. (while (< k j)
  1513. (when (= (aref accessing-symbol (aref v k))
  1514. error-token-number)
  1515. (aset consistent i nil)
  1516. (setq k j)) ;; break
  1517. (setq k (1+ k))))
  1518. (setq i (1+ i)))
  1519. (aset lookaheads nstates count)
  1520. (if (zerop count)
  1521. (progn
  1522. (setq LA (make-vector 1 nil)
  1523. LAruleno (make-vector 1 0)
  1524. lookback (make-vector 1 nil)))
  1525. (setq LA (make-vector count nil)
  1526. LAruleno (make-vector count 0)
  1527. lookback (make-vector count nil)))
  1528. (setq i 0 j (length LA))
  1529. (while (< i j)
  1530. (aset LA i (make-vector tokensetsize 0))
  1531. (setq i (1+ i)))
  1532. (setq np 0
  1533. i 0)
  1534. (while (< i nstates)
  1535. (when (not (aref consistent i))
  1536. (setq rp (aref reduction-table i))
  1537. (when rp
  1538. (setq j 0
  1539. k (reductions-nreds rp)
  1540. v (reductions-rules rp))
  1541. (while (< j k)
  1542. (aset LAruleno np (aref v j))
  1543. (setq np (1+ np)
  1544. j (1+ j)))))
  1545. (setq i (1+ i)))))
  1546. (defun wisent-set-goto-map ()
  1547. "Set up GOTO-MAP."
  1548. (let (sp i j symbol k temp-map state1 state2 v)
  1549. (setq goto-map (make-vector (1+ nvars) 0)
  1550. temp-map (make-vector (1+ nvars) 0))
  1551. (setq ngotos 0
  1552. sp first-shift)
  1553. (while sp
  1554. (setq i (1- (shifts-nshifts sp))
  1555. v (shifts-shifts sp))
  1556. (while (>= i 0)
  1557. (setq symbol (aref accessing-symbol (aref v i)))
  1558. (if (wisent-ISTOKEN symbol)
  1559. (setq i 0) ;; break
  1560. (setq ngotos (1+ ngotos))
  1561. ;; goto-map[symbol]++;
  1562. (aset goto-map (- symbol ntokens)
  1563. (1+ (aref goto-map (- symbol ntokens)))))
  1564. (setq i (1- i)))
  1565. (setq sp (shifts-next sp)))
  1566. (setq k 0
  1567. i ntokens
  1568. j 0)
  1569. (while (< i nsyms)
  1570. (aset temp-map j k)
  1571. (setq k (+ k (aref goto-map j))
  1572. i (1+ i)
  1573. j (1+ j)))
  1574. (setq i ntokens
  1575. j 0)
  1576. (while (< i nsyms)
  1577. (aset goto-map j (aref temp-map j))
  1578. (setq i (1+ i)
  1579. j (1+ j)))
  1580. ;; goto-map[nsyms] = ngotos;
  1581. ;; temp-map[nsyms] = ngotos;
  1582. (aset goto-map j ngotos)
  1583. (aset temp-map j ngotos)
  1584. (setq from-state (make-vector ngotos 0)
  1585. to-state (make-vector ngotos 0)
  1586. sp first-shift)
  1587. (while sp
  1588. (setq state1 (shifts-number sp)
  1589. v (shifts-shifts sp)
  1590. i (1- (shifts-nshifts sp)))
  1591. (while (>= i 0)
  1592. (setq state2 (aref v i)
  1593. symbol (aref accessing-symbol state2))
  1594. (if (wisent-ISTOKEN symbol)
  1595. (setq i 0) ;; break
  1596. ;; k = temp-map[symbol]++;
  1597. (setq k (aref temp-map (- symbol ntokens)))
  1598. (aset temp-map (- symbol ntokens) (1+ k))
  1599. (aset from-state k state1)
  1600. (aset to-state k state2))
  1601. (setq i (1- i)))
  1602. (setq sp (shifts-next sp)))
  1603. ))
  1604. (defun wisent-map-goto (state symbol)
  1605. "Map a STATE/SYMBOL pair into its numeric representation."
  1606. (let (high low middle s result)
  1607. ;; low = goto-map[symbol];
  1608. ;; high = goto-map[symbol + 1] - 1;
  1609. (setq low (aref goto-map (- symbol ntokens))
  1610. high (1- (aref goto-map (- (1+ symbol) ntokens))))
  1611. (while (and (not result) (<= low high))
  1612. (setq middle (/ (+ low high) 2)
  1613. s (aref from-state middle))
  1614. (cond
  1615. ((= s state)
  1616. (setq result middle))
  1617. ((< s state)
  1618. (setq low (1+ middle)))
  1619. (t
  1620. (setq high (1- middle)))))
  1621. (or result
  1622. (error "Internal error in `wisent-map-goto'"))
  1623. ))
  1624. (defun wisent-initialize-F ()
  1625. "Set up F."
  1626. (let (i j k sp edge rowp rp reads nedges stateno symbol v break)
  1627. (setq F (make-vector ngotos nil)
  1628. i 0)
  1629. (while (< i ngotos)
  1630. (aset F i (make-vector tokensetsize 0))
  1631. (setq i (1+ i)))
  1632. (setq reads (make-vector ngotos nil)
  1633. edge (make-vector (1+ ngotos) 0)
  1634. nedges 0
  1635. rowp 0 ;; rowp = F
  1636. i 0)
  1637. (while (< i ngotos)
  1638. (setq stateno (aref to-state i)
  1639. sp (aref shift-table stateno))
  1640. (when sp
  1641. (setq k (shifts-nshifts sp)
  1642. v (shifts-shifts sp)
  1643. j 0
  1644. break nil)
  1645. (while (and (not break) (< j k))
  1646. ;; symbol = accessing-symbol[sp->shifts[j]];
  1647. (setq symbol (aref accessing-symbol (aref v j)))
  1648. (if (wisent-ISVAR symbol)
  1649. (setq break t) ;; break
  1650. (wisent-SETBIT (aref F rowp) symbol)
  1651. (setq j (1+ j))))
  1652. (while (< j k)
  1653. ;; symbol = accessing-symbol[sp->shifts[j]];
  1654. (setq symbol (aref accessing-symbol (aref v j)))
  1655. (when (aref nullable (- symbol ntokens))
  1656. (aset edge nedges (wisent-map-goto stateno symbol))
  1657. (setq nedges (1+ nedges)))
  1658. (setq j (1+ j)))
  1659. (when (> nedges 0)
  1660. ;; reads[i] = rp = NEW2(nedges + 1, short);
  1661. (setq rp (make-vector (1+ nedges) 0)
  1662. j 0)
  1663. (aset reads i rp)
  1664. (while (< j nedges)
  1665. ;; rp[j] = edge[j];
  1666. (aset rp j (aref edge j))
  1667. (setq j (1+ j)))
  1668. (aset rp nedges -1)
  1669. (setq nedges 0)))
  1670. (setq rowp (1+ rowp))
  1671. (setq i (1+ i)))
  1672. (wisent-digraph reads)
  1673. ))
  1674. (defun wisent-add-lookback-edge (stateno ruleno gotono)
  1675. "Add a lookback edge.
  1676. STATENO, RULENO, GOTONO are self-explanatory."
  1677. (let (i k found)
  1678. (setq i (aref lookaheads stateno)
  1679. k (aref lookaheads (1+ stateno))
  1680. found nil)
  1681. (while (and (not found) (< i k))
  1682. (if (= (aref LAruleno i) ruleno)
  1683. (setq found t)
  1684. (setq i (1+ i))))
  1685. (or found
  1686. (error "Internal error in `wisent-add-lookback-edge'"))
  1687. ;; value . next
  1688. ;; lookback[i] = (gotono . lookback[i])
  1689. (aset lookback i (cons gotono (aref lookback i)))))
  1690. (defun wisent-transpose (R-arg n)
  1691. "Return the transpose of R-ARG, of size N.
  1692. Destroy R-ARG, as it is replaced with the result. R-ARG[I] is nil or
  1693. a -1 terminated list of numbers. RESULT[NUM] is nil or the -1
  1694. terminated list of the I such as NUM is in R-ARG[I]."
  1695. (let (i j new-R end-R nedges v sp)
  1696. (setq new-R (make-vector n nil)
  1697. end-R (make-vector n nil)
  1698. nedges (make-vector n 0))
  1699. ;; Count.
  1700. (setq i 0)
  1701. (while (< i n)
  1702. (setq v (aref R-arg i))
  1703. (when v
  1704. (setq j 0)
  1705. (while (>= (aref v j) 0)
  1706. (aset nedges (aref v j) (1+ (aref nedges (aref v j))))
  1707. (setq j (1+ j))))
  1708. (setq i (1+ i)))
  1709. ;; Allocate.
  1710. (setq i 0)
  1711. (while (< i n)
  1712. (when (> (aref nedges i) 0)
  1713. (setq sp (make-vector (1+ (aref nedges i)) 0))
  1714. (aset sp (aref nedges i) -1)
  1715. (aset new-R i sp)
  1716. (aset end-R i 0))
  1717. (setq i (1+ i)))
  1718. ;; Store.
  1719. (setq i 0)
  1720. (while (< i n)
  1721. (setq v (aref R-arg i))
  1722. (when v
  1723. (setq j 0)
  1724. (while (>= (aref v j) 0)
  1725. (aset (aref new-R (aref v j)) (aref end-R (aref v j)) i)
  1726. (aset end-R (aref v j) (1+ (aref end-R (aref v j))))
  1727. (setq j (1+ j))))
  1728. (setq i (1+ i)))
  1729. new-R))
  1730. (defun wisent-build-relations ()
  1731. "Build relations."
  1732. (let (i j k rulep rp sp length nedges done state1 stateno
  1733. symbol1 symbol2 edge states v)
  1734. (setq includes (make-vector ngotos nil)
  1735. edge (make-vector (1+ ngotos) 0)
  1736. states (make-vector (1+ maxrhs) 0)
  1737. i 0)
  1738. (while (< i ngotos)
  1739. (setq nedges 0
  1740. state1 (aref from-state i)
  1741. symbol1 (aref accessing-symbol (aref to-state i))
  1742. rulep (aref derives (- symbol1 ntokens)))
  1743. (while (> (car rulep) 0)
  1744. (aset states 0 state1)
  1745. (setq length 1
  1746. stateno state1
  1747. rp (aref rrhs (car rulep))) ;; rp = ritem + rrhs[*rulep]
  1748. (while (> (aref ritem rp) 0) ;; *rp > 0
  1749. (setq symbol2 (aref ritem rp)
  1750. sp (aref shift-table stateno)
  1751. k (shifts-nshifts sp)
  1752. v (shifts-shifts sp)
  1753. j 0)
  1754. (while (< j k)
  1755. (setq stateno (aref v j))
  1756. (if (= (aref accessing-symbol stateno) symbol2)
  1757. (setq j k) ;; break
  1758. (setq j (1+ j))))
  1759. ;; states[length++] = stateno;
  1760. (aset states length stateno)
  1761. (setq length (1+ length))
  1762. (setq rp (1+ rp)))
  1763. (if (not (aref consistent stateno))
  1764. (wisent-add-lookback-edge stateno (car rulep) i))
  1765. (setq length (1- length)
  1766. done nil)
  1767. (while (not done)
  1768. (setq done t
  1769. rp (1- rp))
  1770. (when (and (>= rp 0) (wisent-ISVAR (aref ritem rp)))
  1771. ;; stateno = states[--length];
  1772. (setq length (1- length)
  1773. stateno (aref states length))
  1774. (aset edge nedges (wisent-map-goto stateno (aref ritem rp)))
  1775. (setq nedges (1+ nedges))
  1776. (if (aref nullable (- (aref ritem rp) ntokens))
  1777. (setq done nil))))
  1778. (setq rulep (cdr rulep)))
  1779. (when (> nedges 0)
  1780. (setq v (make-vector (1+ nedges) 0)
  1781. j 0)
  1782. (aset includes i v)
  1783. (while (< j nedges)
  1784. (aset v j (aref edge j))
  1785. (setq j (1+ j)))
  1786. (aset v nedges -1))
  1787. (setq i (1+ i)))
  1788. (setq includes (wisent-transpose includes ngotos))
  1789. ))
  1790. (defun wisent-compute-FOLLOWS ()
  1791. "Compute follows."
  1792. (wisent-digraph includes))
  1793. (defun wisent-compute-lookaheads ()
  1794. "Compute lookaheads."
  1795. (let (i j n v1 v2 sp)
  1796. (setq n (aref lookaheads nstates)
  1797. i 0)
  1798. (while (< i n)
  1799. (setq sp (aref lookback i))
  1800. (while sp
  1801. (setq v1 (aref LA i)
  1802. v2 (aref F (car sp))
  1803. j 0)
  1804. (while (< j tokensetsize)
  1805. ;; LA (i)[j] |= F (sp->value)[j]
  1806. (aset v1 j (logior (aref v1 j) (aref v2 j)))
  1807. (setq j (1+ j)))
  1808. (setq sp (cdr sp)))
  1809. (setq i (1+ i)))))
  1810. (defun wisent-lalr ()
  1811. "Make the nondeterministic finite state machine deterministic."
  1812. (setq tokensetsize (wisent-WORDSIZE ntokens))
  1813. (wisent-set-state-table)
  1814. (wisent-set-accessing-symbol)
  1815. (wisent-set-shift-table)
  1816. (wisent-set-reduction-table)
  1817. (wisent-set-maxrhs)
  1818. (wisent-initialize-LA)
  1819. (wisent-set-goto-map)
  1820. (wisent-initialize-F)
  1821. (wisent-build-relations)
  1822. (wisent-compute-FOLLOWS)
  1823. (wisent-compute-lookaheads))
  1824. ;;;; -----------------------------------------------
  1825. ;;;; Find and resolve or report look-ahead conflicts
  1826. ;;;; -----------------------------------------------
  1827. (defsubst wisent-log-resolution (state LAno token resolution)
  1828. "Log a shift-reduce conflict resolution.
  1829. In specified STATE between rule pointed by lookahead number LANO and
  1830. TOKEN, resolved as RESOLUTION."
  1831. (if (or wisent-verbose-flag wisent-debug-flag)
  1832. (wisent-log
  1833. "Conflict in state %d between rule %d and token %s resolved as %s.\n"
  1834. state (aref LAruleno LAno) (wisent-tag token) resolution)))
  1835. (defun wisent-flush-shift (state token)
  1836. "Turn off the shift recorded in the specified STATE for TOKEN.
  1837. Used when we resolve a shift-reduce conflict in favor of the reduction."
  1838. (let (shiftp i k v)
  1839. (when (setq shiftp (aref shift-table state))
  1840. (setq k (shifts-nshifts shiftp)
  1841. v (shifts-shifts shiftp)
  1842. i 0)
  1843. (while (< i k)
  1844. (if (and (not (zerop (aref v i)))
  1845. (= token (aref accessing-symbol (aref v i))))
  1846. (aset v i 0))
  1847. (setq i (1+ i))))))
  1848. (defun wisent-resolve-sr-conflict (state lookaheadnum)
  1849. "Attempt to resolve shift-reduce conflict for one rule.
  1850. Resolve by means of precedence declarations. The conflict occurred in
  1851. specified STATE for the rule pointed by the lookahead symbol
  1852. LOOKAHEADNUM. It has already been checked that the rule has a
  1853. precedence. A conflict is resolved by modifying the shift or reduce
  1854. tables so that there is no longer a conflict."
  1855. (let (i redprec errp errs nerrs token sprec sassoc)
  1856. ;; Find the rule to reduce by to get precedence of reduction
  1857. (setq token (aref tags (aref rprec (aref LAruleno lookaheadnum)))
  1858. redprec (wisent-prec token)
  1859. errp (make-errs)
  1860. errs (make-vector ntokens 0)
  1861. nerrs 0
  1862. i 0)
  1863. (set-errs-errs errp errs)
  1864. (while (< i ntokens)
  1865. (setq token (aref tags i))
  1866. (when (and (wisent-BITISSET (aref LA lookaheadnum) i)
  1867. (wisent-BITISSET lookaheadset i)
  1868. (setq sprec (wisent-prec token)))
  1869. ;; Shift-reduce conflict occurs for token number I and it has
  1870. ;; a precedence. The precedence of shifting is that of token
  1871. ;; I.
  1872. (cond
  1873. ((< sprec redprec)
  1874. (wisent-log-resolution state lookaheadnum i "reduce")
  1875. ;; Flush the shift for this token
  1876. (wisent-RESETBIT lookaheadset i)
  1877. (wisent-flush-shift state i)
  1878. )
  1879. ((> sprec redprec)
  1880. (wisent-log-resolution state lookaheadnum i "shift")
  1881. ;; Flush the reduce for this token
  1882. (wisent-RESETBIT (aref LA lookaheadnum) i)
  1883. )
  1884. (t
  1885. ;; Matching precedence levels.
  1886. ;; For left association, keep only the reduction.
  1887. ;; For right association, keep only the shift.
  1888. ;; For nonassociation, keep neither.
  1889. (setq sassoc (wisent-assoc token))
  1890. (cond
  1891. ((eq sassoc 'right)
  1892. (wisent-log-resolution state lookaheadnum i "shift"))
  1893. ((eq sassoc 'left)
  1894. (wisent-log-resolution state lookaheadnum i "reduce"))
  1895. ((eq sassoc 'nonassoc)
  1896. (wisent-log-resolution state lookaheadnum i "an error"))
  1897. )
  1898. (when (not (eq sassoc 'right))
  1899. ;; Flush the shift for this token
  1900. (wisent-RESETBIT lookaheadset i)
  1901. (wisent-flush-shift state i))
  1902. (when (not (eq sassoc 'left))
  1903. ;; Flush the reduce for this token
  1904. (wisent-RESETBIT (aref LA lookaheadnum) i))
  1905. (when (eq sassoc 'nonassoc)
  1906. ;; Record an explicit error for this token
  1907. (aset errs nerrs i)
  1908. (setq nerrs (1+ nerrs)))
  1909. )))
  1910. (setq i (1+ i)))
  1911. (when (> nerrs 0)
  1912. (set-errs-nerrs errp nerrs)
  1913. (aset err-table state errp))
  1914. ))
  1915. (defun wisent-set-conflicts (state)
  1916. "Find and attempt to resolve conflicts in specified STATE."
  1917. (let (i j k v shiftp symbol)
  1918. (unless (aref consistent state)
  1919. (fillarray lookaheadset 0)
  1920. (when (setq shiftp (aref shift-table state))
  1921. (setq k (shifts-nshifts shiftp)
  1922. v (shifts-shifts shiftp)
  1923. i 0)
  1924. (while (and (< i k)
  1925. (wisent-ISTOKEN
  1926. (setq symbol (aref accessing-symbol (aref v i)))))
  1927. (or (zerop (aref v i))
  1928. (wisent-SETBIT lookaheadset symbol))
  1929. (setq i (1+ i))))
  1930. ;; Loop over all rules which require lookahead in this state
  1931. ;; first check for shift-reduce conflict, and try to resolve
  1932. ;; using precedence
  1933. (setq i (aref lookaheads state)
  1934. k (aref lookaheads (1+ state)))
  1935. (while (< i k)
  1936. (when (aref rprec (aref LAruleno i))
  1937. (setq v (aref LA i)
  1938. j 0)
  1939. (while (< j tokensetsize)
  1940. (if (zerop (logand (aref v j) (aref lookaheadset j)))
  1941. (setq j (1+ j))
  1942. ;; if (LA (i)[j] & lookaheadset[j])
  1943. (wisent-resolve-sr-conflict state i)
  1944. (setq j tokensetsize)))) ;; break
  1945. (setq i (1+ i)))
  1946. ;; Loop over all rules which require lookahead in this state
  1947. ;; Check for conflicts not resolved above.
  1948. (setq i (aref lookaheads state))
  1949. (while (< i k)
  1950. (setq v (aref LA i)
  1951. j 0)
  1952. (while (< j tokensetsize)
  1953. ;; if (LA (i)[j] & lookaheadset[j])
  1954. (if (not (zerop (logand (aref v j) (aref lookaheadset j))))
  1955. (aset conflicts state t))
  1956. (setq j (1+ j)))
  1957. (setq j 0)
  1958. (while (< j tokensetsize)
  1959. ;; lookaheadset[j] |= LA (i)[j];
  1960. (aset lookaheadset j (logior (aref lookaheadset j)
  1961. (aref v j)))
  1962. (setq j (1+ j)))
  1963. (setq i (1+ i)))
  1964. )))
  1965. (defun wisent-resolve-conflicts ()
  1966. "Find and resolve conflicts."
  1967. (let (i)
  1968. (setq conflicts (make-vector nstates nil)
  1969. shiftset (make-vector tokensetsize 0)
  1970. lookaheadset (make-vector tokensetsize 0)
  1971. err-table (make-vector nstates nil)
  1972. i 0)
  1973. (while (< i nstates)
  1974. (wisent-set-conflicts i)
  1975. (setq i (1+ i)))))
  1976. (defun wisent-count-sr-conflicts (state)
  1977. "Count the number of shift/reduce conflicts in specified STATE."
  1978. (let (i j k shiftp symbol v)
  1979. (setq src-count 0
  1980. shiftp (aref shift-table state))
  1981. (when shiftp
  1982. (fillarray shiftset 0)
  1983. (fillarray lookaheadset 0)
  1984. (setq k (shifts-nshifts shiftp)
  1985. v (shifts-shifts shiftp)
  1986. i 0)
  1987. (while (< i k)
  1988. (when (not (zerop (aref v i)))
  1989. (setq symbol (aref accessing-symbol (aref v i)))
  1990. (if (wisent-ISVAR symbol)
  1991. (setq i k) ;; break
  1992. (wisent-SETBIT shiftset symbol)))
  1993. (setq i (1+ i)))
  1994. (setq k (aref lookaheads (1+ state))
  1995. i (aref lookaheads state))
  1996. (while (< i k)
  1997. (setq v (aref LA i)
  1998. j 0)
  1999. (while (< j tokensetsize)
  2000. ;; lookaheadset[j] |= LA (i)[j]
  2001. (aset lookaheadset j (logior (aref lookaheadset j)
  2002. (aref v j)))
  2003. (setq j (1+ j)))
  2004. (setq i (1+ i)))
  2005. (setq k 0)
  2006. (while (< k tokensetsize)
  2007. ;; lookaheadset[k] &= shiftset[k];
  2008. (aset lookaheadset k (logand (aref lookaheadset k)
  2009. (aref shiftset k)))
  2010. (setq k (1+ k)))
  2011. (setq i 0)
  2012. (while (< i ntokens)
  2013. (if (wisent-BITISSET lookaheadset i)
  2014. (setq src-count (1+ src-count)))
  2015. (setq i (1+ i))))
  2016. src-count))
  2017. (defun wisent-count-rr-conflicts (state)
  2018. "Count the number of reduce/reduce conflicts in specified STATE."
  2019. (let (i j count n m)
  2020. (setq rrc-count 0
  2021. m (aref lookaheads state)
  2022. n (aref lookaheads (1+ state)))
  2023. (when (>= (- n m) 2)
  2024. (setq i 0)
  2025. (while (< i ntokens)
  2026. (setq count 0
  2027. j m)
  2028. (while (< j n)
  2029. (if (wisent-BITISSET (aref LA j) i)
  2030. (setq count (1+ count)))
  2031. (setq j (1+ j)))
  2032. (if (>= count 2)
  2033. (setq rrc-count (1+ rrc-count)))
  2034. (setq i (1+ i))))
  2035. rrc-count))
  2036. (defvar wisent-expected-conflicts nil
  2037. "*If non-nil suppress the warning about shift/reduce conflicts.
  2038. It is a decimal integer N that says there should be no warning if
  2039. there are N shift/reduce conflicts and no reduce/reduce conflicts. A
  2040. warning is given if there are either more or fewer conflicts, or if
  2041. there are any reduce/reduce conflicts.")
  2042. (defun wisent-total-conflicts ()
  2043. "Report the total number of conflicts."
  2044. (unless (and (zerop rrc-total)
  2045. (or (zerop src-total)
  2046. (= src-total (or wisent-expected-conflicts 0))))
  2047. (let* ((src (wisent-source))
  2048. (src (if src (concat " in " src) ""))
  2049. (msg (format "Grammar%s contains" src)))
  2050. (if (> src-total 0)
  2051. (setq msg (format "%s %d shift/reduce conflict%s"
  2052. msg src-total (if (> src-total 1)
  2053. "s" ""))))
  2054. (if (and (> src-total 0) (> rrc-total 0))
  2055. (setq msg (format "%s and" msg)))
  2056. (if (> rrc-total 0)
  2057. (setq msg (format "%s %d reduce/reduce conflict%s"
  2058. msg rrc-total (if (> rrc-total 1)
  2059. "s" ""))))
  2060. (message msg))))
  2061. (defun wisent-print-conflicts ()
  2062. "Report conflicts."
  2063. (let (i)
  2064. (setq src-total 0
  2065. rrc-total 0
  2066. i 0)
  2067. (while (< i nstates)
  2068. (when (aref conflicts i)
  2069. (wisent-count-sr-conflicts i)
  2070. (wisent-count-rr-conflicts i)
  2071. (setq src-total (+ src-total src-count)
  2072. rrc-total (+ rrc-total rrc-count))
  2073. (when (or wisent-verbose-flag wisent-debug-flag)
  2074. (wisent-log "State %d contains" i)
  2075. (if (> src-count 0)
  2076. (wisent-log " %d shift/reduce conflict%s"
  2077. src-count (if (> src-count 1) "s" "")))
  2078. (if (and (> src-count 0) (> rrc-count 0))
  2079. (wisent-log " and"))
  2080. (if (> rrc-count 0)
  2081. (wisent-log " %d reduce/reduce conflict%s"
  2082. rrc-count (if (> rrc-count 1) "s" "")))
  2083. (wisent-log ".\n")))
  2084. (setq i (1+ i)))
  2085. (wisent-total-conflicts)))
  2086. ;;;; --------------------------------------
  2087. ;;;; Report information on generated parser
  2088. ;;;; --------------------------------------
  2089. (defun wisent-print-grammar ()
  2090. "Print grammar."
  2091. (let (i j r break left-count right-count)
  2092. (wisent-log "\n\nGrammar\n\n Number, Rule\n")
  2093. (setq i 1)
  2094. (while (<= i nrules)
  2095. ;; Don't print rules disabled in `wisent-reduce-grammar-tables'.
  2096. (when (aref ruseful i)
  2097. (wisent-log " %s %s ->"
  2098. (wisent-pad-string (number-to-string i) 6)
  2099. (wisent-tag (aref rlhs i)))
  2100. (setq r (aref rrhs i))
  2101. (if (> (aref ritem r) 0)
  2102. (while (> (aref ritem r) 0)
  2103. (wisent-log " %s" (wisent-tag (aref ritem r)))
  2104. (setq r (1+ r)))
  2105. (wisent-log " /* empty */"))
  2106. (wisent-log "\n"))
  2107. (setq i (1+ i)))
  2108. (wisent-log "\n\nTerminals, with rules where they appear\n\n")
  2109. (wisent-log "%s (-1)\n" (wisent-tag 0))
  2110. (setq i 1)
  2111. (while (< i ntokens)
  2112. (wisent-log "%s (%d)" (wisent-tag i) i)
  2113. (setq j 1)
  2114. (while (<= j nrules)
  2115. (setq r (aref rrhs j)
  2116. break nil)
  2117. (while (and (not break) (> (aref ritem r) 0))
  2118. (if (setq break (= (aref ritem r) i))
  2119. (wisent-log " %d" j)
  2120. (setq r (1+ r))))
  2121. (setq j (1+ j)))
  2122. (wisent-log "\n")
  2123. (setq i (1+ i)))
  2124. (wisent-log "\n\nNonterminals, with rules where they appear\n\n")
  2125. (setq i ntokens)
  2126. (while (< i nsyms)
  2127. (setq left-count 0
  2128. right-count 0
  2129. j 1)
  2130. (while (<= j nrules)
  2131. (if (= (aref rlhs j) i)
  2132. (setq left-count (1+ left-count)))
  2133. (setq r (aref rrhs j)
  2134. break nil)
  2135. (while (and (not break) (> (aref ritem r) 0))
  2136. (if (= (aref ritem r) i)
  2137. (setq right-count (1+ right-count)
  2138. break t)
  2139. (setq r (1+ r))))
  2140. (setq j (1+ j)))
  2141. (wisent-log "%s (%d)\n " (wisent-tag i) i)
  2142. (when (> left-count 0)
  2143. (wisent-log " on left:")
  2144. (setq j 1)
  2145. (while (<= j nrules)
  2146. (if (= (aref rlhs j) i)
  2147. (wisent-log " %d" j))
  2148. (setq j (1+ j))))
  2149. (when (> right-count 0)
  2150. (if (> left-count 0)
  2151. (wisent-log ","))
  2152. (wisent-log " on right:")
  2153. (setq j 1)
  2154. (while (<= j nrules)
  2155. (setq r (aref rrhs j)
  2156. break nil)
  2157. (while (and (not break) (> (aref ritem r) 0))
  2158. (if (setq break (= (aref ritem r) i))
  2159. (wisent-log " %d" j)
  2160. (setq r (1+ r))))
  2161. (setq j (1+ j))))
  2162. (wisent-log "\n")
  2163. (setq i (1+ i)))
  2164. ))
  2165. (defun wisent-print-reductions (state)
  2166. "Print reductions on STATE."
  2167. (let (i j k v symbol m n defaulted
  2168. default-LA default-rule cmax count shiftp errp nodefault)
  2169. (setq nodefault nil
  2170. i 0)
  2171. (fillarray shiftset 0)
  2172. (setq shiftp (aref shift-table state))
  2173. (when shiftp
  2174. (setq k (shifts-nshifts shiftp)
  2175. v (shifts-shifts shiftp)
  2176. i 0)
  2177. (while (< i k)
  2178. (when (not (zerop (aref v i)))
  2179. (setq symbol (aref accessing-symbol (aref v i)))
  2180. (if (wisent-ISVAR symbol)
  2181. (setq i k) ;; break
  2182. ;; If this state has a shift for the error token, don't
  2183. ;; use a default rule.
  2184. (if (= symbol error-token-number)
  2185. (setq nodefault t))
  2186. (wisent-SETBIT shiftset symbol)))
  2187. (setq i (1+ i))))
  2188. (setq errp (aref err-table state))
  2189. (when errp
  2190. (setq k (errs-nerrs errp)
  2191. v (errs-errs errp)
  2192. i 0)
  2193. (while (< i k)
  2194. (if (not (zerop (setq symbol (aref v i))))
  2195. (wisent-SETBIT shiftset symbol))
  2196. (setq i (1+ i))))
  2197. (setq m (aref lookaheads state)
  2198. n (aref lookaheads (1+ state)))
  2199. (cond
  2200. ((and (= (- n m) 1) (not nodefault))
  2201. (setq default-rule (aref LAruleno m)
  2202. v (aref LA m)
  2203. k 0)
  2204. (while (< k tokensetsize)
  2205. (aset lookaheadset k (logand (aref v k)
  2206. (aref shiftset k)))
  2207. (setq k (1+ k)))
  2208. (setq i 0)
  2209. (while (< i ntokens)
  2210. (if (wisent-BITISSET lookaheadset i)
  2211. (wisent-log " %s\t[reduce using rule %d (%s)]\n"
  2212. (wisent-tag i) default-rule
  2213. (wisent-tag (aref rlhs default-rule))))
  2214. (setq i (1+ i)))
  2215. (wisent-log " $default\treduce using rule %d (%s)\n\n"
  2216. default-rule
  2217. (wisent-tag (aref rlhs default-rule)))
  2218. )
  2219. ((>= (- n m) 1)
  2220. (setq cmax 0
  2221. default-LA -1
  2222. default-rule 0)
  2223. (when (not nodefault)
  2224. (setq i m)
  2225. (while (< i n)
  2226. (setq v (aref LA i)
  2227. count 0
  2228. k 0)
  2229. (while (< k tokensetsize)
  2230. ;; lookaheadset[k] = LA (i)[k] & ~shiftset[k]
  2231. (aset lookaheadset k
  2232. (logand (aref v k)
  2233. (lognot (aref shiftset k))))
  2234. (setq k (1+ k)))
  2235. (setq j 0)
  2236. (while (< j ntokens)
  2237. (if (wisent-BITISSET lookaheadset j)
  2238. (setq count (1+ count)))
  2239. (setq j (1+ j)))
  2240. (if (> count cmax)
  2241. (setq cmax count
  2242. default-LA i
  2243. default-rule (aref LAruleno i)))
  2244. (setq k 0)
  2245. (while (< k tokensetsize)
  2246. (aset shiftset k (logior (aref shiftset k)
  2247. (aref lookaheadset k)))
  2248. (setq k (1+ k)))
  2249. (setq i (1+ i))))
  2250. (fillarray shiftset 0)
  2251. (when shiftp
  2252. (setq k (shifts-nshifts shiftp)
  2253. v (shifts-shifts shiftp)
  2254. i 0)
  2255. (while (< i k)
  2256. (when (not (zerop (aref v i)))
  2257. (setq symbol (aref accessing-symbol (aref v i)))
  2258. (if (wisent-ISVAR symbol)
  2259. (setq i k) ;; break
  2260. (wisent-SETBIT shiftset symbol)))
  2261. (setq i (1+ i))))
  2262. (setq i 0)
  2263. (while (< i ntokens)
  2264. (setq defaulted nil
  2265. count (if (wisent-BITISSET shiftset i) 1 0)
  2266. j m)
  2267. (while (< j n)
  2268. (when (wisent-BITISSET (aref LA j) i)
  2269. (if (zerop count)
  2270. (progn
  2271. (if (not (= j default-LA))
  2272. (wisent-log
  2273. " %s\treduce using rule %d (%s)\n"
  2274. (wisent-tag i) (aref LAruleno j)
  2275. (wisent-tag (aref rlhs (aref LAruleno j))))
  2276. (setq defaulted t))
  2277. (setq count (1+ count)))
  2278. (if defaulted
  2279. (wisent-log
  2280. " %s\treduce using rule %d (%s)\n"
  2281. (wisent-tag i) (aref LAruleno default-LA)
  2282. (wisent-tag (aref rlhs (aref LAruleno default-LA)))))
  2283. (setq defaulted nil)
  2284. (wisent-log
  2285. " %s\t[reduce using rule %d (%s)]\n"
  2286. (wisent-tag i) (aref LAruleno j)
  2287. (wisent-tag (aref rlhs (aref LAruleno j))))))
  2288. (setq j (1+ j)))
  2289. (setq i (1+ i)))
  2290. (if (>= default-LA 0)
  2291. (wisent-log
  2292. " $default\treduce using rule %d (%s)\n"
  2293. default-rule
  2294. (wisent-tag (aref rlhs default-rule))))
  2295. ))))
  2296. (defun wisent-print-actions (state)
  2297. "Print actions on STATE."
  2298. (let (i j k v state1 symbol shiftp errp redp rule nerrs break)
  2299. (setq shiftp (aref shift-table state)
  2300. redp (aref reduction-table state)
  2301. errp (aref err-table state))
  2302. (if (and (not shiftp) (not redp))
  2303. (if (= final-state state)
  2304. (wisent-log " $default\taccept\n")
  2305. (wisent-log " NO ACTIONS\n"))
  2306. (if (not shiftp)
  2307. (setq i 0
  2308. k 0)
  2309. (setq k (shifts-nshifts shiftp)
  2310. v (shifts-shifts shiftp)
  2311. i 0
  2312. break nil)
  2313. (while (and (not break) (< i k))
  2314. (if (zerop (setq state1 (aref v i)))
  2315. (setq i (1+ i))
  2316. (setq symbol (aref accessing-symbol state1))
  2317. ;; The following line used to be turned off.
  2318. (if (wisent-ISVAR symbol)
  2319. (setq break t) ;; break
  2320. (wisent-log " %s\tshift, and go to state %d\n"
  2321. (wisent-tag symbol) state1)
  2322. (setq i (1+ i)))))
  2323. (if (> i 0)
  2324. (wisent-log "\n")))
  2325. (when errp
  2326. (setq nerrs (errs-nerrs errp)
  2327. v (errs-errs errp)
  2328. j 0)
  2329. (while (< j nerrs)
  2330. (if (aref v j)
  2331. (wisent-log " %s\terror (nonassociative)\n"
  2332. (wisent-tag (aref v j))))
  2333. (setq j (1+ j)))
  2334. (if (> j 0)
  2335. (wisent-log "\n")))
  2336. (cond
  2337. ((and (aref consistent state) redp)
  2338. (setq rule (aref (reductions-rules redp) 0)
  2339. symbol (aref rlhs rule))
  2340. (wisent-log " $default\treduce using rule %d (%s)\n\n"
  2341. rule (wisent-tag symbol))
  2342. )
  2343. (redp
  2344. (wisent-print-reductions state)
  2345. ))
  2346. (when (< i k)
  2347. (setq v (shifts-shifts shiftp))
  2348. (while (< i k)
  2349. (when (setq state1 (aref v i))
  2350. (setq symbol (aref accessing-symbol state1))
  2351. (wisent-log " %s\tgo to state %d\n"
  2352. (wisent-tag symbol) state1))
  2353. (setq i (1+ i)))
  2354. (wisent-log "\n"))
  2355. )))
  2356. (defun wisent-print-core (state)
  2357. "Print STATE core."
  2358. (let (i k rule statep sp sp1)
  2359. (setq statep (aref state-table state)
  2360. k (core-nitems statep))
  2361. (when (> k 0)
  2362. (setq i 0)
  2363. (while (< i k)
  2364. ;; sp1 = sp = ritem + statep->items[i];
  2365. (setq sp1 (aref (core-items statep) i)
  2366. sp sp1)
  2367. (while (> (aref ritem sp) 0)
  2368. (setq sp (1+ sp)))
  2369. (setq rule (- (aref ritem sp)))
  2370. (wisent-log " %s -> " (wisent-tag (aref rlhs rule)))
  2371. (setq sp (aref rrhs rule))
  2372. (while (< sp sp1)
  2373. (wisent-log "%s " (wisent-tag (aref ritem sp)))
  2374. (setq sp (1+ sp)))
  2375. (wisent-log ".")
  2376. (while (> (aref ritem sp) 0)
  2377. (wisent-log " %s" (wisent-tag (aref ritem sp)))
  2378. (setq sp (1+ sp)))
  2379. (wisent-log " (rule %d)\n" rule)
  2380. (setq i (1+ i)))
  2381. (wisent-log "\n"))))
  2382. (defun wisent-print-state (state)
  2383. "Print information on STATE."
  2384. (wisent-log "\n\nstate %d\n\n" state)
  2385. (wisent-print-core state)
  2386. (wisent-print-actions state))
  2387. (defun wisent-print-states ()
  2388. "Print information on states."
  2389. (let ((i 0))
  2390. (while (< i nstates)
  2391. (wisent-print-state i)
  2392. (setq i (1+ i)))))
  2393. (defun wisent-print-results ()
  2394. "Print information on generated parser.
  2395. Report detailed information if `wisent-verbose-flag' or
  2396. `wisent-debug-flag' are non-nil."
  2397. (when (or wisent-verbose-flag wisent-debug-flag)
  2398. (wisent-print-useless))
  2399. (wisent-print-conflicts)
  2400. (when (or wisent-verbose-flag wisent-debug-flag)
  2401. (wisent-print-grammar)
  2402. (wisent-print-states))
  2403. ;; Append output to log file when running in batch mode
  2404. (when (wisent-noninteractive)
  2405. (wisent-append-to-log-file)
  2406. (wisent-clear-log)))
  2407. ;;;; ---------------------------------
  2408. ;;;; Build the generated parser tables
  2409. ;;;; ---------------------------------
  2410. (defun wisent-action-row (state actrow)
  2411. "Figure out the actions for the specified STATE.
  2412. Decide what to do for each type of token if seen as the lookahead
  2413. token in specified state. The value returned is used as the default
  2414. action for the state. In addition, ACTROW is filled with what to do
  2415. for each kind of token, index by symbol number, with nil meaning do
  2416. the default action. The value 'error, means this situation is an
  2417. error. The parser recognizes this value specially.
  2418. This is where conflicts are resolved. The loop over lookahead rules
  2419. considered lower-numbered rules last, and the last rule considered
  2420. that likes a token gets to handle it."
  2421. (let (i j k m n v default-rule nreds rule max count
  2422. shift-state symbol redp shiftp errp nodefault)
  2423. (fillarray actrow nil)
  2424. (setq default-rule 0
  2425. nodefault nil ;; nil inhibit having any default reduction
  2426. nreds 0
  2427. m 0
  2428. n 0
  2429. redp (aref reduction-table state))
  2430. (when redp
  2431. (setq nreds (reductions-nreds redp))
  2432. (when (>= nreds 1)
  2433. ;; loop over all the rules available here which require
  2434. ;; lookahead
  2435. (setq m (aref lookaheads state)
  2436. n (aref lookaheads (1+ state))
  2437. i (1- n))
  2438. (while (>= i m)
  2439. ;; and find each token which the rule finds acceptable to
  2440. ;; come next
  2441. (setq j 0)
  2442. (while (< j ntokens)
  2443. ;; and record this rule as the rule to use if that token
  2444. ;; follows.
  2445. (if (wisent-BITISSET (aref LA i) j)
  2446. (aset actrow j (- (aref LAruleno i)))
  2447. )
  2448. (setq j (1+ j)))
  2449. (setq i (1- i)))))
  2450. ;; Now see which tokens are allowed for shifts in this state. For
  2451. ;; them, record the shift as the thing to do. So shift is
  2452. ;; preferred to reduce.
  2453. (setq shiftp (aref shift-table state))
  2454. (when shiftp
  2455. (setq k (shifts-nshifts shiftp)
  2456. v (shifts-shifts shiftp)
  2457. i 0)
  2458. (while (< i k)
  2459. (setq shift-state (aref v i))
  2460. (if (zerop shift-state)
  2461. nil ;; continue
  2462. (setq symbol (aref accessing-symbol shift-state))
  2463. (if (wisent-ISVAR symbol)
  2464. (setq i k) ;; break
  2465. (aset actrow symbol shift-state)
  2466. ;; Do not use any default reduction if there is a shift
  2467. ;; for error
  2468. (if (= symbol error-token-number)
  2469. (setq nodefault t))))
  2470. (setq i (1+ i))))
  2471. ;; See which tokens are an explicit error in this state (due to
  2472. ;; %nonassoc). For them, record error as the action.
  2473. (setq errp (aref err-table state))
  2474. (when errp
  2475. (setq k (errs-nerrs errp)
  2476. v (errs-errs errp)
  2477. i 0)
  2478. (while (< i k)
  2479. (aset actrow (aref v i) wisent-error-tag)
  2480. (setq i (1+ i))))
  2481. ;; Now find the most common reduction and make it the default
  2482. ;; action for this state.
  2483. (when (and (>= nreds 1) (not nodefault))
  2484. (if (aref consistent state)
  2485. (setq default-rule (- (aref (reductions-rules redp) 0)))
  2486. (setq max 0
  2487. i m)
  2488. (while (< i n)
  2489. (setq count 0
  2490. rule (- (aref LAruleno i))
  2491. j 0)
  2492. (while (< j ntokens)
  2493. (if (and (numberp (aref actrow j))
  2494. (= (aref actrow j) rule))
  2495. (setq count (1+ count)))
  2496. (setq j (1+ j)))
  2497. (if (> count max)
  2498. (setq max count
  2499. default-rule rule))
  2500. (setq i (1+ i)))
  2501. ;; actions which match the default are replaced with zero,
  2502. ;; which means "use the default"
  2503. (when (> max 0)
  2504. (setq j 0)
  2505. (while (< j ntokens)
  2506. (if (and (numberp (aref actrow j))
  2507. (= (aref actrow j) default-rule))
  2508. (aset actrow j nil))
  2509. (setq j (1+ j)))
  2510. )))
  2511. ;; If have no default rule, if this is the final state the default
  2512. ;; is accept else it is an error. So replace any action which
  2513. ;; says "error" with "use default".
  2514. (when (zerop default-rule)
  2515. (if (= final-state state)
  2516. (setq default-rule wisent-accept-tag)
  2517. (setq j 0)
  2518. (while (< j ntokens)
  2519. (if (eq (aref actrow j) wisent-error-tag)
  2520. (aset actrow j nil))
  2521. (setq j (1+ j)))
  2522. (setq default-rule wisent-error-tag)))
  2523. default-rule))
  2524. (defconst wisent-default-tag 'default
  2525. "Tag used in an action table to indicate a default action.")
  2526. ;; These variables only exist locally in the function
  2527. ;; `wisent-state-actions' and are shared by all other nested callees.
  2528. (wisent-defcontext semantic-actions
  2529. ;; Uninterned symbols used in code generation.
  2530. stack sp gotos state
  2531. ;; Name of the current semantic action
  2532. NAME)
  2533. (defun wisent-state-actions ()
  2534. "Figure out the actions for every state.
  2535. Return the action table."
  2536. ;; Store the semantic action obarray in (unused) RCODE[0].
  2537. (aset rcode 0 (make-vector 13 0))
  2538. (let (i j action-table actrow action)
  2539. (setq action-table (make-vector nstates nil)
  2540. actrow (make-vector ntokens nil)
  2541. i 0)
  2542. (wisent-with-context semantic-actions
  2543. (setq stack (make-symbol "stack")
  2544. sp (make-symbol "sp")
  2545. gotos (make-symbol "gotos")
  2546. state (make-symbol "state"))
  2547. (while (< i nstates)
  2548. (setq action (wisent-action-row i actrow))
  2549. ;; Translate a reduction into semantic action
  2550. (and (integerp action) (< action 0)
  2551. (setq action (wisent-semantic-action (- action))))
  2552. (aset action-table i (list (cons wisent-default-tag action)))
  2553. (setq j 0)
  2554. (while (< j ntokens)
  2555. (when (setq action (aref actrow j))
  2556. ;; Translate a reduction into semantic action
  2557. (and (integerp action) (< action 0)
  2558. (setq action (wisent-semantic-action (- action))))
  2559. (aset action-table i (cons (cons (aref tags j) action)
  2560. (aref action-table i)))
  2561. )
  2562. (setq j (1+ j)))
  2563. (aset action-table i (nreverse (aref action-table i)))
  2564. (setq i (1+ i)))
  2565. action-table)))
  2566. (defun wisent-goto-actions ()
  2567. "Figure out what to do after reducing with each rule.
  2568. Depending on the saved state from before the beginning of parsing the
  2569. data that matched this rule. Return the goto table."
  2570. (let (i j m n symbol state goto-table)
  2571. (setq goto-table (make-vector nstates nil)
  2572. i ntokens)
  2573. (while (< i nsyms)
  2574. (setq symbol (- i ntokens)
  2575. m (aref goto-map symbol)
  2576. n (aref goto-map (1+ symbol))
  2577. j m)
  2578. (while (< j n)
  2579. (setq state (aref from-state j))
  2580. (aset goto-table state
  2581. (cons (cons (aref tags i) (aref to-state j))
  2582. (aref goto-table state)))
  2583. (setq j (1+ j)))
  2584. (setq i (1+ i)))
  2585. goto-table))
  2586. (defsubst wisent-quote-p (sym)
  2587. "Return non-nil if SYM is bound to the `quote' function."
  2588. (condition-case nil
  2589. (eq (indirect-function sym)
  2590. (indirect-function 'quote))
  2591. (error nil)))
  2592. (defsubst wisent-backquote-p (sym)
  2593. "Return non-nil if SYM is bound to the `backquote' function."
  2594. (condition-case nil
  2595. (eq (indirect-function sym)
  2596. (indirect-function 'backquote))
  2597. (error nil)))
  2598. (defun wisent-check-$N (x m)
  2599. "Return non-nil if X is a valid $N or $regionN symbol.
  2600. That is if X is a $N or $regionN symbol with N >= 1 and N <= M.
  2601. Also warn if X is a $N or $regionN symbol with N < 1 or N > M."
  2602. (when (symbolp x)
  2603. (let* ((n (symbol-name x))
  2604. (i (and (string-match "\\`\\$\\(region\\)?\\([0-9]+\\)\\'" n)
  2605. (string-to-number (match-string 2 n)))))
  2606. (when i
  2607. (if (and (>= i 1) (<= i m))
  2608. t
  2609. (message
  2610. "*** In %s, %s might be a free variable (rule has %s)"
  2611. NAME x (format (cond ((< m 1) "no component")
  2612. ((= m 1) "%d component")
  2613. ("%d components"))
  2614. m))
  2615. nil)))))
  2616. (defun wisent-semantic-action-expand-body (body n &optional found)
  2617. "Parse BODY of semantic action.
  2618. N is the maximum number of $N variables that can be referenced in
  2619. BODY. Warn on references out of permitted range.
  2620. Optional argument FOUND is the accumulated list of '$N' references
  2621. encountered so far.
  2622. Return a cons (FOUND . XBODY), where FOUND is the list of $N
  2623. references found in BODY, and XBODY is BODY expression with
  2624. `backquote' forms expanded."
  2625. (if (not (listp body))
  2626. ;; BODY is an atom, no expansion needed
  2627. (progn
  2628. (if (wisent-check-$N body n)
  2629. ;; Accumulate $i symbol
  2630. (add-to-list 'found body))
  2631. (cons found body))
  2632. ;; BODY is a list, expand inside it
  2633. (let (xbody sexpr)
  2634. ;; If backquote expand it first
  2635. (if (wisent-backquote-p (car body))
  2636. (setq body (macroexpand body)))
  2637. (while body
  2638. (setq sexpr (car body)
  2639. body (cdr body))
  2640. (cond
  2641. ;; Function call excepted quote expression
  2642. ((and (consp sexpr)
  2643. (not (wisent-quote-p (car sexpr))))
  2644. (setq sexpr (wisent-semantic-action-expand-body sexpr n found)
  2645. found (car sexpr)
  2646. sexpr (cdr sexpr)))
  2647. ;; $i symbol
  2648. ((wisent-check-$N sexpr n)
  2649. ;; Accumulate $i symbol
  2650. (add-to-list 'found sexpr))
  2651. )
  2652. ;; Accumulate expanded forms
  2653. (setq xbody (nconc xbody (list sexpr))))
  2654. (cons found xbody))))
  2655. (defun wisent-semantic-action (r)
  2656. "Set up the Elisp function for semantic action at rule R.
  2657. On entry RCODE[R] contains a vector [BODY N (NTERM I)] where BODY is the
  2658. body of the semantic action, N is the maximum number of values
  2659. available in the parser's stack, NTERM is the nonterminal the semantic
  2660. action belongs to, and I is the index of the semantic action inside
  2661. NTERM definition. Return the semantic action symbol.
  2662. The semantic action function accepts three arguments:
  2663. - the state/value stack
  2664. - the top-of-stack index
  2665. - the goto table
  2666. And returns the updated top-of-stack index."
  2667. (if (not (aref ruseful r))
  2668. (aset rcode r nil)
  2669. (let* ((actn (aref rcode r))
  2670. (n (aref actn 1)) ; nb of val avail. in stack
  2671. (NAME (apply 'format "%s:%d" (aref actn 2)))
  2672. (form (wisent-semantic-action-expand-body (aref actn 0) n))
  2673. ($l (car form)) ; list of $vars used in body
  2674. (form (cdr form)) ; expanded form of body
  2675. (nt (aref rlhs r)) ; nonterminal item no.
  2676. (bl nil) ; `let*' binding list
  2677. $v i j)
  2678. ;; Compute $N and $regionN bindings
  2679. (setq i n)
  2680. (while (> i 0)
  2681. (setq j (1+ (* 2 (- n i))))
  2682. ;; Only bind $regionI if used in action
  2683. (setq $v (intern (format "$region%d" i)))
  2684. (if (memq $v $l)
  2685. (setq bl (cons `(,$v (cdr (aref ,stack (- ,sp ,j)))) bl)))
  2686. ;; Only bind $I if used in action
  2687. (setq $v (intern (format "$%d" i)))
  2688. (if (memq $v $l)
  2689. (setq bl (cons `(,$v (car (aref ,stack (- ,sp ,j)))) bl)))
  2690. (setq i (1- i)))
  2691. ;; Compute J, the length of rule's RHS. It will give the
  2692. ;; current parser state at STACK[SP - 2*J], and where to push
  2693. ;; the new semantic value and the next state, respectively at:
  2694. ;; STACK[SP - 2*J + 1] and STACK[SP - 2*J + 2]. Generally N,
  2695. ;; the maximum number of values available in the stack, is equal
  2696. ;; to J. But, for mid-rule actions, N is the number of rule
  2697. ;; elements before the action and J is always 0 (empty rule).
  2698. (setq i (aref rrhs r)
  2699. j 0)
  2700. (while (> (aref ritem i) 0)
  2701. (setq j (1+ j)
  2702. i (1+ i)))
  2703. ;; Create the semantic action symbol.
  2704. (setq actn (intern NAME (aref rcode 0)))
  2705. ;; Store source code in function cell of the semantic action
  2706. ;; symbol. It will be byte-compiled at automaton's compilation
  2707. ;; time. Using a byte-compiled automaton can significantly
  2708. ;; speed up parsing!
  2709. (fset actn
  2710. `(lambda (,stack ,sp ,gotos)
  2711. (let* (,@bl
  2712. ($region
  2713. ,(cond
  2714. ((= n 1)
  2715. (if (assq '$region1 bl)
  2716. '$region1
  2717. `(cdr (aref ,stack (1- ,sp)))))
  2718. ((> n 1)
  2719. `(wisent-production-bounds
  2720. ,stack (- ,sp ,(1- (* 2 n))) (1- ,sp)))))
  2721. ($action ,NAME)
  2722. ($nterm ',(aref tags nt))
  2723. ,@(and (> j 0) `((,sp (- ,sp ,(* j 2)))))
  2724. (,state (cdr (assq $nterm
  2725. (aref ,gotos
  2726. (aref ,stack ,sp))))))
  2727. (setq ,sp (+ ,sp 2))
  2728. ;; push semantic value
  2729. (aset ,stack (1- ,sp) (cons ,form $region))
  2730. ;; push next state
  2731. (aset ,stack ,sp ,state)
  2732. ;; return new top of stack
  2733. ,sp)))
  2734. ;; Return the semantic action symbol
  2735. actn)))
  2736. ;;;; ----------------------------
  2737. ;;;; Build parser LALR automaton.
  2738. ;;;; ----------------------------
  2739. (defun wisent-parser-automaton ()
  2740. "Compute and return LALR(1) automaton from GRAMMAR.
  2741. GRAMMAR is in internal format. GRAM/ACTS are grammar rules
  2742. in internal format. STARTS defines the start symbols."
  2743. ;; Check for useless stuff
  2744. (wisent-reduce-grammar)
  2745. (wisent-set-derives)
  2746. (wisent-set-nullable)
  2747. ;; convert to nondeterministic finite state machine.
  2748. (wisent-generate-states)
  2749. ;; make it deterministic.
  2750. (wisent-lalr)
  2751. ;; Find and record any conflicts: places where one token of
  2752. ;; lookahead is not enough to disambiguate the parsing. Also
  2753. ;; resolve s/r conflicts based on precedence declarations.
  2754. (wisent-resolve-conflicts)
  2755. (wisent-print-results)
  2756. (vector (wisent-state-actions) ; action table
  2757. (wisent-goto-actions) ; goto table
  2758. start-table ; start symbols
  2759. (aref rcode 0) ; sem. action symbol obarray
  2760. )
  2761. )
  2762. ;;;; -------------------
  2763. ;;;; Parse input grammar
  2764. ;;;; -------------------
  2765. (defconst wisent-reserved-symbols (list wisent-error-term)
  2766. "The list of reserved symbols.
  2767. Also all symbols starting with a character defined in
  2768. `wisent-reserved-capitals' are reserved for internal use.")
  2769. (defconst wisent-reserved-capitals '(?\$ ?\@)
  2770. "The list of reserved capital letters.
  2771. All symbol starting with one of these letters are reserved for
  2772. internal use.")
  2773. (defconst wisent-starts-nonterm '$STARTS
  2774. "Main start symbol.
  2775. It gives the rules for start symbols.")
  2776. (defvar wisent-single-start-flag nil
  2777. "Non-nil means allows only one start symbol like in Bison.
  2778. That is don't add extra start rules to the grammar. This is
  2779. useful to compare the Wisent's generated automaton with the Bison's
  2780. one.")
  2781. (defsubst wisent-ISVALID-VAR (x)
  2782. "Return non-nil if X is a character or an allowed symbol."
  2783. (and x (symbolp x)
  2784. (not (memq (aref (symbol-name x) 0) wisent-reserved-capitals))
  2785. (not (memq x wisent-reserved-symbols))))
  2786. (defsubst wisent-ISVALID-TOKEN (x)
  2787. "Return non-nil if X is a character or an allowed symbol."
  2788. (or (wisent-char-p x)
  2789. (wisent-ISVALID-VAR x)))
  2790. (defun wisent-push-token (symbol &optional nocheck)
  2791. "Push a new SYMBOL in the list of tokens.
  2792. Bypass checking if NOCHECK is non-nil."
  2793. ;; Check
  2794. (or nocheck (wisent-ISVALID-TOKEN symbol)
  2795. (error "Invalid terminal symbol: %S" symbol))
  2796. (if (memq symbol token-list)
  2797. (message "*** duplicate terminal `%s' ignored" symbol)
  2798. ;; Set up properties
  2799. (wisent-set-prec symbol nil)
  2800. (wisent-set-assoc symbol nil)
  2801. (wisent-set-item-number symbol ntokens)
  2802. ;; Add
  2803. (setq ntokens (1+ ntokens)
  2804. token-list (cons symbol token-list))))
  2805. (defun wisent-push-var (symbol &optional nocheck)
  2806. "Push a new SYMBOL in the list of nonterminals.
  2807. Bypass checking if NOCHECK is non-nil."
  2808. ;; Check
  2809. (unless nocheck
  2810. (or (wisent-ISVALID-VAR symbol)
  2811. (error "Invalid nonterminal symbol: %S" symbol))
  2812. (if (memq symbol var-list)
  2813. (error "Nonterminal `%s' already defined" symbol)))
  2814. ;; Set up properties
  2815. (wisent-set-item-number symbol nvars)
  2816. ;; Add
  2817. (setq nvars (1+ nvars)
  2818. var-list (cons symbol var-list)))
  2819. (defun wisent-parse-nonterminals (defs)
  2820. "Parse nonterminal definitions in DEFS.
  2821. Fill in each element of the global arrays RPREC, RCODE, RUSEFUL with
  2822. respectively rule precedence level, semantic action code and
  2823. usefulness flag. Return a list of rules of the form (LHS . RHS) where
  2824. LHS and RHS are respectively the Left Hand Side and Right Hand Side of
  2825. the rule."
  2826. (setq rprec nil
  2827. rcode nil
  2828. nitems 0
  2829. nrules 0)
  2830. (let (def nonterm rlist rule rules rhs rest item items
  2831. rhl plevel semact @n @count iactn)
  2832. (setq @count 0)
  2833. (while defs
  2834. (setq def (car defs)
  2835. defs (cdr defs)
  2836. nonterm (car def)
  2837. rlist (cdr def)
  2838. iactn 0)
  2839. (or (consp rlist)
  2840. (error "Invalid nonterminal definition syntax: %S" def))
  2841. (while rlist
  2842. (setq rule (car rlist)
  2843. rlist (cdr rlist)
  2844. items (car rule)
  2845. rest (cdr rule)
  2846. rhl 0
  2847. rhs nil)
  2848. ;; Check & count items
  2849. (setq nitems (1+ nitems)) ;; LHS item
  2850. (while items
  2851. (setq item (car items)
  2852. items (cdr items)
  2853. nitems (1+ nitems)) ;; RHS items
  2854. (if (listp item)
  2855. ;; Mid-rule action
  2856. (progn
  2857. (setq @count (1+ @count)
  2858. @n (intern (format "@%d" @count)))
  2859. (wisent-push-var @n t)
  2860. ;; Push a new empty rule with the mid-rule action
  2861. (setq semact (vector item rhl (list nonterm iactn))
  2862. iactn (1+ iactn)
  2863. plevel nil
  2864. rcode (cons semact rcode)
  2865. rprec (cons plevel rprec)
  2866. item @n ;; Replace action by @N nonterminal
  2867. rules (cons (list item) rules)
  2868. nitems (1+ nitems)
  2869. nrules (1+ nrules)))
  2870. ;; Check terminal or nonterminal symbol
  2871. (cond
  2872. ((or (memq item token-list) (memq item var-list)))
  2873. ;; Create new literal character token
  2874. ((wisent-char-p item) (wisent-push-token item t))
  2875. ((error "Symbol `%s' is used, but is not defined as a token and has no rules"
  2876. item))))
  2877. (setq rhl (1+ rhl)
  2878. rhs (cons item rhs)))
  2879. ;; Check & collect rule precedence level
  2880. (setq plevel (when (vectorp (car rest))
  2881. (setq item (car rest)
  2882. rest (cdr rest))
  2883. (if (and (= (length item) 1)
  2884. (memq (aref item 0) token-list)
  2885. (wisent-prec (aref item 0)))
  2886. (wisent-item-number (aref item 0))
  2887. (error "Invalid rule precedence level syntax: %S" item)))
  2888. rprec (cons plevel rprec))
  2889. ;; Check & collect semantic action body
  2890. (setq semact (vector
  2891. (if rest
  2892. (if (cdr rest)
  2893. (error "Invalid semantic action syntax: %S" rest)
  2894. (car rest))
  2895. ;; Give a default semantic action body: nil
  2896. ;; for an empty rule or $1, the value of the
  2897. ;; first symbol in the rule, otherwise.
  2898. (if (> rhl 0) '$1 '()))
  2899. rhl
  2900. (list nonterm iactn))
  2901. iactn (1+ iactn)
  2902. rcode (cons semact rcode))
  2903. (setq rules (cons (cons nonterm (nreverse rhs)) rules)
  2904. nrules (1+ nrules))))
  2905. (setq ruseful (make-vector (1+ nrules) t)
  2906. rprec (vconcat (cons nil (nreverse rprec)))
  2907. rcode (vconcat (cons nil (nreverse rcode))))
  2908. (nreverse rules)
  2909. ))
  2910. (defun wisent-parse-grammar (grammar &optional start-list)
  2911. "Parse GRAMMAR and build a suitable internal representation.
  2912. Optional argument START-LIST defines the start symbols.
  2913. GRAMMAR is a list of form: (TOKENS ASSOCS . NONTERMS)
  2914. TOKENS is a list of terminal symbols (tokens).
  2915. ASSOCS is nil or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
  2916. describing the associativity of TOKENS. ASSOC-TYPE must be one of the
  2917. `default-prec' `nonassoc', `left' or `right' symbols. When ASSOC-TYPE
  2918. is `default-prec', ASSOC-VALUE must be nil or t (the default).
  2919. Otherwise it is a list of tokens which must have been previously
  2920. declared in TOKENS.
  2921. NONTERMS is the list of non terminal definitions (see function
  2922. `wisent-parse-nonterminals')."
  2923. (or (and (consp grammar) (> (length grammar) 2))
  2924. (error "Bad input grammar"))
  2925. (let (i r rhs pre dpre lst start-var assoc rules item
  2926. token var def tokens defs ep-token ep-var ep-def)
  2927. ;; Built-in tokens
  2928. (setq ntokens 0 nvars 0)
  2929. (wisent-push-token wisent-eoi-term t)
  2930. (wisent-push-token wisent-error-term t)
  2931. ;; Check/collect terminals
  2932. (setq lst (car grammar))
  2933. (while lst
  2934. (wisent-push-token (car lst))
  2935. (setq lst (cdr lst)))
  2936. ;; Check/Set up tokens precedence & associativity
  2937. (setq lst (nth 1 grammar)
  2938. pre 0
  2939. defs nil
  2940. dpre nil
  2941. default-prec t)
  2942. (while lst
  2943. (setq def (car lst)
  2944. assoc (car def)
  2945. tokens (cdr def)
  2946. lst (cdr lst))
  2947. (if (eq assoc 'default-prec)
  2948. (progn
  2949. (or (null (cdr tokens))
  2950. (memq (car tokens) '(t nil))
  2951. (error "Invalid default-prec value: %S" tokens))
  2952. (setq default-prec (car tokens))
  2953. (if dpre
  2954. (message "*** redefining default-prec to %s"
  2955. default-prec))
  2956. (setq dpre t))
  2957. (or (memq assoc '(left right nonassoc))
  2958. (error "Invalid associativity syntax: %S" assoc))
  2959. (setq pre (1+ pre))
  2960. (while tokens
  2961. (setq token (car tokens)
  2962. tokens (cdr tokens))
  2963. (if (memq token defs)
  2964. (message "*** redefining precedence of `%s'" token))
  2965. (or (memq token token-list)
  2966. ;; Define token not previously declared.
  2967. (wisent-push-token token))
  2968. (setq defs (cons token defs))
  2969. ;; Record the precedence and associativity of the terminal.
  2970. (wisent-set-prec token pre)
  2971. (wisent-set-assoc token assoc))))
  2972. ;; Check/Collect nonterminals
  2973. (setq lst (nthcdr 2 grammar)
  2974. defs nil)
  2975. (while lst
  2976. (setq def (car lst)
  2977. lst (cdr lst))
  2978. (or (consp def)
  2979. (error "Invalid nonterminal definition: %S" def))
  2980. (if (memq (car def) token-list)
  2981. (error "Nonterminal `%s' already defined as token" (car def)))
  2982. (wisent-push-var (car def))
  2983. (setq defs (cons def defs)))
  2984. (or defs
  2985. (error "No input grammar"))
  2986. (setq defs (nreverse defs))
  2987. ;; Set up the start symbol.
  2988. (setq start-table nil)
  2989. (cond
  2990. ;; 1. START-LIST is nil, the start symbol is the first
  2991. ;; nonterminal defined in the grammar (Bison like).
  2992. ((null start-list)
  2993. (setq start-var (caar defs)))
  2994. ;; 2. START-LIST contains only one element, it is the start
  2995. ;; symbol (Bison like).
  2996. ((or wisent-single-start-flag (null (cdr start-list)))
  2997. (setq start-var (car start-list))
  2998. (or (assq start-var defs)
  2999. (error "Start symbol `%s' has no rule" start-var)))
  3000. ;; 3. START-LIST contains more than one element. All defines
  3001. ;; potential start symbols. One of them (the first one by
  3002. ;; default) will be given at parse time to be the parser goal.
  3003. ;; If `wisent-single-start-flag' is non-nil that feature is
  3004. ;; disabled and the first nonterminal in START-LIST defines
  3005. ;; the start symbol, like in case 2 above.
  3006. ((not wisent-single-start-flag)
  3007. ;; START-LIST is a list of nonterminals '(nt0 ... ntN).
  3008. ;; Build and push ad hoc start rules in the grammar:
  3009. ;; ($STARTS ((nt0) $1) ((nt1) $1) ... ((ntN) $1))
  3010. ;; ($nt1 (($$nt1 nt1) $2))
  3011. ;; ...
  3012. ;; ($ntN (($$ntN ntN) $2))
  3013. ;; Where internal symbols $ntI and $$ntI are respectively
  3014. ;; nonterminals and terminals.
  3015. ;; The internal start symbol $STARTS is used to build the
  3016. ;; LALR(1) automaton. The true default start symbol used by the
  3017. ;; parser is the first nonterminal in START-LIST (nt0).
  3018. (setq start-var wisent-starts-nonterm
  3019. lst (nreverse start-list))
  3020. (while lst
  3021. (setq var (car lst)
  3022. lst (cdr lst))
  3023. (or (memq var var-list)
  3024. (error "Start symbol `%s' has no rule" var))
  3025. (unless (assq var start-table) ;; Ignore duplicates
  3026. ;; For each nt start symbol
  3027. (setq ep-var (intern (format "$%s" var))
  3028. ep-token (intern (format "$$%s" var)))
  3029. (wisent-push-token ep-token t)
  3030. (wisent-push-var ep-var t)
  3031. (setq
  3032. ;; Add entry (nt . $$nt) to start-table
  3033. start-table (cons (cons var ep-token) start-table)
  3034. ;; Add rule ($nt (($$nt nt) $2))
  3035. defs (cons (list ep-var (list (list ep-token var) '$2)) defs)
  3036. ;; Add start rule (($nt) $1)
  3037. ep-def (cons (list (list ep-var) '$1) ep-def))
  3038. ))
  3039. (wisent-push-var start-var t)
  3040. (setq defs (cons (cons start-var ep-def) defs))))
  3041. ;; Set up rules main data structure & RPREC, RCODE, RUSEFUL
  3042. (setq rules (wisent-parse-nonterminals defs))
  3043. ;; Set up the terminal & nonterminal lists.
  3044. (setq nsyms (+ ntokens nvars)
  3045. token-list (nreverse token-list)
  3046. lst var-list
  3047. var-list nil)
  3048. (while lst
  3049. (setq var (car lst)
  3050. lst (cdr lst)
  3051. var-list (cons var var-list))
  3052. (wisent-set-item-number ;; adjust nonterminal item number to
  3053. var (+ ntokens (wisent-item-number var)))) ;; I += NTOKENS
  3054. ;; Store special item numbers
  3055. (setq error-token-number (wisent-item-number wisent-error-term)
  3056. start-symbol (wisent-item-number start-var))
  3057. ;; Keep symbols in the TAGS vector so that TAGS[I] is the symbol
  3058. ;; associated to item number I.
  3059. (setq tags (vconcat token-list var-list))
  3060. ;; Set up RLHS RRHS & RITEM data structures from list of rules
  3061. ;; (LHS . RHS) received from `wisent-parse-nonterminals'.
  3062. (setq rlhs (make-vector (1+ nrules) nil)
  3063. rrhs (make-vector (1+ nrules) nil)
  3064. ritem (make-vector (1+ nitems) nil)
  3065. i 0
  3066. r 1)
  3067. (while rules
  3068. (aset rlhs r (wisent-item-number (caar rules)))
  3069. (aset rrhs r i)
  3070. (setq rhs (cdar rules)
  3071. pre nil)
  3072. (while rhs
  3073. (setq item (wisent-item-number (car rhs)))
  3074. ;; Get default precedence level of rule, that is the
  3075. ;; precedence of the last terminal in it.
  3076. (and (wisent-ISTOKEN item)
  3077. default-prec
  3078. (setq pre item))
  3079. (aset ritem i item)
  3080. (setq i (1+ i)
  3081. rhs (cdr rhs)))
  3082. ;; Setup the precedence level of the rule, that is the one
  3083. ;; specified by %prec or the default one.
  3084. (and (not (aref rprec r)) ;; Already set by %prec
  3085. pre
  3086. (wisent-prec (aref tags pre))
  3087. (aset rprec r pre))
  3088. (aset ritem i (- r))
  3089. (setq i (1+ i)
  3090. r (1+ r))
  3091. (setq rules (cdr rules)))
  3092. ))
  3093. ;;;; ---------------------
  3094. ;;;; Compile input grammar
  3095. ;;;; ---------------------
  3096. (defun wisent-compile-grammar (grammar &optional start-list)
  3097. "Compile the LALR(1) GRAMMAR.
  3098. GRAMMAR is a list (TOKENS ASSOCS . NONTERMS) where:
  3099. - TOKENS is a list of terminal symbols (tokens).
  3100. - ASSOCS is nil, or an alist of (ASSOC-TYPE . ASSOC-VALUE) elements
  3101. describing the associativity of TOKENS. ASSOC-TYPE must be one of
  3102. the `default-prec' `nonassoc', `left' or `right' symbols. When
  3103. ASSOC-TYPE is `default-prec', ASSOC-VALUE must be nil or t (the
  3104. default). Otherwise it is a list of tokens which must have been
  3105. previously declared in TOKENS.
  3106. - NONTERMS is a list of nonterminal definitions.
  3107. Optional argument START-LIST specify the possible grammar start
  3108. symbols. This is a list of nonterminals which must have been
  3109. previously declared in GRAMMAR's NONTERMS form. By default, the start
  3110. symbol is the first nonterminal defined. When START-LIST contains
  3111. only one element, it is the start symbol. Otherwise, all elements are
  3112. possible start symbols, unless `wisent-single-start-flag' is non-nil.
  3113. In that case, the first element is the start symbol, and others are
  3114. ignored.
  3115. Return an automaton as a vector: [ACTIONS GOTOS STARTS FUNCTIONS]
  3116. where:
  3117. - ACTIONS is a state/token matrix telling the parser what to do at
  3118. every state based on the current lookahead token. That is shift,
  3119. reduce, accept or error.
  3120. - GOTOS is a state/nonterminal matrix telling the parser the next
  3121. state to go to after reducing with each rule.
  3122. - STARTS is an alist which maps the allowed start nonterminal symbols
  3123. to tokens that will be first shifted into the parser stack.
  3124. - FUNCTIONS is an obarray of semantic action symbols. Each symbol's
  3125. function definition is the semantic action lambda expression."
  3126. (if (wisent-automaton-p grammar)
  3127. grammar ;; Grammar already compiled just return it
  3128. (wisent-with-context compile-grammar
  3129. (let* ((gc-cons-threshold 1000000))
  3130. (garbage-collect)
  3131. (setq wisent-new-log-flag t)
  3132. ;; Parse input grammar
  3133. (wisent-parse-grammar grammar start-list)
  3134. ;; Generate the LALR(1) automaton
  3135. (wisent-parser-automaton)))))
  3136. ;;;; --------------------------
  3137. ;;;; Byte compile input grammar
  3138. ;;;; --------------------------
  3139. (require 'bytecomp)
  3140. (defun wisent-byte-compile-grammar (form)
  3141. "Byte compile the `wisent-compile-grammar' FORM.
  3142. Automatically called by the Emacs Lisp byte compiler as a
  3143. `byte-compile' handler."
  3144. ;; Eval the `wisent-compile-grammar' form to obtain an LALR
  3145. ;; automaton internal data structure. Then, because the internal
  3146. ;; data structure contains an obarray, convert it to a lisp form so
  3147. ;; it can be byte-compiled.
  3148. (byte-compile-form
  3149. ;; FIXME: we macroexpand here since `byte-compile-form' expects
  3150. ;; macroexpanded code, but that's just a workaround: for lexical-binding
  3151. ;; the lisp form should have to pass through closure-conversion and
  3152. ;; `wisent-byte-compile-grammar' is called much too late for that.
  3153. ;; Why isn't this `wisent-automaton-lisp-form' performed at
  3154. ;; macroexpansion time? --Stef
  3155. (macroexpand-all
  3156. (wisent-automaton-lisp-form (eval form)))))
  3157. ;; FIXME: We shouldn't use a `byte-compile' handler. Maybe using a hash-table
  3158. ;; instead of an obarray would work around the problem that obarrays
  3159. ;; aren't printable. Then (put 'wisent-compile-grammar 'side-effect-free t).
  3160. (put 'wisent-compile-grammar 'byte-compile 'wisent-byte-compile-grammar)
  3161. (defun wisent-automaton-lisp-form (automaton)
  3162. "Return a Lisp form that produces AUTOMATON.
  3163. See also `wisent-compile-grammar' for more details on AUTOMATON."
  3164. (or (wisent-automaton-p automaton)
  3165. (signal 'wrong-type-argument
  3166. (list 'wisent-automaton-p automaton)))
  3167. (let ((obn (make-symbol "ob")) ; Generated obarray name
  3168. (obv (aref automaton 3)) ; Semantic actions obarray
  3169. )
  3170. `(let ((,obn (make-vector 13 0)))
  3171. ;; Generate code to initialize the semantic actions obarray,
  3172. ;; in local variable OBN.
  3173. ,@(let (obcode)
  3174. (mapatoms
  3175. #'(lambda (s)
  3176. (setq obcode
  3177. (cons `(fset (intern ,(symbol-name s) ,obn)
  3178. #',(symbol-function s))
  3179. obcode)))
  3180. obv)
  3181. obcode)
  3182. ;; Generate code to create the automaton.
  3183. (vector
  3184. ;; In code generated to initialize the action table, take
  3185. ;; care of symbols that are interned in the semantic actions
  3186. ;; obarray.
  3187. (vector
  3188. ,@(mapcar
  3189. #'(lambda (state) ;; for each state
  3190. `(list
  3191. ,@(mapcar
  3192. #'(lambda (tr) ;; for each transition
  3193. (let ((k (car tr)) ; token
  3194. (a (cdr tr))) ; action
  3195. (if (and (symbolp a)
  3196. (intern-soft (symbol-name a) obv))
  3197. `(cons ,(if (symbolp k) `(quote ,k) k)
  3198. (intern-soft ,(symbol-name a) ,obn))
  3199. `(quote ,tr))))
  3200. state)))
  3201. (aref automaton 0)))
  3202. ;; The code of the goto table is unchanged.
  3203. ,(aref automaton 1)
  3204. ;; The code of the alist of start symbols is unchanged.
  3205. ',(aref automaton 2)
  3206. ;; The semantic actions obarray is in the local variable OBN.
  3207. ,obn))))
  3208. (provide 'semantic/wisent/comp)
  3209. ;;; semantic/wisent/comp.el ends here