lexer.c
  1. /* ------------------------------------------------------------------------- */
  2. /* "lexer" : Lexical analyser */
  3. /* */
  4. /* Part of Inform 6.33 */
  5. /* copyright (c) Graham Nelson 1993 - 2014 */
  6. /* */
  7. /* ------------------------------------------------------------------------- */
  8. #include "header.h"
  9. int total_source_line_count, /* Number of source lines so far */
  10. no_hash_printed_yet, /* Have not yet printed the first # */
  11. hash_printed_since_newline, /* A hash has been printed since the
  12. most recent new-line was printed
  13. (generally as a result of an error
  14. message or the start of pass) */
  15. dont_enter_into_symbol_table, /* Return names as text (with
  16. token type DQ_TT, i.e., as if
  17. they had double-quotes around)
  18. and not as entries in the symbol
  19. table, when TRUE. If -2, only the
  20. keyword table is searched. */
  21. return_sp_as_variable; /* When TRUE, the word "sp" denotes
  22. the stack pointer variable
  23. (used in assembly language only) */
  24. int next_token_begins_syntax_line; /* When TRUE, start a new syntax
  25. line (for error reporting, etc.)
  26. on the source code line where
  27. the next token appears */
  28. int32 last_mapped_line; /* Last syntax line reported to debugging file */
  29. /* ------------------------------------------------------------------------- */
  30. /* The lexer's output is a sequence of triples, each called a "token", */
  31. /* representing one lexical unit (or "lexeme") each. Instead of providing */
  32. /* "lookahead" (that is, always having available the next token after the */
  33. /* current one, so that syntax analysers higher up in Inform can have */
  34. /* advance knowledge of what is coming), the lexer instead has a system */
  35. /* where tokens can be read in and then "put back again". */
  36. /* The meaning of the number (and to some extent the text) supplied with */
  37. /* a token depends on its type: see "header.h" for the list of types. */
  38. /* For example, the lexeme "$1e3" is understood by Inform as a hexadecimal */
  39. /* number, and translated to the token: */
  40. /* type NUMBER_TT, value 483, text "$1e3" */
  41. /* ------------------------------------------------------------------------- */
  42. /* These three variables are set to the current token on a call to */
  43. /* get_next_token() (but are not changed by a call to put_token_back()). */
  44. /* ------------------------------------------------------------------------- */
  45. int token_type;
  46. int32 token_value;
  47. char *token_text;
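/* Illustrative sketch (an editorial addition, not part of the original
   source): a syntax analyser higher up in Inform typically consumes the
   triple like this,

       get_next_token();
       if (token_type == NUMBER_TT)
           printf("read %d, written \"%s\"\n", token_value, token_text);

   so the "$1e3" example above would report 483. */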
  48. /* ------------------------------------------------------------------------- */
  49. /* The next two variables are the head and tail of a singly linked list. */
  50. /* The tail stores the portion most recently read from the current */
  51. /* lexical block; its end values therefore describe the location of the */
  52. /* current token, and are updated, via set_token_location(...), whenever */
  53. /* the three variables above are set. Earlier vertices, if any, represent */
  54. /* the regions of lexical blocks read beforehand, where new vertices are */
  55. /* only introduced by interruptions like a file inclusion or an EOF. */
  56. /* Vertices are deleted off the front of the list once they are no */
  57. /* longer referenced by pending debug information records. */
  58. /* ------------------------------------------------------------------------- */
  59. static debug_locations *first_token_locations;
  60. static debug_locations *last_token_location;
  61. extern debug_location get_token_location(void)
  62. { debug_location result;
  63. debug_location *location = &(last_token_location->location);
  64. result.file_index = location->file_index;
  65. result.beginning_byte_index = location->end_byte_index;
  66. result.end_byte_index = location->end_byte_index;
  67. result.beginning_line_number = location->end_line_number;
  68. result.end_line_number = location->end_line_number;
  69. result.beginning_character_number = location->end_character_number;
  70. result.end_character_number = location->end_character_number;
  71. return result;
  72. }
  73. extern debug_locations get_token_locations(void)
  74. { debug_locations result;
  75. result.location = get_token_location();
  76. result.next = NULL;
  77. result.reference_count = 0;
  78. return result;
  79. }
  80. static void set_token_location(debug_location location)
  81. { if (location.file_index == last_token_location->location.file_index)
  82. { last_token_location->location.end_byte_index =
  83. location.end_byte_index;
  84. last_token_location->location.end_line_number =
  85. location.end_line_number;
  86. last_token_location->location.end_character_number =
  87. location.end_character_number;
  88. } else
  89. { debug_locations*successor =
  90. my_malloc
  91. (sizeof(debug_locations),
  92. "debug locations of recent tokens");
  93. successor->location = location;
  94. successor->next = NULL;
  95. successor->reference_count = 0;
  96. last_token_location->next = successor;
  97. last_token_location = successor;
  98. }
  99. }
  100. extern debug_location_beginning get_token_location_beginning(void)
  101. { debug_location_beginning result;
  102. ++(last_token_location->reference_count);
  103. result.head = last_token_location;
  104. result.beginning_byte_index =
  105. last_token_location->location.end_byte_index;
  106. result.beginning_line_number =
  107. last_token_location->location.end_line_number;
  108. result.beginning_character_number =
  109. last_token_location->location.end_character_number;
  110. return result;
  111. }
  112. static void cleanup_token_locations(debug_location_beginning*beginning)
  113. { if (first_token_locations)
  114. { while (first_token_locations &&
  115. !first_token_locations->reference_count)
  116. { debug_locations*moribund = first_token_locations;
  117. first_token_locations = moribund->next;
  118. my_free(&moribund, "debug locations of recent tokens");
  119. if (beginning &&
  120. (beginning->head == moribund || !first_token_locations))
  121. { compiler_error
  122. ("Records needed by a debug_location_beginning are no "
  123. "longer allocated, perhaps because of an invalid reuse "
  124. "of this or an earlier beginning");
  125. }
  126. }
  127. } else
  128. { if (beginning)
  129. { compiler_error
  130. ("Attempt to use a debug_location_beginning when no token "
  131. "locations are defined");
  132. } else
  133. { compiler_error
  134. ("Attempt to clean up token locations when no token locations "
  135. "are defined");
  136. }
  137. }
  138. }
  139. extern void discard_token_location(debug_location_beginning beginning)
  140. { --(beginning.head->reference_count);
  141. }
  142. extern debug_locations get_token_location_end
  143. (debug_location_beginning beginning)
  144. { debug_locations result;
  145. cleanup_token_locations(&beginning);
  146. --(beginning.head->reference_count);
  147. /* Sometimes we know what we'll read before we switch to the lexical block
  148. where we'll read it. In such cases the beginning will be placed in the
  149. prior block and last exactly zero bytes there. It's misleading to
  150. include such ranges, so we gobble them. */
  151. if (beginning.head->location.end_byte_index ==
  152. beginning.beginning_byte_index &&
  153. beginning.head->next)
  154. { beginning.head = beginning.head->next;
  155. result.location = beginning.head->location;
  156. result.location.beginning_byte_index = 0;
  157. result.location.beginning_line_number = 1;
  158. result.location.beginning_character_number = 1;
  159. } else
  160. { result.location = beginning.head->location;
  161. result.location.beginning_byte_index =
  162. beginning.beginning_byte_index;
  163. result.location.beginning_line_number =
  164. beginning.beginning_line_number;
  165. result.location.beginning_character_number =
  166. beginning.beginning_character_number;
  167. }
  168. result.next = beginning.head->next;
  169. result.reference_count = 0;
  170. return result;
  171. }
  172. /* ------------------------------------------------------------------------- */
  173. /* In order to be able to put tokens back efficiently, the lexer stores */
  174. /* tokens in a "circle": the variable circle_position ranges between */
  175. /* 0 and CIRCLE_SIZE-1. We only need a circle size as large as the */
  176. /* maximum number of tokens ever put back at once, plus 1 (in effect, the */
  177. /* maximum token lookahead ever needed in syntax analysis, plus 1). */
  178. /* */
  179. /* Unlike some compilers, Inform does not have a context-free lexer: in */
  180. /* fact it has 12288 different possible states. However, the context only */
  181. /* affects the interpretation of "identifiers": lexemes beginning with a */
  182. /* letter and containing up to 32 chars of alphanumeric and underscore */
  183. /* chars. (For example, "default" may refer to the directive or statement */
  184. /* of that name, and which token values are returned depends on the */
  185. /* current lexical context.) */
  186. /* */
  187. /* Along with each token, we also store the lexical context it was */
  188. /* translated under; because if it is called for again, there may need */
  189. /* to be a fresh interpretation of it if the context has changed. */
  190. /* ------------------------------------------------------------------------- */
  191. #define CIRCLE_SIZE 6
  192. /* (The worst case for token lookahead is distinguishing between an
  193. old-style "objectloop (a in b)" and a new "objectloop (a in b ...)".) */
  194. static int circle_position;
  195. static token_data circle[CIRCLE_SIZE];
  196. static int token_contexts[CIRCLE_SIZE];
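/* Sketch of the circle arithmetic (editorial addition, not in the original
   source): the read position advances as
       circle_position = (circle_position == CIRCLE_SIZE-1)
                         ? 0 : circle_position + 1;
   and after k calls to put_token_back() the next token to be returned is
   the one stored at
       (circle_position - k + 1 + CIRCLE_SIZE) % CIRCLE_SIZE
   which is exactly how get_next_token() below recovers put-back tokens. */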
  197. /* ------------------------------------------------------------------------- */
  198. /* A complication, however, is that the text of some lexemes needs to be */
  199. /* held in Inform's memory for much longer periods: for example, a */
  200. /* dictionary word lexeme (like "'south'") must have its text preserved */
  201. /* until the code generation time for the expression it occurs in, when */
  202. /* the dictionary reference is actually made. Code generation in general */
  203. /* occurs as early as possible in Inform: pending some better method of */
  204. /* garbage collection, we simply use a buffer so large that unless */
  205. /* expressions spread across 10K of source code are found, there can be */
  206. /* no problem. */
  207. /* ------------------------------------------------------------------------- */
  208. static char *lexeme_memory;
  209. static char *lex_p; /* Current write position */
  210. /* ------------------------------------------------------------------------- */
  211. /* The lexer itself needs up to 3 characters of lookahead (it uses an */
  212. /* LR(3) grammar to translate characters into tokens). */
  213. /* ------------------------------------------------------------------------- */
  214. #define LOOKAHEAD_SIZE 3
  215. static int current, lookahead, /* The latest character read, and */
  216. lookahead2, lookahead3; /* the three characters following it */
  217. static int pipeline_made; /* Whether or not the pipeline of
  218. characters has been constructed
  219. yet (this pass) */
  220. static int (* get_next_char)(void); /* Routine for reading the stream of
  221. characters: the lexer does not
  222. need any "ungetc" routine for
  223. putting them back again. End of
  224. stream is signalled by returning
  225. zero. */
  226. static char *source_to_analyse; /* The current lexical source:
  227. NULL for "load from source files",
  228. otherwise this points to a string
  229. containing Inform code */
  230. static int tokens_put_back; /* Count of the number of backward
  231. moves made from the last-read
  232. token */
  233. extern void describe_token(token_data t)
  234. {
  235. /* Many of the token types are not set in this file, but later on in
  236. Inform's higher stages (for example, in the expression evaluator);
  237. but this routine describes them all. */
  238. printf("{ ");
  239. switch(t.type)
  240. {
  241. /* The following token types occur in lexer output: */
  242. case SYMBOL_TT: printf("symbol ");
  243. describe_symbol(t.value);
  244. break;
  245. case NUMBER_TT: printf("literal number %d", t.value);
  246. break;
  247. case DQ_TT: printf("string \"%s\"", t.text);
  248. break;
  249. case SQ_TT: printf("string '%s'", t.text);
  250. break;
  251. case SEP_TT: printf("separator '%s'", t.text);
  252. break;
  253. case EOF_TT: printf("end of file");
  254. break;
  255. case STATEMENT_TT: printf("statement name '%s'", t.text);
  256. break;
  257. case SEGMENT_MARKER_TT: printf("object segment marker '%s'", t.text);
  258. break;
  259. case DIRECTIVE_TT: printf("directive name '%s'", t.text);
  260. break;
  261. case CND_TT: printf("textual conditional '%s'", t.text);
  262. break;
  263. case OPCODE_NAME_TT: printf("opcode name '%s'", t.text);
  264. break;
  265. case SYSFUN_TT: printf("built-in function name '%s'", t.text);
  266. break;
  267. case LOCAL_VARIABLE_TT: printf("local variable name '%s'", t.text);
  268. break;
  269. case MISC_KEYWORD_TT: printf("statement keyword '%s'", t.text);
  270. break;
  271. case DIR_KEYWORD_TT: printf("directive keyword '%s'", t.text);
  272. break;
  273. case TRACE_KEYWORD_TT: printf("'trace' keyword '%s'", t.text);
  274. break;
  275. case SYSTEM_CONSTANT_TT: printf("system constant name '%s'", t.text);
  276. break;
  277. /* The remaining are etoken types, not set by the lexer */
  278. case OP_TT: printf("operator '%s'",
  279. operators[t.value].description);
  280. break;
  281. case ENDEXP_TT: printf("end of expression");
  282. break;
  283. case SUBOPEN_TT: printf("open bracket");
  284. break;
  285. case SUBCLOSE_TT: printf("close bracket");
  286. break;
  287. case LARGE_NUMBER_TT: printf("large number: '%s'=%d",t.text,t.value);
  288. break;
  289. case SMALL_NUMBER_TT: printf("small number: '%s'=%d",t.text,t.value);
  290. break;
  291. case VARIABLE_TT: printf("variable '%s'=%d", t.text, t.value);
  292. break;
  293. case DICTWORD_TT: printf("dictionary word '%s'", t.text);
  294. break;
  295. case ACTION_TT: printf("action name '%s'", t.text);
  296. break;
  297. default:
  298. printf("** unknown token type %d, text='%s', value=%d **",
  299. t.type, t.text, t.value);
  300. }
  301. printf(" }");
  302. }
  303. /* ------------------------------------------------------------------------- */
  304. /* All but one of the 280 Inform keywords (118 of them opcode names used */
  305. /* only by the assembler). (The one left over is "sp", a keyword used in */
  306. /* assembly language only.) */
  307. /* */
  308. /* A "keyword group" is a set of keywords to be searched for. If a match */
  309. /* is made on an identifier, the token type becomes that given in the KG */
  310. /* and the token value is its index in the KG. */
  311. /* */
  312. /* The keyword ordering must correspond with the appropriate #define's in */
  313. /* "header.h" but is otherwise not significant. */
  314. /* ------------------------------------------------------------------------- */
  315. #define MAX_KEYWORDS 350
  316. /* The values will be filled in at compile time, when we know
  317. which opcode set to use. */
  318. keyword_group opcode_names =
  319. { { "" },
  320. OPCODE_NAME_TT, FALSE, TRUE
  321. };
  322. static char *opcode_list_z[] = {
  323. "je", "jl", "jg", "dec_chk", "inc_chk", "jin", "test", "or", "and",
  324. "test_attr", "set_attr", "clear_attr", "store", "insert_obj", "loadw",
  325. "loadb", "get_prop", "get_prop_addr", "get_next_prop", "add", "sub",
  326. "mul", "div", "mod", "call", "storew", "storeb", "put_prop", "sread",
  327. "print_char", "print_num", "random", "push", "pull", "split_window",
  328. "set_window", "output_stream", "input_stream", "sound_effect", "jz",
  329. "get_sibling", "get_child", "get_parent", "get_prop_len", "inc", "dec",
  330. "print_addr", "remove_obj", "print_obj", "ret", "jump", "print_paddr",
  331. "load", "not", "rtrue", "rfalse", "print", "print_ret", "nop", "save",
  332. "restore", "restart", "ret_popped", "pop", "quit", "new_line",
  333. "show_status", "verify", "call_2s", "call_vs", "aread", "call_vs2",
  334. "erase_window", "erase_line", "set_cursor", "get_cursor",
  335. "set_text_style", "buffer_mode", "read_char", "scan_table", "call_1s",
  336. "call_2n", "set_colour", "throw", "call_vn", "call_vn2", "tokenise",
  337. "encode_text", "copy_table", "print_table", "check_arg_count", "call_1n",
  338. "catch", "piracy", "log_shift", "art_shift", "set_font", "save_undo",
  339. "restore_undo", "draw_picture", "picture_data", "erase_picture",
  340. "set_margins", "move_window", "window_size", "window_style",
  341. "get_wind_prop", "scroll_window", "pop_stack", "read_mouse",
  342. "mouse_window", "push_stack", "put_wind_prop", "print_form",
  343. "make_menu", "picture_table", "print_unicode", "check_unicode",
  344. ""
  345. };
  346. static char *opcode_list_g[] = {
  347. "nop", "add", "sub", "mul", "div", "mod", "neg", "bitand", "bitor",
  348. "bitxor", "bitnot", "shiftl", "sshiftr", "ushiftr", "jump", "jz",
  349. "jnz", "jeq", "jne", "jlt", "jge", "jgt", "jle",
  350. "jltu", "jgeu", "jgtu", "jleu",
  351. "call", "return",
  352. "catch", "throw", "tailcall",
  353. "copy", "copys", "copyb", "sexs", "sexb", "aload",
  354. "aloads", "aloadb", "aloadbit", "astore", "astores", "astoreb",
  355. "astorebit", "stkcount", "stkpeek", "stkswap", "stkroll", "stkcopy",
  356. "streamchar", "streamnum", "streamstr",
  357. "gestalt", "debugtrap", "getmemsize", "setmemsize", "jumpabs",
  358. "random", "setrandom", "quit", "verify",
  359. "restart", "save", "restore", "saveundo", "restoreundo", "protect",
  360. "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys",
  361. "linearsearch", "binarysearch", "linkedsearch",
  362. "callf", "callfi", "callfii", "callfiii",
  363. "streamunichar",
  364. "mzero", "mcopy", "malloc", "mfree",
  365. "accelfunc", "accelparam",
  366. "numtof", "ftonumz", "ftonumn", "ceil", "floor",
  367. "fadd", "fsub", "fmul", "fdiv", "fmod",
  368. "sqrt", "exp", "log", "pow",
  369. "sin", "cos", "tan", "asin", "acos", "atan", "atan2",
  370. "jfeq", "jfne", "jflt", "jfle", "jfgt", "jfge", "jisnan", "jisinf",
  371. ""
  372. };
  373. keyword_group opcode_macros =
  374. { { "" },
  375. OPCODE_MACRO_TT, FALSE, TRUE
  376. };
  377. static char *opmacro_list_z[] = { "" };
  378. static char *opmacro_list_g[] = {
  379. "pull", "push",
  380. ""
  381. };
  382. keyword_group directives =
  383. { { "abbreviate", "array", "attribute", "class", "constant",
  384. "default", "dictionary", "end", "endif", "extend", "fake_action",
  385. "global", "ifdef", "ifndef", "ifnot", "ifv3", "ifv5", "iftrue",
  386. "iffalse", "import", "include", "link", "lowstring", "message",
  387. "nearby", "object", "property", "release", "replace",
  388. "serial", "switches", "statusline", "stub", "system_file", "trace",
  389. "undef", "verb", "version", "zcharacter",
  390. "" },
  391. DIRECTIVE_TT, FALSE, FALSE
  392. };
  393. keyword_group trace_keywords =
  394. { { "dictionary", "symbols", "objects", "verbs",
  395. "assembly", "expressions", "lines", "tokens", "linker",
  396. "on", "off", "" },
  397. TRACE_KEYWORD_TT, FALSE, TRUE
  398. };
  399. keyword_group segment_markers =
  400. { { "class", "has", "private", "with", "" },
  401. SEGMENT_MARKER_TT, FALSE, TRUE
  402. };
  403. keyword_group directive_keywords =
  404. { { "alias", "long", "additive",
  405. "score", "time",
  406. "noun", "held", "multi", "multiheld", "multiexcept",
  407. "multiinside", "creature", "special", "number", "scope", "topic",
  408. "reverse", "meta", "only", "replace", "first", "last",
  409. "string", "table", "buffer", "data", "initial", "initstr",
  410. "with", "private", "has", "class",
  411. "error", "fatalerror", "warning",
  412. "terminating",
  413. "" },
  414. DIR_KEYWORD_TT, FALSE, TRUE
  415. };
  416. keyword_group misc_keywords =
  417. { { "char", "name", "the", "a", "an", "The", "number",
  418. "roman", "reverse", "bold", "underline", "fixed", "on", "off",
  419. "to", "address", "string", "object", "near", "from", "property", "A", "" },
  420. MISC_KEYWORD_TT, FALSE, TRUE
  421. };
  422. keyword_group statements =
  423. { { "box", "break", "continue", "default", "do", "else", "font", "for",
  424. "give", "if", "inversion", "jump", "move", "new_line", "objectloop",
  425. "print", "print_ret", "quit", "read", "remove", "restore", "return",
  426. "rfalse", "rtrue", "save", "spaces", "string", "style", "switch",
  427. "until", "while", "" },
  428. STATEMENT_TT, FALSE, TRUE
  429. };
  430. keyword_group conditions =
  431. { { "has", "hasnt", "in", "notin", "ofclass", "or", "provides", "" },
  432. CND_TT, FALSE, TRUE
  433. };
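/* Worked example (editorial addition): with the "conditions" group above
   enabled, the identifier "ofclass" is matched at index 4 of the group, so
   the lexer returns a token with

       token_type  == CND_TT
       token_value == 4
       token_text  == "ofclass"

   following the keyword-group scheme described earlier. */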
  434. keyword_group system_functions =
  435. { { "child", "children", "elder", "eldest", "indirect", "parent", "random",
  436. "sibling", "younger", "youngest", "metaclass", "glk", "" },
  437. SYSFUN_TT, FALSE, TRUE
  438. };
  439. keyword_group system_constants =
  440. { { "adjectives_table", "actions_table", "classes_table",
  441. "identifiers_table", "preactions_table", "version_number",
  442. "largest_object", "strings_offset", "code_offset",
  443. "dict_par1", "dict_par2", "dict_par3", "actual_largest_object",
  444. "static_memory_offset", "array_names_offset", "readable_memory_offset",
  445. "cpv__start", "cpv__end", "ipv__start", "ipv__end",
  446. "array__start", "array__end",
  447. "lowest_attribute_number", "highest_attribute_number",
  448. "attribute_names_array",
  449. "lowest_property_number", "highest_property_number",
  450. "property_names_array",
  451. "lowest_action_number", "highest_action_number",
  452. "action_names_array",
  453. "lowest_fake_action_number", "highest_fake_action_number",
  454. "fake_action_names_array",
  455. "lowest_routine_number", "highest_routine_number", "routines_array",
  456. "routine_names_array", "routine_flags_array",
  457. "lowest_global_number", "highest_global_number", "globals_array",
  458. "global_names_array", "global_flags_array",
  459. "lowest_array_number", "highest_array_number", "arrays_array",
  460. "array_names_array", "array_flags_array",
  461. "lowest_constant_number", "highest_constant_number", "constants_array",
  462. "constant_names_array",
  463. "lowest_class_number", "highest_class_number", "class_objects_array",
  464. "lowest_object_number", "highest_object_number",
  465. "oddeven_packing",
  466. "grammar_table", "dictionary_table", "dynam_string_table",
  467. "" },
  468. SYSTEM_CONSTANT_TT, FALSE, TRUE
  469. };
  470. keyword_group *keyword_groups[12]
  471. = { NULL, &opcode_names, &directives, &trace_keywords, &segment_markers,
  472. &directive_keywords, &misc_keywords, &statements, &conditions,
  473. &system_functions, &system_constants, &opcode_macros};
  474. keyword_group local_variables =
  475. { { "" }, /* Filled in when routine declared */
  476. LOCAL_VARIABLE_TT, FALSE, FALSE
  477. };
  478. static int lexical_context(void)
  479. {
  480. /* The lexical context is a number representing all of the context
  481. information in the lexical analyser: the same input text will
  482. always translate to the same output tokens whenever the context
  483. is the same.
  484. In fact, for efficiency reasons this number omits the bit of
  485. information held in the variable "dont_enter_into_symbol_table".
  486. Inform never needs to backtrack through tokens parsed in that
  487. way (thankfully, as it would be expensive indeed to check
  488. the tokens). */
  489. int c = 0;
  490. if (opcode_names.enabled) c |= 1;
  491. if (directives.enabled) c |= 2;
  492. if (trace_keywords.enabled) c |= 4;
  493. if (segment_markers.enabled) c |= 8;
  494. if (directive_keywords.enabled) c |= 16;
  495. if (misc_keywords.enabled) c |= 32;
  496. if (statements.enabled) c |= 64;
  497. if (conditions.enabled) c |= 128;
  498. if (system_functions.enabled) c |= 256;
  499. if (system_constants.enabled) c |= 512;
  500. if (local_variables.enabled) c |= 1024;
  501. if (return_sp_as_variable) c |= 2048;
  502. return(c);
  503. }
  504. static void print_context(int c)
  505. {
  506. if ((c & 1) != 0) printf("OPC ");
  507. if ((c & 2) != 0) printf("DIR ");
  508. if ((c & 4) != 0) printf("TK ");
  509. if ((c & 8) != 0) printf("SEG ");
  510. if ((c & 16) != 0) printf("DK ");
  511. if ((c & 32) != 0) printf("MK ");
  512. if ((c & 64) != 0) printf("STA ");
  513. if ((c & 128) != 0) printf("CND ");
  514. if ((c & 256) != 0) printf("SFUN ");
  515. if ((c & 512) != 0) printf("SCON ");
  516. if ((c & 1024) != 0) printf("LV ");
  517. if ((c & 2048) != 0) printf("sp ");
  518. }
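/* For example (editorial addition): a context in which only directives and
   statements are enabled has value 2 | 64 == 66, and print_context(66)
   prints "DIR STA ". */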
  519. static int *keywords_hash_table;
  520. static int *keywords_hash_ends_table;
  521. static int *keywords_data_table;
  522. static int *local_variable_hash_table;
  523. static int *local_variable_hash_codes;
  524. char **local_variable_texts;
  525. static char *local_variable_text_table;
  526. static char one_letter_locals[128];
  527. static void make_keywords_tables(void)
  528. { int i, j, h, tp=0;
  529. char **oplist, **maclist;
  530. if (!glulx_mode) {
  531. oplist = opcode_list_z;
  532. maclist = opmacro_list_z;
  533. }
  534. else {
  535. oplist = opcode_list_g;
  536. maclist = opmacro_list_g;
  537. }
  538. for (j=0; *(oplist[j]); j++) {
  539. opcode_names.keywords[j] = oplist[j];
  540. }
  541. opcode_names.keywords[j] = "";
  542. for (j=0; *(maclist[j]); j++) {
  543. opcode_macros.keywords[j] = maclist[j];
  544. }
  545. opcode_macros.keywords[j] = "";
  546. for (i=0; i<HASH_TAB_SIZE; i++)
  547. { keywords_hash_table[i] = -1;
  548. keywords_hash_ends_table[i] = -1;
  549. }
  550. for (i=1; i<=11; i++)
  551. { keyword_group *kg = keyword_groups[i];
  552. for (j=0; *(kg->keywords[j]) != 0; j++)
  553. { h = hash_code_from_string(kg->keywords[j]);
  554. if (keywords_hash_table[h] == -1)
  555. keywords_hash_table[h] = tp;
  556. else
  557. *(keywords_data_table + 3*(keywords_hash_ends_table[h]) + 2) = tp;
  558. keywords_hash_ends_table[h] = tp;
  559. *(keywords_data_table + 3*tp) = i;
  560. *(keywords_data_table + 3*tp+1) = j;
  561. *(keywords_data_table + 3*tp+2) = -1;
  562. tp++;
  563. }
  564. }
  565. }
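/* Layout sketch (editorial addition): entry t of the keyword table is the
   triple keywords_data_table[3*t .. 3*t+2] == { group number, index within
   that group, next entry sharing the same hash code or -1 }, with
   keywords_hash_table[h] holding the first entry for hash code h and
   keywords_hash_ends_table[h] the last.  A lookup therefore walks

       int t = keywords_hash_table[h];
       while (t >= 0)
       {   int *entry = keywords_data_table + 3*t;
           ...compare against keyword_groups[entry[0]]->keywords[entry[1]]...
           t = entry[2];
       }

   which is the loop interpret_identifier() performs below. */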
  566. extern void construct_local_variable_tables(void)
  567. { int i, h; char *p = local_variable_text_table;
  568. for (i=0; i<HASH_TAB_SIZE; i++) local_variable_hash_table[i] = -1;
  569. for (i=0; i<128; i++) one_letter_locals[i] = MAX_LOCAL_VARIABLES;
  570. for (i=0; i<no_locals; i++)
  571. { char *q = local_variables.keywords[i];
  572. if (q[1] == 0)
  573. { one_letter_locals[(uchar)q[0]] = i;
  574. if (isupper(q[0])) one_letter_locals[tolower(q[0])] = i;
  575. if (islower(q[0])) one_letter_locals[toupper(q[0])] = i;
  576. }
  577. h = hash_code_from_string(q);
  578. if (local_variable_hash_table[h] == -1)
  579. local_variable_hash_table[h] = i;
  580. local_variable_hash_codes[i] = h;
  581. local_variable_texts[i] = p;
  582. strcpy(p, q);
  583. p += strlen(p)+1;
  584. }
  585. for (;i<MAX_LOCAL_VARIABLES-1;i++)
  586. local_variable_texts[i] = "<no such local variable>";
  587. }
  588. static void interpret_identifier(int pos, int dirs_only_flag)
  589. { int index, hashcode; char *p = circle[pos].text;
  590. /* An identifier is either a keyword or a "symbol", a name which the
  591. lexical analyser leaves to higher levels of Inform to understand. */
  592. hashcode = hash_code_from_string(p);
  593. if (dirs_only_flag) goto KeywordSearch;
  594. /* If this is assembly language, perhaps it is "sp"? */
  595. if (return_sp_as_variable && (p[0]=='s') && (p[1]=='p') && (p[2]==0))
  596. { circle[pos].value = 0; circle[pos].type = LOCAL_VARIABLE_TT;
  597. return;
  598. }
  599. /* Test for local variables first, quite quickly. */
  600. if (local_variables.enabled)
  601. { if (p[1]==0)
  602. { index = one_letter_locals[(uchar)p[0]];
  603. if (index<MAX_LOCAL_VARIABLES)
  604. { circle[pos].type = LOCAL_VARIABLE_TT;
  605. circle[pos].value = index+1;
  606. return;
  607. }
  608. }
  609. index = local_variable_hash_table[hashcode];
  610. if (index >= 0)
  611. { for (;index<no_locals;index++)
  612. { if (hashcode == local_variable_hash_codes[index])
  613. { if (strcmpcis(p, local_variable_texts[index])==0)
  614. { circle[pos].type = LOCAL_VARIABLE_TT;
  615. circle[pos].value = index+1;
  616. return;
  617. }
  618. }
  619. }
  620. }
  621. }
  622. /* Now the bulk of the keywords. Note that the lexer doesn't recognise
  623. the name of a system function which has been Replaced. */
  624. KeywordSearch:
  625. index = keywords_hash_table[hashcode];
  626. while (index >= 0)
  627. { int *i = keywords_data_table + 3*index;
  628. keyword_group *kg = keyword_groups[*i];
  629. if (((!dirs_only_flag) && (kg->enabled))
  630. || (dirs_only_flag && (kg == &directives)))
  631. { char *q = kg->keywords[*(i+1)];
  632. if (((kg->case_sensitive) && (strcmp(p, q)==0))
  633. || ((!(kg->case_sensitive)) && (strcmpcis(p, q)==0)))
  634. { if ((kg != &system_functions)
  635. || (system_function_usage[*(i+1)]!=2))
  636. { circle[pos].type = kg->change_token_type;
  637. circle[pos].value = *(i+1);
  638. return;
  639. }
  640. }
  641. }
  642. index = *(i+2);
  643. }
  644. if (dirs_only_flag) return;
  645. /* Search for the name; create it if necessary. */
  646. circle[pos].value = symbol_index(p, hashcode);
  647. circle[pos].type = SYMBOL_TT;
  648. }
  649. /* ------------------------------------------------------------------------- */
  650. /* The tokeniser grid aids a rapid decision about the consequences of a */
  651. /* character reached in the buffer. In effect it is an efficiently stored */
  652. /* transition table using an algorithm similar to that of S. C. Johnson's */
  653. /* "yacc" lexical analyser (see Aho, Sethi and Ullman, section 3.9). */
  654. /* My thanks to Dilip Sequeira for suggesting this. */
  655. /* */
  656. /* tokeniser_grid[c] is (16*n + m) if c is the first character of */
  657. /* separator numbers n, n+1, ..., n+m-1 */
  658. /* or certain special values (QUOTE_CODE, etc) */
  659. /* or 0 otherwise */
  660. /* */
  661. /* Since 1000/16 = 62, the code numbers below will need increasing if the */
  662. /* number of separators supported exceeds 61. */
  663. /* ------------------------------------------------------------------------- */
  664. static int tokeniser_grid[256];
  665. #define QUOTE_CODE 1000
  666. #define DQUOTE_CODE 1001
  667. #define NULL_CODE 1002
  668. #define SPACE_CODE 1003
  669. #define NEGATIVE_CODE 1004
  670. #define DIGIT_CODE 1005
  671. #define RADIX_CODE 1006
  672. #define KEYWORD_CODE 1007
  673. #define EOF_CODE 1008
  674. #define WHITESPACE_CODE 1009
  675. #define COMMENT_CODE 1010
  676. #define IDENTIFIER_CODE 1011
  677. /* This list cannot safely be changed without also changing the header
  678. separator #defines. The ordering is significant in that (i) all entries
  679. beginning with the same character must be adjacent and (ii) if
  680. X is an initial substring of Y then Y must come before X.
  681. E.g. --> must occur before -- to prevent "-->0" being tokenised
  682. wrongly as "--", ">", "0" rather than "-->", "0". */
  683. static const char separators[NUMBER_SEPARATORS][4] =
  684. { "->", "-->", "--", "-", "++", "+", "*", "/", "%",
  685. "||", "|", "&&", "&", "~~",
  686. "~=", "~", "==", "=", ">=", ">",
  687. "<=", "<", "(", ")", ",",
  688. ".&", ".#", "..&", "..#", "..", ".",
  689. "::", ":", "@", ";", "[", "]", "{", "}",
  690. "$", "?~", "?",
  691. "#a$", "#g$", "#n$", "#r$", "#w$", "##", "#"
  692. };
  693. static void make_tokeniser_grid(void)
  694. {
  695. /* Construct the grid to the specification above. */
  696. int i, j;
  697. for (i=0; i<256; i++) tokeniser_grid[i]=0;
  698. for (i=0; i<NUMBER_SEPARATORS; i++)
  699. { j=separators[i][0];
  700. if (tokeniser_grid[j]==0)
  701. tokeniser_grid[j]=i*16+1; else tokeniser_grid[j]++;
  702. }
  703. tokeniser_grid['\''] = QUOTE_CODE;
  704. tokeniser_grid['\"'] = DQUOTE_CODE;
  705. tokeniser_grid[0] = EOF_CODE;
  706. tokeniser_grid[' '] = WHITESPACE_CODE;
  707. tokeniser_grid['\n'] = WHITESPACE_CODE;
  708. tokeniser_grid['$'] = RADIX_CODE;
  709. tokeniser_grid['!'] = COMMENT_CODE;
  710. tokeniser_grid['0'] = DIGIT_CODE;
  711. tokeniser_grid['1'] = DIGIT_CODE;
  712. tokeniser_grid['2'] = DIGIT_CODE;
  713. tokeniser_grid['3'] = DIGIT_CODE;
  714. tokeniser_grid['4'] = DIGIT_CODE;
  715. tokeniser_grid['5'] = DIGIT_CODE;
  716. tokeniser_grid['6'] = DIGIT_CODE;
  717. tokeniser_grid['7'] = DIGIT_CODE;
  718. tokeniser_grid['8'] = DIGIT_CODE;
  719. tokeniser_grid['9'] = DIGIT_CODE;
  720. tokeniser_grid['a'] = IDENTIFIER_CODE;
  721. tokeniser_grid['b'] = IDENTIFIER_CODE;
  722. tokeniser_grid['c'] = IDENTIFIER_CODE;
  723. tokeniser_grid['d'] = IDENTIFIER_CODE;
  724. tokeniser_grid['e'] = IDENTIFIER_CODE;
  725. tokeniser_grid['f'] = IDENTIFIER_CODE;
  726. tokeniser_grid['g'] = IDENTIFIER_CODE;
  727. tokeniser_grid['h'] = IDENTIFIER_CODE;
  728. tokeniser_grid['i'] = IDENTIFIER_CODE;
  729. tokeniser_grid['j'] = IDENTIFIER_CODE;
  730. tokeniser_grid['k'] = IDENTIFIER_CODE;
  731. tokeniser_grid['l'] = IDENTIFIER_CODE;
  732. tokeniser_grid['m'] = IDENTIFIER_CODE;
  733. tokeniser_grid['n'] = IDENTIFIER_CODE;
  734. tokeniser_grid['o'] = IDENTIFIER_CODE;
  735. tokeniser_grid['p'] = IDENTIFIER_CODE;
  736. tokeniser_grid['q'] = IDENTIFIER_CODE;
  737. tokeniser_grid['r'] = IDENTIFIER_CODE;
  738. tokeniser_grid['s'] = IDENTIFIER_CODE;
  739. tokeniser_grid['t'] = IDENTIFIER_CODE;
  740. tokeniser_grid['u'] = IDENTIFIER_CODE;
  741. tokeniser_grid['v'] = IDENTIFIER_CODE;
  742. tokeniser_grid['w'] = IDENTIFIER_CODE;
  743. tokeniser_grid['x'] = IDENTIFIER_CODE;
  744. tokeniser_grid['y'] = IDENTIFIER_CODE;
  745. tokeniser_grid['z'] = IDENTIFIER_CODE;
  746. tokeniser_grid['A'] = IDENTIFIER_CODE;
  747. tokeniser_grid['B'] = IDENTIFIER_CODE;
  748. tokeniser_grid['C'] = IDENTIFIER_CODE;
  749. tokeniser_grid['D'] = IDENTIFIER_CODE;
  750. tokeniser_grid['E'] = IDENTIFIER_CODE;
  751. tokeniser_grid['F'] = IDENTIFIER_CODE;
  752. tokeniser_grid['G'] = IDENTIFIER_CODE;
  753. tokeniser_grid['H'] = IDENTIFIER_CODE;
  754. tokeniser_grid['I'] = IDENTIFIER_CODE;
  755. tokeniser_grid['J'] = IDENTIFIER_CODE;
  756. tokeniser_grid['K'] = IDENTIFIER_CODE;
  757. tokeniser_grid['L'] = IDENTIFIER_CODE;
  758. tokeniser_grid['M'] = IDENTIFIER_CODE;
  759. tokeniser_grid['N'] = IDENTIFIER_CODE;
  760. tokeniser_grid['O'] = IDENTIFIER_CODE;
  761. tokeniser_grid['P'] = IDENTIFIER_CODE;
  762. tokeniser_grid['Q'] = IDENTIFIER_CODE;
  763. tokeniser_grid['R'] = IDENTIFIER_CODE;
  764. tokeniser_grid['S'] = IDENTIFIER_CODE;
  765. tokeniser_grid['T'] = IDENTIFIER_CODE;
  766. tokeniser_grid['U'] = IDENTIFIER_CODE;
  767. tokeniser_grid['V'] = IDENTIFIER_CODE;
  768. tokeniser_grid['W'] = IDENTIFIER_CODE;
  769. tokeniser_grid['X'] = IDENTIFIER_CODE;
  770. tokeniser_grid['Y'] = IDENTIFIER_CODE;
  771. tokeniser_grid['Z'] = IDENTIFIER_CODE;
  772. tokeniser_grid['_'] = IDENTIFIER_CODE;
  773. }
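/* Worked example (editorial addition): '-' begins separators 0 ("->"),
   1 ("-->"), 2 ("--") and 3 ("-") in the list above, so the loop leaves
   tokeniser_grid['-'] == 16*0 + 4 == 4: that is, n = 0 and m = 4, meaning
   "try separators 0 to 3 in order". */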
  774. /* ------------------------------------------------------------------------- */
  775. /* Definition of a lexical block: a source file or a string containing */
  776. /* text for lexical analysis; an independent source from the point of */
  777. /* view of issuing error reports. */
  778. /* ------------------------------------------------------------------------- */
  779. typedef struct LexicalBlock_s
  780. { char *filename; /* Full translated name */
  781. int main_flag; /* TRUE if the main file
  782. (the first one opened) */
  783. int sys_flag; /* TRUE if a System_File */
  784. int source_line; /* Line number count */
  785. int line_start; /* Char number within file
  786. where the current line
  787. starts */
  788. int chars_read; /* Char number of read pos */
  789. int file_no; /* Or 255 if not from a
  790. file; used for debug
  791. information */
  792. } LexicalBlock;
  793. static LexicalBlock NoFileOpen =
  794. { "<before compilation>", FALSE, FALSE, 0, 0, 0, 255 };
  795. static LexicalBlock MakingOutput =
  796. { "<constructing output>", FALSE, FALSE, 0, 0, 0, 255 };
  797. static LexicalBlock StringLB =
  798. { "<veneer routine>", FALSE, TRUE, 0, 0, 0, 255 };
  799. static LexicalBlock *CurrentLB; /* The current lexical
  800. block of input text */
  801. extern void declare_systemfile(void)
  802. { CurrentLB->sys_flag = TRUE;
  803. }
  804. extern int is_systemfile(void)
  805. { return ((CurrentLB->sys_flag)?1:0);
  806. }
  807. extern debug_location get_current_debug_location(void)
  808. { debug_location result;
  809. /* Assume that all input characters are one byte. */
  810. result.file_index = CurrentLB->file_no;
  811. result.beginning_byte_index = CurrentLB->chars_read - LOOKAHEAD_SIZE;
  812. result.end_byte_index = result.beginning_byte_index;
  813. result.beginning_line_number = CurrentLB->source_line;
  814. result.end_line_number = result.beginning_line_number;
  815. result.beginning_character_number =
  816. CurrentLB->chars_read - CurrentLB->line_start;
  817. result.end_character_number = result.beginning_character_number;
  818. return result;
  819. }
  820. static debug_location ErrorReport_debug_location;
  821. extern void report_errors_at_current_line(void)
  822. { ErrorReport.line_number = CurrentLB->source_line;
  823. ErrorReport.file_number = CurrentLB->file_no;
  824. if (ErrorReport.file_number == 255)
  825. ErrorReport.file_number = -1;
  826. ErrorReport.source = CurrentLB->filename;
  827. ErrorReport.main_flag = CurrentLB->main_flag;
  828. if (debugfile_switch)
  829. ErrorReport_debug_location = get_current_debug_location();
  830. }
  831. extern debug_location get_error_report_debug_location(void)
  832. { return ErrorReport_debug_location;
  833. }
  834. extern int32 get_current_line_start(void)
  835. { return CurrentLB->line_start;
  836. }
  837. /* ------------------------------------------------------------------------- */
  838. /* Hash printing and line counting */
  839. /* ------------------------------------------------------------------------- */
  840. static void print_hash(void)
  841. {
  842. /* Hash-printing is the practice of printing a # character every 100
  843. lines of source code (the -x switch), reassuring the user that
  844. progress is being made */
  845. if (no_hash_printed_yet)
  846. { printf("::"); no_hash_printed_yet = FALSE;
  847. }
  848. printf("#"); hash_printed_since_newline = TRUE;
  849. #ifndef MAC_FACE
  850. /* On some systems, text output is buffered to a line at a time, and
  851. this would frustrate the point of hash-printing, so: */
  852. fflush(stdout);
  853. #endif
  854. }
  855. static void reached_new_line(void)
  856. {
  857. /* Called to signal that a new line has been reached in the source code */
  858. forerrors_pointer = 0;
  859. CurrentLB->source_line++;
  860. CurrentLB->line_start = CurrentLB->chars_read;
  861. total_source_line_count++;
  862. if (total_source_line_count%100==0)
  863. { if (hash_switch) print_hash();
  864. #ifdef MAC_MPW
  865. SpinCursor(32); /* I.e., allow other tasks to run */
  866. #endif
  867. }
  868. #ifdef MAC_FACE
  869. if (total_source_line_count%((**g_pm_hndl).linespercheck) == 0)
  870. { ProcessEvents (&g_proc);
  871. if (g_proc != true)
  872. { free_arrays();
  873. close_all_source();
  874. if (temporary_files_switch)
  875. remove_temp_files();
  876. if (store_the_text)
  877. my_free(&all_text,"transcription text");
  878. abort_transcript_file();
  879. longjmp (g_fallback, 1);
  880. }
  881. }
  882. #endif
  883. }
  884. static void new_syntax_line(void)
  885. { if (source_to_analyse != NULL) forerrors_pointer = 0;
  886. report_errors_at_current_line();
  887. }
  888. /* Return 10 raised to the expo power.
  889. *
  890. * I'm avoiding the standard pow() function for a rather lame reason:
  891. * it's in the libmath (-lm) library, and I don't want to change the
  892. * build model for the compiler. So, this is implemented with a stupid
  893. * lookup table. It's faster than pow() for small values of expo.
  894. * Probably not as fast if expo is 200, but "$+1e200" is an overflow
  895. * anyway, so I don't expect that to be a problem.
  896. *
  897. * (For some reason, frexp() and ldexp(), which are used later on, do
  898. * not require libmath to be linked in.)
  899. */
  900. static double pow10_cheap(int expo)
  901. {
  902. #define POW10_RANGE (8)
  903. static double powers[POW10_RANGE*2+1] = {
  904. 0.00000001, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
  905. 1.0,
  906. 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, 10000000.0, 100000000.0
  907. };
  908. double res = 1.0;
  909. if (expo < 0) {
  910. for (; expo < -POW10_RANGE; expo += POW10_RANGE) {
  911. res *= powers[0];
  912. }
  913. return res * powers[POW10_RANGE+expo];
  914. }
  915. else {
  916. for (; expo > POW10_RANGE; expo -= POW10_RANGE) {
  917. res *= powers[POW10_RANGE*2];
  918. }
  919. return res * powers[POW10_RANGE+expo];
  920. }
  921. }
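/* For instance (editorial addition): pow10_cheap(3) returns powers[11]
   == 1000.0 straight from the table, while pow10_cheap(-10) multiplies
   once by powers[0] == 1e-8 and then returns 1e-8 * powers[6] == 1e-10. */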
  922. /* Return the IEEE-754 single-precision encoding of a floating-point
  923. * number. See http://www.psc.edu/general/software/packages/ieee/ieee.php
  924. * for an explanation.
  925. *
  926. * The number is provided in the pieces it was parsed in:
  927. * [+|-] intv "." fracv "e" [+|-]expo
  928. *
  929. * If the magnitude is too large (beyond about 3.4e+38), this returns
  930. * an infinite value (0x7f800000 or 0xff800000). If the magnitude is too
  931. * small (below about 1e-45), this returns a zero value (0x00000000 or
  932. * 0x80000000). If any of the inputs are NaN, this returns NaN (but the
  933. * lexer should never do that).
  934. *
  935. * Note that using a float constant does *not* set the uses_float_features
  936. * flag (which would cause the game file to be labelled 3.1.2). There's
  937. * no VM feature here, just an integer. Of course, any use of the float
  938. * *opcodes* will set the flag.
  939. *
  940. * The math functions in this routine require #including <math.h>, but
  941. * they should not require linking the math library (-lm). At least,
  942. * they do not on OSX and Linux.
  943. */
  944. static int32 construct_float(int signbit, double intv, double fracv, int expo)
  945. {
  946. double absval = (intv + fracv) * pow10_cheap(expo);
  947. int32 sign = (signbit ? 0x80000000 : 0x0);
  948. double mant;
  949. int32 fbits;
  950. if (isinf(absval)) {
  951. return sign | 0x7f800000; /* infinity */
  952. }
  953. if (isnan(absval)) {
  954. return sign | 0x7fc00000;
  955. }
  956. mant = frexp(absval, &expo);
  957. /* Normalize mantissa to be in the range [1.0, 2.0) */
  958. if (0.5 <= mant && mant < 1.0) {
  959. mant *= 2.0;
  960. expo--;
  961. }
  962. else if (mant == 0.0) {
  963. expo = 0;
  964. }
  965. else {
  966. return sign | 0x7f800000; /* infinity */
  967. }
  968. if (expo >= 128) {
  969. return sign | 0x7f800000; /* infinity */
  970. }
  971. else if (expo < -126) {
  972. /* Denormalized (very small) number */
  973. mant = ldexp(mant, 126 + expo);
  974. expo = 0;
  975. }
  976. else if (!(expo == 0 && mant == 0.0)) {
  977. expo += 127;
  978. mant -= 1.0; /* Get rid of leading 1 */
  979. }
  980. mant *= 8388608.0; /* 2^23 */
  981. fbits = (int32)(mant + 0.5); /* round mant to nearest int */
  982. if (fbits >> 23) {
  983. /* The carry propagated out of a string of 23 1 bits. */
  984. fbits = 0;
  985. expo++;
  986. if (expo >= 255) {
  987. return sign | 0x7f800000; /* infinity */
  988. }
  989. }
  990. return (sign) | ((int32)(expo << 23)) | (fbits);
  991. }
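/* Worked example (editorial addition): construct_float(0, 1.0, 0.0, 0)
   encodes +1.0.  frexp() yields mant == 0.5, expo == 1; normalisation
   gives mant == 1.0, expo == 0; the biased exponent becomes 127 and the
   fraction bits are 0, so the routine returns (127 << 23) == 0x3f800000,
   the IEEE-754 single-precision encoding of 1.0. */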
  992. /* ------------------------------------------------------------------------- */
  993. /* Characters are read via a "pipeline" of variables, allowing us to look */
  994. /* up to three characters ahead of the current position. */
  995. /* */
  996. /* There are two possible sources: from the source files being loaded in, */
  997. /* and from a string inside Inform (which is where the code for veneer */
  998. /* routines comes from). Each source has its own get-next-character */
  999. /* routine. */
  1000. /* ------------------------------------------------------------------------- */
  1001. /* Source 1: from files */
  1002. /* */
  1003. /* Note that file_load_chars(p, size) loads "size" bytes into buffer "p" */
  1004. /* from the current input file. If the file runs out, then if it was */
  1005. /* the last source file 4 EOF characters are placed in the buffer: if it */
  1006. /* was only an Include file ending, then a '\n' character is placed there */
  1007. /* (essentially to force termination of any comment line) followed by */
  1008. /* three harmless spaces. */
  1009. /* */
  1010. /* The routine returns the number of characters it has written, and note */
  1011. /* that this conveniently ensures that all characters in the buffer come */
  1012. /* from the same file. */
  1013. /* ------------------------------------------------------------------------- */
  1014. #define SOURCE_BUFFER_SIZE 4096 /* Typical disc block size */
  1015. typedef struct Sourcefile_s
  1016. { char *buffer; /* Input buffer */
  1017. int read_pos; /* Read position in buffer */
  1018. int size; /* Number of meaningful
  1019. characters in buffer */
  1020. int la, la2, la3; /* Three characters of
  1021. lookahead pipeline */
  1022. int file_no; /* Internal file number
  1023. (1, 2, 3, ...) */
  1024. LexicalBlock LB;
  1025. } Sourcefile;
  1026. static Sourcefile *FileStack;
  1027. static int File_sp; /* Stack pointer */
  1028. static Sourcefile *CF; /* Top entry on stack */
  1029. static int last_no_files;
  1030. static void begin_buffering_file(int i, int file_no)
  1031. { int j, cnt; uchar *p;
  1032. if (i >= MAX_INCLUSION_DEPTH)
  1033. memoryerror("MAX_INCLUSION_DEPTH",MAX_INCLUSION_DEPTH);
  1034. p = (uchar *) FileStack[i].buffer;
  1035. if (i>0)
  1036. { FileStack[i-1].la = lookahead;
  1037. FileStack[i-1].la2 = lookahead2;
  1038. FileStack[i-1].la3 = lookahead3;
  1039. }
  1040. FileStack[i].file_no = file_no;
  1041. FileStack[i].size = file_load_chars(file_no,
  1042. (char *) p, SOURCE_BUFFER_SIZE);
  1043. lookahead = source_to_iso_grid[p[0]];
  1044. lookahead2 = source_to_iso_grid[p[1]];
  1045. lookahead3 = source_to_iso_grid[p[2]];
  1046. if (LOOKAHEAD_SIZE != 3)
  1047. compiler_error
  1048. ("Lexer lookahead size does not match hard-coded lookahead code");
  1049. FileStack[i].read_pos = LOOKAHEAD_SIZE;
  1050. if (file_no==1) FileStack[i].LB.main_flag = TRUE;
  1051. else FileStack[i].LB.main_flag = FALSE;
  1052. FileStack[i].LB.sys_flag = FALSE;
  1053. FileStack[i].LB.source_line = 1;
  1054. FileStack[i].LB.line_start = LOOKAHEAD_SIZE;
  1055. FileStack[i].LB.chars_read = LOOKAHEAD_SIZE;
  1056. FileStack[i].LB.filename = InputFiles[file_no-1].filename;
  1057. FileStack[i].LB.file_no = file_no;
  1058. CurrentLB = &(FileStack[i].LB);
  1059. CF = &(FileStack[i]);
  1060. /* Check for recursive inclusion */
  1061. cnt = 0;
  1062. for (j=0; j<i; j++)
  1063. { if (!strcmp(FileStack[i].LB.filename, FileStack[j].LB.filename))
  1064. cnt++;
  1065. }
  1066. if (cnt==1)
  1067. warning_named("File included more than once",
  1068. FileStack[j].LB.filename);
  1069. }
  1070. static void create_char_pipeline(void)
  1071. {
  1072. File_sp = 0;
  1073. begin_buffering_file(File_sp++, 1);
  1074. pipeline_made = TRUE; last_no_files = input_file;
  1075. }
  1076. static int get_next_char_from_pipeline(void)
  1077. { uchar *p;
  1078. while (last_no_files < input_file)
  1079. {
  1080. /* An "Include" file must have been opened since the last character
  1081. was read... */
  1082. begin_buffering_file(File_sp++, ++last_no_files);
  1083. }
  1084. last_no_files = input_file;
  1085. if (File_sp == 0)
  1086. { lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
  1087. }
  1088. if (CF->read_pos == CF->size)
  1089. { CF->size =
  1090. file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
  1091. CF->read_pos = 0;
  1092. }
  1093. else
  1094. if (CF->read_pos == -(CF->size))
  1095. { set_token_location(get_current_debug_location());
  1096. File_sp--;
  1097. if (File_sp == 0)
  1098. { lookahead = 0; lookahead2 = 0; lookahead3 = 0; return 0;
  1099. }
  1100. CF = &(FileStack[File_sp-1]);
  1101. CurrentLB = &(FileStack[File_sp-1].LB);
  1102. lookahead = CF->la; lookahead2 = CF->la2; lookahead3 = CF->la3;
  1103. if (CF->read_pos == CF->size)
  1104. { CF->size =
  1105. file_load_chars(CF->file_no, CF->buffer, SOURCE_BUFFER_SIZE);
  1106. CF->read_pos = 0;
  1107. }
  1108. set_token_location(get_current_debug_location());
  1109. }
  1110. p = (uchar *) (CF->buffer);
  1111. current = lookahead;
  1112. lookahead = lookahead2;
  1113. lookahead2 = lookahead3;
  1114. lookahead3 = source_to_iso_grid[p[CF->read_pos++]];
  1115. CurrentLB->chars_read++;
  1116. if (forerrors_pointer < 511)
  1117. forerrors_buff[forerrors_pointer++] = current;
  1118. if (current == '\n') reached_new_line();
  1119. return(current);
  1120. }
  1121. /* ------------------------------------------------------------------------- */
  1122. /* Source 2: from a string */
  1123. /* ------------------------------------------------------------------------- */
  1124. static int source_to_analyse_pointer; /* Current read position */
  1125. static int get_next_char_from_string(void)
  1126. { uchar *p = (uchar *) source_to_analyse + source_to_analyse_pointer++;
  1127. current = source_to_iso_grid[p[0]];
  1128. if (current == 0) lookahead = 0;
  1129. else lookahead = source_to_iso_grid[p[1]];
  1130. if (lookahead == 0) lookahead2 = 0;
  1131. else lookahead2 = source_to_iso_grid[p[2]];
  1132. if (lookahead2 == 0) lookahead3 = 0;
  1133. else lookahead3 = source_to_iso_grid[p[3]];
  1134. CurrentLB->chars_read++;
  1135. if (forerrors_pointer < 511)
  1136. forerrors_buff[forerrors_pointer++] = current;
  1137. if (current == '\n') reached_new_line();
  1138. return(current);
  1139. }
/* ========================================================================= */
/*   The interface between the lexer and Inform's higher levels:             */
/*                                                                           */
/*       put_token_back()             (effectively) move the read position   */
/*                                    back by one token                      */
/*                                                                           */
/*       get_next_token()             copy the token at the current read     */
/*                                    position into the triple               */
/*                                    (token_type, token_value, token_text)  */
/*                                    and move the read position forward     */
/*                                    by one                                 */
/*                                                                           */
/*       restart_lexer(source, name)  if source is NULL, initialise the      */
/*                                    lexer to read from source files;       */
/*                                    otherwise, to read from this string.   */
/* ------------------------------------------------------------------------- */
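/*  A minimal usage sketch, not compiled in, of how a higher level of the
    compiler typically drives this interface: read a token, examine the
    (token_type, token_value, token_text) triple, and push the token back
    if it turns out not to be wanted.  The helper name expect_semicolon is
    illustrative only and not part of Inform.                               */

#if 0
static int expect_semicolon(void)
{   get_next_token();
    if ((token_type == SEP_TT) && (token_value == SEMICOLON_SEP))
        return TRUE;
    put_token_back();       /* leave the token for the next caller */
    return FALSE;
}
#endif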
extern void put_token_back(void)
{   tokens_put_back++;

    if (tokens_trace_level > 0)
    {   if (tokens_trace_level == 1) printf("<- ");
        else printf("<-\n");
    }

    /*  The following error, of course, should never happen!  */

    if (tokens_put_back == CIRCLE_SIZE)
    {   compiler_error("The lexical analyser has collapsed because of a wrong \
assumption inside Inform");
        tokens_put_back--;
        return;
    }
}
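/*  Note that put_token_back() moves no text: tokens remain in the ring
    buffer circle[] (of CIRCLE_SIZE entries), and putting one back simply
    increments tokens_put_back, so the next call to get_next_token()
    re-serves an earlier ring entry (re-interpreting identifiers if the
    lexical context has changed) instead of reading fresh characters.       */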
extern void get_next_token(void)
{   int d, i, j, k, quoted_size, e, radix, context; int32 n; char *r;
    int returning_a_put_back_token = TRUE;

    context = lexical_context();

    if (tokens_put_back > 0)
    {   i = circle_position - tokens_put_back + 1;
        if (i<0) i += CIRCLE_SIZE;
        tokens_put_back--;
        if (context != token_contexts[i])
        {   j = circle[i].type;
            if ((j==0) || ((j>=100) && (j<200)))
                interpret_identifier(i, FALSE);
        }
        goto ReturnBack;
    }
    returning_a_put_back_token = FALSE;

    if (circle_position == CIRCLE_SIZE-1) circle_position = 0;
    else circle_position++;

    if (lex_p > lexeme_memory + 4*MAX_QTEXT_SIZE)
        lex_p = lexeme_memory;

    circle[circle_position].text = lex_p;
    circle[circle_position].value = 0;
    *lex_p = 0;

    StartTokenAgain:
    d = (*get_next_char)();
    e = tokeniser_grid[d];

    if (next_token_begins_syntax_line)
    {   if ((e != WHITESPACE_CODE) && (e != COMMENT_CODE))
        {   new_syntax_line();
            next_token_begins_syntax_line = FALSE;
        }
    }

    circle[circle_position].location = get_current_debug_location();

    switch(e)
    {   case 0: char_error("Illegal character found in source:", d);
            goto StartTokenAgain;

        case WHITESPACE_CODE:
            while (tokeniser_grid[lookahead] == WHITESPACE_CODE)
                (*get_next_char)();
            goto StartTokenAgain;

        case COMMENT_CODE:
            while ((lookahead != '\n') && (lookahead != 0))
                (*get_next_char)();
            goto StartTokenAgain;

        case EOF_CODE:
            circle[circle_position].type = EOF_TT;
            strcpy(lex_p, "<end of file>");
            lex_p += strlen(lex_p) + 1;
            break;

        case DIGIT_CODE:
            radix = 10;
            ReturnNumber:
            n=0;
            do
            {   n = n*radix + character_digit_value[d];
                *lex_p++ = d;
            } while ((character_digit_value[lookahead] < radix)
                     && (d = (*get_next_char)(), TRUE));

            *lex_p++ = 0;
            circle[circle_position].type = NUMBER_TT;
            circle[circle_position].value = n;
            break;

            FloatNumber:
            {   int expo=0; double intv=0, fracv=0;
                int expocount=0, intcount=0, fraccount=0;
                int signbit = (d == '-');
                *lex_p++ = d;
                while (character_digit_value[lookahead] < 10) {
                    intv = 10.0*intv + character_digit_value[lookahead];
                    intcount++;
                    *lex_p++ = lookahead;
                    (*get_next_char)();
                }
                if (lookahead == '.') {
                    double fracpow = 1.0;
                    *lex_p++ = lookahead;
                    (*get_next_char)();
                    while (character_digit_value[lookahead] < 10) {
                        fracpow *= 0.1;
                        fracv = fracv + fracpow*character_digit_value[lookahead];
                        fraccount++;
                        *lex_p++ = lookahead;
                        (*get_next_char)();
                    }
                }
                if (lookahead == 'e' || lookahead == 'E') {
                    int exposign = 0;
                    *lex_p++ = lookahead;
                    (*get_next_char)();
                    if (lookahead == '+' || lookahead == '-') {
                        exposign = (lookahead == '-');
                        *lex_p++ = lookahead;
                        (*get_next_char)();
                    }
                    while (character_digit_value[lookahead] < 10) {
                        expo = 10*expo + character_digit_value[lookahead];
                        expocount++;
                        *lex_p++ = lookahead;
                        (*get_next_char)();
                    }
                    if (expocount == 0)
                        error("Floating-point literal must have digits after the 'e'");
                    if (exposign) { expo = -expo; }
                }
                if (intcount + fraccount == 0)
                    error("Floating-point literal must have digits");
                n = construct_float(signbit, intv, fracv, expo);
            }
            *lex_p++ = 0;
            circle[circle_position].type = NUMBER_TT;
            circle[circle_position].value = n;
            if (!glulx_mode && dont_enter_into_symbol_table != -2)
                error("Floating-point literals are not available in Z-code");
            break;
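            /*  Worked example for the FloatNumber code above (illustrative):
                the literal $-1.25e2 arrives here with signbit set, and is
                accumulated as intv = 1, fracv = 0.25, expo = 2 before being
                packed into a value by construct_float().                   */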
        case RADIX_CODE:
            radix = 16; d = (*get_next_char)();
            if (d == '-' || d == '+') { goto FloatNumber; }
            if (d == '$') { d = (*get_next_char)(); radix = 2; }
            if (character_digit_value[d] >= radix)
            {   if (radix == 2)
                    error("Binary number expected after '$$'");
                else
                    error("Hexadecimal number expected after '$'");
            }
            goto ReturnNumber;
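            /*  Illustrative examples for the RADIX_CODE case above:
                "$1A" is read with radix 16 and yields 26; "$$1010" switches
                to radix 2 and yields 10; "$+1.5" and "$-1.5" are routed to
                FloatNumber above as floating-point literals.               */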
        case QUOTE_CODE:     /* Single-quotes: scan a literal string */
            quoted_size=0;
            do
            {   e = d; d = (*get_next_char)(); *lex_p++ = d;
                if (quoted_size++==64)
                {   error(
                    "Too much text for one pair of quotations '...' to hold");
                    *lex_p='\''; break;
                }
                if ((d == '\'') && (e != '@'))
                {   if (quoted_size == 1)
                    {   d = (*get_next_char)(); *lex_p++ = d;
                        if (d != '\'')
                            error("No text between quotation marks ''");
                    }
                    break;
                }
            } while (d != EOF);
            if (d==EOF) ebf_error("'\''", "end of file");

            *(lex_p-1) = 0;
            circle[circle_position].type = SQ_TT;
            break;

        case DQUOTE_CODE:    /* Double-quotes: scan a literal string */
            quoted_size=0;
            do
            {   d = (*get_next_char)(); *lex_p++ = d;
                if (quoted_size++==MAX_QTEXT_SIZE)
                {   memoryerror("MAX_QTEXT_SIZE", MAX_QTEXT_SIZE);
                    break;
                }
                if (d == '\n')
                {   lex_p--;
                    while (*(lex_p-1) == ' ') lex_p--;
                    if (*(lex_p-1) != '^') *lex_p++ = ' ';
                    while ((lookahead != EOF) &&
                           (tokeniser_grid[lookahead] == WHITESPACE_CODE))
                        (*get_next_char)();
                }
                else if (d == '\\')
                {   int newline_passed = FALSE;
                    lex_p--;
                    while ((lookahead != EOF) &&
                           (tokeniser_grid[lookahead] == WHITESPACE_CODE))
                        if ((d = (*get_next_char)()) == '\n')
                            newline_passed = TRUE;
                    if (!newline_passed)
                    {   char chb[4];
                        chb[0] = '\"'; chb[1] = lookahead;
                        chb[2] = '\"'; chb[3] = 0;
                        ebf_error("empty rest of line after '\\' in string",
                            chb);
                    }
                }
            } while ((d != EOF) && (d!='\"'));
            if (d==EOF) ebf_error("'\"'", "end of file");

            *(lex_p-1) = 0;
            circle[circle_position].type = DQ_TT;
            break;
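            /*  Illustrative note on the DQUOTE_CODE case above: a newline
                inside a double-quoted string, together with the indentation
                of the following line, is folded to a single space (unless
                the text already ends in '^'), while a '\' at the end of a
                line joins the two lines with no space at all.              */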
        case IDENTIFIER_CODE:    /* Letter or underscore: an identifier */

            *lex_p++ = d; n=1;
            while ((n<=MAX_IDENTIFIER_LENGTH)
                   && ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
                       || (tokeniser_grid[lookahead] == DIGIT_CODE)))
                n++, *lex_p++ = (*get_next_char)();

            *lex_p++ = 0;

            if (n > MAX_IDENTIFIER_LENGTH)
            {   char bad_length[100];
                sprintf(bad_length,
                    "Name exceeds the maximum length of %d characters:",
                    MAX_IDENTIFIER_LENGTH);
                error_named(bad_length, circle[circle_position].text);
            }

            if (dont_enter_into_symbol_table)
            {   circle[circle_position].type = DQ_TT;
                circle[circle_position].value = 0;
                if (dont_enter_into_symbol_table == -2)
                    interpret_identifier(circle_position, TRUE);
                break;
            }

            interpret_identifier(circle_position, FALSE);
            break;
        default:

            /*  The character is initial to at least one of the separators  */

            for (j=e>>4, k=j+(e&0x0f); j<k; j++)
            {   r = (char *) separators[j];
                if (r[1]==0)
                {   *lex_p++=d; *lex_p++=0;
                    goto SeparatorMatched;
                }
                else
                if (r[2]==0)
                {   if (*(r+1) == lookahead)
                    {   *lex_p++=d;
                        *lex_p++=(*get_next_char)();
                        *lex_p++=0;
                        goto SeparatorMatched;
                    }
                }
                else
                {   if ((*(r+1) == lookahead) && (*(r+2) == lookahead2))
                    {   *lex_p++=d;
                        *lex_p++=(*get_next_char)();
                        *lex_p++=(*get_next_char)();
                        *lex_p++=0;
                        goto SeparatorMatched;
                    }
                }
            }
            /*  The following contingency never in fact arises with the
                current set of separators, but might in future  */

            *lex_p++ = d; *lex_p++ = lookahead; *lex_p++ = lookahead2;
            *lex_p++ = 0;
            error_named("Unrecognised combination in source:", lex_p-4);
            goto StartTokenAgain;
            SeparatorMatched:

            circle[circle_position].type = SEP_TT;
            circle[circle_position].value = j;
            switch(j)
            {   case SEMICOLON_SEP: break;
                case HASHNDOLLAR_SEP:
                case HASHWDOLLAR_SEP:
                    if (tokeniser_grid[lookahead] == WHITESPACE_CODE)
                    {   error_named("Character expected after",
                            circle[circle_position].text);
                        break;
                    }
                    lex_p--;
                    *lex_p++ = (*get_next_char)();
                    while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
                           || (tokeniser_grid[lookahead] == DIGIT_CODE))
                        *lex_p++ = (*get_next_char)();
                    *lex_p++ = 0;
                    break;
                case HASHADOLLAR_SEP:
                case HASHGDOLLAR_SEP:
                case HASHRDOLLAR_SEP:
                case HASHHASH_SEP:
                    if (tokeniser_grid[lookahead] != IDENTIFIER_CODE)
                    {   error_named("Alphabetic character expected after",
                            circle[circle_position].text);
                        break;
                    }
                    lex_p--;
                    while ((tokeniser_grid[lookahead] == IDENTIFIER_CODE)
                           || (tokeniser_grid[lookahead] == DIGIT_CODE))
                        *lex_p++ = (*get_next_char)();
                    *lex_p++ = 0;
                    break;
            }
            break;
    }
    i = circle_position;

    ReturnBack:
    token_value = circle[i].value;
    token_type = circle[i].type;
    token_text = circle[i].text;
    if (!returning_a_put_back_token)
    {   set_token_location(circle[i].location);
    }
    token_contexts[i] = context;

    if (tokens_trace_level > 0)
    {   if (tokens_trace_level == 1)
            printf("'%s' ", circle[i].text);
        else
        {   printf("-> "); describe_token(circle[i]);
            printf(" ");
            if (tokens_trace_level > 2) print_context(token_contexts[i]);
            printf("\n");
        }
    }
}
static char veneer_error_title[64];

extern void restart_lexer(char *lexical_source, char *name)
{   int i;
    circle_position = 0;
    for (i=0; i<CIRCLE_SIZE; i++)
    {   circle[i].type = 0;
        circle[i].value = 0;
        circle[i].text = "(if this is ever visible, there is a bug)";
        token_contexts[i] = 0;
    }

    lex_p = lexeme_memory;
    tokens_put_back = 0;
    forerrors_pointer = 0;
    dont_enter_into_symbol_table = FALSE;
    return_sp_as_variable = FALSE;
    next_token_begins_syntax_line = TRUE;

    source_to_analyse = lexical_source;

    if (source_to_analyse == NULL)
    {   get_next_char = get_next_char_from_pipeline;
        if (!pipeline_made) create_char_pipeline();
        forerrors_buff[0] = 0; forerrors_pointer = 0;
    }
    else
    {   get_next_char = get_next_char_from_string;
        source_to_analyse_pointer = 0;
        CurrentLB = &StringLB;
        sprintf(veneer_error_title, "<veneer routine '%s'>", name);
        StringLB.filename = veneer_error_title;

        CurrentLB->source_line = 1;
        CurrentLB->line_start = 0;
        CurrentLB->chars_read = 0;
    }
}
/* ========================================================================= */
/*   Data structure management routines                                      */
/* ------------------------------------------------------------------------- */

extern void init_lexer_vars(void)
{
}

extern void lexer_begin_prepass(void)
{   total_source_line_count = 0;
    CurrentLB = &NoFileOpen;
    report_errors_at_current_line();
}

extern void lexer_begin_pass(void)
{   no_hash_printed_yet = TRUE;
    hash_printed_since_newline = FALSE;

    pipeline_made = FALSE;

    restart_lexer(NULL, NULL);
}

extern void lexer_endpass(void)
{   CurrentLB = &MakingOutput;
    report_errors_at_current_line();
}

extern void lexer_allocate_arrays(void)
{   int i;

    FileStack = my_malloc(MAX_INCLUSION_DEPTH*sizeof(Sourcefile),
        "filestack buffer");

    for (i=0; i<MAX_INCLUSION_DEPTH; i++)
        FileStack[i].buffer = my_malloc(SOURCE_BUFFER_SIZE+4,
            "source file buffer");

    lexeme_memory = my_malloc(5*MAX_QTEXT_SIZE, "lexeme memory");

    keywords_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
        "keyword hash table");
    keywords_hash_ends_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
        "keyword hash end table");
    keywords_data_table = my_calloc(sizeof(int), 3*MAX_KEYWORDS,
        "keyword hashing linked list");
    local_variable_hash_table = my_calloc(sizeof(int), HASH_TAB_SIZE,
        "local variable hash table");
    local_variable_text_table = my_malloc(
        (MAX_LOCAL_VARIABLES-1)*(MAX_IDENTIFIER_LENGTH+1),
        "text of local variable names");

    local_variable_hash_codes = my_calloc(sizeof(int), MAX_LOCAL_VARIABLES,
        "local variable hash codes");
    local_variable_texts = my_calloc(sizeof(char *), MAX_LOCAL_VARIABLES,
        "local variable text pointers");

    make_tokeniser_grid();
    make_keywords_tables();

    first_token_locations =
        my_malloc(sizeof(debug_locations), "debug locations of recent tokens");
    first_token_locations->location.file_index = 0;
    first_token_locations->location.beginning_byte_index = 0;
    first_token_locations->location.end_byte_index = 0;
    first_token_locations->location.beginning_line_number = 0;
    first_token_locations->location.end_line_number = 0;
    first_token_locations->location.beginning_character_number = 0;
    first_token_locations->location.end_character_number = 0;
    first_token_locations->next = NULL;
    first_token_locations->reference_count = 0;
    last_token_location = first_token_locations;
}

extern void lexer_free_arrays(void)
{   int i; char *p;

    for (i=0; i<MAX_INCLUSION_DEPTH; i++)
    {   p = FileStack[i].buffer;
        my_free(&p, "source file buffer");
    }
    my_free(&FileStack, "filestack buffer");
    my_free(&lexeme_memory, "lexeme memory");
    my_free(&keywords_hash_table, "keyword hash table");
    my_free(&keywords_hash_ends_table, "keyword hash end table");
    my_free(&keywords_data_table, "keyword hashing linked list");
    my_free(&local_variable_hash_table, "local variable hash table");
    my_free(&local_variable_text_table, "text of local variable names");
    my_free(&local_variable_hash_codes, "local variable hash codes");
    my_free(&local_variable_texts, "local variable text pointers");
    cleanup_token_locations(NULL);
}

/* ========================================================================= */