hilite.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. /* hilite.c, generic syntax highlighting, Ait Emacs, Kevin Bloom, BSD 3-Clause, 2023-2025 */
  2. #include "header.h"
  3. #include "util.h"
  4. int state = ID_DEFAULT; /* highlight state parse_text() reports for the character it last handled */
  5. int next_state = ID_DEFAULT; /* value parse_text() copies into state on its next non-skipped call */
  6. int skip_count = 0; /* while > 0, parse_text() returns early without scanning; decremented on every call */
  7. int exclude_state = ID_DEFAULT; /* state parse_text() switches to while skipping, once exclude_count is 0 */
  8. int exclude_count = 0; /* skipped calls still to pass before exclude_state is applied */
  9. char_t get_at(buffer_t *bp, point_t pt)
  10. {
  11. return (*ptr(bp, pt));
  12. }
  13. void set_parse_state(buffer_t *bp, point_t pt, window_t *wp, int loop)
  14. {
  15. register point_t po;
  16. state = ID_DEFAULT;
  17. next_state = ID_DEFAULT;
  18. skip_count = 0;
  19. if(bp->b_mode != NULL && loop) {
  20. for (po =0; po < pt; po++)
  21. parse_text(bp, po, TRUE);
  22. wp->w_hilite = state;
  23. }
  24. }
  25. void write_parse_state(window_t *wp)
  26. {
  27. state = wp->w_hilite;
  28. next_state = wp->w_hilite;
  29. skip_count = 0;
  30. }
  31. /* we don't bother running the syntax logic on space chars
  32. therefore we must manually decrease the skip_count if we are
  33. in a syntax highlight that includes space chars.
  34. */
  35. void dec_skip()
  36. {
  37. skip_count--;
  38. }
  39. /* quick set to TRUE causes the syntax highlighting for keywords to
  40. be disabled. The purpose is so that when we have to determine the
  41. state at b_page we can do it quicker and reduce potential lag.

      Classifies the character at offset pt of bp and returns its
      highlight state: one of the ID_* values, or the color of a
      matching keyword entry. The function works through the file-level
      variables state, next_state, skip_count, exclude_state and
      exclude_count and updates them as it goes; set_parse_state() feeds
      it successive offsets starting at 0.

      Order of checks: pending skip, block comment start/end, line
      comment start/end, string openers, string escapes and closers,
      then (unless quick) the mode's keyword table.
  42. */
  43. int parse_text(buffer_t *bp, point_t pt, int quick)
  44. {
  45. // if(bp->b_mode == NULL)
  46. // return state;
  /* An earlier match claimed this character: report the current state without scanning. */
  /* skip_count is decremented even when it is already <= 0. */
  47. if (skip_count-- > 0) {
  48. if(exclude_count != 0)
  49. exclude_count--;
  /* Once the exclusion countdown reaches 0, switch to the deferred state (one time only). */
  50. if(exclude_state != ID_DEFAULT &&
  51. exclude_count == 0) {
  52. state = exclude_state;
  53. exclude_state = ID_DEFAULT;
  54. }
  55. return state;
  56. }
  57. char_t c_now = get_at(bp, pt);
  /* NOTE(review): evaluated even when pt == 0, i.e. get_at(bp, -1); relies on ptr() coping with that offset -- confirm. */
  58. char_t c_prev = get_at(bp, pt-1);
  59. char_t next = c_now;
  60. int valid = TRUE, k = 0;
  61. state = next_state;
  /* Block comment opener: the text at pt must match every character of the mode's mlc string. */
  /* NOTE(review): unlike the slc test below there is no check for an empty mlc string; an empty one would match everywhere. */
  62. if (state == ID_DEFAULT &&
  63. bp->b_mode != NULL &&
  64. bp->b_mode->mlc != NULL) {
  65. next = c_now;
  66. for(int i = 0; bp->b_mode->mlc[i] != '\0'; i++) {
  67. next = get_at(bp, pt + i);
  68. if(next != bp->b_mode->mlc[i]) {
  69. valid = FALSE;
  70. break;
  71. }
  72. }
  73. if(valid) {
  /* NOTE(review): fixed skip of 1, whereas the closer below uses strlen(emlc) - 1 -- presumably openers are two characters; confirm. */
  74. skip_count = 1;
  75. return (next_state = state = ID_BLOCK_COMMENT);
  76. }
  77. valid = TRUE;
  78. }
  /* Block comment closer: the text at pt matches the mode's emlc string. */
  79. if (state == ID_BLOCK_COMMENT &&
  80. bp->b_mode != NULL &&
  81. bp->b_mode->emlc != NULL) {
  82. next = c_now;
  83. for(int i = 0; bp->b_mode->emlc[i] != '\0'; i++) {
  84. next = get_at(bp, pt + i);
  85. if(next != bp->b_mode->emlc[i]) {
  86. valid = FALSE;
  87. break;
  88. }
  89. }
  90. if(valid) {
  /* The rest of the closer is skipped and still reported as comment; default state resumes afterwards. */
  91. skip_count = strlen(bp->b_mode->emlc) - 1;
  92. next_state = ID_DEFAULT;
  93. return ID_BLOCK_COMMENT;
  94. }
  95. valid = TRUE;
  96. }
  /* Line comment opener: the text at pt matches the mode's non-empty slc string. */
  97. if (state == ID_DEFAULT &&
  98. bp->b_mode != NULL &&
  99. bp->b_mode->slc != NULL &&
  100. bp->b_mode->slc[0] != '\0') {
  101. next = c_now;
  102. for(int i = 0; bp->b_mode->slc[i] != '\0'; i++) {
  103. next = get_at(bp, pt + i);
  104. if(next != bp->b_mode->slc[i]) {
  105. valid = FALSE;
  106. break;
  107. }
  108. }
  109. if(valid) {
  110. return (next_state = state = ID_LINE_COMMENT);
  111. }
  112. valid = TRUE;
  113. }
  /* A newline ends a line comment; the newline itself is reported as ID_DEFAULT. */
  114. if (state == ID_LINE_COMMENT && c_now == '\n')
  115. return (next_state = ID_DEFAULT);
  /* Double quote in default state: look ahead along the current line before opening a string. */
  116. if (state == ID_DEFAULT && c_now == '"') {
  117. int enable = FALSE;
  118. char_t z = get_at(bp, pt+1);
  119. point_t end = pos(bp, bp->b_ebuf);
  120. for(point_t i = pt+1; z != '\n' && i <= end; i++, z = get_at(bp, i)) {
  /* a closing quote exists on this line */
  121. if(z == '"') {
  122. enable = TRUE;
  123. break;
  124. }
  /* also accepted: the mode's bmls flag is off, or a backslash sits directly before a newline */
  125. if((bp->b_mode != NULL && !bp->b_mode->bmls) || (z == '\\' && get_at(bp, i+1) == '\n')) {
  126. enable = TRUE;
  127. break;
  128. }
  129. }
  130. if(enable)
  131. return (next_state = ID_DOUBLE_STRING);
  132. }
  /* Back quote opens a string only when the mode's bqas flag is set. */
  133. if (state == ID_DEFAULT &&
  134. bp->b_mode != NULL &&
  135. bp->b_mode->bqas &&
  136. c_now == '`')
  137. return (next_state = ID_BACK_STRING);
  /* Single quote (mode flag sqas): opens a string only if another single quote follows on the same line. */
  138. if (state == ID_DEFAULT &&
  139. bp->b_mode != NULL &&
  140. bp->b_mode->sqas &&
  141. c_now == '\'') {
  142. int enable = FALSE;
  143. char_t z = get_at(bp, pt+1);
  144. point_t end = pos(bp, bp->b_ebuf);
  145. for(point_t i = pt+1; z != '\n' && i <= end; i++, z = get_at(bp, i)) {
  146. if(z == '\'') {
  147. enable = TRUE;
  148. break;
  149. }
  150. }
  151. if(enable)
  152. return (next_state = ID_SINGLE_STRING);
  153. }
  /* Inside a string a backslash makes the following character be skipped, so an escaped quote cannot close it. */
  154. if (state == ID_DOUBLE_STRING && c_now == '\\') {
  155. skip_count = 1;
  156. return (next_state = ID_DOUBLE_STRING);
  157. }
  /* Closing quote: still reported as string, default state resumes on the next character. */
  158. if (state == ID_DOUBLE_STRING && c_now == '"') {
  159. next_state = ID_DEFAULT;
  160. return ID_DOUBLE_STRING;
  161. }
  162. if (state == ID_SINGLE_STRING && c_now == '\\') {
  163. skip_count = 1;
  164. return (next_state = ID_SINGLE_STRING);
  165. }
  /* NOTE(review): same condition as the back-quote test further up, which already returned; this branch cannot be taken. */
  166. if (state == ID_DEFAULT &&
  167. bp->b_mode != NULL &&
  168. bp->b_mode->bqas &&
  169. c_now == '`')
  170. return (next_state = ID_BACK_STRING);
  171. if (state == ID_BACK_STRING && c_now == '\\') {
  172. skip_count = 1;
  173. return (next_state = ID_BACK_STRING);
  174. }
  175. if (state == ID_SINGLE_STRING && c_now == '\'') {
  176. next_state = ID_DEFAULT;
  177. return ID_SINGLE_STRING;
  178. }
  179. if (state == ID_BACK_STRING && c_now == '`') {
  180. next_state = ID_DEFAULT;
  181. return ID_BACK_STRING;
  182. }
  /* Keyword table matching: only in default state and never in quick mode. */
  /* ep is the offset pos() reports for bp->b_ebuf and bounds the look-ahead loops below. */
  /* k counts buffer characters consumed by the current attempt, l indexes the keyword pattern, */
  /* sub is subtracted from k when the skip length is computed. */
  /* NOTE(review): the empty character constants '' below appear to be control-character pattern */
  /* markers whose bytes are missing from this copy of the file; as written they do not compile and */
  /* the individual markers cannot be told apart from here. */
  183. point_t ep = pos(bp, bp->b_ebuf);
  184. int sub = 1;
  185. if(bp->b_mode != NULL && !quick &&
  186. bp->b_mode->keywords != NULL &&
  187. state == ID_DEFAULT) {
  /* The table ends at the first entry whose word is NULL. */
  188. for(int i = 0; bp->b_mode->keywords[i].word != NULL; i++) {
  189. int l = 0, t = 0;
  190. k = 0;
  191. sub = 1;
  /* Exclusion bookkeeping restarts for every table entry. */
  192. exclude_count = 0;
  193. exclude_state = ID_DEFAULT;
  /* Boundary test on the preceding character: start of buffer, whitespace, or a symbol other than '-' and '_'. */
  194. if(bp->b_mode->keywords[i].word[l] != '' && (pt == 0 ||
  195. (is_symbol(c_prev) &&
  196. (c_prev != '-' && c_prev != '_'))
  197. || isspace(c_prev))) {
  198. // do nothing
  199. } else if(bp->b_mode->keywords[i].word[l] == '') {
  200. l++;
  201. } else {
  /* This returns from parse_text(), so the remaining table entries are not tried for this character. */
  202. return (state = ID_DEFAULT);
  203. }
  /* Marker that only lets the pattern start right after a newline or at offset 0. */
  204. if(bp->b_mode->keywords[i].word[l] == '') {
  205. if(c_prev != '\n' && pt != 0)
  206. return (state = ID_DEFAULT);
  207. l++;
  208. }
  /* Walk pattern and buffer together; k is set back to 0 whenever the attempt fails. */
  209. for(k = 0; bp->b_mode->keywords[i].word[l] != '\0'; k++, l++) {
  210. c_now = get_at(bp, pt+k);
  211. /* at the end */
  212. if(bp->b_mode->keywords[i].word[l] == '') {
  213. l++;
  214. if(bp->b_mode->keywords[i].word[l] == '\0') {
  /* marker was the last pattern character: consume up to the newline or ep */
  215. for(; c_now != '\n' && pt+k != ep; k++) {
  216. c_now = get_at(bp, pt+k);
  217. }
  218. k--;
  219. break;
  220. } else if(bp->b_mode->keywords[i].word[l] > 32) {
  /* marker followed by a printable character: consume until that character or ep */
  221. for(; c_now != bp->b_mode->keywords[i].word[l] && pt+k != ep; k++) {
  222. c_now = get_at(bp, pt+k);
  223. }
  224. k--;
  225. } else if(bp->b_mode->keywords[i].word[l] == '') {
  226. for(; c_now != '\n' && pt+k != ep ; k++) {
  227. c_now = get_at(bp, pt+k);
  228. if(bp->b_mode->keywords[i].word[l] == '' &&
  229. bp->b_mode->keywords[i].word[l+1] == c_now) {
  230. t = 2;
  231. break;
  232. }
  233. }
  234. if(t == 0) {
  235. k = 0;
  236. break;
  237. }
  238. if(t == 2) {
  239. l++;
  240. k--;
  241. sub++;
  242. continue;
  243. }
  244. }
  245. }
  /* Run-of-characters markers: `all` selects the variant that stops only at whitespace. */
  246. if(bp->b_mode->keywords[i].word[l] == '' ||
  247. bp->b_mode->keywords[i].word[l] == '') {
  248. int all = bp->b_mode->keywords[i].word[l] == '';
  249. if(bp->b_mode->keywords[i].word[l+1] == '\0') {
  /* marker is last in the pattern: consume to whitespace, or (when not all) to a symbol as judged by is_symbolis() with the mode's saiv */
  250. for(; !isspace(c_now) &&
  251. (all ? TRUE : !is_symbolis(
  252. c_now,
  253. bp->b_mode->saiv
  254. ));
  255. k++) {
  256. c_now = get_at(bp, pt+k);
  257. }
  258. k--;
  259. break;
  260. } else {
  261. l++;
  /* t records why the scan stopped: 1 = next pattern character found, 2 and 3 = marker-specific stops, 0 = no stop reason (attempt fails) */
  262. if(all) {
  263. for(; !isspace(c_now); k++) {
  264. if(bp->b_mode->keywords[i].word[l] == c_now) {
  265. t = 1;
  266. break;
  267. }
  268. if(bp->b_mode->keywords[i].word[l] == '' &&
  269. bp->b_mode->keywords[i].word[l+1] == c_now) {
  270. t = 2;
  271. break;
  272. }
  273. if(bp->b_mode->keywords[i].word[l] == '' &&
  274. isspace(get_at(bp, pt+k))) {
  275. t = 3;
  276. break;
  277. }
  278. if(pt+k == ep)
  279. break;
  280. c_now = get_at(bp, pt+k);
  281. }
  282. } else {
  283. for(; !isspace(c_now) &&
  284. (bp->b_mode->keywords[i].word[l] < 32 ||
  285. !is_symboli(
  286. c_now,
  287. bp->b_mode->keywords[i].word[l]
  288. ));
  289. k++) {
  290. if(bp->b_mode->keywords[i].word[l] == c_now) {
  291. t = 1;
  292. break;
  293. }
  294. if(bp->b_mode->keywords[i].word[l] == '' &&
  295. bp->b_mode->keywords[i].word[l+1] == c_now) {
  296. t = 2;
  297. break;
  298. }
  299. if(bp->b_mode->keywords[i].word[l] == '' &&
  300. isspace(get_at(bp, pt+k))) {
  301. t = 3;
  302. break;
  303. }
  304. if(pt+k == ep)
  305. break;
  306. c_now = get_at(bp, pt+k);
  307. }
  308. }
  309. if(t == 0) {
  310. k = 0;
  311. break;
  312. }
  313. if(t == 1) {
  314. k--;
  315. continue;
  316. }
  317. if(t == 2) {
  318. l++;
  319. k--;
  320. sub++;
  321. continue;
  322. }
  323. if(t == 3) {
  324. // do nothing
  325. }
  326. }
  327. }
  /* Marker that consumes a run of whitespace in the buffer. */
  328. if(bp->b_mode->keywords[i].word[l] == '') {
  329. l++;
  330. c_now = get_at(bp, pt+k);
  331. for(; isspace(c_now) && pt+k != ep; k++) {
  332. c_now = get_at(bp, pt+k);
  333. }
  334. k--;
  335. c_now = get_at(bp, pt+k);
  336. }
  /* Marker whose following pattern character must match the buffer; it enlarges sub, shortening the skip length. */
  337. if(bp->b_mode->keywords[i].word[l] == '') {
  338. if(bp->b_mode->keywords[i].word[l+1] == c_now) {
  339. sub++;
  340. l++;
  341. continue;
  342. } else {
  343. k = 0;
  344. exclude_count = 0;
  345. break;
  346. }
  347. }
  /* Exclusion marker: the entry's color is deferred via exclude_state and exclude_count grows by one. */
  348. if(bp->b_mode->keywords[i].word[l] == '') {
  349. if(bp->b_mode->keywords[i].word[l+1] == c_now) {
  350. exclude_state = bp->b_mode->keywords[i].color;
  351. exclude_count++;
  352. k--;
  353. continue;
  354. } else {
  355. k = 0;
  356. exclude_count = 0;
  357. break;
  358. }
  359. }
  /* Ordinary pattern character: must equal the buffer character, otherwise this entry fails. */
  360. if(bp->b_mode->keywords[i].word[l] != c_now) {
  361. k = 0;
  362. break;
  363. }
  364. }
  /* Acceptance test: something was consumed (k > 0), the pattern is at or one before its end, */
  /* and either the pattern character at l is a symbol or the buffer character after the match */
  /* is whitespace or a symbol other than '-' ('_' only counts when the pattern character is '_'). */
  365. c_now = get_at(bp, pt+k);
  366. int ignoreAfter = FALSE;
  367. if(is_symbol(bp->b_mode->keywords[i].word[l])) {
  368. ignoreAfter = TRUE;
  369. }
  370. if(k > 0 && (ignoreAfter || (isspace(c_now) ||
  371. (is_symbol(c_now) &&
  372. (c_now != '-' && (
  373. bp->b_mode->keywords[i].word[l] == '_' || c_now != '_'))))) &&
  374. (bp->b_mode->keywords[i].word[l] == '\0' ||
  375. bp->b_mode->keywords[i].word[l+1] == '\0')) {
  /* Match: the next k - sub calls take the skip path at the top and keep returning the state set here. */
  376. skip_count = k-sub;
  377. next_state = ID_DEFAULT;
  /* With an exclusion pending, report default now; the skip path switches to the color once exclude_count hits 0. */
  378. if(exclude_state != ID_DEFAULT) {
  379. return (state = ID_DEFAULT);
  380. }
  381. return (state = bp->b_mode->keywords[i].color);
  382. }
  383. }
  384. }
  /* Nothing matched: the current state carries over to the next character (both returns below do the same thing). */
  385. if (state != ID_DEFAULT)
  386. return (next_state = state);
  387. // if (state == ID_DEFAULT && c_now >= '0' && c_now <= '9') {
  388. // next_state = ID_DEFAULT;
  389. // return (state = ID_DIGITS);
  390. // }
  391. // if (state == ID_DEFAULT && 1 == is_symbol(c_now)) {
  392. // next_state = ID_DEFAULT;
  393. // return (state = ID_SYMBOL);
  394. // }
  395. return (next_state = state);
  396. }