regexp.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. /* Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees.
  2. See file COPYING. */
  3. /*
  4. * Scheme 48/POSIX regex interface
  5. */
  6. #include <sys/types.h>
  7. #include <regex.h> /* POSIX.2 */
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <unistd.h>
  11. #include "scheme48.h"
  12. extern void s48_init_posix_regex(void);
  13. static s48_ref_t posix_compile_regexp(s48_call_t call,
  14. s48_ref_t pattern,
  15. s48_ref_t extended_p,
  16. s48_ref_t ignore_case_p,
  17. s48_ref_t submatches_p,
  18. s48_ref_t newline_p),
  19. posix_regexp_match(s48_call_t call,
  20. s48_ref_t sch_regex,
  21. s48_ref_t string,
  22. s48_ref_t start,
  23. s48_ref_t submatches_p,
  24. s48_ref_t bol_p,
  25. s48_ref_t eol_p),
  26. posix_regexp_error_message(s48_call_t call,
  27. s48_ref_t pattern,
  28. s48_ref_t extended_p,
  29. s48_ref_t ignore_case_p,
  30. s48_ref_t submatches_p,
  31. s48_ref_t newline_p),
  32. posix_free_regexp(s48_call_t call, s48_ref_t sch_regex);
  33. /*
  34. * Record type imported from Scheme.
  35. */
  36. static s48_ref_t posix_regexp_match_type_binding;
  37. /*
  38. * Install all exported functions in Scheme48.
  39. */
  40. void
  41. s48_init_posix_regexp(void)
  42. {
  43. /* Export our stuff. */
  44. S48_EXPORT_FUNCTION(posix_compile_regexp);
  45. S48_EXPORT_FUNCTION(posix_regexp_match);
  46. S48_EXPORT_FUNCTION(posix_regexp_error_message);
  47. S48_EXPORT_FUNCTION(posix_free_regexp);
  48. /* Protect and import the regex-match type. */
  49. posix_regexp_match_type_binding =
  50. s48_get_imported_binding_2("posix-regexp-match-type");
  51. }
  52. /*
  53. * Interface to regcomp. We encode the flags, make the return value, and
  54. * then call regcomp() to fill it in.
  55. */
  56. static s48_ref_t
  57. posix_compile_regexp(s48_call_t call, s48_ref_t pattern,
  58. s48_ref_t extended_p, s48_ref_t ignore_case_p,
  59. s48_ref_t submatches_p, s48_ref_t newline_p)
  60. {
  61. s48_ref_t sch_regex;
  62. int status;
  63. int flags =
  64. (s48_extract_boolean_2(call, extended_p) ? REG_EXTENDED : 0) |
  65. (s48_extract_boolean_2(call, ignore_case_p) ? REG_ICASE : 0) |
  66. (s48_extract_boolean_2(call, submatches_p) ? 0 : REG_NOSUB) |
  67. (s48_extract_boolean_2(call, newline_p) ? REG_NEWLINE : 0);
  68. s48_check_byte_vector_2(call, pattern);
  69. sch_regex = s48_make_value_2(call, regex_t);
  70. status = regcomp(s48_unsafe_extract_value_pointer_2(call, sch_regex, regex_t),
  71. s48_extract_byte_vector_readonly_2(call, pattern),
  72. flags);
  73. if (status == 0)
  74. return sch_regex;
  75. else
  76. return s48_enter_long_2(call, status); /* not that it can do them much good */
  77. }
  78. /*
  79. * Interface to regexec.
  80. *
  81. * Returns #f if there is no match, #t if there is a match and submatches_p
  82. * is false, and a list of regex-match records otherwise.
  83. *
  84. * Most of this is making the buffer for the match structs and then translating
  85. * them into Scheme match records.
  86. */
  87. static s48_ref_t
  88. posix_regexp_match(s48_call_t call, s48_ref_t sch_regex, s48_ref_t string, s48_ref_t sch_start,
  89. s48_ref_t submatches_p,
  90. s48_ref_t bol_p, s48_ref_t eol_p)
  91. {
  92. int status;
  93. s48_ref_t result;
  94. int start = s48_extract_long_2(call, sch_start);
  95. int len = strlen(s48_extract_byte_vector_readonly_2(call, string));
  96. /* re_nsub doesn't include the full pattern */
  97. size_t nmatch = 1 + s48_extract_value_pointer_2(call, sch_regex, regex_t)->re_nsub;
  98. regmatch_t *pmatch,
  99. pmatch_buffer[32];
  100. int flags =
  101. (s48_extract_boolean_2(call, bol_p) ? 0 : REG_NOTBOL) |
  102. (s48_extract_boolean_2(call, eol_p) ? 0 : REG_NOTEOL);
  103. if ((start < 0) || (start > len))
  104. s48_assertion_violation_2(call,
  105. "posix_regexp_match", "start out of range", 3,
  106. sch_start,
  107. s48_enter_long_2(call, 0),
  108. s48_enter_long_2(call, len));
  109. if (nmatch <= 32)
  110. pmatch = pmatch_buffer;
  111. else {
  112. pmatch = (regmatch_t *) malloc(nmatch * sizeof(regmatch_t));
  113. if (pmatch == NULL)
  114. s48_out_of_memory_error_2(call); }
  115. status = regexec(s48_extract_value_pointer_2(call, sch_regex, regex_t),
  116. s48_extract_byte_vector_readonly_2(call, string) + start,
  117. nmatch, pmatch, flags);
  118. if (status == REG_NOMATCH)
  119. result = s48_false_2(call);
  120. else if (! s48_extract_boolean_2(call, submatches_p))
  121. result = s48_true_2(call);
  122. else {
  123. s48_ref_t matches = s48_null_2(call);
  124. s48_ref_t match;
  125. int i;
  126. for(i = nmatch - 1; i > -1; i--) {
  127. if (pmatch[i].rm_so == -1)
  128. match = s48_false_2(call);
  129. else {
  130. match = s48_make_record_2(call, posix_regexp_match_type_binding);
  131. s48_unsafe_record_set_2(call, match,
  132. 0,
  133. s48_enter_long_2(call, pmatch[i].rm_so + start));
  134. s48_unsafe_record_set_2(call, match,
  135. 1,
  136. s48_enter_long_2(call, pmatch[i].rm_eo + start));
  137. s48_unsafe_record_set_2(call, match, 2, s48_false_2(call)); } /* submatches */
  138. matches = s48_cons_2(call, match, matches); }
  139. result = matches; }
  140. if (nmatch > 32)
  141. free(pmatch);
  142. return result;
  143. }
  144. /*
  145. * Interface to regcomp.
  146. *
  147. * This takes the same arguments as `compile_regexp' but returns the error
  148. * message, if any, that `regcomp()' returns. For some reason `regerror()'
  149. * requires both the status code and the compiled pattern buffer returned
  150. * by `regcomp()'. `compile_regexp' only returned the status so we have to
  151. * redo the compilation.
  152. *
  153. */
  154. static s48_ref_t
  155. posix_regexp_error_message(s48_call_t call, s48_ref_t pattern,
  156. s48_ref_t extended_p, s48_ref_t ignore_case_p,
  157. s48_ref_t submatches_p, s48_ref_t newline_p)
  158. {
  159. regex_t compiled_regex;
  160. int status;
  161. int flags =
  162. (s48_extract_boolean_2(call, extended_p) ? REG_EXTENDED : 0) |
  163. (s48_extract_boolean_2(call, ignore_case_p) ? REG_ICASE : 0) |
  164. (s48_extract_boolean_2(call, submatches_p) ? 0 : REG_NOSUB) |
  165. (s48_extract_boolean_2(call, newline_p) ? REG_NEWLINE : 0);
  166. s48_check_string_2(call, pattern);
  167. status = regcomp(&compiled_regex, s48_extract_byte_vector_readonly_2(call, pattern), flags);
  168. if (status == 0)
  169. return s48_false_2(call);
  170. else {
  171. size_t buffer_size;
  172. s48_ref_t buffer;
  173. buffer_size = regerror(status, &compiled_regex, NULL, 0);
  174. /* For string lengths C counts the nul, Scheme doesn't. */
  175. buffer = s48_make_byte_vector_2(call, buffer_size);
  176. regerror(status,
  177. &compiled_regex,
  178. s48_extract_byte_vector_readonly_2(call, buffer),
  179. buffer_size);
  180. return buffer; }
  181. }
  182. /*
  183. * Stub for regfree().
  184. */
  185. static s48_ref_t
  186. posix_free_regexp(s48_call_t call, s48_ref_t sch_regex)
  187. {
  188. regfree(s48_extract_value_pointer_2(call, sch_regex, regex_t));
  189. return s48_unspecific_2(call);
  190. }