regexp.c 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. /* Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees.
  2. See file COPYING. */
  3. /*
  4. * Scheme 48/POSIX regex interface
  5. */
  6. #include <sys/types.h>
  7. #include <regex.h> /* POSIX.2 */
  8. #include <stdlib.h>
  9. #include <string.h>
  10. #include <unistd.h>
  11. #include "scheme48.h"
  12. extern void s48_init_posix_regex(void);
  13. static s48_value posix_compile_regexp(s48_value pattern,
  14. s48_value extended_p,
  15. s48_value ignore_case_p,
  16. s48_value submatches_p,
  17. s48_value newline_p),
  18. posix_regexp_match(s48_value sch_regex,
  19. s48_value string,
  20. s48_value start,
  21. s48_value submatches_p,
  22. s48_value bol_p,
  23. s48_value eol_p),
  24. posix_regexp_error_message(s48_value pattern,
  25. s48_value extended_p,
  26. s48_value ignore_case_p,
  27. s48_value submatches_p,
  28. s48_value newline_p),
  29. posix_free_regexp(s48_value sch_regex);
  30. /*
  31. * Record type imported from Scheme.
  32. */
  33. static s48_value posix_regexp_match_type_binding = S48_FALSE;
  34. /*
  35. * Install all exported functions in Scheme48.
  36. */
  37. void
  38. s48_init_posix_regexp(void)
  39. {
  40. /* Export our stuff. */
  41. S48_EXPORT_FUNCTION(posix_compile_regexp);
  42. S48_EXPORT_FUNCTION(posix_regexp_match);
  43. S48_EXPORT_FUNCTION(posix_regexp_error_message);
  44. S48_EXPORT_FUNCTION(posix_free_regexp);
  45. /* Protect and import the regex-match type. */
  46. S48_GC_PROTECT_GLOBAL(posix_regexp_match_type_binding);
  47. posix_regexp_match_type_binding =
  48. s48_get_imported_binding("posix-regexp-match-type");
  49. }
  50. /*
  51. * Interface to regcomp. We encode the flags, make the return value, and
  52. * then call regcomp() to fill it in.
  53. */
  54. static s48_value
  55. posix_compile_regexp(s48_value pattern,
  56. s48_value extended_p, s48_value ignore_case_p,
  57. s48_value submatches_p, s48_value newline_p)
  58. {
  59. s48_value sch_regex;
  60. int status;
  61. S48_DECLARE_GC_PROTECT(1);
  62. int flags = (S48_EXTRACT_BOOLEAN(extended_p) ? REG_EXTENDED : 0) |
  63. (S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE : 0) |
  64. (S48_EXTRACT_BOOLEAN(submatches_p) ? 0 : REG_NOSUB) |
  65. (S48_EXTRACT_BOOLEAN(newline_p) ? REG_NEWLINE : 0);
  66. S48_GC_PROTECT_1(pattern);
  67. S48_CHECK_BYTE_VECTOR(pattern);
  68. sch_regex = S48_MAKE_VALUE(regex_t);
  69. status = regcomp(S48_UNSAFE_EXTRACT_VALUE_POINTER(sch_regex, regex_t),
  70. S48_UNSAFE_EXTRACT_BYTE_VECTOR(pattern),
  71. flags);
  72. S48_GC_UNPROTECT();
  73. if (status == 0)
  74. return sch_regex;
  75. else
  76. return S48_UNSAFE_ENTER_FIXNUM(status); /* not that it can do them much good */
  77. }
  78. /*
  79. * Interface to regexec.
  80. *
  81. * Returns #f if there is no match, #t if there is a match and submatches_p
  82. * is false, and a list of regex-match records otherwise.
  83. *
  84. * Most of this is making the buffer for the match structs and then translating
  85. * them into Scheme match records.
  86. */
  87. static s48_value
  88. posix_regexp_match(s48_value sch_regex, s48_value string, s48_value sch_start,
  89. s48_value submatches_p,
  90. s48_value bol_p, s48_value eol_p)
  91. {
  92. int status;
  93. s48_value result;
  94. int start = s48_extract_fixnum(sch_start);
  95. int len = strlen(s48_extract_byte_vector(string));
  96. /* re_nsub doesn't include the full pattern */
  97. size_t nmatch = 1 + S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t)->re_nsub;
  98. regmatch_t *pmatch,
  99. pmatch_buffer[32];
  100. int flags = (S48_EXTRACT_BOOLEAN(bol_p) ? 0 : REG_NOTBOL) |
  101. (S48_EXTRACT_BOOLEAN(eol_p) ? 0 : REG_NOTEOL);
  102. if ((start < 0) || (start > len))
  103. s48_raise_range_error(sch_start,
  104. s48_enter_fixnum(0),
  105. s48_enter_fixnum(len));
  106. if (nmatch <= 32)
  107. pmatch = pmatch_buffer;
  108. else {
  109. pmatch = (regmatch_t *) malloc(nmatch * sizeof(regmatch_t));
  110. if (pmatch == NULL)
  111. s48_raise_out_of_memory_error(); }
  112. status = regexec(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t),
  113. S48_UNSAFE_EXTRACT_BYTE_VECTOR(string) + start,
  114. nmatch, pmatch, flags);
  115. if (status == REG_NOMATCH)
  116. result = S48_FALSE;
  117. else if (! S48_EXTRACT_BOOLEAN(submatches_p))
  118. result = S48_TRUE;
  119. else {
  120. s48_value match = S48_FALSE;
  121. s48_value matches = S48_NULL;
  122. int i;
  123. S48_DECLARE_GC_PROTECT(2);
  124. S48_GC_PROTECT_2(match, matches);
  125. for(i = nmatch - 1; i > -1; i--) {
  126. if (pmatch[i].rm_so == -1)
  127. match = S48_FALSE;
  128. else {
  129. match = s48_make_record(posix_regexp_match_type_binding);
  130. S48_UNSAFE_RECORD_SET(match,
  131. 0,
  132. s48_enter_fixnum(pmatch[i].rm_so + start));
  133. S48_UNSAFE_RECORD_SET(match,
  134. 1,
  135. s48_enter_fixnum(pmatch[i].rm_eo + start));
  136. S48_UNSAFE_RECORD_SET(match, 2, S48_FALSE); } /* submatches */
  137. matches = s48_cons(match, matches); }
  138. S48_GC_UNPROTECT();
  139. result = matches; }
  140. if (nmatch > 32)
  141. free(pmatch);
  142. return result;
  143. }
  144. /*
  145. * Interface to regcomp.
  146. *
  147. * This takes the same arguments as `compile_regexp' but returns the error
  148. * message, if any, that `regcomp()' returns. For some reason `regerror()'
  149. * requires both the status code and the compiled pattern buffer returned
  150. * by `regcomp()'. `compile_regexp' only returned the status so we have to
  151. * redo the compilation.
  152. *
  153. */
  154. static s48_value
  155. posix_regexp_error_message(s48_value pattern,
  156. s48_value extended_p, s48_value ignore_case_p,
  157. s48_value submatches_p, s48_value newline_p)
  158. {
  159. regex_t compiled_regex;
  160. int status;
  161. int flags = (S48_EXTRACT_BOOLEAN(extended_p) ? REG_EXTENDED : 0) |
  162. (S48_EXTRACT_BOOLEAN(ignore_case_p) ? REG_ICASE : 0) |
  163. (S48_EXTRACT_BOOLEAN(submatches_p) ? 0 : REG_NOSUB) |
  164. (S48_EXTRACT_BOOLEAN(newline_p) ? REG_NEWLINE : 0);
  165. S48_CHECK_STRING(pattern);
  166. status = regcomp(&compiled_regex, S48_UNSAFE_EXTRACT_BYTE_VECTOR(pattern), flags);
  167. if (status == 0)
  168. return S48_FALSE;
  169. else {
  170. size_t buffer_size;
  171. s48_value buffer;
  172. buffer_size = regerror(status, &compiled_regex, NULL, 0);
  173. /* For string lengths C counts the nul, Scheme doesn't. */
  174. buffer = s48_make_byte_vector(buffer_size);
  175. regerror(status,
  176. &compiled_regex,
  177. S48_UNSAFE_EXTRACT_BYTE_VECTOR(buffer),
  178. buffer_size);
  179. return buffer; }
  180. }
  181. /*
  182. * Stub for regfree().
  183. */
  184. static s48_value
  185. posix_free_regexp(s48_value sch_regex)
  186. {
  187. regfree(S48_EXTRACT_VALUE_POINTER(sch_regex, regex_t));
  188. return S48_UNSPECIFIC;
  189. }