simple_string_search.sf 1.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. #!/usr/bin/ruby
  2. # A very simple algorithm for searching for one string within another,
  3. # using the sliding window technique, returning the position of the first
  4. # occurrence of the substring in the main string.
  5. # The algorithm runs in, essentially, linear time.
  6. func simple_index(str, substr, pos = 0) {
  7. var substr_chars = substr.chars.map { .ord }
  8. var target_len = substr_chars.len
  9. var target_sum = substr_chars.sum
  10. var current_slice = []
  11. var current_slice_len = 0
  12. var current_sum = 0
  13. var str_chars = str.chars
  14. for k in (pos .. str_chars.end) {
  15. current_slice << str_chars[k].ord
  16. current_sum += current_slice[-1]
  17. if (++current_slice_len >= target_len) {
  18. if (target_sum == current_sum) {
  19. if (current_slice == substr_chars) {
  20. return (k - target_len + 1)
  21. }
  22. }
  23. current_sum -= current_slice.shift
  24. }
  25. }
  26. return -1
  27. }
  28. func find_all_matches (substr, str) {
  29. var pos = -1
  30. var matches = []
  31. while ((pos = simple_index(str, substr, pos+1)) != -1) {
  32. matches << pos
  33. }
  34. return matches
  35. }
  36. say simple_index("Sidef is great", "S") # Returns 0
  37. say simple_index("Sidef is great", "g") # Returns 9
  38. say simple_index("Sidef is great", "great") # Also returns 9
  39. say simple_index("Sidef is great", "e", 5) # Returns 11
  40. say simple_index(File(__FILE__).read, "Sidef") # Returns 1200
  41. assert_eq(find_all_matches("bar", "foobartestbarend"), [3, 10])
  42. assert_eq(find_all_matches("bar", "foo-bar-test-bar-end"), [4, 13])
  43. assert_eq(find_all_matches("Bar", "foo-bar-test-Bar-end-Bar"), [13, 21])
  44. assert_eq(find_all_matches("-test-", "foo-bar-test-Bar-end-Bar"), [7])
  45. assert_eq(find_all_matches("-Bar-end", "foo-Bar-test-Bar-end-Bar"), [12])
  46. assert_eq(find_all_matches("-Bar", "foo-Bar-test-Bar-end-Bar"), [3, 12, 20])