knuth-morris-pratt_string_search.sf 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899
  1. #!/usr/bin/ruby
  2. # https://rosettacode.org/wiki/Knuth-Morris-Pratt_string_search#Sidef
  3. func kmp_search (String S, String W) {
  4. S.graphemes!
  5. W.graphemes!
  6. func kmp_table {
  7. var (pos, cnd, *T) = (1, 0, -1)
  8. for (nil; pos < W.len; (++pos; ++cnd)) {
  9. if (W[pos] == W[cnd]) {
  10. T[pos] = T[cnd]
  11. }
  12. else {
  13. T[pos] = cnd
  14. while ((cnd >= 0) && (W[pos] != W[cnd])) {
  15. cnd = T[cnd]
  16. }
  17. }
  18. }
  19. T[pos] = cnd
  20. return T
  21. }
  22. var (k, T, *I) = (0, kmp_table())
  23. for (var j = 0; j < S.len; nil) {
  24. if (W[k] == S[j]) {
  25. ++j
  26. ++k
  27. if (k == W.len) {
  28. I << (j - k)
  29. k = T[k]
  30. }
  31. }
  32. elsif ((k = T[k]) < 0) {
  33. ++j
  34. ++k
  35. }
  36. }
  37. return I
  38. }
  39. var texts = [
  40. "GCTAGCTCTACGAGTCTA",
  41. "GGCTATAATGCGTA",
  42. "there would have been a time for such a word",
  43. "needle need noodle needle",
  44. "BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhārat"+
  45. "amഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரத"+
  46. "ம்BhāratamഭാരതംBhāratadēsamభారతదేశం",
  47. "InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnu"+
  48. "thusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblyl"+
  49. "anguagestoillustratetheconceptsandalgorithmsastheyarepresented",
  50. "Nearby farms grew a half acre of alfalfa on the dairy's behalf, with ba"+
  51. "les of all that alfalfa exchanged for milk.",
  52. ]
  53. var pats = ["TCTA", "TAATAAA", "word", "needle", "ഭാരതം", "put", "and", "alfalfa"]
  54. texts.each_kv {|k,v| say "text#{k} = #{v}" }; say ''
  55. pats.each_kv {|k,v|
  56. var j = (k < 6 ? k : k-1)
  57. say ("Found '#{v}' in 'text#{j}' at indices ", kmp_search(texts[j], v))
  58. }
  59. func string_search(pat, str) {
  60. kmp_search(str, pat)
  61. }
  62. say(string_search("bar", "foobartestbarend")) #=> [3, 10]
  63. say(string_search("bar", "foo-bar-test-bar-end")) #=> [4, 13]
  64. say(string_search("Bar", "foo-bar-test-Bar-end-Bar")) #=> [13, 21]
  65. say(string_search("-test-", "foo-bar-test-Bar-end-Bar")) #=> [7]
  66. say(string_search("-Bar-end", "foo-Bar-test-Bar-end-Bar")) #=> [12]
  67. say(string_search("-Bar", "foo-Bar-test-Bar-end-Bar")) #=> [3, 12, 20]
  68. __END__
  69. text0 = GCTAGCTCTACGAGTCTA
  70. text1 = GGCTATAATGCGTA
  71. text2 = there would have been a time for such a word
  72. text3 = needle need noodle needle
  73. text4 = BharôtভাৰতBharôtভারতIndiaBhāratભારતBhāratभारतBhārataಭಾರತBhāratभारतBhāratamഭാരതംBhāratभारतBhāratभारतBharôtôଭାରତBhāratਭਾਰਤBhāratamभारतम्Bārataபாரதம்BhāratamഭാരതംBhāratadēsamభారతదేశం
  74. text5 = InhisbookseriesTheArtofComputerProgrammingpublishedbyAddisonWesleyDKnuthusesanimaginarycomputertheMIXanditsassociatedmachinecodeandassemblylanguagestoillustratetheconceptsandalgorithmsastheyarepresented
  75. text6 = Nearby farms grew a half acre of alfalfa on the dairy's behalf, with bales of all that alfalfa exchanged for milk.
  76. Found 'TCTA' in 'text0' at indices [6, 14]
  77. Found 'TAATAAA' in 'text1' at indices []
  78. Found 'word' in 'text2' at indices [40]
  79. Found 'needle' in 'text3' at indices [0, 19]
  80. Found 'ഭാരതം' in 'text4' at indices [68, 138]
  81. Found 'put' in 'text5' at indices [26, 90]
  82. Found 'and' in 'text5' at indices [101, 128, 171]
  83. Found 'alfalfa' in 'text6' at indices [33, 87]