two_way.ha (1757B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 use types; 5 6 // Implements the so called Two-way string matching algorithm by Crochemore and 7 // Perrin. It pre-processes the needle in O(len(needle)) steps and performs the 8 // matching in O(len(haystack)) steps. It does so without any space overhead. 9 fn index_tw(haystack: []u8, needle: []u8) (size | void) = { 10 const n = len(haystack); 11 const m = len(needle); 12 if (n < m) { 13 return void; 14 }; 15 if (n == m) { 16 return if (equal(haystack, needle)) 0 else void; 17 }; 18 19 // pre-processing 20 let tup1 = max_suf(needle, false); 21 let i = tup1.0, p = tup1.1; 22 let tup2 = max_suf(needle, true); 23 let j = tup2.0, q = tup2.1; 24 let ms = i; 25 let per = p; 26 if (i + 1 < j + 1) { 27 ms = j; 28 per = q; 29 }; 30 31 let mem0 = 0z, mem = 0z; 32 if (equal(haystack[..ms + 1], haystack[per..per + ms + 1])) { 33 if (ms + 1 > m - ms - 1) { 34 per = ms + 2; 35 } else { 36 per = m - ms; 37 }; 38 mem0 = m - per; 39 }; 40 41 42 j = 0; 43 for (j <= n - m) { 44 // right half 45 i = if (ms + 1 > mem) ms + 1 else mem; 46 for (i < m && needle[i] == haystack[j + i]) { 47 i += 1; 48 }; 49 if (i < m) { 50 j += i - ms; 51 mem = 0; 52 continue; 53 }; 54 55 // left half 56 i = ms + 1; 57 for (i > mem && needle[i - 1] == haystack[j + i - 1]) { 58 i -= 1; 59 }; 60 if (i <= mem) { 61 return j; 62 }; 63 j += per; 64 mem = mem0; 65 }; 66 }; 67 68 fn max_suf(x: []u8, inv: bool) (size, size) = { 69 let i = types::SIZE_MAX; 70 let j = 0z; 71 let k = 1z, p = 1z; 72 let m = len(x); 73 for (j + k < m) { 74 let a = x[j + k]; 75 let b = x[i + k]; 76 if (a == b) { 77 if (k == p) { 78 j += p; 79 k = 1; 80 } else { 81 k += 1; 82 }; 83 } else if (a < b ^^ inv) { 84 j += k; 85 k = 1; 86 p = j - i; 87 } else { 88 i = j; 89 j += 1; 90 k = 1; 91 p = 1; 92 }; 93 }; 94 return (i, p); 95 };