hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

two_way.ha (1757B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use types;
      5 
      6 // Implements the so called Two-way string matching algorithm by Crochemore and
      7 // Perrin. It pre-processes the needle in O(len(needle)) steps and performs the
      8 // matching in O(len(haystack)) steps. It does so without any space overhead.
      9 fn index_tw(haystack: []u8, needle: []u8) (size | void) = {
     10 	const n = len(haystack);
     11 	const m = len(needle);
     12 	if (n < m) {
     13 		return void;
     14 	};
     15 	if (n == m) {
     16 		return if (equal(haystack, needle)) 0 else void;
     17 	};
     18 
     19 	// pre-processing
     20 	let tup1 = max_suf(needle, false);
     21 	let i = tup1.0, p = tup1.1;
     22 	let tup2 = max_suf(needle, true);
     23 	let j = tup2.0, q = tup2.1;
     24 	let ms = i;
     25 	let per = p;
     26 	if (i + 1 < j + 1) {
     27 		ms = j;
     28 		per = q;
     29 	};
     30 
     31 	let mem0 = 0z, mem = 0z;
     32 	if (equal(haystack[..ms + 1], haystack[per..per + ms + 1])) {
     33 		if (ms + 1 > m - ms - 1) {
     34 			per = ms + 2;
     35 		} else {
     36 			per = m - ms;
     37 		};
     38 		mem0 = m - per;
     39 	};
     40 
     41 
     42 	j = 0;
     43 	for (j <= n - m) {
     44 		// right half
     45 		i = if (ms + 1 > mem) ms + 1 else mem;
     46 		for (i < m && needle[i] == haystack[j + i]) {
     47 			i += 1;
     48 		};
     49 		if (i < m) {
     50 			j += i - ms;
     51 			mem = 0;
     52 			continue;
     53 		};
     54 
     55 		// left half
     56 		i = ms + 1;
     57 		for (i > mem && needle[i - 1] == haystack[j + i - 1]) {
     58 			i -= 1;
     59 		};
     60 		if (i <= mem) {
     61 			return j;
     62 		};
     63 		j += per;
     64 		mem = mem0;
     65 	};
     66 };
     67 
     68 fn max_suf(x: []u8, inv: bool) (size, size) = {
     69 	let i = types::SIZE_MAX;
     70 	let j = 0z;
     71 	let k = 1z, p = 1z;
     72 	let m = len(x);
     73 	for (j + k < m) {
     74 		let a = x[j + k];
     75 		let b = x[i + k];
     76 		if (a == b) {
     77 			if (k == p) {
     78 				j += p;
     79 				k = 1;
     80 			} else {
     81 				k += 1;
     82 			};
     83 		} else if (a < b ^^ inv) {
     84 			j += k;
     85 			k = 1;
     86 			p = j - i;
     87 		} else {
     88 			i = j;
     89 			j += 1;
     90 			k = 1;
     91 			p = 1;
     92 		};
     93 	};
     94 	return (i, p);
     95 };