hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

index.ha (6713B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 // Returns the offset of the first instance of "needle" in a "haystack" of
      5 // bytes, or void if it is not found.
      6 export fn index(haystack: []u8, needle: (u8 | []u8)) (size | void) = {
      7 	return match (needle) {
      8 	case let b: u8 =>
      9 		yield index_byte(haystack, b);
     10 	case let b: []u8 =>
     11 		yield index_slice(haystack, b);
     12 	};
     13 };
     14 
     15 fn index_byte(haystack: []u8, needle: u8) (size | void) = {
     16 	for (let i = 0z; i < len(haystack); i += 1) {
     17 		if (haystack[i] == needle) {
     18 			return i;
     19 		};
     20 	};
     21 };
     22 
     23 fn index_2bytes(haystack: []u8, needle: u16) (size | void) = {
     24 	if (len(haystack) > 1) {
     25 		let x = haystack[0]: u16;
     26 		for (let i = 1z; i < len(haystack); i += 1) {
     27 			x = x << 8 | haystack[i];
     28 			if (x == needle) {
     29 				return i - 1;
     30 			};
     31 		};
     32 	};
     33 };
     34 
     35 fn index_3bytes(haystack: []u8, needle: u32) (size | void) = {
     36 	if (len(haystack) > 2) {
     37 		let x = haystack[0]: u32 << 8 | haystack[1];
     38 		for (let i = 2z; i < len(haystack); i += 1) {
     39 			x = x << 16 >> 8 | haystack[i];
     40 			if (x == needle) {
     41 				return i - 2;
     42 			};
     43 		};
     44 	};
     45 };
     46 
     47 fn index_4bytes(haystack: []u8, needle: u32) (size | void) = {
     48 	if (len(haystack) > 3) {
     49 		let x = haystack[0]: u32 << 16 |
     50 			haystack[1]: u32 << 8 |
     51 			haystack[2];
     52 		for (let i = 3z; i < len(haystack); i += 1) {
     53 			x = x << 8 | haystack[i];
     54 			if (x == needle) {
     55 				return i - 3;
     56 			};
     57 		};
     58 	};
     59 };
     60 
     61 fn index_slice(haystack: []u8, b: []u8) (size | void) = {
     62 	switch (len(b)) {
     63 	case 0 =>
     64 		return 0;
     65 	case 1 =>
     66 		return index_byte(haystack, b[0]);
     67 	case 2 =>
     68 		return index_2bytes(haystack, b[0]: u16 << 8 | b[1]);
     69 	case 3 =>
     70 		return index_3bytes(
     71 			haystack,
     72 			b[0]: u32 << 16 | b[1]: u32 << 8 | b[2],
     73 		);
     74 	case 4 =>
     75 		return index_4bytes(
     76 			haystack,
     77 			b[0]: u32 << 24 | b[1]: u32 << 16 | b[2]: u32 << 8 | b[3],
     78 		);
     79 	case =>
     80 		return index_tw(haystack, b);
     81 	};
     82 };
     83 
     84 // Returns the offset of the last instance of "needle" in a "haystack" of
     85 // bytes, or void if it is not found.
     86 export fn rindex(haystack: []u8, needle: (u8 | []u8)) (size | void) = {
     87 	match (needle) {
     88 	case let b: u8 =>
     89 		return rindex_byte(haystack, b);
     90 	case let b: []u8 =>
     91 		return rindex_slice(haystack, b);
     92 	};
     93 };
     94 
     95 fn rindex_byte(haystack: []u8, needle: u8) (size | void) = {
     96 	for (let i = len(haystack); i > 0; i -= 1) {
     97 		if (haystack[i - 1] == needle) {
     98 			return i - 1;
     99 		};
    100 	};
    101 };
    102 
    103 fn rindex_slice(haystack: []u8, needle: []u8) (size | void) = {
    104 	for (let i = 0z; i + len(needle) <= len(haystack); i += 1) {
    105 		let r = len(haystack) - i;
    106 		if (equal(haystack[r - len(needle)..r], needle)) {
    107 			return r - len(needle);
    108 		};
    109 	};
    110 };
    111 
    112 @test fn index() void = {
    113 	// Bytes
    114 	const a: [4]u8 = [1, 3, 3, 7];
    115 	assert(index(a, 7) as size == 3);
    116 	assert(index(a, 42) is void);
    117 	assert(index([], 42) is void);
    118 
    119 	assert(rindex(a, 3) as size == 2);
    120 	assert(rindex(a, 42) is void);
    121 	assert(rindex([], 42) is void);
    122 
    123 	// Slices
    124 	const a: [4]u8 = [1, 1, 1, 2];
    125 	assert(rindex(a, [1, 1]) as size == 1);
    126 	assert(rindex(a, [1, 2]) as size == 2);
    127 
    128 	// len(haystack) < len(needle)
    129 	assert(index([], [1, 2, 3]) is void);
    130 	assert(index([1, 2, 3], [1, 2, 3, 4]) is void);
    131 
    132 	// len(haystack) == len(needle)
    133 	assert(index([42, 20], [42, 20]) as size == 0);
    134 	assert(index([1, 1, 1, 2], [1, 1, 1, 3]) is void);
    135 
    136 	assert(index([1, 42, 24], [42, 24]) as size == 1);
    137 	assert(index([1, 3, 3, 7], [3, 3]) as size == 1);
    138 	assert(index([1, 1, 1, 2], [1, 1, 2]) as size == 1);
    139 	assert(index([1, 1, 1, 3, 2], [1, 1, 1, 2]) is void);
    140 
    141 	const tests: [_](str, str) = [
    142 		("3214567844", "0123456789"),
    143 		("3214567889012345", "0123456789012345"),
    144 		("32145678890123456789", "01234567890123456789"),
    145 		("32145678890123456789012345678901234567890211", "01234567890123456789012345678901"),
    146 		("321456788901234567890123456789012345678911", "0123456789012345678901234567890"),
    147 		("abc", "x"),
    148 		("oxoxoxoxoxoxoxoxoxoxoxox", "oy"),
    149 		("x", "a"),
    150 		("x01234567x89", "0123456789"),
    151 		("xabcqxq", "abcqq"),
    152 		("xabxc", "abc"),
    153 		("xabxcqq", "abcqq"),
    154 		("xbc", "abc"),
    155 		("xbcd", "abcd"),
    156 		("xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx"),
    157 		("xyz012345678", "0123456789"),
    158 		("xyz0123456789012345678", "01234567890123456789"),
    159 		("xyz0123456789012345678901234567890", "01234567890123456789012345678901"),
    160 	];
    161 	for (let i = 0z; i < len(tests); i += 1) {
    162 		let haystack = *(&tests[i].0: *[]u8);
    163 		let needle = *(&tests[i].1: *[]u8);
    164 		index(haystack, needle) as void;
    165 	};
    166 
    167 	const tests: [_](str, str, size) = [
    168 		("", "", 0),
    169 		("01234567", "01234567", 0),
    170 		("0123456789", "0123456789", 0),
    171 		("0123456789012345", "0123456789012345", 0),
    172 		("01234567890123456789", "01234567890123456789", 0),
    173 		("0123456789012345678901234567890", "0123456789012345678901234567890", 0),
    174 		("01234567890123456789012345678901", "01234567890123456789012345678901", 0),
    175 		("ab", "ab", 0),
    176 		("abc", "a", 0),
    177 		("abc", "abc", 0),
    178 		("abc", "b", 1),
    179 		("abc", "c", 2),
    180 		("abcABCabc", "A", 3),
    181 		("abcd", "abcd", 0),
    182 		("abcqq", "abcqq", 0),
    183 		("barfoobarfoo", "foo", 3),
    184 		("foo", "", 0),
    185 		("foo", "foo", 0),
    186 		("foo", "o", 1),
    187 		("oofofoofooo", "f", 2),
    188 		("oofofoofooo", "foo", 4),
    189 		("oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22),
    190 		("x", "x", 0),
    191 		("x01234567", "01234567", 1),
    192 		("x0123456789", "0123456789", 1),
    193 		("x0123456789012345", "0123456789012345", 1),
    194 		("x01234567890123456789", "01234567890123456789", 1),
    195 		("x0123456789012345678901234567890", "0123456789012345678901234567890", 1),
    196 		("x01234567890123456789012345678901", "01234567890123456789012345678901", 1),
    197 		("x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33),
    198 		("x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32),
    199 		("x0123456789012345678x01234567890123456789", "01234567890123456789", 21),
    200 		("x012345678901234x0123456789012345", "0123456789012345", 17),
    201 		("x012345678x0123456789", "0123456789", 11),
    202 		("x0123456x01234567", "01234567", 9),
    203 		("xab", "ab", 1),
    204 		("xabc", "abc", 1),
    205 		("xabcd", "abcd", 1),
    206 		("xabcqq", "abcqq", 1),
    207 		("xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2),
    208 		("xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65),
    209 		("xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6),
    210 	];
    211 	for (let i = 0z; i < len(tests); i += 1) {
    212 		let haystack = *(&tests[i].0: *[]u8);
    213 		let needle = *(&tests[i].1: *[]u8);
    214 		assert(index(haystack, needle) as size == tests[i].2);
    215 	};
    216 };