index.ha (6713B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 // Returns the offset of the first instance of "needle" in a "haystack" of 5 // bytes, or void if it is not found. 6 export fn index(haystack: []u8, needle: (u8 | []u8)) (size | void) = { 7 return match (needle) { 8 case let b: u8 => 9 yield index_byte(haystack, b); 10 case let b: []u8 => 11 yield index_slice(haystack, b); 12 }; 13 }; 14 15 fn index_byte(haystack: []u8, needle: u8) (size | void) = { 16 for (let i = 0z; i < len(haystack); i += 1) { 17 if (haystack[i] == needle) { 18 return i; 19 }; 20 }; 21 }; 22 23 fn index_2bytes(haystack: []u8, needle: u16) (size | void) = { 24 if (len(haystack) > 1) { 25 let x = haystack[0]: u16; 26 for (let i = 1z; i < len(haystack); i += 1) { 27 x = x << 8 | haystack[i]; 28 if (x == needle) { 29 return i - 1; 30 }; 31 }; 32 }; 33 }; 34 35 fn index_3bytes(haystack: []u8, needle: u32) (size | void) = { 36 if (len(haystack) > 2) { 37 let x = haystack[0]: u32 << 8 | haystack[1]; 38 for (let i = 2z; i < len(haystack); i += 1) { 39 x = x << 16 >> 8 | haystack[i]; 40 if (x == needle) { 41 return i - 2; 42 }; 43 }; 44 }; 45 }; 46 47 fn index_4bytes(haystack: []u8, needle: u32) (size | void) = { 48 if (len(haystack) > 3) { 49 let x = haystack[0]: u32 << 16 | 50 haystack[1]: u32 << 8 | 51 haystack[2]; 52 for (let i = 3z; i < len(haystack); i += 1) { 53 x = x << 8 | haystack[i]; 54 if (x == needle) { 55 return i - 3; 56 }; 57 }; 58 }; 59 }; 60 61 fn index_slice(haystack: []u8, b: []u8) (size | void) = { 62 switch (len(b)) { 63 case 0 => 64 return 0; 65 case 1 => 66 return index_byte(haystack, b[0]); 67 case 2 => 68 return index_2bytes(haystack, b[0]: u16 << 8 | b[1]); 69 case 3 => 70 return index_3bytes( 71 haystack, 72 b[0]: u32 << 16 | b[1]: u32 << 8 | b[2], 73 ); 74 case 4 => 75 return index_4bytes( 76 haystack, 77 b[0]: u32 << 24 | b[1]: u32 << 16 | b[2]: u32 << 8 | b[3], 78 ); 79 case => 80 return index_tw(haystack, b); 81 }; 82 }; 83 84 // Returns the offset of the last instance of "needle" in a "haystack" of 85 // bytes, or void if it is not found. 86 export fn rindex(haystack: []u8, needle: (u8 | []u8)) (size | void) = { 87 match (needle) { 88 case let b: u8 => 89 return rindex_byte(haystack, b); 90 case let b: []u8 => 91 return rindex_slice(haystack, b); 92 }; 93 }; 94 95 fn rindex_byte(haystack: []u8, needle: u8) (size | void) = { 96 for (let i = len(haystack); i > 0; i -= 1) { 97 if (haystack[i - 1] == needle) { 98 return i - 1; 99 }; 100 }; 101 }; 102 103 fn rindex_slice(haystack: []u8, needle: []u8) (size | void) = { 104 for (let i = 0z; i + len(needle) <= len(haystack); i += 1) { 105 let r = len(haystack) - i; 106 if (equal(haystack[r - len(needle)..r], needle)) { 107 return r - len(needle); 108 }; 109 }; 110 }; 111 112 @test fn index() void = { 113 // Bytes 114 const a: [4]u8 = [1, 3, 3, 7]; 115 assert(index(a, 7) as size == 3); 116 assert(index(a, 42) is void); 117 assert(index([], 42) is void); 118 119 assert(rindex(a, 3) as size == 2); 120 assert(rindex(a, 42) is void); 121 assert(rindex([], 42) is void); 122 123 // Slices 124 const a: [4]u8 = [1, 1, 1, 2]; 125 assert(rindex(a, [1, 1]) as size == 1); 126 assert(rindex(a, [1, 2]) as size == 2); 127 128 // len(haystack) < len(needle) 129 assert(index([], [1, 2, 3]) is void); 130 assert(index([1, 2, 3], [1, 2, 3, 4]) is void); 131 132 // len(haystack) == len(needle) 133 assert(index([42, 20], [42, 20]) as size == 0); 134 assert(index([1, 1, 1, 2], [1, 1, 1, 3]) is void); 135 136 assert(index([1, 42, 24], [42, 24]) as size == 1); 137 assert(index([1, 3, 3, 7], [3, 3]) as size == 1); 138 assert(index([1, 1, 1, 2], [1, 1, 2]) as size == 1); 139 assert(index([1, 1, 1, 3, 2], [1, 1, 1, 2]) is void); 140 141 const tests: [_](str, str) = [ 142 ("3214567844", "0123456789"), 143 ("3214567889012345", "0123456789012345"), 144 ("32145678890123456789", "01234567890123456789"), 145 ("32145678890123456789012345678901234567890211", "01234567890123456789012345678901"), 146 ("321456788901234567890123456789012345678911", "0123456789012345678901234567890"), 147 ("abc", "x"), 148 ("oxoxoxoxoxoxoxoxoxoxoxox", "oy"), 149 ("x", "a"), 150 ("x01234567x89", "0123456789"), 151 ("xabcqxq", "abcqq"), 152 ("xabxc", "abc"), 153 ("xabxcqq", "abcqq"), 154 ("xbc", "abc"), 155 ("xbcd", "abcd"), 156 ("xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456xxx"), 157 ("xyz012345678", "0123456789"), 158 ("xyz0123456789012345678", "01234567890123456789"), 159 ("xyz0123456789012345678901234567890", "01234567890123456789012345678901"), 160 ]; 161 for (let i = 0z; i < len(tests); i += 1) { 162 let haystack = *(&tests[i].0: *[]u8); 163 let needle = *(&tests[i].1: *[]u8); 164 index(haystack, needle) as void; 165 }; 166 167 const tests: [_](str, str, size) = [ 168 ("", "", 0), 169 ("01234567", "01234567", 0), 170 ("0123456789", "0123456789", 0), 171 ("0123456789012345", "0123456789012345", 0), 172 ("01234567890123456789", "01234567890123456789", 0), 173 ("0123456789012345678901234567890", "0123456789012345678901234567890", 0), 174 ("01234567890123456789012345678901", "01234567890123456789012345678901", 0), 175 ("ab", "ab", 0), 176 ("abc", "a", 0), 177 ("abc", "abc", 0), 178 ("abc", "b", 1), 179 ("abc", "c", 2), 180 ("abcABCabc", "A", 3), 181 ("abcd", "abcd", 0), 182 ("abcqq", "abcqq", 0), 183 ("barfoobarfoo", "foo", 3), 184 ("foo", "", 0), 185 ("foo", "foo", 0), 186 ("foo", "o", 1), 187 ("oofofoofooo", "f", 2), 188 ("oofofoofooo", "foo", 4), 189 ("oxoxoxoxoxoxoxoxoxoxoxoy", "oy", 22), 190 ("x", "x", 0), 191 ("x01234567", "01234567", 1), 192 ("x0123456789", "0123456789", 1), 193 ("x0123456789012345", "0123456789012345", 1), 194 ("x01234567890123456789", "01234567890123456789", 1), 195 ("x0123456789012345678901234567890", "0123456789012345678901234567890", 1), 196 ("x01234567890123456789012345678901", "01234567890123456789012345678901", 1), 197 ("x0123456789012345678901234567890x01234567890123456789012345678901", "01234567890123456789012345678901", 33), 198 ("x012345678901234567890123456789x0123456789012345678901234567890", "0123456789012345678901234567890", 32), 199 ("x0123456789012345678x01234567890123456789", "01234567890123456789", 21), 200 ("x012345678901234x0123456789012345", "0123456789012345", 17), 201 ("x012345678x0123456789", "0123456789", 11), 202 ("x0123456x01234567", "01234567", 9), 203 ("xab", "ab", 1), 204 ("xabc", "abc", 1), 205 ("xabcd", "abcd", 1), 206 ("xabcqq", "abcqq", 1), 207 ("xx012345678901234567890123456789012345678901234567890123456789012", "0123456789012345678901234567890123456789", 2), 208 ("xx0123456789012345678901234567890123456789012345678901234567890120123456789012345678901234567890123456xxx", "0123456789012345678901234567890123456xxx", 65), 209 ("xxxxxx012345678901234567890123456789012345678901234567890123456789012", "012345678901234567890123456789012345678901234567890123456789012", 6), 210 ]; 211 for (let i = 0z; i < len(tests); i += 1) { 212 let haystack = *(&tests[i].0: *[]u8); 213 let needle = *(&tests[i].1: *[]u8); 214 assert(index(haystack, needle) as size == tests[i].2); 215 }; 216 };