iter.ha (3989B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use encoding::utf8;

// State of a rune-by-rune walk over a string. "dec" is the UTF-8 decoder over
// the string's bytes and holds the current byte offset; "reverse" is true for
// iterators created with [[riter]] and false for those created with [[iter]].
export type iterator = struct {
	dec: utf8::decoder,
	reverse: bool,
};

// Creates a string iterator positioned at the beginning of the string. An
// iterator is a plain value, so copying it saves its current position.
//
// 	let iter = strings::iter("hi!");
// 	strings::next(&iter);	// 'h'
// 	strings::next(&iter);	// 'i'
//
// 	// The copy continues from where the original was when copied:
// 	let dup = iter;
// 	strings::next(&iter);	// '!'
// 	strings::next(&iter);	// done
// 	strings::next(&dup);	// '!'
// 	strings::next(&dup);	// done
export fn iter(src: str) iterator = {
	const bytes = toutf8(src);
	return iterator {
		dec = utf8::decode(bytes),
		reverse = false,
	};
};

// Creates a string iterator positioned at the end of the string. Each call to
// [[next]] moves it one rune backwards, towards the beginning.
export fn riter(src: str) iterator = {
	let start = utf8::decode(toutf8(src));
	// Begin just past the final byte so that the first backwards step
	// lands on the last rune.
	start.offs = len(src);
	return iterator {
		dec = start,
		reverse = true,
	};
};

// Returns the next rune from an iterator, or done if there are none left.
//
// Be aware that a rune is not the minimum lexicographical unit of language in
// Unicode strings. If you use these runes to construct a new string,
// reordering, editing, or omitting any of the runes without careful discretion
// may cause linguistic errors to arise. To avoid this, you may need to use a
// third-party Unicode module instead.
export fn next(iter: *iterator) (rune | done) = {
	// A reverse iterator advances by stepping the decoder backwards.
	const forward = !iter.reverse;
	return move(forward, iter);
};

// Returns the previous rune from an iterator, i.e. steps in the direction
// opposite to [[next]]. Returns done when there is nothing left in that
// direction: at the start of the string for an iterator created with [[iter]],
// or at the end of the string for one created with [[riter]].
export fn prev(iter: *iterator) (rune | done) = {
	// Stepping back is a forward decoder step only for reverse iterators.
	const forward = iter.reverse;
	return move(forward, iter);
};

// Steps the iterator's decoder by one rune, forwards through the bytes if
// "forward" is true and backwards otherwise. The decoder's void result (no
// rune in that direction) is reported as done. Aborts if the decoder reports
// utf8::more or utf8::invalid.
fn move(forward: bool, iter: *iterator) (rune | done) = {
	const step = if (forward) utf8::next(&iter.dec)
		else utf8::prev(&iter.dec);
	match (step) {
	case let r: rune =>
		return r;
	case void =>
		return done;
	case (utf8::more | utf8::invalid) =>
		abort("Invalid UTF-8 string (this should not happen)");
	};
};

// Returns the part of the string that [[next]] has yet to visit: from the
// current position to the end of the string for an iterator created with
// [[iter]], or from the beginning of the string up to the current position for
// one created with [[riter]].
export fn iterstr(iter: *iterator) str = {
	const offs = iter.dec.offs;
	const rest = if (iter.reverse) iter.dec.src[..offs]
		else iter.dec.src[offs..];
	return fromutf8_unsafe(rest);
};

// Returns the substring between the position of the first iterator and the
// position of the second iterator. Both iterators must originate from the same
// string, and the second iterator must not be positioned before the first one;
// otherwise the assertion in this function fails.
export fn slice(begin: *iterator, end: *iterator) str = {
	// Equal data pointers mean both decoders were set up over the same
	// byte buffer.
	const same_src = begin.dec.src: *[*]u8 == end.dec.src: *[*]u8;
	const in_order = begin.dec.offs <= end.dec.offs;
	assert(same_src && in_order);

	const bytes = begin.dec.src[begin.dec.offs..end.dec.offs];
	return fromutf8_unsafe(bytes);
};

@test fn iter() void = {
	// Forward iterator: nothing lies before the starting position.
	let it = iter("こんにちは");
	assert(prev(&it) is done);

	const head = ['こ', 'ん'];
	for (let n = 0z; n < len(head); n += 1) {
		assert(next(&it) as rune == head[n]);
	};
	assert(iterstr(&it) == "にちは");

	// Stepping back returns the rune just consumed, which next then
	// yields again.
	assert(prev(&it) as rune == 'ん');
	const tail = ['ん', 'に', 'ち', 'は'];
	for (let n = 0z; n < len(tail); n += 1) {
		assert(next(&it) as rune == tail[n]);
	};

	// Reaching the end is stable, and prev still works from there.
	assert(next(&it) is done);
	assert(next(&it) is done);
	assert(prev(&it) as rune == 'は');

	// Reverse iterator: next walks from the last rune to the first.
	it = riter("にちは");
	const backwards = ['は', 'ち', 'に'];
	for (let n = 0z; n < len(backwards); n += 1) {
		assert(next(&it) as rune == backwards[n]);
	};
	assert(next(&it) is done);
	assert(prev(&it) as rune == 'に');
};

@test fn slice() void = {
	let lo = iter("こんにちは");
	let hi = lo;

	// Two iterators at the same position delimit an empty string, in
	// either argument order.
	assert(len(slice(&lo, &hi)) == 0);
	assert(len(slice(&hi, &lo)) == 0);

	for (let n = 0; n < 2; n += 1) {
		next(&lo);
		next(&hi);
	};
	assert(len(slice(&lo, &hi)) == 0);
	assert(len(slice(&hi, &lo)) == 0);

	// Move only the upper bound ahead by three runes.
	for (let n = 0; n < 3; n += 1) {
		next(&hi);
	};
	assert(slice(&lo, &hi) == "にちは");

	// Let the lower bound catch up again.
	for (let n = 0; n < 3; n += 1) {
		next(&lo);
	};
	assert(len(slice(&lo, &hi)) == 0);
	assert(len(slice(&hi, &lo)) == 0);
};