iter.ha (3859B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use encoding::utf8;

// State of a rune-wise traversal over a string. Copying the struct copies the
// traversal position.
export type iterator = struct {
	// Underlying UTF-8 decoder; its offset is the iterator's position.
	dec: utf8::decoder,
	// Set by [[riter]]; swaps the directions used by [[next]] and [[prev]].
	reverse: bool,
};

// Creates a string iterator positioned at the beginning of the string. The
// iterator may be copied to save its state.
//
// 	let iter = strings::iter("hi!");
// 	strings::next(&iter);	// 'h'
// 	strings::next(&iter);	// 'i'
//
// 	// Copying the iterator copies its state:
// 	let dup = iter;
// 	strings::next(&iter);	// '!'
// 	strings::next(&iter);	// done
// 	strings::next(&dup);	// '!'
// 	strings::next(&dup);	// done
export fn iter(src: str) iterator = {
	return iterator {
		dec = utf8::decode(toutf8(src)),
		reverse = false,
	};
};

// Creates a string iterator positioned at the end of the string, which moves
// backwards with each call to [[next]].
export fn riter(src: str) iterator = {
	// Position the decoder just past the final byte before wrapping it.
	let decoder = utf8::decode(toutf8(src));
	decoder.offs = len(src);
	return iterator {
		dec = decoder,
		reverse = true,
	};
};

// Gets the next rune from an iterator, or done if there are none left.
//
// Be aware that a rune is not the minimum lexicographical unit of language in
// Unicode strings. If you use these runes to construct a new string,
// reordering, editing, or omitting any of the runes without careful discretion
// may cause linguistic errors to arise. To avoid this, you may need to use a
// third-party Unicode module instead.
export fn next(iter: *iterator) (rune | done) = {
	return move(!iter.reverse, iter);
};

// Gets the previous rune from an iterator, or done when at the start of the
// string.
export fn prev(iter: *iterator) (rune | done) = {
	return move(iter.reverse, iter);
};

// Decodes one rune from the iterator's decoder, using utf8::next when forward
// is true and utf8::prev otherwise. Aborts if the decoder reports utf8::more or
// utf8::invalid.
fn move(forward: bool, iter: *iterator) (rune | done) = {
	const res = if (forward) utf8::next(&iter.dec)
		else utf8::prev(&iter.dec);
	match (res) {
	case let r: (rune | done) =>
		return r;
	case (utf8::more | utf8::invalid) =>
		abort("Invalid UTF-8 string (this should not happen)");
	};
};

// Returns the part of the string that has not been iterated over yet: from the
// next rune to the end of the string for an iterator created with [[iter]], or
// from the beginning of the string up to the current position for one created
// with [[riter]].
export fn iterstr(iter: *iterator) str = {
	const dec = &iter.dec;
	// A reverse iterator has its remaining runes before the offset; a
	// forward iterator has them after it.
	const rest = if (iter.reverse) dec.src[..dec.offs]
		else dec.src[dec.offs..];
	return fromutf8_unsafe(rest);
};

// Returns a substring from the position of the first iterator to the position of
// the second iterator. The iterators must originate from the same string and
// the position of the second iterator must not be before the position of the
// first one.
export fn slice(begin: *iterator, end: *iterator) str = {
	// NOTE(review): the iterators themselves are handed to utf8::slice
	// rather than their dec fields; presumably this relies on dec being
	// the first field of iterator - confirm before reordering the struct.
	const bytes = utf8::slice(begin, end);
	return fromutf8_unsafe(bytes);
};

// Exercises next, prev and iterstr on a forward iterator, then next and prev
// on a reverse iterator.
@test fn iter() void = {
	let it = iter("こんにちは");
	// A fresh forward iterator has nothing before it.
	assert(prev(&it) is done);

	const expected1 = ['こ', 'ん'];
	for (let want .. expected1) {
		assert(next(&it) as rune == want);
	};
	assert(iterstr(&it) == "にちは");

	// Step back one rune, then read through to the end of the string.
	assert(prev(&it) as rune == 'ん');
	const expected2 = ['ん', 'に', 'ち', 'は'];
	for (let want .. expected2) {
		assert(next(&it) as rune == want);
	};

	// Reading past the end keeps returning done without moving further.
	assert(next(&it) is done);
	assert(next(&it) is done);
	assert(prev(&it) as rune == 'は');

	it = riter("にちは");
	const expected3 = ['は', 'ち', 'に'];
	for (let want .. expected3) {
		assert(next(&it) as rune == want);
	};
	assert(next(&it) is done);
	assert(prev(&it) as rune == 'に');
};

// Exercises slice with two iterators over the same string at equal and at
// differing positions.
@test fn slice() void = {
	let a = iter("こんにちは");
	let b = a;
	assert(len(slice(&a, &b)) == 0);
	assert(len(slice(&b, &a)) == 0);

	// Advance both iterators past the first two runes.
	for (let i = 0; i < 2; i += 1) {
		next(&a);
		next(&b);
	};
	assert(len(slice(&a, &b)) == 0);
	assert(len(slice(&b, &a)) == 0);

	// Move only the second iterator to the end of the string.
	for (let i = 0; i < 3; i += 1) {
		next(&b);
	};
	assert(slice(&a, &b) == "にちは");

	// Let the first iterator catch up.
	for (let i = 0; i < 3; i += 1) {
		next(&a);
	};
	assert(len(slice(&a, &b)) == 0);
	assert(len(slice(&b, &a)) == 0);
};