sub.ha (1481B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use encoding::utf8;

// Marker type passed as the "end" argument of [[sub]] to mean "up to the end
// of the string".
export type end = void;

// Steps the iterator forward by "end" runes and returns the decoder offset
// (iter.dec.offs) reached afterwards; [[sub]] uses that value as an index into
// the string's UTF-8 bytes. Aborts if the iterator is exhausted before "end"
// runes have been consumed.
fn utf8_byte_len_bounded(iter: *iterator, end: size) size = {
	for (let consumed = 0z; consumed < end; consumed += 1) {
		if (next(iter) is done) {
			abort("index exceeds string length");
		};
	};
	return iter.dec.offs;
};

// Returns the substring covering the runes at indices [start, end - 1]; both
// bounds count runes, not bytes. Passing [[end]] as the end argument extends
// the substring to the end of the original string. The result borrows from the
// original string and shares its lifetime.
//
// Aborts if either index lies beyond the last rune of the string, or if start
// is greater than end.
//
// Note that slicing a string rune by rune is not always linguistically
// correct and may produce surprising results. Consider a third-party Unicode
// module if that matters for your use case.
export fn sub(s: str, start: size, end: (size | end)) str = {
	// A single iterator is shared by both bounds, so the end offset is
	// found by continuing from the start position instead of rescanning.
	let runes = iter(s);
	const lo = utf8_byte_len_bounded(&runes, start);
	const hi = match (end) {
	case let stop: size =>
		assert(start <= stop, "start is higher than end");
		yield utf8_byte_len_bounded(&runes, stop - start);
	case =>
		// The parameter "end" shadows the type of the same name here,
		// so the sentinel is handled by the default case.
		yield len(s);
	};
	return fromutf8_unsafe(toutf8(s)[lo..hi]);
};

@test fn sub() void = {
	const text = "a string";
	assert(sub(text, 2, end) == "string");
	assert(sub(text, 0, 1) == "a");
	assert(sub(text, 0, 3) == "a s");
	assert(sub(text, 2, 8) == "string");
	assert(sub(text, 4, 4) == "");
};