hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

iter.ha (3859B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use encoding::utf8;
      5 
      6 export type iterator = struct {
      7 	dec:  utf8::decoder, // decoder state: source bytes and current offset
      8 	reverse: bool, // true when created by riter; flips next/prev direction
      9 };
     10 
     11 // Creates a string iterator positioned at the beginning of the string. An
     12 // iterator is a plain value: copying it saves its current position.
     13 //
     14 //	let iter = strings::iter("hi!");
     15 //	strings::next(&iter);	// 'h'
     16 //	strings::next(&iter);	// 'i'
     17 //
     18 //	// From here on the copy advances independently:
     19 //	let dup = iter;
     20 //	strings::next(&iter);	// '!'
     21 //	strings::next(&iter);	// done
     22 //	strings::next(&dup);	// '!'
     23 //	strings::next(&dup);	// done
     24 export fn iter(src: str) iterator = {
     25 	const dec = utf8::decode(toutf8(src));
     26 	return iterator { dec = dec, reverse = false };
     27 };
     28 
     29 // Creates a string iterator positioned at the end of the string; each call
     30 // to [[next]] then moves it backwards, towards the start of the string.
     31 export fn riter(src: str) iterator = {
     32 	let dec = utf8::decode(toutf8(src));
     33 	dec.offs = len(src); // start just past the last byte
     34 	return iterator {
     35 		dec = dec,
     36 		reverse = true,
     37 	};
     38 };
     39 
     40 // Returns the next rune from an iterator, or done if there are none left.
     41 //
     42 // A rune is not the minimum lexicographical unit of language in Unicode
     43 // strings: reordering, editing, or omitting runes when building a new string
     44 // may cause linguistic errors. A third-party Unicode module may be needed.
     45 export fn next(iter: *iterator) (rune | done) = {
     46 	return move(!iter.reverse, iter);
     47 };
     48 
     49 // Steps the iterator in the direction opposite to [[next]] and returns that
     50 // rune, or done at the string's start ([[iter]]) or its end ([[riter]]).
     51 export fn prev(iter: *iterator) (rune | done) = move(iter.reverse, iter);
     52 
     53 // Decodes one rune in the requested direction; aborts if decoding fails.
     54 fn move(forward: bool, iter: *iterator) (rune | done) = {
     55 	const r = if (forward) utf8::next(&iter.dec) else utf8::prev(&iter.dec);
     56 	match (r) {
     57 	case let ok: (rune | done) => return ok;
     58 	case (utf8::more | utf8::invalid) =>
     59 		abort("Invalid UTF-8 string (this should not happen)");
     60 	};
     61 };
     62 
     63 // Returns the part of the string that lies ahead of the iterator: from its
     64 // position to the end of the string if it was created with [[iter]], or from
     65 // the start of the string up to its position if created with [[riter]].
     66 export fn iterstr(iter: *iterator) str = {
     67 	const dec = &iter.dec;
     68 	const rest = if (iter.reverse) dec.src[..dec.offs]
     69 		else dec.src[dec.offs..];
     70 	return fromutf8_unsafe(rest);
     71 };
     72 
     73 // Returns the substring lying between the positions of two iterators. Both
     74 // iterators must originate from the same string, and the position of end must
     75 // not be before the position of begin.
     76 export fn slice(begin: *iterator, end: *iterator) str = {
     77 	const bytes = utf8::slice(&begin.dec, &end.dec);
     78 	return fromutf8_unsafe(bytes);
     79 };
     80 
     81 @test fn iter() void = {
     82 	let it = iter("こんにちは");
     83 	assert(prev(&it) is done);
     84 	const head = ['こ', 'ん'];
     85 	for (let want .. head) {
     86 		assert(next(&it) as rune == want);
     87 	};
     88 	assert(iterstr(&it) == "にちは");
     89 	assert(prev(&it) as rune == 'ん');
     90 	const tail = ['ん', 'に', 'ち', 'は'];
     91 	for (let want .. tail) {
     92 		assert(next(&it) as rune == want);
     93 	};
     94 	assert(next(&it) is done);
     95 	assert(next(&it) is done);
     96 	assert(prev(&it) as rune == 'は');
     97 
     98 	it = riter("にちは");
     99 	const backwards = ['は', 'ち', 'に'];
    100 	for (let want .. backwards) {
    101 		assert(next(&it) as rune == want);
    102 	};
    103 	assert(next(&it) is done);
    104 	assert(prev(&it) as rune == 'に');
    105 };
    106 
    107 @test fn slice() void = {
    108 	let a = iter("こんにちは");
    109 	let b = a;
    110 	assert(slice(&a, &b) == "" && slice(&b, &a) == "");
    111 	for (let n = 2; n > 0; n -= 1) {
    112 		next(&a);
    113 		next(&b);
    114 	};
    115 	assert(slice(&a, &b) == "" && slice(&b, &a) == "");
    116 	for (let n = 3; n > 0; n -= 1) {
    117 		next(&b);
    118 	};
    119 	assert(slice(&a, &b) == "にちは");
    120 	for (let n = 3; n > 0; n -= 1) {
    121 		next(&a);
    122 	};
    123 	assert(slice(&a, &b) == "" && slice(&b, &a) == "");
    124 };
    124 };