commit 811a95be1519fb5d8c72175460eb5bc995b6d601
parent 1361b59551f0269f3c4d42a6591779f74d3a23bd
Author: Andri Yngvason <andri@yngvason.is>
Date: Sun, 7 Feb 2021 12:59:30 +0000
strings: add substring function
Diffstat:
A | strings/sub.ha | | | 54 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 54 insertions(+), 0 deletions(-)
diff --git a/strings/sub.ha b/strings/sub.ha
@@ -0,0 +1,54 @@
+use encoding::utf8;
+
+// Marker type accepted as the end argument of sub: passing it makes the
+// substring run to the end of the original string.
+export type end = void;
+
+// Advances iter by at most "end" runes and returns the sum of the sizes
+// reported by utf8::runesz for the runes consumed. Stops early when the
+// iterator has no more runes to give.
+fn utf8_byte_len_bounded(iter: *iterator, end: size) size = {
+	let nbytes = 0z;
+	let remaining = end;
+	for (remaining > 0z) {
+		match (strings::next(iter)) {
+			void => break,
+			r: rune => {
+				nbytes += utf8::runesz(r);
+			},
+		};
+		remaining -= 1z;
+	};
+	return nbytes;
+};
+
+// Drains every remaining rune from iter and returns the sum of the sizes
+// reported by utf8::runesz for them.
+fn utf8_byte_len_unbounded(iter: *iterator) size = {
+	let nbytes = 0z;
+	for (true) {
+		match (strings::next(iter)) {
+			void => break,
+			r: rune => {
+				nbytes += utf8::runesz(r);
+			},
+		};
+	};
+	return nbytes;
+};
+
+// Returns the substring of s covering the runes from index "start" through
+// index "end", both inclusive. Indices count runes as produced by iterating
+// the string, not bytes. If the end argument is given as [strings::end], the
+// end of the substring is the end of the original string. Indices that lie
+// past the last rune are clamped to the end of the string. Passing a numeric
+// end that is smaller than start is a programming error and aborts. The
+// lifetime of the substring is the same as that of the original string.
+export fn sub(s: str, start: size, end: (size | end)) str = {
+	let iter = iter(s);
+
+	// Byte offset of the first rune of the substring.
+	let start_pos = utf8_byte_len_bounded(&iter, start);
+
+	// The iterator now rests on rune "start"; keep walking from there to
+	// find the byte offset just past the last rune of the substring.
+	let end_pos = start_pos;
+	match (end) {
+		sz: size => {
+			// size is unsigned: without this check sz - start wraps
+			// around and the call silently selects the whole tail.
+			assert(start <= sz, "strings::sub: end precedes start");
+			end_pos += utf8_byte_len_bounded(&iter, sz - start + 1z);
+		},
+		end => {
+			end_pos += utf8_byte_len_unbounded(&iter);
+		},
+	};
+
+	let bytes = to_utf8(s);
+	return from_utf8_unsafe(bytes[start_pos..end_pos]);
+};
+
+// Exercises sub with ASCII input, with multi-byte runes, and with an end index
+// that lies past the end of the string.
+@test fn sub() void = {
+	assert(sub("a string", 2z, end) == "string");
+	assert(sub("a string", 0z, 0z) == "a");
+	assert(sub("a string", 0z, 2z) == "a s");
+	assert(sub("a string", 2z, 7z) == "string");
+
+	// Indices count runes, not bytes: "é" takes two bytes in UTF-8.
+	assert(sub("héllo", 1z, 2z) == "él");
+	assert(sub("héllo", 2z, end) == "llo");
+
+	// An end index beyond the last rune stops at the end of the string.
+	assert(sub("a string", 2z, 100z) == "string");
+};