commit cd546049a280514240ed29e39af0f3ca39f3490b
parent 4f4919be38db84c32d92eab47705302c8bce1662
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date: Wed, 17 Nov 2021 14:13:43 +0100
Add strings::replace, padstart, padend, index_string
Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>
Diffstat:
4 files changed, 136 insertions(+), 7 deletions(-)
diff --git a/stdlib.mk b/stdlib.mk
@@ -1508,13 +1508,15 @@ stdlib_strings_any_srcs= \
$(STDLIB)/strings/contains.ha \
$(STDLIB)/strings/cstrings.ha \
$(STDLIB)/strings/dup.ha \
+ $(STDLIB)/strings/index.ha \
$(STDLIB)/strings/iter.ha \
+ $(STDLIB)/strings/pad.ha \
+ $(STDLIB)/strings/replace.ha \
$(STDLIB)/strings/sub.ha \
$(STDLIB)/strings/suffix.ha \
$(STDLIB)/strings/tokenize.ha \
- $(STDLIB)/strings/utf8.ha \
- $(STDLIB)/strings/index.ha \
- $(STDLIB)/strings/trim.ha
+ $(STDLIB)/strings/trim.ha \
+ $(STDLIB)/strings/utf8.ha
$(HARECACHE)/strings/strings-any.ssa: $(stdlib_strings_any_srcs) $(stdlib_rt) $(stdlib_bytes_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_types_$(PLATFORM))
@printf 'HAREC \t$@\n'
@@ -3262,13 +3264,15 @@ testlib_strings_any_srcs= \
$(STDLIB)/strings/contains.ha \
$(STDLIB)/strings/cstrings.ha \
$(STDLIB)/strings/dup.ha \
+ $(STDLIB)/strings/index.ha \
$(STDLIB)/strings/iter.ha \
+ $(STDLIB)/strings/pad.ha \
+ $(STDLIB)/strings/replace.ha \
$(STDLIB)/strings/sub.ha \
$(STDLIB)/strings/suffix.ha \
$(STDLIB)/strings/tokenize.ha \
- $(STDLIB)/strings/utf8.ha \
- $(STDLIB)/strings/index.ha \
- $(STDLIB)/strings/trim.ha
+ $(STDLIB)/strings/trim.ha \
+ $(STDLIB)/strings/utf8.ha
$(TESTCACHE)/strings/strings-any.ssa: $(testlib_strings_any_srcs) $(testlib_rt) $(testlib_bytes_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_types_$(PLATFORM))
@printf 'HAREC \t$@\n'
diff --git a/strings/index.ha b/strings/index.ha
@@ -8,7 +8,7 @@ export fn index(haystack: str, needle: (str | rune)) (size | void) = {
case r: rune =>
return index_rune(haystack, r);
case s: str =>
- abort(); // TODO
+ return index_string(haystack, s);
};
};
@@ -26,8 +26,40 @@ fn index_rune(s: str, r: rune) (size | void) = {
};
};
+// Returns the position, counted in runes, of the first occurrence of 'needle'
+// in 's', or void if 'needle' does not occur. An empty needle matches at 0.
+fn index_string(s: str, needle: str) (size | void) = {
+	let start = iter(s);
+	let pos = 0z;
+	for (true) {
+		// Compare the needle against the haystack beginning at 'start',
+		// working on a copy so that 'start' itself is not advanced.
+		let hay = start;
+		let pat = iter(needle);
+		for (true) {
+			const want = next(&pat);
+			if (want is void) {
+				// Every rune of the needle has been matched.
+				return pos;
+			};
+			const have = next(&hay);
+			if (have is void || (have as rune) != (want as rune)) {
+				break;
+			};
+		};
+		// Shift the starting point by one rune; stop once the haystack
+		// is exhausted.
+		if (next(&start) is void) {
+			break;
+		};
+		pos += 1;
+	};
+};
+
@test fn index() void = {
assert(index("hello world", 'w') as size == 6);
assert(index("こんにちは", 'ち') as size == 3);
assert(index("こんにちは", 'q') is void);
+
+ assert(index("hello", "hello") as size == 0); // needle is the whole haystack
+ assert(index("hello world!", "hello") as size == 0); // match at the start
+ assert(index("hello world!", "world") as size == 6); // match in the middle
+ assert(index("hello world!", "orld!") as size == 7); // match at the end
+ assert(index("hello world!", "word") is void); // partial match only
+ assert(index("こんにちは", "ちは") as size == 3); // index is in runes, not bytes
+ assert(index("こんにちは", "きょうは") is void);
};
diff --git a/strings/pad.ha b/strings/pad.ha
@@ -0,0 +1,57 @@
+use encoding::utf8;
+
+// Pads the start of 's' with the rune 'prefix' until the result is
+// 'target_len' runes long. If 's' already has 'target_len' or more runes, an
+// unmodified copy of 's' is returned. The caller must free the return value.
+export fn padstart(s: str, prefix: rune, target_len: size) str = {
+	// Measure 's' in runes rather than bytes, so that multi-byte input is
+	// padded to the requested width.
+	let n_runes = 0z;
+	let s_iter = iter(s);
+	for (!(next(&s_iter) is void)) {
+		n_runes += 1;
+	};
+	if (n_runes >= target_len) {
+		return dup(s);
+	};
+
+	const n_pad = target_len - n_runes;
+	// 'target_len' is only the initial capacity; append grows the buffer
+	// when 'prefix' or 's' contain multi-byte runes.
+	let res: []u8 = alloc([], target_len);
+	for (let i = 0z; i < n_pad; i += 1) {
+		append(res, encoding::utf8::encoderune(prefix)...);
+	};
+	append(res, toutf8(s)...);
+	// Return the whole buffer: cutting it to 'target_len' bytes would slice
+	// through a multi-byte rune and drop the tail of 's'.
+	return fromutf8_unsafe(res);
+};
+
+@test fn padstart() void = {
+	let s = padstart("2", '0', 5);
+	assert(s == "00002");
+	free(s);
+
+	let s = padstart("12345", '0', 5);
+	assert(s == "12345");
+	free(s);
+
+	let s = padstart("", '0', 5);
+	assert(s == "00000");
+	free(s);
+
+	// Multi-byte pad rune: the result must stay valid and keep 's'.
+	let s = padstart("2", 'あ', 3);
+	assert(s == "ああ2");
+	free(s);
+
+	// Multi-byte input: the width is counted in runes.
+	let s = padstart("こん", '0', 4);
+	assert(s == "00こん");
+	free(s);
+};
+
+// Pads the end of 's' with the rune 'prefix' until the result is 'target_len'
+// runes long. If 's' already has 'target_len' or more runes, an unmodified
+// copy of 's' is returned. The caller must free the return value.
+export fn padend(s: str, prefix: rune, target_len: size) str = {
+	// Measure 's' in runes rather than bytes, so that multi-byte input is
+	// padded to the requested width.
+	let n_runes = 0z;
+	let s_iter = iter(s);
+	for (!(next(&s_iter) is void)) {
+		n_runes += 1;
+	};
+	if (n_runes >= target_len) {
+		return dup(s);
+	};
+
+	const n_pad = target_len - n_runes;
+	// 'target_len' is only the initial capacity; append grows the buffer
+	// when 'prefix' or 's' contain multi-byte runes.
+	let res: []u8 = alloc([], target_len);
+	append(res, toutf8(s)...);
+	for (let i = 0z; i < n_pad; i += 1) {
+		append(res, encoding::utf8::encoderune(prefix)...);
+	};
+	// Return the whole buffer: cutting it to 'target_len' bytes would slice
+	// through a multi-byte rune and drop part of the padding.
+	return fromutf8_unsafe(res);
+};
+
+@test fn padend() void = {
+	let s = padend("2", '0', 5);
+	assert(s == "20000");
+	free(s);
+
+	let s = padend("12345", '0', 5);
+	assert(s == "12345");
+	free(s);
+
+	let s = padend("", '0', 5);
+	assert(s == "00000");
+	free(s);
+
+	// Multi-byte pad rune: the result must stay valid UTF-8.
+	let s = padend("2", 'あ', 3);
+	assert(s == "2ああ");
+	free(s);
+
+	// Multi-byte input: the width is counted in runes.
+	let s = padend("こん", '0', 4);
+	assert(s == "こん00");
+	free(s);
+};
diff --git a/strings/replace.ha b/strings/replace.ha
@@ -0,0 +1,36 @@
+// Replaces all non-overlapping instances of 'needle' in 's' with 'target',
+// scanning from left to right. Text inserted from 'target' is not searched
+// again, so 'target' may itself contain 'needle'. If 'needle' is empty, a copy
+// of 's' is returned. The caller must free the return value.
+export fn replace(s: str, needle: str, target: str) str = {
+	let n_needle_runes = 0z;
+	let needle_iter = iter(needle);
+	for (!(next(&needle_iter) is void)) {
+		n_needle_runes += 1;
+	};
+	// An empty needle is found at index 0 of every string, so searching
+	// for it would never make progress.
+	if (n_needle_runes == 0z) {
+		return dup(s);
+	};
+
+	// Output is accumulated in a single growing buffer instead of
+	// re-allocating the whole string for every match.
+	let buf: []u8 = [];
+	// The part of 's' that has not been searched yet.
+	let rest = s;
+	for (true) {
+		// index() and sub() both count positions in runes.
+		const match_idx = match(index(rest, needle)) {
+		case idx: size =>
+			yield idx;
+		case void =>
+			break;
+		};
+		append(buf, toutf8(sub(rest, 0, match_idx))...);
+		append(buf, toutf8(target)...);
+		// Continue after the matched needle, never inside the text that
+		// was just inserted.
+		rest = sub(rest, match_idx + n_needle_runes, end);
+	};
+	append(buf, toutf8(rest)...);
+	return fromutf8_unsafe(buf);
+};
+
+@test fn replace() void = {
+	let s = replace("Hello world!", "world", "there");
+	assert(s == "Hello there!");
+	free(s);
+
+	let s = replace("I like dogs, dogs, birds, dogs", "dogs", "cats");
+	assert(s == "I like cats, cats, birds, cats");
+	free(s);
+
+	let s = replace("こんにちは", "にち", "ばん");
+	assert(s == "こんばんは");
+	free(s);
+
+	// No occurrence: the input is returned unchanged.
+	let s = replace("abc", "x", "y");
+	assert(s == "abc");
+	free(s);
+
+	// The replacement contains the needle: must terminate.
+	let s = replace("aaa", "a", "aa");
+	assert(s == "aaaaaa");
+	free(s);
+
+	// Empty needle: must terminate and leave the input unchanged.
+	let s = replace("abc", "", "x");
+	assert(s == "abc");
+	free(s);
+};