hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit cd546049a280514240ed29e39af0f3ca39f3490b
parent 4f4919be38db84c32d92eab47705302c8bce1662
Author: Vlad-Stefan Harbuz <vlad@vladh.net>
Date:   Wed, 17 Nov 2021 14:13:43 +0100

Add strings::replace,padleft,padright,index_string

Signed-off-by: Vlad-Stefan Harbuz <vlad@vladh.net>

Diffstat:
M stdlib.mk | 16 ++++++++++------
M strings/index.ha | 34 +++++++++++++++++++++++++++++++++-
A strings/pad.ha | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A strings/replace.ha | 36 ++++++++++++++++++++++++++++++++++++
4 files changed, 136 insertions(+), 7 deletions(-)

diff --git a/stdlib.mk b/stdlib.mk
@@ -1508,13 +1508,15 @@ stdlib_strings_any_srcs= \
 	$(STDLIB)/strings/contains.ha \
 	$(STDLIB)/strings/cstrings.ha \
 	$(STDLIB)/strings/dup.ha \
+	$(STDLIB)/strings/index.ha \
 	$(STDLIB)/strings/iter.ha \
+	$(STDLIB)/strings/pad.ha \
+	$(STDLIB)/strings/replace.ha \
 	$(STDLIB)/strings/sub.ha \
 	$(STDLIB)/strings/suffix.ha \
 	$(STDLIB)/strings/tokenize.ha \
-	$(STDLIB)/strings/utf8.ha \
-	$(STDLIB)/strings/index.ha \
-	$(STDLIB)/strings/trim.ha
+	$(STDLIB)/strings/trim.ha \
+	$(STDLIB)/strings/utf8.ha
 
 $(HARECACHE)/strings/strings-any.ssa: $(stdlib_strings_any_srcs) $(stdlib_rt) $(stdlib_bytes_$(PLATFORM)) $(stdlib_encoding_utf8_$(PLATFORM)) $(stdlib_types_$(PLATFORM))
 	@printf 'HAREC \t$@\n'
@@ -3262,13 +3264,15 @@ testlib_strings_any_srcs= \
 	$(STDLIB)/strings/contains.ha \
 	$(STDLIB)/strings/cstrings.ha \
 	$(STDLIB)/strings/dup.ha \
+	$(STDLIB)/strings/index.ha \
 	$(STDLIB)/strings/iter.ha \
+	$(STDLIB)/strings/pad.ha \
+	$(STDLIB)/strings/replace.ha \
 	$(STDLIB)/strings/sub.ha \
 	$(STDLIB)/strings/suffix.ha \
 	$(STDLIB)/strings/tokenize.ha \
-	$(STDLIB)/strings/utf8.ha \
-	$(STDLIB)/strings/index.ha \
-	$(STDLIB)/strings/trim.ha
+	$(STDLIB)/strings/trim.ha \
+	$(STDLIB)/strings/utf8.ha
 
 $(TESTCACHE)/strings/strings-any.ssa: $(testlib_strings_any_srcs) $(testlib_rt) $(testlib_bytes_$(PLATFORM)) $(testlib_encoding_utf8_$(PLATFORM)) $(testlib_types_$(PLATFORM))
 	@printf 'HAREC \t$@\n'
diff --git a/strings/index.ha b/strings/index.ha
@@ -8,7 +8,7 @@ export fn index(haystack: str, needle: (str | rune)) (size | void) = {
 	case r: rune =>
 		return index_rune(haystack, r);
 	case s: str =>
-		abort(); // TODO
+		return index_string(haystack, s);
 	};
 };
 
@@ -26,8 +26,40 @@ fn index_rune(s: str, r: rune) (size | void) = {
 	};
 };
 
+fn index_string(s: str, needle: str) (size | void) = {
+	let s_iter = iter(s);
+	for (let i = 0z; true; i += 1) {
+		let rest_iter = s_iter;
+		let needle_iter = iter(needle);
+		for (true) {
+			const rest_rune = next(&rest_iter);
+			const needle_rune = next(&needle_iter);
+			if (rest_rune is void && !(needle_rune is void)) {
+				break;
+			};
+			if (needle_rune is void) {
+				return i;
+			};
+			if ((rest_rune as rune) != (needle_rune as rune)) {
+				break;
+			};
+		};
+		if (next(&s_iter) is void) {
+			break;
+		};
+	};
+};
+
 @test fn index() void = {
 	assert(index("hello world", 'w') as size == 6);
 	assert(index("こんにちは", 'ち') as size == 3);
 	assert(index("こんにちは", 'q') is void);
+
+	assert(index("hello", "hello") as size == 0);
+	assert(index("hello world!", "hello") as size == 0);
+	assert(index("hello world!", "world") as size == 6);
+	assert(index("hello world!", "orld!") as size == 7);
+	assert(index("hello world!", "word") is void);
+	assert(index("こんにちは", "ちは") as size == 3);
+	assert(index("こんにちは", "きょうは") is void);
 };
diff --git a/strings/pad.ha b/strings/pad.ha
@@ -0,0 +1,57 @@
+use encoding::utf8;
+
+// Pads a string's start with 'prefix' until it reaches length 'target_len'.
+// The caller must free the return value.
+export fn padstart(s: str, prefix: rune, target_len: size) str = {
+	if (len(s) >= target_len) {
+		return dup(s);
+	};
+	let res: []u8 = alloc([], target_len);
+	for (let i = 0z; i < target_len - len(s); i += 1) {
+		append(res, encoding::utf8::encoderune(prefix)...);
+	};
+	append(res, toutf8(s)...);
+	return fromutf8_unsafe(res[..target_len]);
+};
+
+@test fn padstart() void = {
+	let s = padstart("2", '0', 5);
+	assert(s == "00002");
+	free(s);
+
+	let s = padstart("12345", '0', 5);
+	assert(s == "12345");
+	free(s);
+
+	let s = padstart("", '0', 5);
+	assert(s == "00000");
+	free(s);
+};
+
+// Pads a string's end with 'prefix' until it reaches length 'target_len'.
+// The caller must free the return value.
+export fn padend(s: str, prefix: rune, target_len: size) str = {
+	if (len(s) >= target_len) {
+		return dup(s);
+	};
+	let res: []u8 = alloc([], target_len);
+	append(res, toutf8(s)...);
+	for (let i = 0z; i < target_len - len(s); i += 1) {
+		append(res, encoding::utf8::encoderune(prefix)...);
+	};
+	return fromutf8_unsafe(res[..target_len]);
+};
+
+@test fn padend() void = {
+	let s = padend("2", '0', 5);
+	assert(s == "20000");
+	free(s);
+
+	let s = padend("12345", '0', 5);
+	assert(s == "12345");
+	free(s);
+
+	let s = padend("", '0', 5);
+	assert(s == "00000");
+	free(s);
+};
diff --git a/strings/replace.ha b/strings/replace.ha
@@ -0,0 +1,36 @@
+// Replaces all instances of 'needle' with 'target' in 's'.
+// The caller must free the return value.
+export fn replace(s: str, needle: str, target: str) str = {
+	let res = dup(s);
+
+	let n_needle_runes = 0z;
+	let needle_iter = iter(needle);
+	for (!(next(&needle_iter) is void)) {
+		n_needle_runes += 1;
+	};
+
+	for (true) {
+		const match_idx = match(index(res, needle)) {
+		case s: size =>
+			yield s;
+		case void =>
+			break;
+		};
+		let old_res = res;
+		const bytes = toutf8(res);
+		// TODO: Stop allocating so much, as it will become a perf issue
+		// with many replacements.
+		res = concat(sub(res, 0, match_idx),
+			target,
+			sub(res, match_idx + n_needle_runes, end));
+		free(old_res);
+	};
+	return res;
+};
+
+@test fn replace() void = {
+	assert(replace("Hello world!", "world", "there") == "Hello there!");
+	assert(replace("I like dogs, dogs, birds, dogs", "dogs", "cats") ==
+		"I like cats, cats, birds, cats");
+	assert(replace("こんにちは", "にち", "ばん") == "こんばんは");
+};