hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 4e72da523d192c219cc7086f6d260b1b37211f4c
parent 5db6a44a63ff95295e5c2310cd3169833f4fccde
Author: Sudipto Mallick <smlckz@disroot.org>
Date:   Sat, 10 Jul 2021 12:52:40 +0000

strings: add ltrim, rtrim and trim

Signed-off-by: Sudipto Mallick <smlckz@disroot.org>

Diffstat:
M scripts/gen-stdlib | 3 ++-
M stdlib.mk | 6 ++++--
A strings/trim.ha | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 98 insertions(+), 3 deletions(-)

diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -725,7 +725,8 @@ strings() { suffix.ha \ tokenize.ha \ utf8.ha \ - index.ha + index.ha \ + trim.ha gen_ssa strings bytes encoding::utf8 types } diff --git a/stdlib.mk b/stdlib.mk @@ -1031,7 +1031,8 @@ stdlib_strings_srcs= \ $(STDLIB)/strings/suffix.ha \ $(STDLIB)/strings/tokenize.ha \ $(STDLIB)/strings/utf8.ha \ - $(STDLIB)/strings/index.ha + $(STDLIB)/strings/index.ha \ + $(STDLIB)/strings/trim.ha $(HARECACHE)/strings/strings.ssa: $(stdlib_strings_srcs) $(stdlib_rt) $(stdlib_bytes) $(stdlib_encoding_utf8) $(stdlib_types) @printf 'HAREC \t$@\n' @@ -2221,7 +2222,8 @@ testlib_strings_srcs= \ $(STDLIB)/strings/suffix.ha \ $(STDLIB)/strings/tokenize.ha \ $(STDLIB)/strings/utf8.ha \ - $(STDLIB)/strings/index.ha + $(STDLIB)/strings/index.ha \ + $(STDLIB)/strings/trim.ha $(TESTCACHE)/strings/strings.ssa: $(testlib_strings_srcs) $(testlib_rt) $(testlib_bytes) $(testlib_encoding_utf8) $(testlib_types) @printf 'HAREC \t$@\n' diff --git a/strings/trim.ha b/strings/trim.ha @@ -0,0 +1,92 @@ +use encoding::utf8; + +const whitespace: [_]rune = [' ', '\n', '\t', '\r']; + +// Returns a string (borrowed from given input string) after trimming off of +// the start of the input string the characters in the given list of runes. If +// no runes are given, returns the string with leading whitespace stripped off. +export fn ltrim(input: str, trim: rune...) str = { + if (len(trim) == 0) { + trim = whitespace; + }; + let it = iter(input); + for (true) { + const r = match (next(&it)) { + r: rune => r, + void => break, + }; + let found = false; + for (let i = 0z; i < len(trim); i += 1) { + if (r == trim[i]) { + found = true; + break; + }; + }; + if (!found) { + prev(&it); + break; + }; + }; + return fromutf8(it.dec.src[it.dec.offs..]); +}; + +// Returns a string (borrowed from given input string) after trimming off of +// the end of the input string the characters in the given list of runes. 
If no +// runes are given, returns the string with trailing whitespace stripped off. +export fn rtrim(input: str, trim: rune...) str = { + if (len(trim) == 0) { + trim = whitespace; + }; + let it = riter(input); + for (true) { + const r = match (prev(&it)) { + r: rune => r, + void => break, + }; + let found = false; + for (let i = 0z; i < len(trim); i += 1) { + if (r == trim[i]) { + found = true; + break; + }; + }; + if (!found) { + next(&it); + break; + }; + }; + return fromutf8(it.dec.src[..it.dec.offs]); +}; + +// Returns a string (borrowed from given input string) after trimming off of +// the both ends of the input string the characters in the given list of runes. +// If no runes are given, returns the string with both leading and trailing +// whitespace stripped off. +export fn trim(input: str, exclude: rune...) str = + ltrim(rtrim(input, exclude...), exclude...); + +@test fn trim() void = { + assert(ltrim("") == ""); + assert(ltrim(" hi") == "hi"); + assert(ltrim("\t\r\n hello") == "hello"); + assert(ltrim("((()(())))())", '(', ')') == ""); + assert(ltrim("abacadabra", 'a', 'b', 'c', 'd') == "ra"); + assert(ltrim("𝚊𝚋𝚊𝚌𝚊𝚍𝚊𝚋𝚛𝚊", '𝚊', '𝚋', '𝚌', '𝚍') == "𝚛𝚊"); + + assert(rtrim("") == ""); + assert(rtrim("hello ") == "hello"); + assert(rtrim("hello, world\r\n\r\n") == "hello, world"); + assert(rtrim("Sentimentalized sensationalism sensationalized sentimentalisms", + ' ', 's', 'i', 'l', 'z', 't', 'm', 'n', 'o', 'e', 'a', 'd') == "S"); + assert(rtrim("\\/\\/\\\\//\\//\\////\\/\\", '/', '\\') == ""); + assert(rtrim("yellowwooddoor", 'w', 'd', 'o', 'r') == "yell"); + + assert(trim("") == ""); + assert(trim(" ​ ") == "​"); + assert(trim("mississippi", 'm', 'i', 'p', 's') == ""); + assert(trim("[[][[[]]][][].[[]][]]][]]]", '[', ']') == "."); + assert(trim("AAAΑА𝖠AAAA", 'A') == "ΑА𝖠"); + assert(trim(" চিত্ত যেথা ভয়শূন্য, উচ্চ যেথা শির ") == "চিত্ত যেথা ভয়শূন্য, উচ্চ যেথা শির"); + assert(trim("𝖺𝖻𝖺𝖼𝖺𝖽𝖺𝖻‌𝗋‌𝖺𝖼𝖺𝖽𝖺𝖻𝖼𝖺", '𝖺', '𝖻', '𝖼', '𝖽') == "‌𝗋‌"); +}; +