hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 7609148ab49a74c15b6618780ccfad8b6f13e8b2
parent e21ab557e00b7c94febf4ede8eebe4fb6cee8c34
Author: Autumn! <autumnull@posteo.net>
Date:   Fri, 12 May 2023 14:12:48 +0000

ascii: add strlower, strupper, rewrite strcasecmp

Signed-off-by: Autumn! <autumnull@posteo.net>

Diffstat:
D ascii/strcmp.ha | 47 -----------------------------------------------
A ascii/string.ha | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/gen-stdlib | 2 +-
M stdlib.mk | 4 ++--
4 files changed, 67 insertions(+), 50 deletions(-)

diff --git a/ascii/strcmp.ha b/ascii/strcmp.ha
@@ -1,47 +0,0 @@
-// License: MPL-2.0
-// (c) 2021 Drew DeVault <sir@cmpwn.com>
-// (c) 2021 Ember Sawady <ecs@d2evs.net>
-use strings;
-
-// Compares two strings by their ASCII sort order, treating all capital letters
-// as their lowercase counterpart (i.e. a case-insensitive comparison is
-// performed). Zero is returned if the strings are equal, a negative value if a
-// is less than b, or a positive value if a is greater than b. Aborts if a
-// non-ASCII byte is encountered.
-export fn strcasecmp(a: str, b: str) int = {
-	let a = strings::iter(a), b = strings::iter(b);
-	for (true) {
-		let ra = match (strings::next(&a)) {
-		case void =>
-			match (strings::next(&b)) {
-			case void =>
-				break;
-			case rune =>
-				return -1;
-			};
-		case let r: rune =>
-			yield r;
-		};
-		let rb = match (strings::next(&b)) {
-		case void =>
-			return 1;
-		case let r: rune =>
-			yield r;
-		};
-		assert(valid(ra) && valid(rb));
-		let ra = tolower(ra), rb = tolower(rb);
-		if (ra != rb) {
-			return ra: u32: int - rb: u32: int;
-		};
-	};
-	return 0;
-};
-
-@test fn strcmp() void = {
-	assert(strcasecmp("ABC", "ABC") == 0);
-	assert(strcasecmp("ABC", "abc") == 0);
-	assert(strcasecmp("ABC", "aB") > 0);
-	assert(strcasecmp("ab", "Abc") < 0);
-	assert(strcasecmp("bcd", "ABC") > 0);
-	assert(strcasecmp("ABC", "[[[") > 0);
-};
diff --git a/ascii/string.ha b/ascii/string.ha
@@ -0,0 +1,64 @@
+// License: MPL-2.0
+// (c) 2021 Drew DeVault <sir@cmpwn.com>
+// (c) 2021 Ember Sawady <ecs@d2evs.net>
+use strings;
+
+// Convert all ascii uppercase characters in a string to their lowercase
+// representation. Modifies the original string.
+export fn strlower(s: str) str = {
+	let bs = strings::toutf8(s);
+	for (let i = 0z; i < len(bs); i += 1) {
+		if (bs[i] < 128 && cclass[bs[i]] & U != 0) {
+			bs[i] += 'a': u8 - 'A';
+		};
+	};
+	return s;
+};
+
+// Convert all ascii lowercase characters in a string to their uppercase
+// representation. Modifies the original string.
+export fn strupper(s: str) str = {
+	let bs = strings::toutf8(s);
+	for (let i = 0z; i < len(bs); i += 1) {
+		if (bs[i] < 128 && cclass[bs[i]] & L != 0) {
+			bs[i] -= 'a': u8 - 'A';
+		};
+	};
+	return s;
+};
+
+// Compares two strings by their sort order, treating all ascii capital letters
+// as their lowercase counterpart (i.e. an ascii-case-insensitive comparison is
+// performed). Zero is returned if the strings are equal, a negative value if a
+// is less than b, or a positive value if a is greater than b.
+export fn strcasecmp(a: str, b: str) int = {
+	let abs = strings::toutf8(a);
+	let bbs = strings::toutf8(b);
+	for (let i = 0z; i < len(abs) && i < len(bbs); i += 1) {
+		// you know that i am called "the Cast"...
+		// because i *really* love to cast...
+		// sometimes i sit and cast all day... ha ha, but
+		// sometimes i get carried away!
+		let cmp = tolower(abs[i]: u32: rune): u32: int - tolower(bbs[i]: u32: rune): u32: int;
+		if (cmp != 0) return cmp;
+	};
+	return len(abs): int - len(bbs): int;
+};
+
+@test fn strcasecmp() void = {
+	assert(strupper("ABC") == "ABC");
+	assert(strlower("ABC") == "abc");
+	assert(strupper("abc") == "ABC");
+	assert(strlower("abc") == "abc");
+	assert(strupper("[[[") == "[[[");
+	assert(strlower("[[[") == "[[[");
+	assert(strupper("こ") == "こ");
+	assert(strlower("こ") == "こ");
+
+	assert(strcasecmp("ABC", "ABC") == 0);
+	assert(strcasecmp("ABC", "abc") == 0);
+	assert(strcasecmp("ABC", "aB") > 0);
+	assert(strcasecmp("ab", "Abc") < 0);
+	assert(strcasecmp("bcd", "ABC") > 0);
+	assert(strcasecmp("ABC", "[[[") > 0);
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -168,7 +168,7 @@ test() {
 ascii() {
 	gen_srcs ascii \
 		ctype.ha \
-		strcmp.ha \
+		string.ha \
 		valid.ha
 	gen_ssa ascii strings
 }
diff --git a/stdlib.mk b/stdlib.mk
@@ -767,7 +767,7 @@ stdlib_uuid_freebsd = $(stdlib_uuid_any)
 # ascii (+any)
 stdlib_ascii_any_srcs = \
 	$(STDLIB)/ascii/ctype.ha \
-	$(STDLIB)/ascii/strcmp.ha \
+	$(STDLIB)/ascii/string.ha \
 	$(STDLIB)/ascii/valid.ha
 
 $(HARECACHE)/ascii/ascii-any.ssa: $(stdlib_ascii_any_srcs) $(stdlib_rt) $(stdlib_strings_$(PLATFORM))
@@ -3037,7 +3037,7 @@ testlib_uuid_freebsd = $(testlib_uuid_any)
 # ascii (+any)
 testlib_ascii_any_srcs = \
 	$(STDLIB)/ascii/ctype.ha \
-	$(STDLIB)/ascii/strcmp.ha \
+	$(STDLIB)/ascii/string.ha \
 	$(STDLIB)/ascii/valid.ha
 
 $(TESTCACHE)/ascii/ascii-any.ssa: $(testlib_ascii_any_srcs) $(testlib_rt) $(testlib_strings_$(PLATFORM))