commit 7609148ab49a74c15b6618780ccfad8b6f13e8b2
parent e21ab557e00b7c94febf4ede8eebe4fb6cee8c34
Author: Autumn! <autumnull@posteo.net>
Date: Fri, 12 May 2023 14:12:48 +0000
ascii: add strlower, strupper, rewrite strcasecmp
Signed-off-by: Autumn! <autumnull@posteo.net>
Diffstat:
4 files changed, 67 insertions(+), 50 deletions(-)
diff --git a/ascii/strcmp.ha b/ascii/strcmp.ha
@@ -1,47 +0,0 @@
-// License: MPL-2.0
-// (c) 2021 Drew DeVault <sir@cmpwn.com>
-// (c) 2021 Ember Sawady <ecs@d2evs.net>
-use strings;
-
-// Compares two strings by their ASCII sort order, treating all capital letters
-// as their lowercase counterpart (i.e. a case-insensitive comparison is
-// performed). Zero is returned if the strings are equal, a negative value if a
-// is less than b, or a positive value if a is greater than b. Aborts if a
-// non-ASCII byte is encountered.
-export fn strcasecmp(a: str, b: str) int = {
- let a = strings::iter(a), b = strings::iter(b);
- for (true) {
- let ra = match (strings::next(&a)) {
- case void =>
- match (strings::next(&b)) {
- case void =>
- break;
- case rune =>
- return -1;
- };
- case let r: rune =>
- yield r;
- };
- let rb = match (strings::next(&b)) {
- case void =>
- return 1;
- case let r: rune =>
- yield r;
- };
- assert(valid(ra) && valid(rb));
- let ra = tolower(ra), rb = tolower(rb);
- if (ra != rb) {
- return ra: u32: int - rb: u32: int;
- };
- };
- return 0;
-};
-
-@test fn strcmp() void = {
- assert(strcasecmp("ABC", "ABC") == 0);
- assert(strcasecmp("ABC", "abc") == 0);
- assert(strcasecmp("ABC", "aB") > 0);
- assert(strcasecmp("ab", "Abc") < 0);
- assert(strcasecmp("bcd", "ABC") > 0);
- assert(strcasecmp("ABC", "[[[") > 0);
-};
diff --git a/ascii/string.ha b/ascii/string.ha
@@ -0,0 +1,64 @@
+// License: MPL-2.0
+// (c) 2021 Drew DeVault <sir@cmpwn.com>
+// (c) 2021 Ember Sawady <ecs@d2evs.net>
+use strings;
+
+// Lowercases a string in place: every ASCII uppercase byte of s is replaced by
+// its lowercase form and all other bytes are kept. Returns s itself.
+export fn strlower(s: str) str = {
+	let bytes = strings::toutf8(s);
+	for (let i = 0z; i < len(bytes); i += 1) {
+		const b = bytes[i];
+		if (b >= 128 || cclass[b] & U == 0) continue;
+		bytes[i] = b + ('a': u8 - 'A');
+	};
+	return s;
+};
+
+// Uppercases a string in place: every ASCII lowercase byte of s is replaced by
+// its uppercase form and all other bytes are kept. Returns s itself.
+export fn strupper(s: str) str = {
+	let bytes = strings::toutf8(s);
+	for (let i = 0z; i < len(bytes); i += 1) {
+		const b = bytes[i];
+		if (b >= 128 || cclass[b] & L == 0) continue;
+		bytes[i] = b - ('a': u8 - 'A');
+	};
+	return s;
+};
+
+// Compares two strings byte by byte over their UTF-8 encoding, treating ASCII
+// capital letters as their lowercase counterparts (i.e. the comparison is
+// ASCII-case-insensitive). Returns zero if the strings are equal, a negative
+// value if a is less than b, or a positive value if a is greater than b.
+export fn strcasecmp(a: str, b: str) int = {
+	let abs = strings::toutf8(a);
+	let bbs = strings::toutf8(b);
+	for (let i = 0z; i < len(abs) && i < len(bbs); i += 1) {
+		// tolower wants a rune; int makes the subtraction signed.
+		let ca = tolower(abs[i]: u32: rune): u32: int;
+		let cb = tolower(bbs[i]: u32: rune): u32: int;
+		if (ca != cb) return ca - cb;
+	};
+	// Common prefix matched; order by length (no truncating int cast).
+	if (len(abs) == len(bbs)) return 0;
+	return if (len(abs) < len(bbs)) -1 else 1;
+};
+
+@test fn strcasecmp() void = {
+ assert(strupper("ABC") == "ABC"); // already uppercase
+ assert(strlower("ABC") == "abc");
+ assert(strupper("abc") == "ABC");
+ assert(strlower("abc") == "abc"); // already lowercase
+ assert(strupper("[[[") == "[[["); // ASCII non-letters
+ assert(strlower("[[[") == "[[[");
+ assert(strupper("こ") == "こ"); // multi-byte (non-ASCII) input
+ assert(strlower("こ") == "こ");
+ // NOTE(review): the calls above write into string literals; confirm OK.
+ assert(strcasecmp("ABC", "ABC") == 0);
+ assert(strcasecmp("ABC", "abc") == 0); // differs only in ASCII case
+ assert(strcasecmp("ABC", "aB") > 0); // longer string sorts after its prefix
+ assert(strcasecmp("ab", "Abc") < 0); // prefix sorts first
+ assert(strcasecmp("bcd", "ABC") > 0);
+ assert(strcasecmp("ABC", "[[[") > 0); // 'A' compares as 'a', above '['
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -168,7 +168,7 @@ test() {
ascii() {
gen_srcs ascii \
ctype.ha \
- strcmp.ha \
+ string.ha \
valid.ha
gen_ssa ascii strings
}
diff --git a/stdlib.mk b/stdlib.mk
@@ -767,7 +767,7 @@ stdlib_uuid_freebsd = $(stdlib_uuid_any)
# ascii (+any)
stdlib_ascii_any_srcs = \
$(STDLIB)/ascii/ctype.ha \
- $(STDLIB)/ascii/strcmp.ha \
+ $(STDLIB)/ascii/string.ha \
$(STDLIB)/ascii/valid.ha
$(HARECACHE)/ascii/ascii-any.ssa: $(stdlib_ascii_any_srcs) $(stdlib_rt) $(stdlib_strings_$(PLATFORM))
@@ -3037,7 +3037,7 @@ testlib_uuid_freebsd = $(testlib_uuid_any)
# ascii (+any)
testlib_ascii_any_srcs = \
$(STDLIB)/ascii/ctype.ha \
- $(STDLIB)/ascii/strcmp.ha \
+ $(STDLIB)/ascii/string.ha \
$(STDLIB)/ascii/valid.ha
$(TESTCACHE)/ascii/ascii-any.ssa: $(testlib_ascii_any_srcs) $(testlib_rt) $(testlib_strings_$(PLATFORM))