hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit e1f44d28d232640ea2553def2f7cf3418bd66278
parent f3015828b05cee7631259bfa0c8cfa55fe2f6dbe
Author: Autumn! <autumnull@posteo.net>
Date:   Sat,  8 Apr 2023 01:43:54 +0000

strings: change multireplace to work in a single pass

Signed-off-by: Autumn! <autumnull@posteo.net>

Diffstat:
M strings/replace.ha | 46 ++++++++++++++++++++++++++++------------------
1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/strings/replace.ha b/strings/replace.ha
@@ -10,28 +10,36 @@ export fn replace(s: str, needle: str, target: str) str = {
 	return multireplace(s, (needle, target));
 };
 
-// For each tuple given by 'repls', a replacement of 's' is done like in
-// [[replace]], in the order that they are passed as arguments. This function is
-// nearly equivalent to calling [[replace]] multiple times, except only one new
-// string is allocated. The caller must free the return value.
+// Performs a replacement in 's' of each tuple given by 'repls'. Replacement
+// occurs in a single pass of 's', and works like in [[replace]], except that
+// replacement pairs found earlier in 'repls' will take precedence over later
+// ones. For example:
+// assert(multireplace("hello there", ("e", "a"), ("a", "x"), ("ell", "eww")) == "hallo thara");
+// assert(multireplace("hello there", ("ell", "eww"), ("e", "a")) == "hewwo thara");
+// The caller must free the return value.
 export fn multireplace(s: str, repls: (str, str)...) str = {
-	let res = toutf8(dup(s));
+	let replb: []([]u8, []u8) = alloc([], len(repls));
+	defer free(replb);
 	for (let i = 0z; i < len(repls); i += 1) {
-		let needle = toutf8(repls[i].0);
-		let target = toutf8(repls[i].1);
-		let idx = 0z;
-		for (true) {
-			idx = match(bytes::index(res[idx..], needle)) {
-			case let s: size =>
-				yield s + idx;
-			case void =>
-				break;
+		static append(replb, (toutf8(repls[i].0), toutf8(repls[i].1)));
+	};
+	let b = toutf8(s);
+	let res: []u8 = [];
+	let i = 0z;
+	let prev = 0z; // end of previous match, so we can append in chunks
+	for (i < len(b)) :step {
+		for (let j = 0z; j < len(replb); j += 1) {
+			if (bytes::hasprefix(b[i..], replb[j].0)) {
+				append(res, b[prev..i]...);
+				append(res, replb[j].1...);
+				i += len(replb[j].0);
+				prev = i;
+				continue :step;
 			};
-			delete(res[idx..idx + len(needle)]);
-			insert(res[idx], target...);
-			idx += len(target);
 		};
+		i += 1;
 	};
+	append(res, b[prev..i]...);
 	return fromutf8(res)!;
 };
 
@@ -48,5 +56,7 @@ export fn multireplace(s: str, repls: (str, str)...) str = {
 	assert(multireplace("Hello world", ("Hello", "Greetings"),
 		("world", "globe")) == "Greetings globe");
 	assert(multireplace("ababa", ("a", "ba"), ("b", "a"), ("a", "c")) ==
-		"cccccccc");
+		"baabaaba");
+	assert(multireplace("hello there", ("e", "a"), ("a", "x"), ("ell", "eww")) == "hallo thara");
+	assert(multireplace("hello there", ("ell", "eww"), ("e", "a")) == "hewwo thara");
 };