hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 5b7f3ea99c224c8b050a79cfbe5a11c8f32c62b7
parent 548c618e08115fbffb5dece720d36a01d9b95a17
Author: Steven Guikal <void@fluix.one>
Date:   Mon, 19 Jul 2021 17:27:30 -0400

encoding::base64: new module

The acceptance of invalid padding is not implemented because of the
security considerations described in the RFC[1] and the fact that a
`no_padding` return type would make verifying that no padding was used
more difficult[2]. Should this be desired, a dedicated alphabet type or
set of flags allowing explicitly no padding, any padding, or (perhaps)
any invalid characters at all would be clearer.

[1] https://datatracker.ietf.org/doc/html/rfc4648#section-12
[2] One would have to check that either `no_padding` was returned or, if it
wasn't, that the length of the data is a multiple of 4 and thus didn't
have padding in the first place. The latter part is important; otherwise,
an attacker could covertly send padding.

Signed-off-by: Steven Guikal <void@fluix.one>

Diffstat:
Aencoding/base64/README | 5+++++
Aencoding/base64/base64.ha | 348+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mscripts/gen-stdlib | 7+++++++
Mstdlib.mk | 28++++++++++++++++++++++++++++
4 files changed, 388 insertions(+), 0 deletions(-)

diff --git a/encoding/base64/README b/encoding/base64/README @@ -0,0 +1,5 @@ +Implementation of the base 64 encoding as per RFC 4648. This implementation +does not support invalid padding due to security concerns described in the +RFC. + +https://datatracker.ietf.org/doc/html/rfc4648#section-12 diff --git a/encoding/base64/base64.ha b/encoding/base64/base64.ha @@ -0,0 +1,348 @@ +use bufio; +use bytes; +use io; +use strio; +use strings; + +// RFC 4648 standard "base64" base 64 encoding alphabet. +export const standard: []u8 = [ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', + 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', + 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', + 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '+', '/' +]; + +// RFC 4648 URL and filename safe "base64url" base 64 encoding alphabet. +export const urlsafe: []u8 = [ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', + 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', + 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', + 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', + 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', + 'w', 'x', 'y', 'z', '0', '1', '2', '3', + '4', '5', '6', '7', '8', '9', '-', '_' +]; + +// The padding character used at the end of encoding. +export def PADDING: u8 = '=': u32: u8; + +// Indicates that invalid input was found while decoding, either in the form of +// characters outside of the base 64 alphabet, insufficient padding, or trailing +// characters. Contains the index of the first invalid character. +export type invalid = !size; + +// Encodes a byte slice using a base 64 encoding alphabet, with padding, and +// writes it to an [[io::stream]]. The number of bytes written is returned. 
+export fn encode( + alphabet: []u8, + sink: *io::stream, + b: []u8 +) (size | io::error) = { + let z = 0z; + let i = 0z; + for (i + 2 < len(b); i += 3) { + z += io::write(sink, [ + alphabet[b[i] >> 2], + alphabet[(b[i] & 0x3) << 4 | b[i + 1] >> 4], + alphabet[(b[i + 1] & 0xf) << 2 | b[i + 2] >> 6], + alphabet[b[i + 2] & 0x3F], + ])?; + }; + if (len(b) - i > 0) { + if (len(b) - i == 2) { + z += io::write(sink, [ + alphabet[b[i] >> 2], + alphabet[(b[i] & 0x3) << 4 | b[i + 1] >> 4], + alphabet[(b[i + 1] & 0xf) << 2], + PADDING, + ])?; + } else { + z += io::write(sink, [ + alphabet[b[i] >> 2], + alphabet[(b[i] & 0x3) << 4], + PADDING, + PADDING, + ])?; + }; + }; + return z; +}; + +// Calls [[encode]] with the [[standard]] base 64 encoding alphabet. +export fn stdencode( + sink: *io::stream, + b: []u8, +) (size | io::error) = encode(standard, sink, b); + +// Calls [[encode]] with the [[urlsafe]] base 64 encoding alphabet. +export fn urlencode( + sink: *io::stream, + b: []u8, +) (size | io::error) = encode(urlsafe, sink, b); + +// Encodes a byte slice using a base 64 encoding alphabet, with padding, and +// returns it. The caller must free the return value. +export fn encodestr(alphabet: []u8, b: []u8) str = { + let sink = strio::dynamic(); + encode(alphabet, sink, b) as size; + return strio::finish(sink); +}; + +// Calls [[encodestr]] with the [[standard]] base 64 encoding alphabet. +export fn stdencodestr(b: []u8) str = encodestr(standard, b); + +// Calls [[encodestr]] with the [[urlsafe]] base 64 encoding alphabet. 
+export fn urlencodestr(b: []u8) str = encodestr(urlsafe, b); + +@test fn encode() void = { + const in: [_]u8 = ['f', 'o', 'o', 'b', 'a', 'r']; + const expect: [_]str = [ + "", + "Zg==", + "Zm8=", + "Zm9v", + "Zm9vYg==", + "Zm9vYmE=", + "Zm9vYmFy" + ]; + for (let i = 0z; i < len(in); i += 1) { + let s = encodestr(standard, in[..i]); + defer free(s); + assert(s == expect[i]); + }; +}; + +// Decodes base 64-encoded data in the given base 64 alphabet, with padding, +// from an [[io::stream]]. The number of bytes written is returned. +export fn decode( + alphabet: []u8, + in: *io::stream, + out: *io::stream, +) (size | invalid | io::error) = { + const INVALID_OR_PAD = 255u8; + let decoder: [256]u8 = [INVALID_OR_PAD...]; + for (let i = 0z; i < len(alphabet); i += 1) { + decoder[alphabet[i]] = i: u8; + }; + + let count = 0z; + let z = 0z; + for (true) { + let buf: [4]u8 = [0...]; + match (io::read(in, buf)) { + size => { + for (let i = 0z; i < 2; i += 1) { + if (decoder[buf[i]] == INVALID_OR_PAD) { + return (count + i): invalid; + } else { + buf[i] = decoder[buf[i]]; + }; + }; + + if (decoder[buf[2]] == INVALID_OR_PAD) { + if (buf[2] != PADDING) { + return (count + 2z): invalid; + }; + if (buf[3] != PADDING) { + return (count + 3z): invalid; + }; + z += io::write(out, [ + buf[0] << 2 | buf[1] >> 4, + ])?; + let extra: []u8 = [0]; + return match (io::read(in, extra)) { + size => (count + 4z): invalid, + io::EOF => z, + }; + } else { + buf[2] = decoder[buf[2]]; + }; + + if (decoder[buf[3]] == INVALID_OR_PAD) { + if (buf[3] != PADDING) { + return (count + 3z): invalid; + }; + z += io::write(out, [ + buf[0] << 2 | buf[1] >> 4, + buf[1] << 4 | buf[2] >> 2, + ])?; + let extra: []u8 = [0]; + return match (io::read(in, extra)) { + size => (count + 4z): invalid, + io::EOF => z, + }; + } else { + buf[3] = decoder[buf[3]]; + }; + + z += io::write(out, [ + buf[0] << 2 | buf[1] >> 4, + buf[1] << 4 | buf[2] >> 2, + buf[2] << 6 | buf[3], + ])?; + count += 4; + }, + io::EOF => { + 
break; + }, + }; + }; + return z; +}; + +// Calls [[decode]] with the [[standard]] base 64 encoding alphabet. +export fn stddecode( + in: *io::stream, + out: *io::stream, +) (size | invalid | io::error) = decode(standard, in, out); + +// Calls [[decode]] with the [[urlsafe]] base 64 encoding alphabet. +export fn urldecode( + in: *io::stream, + out: *io::stream, +) (size | invalid | io::error) = decode(urlsafe, in, out); + +// Decodes base 64-encoded data in the given base 64 alphabet, with padding, +// from an [[io::stream]]. Every four input bytes are guaranteed to either be +// decoded into the user-provided buffer or return [[invalid]]. The number of +// bytes written is returned. +export fn decode_static( + alphabet: []u8, + out: []u8, + in: *io::stream, +) (size | invalid) = { + let buf = bufio::fixed(out, io::mode::WRITE); + defer io::close(buf); + return match (decode(alphabet, in, buf)) { + io::error => abort(), + z: invalid => z: invalid, + z: size => z, + }; +}; + +// Calls [[decode_static]] with the [[standard]] base 64 encoding alphabet. +export fn stddecode_static(out: []u8, in: *io::stream) (size | invalid) = { + return decode_static(standard, out, in); +}; + +// Calls [[decode_static]] with the [[urlsafe]] base 64 encoding alphabet. +export fn urldecode_static(out: []u8, in: *io::stream) (size | invalid) = { + return decode_static(urlsafe, out, in); +}; + +// Decodes a string of base 64-encoded data in the given base 64 encoding +// alphabet, with padding, into a byte slice. The caller must free the return +// value. +export fn decodestr(alphabet: []u8, in: str) ([]u8 | invalid) = { + return decodeslice(alphabet, strings::toutf8(in)); +}; + +// Calls [[decodestr]] with the [[standard]] base 64 encoding alphabet. +export fn stddecodestr(in: str) ([]u8 | invalid) = decodestr(standard, in); + +// Calls [[decodestr]] with the [[urlsafe]] base 64 encoding alphabet. 
+export fn urldecodestr(in: str) ([]u8 | invalid) = decodestr(urlsafe, in); + +// Decodes a string of base 64-encoded data in the given base 64 encoding +// alphabet, with padding. Every four input bytes are guaranteed to either be +// decoded into the user-provided buffer or return [[invalid]]. The number of +// bytes written is returned. +export fn decodestr_static( + alphabet: []u8, + out: []u8, + in: str, +) (size | invalid) = { + return decodeslice_static(alphabet, out, strings::toutf8(in)); +}; + +// Calls [[decodestr_static]] with the [[standard]] base 64 encoding alphabet. +export fn stddecodestr_static(out: []u8, in: str) (size | invalid) = { + return decodestr_static(standard, out, in); +}; + +// Calls [[decodestr_static]] with the [[urlsafe]] base 64 encoding alphabet. +export fn urldecodestr_static(out: []u8, in: str) (size | invalid) = { + return decodestr_static(urlsafe, out, in); +}; + +// Decodes a byte slice of base 64-encoded data in the given base 64 encoding +// alphabet, with padding, into a byte slice. The caller must free the return +// value. +export fn decodeslice(alphabet: []u8, in: []u8) ([]u8 | invalid) = { + let out = bufio::dynamic(io::mode::WRITE); + let in = bufio::fixed(in, io::mode::READ); + defer io::close(in); + return match (decode(alphabet, in, out)) { + io::error => abort(), + z: invalid => z: invalid, + size => bufio::finish(out), + }; +}; + +// Calls [[decodeslice]] with the [[standard]] base 64 encoding alphabet. +export fn stddecodeslice(in: []u8) ([]u8 | invalid) = decodeslice(standard, in); + +// Calls [[decodeslice]] with the [[urlsafe]] base 64 encoding alphabet. +export fn urldecodeslice(in: []u8) ([]u8 | invalid) = decodeslice(urlsafe, in); + +// Decodes a byte slice of base 64-encoded data in the given base 64 encoding +// alphabet, with padding. Every four input bytes are guaranteed to either be +// decoded into the user-provided buffer or return [[invalid]]. The number of +// bytes written is returned. 
+export fn decodeslice_static( + alphabet: []u8, + out: []u8, + in: []u8, +) (size | invalid) = { + let in = bufio::fixed(in, io::mode::READ); + defer io::close(in); // bufio::finish? + return decode_static(alphabet, out, in); +}; + +// Calls [[decodeslice_static]] with the [[standard]] base 64 encoding alphabet. +export fn stddecodeslice_static(out: []u8, in: []u8) (size | invalid) = { + return decodeslice_static(standard, out, in); +}; + +// Calls [[decodeslice_static]] with the [[urlsafe]] base 64 encoding alphabet. +export fn urldecodeslice_static(out: []u8, in: []u8) (size | invalid) = { + return decodeslice_static(urlsafe, out, in); +}; + +@test fn decode() void = { + const in: [_]str = [ + "", + "Zg==", + "Zm8=", + "Zm9v", + "Zm9vYg==", + "Zm9vYmE=", + "Zm9vYmFy", + ]; + const expect: [_]u8 = ['f', 'o', 'o', 'b', 'a', 'r']; + for (let i = 0z; i < len(in); i += 1) { + let s = stddecodestr(in[i]) as []u8; + defer free(s); + assert(bytes::equal(s, expect[..i])); + }; + + const bad: [_]str = [ + "A", + "AA", + "AAA", + "!!!!", + "====", + "A=A=", + "AA=A", + "Zg==Zg==", + ]; + const badindex: [_]size = [1, 2, 3, 0, 0, 1, 3, 4]; + for (let i = 0z; i < len(bad); i += 1) { + assert(stddecodestr(bad[i]) as invalid == badindex[i]: invalid); + }; +}; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -244,6 +244,12 @@ dirs() { gen_ssa dirs fs os path } +encoding_base64() { + gen_srcs encoding::base64 \ + base64.ha + gen_ssa encoding::base64 bufio bytes io strio strings +} + encoding_hex() { gen_srcs encoding::hex \ hex.ha @@ -827,6 +833,7 @@ crypto::sha1 crypto::sha256 crypto::sha512 dirs +encoding::base64 encoding::hex encoding::utf8 endian diff --git a/stdlib.mk b/stdlib.mk @@ -129,6 +129,10 @@ hare_stdlib_deps+=$(stdlib_crypto_sha512) stdlib_dirs=$(HARECACHE)/dirs/dirs.o hare_stdlib_deps+=$(stdlib_dirs) +# gen_lib encoding::base64 +stdlib_encoding_base64=$(HARECACHE)/encoding/base64/encoding_base64.o +hare_stdlib_deps+=$(stdlib_encoding_base64) + # gen_lib 
encoding::hex stdlib_encoding_hex=$(HARECACHE)/encoding/hex/encoding_hex.o hare_stdlib_deps+=$(stdlib_encoding_hex) @@ -489,6 +493,16 @@ $(HARECACHE)/dirs/dirs.ssa: $(stdlib_dirs_srcs) $(stdlib_rt) $(stdlib_fs) $(stdl @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Ndirs \ -t$(HARECACHE)/dirs/dirs.td $(stdlib_dirs_srcs) +# encoding::base64 +stdlib_encoding_base64_srcs= \ + $(STDLIB)/encoding/base64/base64.ha + +$(HARECACHE)/encoding/base64/encoding_base64.ssa: $(stdlib_encoding_base64_srcs) $(stdlib_rt) $(stdlib_bufio) $(stdlib_bytes) $(stdlib_io) $(stdlib_strio) $(stdlib_strings) + @printf 'HAREC \t$@\n' + @mkdir -p $(HARECACHE)/encoding/base64 + @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nencoding::base64 \ + -t$(HARECACHE)/encoding/base64/encoding_base64.td $(stdlib_encoding_base64_srcs) + # encoding::hex stdlib_encoding_hex_srcs= \ $(STDLIB)/encoding/hex/hex.ha @@ -1297,6 +1311,10 @@ hare_testlib_deps+=$(testlib_crypto_sha512) testlib_dirs=$(TESTCACHE)/dirs/dirs.o hare_testlib_deps+=$(testlib_dirs) +# gen_lib encoding::base64 +testlib_encoding_base64=$(TESTCACHE)/encoding/base64/encoding_base64.o +hare_testlib_deps+=$(testlib_encoding_base64) + # gen_lib encoding::hex testlib_encoding_hex=$(TESTCACHE)/encoding/hex/encoding_hex.o hare_testlib_deps+=$(testlib_encoding_hex) @@ -1664,6 +1682,16 @@ $(TESTCACHE)/dirs/dirs.ssa: $(testlib_dirs_srcs) $(testlib_rt) $(testlib_fs) $(t @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Ndirs \ -t$(TESTCACHE)/dirs/dirs.td $(testlib_dirs_srcs) +# encoding::base64 +testlib_encoding_base64_srcs= \ + $(STDLIB)/encoding/base64/base64.ha + +$(TESTCACHE)/encoding/base64/encoding_base64.ssa: $(testlib_encoding_base64_srcs) $(testlib_rt) $(testlib_bufio) $(testlib_bytes) $(testlib_io) $(testlib_strio) $(testlib_strings) + @printf 'HAREC \t$@\n' + @mkdir -p $(TESTCACHE)/encoding/base64 + @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nencoding::base64 \ + -t$(TESTCACHE)/encoding/base64/encoding_base64.td 
$(testlib_encoding_base64_srcs) + # encoding::hex testlib_encoding_hex_srcs= \ $(STDLIB)/encoding/hex/hex.ha