encoding::base64: new design - hare - The Hare programming language

commit c408ee26e73c163a865811c019d4ef5070746c9b
parent 1ac8e33e5a1d133f66d7aefab106a1f848f79eab
Author: Ajay R <ar324@protonmail.com>
Date:   Mon, 21 Feb 2022 07:00:04 +0000

encoding::base64: new design

base64 now uses an 'encoding' struct to represent the encoding alphabet,
which is more convenient to use than an array of characters.

This commit also introduces stream-based encoding.

Several of the previous design's functions accepted streams as
arguments. This was totally reworked, because if you're going to be
working with streams, you might as well create an encoder/decoder and
use io::read/write/copy. The convenience functions now only deal with
either strings or byte slices:

* encodeslice, decodeslice
* encodestr, decodestr

I've also updated the README to match the current design.

Signed-off-by: Ajay R <ar324@protonmail.com>
Signed-off-by: Drew DeVault <sir@cmpwn.com>

Diffstat:
M encoding/base64/README  | 25 +++++++++++++------------
M encoding/base64/base64.ha  | 692 +++++++++++++++++++++++++++++++++++++++----------------------------------------
M scripts/gen-stdlib  | 2 +-
M stdlib.mk  | 4 ++--

4 files changed, 361 insertions(+), 362 deletions(-)
diff --git a/encoding/base64/README b/encoding/base64/README
@@ -1,17 +1,18 @@
-Implementation of the base 64 encoding as defined by RFC 4648.
+Implementation of the base64 encoding scheme as defined by RFC 4648.
 
-There are various functions available for decoding and encoding. The decode
-family accepts an [[io::handle]] as input, while the decodeslice and decodestr
-family of functions accept slices and strings as input, respectively.
-[[decode]] accepts an [[io::handle]] for the output, and [[decodeslice]] and
-[[decodestr]] dynamically allocate a slice or string to write the output to, and
-return it to the caller (who is then responsible for freeing it). The _static
-family of functions, such as [[decode_static]], accept a caller-allocated slice
-to write the output to. A similar set of functions is provided for encoding.
+A stream-based encoding and decoding interface is available via [[new_encoder]]
+and [[new_decoder]], which transparently encode bytes to base64 when writing
+to, or decode bytes from base64 when reading from, an underlying I/O handle.
 
-Each function accepts the desired base64 alphabet as its first argument. You may
-provide your own alphabet, but [[standard]] and [[urlsafe]], as defined by the
-RFC, are provided for your convenience.
+Convenience functions for encoding or decoding byte slices or strings are also
+available; see [[encodeslice]], [[decodeslice]], [[encodestr]], and
+[[decodestr]]. These functions dynamically allocate their return value; use the
+stream interface if you require static allocation.
+
+Each function accepts the desired base64 encoding alphabet as its first
+argument. [[std_encoding]] and [[url_encoding]], as defined by the RFC, are
+provided for your convenience, but you may create your own encoding using
+[[encoding_init]].
 
 Due to security concerns described by the RFC, this implementation rejects
 invalid padding.
diff --git a/encoding/base64/base64.ha b/encoding/base64/base64.ha
@@ -1,427 +1,425 @@
+use ascii;
 use bufio;
 use bytes;
+use errors;
 use io;
-use strio;
+use os;
 use strings;
-use errors;
-
-// RFC 4648 standard "base64" base 64 encoding alphabet.
-export const standard: []u8 = [
-	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
-	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
-	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
-	'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
-	'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
-	'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
-	'w', 'x', 'y', 'z', '0', '1', '2', '3',
-	'4', '5', '6', '7', '8', '9', '+', '/'
-];
 
-// RFC 4648 URL and filename safe "base64url" base 64 encoding alphabet.
-export const urlsafe: []u8 = [
-	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
-	'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
-	'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
-	'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f',
-	'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
-	'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
-	'w', 'x', 'y', 'z', '0', '1', '2', '3',
-	'4', '5', '6', '7', '8', '9', '-', '_'
-];
+def PADDING: u8 = '=';
 
-// The padding character used at the end of encoding.
-export def PADDING: u8 = '=': u32: u8;
+export type encoding = struct {
+	encmap: [64]u8,
+	decmap: [256]u8,
+	valid: [256]bool,
+};
 
-// Encodes a byte slice using a base 64 encoding alphabet, with padding, and
-// writes it to an [[io::handle]]. The number of bytes written is returned.
-export fn encode(
-	alphabet: []u8,
-	sink: io::handle,
-	b: []u8
-) (size | io::error) = {
-	let z = 0z;
-	let i = 0z;
-	for (i + 2 < len(b); i += 3) {
-		z += io::write(sink, [
-			alphabet[b[i] >> 2],
-			alphabet[(b[i] & 0x3) << 4 | b[i + 1] >> 4],
-			alphabet[(b[i + 1] & 0xf) << 2 | b[i + 2] >> 6],
-			alphabet[b[i + 2] & 0x3F],
-		])?;
-	};
-	if (len(b) - i > 0) {
-		if (len(b) - i == 2) {
-			z += io::write(sink, [
-				alphabet[b[i] >> 2],
-				alphabet[(b[i] & 0x3) << 4 | b[i + 1] >> 4],
-				alphabet[(b[i + 1] & 0xf) << 2],
-				PADDING,
-			])?;
-		} else {
-			z += io::write(sink, [
-				alphabet[b[i] >> 2],
-				alphabet[(b[i] & 0x3) << 4],
-				PADDING,
-				PADDING,
-			])?;
-		};
+// Represents the standard base-64 encoding alphabet as defined in RFC 4648.
+export const std_encoding: encoding = encoding { ... };
+
+// Represents the "base64url" alphabet as defined in RFC 4648, suitable for use
+// in URLs and file paths.
+export const url_encoding: encoding = encoding { ... };
+
+// Initializes a new encoding based on the passed alphabet, which must be a
+// 64-byte ASCII string.
+export fn encoding_init(enc: *encoding, alphabet: str) void = {
+	const alphabet = strings::toutf8(alphabet);
+	assert(len(alphabet) == 64);
+	for (let i: u8 = 0; i < 64; i += 1) {
+		const ch = alphabet[i];
+		assert(ascii::isascii(ch: u32: rune));
+		enc.encmap[i] = ch;
+		enc.decmap[ch] = i;
+		enc.valid[ch] = true;
 	};
-	return z;
 };
 
-// Encodes a byte slice using a base 64 encoding alphabet, with padding, and
-// returns it. The caller must free the return value.
-export fn encodestr(alphabet: []u8, b: []u8) str = {
-	let sink = strio::dynamic();
-	encode(alphabet, &sink, b) as size;
-	return strio::string(&sink);
+@init fn init() void = {
+	const std_alpha: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+	const url_alpha: str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+	encoding_init(&std_encoding, std_alpha);
+	encoding_init(&url_encoding, url_alpha);
 };
 
-@test fn encode() void = {
-	const in: [_]u8 = ['f', 'o', 'o', 'b', 'a', 'r'];
-	const expect: [_]str = [
-		"",
-		"Zg==",
-		"Zm8=",
-		"Zm9v",
-		"Zm9vYg==",
-		"Zm9vYmE=",
-		"Zm9vYmFy"
-	];
-	for (let i = 0z; i <= len(in); i += 1) {
-		let s = encodestr(standard, in[..i]);
-		defer free(s);
-		assert(s == expect[i]);
-	};
+export type encoder = struct {
+	io::stream,
+	out: io::handle,
+	enc: *encoding,
+	buf: [2]u8, // leftover input
+	avail: size, // bytes available in buf
+	err: (void | io::error),
 };
 
-// Decodes base 64-encoded data in the given base 64 alphabet, with padding,
-// from an [[io::handle]]. The number of bytes written is returned.
-export fn decode(
-	alphabet: []u8,
-	in: io::handle,
+// Creates a stream that encodes writes as base64 before writing them to a
+// secondary stream. The encoder stream must be closed to finalize any unwritten
+// bytes. Closing this stream will not close the underlying stream.
+export fn new_encoder(
+	enc: *encoding,
 	out: io::handle,
-) (size | errors::invalid | io::error) = {
-	let dec = decoder(alphabet, in);
-	match (io::copy(out, &dec)) {
-	case let err: io::error =>
-		match (err) {
-		case errors::invalid =>
-			return errors::invalid;
-		case =>
-			return err;
-		};
-	case let s: size =>
-		return s;
+) encoder = {
+	return encoder {
+		writer = &encode_writer,
+		closer = &encode_closer,
+		out = out,
+		enc = enc,
+		err = void,
+		...
 	};
 };
 
-// Decodes base 64-encoded data in the given base 64 alphabet, with padding,
-// from an [[io::handle]]. The number of bytes written is returned.
-export fn decode_static(
-	alphabet: []u8,
-	out: []u8,
-	in: io::handle,
-) (size | errors::invalid | io::error) = {
-	let buf = bufio::fixed(out, io::mode::WRITE);
-	let dec = decoder(alphabet, in);
-	match (io::copy(&buf, &dec)) {
+// Writer callback for [[encoder]]. Encodes the input in groups of three bytes
+// (each producing four base64 characters) and writes them to the underlying
+// handle. Up to two trailing bytes that do not form a full group are kept in
+// s.buf and are combined with the input of the next write, or flushed with
+// padding when the stream is closed.
+//
+// Returns the number of bytes consumed from 'in', which is always len(in) on
+// success since every input byte is either encoded or buffered. Once a write to
+// the underlying handle fails, the error is stored and returned from every
+// subsequent call.
+fn encode_writer(
+	s: *io::stream,
+	in: const []u8
+) (size | io::error) = {
+	let s = s: *encoder;
+	match(s.err) {
 	case let err: io::error =>
-		match (err) {
-		case errors::invalid =>
-			return errors::invalid;
-		case =>
+		return err;
+	case void =>
+		yield;
+	};
+	let l = len(in);
+	let i = 0z;
+	// i indexes the virtual sequence "leftover bytes followed by in".
+	for (i + 2 < l + s.avail; i += 3) {
+		static let b: [3]u8 = [0...]; // 3 bytes get converted into 4 bytes
+		if (i < s.avail) {
+			// First group: starts with the buffered leftover bytes
+			// (in order), completed with bytes from 'in'.
+			for (let j = 0z; j < s.avail; j += 1) {
+				b[j] = s.buf[j];
+			};
+			for (let j = s.avail; j < 3; j += 1) {
+				b[j] = in[j - s.avail];
+			};
+		} else {
+			for (let j = 0z; j < 3; j += 1) {
+				b[j] = in[j - s.avail + i];
+			};
+		};
+		let encb: [4]u8 = [
+			s.enc.encmap[b[0] >> 2],
+			s.enc.encmap[(b[0] & 0x3) << 4 | b[1] >> 4],
+			s.enc.encmap[(b[1] & 0xf) << 2 | b[2] >> 6],
+			s.enc.encmap[b[2] & 0x3F],
+		];
+		match(io::write(s.out, encb)) {
+		case let err: io::error =>
+			s.err = err;
 			return err;
+		case size =>
+			yield;
 		};
-	case let s: size =>
-		return s;
 	};
+	// storing leftover bytes
+	if (l + s.avail < 3) {
+		// No full group was formed: append the input to the leftover.
+		for (let j = s.avail; j < s.avail + l; j += 1) {
+			s.buf[j] = in[j - s.avail];
+		};
+	} else {
+		// Keep whatever follows the last complete group of three.
+		const begin = (l + s.avail) / 3 * 3;
+		for (let j = begin; j < l + s.avail; j += 1) {
+			s.buf[j - begin] = in[j - s.avail];
+		};
+	};
+	s.avail = (l + s.avail) % 3;
+	// Every byte of 'in' has been either encoded or buffered.
+	return l;
 };
 
-// Decodes a string of base 64-encoded data in the given base 64 encoding
-// alphabet, with padding, into a byte slice. The caller must free the return
-// value.
-export fn decodestr(alphabet: []u8, in: str) ([]u8 | errors::invalid) = {
-	return decodeslice(alphabet, strings::toutf8(in));
-};
-
-// Decodes a string of base 64-encoded data in the given base 64 encoding
-// alphabet, with padding. The number of bytes written is returned.
-export fn decodestr_static(
-	alphabet: []u8,
-	out: []u8,
-	in: str,
-) (size | errors::invalid) = {
-	return decodeslice_static(alphabet, out, strings::toutf8(in));
+fn encode_closer(s: *io::stream) void = {
+	let s = s: *encoder;
+	if (s.avail == 0) {
+		return;
+	};
+	static let b: [3]u8 = [0...]; // the 3 bytes that will be encoded into 4 bytes
+	for (let i = 0z; i < 3; i += 1) {
+		b[i] = if (i < s.avail) s.buf[i] else 0;
+	};
+	let encb: [4]u8 = [
+		s.enc.encmap[b[0] >> 2],
+		s.enc.encmap[(b[0] & 0x3) << 4 | b[1] >> 4],
+		s.enc.encmap[(b[1] & 0xf) << 2 | b[2] >> 6],
+		s.enc.encmap[b[2] & 0x3F],
+	];
+	// adding padding as input length was not a multiple of 3
+	//                        0  1  2
+	static const npa: []u8 = [0, 2, 1];
+	const np = npa[s.avail];
+	for (let i = 0z; i < np; i += 1) {
+		encb[3 - i] = PADDING;
+	};
+	io::write(s.out, encb)!; // TODO https://todo.sr.ht/~sircmpwn/hare/568
 };
 
-// Decodes a byte slice of base 64-encoded data in the given base 64 encoding
-// alphabet, with padding, into a byte slice. The caller must free the return
-// value.
-export fn decodeslice(alphabet: []u8, in: []u8) ([]u8 | errors::invalid) = {
+// Encodes a byte slice in base 64, using the given encoding, returning a slice
+// of ASCII bytes. The caller must free the return value.
+export fn encodeslice(enc: *encoding, in: []u8) []u8 = {
 	let out = bufio::dynamic(io::mode::WRITE);
-	let in = bufio::fixed(in, io::mode::READ);
-	let dec = decoder(alphabet, &in);
-	match (io::copy(&out, &dec)) {
-	case io::error =>
-		io::close(&out);
-		return errors::invalid;
-	case size =>
-		return bufio::buffer(&out);
-	};
+	let encoder = new_encoder(enc, &out);
+	io::write(&encoder, in)!;
+	io::close(&encoder);
+	return bufio::buffer(&out);
 };
 
-// Decodes a byte slice of base 64-encoded data in the given base 64 encoding
-// alphabet, with padding. The number of bytes written is returned.
-export fn decodeslice_static(
-	alphabet: []u8,
-	out: []u8,
-	in: []u8,
-) (size | errors::invalid) = {
-	let in = bufio::fixed(in, io::mode::READ);
-	match (decode_static(alphabet, out, &in)) {
-	case let s: size =>
-		return s;
-	case errors::invalid =>
-		return errors::invalid;
-	case =>
-		abort();
-	};
+// Encodes a byte slice in base 64, using the given encoding, returning a
+// string. The caller must free the return value.
+export fn encodestr(enc: *encoding, in: []u8) str = {
+	return strings::fromutf8(encodeslice(enc, in));
 };
 
-@test fn decode() void = {
-	const in: [_]str = [
+@test fn encode() void = {
+	// RFC 4648 test vectors
+	const in: [_]u8 = ['f', 'o', 'o', 'b', 'a', 'r'];
+	const expect: [_]str = [
 		"",
 		"Zg==",
 		"Zm8=",
 		"Zm9v",
 		"Zm9vYg==",
 		"Zm9vYmE=",
-		"Zm9vYmFy",
+		"Zm9vYmFy"
 	];
-	const expect: [_]u8 = ['f', 'o', 'o', 'b', 'a', 'r'];
-	for (let i = 0z; i < len(in); i += 1) {
-		let s = decodestr(standard, in[i]) as []u8;
+	for (let i = 0z; i <= len(in); i += 1) {
+		let out = bufio::dynamic(io::mode::WRITE);
+		let encoder = new_encoder(&std_encoding, &out);
+		io::write(&encoder, in[..i])!;
+		io::close(&encoder);
+		let encb = bufio::buffer(&out);
+		defer free(encb);
+		assert(bytes::equal(encb, strings::toutf8(expect[i])));
+
+		// Testing encodestr should cover encodeslice too
+		let s = encodestr(&std_encoding, in[..i]);
 		defer free(s);
-		assert(bytes::equal(s, expect[..i]));
-	};
-
-	const bad: [_]str = [
-		"A",
-		"AA",
-		"AAA",
-		"!!!!",
-		"====",
-		"A=A=",
-		"AA=A",
-		"Zg==Zg==",
-	];
-	for (let i = 0z; i < len(bad); i += 1) {
-		let result = decodestr(standard, bad[i]);
-		assert(result is errors::invalid);
+		assert(s == expect[i]);
 	};
 };
 
-const INVALID_OR_PAD: u8 = 255;
+export type decoder = struct {
+	io::stream,
+	in: io::handle,
+	enc: *encoding,
+	avail: []u8, // leftover decoded output
+	pad: bool, // if padding was seen in a previous read
+	state: (void | io::EOF | io::error),
+};
 
-// Initialize a new base64 decoder stream wrapping the given [[io::handle]]
-export fn decoder(alphabet: []u8, in: io::handle) decode_stream = {
-	let decoder: [256]u8 = [INVALID_OR_PAD...];
-	for (let i = 0z; i < len(alphabet); i += 1) {
-		decoder[alphabet[i]] = i: u8;
-	};
-	return decode_stream {
-		reader = &decodestream_reader,
-		input = in,
-		decoder = decoder,
+// Creates a stream that reads and decodes base 64 data from a secondary stream.
+// This stream does not need to be closed, and closing it will not close the
+// underlying stream.
+export fn new_decoder(
+	enc: *encoding,
+	in: io::handle,
+) decoder = {
+	return decoder {
+		reader = &decode_reader,
+		in = in,
+		enc = enc,
+		state = void,
 		...
 	};
 };
 
-// An stream interface for base64. Wraps an [[io::handle]] and does on-the-fly
-// decoding with calls to read().
-export type decode_stream = struct {
-	io::stream,
-	input: io::handle,
-	buf: [4]u8,
-	avail: size, // How many bytes are already decoded, but didn't fit in a previous read
-	waseof: bool,
-	decoder: [256]u8,
-};
-
-fn decodestream_reader(s: *io::stream, out: []u8) (size | io::EOF | io::error) = {
-	assert(len(out) > 0, "zero-length buffer provided");
-	let s = s : *decode_stream;
-	let z = 0z;
-	let decoder = s.decoder;
-	let buf = s.buf;
-
-	// We may have already decoded some bytes that couldn't be pushed out
-	// in a previous call to read.
-	if (s.avail > 0) {
-		z += if (len(out) < s.avail) len(out) else s.avail;
-		out[..z] = s.buf[..z];
-		s.avail -= z;
-		s.buf[..s.avail] = s.buf[z..z+s.avail];
-		out = out[z..];
-		if (len(out) == 0) {
-			return z;
-		};
+fn decode_reader(
+	s: *io::stream,
+	out: []u8
+) (size | io::EOF | io::error) = {
+	let s = s: *decoder;
+	let n = 0z;
+	let l = len(out);
+	match(s.state) {
+	case let err: (io::EOF | io ::error) =>
+		return err;
+	case void =>
+		yield;
 	};
-
-	if (s.waseof) {
-		return io::EOF;
+	if (len(s.avail) > 0) {
+		n += if (l < len(s.avail)) l else len(s.avail);
+		out[..n] = s.avail[0..n];
+		s.avail = s.avail[n..];
+		if (l == n) {
+			return n;
+		};
 	};
-
-	// If we get here, we have pushed out all cached bytes and are ready to
-	// read some more. Reset the internal buffer here.
-	buf = [INVALID_OR_PAD...];
-	for (true) match (io::read(s.input, buf)?) {
-	case size =>
-		for (let i = 0z; i < 2; i += 1) {
-			if (decoder[buf[i]] == INVALID_OR_PAD) {
-				return errors::invalid;
-			} else {
-				buf[i] = decoder[buf[i]];
-			};
+	static let buf: [os::BUFSIZ]u8 = [0...];
+	static let obuf: [os::BUFSIZ / 4 * 3]u8 = [0...];
+	const nn = ((l - n) / 3 + 1) * 4; // 4 extra bytes may be read.
+	let nr = 0z;
+	for (nr < nn) {
+		match (io::read(s.in, buf[nr..])) {
+		case let n: size =>
+			nr += n;
+		case io::EOF =>
+			s.state = io::EOF;
+			break;
+		case let err: io::error =>
+			s.state = err;
+			return err;
 		};
-
-		if (decoder[buf[2]] == INVALID_OR_PAD) {
-			if (buf[2] != PADDING) {
-				return errors::invalid;
-			};
-			if (buf[3] != PADDING) {
-				return errors::invalid;
-			};
-			s.buf[0] = buf[0] << 2 | buf[1] >> 4;
-			s.avail += 1;
-			// End of stream...
-			let extra: []u8 = [0];
-			match (io::read(s.input, extra)?) {
-			case size =>
-				return errors::invalid;
-			case io::EOF =>
-				s.waseof = true;
-				if (len(out) > 0) {
-					out[0] = s.buf[0];
-					z += 1;
-					s.avail = 0;
-				};
+	};
+	if (nr % 4 != 0) {
+		s.state = errors::invalid;
+		return errors::invalid;
+	};
+	if (nr == 0) { // io::EOF already set
+		return n;
+	};
+	// Validating read buffer
+	let valid = true;
+	let np = 0; // Number of padding chars.
+	let p = true; // Pad allowed in buf
+	for (let i = nr: i64 - 1; i >= 0; i -= 1) {
+		const ch = buf[i];
+		if (ch == PADDING) {
+			if(s.pad || !p) {
+				valid = false;
 				break;
 			};
+			np += 1;
 		} else {
-			buf[2] = decoder[buf[2]];
-		};
-
-		if (decoder[buf[3]] == INVALID_OR_PAD) {
-			if (buf[3] != PADDING) {
-				return errors::invalid;
-			};
-			s.buf[..2] = [
-				buf[0] << 2 | buf[1] >> 4,
-				buf[1] << 4 | buf[2] >> 2,
-			];
-			s.avail += 2;
-			let extra: []u8 = [0];
-			match (io::read(s.input, extra)?) {
-			case size =>
-				return errors::invalid;
-			case io::EOF =>
-				let n = if (len(out) < s.avail) len(out) else s.avail;
-				out[..n] = s.buf[..n];
-				s.avail -= n;
-				out = out[n..];
-				s.buf[..s.avail] = s.buf[n..n+s.avail];
-				s.waseof = true;
-				z += n;
+			if (!s.enc.valid[ch]) {
+				valid = false;
 				break;
 			};
-		} else {
-			buf[3] = decoder[buf[3]];
+			// Disallow padding on seeing a non-padding char
+			p = false;
 		};
+	};
+	valid = valid && np <= 2;
+	if (np > 0) {
+		s.pad = true;
+	};
+	if (!valid) {
+		s.state = errors::invalid;
+		return errors::invalid;
+	};
+	for (let i = 0z; i < nr; i += 1) {
+		buf[i] = s.enc.decmap[buf[i]];
+	};
+	for (let i = 0z, j = 0z; i < nr) {
+		obuf[j] = buf[i] << 2 | buf[i + 1] >> 4;
+		obuf[j + 1] = buf[i + 1] << 4 | buf[i + 2] >> 2;
+		obuf[j + 2] = buf[i + 2] << 6 | buf[i + 3];
 
-		s.buf[..3] = [
-			buf[0] << 2 | buf[1] >> 4,
-			buf[1] << 4 | buf[2] >> 2,
-			buf[2] << 6 | buf[3]
-		];
-		s.avail += 3;
-
-		let n = if (len(out) < s.avail) len(out) else s.avail;
-		out[..n] = s.buf[..n];
-		s.avail -= n;
-		out = out[n..];
-		s.buf[..s.avail] = s.buf[n..n+s.avail];
-		z += n;
+		i += 4;
+		j += 3;
+	};
+	// Removing bytes added due to padding.
+	//                         0  1  2 // np
+	static const npr: [3]u8 = [0, 1, 2]; // bytes to discard
+	const navl = nr / 4 * 3 - npr[np];
+	const rem = if(l - n < navl) l - n else navl;
+	for (let i = n; i < n + rem; i += 1) {
+		out[i] = obuf[i - n];
+	};
+	s.avail = obuf[rem..navl];
+	return n + rem;
+};
 
-		if (len(out) == 0) {
-			break;
-		};
-	case io::EOF =>
-		s.waseof = true;
-		if (z == 0) {
-			return io::EOF;
-		};
-		break;
+// Decodes a byte slice of ASCII-encoded base 64 data, using the given encoding,
+// returning a slice of decoded bytes. The caller must free the return value.
+export fn decodeslice(
+	enc: *encoding,
+	in: []u8,
+) ([]u8 | errors::invalid) = {
+	let in = bufio::fixed(in, io::mode::READ);
+	let decoder = new_decoder(enc, &in);
+	let out = bufio::dynamic(io::mode::WRITE);
+	match (io::copy(&out, &decoder)) {
+	case io::error =>
+		io::close(&out);
+		return errors::invalid;
+	case size =>
+		return bufio::buffer(&out);
 	};
-	return z;
 };
 
-@test fn decode_stream() void = {
+// Decodes a string of ASCII-encoded base 64 data, using the given encoding,
+// returning a slice of decoded bytes. The caller must free the return value.
+export fn decodestr(enc: *encoding, in: str) ([]u8 | errors::invalid) = {
+	return decodeslice(enc, strings::toutf8(in));
+};
 
-	const cases: [](str, str) = [
-		("Y2hhbmdlbQ==", "changem"),
-		("Y2hhbmdlbWU=", "changeme"),
-		("Y2hhbmdlbWVt", "changemem"),
+@test fn decode() void = {
+	// RFC 4648 test vectors
+	const cases: [_](str, str, *encoding) = [
+		("", "", &std_encoding),
+		("Zg==", "f", &std_encoding),
+		("Zm8=", "fo", &std_encoding),
+		("Zm9v", "foo", &std_encoding),
+		("Zm9vYg==", "foob", &std_encoding),
+		("Zm9vYmE=", "fooba", &std_encoding),
+		("Zm9vYmFy", "foobar", &std_encoding),
 	];
-
 	for (let i = 0z; i < len(cases); i += 1) {
-		let s = cases[i].0;
-		let expected = cases[i].1;
-
-		let b = strings::toutf8(s);
-		let input = bufio::fixed(b, io::mode::READ);
-
-		let dec = decoder(standard, &input);
-		defer io::close(&dec);
-
+		let in = bufio::fixed(strings::toutf8(cases[i].0), io::mode::READ);
+		let decoder = new_decoder(cases[i].2, &in);
 		let buf: [1]u8 = [0];
-		let out: []u8 = [];
-		defer free(out);
-
-		for (true) match (io::read(&dec, buf)!) {
+		let decb: []u8 = [];
+		defer free(decb);
+		for (true) match (io::read(&decoder, buf)!) {
 		case let z: size =>
-			append(out, buf[0]);
-			assert(z == 1);
+			if (z > 0) {
+				append(decb, buf[0]);
+			};
 		case io::EOF =>
 			break;
 		};
+		assert(bytes::equal(decb, strings::toutf8(cases[i].1)));
 
-		assert(bytes::equal(out, strings::toutf8(expected)));
+		// Testing decodestr should cover decodeslice too
+		let decb = decodestr(cases[i].2, cases[i].0) as []u8;
+		defer free(decb);
+		assert(bytes::equal(decb, strings::toutf8(cases[i].1)));
 	};
-
 	// Repeat of the above, but with a larger buffer
 	for (let i = 0z; i < len(cases); i += 1) {
-		let s = cases[i].0;
-		let expected = cases[i].1;
-
-		let b = strings::toutf8(s);
-		let input = bufio::fixed(b, io::mode::READ);
-
-		let dec = decoder(standard, &input);
-		defer io::close(&dec);
-
-		let buf: [24]u8 = [0...];
-		let out: []u8 = [];
-		defer free(out);
-
-		for (true) match (io::read(&dec, buf)!) {
+		let in = bufio::fixed(strings::toutf8(cases[i].0), io::mode::READ);
+		let decoder = new_decoder(cases[i].2, &in);
+		let buf: [1024]u8 = [0...];
+		let decb: []u8 = [];
+		defer free(decb);
+		for (true) match (io::read(&decoder, buf)!) {
 		case let z: size =>
-			append(out, buf[..z]...);
+			if (z > 0) {
+				append(decb, buf[..z]...);
+			};
+		case io::EOF =>
+			break;
+		};
+		assert(bytes::equal(decb, strings::toutf8(cases[i].1)));
+	};
+
+	const invalid: [_](str, *encoding) = [
+		// invalid padding
+		("=", &std_encoding),
+		("==", &std_encoding),
+		("===", &std_encoding),
+		("=====", &std_encoding),
+		("======", &std_encoding),
+		// invalid characters
+		("@Zg=", &std_encoding),
+		("êg==", &std_encoding),
+		// data after padding is encountered
+		("Zg==Zg==", &std_encoding),
+		("Zm8=Zm8=", &std_encoding),
+	];
+	for (let i = 0z; i < len(invalid); i += 1) {
+		let in = bufio::fixed(strings::toutf8(invalid[i].0), io::mode::READ);
+		let decoder = new_decoder(invalid[i].1, &in);
+		let buf: [1]u8 = [0...];
+		let valid = false;
+		for (true) match(io::read(&decoder, buf)) {
+		case errors::invalid=>
+			break;
+		case size =>
+			valid = true;
 		case io::EOF =>
 			break;
 		};
+		assert(valid == false, "valid is not false");
 
-		assert(bytes::equal(out, strings::toutf8(expected)));
+		// Testing decodestr should cover decodeslice too
+		assert(decodestr(invalid[i].1, invalid[i].0) is errors::invalid);
 	};
 };
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -422,7 +422,7 @@ dirs() {
 encoding_base64() {
 	gen_srcs encoding::base64 \
 		base64.ha
-	gen_ssa encoding::base64 bufio bytes io strio strings
+	gen_ssa encoding::base64 ascii bufio bytes errors io os strings
 }
 
 encoding_base32() {
diff --git a/stdlib.mk b/stdlib.mk
@@ -899,7 +899,7 @@ $(HARECACHE)/dirs/dirs-any.ssa: $(stdlib_dirs_any_srcs) $(stdlib_rt) $(stdlib_fs
 stdlib_encoding_base64_any_srcs= \
 	$(STDLIB)/encoding/base64/base64.ha
 
-$(HARECACHE)/encoding/base64/encoding_base64-any.ssa: $(stdlib_encoding_base64_any_srcs) $(stdlib_rt) $(stdlib_bufio_$(PLATFORM)) $(stdlib_bytes_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_strings_$(PLATFORM))
+$(HARECACHE)/encoding/base64/encoding_base64-any.ssa: $(stdlib_encoding_base64_any_srcs) $(stdlib_rt) $(stdlib_ascii_$(PLATFORM)) $(stdlib_bufio_$(PLATFORM)) $(stdlib_bytes_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_os_$(PLATFORM)) $(stdlib_strings_$(PLATFORM))
 	@printf 'HAREC \t$@\n'
 	@mkdir -p $(HARECACHE)/encoding/base64
 	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nencoding::base64 \
@@ -2742,7 +2742,7 @@ $(TESTCACHE)/dirs/dirs-any.ssa: $(testlib_dirs_any_srcs) $(testlib_rt) $(testlib
 testlib_encoding_base64_any_srcs= \
 	$(STDLIB)/encoding/base64/base64.ha
 
-$(TESTCACHE)/encoding/base64/encoding_base64-any.ssa: $(testlib_encoding_base64_any_srcs) $(testlib_rt) $(testlib_bufio_$(PLATFORM)) $(testlib_bytes_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_strings_$(PLATFORM))
+$(TESTCACHE)/encoding/base64/encoding_base64-any.ssa: $(testlib_encoding_base64_any_srcs) $(testlib_rt) $(testlib_ascii_$(PLATFORM)) $(testlib_bufio_$(PLATFORM)) $(testlib_bytes_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_os_$(PLATFORM)) $(testlib_strings_$(PLATFORM))
 	@printf 'HAREC \t$@\n'
 	@mkdir -p $(TESTCACHE)/encoding/base64
 	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nencoding::base64 \

	hare The Hare programming language
	git clone https://git.torresjrjr.com/hare.git
	Log \| Files \| Refs \| README \| LICENSE

M	encoding/base64/README	\|	25	+++++++++++++------------
M	encoding/base64/base64.ha	\|	692	+++++++++++++++++++++++++++++++++++++++----------------------------------------
M	scripts/gen-stdlib	\|	2	+-
M	stdlib.mk	\|	4	++--