rewrite lex_literal - harec - [hare] Hare compiler, written in C11 for POSIX OSs

commit c76479f8797dc039116561447c8e16bdc23bc4c5
parent a1021d0577c993b65da4a19438def0cc7368ef64
Author: Bor Grošelj Simić <bgs@turminal.net>
Date:   Fri, 10 Mar 2023 02:01:56 +0100

rewrite lex_literal

The old version grew another boolean variable for the state with every new
feature that was added. The state is now condensed into base information
plus 4 flags that work together nicely. Actual conversion to numbers is
also greatly simplified and the memory issues that the old one had are
avoided.

Signed-off-by: Bor Grošelj Simić <bgs@turminal.net>

Diffstat:
M rt/cstrings.ha  | 2 ++
M src/lex.c  | 359 ++++++++++++++++++++++++++++++++++---------------------------------------------
M tests/00-constants.ha  | 317 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------

3 files changed, 437 insertions(+), 241 deletions(-)
diff --git a/rt/cstrings.ha b/rt/cstrings.ha
@@ -4,6 +4,8 @@ type string = struct {
 	capacity: size,
 };
 
+export fn toutf8(s: str) []u8 = *(&s: *[]u8);
+
 fn constchar(s: str) *const char = {
 	let s = &s: *string;
 	return s.data: *const char;
diff --git a/src/lex.c b/src/lex.c
@@ -300,7 +300,7 @@ lex_name(struct lexer *lexer, struct token *out)
 }
 
 static uintmax_t
-compute_exp(uintmax_t n, int exponent, bool _signed, struct location *loc)
+compute_exp(uintmax_t n, int exponent, bool _signed)
 {
 	if (n == 0) {
 		return 0;
@@ -309,246 +309,192 @@ compute_exp(uintmax_t n, int exponent, bool _signed, struct location *loc)
 		uintmax_t old = n;
 		n *= 10;
 		if (n / 10 != old) {
-			error(loc, "Integer literal overflow");
+			errno = ERANGE;
+			return INT64_MAX;
 		}
 	}
 	if (_signed && n > (uintmax_t)INT64_MIN) {
-		error(loc, "Integer literal overflow");
+		errno = ERANGE;
+		return INT64_MAX;
 	}
 	return n;
 }
 
-static uint32_t
+static void
 lex_literal(struct lexer *lexer, struct token *out)
 {
-	uint32_t c = next(lexer, &out->loc, true);
-	assert(c != C_EOF && c <= 0x7F && isdigit(c));
+	enum bases {
+		BIN = 1, OCT, HEX, DEC = 0x07, MASK = DEC
+	};
+	static_assert((BIN | OCT | HEX | DEC) == DEC, "DEC bits must be a superset of all other bases");
+	enum flags {
+		FLT = 3, EXP, SUFF, DIG,
+	};
+
+	static const char chrs[][24] = {
+		[BIN] = "01",
+		[OCT] = "01234567",
+		[DEC] = "0123456789",
+		[HEX] = "0123456789abcdefABCDEF",
+	};
 
-	bool started = false, leadingzero = false;
-	int base = 10;
-	const char *basechrs = "0123456789";
+	static const char matching_states[0x80][6] = {
+		['.'] = {DEC, /*HEX,*/ 0},
+		['e'] = {DEC, DEC | 1<<FLT, 0},
+		['+'] = {DEC | 1<<EXP | 1<<DIG, DEC | 1<<FLT | 1<<EXP | 1<<DIG, 0},
+		['-'] = {DEC | 1<<EXP | 1<<DIG, DEC | 1<<FLT | 1<<EXP | 1<<DIG, 0},
+		['i'] = {BIN, OCT, HEX, DEC, DEC | 1<<EXP, 0},
+		['u'] = {BIN, OCT, HEX, DEC, DEC | 1<<EXP, 0},
+		['z'] = {BIN, OCT, HEX, DEC, DEC | 1<<EXP, 0},
+		['f'] = {DEC, DEC | 1<<FLT, DEC | 1<<EXP, DEC | 1<<FLT | 1<<EXP, 0},
+	};
+	int state = DEC, base = 10, oldstate = DEC;
+	uint32_t c = next(lexer, &out->loc, true), last = 0;
+	assert(c != C_EOF && c <= 0x7F && isdigit(c));
 	if (c == '0') {
-		switch ((c = next(lexer, NULL, true))) {
-		case 'b':
+		c = next(lexer, NULL, true);
+		if (c <= 0x7F && isdigit(c)) {
+			error(&out->loc, "Leading zero in base 10 literal");
+		} else if (c == 'b') {
+			state = BIN | 1 << DIG;
 			base = 2;
-			basechrs = "01";
-			consume(lexer, 2);
-			break;
-		case 'o':
+		} else if (c == 'o') {
+			state = OCT | 1 << DIG;
 			base = 8;
-			basechrs = "01234567";
-			consume(lexer, 2);
-			break;
-		case 'x':
+		} else if (c == 'x') {
+			state = HEX | 1 << DIG;
 			base = 16;
-			basechrs = "0123456789ABCDEFabcdef";
-			consume(lexer, 2);
-			break;
-		default:
-			started = true;
-			leadingzero = true;
-			push(lexer, c, true);
-			break;
 		}
-	} else {
-		started = true;
 	}
-
-	char *suff = NULL;
-	char *exp = NULL;
-	bool isfloat = false;
-	while ((c = next(lexer, NULL, true)) != C_EOF) {
-		if (!strchr(basechrs, c)) {
-			switch (c) {
-			case '.':
-				if (!started) {
-					push(lexer, c, true);
-					goto finalize;
-				}
-				if (lexer->require_int) {
-					push(lexer, '.', true);
-					goto finalize;
-				}
-				if (isfloat || suff || exp) {
-					push(lexer, c, true);
-					goto finalize;
-				}
-				if (!strchr(basechrs, c = next(lexer, NULL, false))) {
-					push(lexer, c, false);
-					push(lexer, '.', true);
-					goto finalize;
-				} else {
-					push(lexer, c, false);
-				}
-				isfloat = true;
-				break;
-			case 'e':
-				if (!started) {
-					push(lexer, c, true);
-					goto finalize;
-				}
-				if (exp || suff) {
-					push(lexer, c, true);
-					goto finalize;
-				}
-				// exponent is always in base 10
-				basechrs = "0123456789";
-				c = next(lexer, NULL, true);
-				if (c != '-' && c != '+' && !strchr(basechrs, c)) {
-					push(lexer, c, true);
-					push(lexer, 'e', true);
-					goto finalize;
-				};
-				exp = &lexer->buf[lexer->buflen - 1];
-				break;
-			case 'f':
-				if (base != 10) {
-					push(lexer, c, true);
-					goto finalize;
-				}
-				// Fallthrough
-			case 'i':
-			case 'u':
-			case 'z':
-				if (suff || !started) {
-					push(lexer, c, true);
-					goto finalize;
-				}
-				suff = &lexer->buf[lexer->buflen - 1];
-				basechrs = "0123456789";
-				break;
-			default:
-				push(lexer, c, true);
-				goto finalize;
+	if (state != DEC) {
+		last = c;
+		c = next(lexer, NULL, true);
+	}
+	size_t exp = 0, suff = 0;
+	do {
+		if (strchr(chrs[state & MASK], c)) {
+			state &= ~(1 << DIG);
+			last = c;
+			continue;
+		} else if (c > 0x7f || !strchr(matching_states[c], state)) {
+			goto end;
+		}
+		oldstate = state;
+		switch (c) {
+		case '.':
+			if (lexer->require_int) {
+				goto want_int;
 			}
+			state |= 1 << FLT;
+			break;
+		case '-':
+			state |= 1 << FLT;
+			/* fallthrough */
+		case 'e':
+		case '+':
+			state |= 1 << EXP;
+			exp = lexer->buflen - 1;
+			break;
+		case 'f':
+			state |= 1 << FLT;
+			/* fallthrough */
+		case 'i':
+		case 'u':
+		case 'z':
+			state |= DEC | 1 << SUFF;
+			suff = lexer->buflen - 1;
+			break;
+		default:
+			goto end;
 		}
-		started = true;
-	}
-
-finalize:
-	if (!started) {
-		error(&out->loc, "Invalid literal");
-	}
-	if (leadingzero && lexer->buflen >= 2 && strchr(basechrs, lexer->buf[1])) {
-		error(&out->loc, "Leading zero in base 10 literal");
+		if (state & 1 << FLT && lexer->require_int) {
+			error(&out->loc, "Expected integer literal");
+		}
+		last = c;
+		state |= 1 << DIG;
+	} while ((c = next(lexer, NULL, true)) != C_EOF);
+	last = 0;
+end:
+	if (last && !strchr("iuz", last) && !strchr(chrs[state & MASK], last)) {
+		state = oldstate;
+		push(lexer, c, true);
+		push(lexer, last, true);
+	} else if (c != C_EOF) {
+want_int:
+		push(lexer, c, true);
 	}
-	lexer->require_int = false;
 	out->token = T_LITERAL;
-	if (isfloat) {
-		out->storage = STORAGE_FCONST;
-	} else {
-		out->storage = STORAGE_ICONST;
-	}
-	if (suff) {
-		const char *suffs[] = {
-			[STORAGE_U8] = "u8",
-			[STORAGE_U16] = "u16",
-			[STORAGE_U32] = "u32",
-			[STORAGE_U64] = "u64",
-			[STORAGE_I8] = "i8",
-			[STORAGE_I16] = "i16",
-			[STORAGE_I32] = "i32",
-			[STORAGE_I64] = "i64",
+	lexer->require_int = false;
 
-			[STORAGE_UINT] = "u",
-			[STORAGE_INT] = "i",
-			[STORAGE_SIZE] = "z",
-			[STORAGE_F32] = "f32",
-			[STORAGE_F64] = "f64",
-		};
-		bool isvalid = false;
-		for (enum type_storage i = 0;
-				i < sizeof(suffs) / sizeof(suffs[0]); ++i) {
-			if (suffs[i] && strcmp(suff, suffs[i]) == 0) {
-				isvalid = true;
-				out->storage = i;
+	enum kind {
+		UNKNOWN = -1,
+		ICONST, SIGNED, UNSIGNED, FLOAT
+	} kind = UNKNOWN;
+	static const struct {
+		const char suff[4];
+		enum kind kind;
+		enum type_storage storage;
+	} storages[] = {
+		{"f32", FLOAT, STORAGE_F32},
+		{"f64", FLOAT, STORAGE_F64},
+		{"i", SIGNED, STORAGE_INT},
+		{"i16", SIGNED, STORAGE_I16},
+		{"i32", SIGNED, STORAGE_I32},
+		{"i64", SIGNED, STORAGE_I64},
+		{"i8", SIGNED, STORAGE_I8},
+		{"u", UNSIGNED, STORAGE_UINT},
+		{"u16", UNSIGNED, STORAGE_U16},
+		{"u32", UNSIGNED, STORAGE_U32},
+		{"u64", UNSIGNED, STORAGE_U64},
+		{"u8", UNSIGNED, STORAGE_U8},
+		{"z", UNSIGNED, STORAGE_SIZE},
+	};
+	if (suff) {
+		for (size_t i = 0; i < sizeof storages / sizeof storages[0]; i++) {
+			if (!strcmp(storages[i].suff, lexer->buf + suff)) {
+				out->storage = storages[i].storage;
+				kind = storages[i].kind;
 				break;
 			}
 		}
-		if (!isvalid) {
-			error(&out->loc, "Invalid numeric suffix");
+		if (kind == UNKNOWN) {
+			error(&out->loc, "Invalid suffix '%s'", lexer->buf + suff);
 		}
 	}
-
-	intmax_t exponent = 0;
-	if (exp) {
-		char *endptr = NULL;
-		errno = 0;
-		exponent = strtoimax(exp, &endptr, 10);
-		if (errno == ERANGE) {
-			error(&out->loc, "Numerical exponent overflow");
-		}
-		// integers can't have negative exponents
-		if (exponent < 0 && !suff) {
+	if (state & 1 << FLT) {
+		if (kind == UNKNOWN) {
 			out->storage = STORAGE_FCONST;
-		}
-		enum type_storage s = out->storage;
-		bool valid = exponent >= 0
-			|| s == STORAGE_F32
-			|| s == STORAGE_F64
-			|| s == STORAGE_FCONST;
-		if (endptr == exp || !valid) {
-			error(&out->loc, "Integers cannot have negative exponents");
-		}
-	}
-
-	if (isfloat) {
-		switch (out->storage) {
-		case STORAGE_F32:
-		case STORAGE_F64:
-		case STORAGE_FCONST:
-			break;
-		default:
+		} else if (kind != FLOAT) {
 			error(&out->loc, "Unexpected decimal point in integer literal");
 		}
+		out->fval = strtod(lexer->buf, NULL);
+		consume(lexer, -1);
+		return;
 	}
 
+	if (kind == UNKNOWN) {
+		kind = ICONST;
+		out->storage = STORAGE_ICONST;
+	}
+	uintmax_t exponent = 0;
 	errno = 0;
-	switch (out->storage) {
-	case STORAGE_U8:
-	case STORAGE_U16:
-	case STORAGE_U32:
-	case STORAGE_UINT:
-	case STORAGE_U64:
-	case STORAGE_SIZE:
-		out->uval = compute_exp(strtoumax(lexer->buf, NULL, base),
-				exponent, false, &out->loc);
-		break;
-	case STORAGE_ICONST:
-		out->uval = compute_exp(strtoumax(lexer->buf, NULL, base),
-				exponent, false, &out->loc);
-		if (out->uval > (uintmax_t)INT64_MAX) {
-			out->storage = STORAGE_U64;
-			break;
-		}
-		// Fallthrough
-	case STORAGE_I8:
-	case STORAGE_I16:
-	case STORAGE_I32:
-	case STORAGE_INT:
-	case STORAGE_I64:
-		out->uval = compute_exp(strtoumax(lexer->buf, NULL, base),
-				exponent, true, &out->loc);
-		if (out->uval == (uintmax_t)INT64_MIN) {
-			// XXX: Hack
-			out->ival = INT64_MIN;
-		} else {
-			out->ival = (intmax_t)out->uval;
-		}
-		break;
-	case STORAGE_F32:
-	case STORAGE_F64:
-	case STORAGE_FCONST:
-		out->fval = strtod(lexer->buf, NULL);
-		break;
-	default:
-		assert(0);
+	if (exp != 0) {
+		exponent = strtoumax(lexer->buf + exp + 1, NULL, 10);
 	}
-	if (errno == ERANGE && !isfloat) {
+	out->uval = strtoumax(lexer->buf + (base == 10 ? 0 : 2), NULL, base);
+	out->uval = compute_exp(out->uval, exponent, kind == SIGNED);
+	if (errno == ERANGE) {
 		error(&out->loc, "Integer literal overflow");
 	}
+	if (kind == ICONST && out->uval > (uintmax_t)INT64_MAX) {
+		out->storage = STORAGE_U64;
+	} else if (kind == SIGNED && out->uval == (uintmax_t)INT64_MIN) {
+		// XXX: Hack
+		out->ival = INT64_MIN;
+	} else if (kind != UNSIGNED) {
+		out->ival = (intmax_t)out->uval;
+	}
 	consume(lexer, -1);
-	return out->token;
 }
 
 static uint32_t
@@ -953,7 +899,8 @@ _lex(struct lexer *lexer, struct token *out)
 
 	if (c <= 0x7F && isdigit(c)) {
 		push(lexer, c, false);
-		return lex_literal(lexer, out);
+		lex_literal(lexer, out);
+		return T_LITERAL;
 	}
 
 	lexer->require_int = false;
diff --git a/tests/00-constants.ha b/tests/00-constants.ha
@@ -1,4 +1,4 @@
-use rt::{compile, exited, EXIT_SUCCESS};
+use rt::{compile, exited, EXIT_SUCCESS, toutf8};
 
 type my_enum = enum u8 {
 	FOO,
@@ -137,51 +137,298 @@ fn aggregates() void = {
 	u2arr as [3]u8;
 };
 
-fn basics() void = {
-	let i1 = 13, i2 = 13i, i3 = 13i8, i4 = 13i16, i5 = 13i32, i6 = 13i64;
-	let u1 = 13u, u2 = 13z, u3 = 13u8, u4 = 13u16, u5 = 13u32, u6 = 13u64;
-	let n1 = -13, n2 = -13u;
-	let b1 = true, b2 = false;
-	let p1: nullable *int = null;
-	let r1 = 'x', r2 = '\x0A', r3 = '\u1234', r4 = '\0', r5 = '\a',
-		r6 = '\b', r7 = '\f', r8 = '\n', r9 = '\r', r10 = '\t',
-		r11 = '\v', r12 = '\\', r13 = '\'', r14 = '\"',
-		r15 = '\U12345678';
-	let f1 = 1.0, f2 = 1f32, f3 = 1.0e2, f4 = 1.0f64;
-	let f5 = 1.23e+45, f6 = 9.87e-65, f7 = 1e-7, f8 = 5.0e-324;
-	let ie1 = 1e5i;
-
-	let failures: [_]str = [
-		// exponent overflow
-		"let x: u64 = 1e100;",
+fn numeric() void = {
+	let want: [_]i64 = [
+		42, 42, 42, 42, 42, 42, 42, 42,
+		0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,
+		100, 100, 100, 100
+	];
+	let i = [
+		// basics
+		(42, 42i, 42i8, 42i16, 42i32, 42i64), // decimal
+		(42e0, 42e0i, 42e0i8, 42e0i16, 42e0i32, 42e0i64), // with exp
+		(42e00, 42e00i, 42e00i8, 42e00i16, 42e00i32, 42e00i64), // with leading zeros in exp
+		(42e+0, 42e+0i, 42e+0i8, 42e+0i16, 42e+0i32, 42e+0i64), // with + in exp
+		(42e+00, 42e+00i, 42e+00i8, 42e+00i16, 42e+00i32, 42e+00i64), // with + and leading zeros in exp
+		(0b101010, 0b101010i, 0b101010i8, 0b101010i16, 0b101010i32, 0b101010i64), // binary
+		(0o52, 0o52i, 0o52i8, 0o52i16, 0o52i32, 0o52i64), // octal
+		(0x2a, 0x2ai, 0x2ai8, 0x2ai16, 0x2ai32, 0x2ai64), // hex
+
+		// single digit
+		(0, 0i, 0i8, 0i16, 0i32, 0i64), // zero
+		(0b0, 0b0i, 0b0i8, 0b0i16, 0b0i32, 0b0i64), // binary
+		(0o0, 0o0i, 0o0i8, 0o0i16, 0o0i32, 0o0i64), // octal
+		(0x0, 0x0i, 0x0i8, 0x0i16, 0x0i32, 0x0i64), // hex
+
+		(1, 1i, 1i8, 1i16, 1i32, 1i64), // nonzero
+		(0b1, 0b1i, 0b1i8, 0b1i16, 0b1i32, 0b1i64), // binary
+		(0o1, 0o1i, 0o1i8, 0o1i16, 0o1i32, 0o1i64), // octal
+		(0x1, 0x1i, 0x1i8, 0x1i16, 0x1i32, 0x1i64), // hex
+
+		// with leading zero
+		(0b00, 0b00i, 0b00i8, 0b00i16, 0b00i32, 0b00i64), // binary
+		(0o00, 0o00i, 0o00i8, 0o00i16, 0o00i32, 0o00i64), // octal
+		(0x00, 0x00i, 0x00i8, 0x00i16, 0x00i32, 0x00i64), // hex
+
+		(0b01, 0b01i, 0b01i8, 0b01i16, 0b01i32, 0b01i64), // binary with leading zero
+		(0o01, 0o01i, 0o01i8, 0o01i16, 0o01i32, 0o01i64), // octal
+		(0x01, 0x01i, 0x01i8, 0x01i16, 0x01i32, 0x01i64), // hex
+
+		// exponents
+		(1e2, 1e2i, 1e2i8, 1e2i16, 1e2i32, 1e2i64),
+		(1e02, 1e02i, 1e02i8, 1e02i16, 1e02i32, 1e02i64), // with leading zeros in exp
+		(1e+2, 1e+2i, 1e+2i8, 1e+2i16, 1e+2i32, 1e+2i64), // with + in exp
+		(1e+02, 1e+02i, 1e+02i8, 1e+02i16, 1e+02i32, 1e+02i64), // with + and leading zeros in exp
+	];
+	for (let j = 0z; j < len(i); j += 1) {
+		let t = &i[j];
+		assert(want[j] == t.0 && t.0 == t.1 && t.1 == t.2 && t.2 == t.3
+			&& t.3 == t.4 && t.4 == t.5);
+	};
+
+	let u = [
+		// basics
+		(42z, 42u, 42u8, 42u16, 42u32, 42u64), // decimal
+		(42e0z, 42e0u, 42e0u8, 42e0u16, 42e0u32, 42e0u64), // with exp
+		(42e00z, 42e00u, 42e00u8, 42e00u16, 42e00u32, 42e00u64), // with leading zeros in exp
+		(42e+0z, 42e+0u, 42e+0u8, 42e+0u16, 42e+0u32, 42e+0u64), // with + in exp
+		(42e+00z, 42e+00u, 42e+00u8, 42e+00u16, 42e+00u32, 42e+00u64), // with + and leading zeros in exp
+		(0b101010z, 0b101010u, 0b101010u8, 0b101010u16, 0b101010u32, 0b101010u64), // binary
+		(0o52z, 0o52u, 0o52u8, 0o52u16, 0o52u32, 0o52u64), // octal
+		(0x2az, 0x2au, 0x2au8, 0x2au16, 0x2au32, 0x2au64), // hex
+
+		// single digit
+		(0z, 0u, 0u8, 0u16, 0u32, 0u64), // zero
+		(0b0z, 0b0u, 0b0u8, 0b0u16, 0b0u32, 0b0u64), // binary
+		(0o0z, 0o0u, 0o0u8, 0o0u16, 0o0u32, 0o0u64), // octal
+		(0x0z, 0x0u, 0x0u8, 0x0u16, 0x0u32, 0x0u64), // hex
+
+		(1z, 1u, 1u8, 1u16, 1u32, 1u64), // nonzero
+		(0b1z, 0b1u, 0b1u8, 0b1u16, 0b1u32, 0b1u64), // binary
+		(0o1z, 0o1u, 0o1u8, 0o1u16, 0o1u32, 0o1u64), // octal
+		(0x1z, 0x1u, 0x1u8, 0x1u16, 0x1u32, 0x1u64), // hex
+
+		// with leading zero
+		(0b00z, 0b00u, 0b00u8, 0b00u16, 0b00u32, 0b00u64), // binary
+		(0o00z, 0o00u, 0o00u8, 0o00u16, 0o00u32, 0o00u64), // octal
+		(0x00z, 0x00u, 0x00u8, 0x00u16, 0x00u32, 0x00u64), // hex
+
+		(0b01z, 0b01u, 0b01u8, 0b01u16, 0b01u32, 0b01u64), // binary with leading zero
+		(0o01z, 0o01u, 0o01u8, 0o01u16, 0o01u32, 0o01u64), // octal
+		(0x01z, 0x01u, 0x01u8, 0x01u16, 0x01u32, 0x01u64), // hex
+
+		// exponents
+		(1e2z, 1e2u, 1e2u8, 1e2u16, 1e2u32, 1e2u64),
+		(1e02z, 1e02u, 1e02u8, 1e02u16, 1e02u32, 1e02u64), // with leading zeros in exp
+		(1e+2z, 1e+2u, 1e+2u8, 1e+2u16, 1e+2u32, 1e+2u64), // with + in exp
+		(1e+02z, 1e+02u, 1e+02u8, 1e+02u16, 1e+02u32, 1e+02u64), // with + and leading zeros in exp
+	];
+	for (let j = 0z; j < len(u); j += 1) {
+		let t = &u[j];
+		assert(want[j]: u64 == t.0: u64 && t.0: u64 == t.1
+			&& t.1 == t.2 && t.2 == t.3 && t.3 == t.4 && t.4 == t.5);
+	};
+
+	let f = [0.0, 0.00, 0.0e0, 0.00e0, 0.0e1, 0.00e1, 0.0e+0, 0.0e+1, 0.0e-0, 0.0e00,
+		0.0e01, 0.0e+01, 0.0e+00, 0.0e-00, 0e-0, 0e-00, 0e-1, 0e-01];
+	for (let j = 0z; j < len(f); j+= 1) {
+		assert(f[j] == 0.0);
+	};
+
+	let _f32 = [0.0f32, 0.00f32, 0.0e0f32, 0.00e0f32, 0.0e1f32, 0.00e1f32, 0.0e+0f32,
+		0.0e+1f32, 0.0e-0f32, 0.0e00f32, 0.0e01f32, 0.0e+01f32, 0.0e+00f32, 0.0e-00,
+		0f32, 0e0f32, 0e1f32, 0e00f32, 0e01f32, 0e+0f32, 0e+00f32, 0e+1f32,
+		0e+01f32, 0e-0f32, 0e-00f32, 0e-1f32, 0e-01f32];
+	for (let j = 0z; j < len(_f32); j+= 1) {
+		assert(_f32[j] == 0f32);
+	};
+
+	let _f64 = [0.0f64, 0.00f64, 0.0e0f64, 0.00e0f64, 0.0e1f64, 0.00e1f64, 0.0e+0f64,
+		0.0e+1f64, 0.0e-0f64, 0.0e00f64, 0.0e01f64, 0.0e+01f64, 0.0e+00f64, 0.0e-00,
+		0f64, 0e0f64, 0e1f64, 0e00f64, 0e01f64, 0e+0f64, 0e+00f64, 0e+1f64,
+		0e+01f64, 0e-0f64, 0e-00f64, 0e-1f64, 0e-01f64];
+	for (let j = 0z; j < len(_f64); j+= 1) {
+		assert(_f64[j] == 0f64);
+	};
+
+	// double tuple subscript special case
+	let tup = (('a', 'b'), 'c');
+	assert(tup.0.0 == 'a');
+	// exponents
+	assert(tup.0e0.0 == 'a');
+	assert(tup.0.0e0 == 'a');
+	assert(tup.0e0.0e0 == 'a');
+	assert(tup.0e+0.0 == 'a');
+	assert(tup.0.0e+0 == 'a');
+	assert(tup.0e+0.0e+0 == 'a');
+	// signed
+	assert(tup.0i.0 == 'a');
+	assert(tup.0.0i == 'a');
+	assert(tup.0i.0i == 'a');
+	assert(tup.0i32.0 == 'a');
+	assert(tup.0.0i32 == 'a');
+	assert(tup.0i32.0i32 == 'a');
+	// unsigned
+	assert(tup.0u.0 == 'a');
+	assert(tup.0.0u == 'a');
+	assert(tup.0u.0u == 'a');
+	assert(tup.0u32.0 == 'a');
+	assert(tup.0.0u32 == 'a');
+	assert(tup.0u32.0u32 == 'a');
+	// bases
+	assert(tup.0b0.0 == 'a');
+	assert(tup.0.0b0 == 'a');
+	assert(tup.0b0.0b0 == 'a');
+	assert(tup.0o0.0 == 'a');
+	assert(tup.0.0o0 == 'a');
+	assert(tup.0o0.0o0 == 'a');
+	assert(tup.0x0.0 == 'a');
+	assert(tup.0.0x0 == 'a');
+	assert(tup.0x0.0x0 == 'a');
+
+	// zero with large exponent
+	assert(0e10000000 == 0);
+	assert(0e010000000 == 0);
+	assert(0e+10000000 == 0);
+	assert(0e+010000000 == 0);
 
-		// various invalid literals
-		"let x = 0x;",
-		"let x = 0xz;",
-		"let x = 0xu;",
-		"let x = 0xu64;",
-		"let x = 0be+0;",
-		"let x = 0bf64;",
+	// f32 and f64 are valid hex literals
+	assert(0xf32 == 3890);
+	assert(0xf64 == 3940);
+	assert(0x1f32 == 7986);
+	assert(0x1f64 == 8036);
+	assert(0xf321 == 62241);
+	assert(0xf641 == 63041);
+
+
+	// e is a valid hex digit
+	assert(0xe == 14);
+	assert(0xe+1 == 15);
+	assert(0xe-1 == 13);
+	assert(0x1e == 30);
+	assert(0x1e+1 == 31);
+	assert(0x1e-1 == 29);
+	assert(0x1e1 == 481);
+	assert(0x1e1f32 == 1974066);
+
+	let v = if (true) 5else 10;
+	assert(v == 5);
+
+	let invalid: [_]str = [
+
+		// invalid base
+		"0b", "0o", // 0x tested separately
+		"00b", "00o", "00x",
+		"01b", "01o", "01x",
+		"1b", "1o", "1x",
+		"11b", "11o", "11x",
+
+		// base with exponent
+		"0b1e1",
+		"0o1e1",
+		// with +/-
+		"0b1e+1",
+		"0o1e+1",
 
 		// invalid digits in smaller bases
-                "let x = 0b41;",
-                "let x = 0b14;",
-                "let x = 0o82;",
-                "let x = 0o28;",
-
-		// leading zeros
-		"let x = 05;"
-		"let x = 0000000010;"
+		"0b41", "0b14",
+		"0o82", "0o28",
+
+		// leading zeroes
+		"05", "00000010", "00.0", "01.0",
+		"05e3", "00000010e3", "00.0e3", "01.0e3",
+		"05e+3", "00000010e+3", "00.0e+3", "01.0e+3",
+		"05e-3", "00000010e-3", "00.0e-3", "01.0e-3",
+
+		// invalid sequences of special characters
+		"1.",
+		"1..",
+		"1..1",
+		"1.1.",
+		"1.1.1",
+
+		"1e",
+		"1e+",
+		"1e-",
+
+		"1e1+",
+		"1e1-",
+
+		"1ee",
+		"1e+e", "1ee+", "1e+e+",
+		"1e-e", "1ee-", "1e-e-",
+		"1e+e-", "1e-e+",
+
+		"1ee1",
+		"1e+e1", "1ee+1", "1e+e+1",
+		"1e-e1", "1ee-1", "1e-e-1",
+		"1e+e-1", "1e-e+1",
+
+		"1e1e",
+		"1e+1e", "1e1e+", "1e+1e+",
+		"1e-1e", "1e1e-", "1e-1e-",
+		"1e+1e-", "1e-1e+",
+
+		"1e1e1",
+		"1e+1e1", "1e1e+1", "1e+1e+1",
+		"1e-1e1", "1e1e-1", "1e-1e-1",
+		"1e+1e-1", "1e-1e+1",
+
+		"1.e", "1e.",
+		"1.e1", "1e.1",
+		"1.1e", "1e1.",
+		"1e1.1",
+
+		"1.e+", "1e+.",
+		"1.e+1", "1e+.1",
+		"1.1e+", "1e+1.",
+		"1e+1.1",
+
+		"1.e-", "1e-.",
+		"1.e-1", "1e-.1",
+		"1.1e-", "1e-1.",
+		"1e-1.1",
+	];
+	let extra: [_]str = [
+		"4e-0i;", "4e-1i;",
+		"4e-0i8;", "4e-1i8;",
+
+		"0b1e-1f32;",
+		"0o1e-1f32;",
+		"0x1e+1f32;",
+		"0x1e-1f32;",
+
+		// exponent overflow
+		"let t: u64 = 1e1000;",
 	];
-	for (let i = 0z; i < len(failures); i += 1) {
-		assert(compile(failures[i]) as exited != EXIT_SUCCESS);
+	let suffix = [";", "i;", "i8;", "f32;"];
+	let buf: [256]u8 = [0...];
+	for (let i = 0z; i < len(invalid); i += 1) {
+		for (let j = 0z; j < len(suffix); j += 1) {
+			let buf = buf[..0];
+			append(buf, toutf8("let t = ")...);
+			append(buf, toutf8(invalid[i])...);
+			append(buf, toutf8(suffix[j])...);
+			assert(compile(*(&buf: *str)) as exited != EXIT_SUCCESS);
+		};
 	};
+	for (let i = 0z; i < len(extra); i += 1) {
+		assert(compile(extra[i]) as exited != EXIT_SUCCESS);
+	};
+};
+
+fn basics() void = {
+	let b1 = true, b2 = false;
+	let p1: nullable *int = null;
+	let r1 = ['x', '\x0A', '\u1234', '\0', '\a', '\b', '\f', '\n', '\r', '\t',
+		'\v', '\\', '\'', '\"', '\U12345678'];
 };
 
 export fn main() void = {
 	// The interaction between constants and result type reduction is tested
 	// in 30-reduction.c
 	basics();
+	numeric();
 	assignment();
 	aggregates();
 };

	harec [hare] Hare compiler, written in C11 for POSIX OSs
	Log | Files | Refs | README | LICENSE

M	rt/cstrings.ha	|	2	++
M	src/lex.c	|	359	++++++++++++++++++++++++++++++++++---------------------------------------------
M	tests/00-constants.ha	|	317	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------