commit 2440c5f065e6e782d4c208d79f421531b88ae0f2
parent e8c5f5fe3feb131a346aa8bd766b838cad45a92b
Author: Eyal Sawady <ecs@d2evs.net>
Date: Wed, 14 Apr 2021 15:06:43 -0400
hare::lex: implement literals
Float literals are still blocked on strconv::stof{32,64}
Diffstat:
3 files changed, 197 insertions(+), 4 deletions(-)
diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -236,3 +236,26 @@ fn loc(line: uint, col: uint) location = location {
];
lextest(in, expected);
};
+
+// Lexes one integer literal of each supported flavour (bare constants,
+// exponents, every base prefix, signed and unsigned suffixes) and checks the
+// token type, value and source location produced for each.
+@test fn literals() void = {
+	// TODO: Float literals
+	const src = "1e5 -1i32 9223372036854775809 1e2z 255u8 0o42u16\n"
+		"0b1000101u32 0xDEADBEEFu64 -0b10i8 -5e0i16 -0o16i32\n"
+		"0b00000010000001100000011100001111000000100000011000000111i64";
+	const want: [_]token = [
+		// Line 1: decimal forms. The third literal does not fit in an
+		// i64 and is expected to come back as a u64.
+		(ltok::LIT_ICONST, 1e5i64, loc(1, 1)),
+		(ltok::LIT_I32, -1i64, loc(1, 5)),
+		(ltok::LIT_U64, 9223372036854775809u64, loc(1, 11)),
+		(ltok::LIT_SIZE, 1e2u64, loc(1, 31)),
+		(ltok::LIT_U8, 255u64, loc(1, 36)),
+		(ltok::LIT_U16, 0o42u64, loc(1, 42)),
+		// Line 2: base prefixes, negative values and exponents.
+		(ltok::LIT_U32, 0b1000101u64, loc(2, 1)),
+		(ltok::LIT_U64, 0xDEADBEEFu64, loc(2, 14)),
+		(ltok::LIT_I8, -0b10i64, loc(2, 28)),
+		(ltok::LIT_I16, -5e0i64, loc(2, 36)),
+		(ltok::LIT_I32, -0o16i64, loc(2, 44)),
+		// Binary solo
+		(
+			ltok::LIT_I64,
+			0b00000010000001100000011100001111000000100000011000000111i64,
+			loc(3, 1),
+		),
+	];
+	lextest(src, want);
+};
diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha
@@ -8,6 +8,7 @@ use sort;
use strconv;
use strings;
use strio;
+use types;
// State associated with a lexer.
export type lexer = struct {
@@ -88,7 +89,7 @@ export fn lex(lex: *lexer) (token | error) = {
};
if (ascii::isdigit(r)) {
unget(lex, r);
- abort(); // TODO: Literals
+ return lex_literal(lex, loc);
};
let tok: ltok = switch (r) {
@@ -272,6 +273,169 @@ fn lex_comment(lexr: *lexer, loc: location) (token | error) = {
return lex(lexr);
};
+// Lexes a numeric literal starting at the lexer's current position and returns
+// it as a token located at loc.
+//
+// The accepted shape is: an optional leading '-', an optional base prefix
+// ("0b", "0o" or "0x"), a run of digits valid for that base, an optional
+// decimal exponent introduced by 'e' (base 10 only) and an optional type
+// suffix starting with 'i', 'u', 'f' or 'z'. The first rune that cannot
+// continue the literal is pushed back with unget.
+//
+// An unsuffixed integer that overflows i64 and is not negative is returned as
+// a LIT_U64. Malformed exponents, unknown suffixes and values which do not fit
+// the literal's type are reported through syntaxerr. Float literals are
+// recognised but not converted yet: they still abort (see the TODO below).
+fn lex_literal(lex: *lexer, loc: location) (token | error) = {
+	let chars: []u8 = [];
+	// Every string sliced out of chars below is converted to a number or an
+	// ltok before this function returns, so the buffer can be released on
+	// every exit path.
+	defer free(chars);
+
+	let r = match (next(lex)?) {
+		io::EOF => return (ltok::EOF, void, loc),
+		r: rune => r,
+	};
+	if (r == '-') {
+		append(chars, utf8::encoderune(r)...);
+		r = match (next(lex)?) {
+			io::EOF => return (ltok::EOF, void, loc),
+			r: rune => r,
+		};
+	};
+
+	// A leading '0' may introduce a base prefix. The '0' itself is kept in
+	// chars, so "0x" followed by no digits still converts to zero.
+	let base = 10u;
+	if (r == '0') {
+		append(chars, utf8::encoderune(r)...);
+		r = match (next(lex)?) {
+			io::EOF => return (ltok::LIT_ICONST, 0i64, loc),
+			r: rune => r,
+		};
+		switch (r) {
+			'b' => base = 2,
+			'o' => base = 8,
+			'x' => base = 16,
+			* => unget(lex, r),
+		};
+	} else unget(lex, r);
+	let basechrs = switch (base) {
+		2 => "01",
+		8 => "01234567",
+		10 => "0123456789",
+		16 => "0123456789ABCDEFabcdef",
+	};
+
+	// suff: index into chars where the type suffix starts.
+	// exp:  index into chars of the first exponent digit (after the 'e').
+	// end:  length of the mantissa, i.e. chars[..end] is the number itself;
+	//       zero means "not determined yet".
+	let suff: (size | void) = void;
+	let exp: (size | void) = void;
+	let end = 0z;
+	let float = false;
+	for (true) {
+		r = match (next(lex)?) {
+			io::EOF => break,
+			r: rune => r,
+		};
+		if (!strings::contains(basechrs, r)) switch (r) {
+			'.' => if (float || exp is size || suff is size
+					|| base != 10) {
+				unget(lex, r);
+				break;
+			} else {
+				// Only treat '.' as a decimal point when a
+				// digit follows; otherwise hand both runes
+				// back to the lexer.
+				r = match (next(lex)?) {
+					io::EOF => break,
+					r: rune => r,
+				};
+				if (!strings::contains(basechrs, r)) {
+					unget(lex, r);
+					unget(lex, '.');
+					break;
+				};
+				unget(lex, r);
+				float = true;
+				append(chars, utf8::encoderune('.')...);
+			},
+			'e' => if (exp is size || suff is size || base != 10) {
+				unget(lex, r);
+				break;
+			} else {
+				if (end == 0) end = len(chars);
+				append(chars, utf8::encoderune(r)...);
+				exp = len(chars);
+			},
+			'i', 'u', 'f', 'z' => if (suff is size) {
+				unget(lex, r);
+				break;
+			} else {
+				suff = len(chars);
+				if (end == 0) end = len(chars);
+				append(chars, utf8::encoderune(r)...);
+				// After the first suffix rune accept the width
+				// digits and the letters needed to spell "uint"
+				// and "int". With digits only, those two
+				// suffixes were cut short at "u"/"i" and
+				// rejected as invalid.
+				basechrs = "0123456789int";
+			},
+			* => {
+				unget(lex, r);
+				break;
+			},
+		} else append(chars, utf8::encoderune(r)...);
+	};
+	if (end == 0) end = len(chars);
+
+	// The exponent digits run from exp up to the suffix (or the end of the
+	// literal when there is no suffix).
+	let exp = match (exp) {
+		void => "0",
+		exp: size => {
+			let end = match (suff) {
+				void => len(chars),
+				suff: size => suff,
+			};
+			strings::fromutf8(chars[exp..end]);
+		},
+	};
+	let exp = match (strconv::stoz(exp)) {
+		exp: size => exp,
+		// Reachable from user input such as "1e" or "1ez", where no
+		// digit follows the 'e', so report it instead of aborting.
+		strconv::invalid =>
+			return syntaxerr(loc, "expected digits in exponent"),
+		strconv::overflow =>
+			return syntaxerr(loc, "overflow in exponent"),
+	};
+
+	let suff = match (suff) {
+		suff: size => strings::fromutf8(chars[suff..]),
+		void => "",
+	};
+	let suff = if (suff == "u8") ltok::LIT_U8
+		else if (suff == "u16") ltok::LIT_U16
+		else if (suff == "u32") ltok::LIT_U32
+		else if (suff == "u64") ltok::LIT_U64
+		else if (suff == "uint") ltok::LIT_UINT
+		else if (suff == "z") ltok::LIT_SIZE
+		else if (suff == "i8") ltok::LIT_I8
+		else if (suff == "i16") ltok::LIT_I16
+		else if (suff == "i32") ltok::LIT_I32
+		else if (suff == "i64") ltok::LIT_I64
+		else if (suff == "int") ltok::LIT_INT
+		else if (suff == "" && !float) ltok::LIT_ICONST
+		else if (suff == "f32") ltok::LIT_F32
+		else if (suff == "f64") ltok::LIT_F64
+		else if (suff == "" && float) ltok::LIT_FCONST
+		else return syntaxerr(loc, "invalid literal suffix");
+
+	// Convert the mantissa. Every unsigned type (including size) goes
+	// through the u64 parser and every signed type through the i64 parser.
+	// The arms previously listed LIT_U64 and LIT_I64 twice and left out
+	// LIT_U16, LIT_I16 and LIT_SIZE, so those suffixes had no arm.
+	let val = strings::fromutf8(chars[..end]);
+	let val = switch (suff) {
+		ltok::LIT_U8, ltok::LIT_U16, ltok::LIT_U32, ltok::LIT_U64,
+		ltok::LIT_UINT, ltok::LIT_SIZE =>
+			strconv::stou64b(val, base),
+		ltok::LIT_ICONST => match (strconv::stoi64b(val, base)) {
+			i: i64 => i,
+			strconv::invalid => abort(),
+			// A non-negative constant too large for i64 is retried
+			// as a u64 and retagged accordingly.
+			strconv::overflow => if (chars[0] != '-': u32: u8) {
+				suff = ltok::LIT_U64;
+				strconv::stou64b(val, base);
+			} else strconv::overflow,
+		},
+		ltok::LIT_I8, ltok::LIT_I16, ltok::LIT_I32, ltok::LIT_I64,
+		ltok::LIT_INT => strconv::stoi64b(val, base),
+		ltok::LIT_F32, ltok::LIT_F64, ltok::LIT_FCONST => abort(), // TODO
+	};
+
+	// Apply the decimal exponent, refusing to wrap around silently.
+	let val = match (val) {
+		val: u64 => {
+			for (let i = 0z; i < exp; i += 1) {
+				if (val > types::U64_MAX / 10) {
+					return syntaxerr(loc,
+						"overflow in exponent");
+				};
+				val *= 10;
+			};
+			val;
+		},
+		val: i64 => {
+			for (let i = 0z; i < exp; i += 1) {
+				if (val > types::I64_MAX / 10
+						|| val < types::I64_MIN / 10) {
+					return syntaxerr(loc,
+						"overflow in exponent");
+				};
+				val *= 10;
+			};
+			val;
+		},
+		// Presumably reachable when a negative mantissa meets an
+		// unsigned suffix (e.g. "-1u8"), assuming stou64b rejects the
+		// sign; report it rather than aborting.
+		strconv::invalid =>
+			return syntaxerr(loc, "invalid integer literal"),
+		// This is the mantissa overflowing, not the exponent.
+		strconv::overflow =>
+			return syntaxerr(loc, "overflow in integer literal"),
+	};
+
+	return (suff, val, loc);
+};
+
fn lex2(lexr: *lexer, loc: location, r: rune) (token | error) = {
let n = next(lexr)?;
let tok: ltok = switch (r) {
@@ -315,7 +479,13 @@ fn lex2(lexr: *lexer, loc: location, r: rune) (token | error) = {
'-' => match (n) {
r: rune => switch (r) {
'=' => return (ltok::MINUSEQ, void, loc),
- * => ltok::MINUS,
+ * => if (ascii::isdigit(r)) {
+ unget(lexr, r);
+ unget(lexr, '-');
+ return lex_literal(lexr, loc);
+ } else {
+ ltok::MINUS;
+ },
},
io::EOF => ltok::MINUS,
},
diff --git a/hare/lex/token.ha b/hare/lex/token.ha
@@ -124,7 +124,7 @@ export type ltok = enum uint {
LIT_U32,
LIT_U64,
LIT_UINT,
- LIT_UINTPTR,
+ LIT_SIZE,
LIT_I8,
LIT_I16,
LIT_I32,
@@ -281,7 +281,7 @@ export fn tokstr(tok: token) const str = {
ltok::LIT_U32 => "u32",
ltok::LIT_U64 => "u64",
ltok::LIT_UINT => "uint",
- ltok::LIT_UINTPTR => "uintptr",
+ ltok::LIT_SIZE => "size",
ltok::LIT_I8 => "i8",
ltok::LIT_I16 => "i16",
ltok::LIT_I32 => "i32",