commit 3fd55fd1e44451fe438ccfd4951d21a7e50dfebe
parent 71e701faad89e9f040ab206e89eb21ef14224db6
Author: Drew DeVault <sir@cmpwn.com>
Date: Mon, 15 Feb 2021 13:57:38 -0500
hare::lex: lex3
Diffstat:
2 files changed, 136 insertions(+), 6 deletions(-)
diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -34,7 +34,10 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = {
for (let i = 0z; i < len(expected); i += 1) {
let eline = expected[i].0, ecol = expected[i].1,
etok = expected[i].2;
- let tl = lex(&lexer) as (token, location);
+ let tl = match (lex(&lexer)) {
+ tl: (token, location) => tl,
+ * => abort(),
+ };
let tok = tl.0, loc = tl.1;
match (tok) {
b: btoken => if (etok as btoken != b) {
@@ -99,3 +102,22 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = {
];
lextest(in, expected);
};
+
+@test fn lex3() void = { // Checks tokens and positions for the '.', '<' and '>' families.
+ const in = ". .. ... < << <= <<= > >> >= >>= >>";
+ const expected: [_](uint, uint, token) = [ // (line, column, token), as read by lextest
+ (1, 1, btoken::DOT),
+ (1, 3, btoken::SLICE),
+ (1, 6, btoken::ELLIPSIS),
+ (1, 10, btoken::LESS),
+ (1, 12, btoken::LSHIFT),
+ (1, 15, btoken::LESSEQ),
+ (1, 18, btoken::LSHIFTEQ),
+ (1, 22, btoken::GREATER),
+ (1, 24, btoken::RSHIFT),
+ (1, 27, btoken::GREATEREQ),
+ (1, 30, btoken::RSHIFTEQ),
+ (1, 34, btoken::RSHIFT), // '>>' is the last thing in the input
+ ];
+ lextest(in, expected);
+};
diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha
@@ -2,7 +2,6 @@
use ascii;
use io;
use strings;
-use fmt;
// State associated with a lexer.
export type lexer = struct {
@@ -67,7 +66,7 @@ export fn lex(lex: *lexer) ((token, location) | io::EOF | error) = {
let tok: token = switch (r) {
* => return syntaxerr(loc),
'"', '\'' => abort(), // TODO: Strings/runes
- '.', '<', '>' => return lex3(lex, r),
+ '.', '<', '>' => return lex3(lex, loc, r),
'^', '*', '%', '/', '+', '-', ':', '!', '&', '|', '=' => {
return lex2(lex, loc, r);
},
@@ -167,9 +166,118 @@ fn lex2(
return (tok, loc);
};
-fn lex3(lex: *lexer, r: rune) ((token, location) | io::EOF | error) = {
- abort();
- return io::EOF; // TODO
+fn lex3( // Lexes a token whose first rune is '.', '<' or '>'.
+ lex: *lexer,
+ loc: location, // returned unchanged alongside the token
+ r: rune, // first rune of the token, passed in by lex()
+) ((token, location) | io::EOF | error) = {
+ let n = match (next(lex)) { // n: the rune that follows r
+ err: io::error => return err,
+ io::EOF => return switch (r) { // nothing follows r: emit the one-rune token
+ '.' => (btoken::DOT: token, loc),
+ '<' => (btoken::LESS: token, loc),
+ '>' => (btoken::GREATER: token, loc),
+ },
+ r: rune => r, // this binding is the lookahead rune, not the parameter r
+ };
+ return switch (r) { // dispatch on the first rune; n is handed on as lookahead
+ '.' => lex3dot(lex, loc, n),
+ '<' => lex3lt(lex, loc, n),
+ '>' => lex3gt(lex, loc, n),
+ * => syntaxerr(loc), // any other first rune
+ };
+};
+
+fn lex3dot( // Completes DOT, SLICE ('..') or ELLIPSIS ('...') after a leading '.'.
+ lex: *lexer,
+ loc: location, // returned unchanged alongside the token
+ n: rune, // the rune read after the leading '.'
+) ((token, location) | io::EOF | error) = {
+ let tok: token = switch (n) {
+ '.' => { // at least '..'; read one more rune to test for '...'
+ let q = match (next(lex)) {
+ err: io::error => return err,
+ io::EOF => io::EOF, // kept as a value so it can be passed to unget below
+ r: rune => r,
+ };
+ let t = match (q) {
+ r: rune => switch (r) {
+ '.' => return (btoken::ELLIPSIS: token, loc), // q belongs to the token: no unget
+ * => btoken::SLICE,
+ },
+ io::EOF => btoken::SLICE,
+ };
+ unget(lex, q); // q is not part of SLICE; presumably re-read by the next next() - confirm
+ t;
+ },
+ * => { // n does not extend the token
+ unget(lex, n);
+ btoken::DOT;
+ }
+ };
+ return (tok, loc);
+};
+
+fn lex3lt( // Completes LESS, LESSEQ, LSHIFT or LSHIFTEQ after a leading '<'.
+ lex: *lexer,
+ loc: location, // returned unchanged alongside the token
+ n: rune, // the rune read after the leading '<'
+) ((token, location) | io::EOF | error) = {
+ let tok: token = switch (n) {
+ '<' => { // at least '<<'; read one more rune to test for '<<='
+ let q = match (next(lex)) {
+ err: io::error => return err,
+ io::EOF => io::EOF, // kept as a value so it can be passed to unget below
+ r: rune => r,
+ };
+ let t = match (q) {
+ r: rune => switch (r) {
+ '=' => return (btoken::LSHIFTEQ: token, loc), // q belongs to the token: no unget
+ * => btoken::LSHIFT,
+ },
+ io::EOF => btoken::LSHIFT,
+ };
+ unget(lex, q); // q is not part of LSHIFT; presumably re-read by the next next() - confirm
+ t;
+ },
+ '=' => btoken::LESSEQ, // n is consumed as part of '<='
+ * => { // n does not extend the token
+ unget(lex, n);
+ btoken::LESS;
+ }
+ };
+ return (tok, loc);
+};
+
+fn lex3gt( // Completes GREATER, GREATEREQ, RSHIFT or RSHIFTEQ after a leading '>'.
+ lex: *lexer,
+ loc: location, // returned unchanged alongside the token
+ n: rune, // the rune read after the leading '>'
+) ((token, location) | io::EOF | error) = {
+ let tok: token = switch (n) {
+ '>' => { // at least '>>'; read one more rune to test for '>>='
+ let q = match (next(lex)) {
+ err: io::error => return err,
+ io::EOF => io::EOF, // kept as a value so it can be passed to unget below
+ r: rune => r,
+ };
+ let t = match (q) {
+ r: rune => switch (r) {
+ '=' => return (btoken::RSHIFTEQ: token, loc), // q belongs to the token: no unget
+ * => btoken::RSHIFT,
+ },
+ io::EOF => btoken::RSHIFT,
+ };
+ unget(lex, q); // q is not part of RSHIFT; presumably re-read by the next next() - confirm
+ t;
+ },
+ '=' => btoken::GREATEREQ, // n is consumed as part of '>='
+ * => { // n does not extend the token
+ unget(lex, n);
+ btoken::GREATER;
+ }
+ };
+ return (tok, loc);
 };
// Unlex a single token. The next call to [lex] will return this token, location