hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 3fd55fd1e44451fe438ccfd4951d21a7e50dfebe
parent 71e701faad89e9f040ab206e89eb21ef14224db6
Author: Drew DeVault <sir@cmpwn.com>
Date:   Mon, 15 Feb 2021 13:57:38 -0500

hare::lex: lex3

Diffstat:
M hare/lex/+test.ha | 24 +++++++++++++++++++++++-
M hare/lex/lex.ha | 118 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 136 insertions(+), 6 deletions(-)

diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -34,7 +34,10 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = {
 	for (let i = 0z; i < len(expected); i += 1) {
 		let eline = expected[i].0, ecol = expected[i].1,
 			etok = expected[i].2;
-		let tl = lex(&lexer) as (token, location);
+		let tl = match (lex(&lexer)) {
+			tl: (token, location) => tl,
+			* => abort(),
+		};
 		let tok = tl.0, loc = tl.1;
 		match (tok) {
 			b: btoken => if (etok as btoken != b) {
@@ -99,3 +102,22 @@ fn lextest(in: str, expected: [](uint, uint, token)) void = {
 	];
 	lextest(in, expected);
 };
+
+@test fn lex3() void = {
+	const in = ". .. ... < << <= <<= > >> >= >>= >>";
+	const expected: [_](uint, uint, token) = [
+		(1, 1, btoken::DOT),
+		(1, 3, btoken::SLICE),
+		(1, 6, btoken::ELLIPSIS),
+		(1, 10, btoken::LESS),
+		(1, 12, btoken::LSHIFT),
+		(1, 15, btoken::LESSEQ),
+		(1, 18, btoken::LSHIFTEQ),
+		(1, 22, btoken::GREATER),
+		(1, 24, btoken::RSHIFT),
+		(1, 27, btoken::GREATEREQ),
+		(1, 30, btoken::RSHIFTEQ),
+		(1, 34, btoken::RSHIFT),
+	];
+	lextest(in, expected);
+};
diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha
@@ -2,7 +2,6 @@
 use ascii;
 use io;
 use strings;
-use fmt;
 
 // State associated with a lexer.
 export type lexer = struct {
@@ -67,7 +66,7 @@ export fn lex(lex: *lexer) ((token, location) | io::EOF | error) = {
 	let tok: token = switch (r) {
 		* => return syntaxerr(loc),
 		'"', '\'' => abort(), // TODO: Strings/runes
-		'.', '<', '>' => return lex3(lex, r),
+		'.', '<', '>' => return lex3(lex, loc, r),
 		'^', '*', '%', '/', '+', '-', ':', '!', '&', '|', '=' => {
 			return lex2(lex, loc, r);
 		},
@@ -167,9 +166,118 @@ fn lex2(
 	return (tok, loc);
 };
 
-fn lex3(lex: *lexer, r: rune) ((token, location) | io::EOF | error) = {
-	abort();
-	return io::EOF; // TODO
+fn lex3(
+	lex: *lexer,
+	loc: location,
+	r: rune,
+) ((token, location) | io::EOF | error) = {
+	let n = match (next(lex)) {
+		err: io::error => return err,
+		io::EOF => return switch (r) {
+			'.' => (btoken::DOT: token, loc),
+			'<' => (btoken::LESS: token, loc),
+			'>' => (btoken::GREATER: token, loc),
+		},
+		r: rune => r,
+	};
+	return switch (r) {
+		'.' => lex3dot(lex, loc, n),
+		'<' => lex3lt(lex, loc, n),
+		'>' => lex3gt(lex, loc, n),
+		* => syntaxerr(loc),
+	};
+};
+
+fn lex3dot(
+	lex: *lexer,
+	loc: location,
+	n: rune,
+) ((token, location) | io::EOF | error) = {
+	let tok: token = switch (n) {
+		'.' => {
+			let q = match (next(lex)) {
+				err: io::error => return err,
+				io::EOF => io::EOF,
+				r: rune => r,
+			};
+			let t = match (q) {
+				r: rune => switch (r) {
+					'.' => return (btoken::ELLIPSIS: token, loc),
+					* => btoken::SLICE,
+				},
+				io::EOF => btoken::SLICE,
+			};
+			unget(lex, q);
+			t;
+		},
+		* => {
+			unget(lex, n);
+			btoken::DOT;
+		}
+	};
+	return (tok, loc);
+};
+
+fn lex3lt(
+	lex: *lexer,
+	loc: location,
+	n: rune,
+) ((token, location) | io::EOF | error) = {
+	let tok: token = switch (n) {
+		'<' => {
+			let q = match (next(lex)) {
+				err: io::error => return err,
+				io::EOF => io::EOF,
+				r: rune => r,
+			};
+			let t = match (q) {
+				r: rune => switch (r) {
+					'=' => return (btoken::LSHIFTEQ: token, loc),
+					* => btoken::LSHIFT,
+				},
+				io::EOF => btoken::LSHIFT,
+			};
+			unget(lex, q);
+			t;
+		},
+		'=' => btoken::LESSEQ,
+		* => {
+			unget(lex, n);
+			btoken::LESS;
+		}
+	};
+	return (tok, loc);
+};
+
+fn lex3gt(
+	lex: *lexer,
+	loc: location,
+	n: rune,
+) ((token, location) | io::EOF | error) = {
+	let tok: token = switch (n) {
+		'>' => {
+			let q = match (next(lex)) {
+				err: io::error => return err,
+				io::EOF => io::EOF,
+				r: rune => r,
+			};
+			let t = match (q) {
+				r: rune => switch (r) {
+					'=' => return (btoken::RSHIFTEQ: token, loc),
+					* => btoken::RSHIFT,
+				},
+				io::EOF => btoken::RSHIFT,
+			};
+			unget(lex, q);
+			t;
+		},
+		'=' => btoken::GREATEREQ,
+		* => {
+			unget(lex, n);
+			btoken::GREATER;
+		}
+	};
+	return (tok, loc);
 };
 
 // Unlex a single token. The next call to [lex] will return this token, location