commit 2b21998d1effb3bea66e7950b6d0732d73328318
parent 1fe756964bc0b85d1f7ca29d6aa719dac0c59527
Author: Drew DeVault <sir@cmpwn.com>
Date: Wed, 14 Apr 2021 10:43:51 -0400
hare::lex: add flag for lexing comments
Will be useful for haredoc
Diffstat:
4 files changed, 60 insertions(+), 19 deletions(-)
diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -167,6 +167,19 @@ fn loc(line: uint, col: uint) location = location {
(ltok::NAME, "bar", loc(2, 1)),
];
lextest(in, expected);
+
+ let in = "hello world // foo\n// bar";
+ let buf = bufio::fixed(strings::toutf8(in), mode::READ);
+ defer io::close(buf);
+ let lexer = init(buf, "<input>", flags::COMMENTS);
+ assert(lex(&lexer) is token);
+ assert(lex(&lexer) is token);
+ let tok = lex(&lexer) as token;
+ assert(tok.0 == ltok::COMMENT);
+ assert(tok.1 as str == " foo\n");
+ let tok = lex(&lexer) as token;
+ assert(tok.0 == ltok::COMMENT);
+ assert(tok.1 as str == " bar");
};
@test fn runes() void = {
diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha
@@ -7,6 +7,7 @@ use io;
use sort;
use strconv;
use strings;
+use strio;
// State associated with a lexer.
export type lexer = struct {
@@ -15,6 +16,13 @@ export type lexer = struct {
loc: (uint, uint),
un: (token | void),
rb: [2](rune | io::EOF | void),
+ flags: flags,
+};
+
+// Flags which apply to this lexer. They are bit values: init combines every
+// flag it is given with bitwise OR and stores the result in lexer.flags.
+export type flags = enum uint {
+ // Enables lexing comments: when set, a comment is returned to the
+ // caller as an ltok::COMMENT token; when unset, comments are skipped.
+ COMMENTS = 1 << 0,
};
// A syntax error
@@ -34,12 +42,19 @@ export fn strerror(err: error) const str = {
};
// Initializes a new lexer for the given input stream. The path is borrowed.
-export fn init(in: *io::stream, path: str) lexer = lexer {
- in = in,
- path = path,
- loc = (1, 1),
- un = void,
- rb = [void...],
+export fn init(in: *io::stream, path: str, flags: flags...) lexer = {
+ // Fold the variadic flags into a single bitmask; when no flags are
+ // passed the mask stays zero.
+ let f: flags = 0: flags;
+ for (let i = 0z; i < len(flags); i += 1) {
+ f |= flags[i];
+ };
+ return lexer {
+ in = in,
+ path = path,
+ loc = (1, 1),
+ un = void,
+ rb = [void...],
+ flags = f,
+ };
};
// Returns the next token from the lexer.
@@ -227,6 +242,26 @@ fn lex_name(lex: *lexer, loc: location) (token | error) = {
};
};
+// Lexes a line comment. lex2 calls this once it has consumed the leading "//",
+// so only the text after those two slashes is read here. Errors from next are
+// propagated to the caller.
+//
+// Without flags::COMMENTS, the rest of the line is discarded and the result of
+// lexing what follows is returned. With flags::COMMENTS, an ltok::COMMENT token
+// is returned at the given location; its value is the comment text, including
+// the terminating newline if one was read before EOF.
+fn lex_comment(lexr: *lexer, loc: location) (token | error) = {
+ if (lexr.flags & flags::COMMENTS != flags::COMMENTS) {
+ // Comments are disabled: consume up to and including the newline
+ // (or up to EOF), then resume normal lexing.
+ for (true) match (next(lexr)?) {
+ io::EOF => break,
+ r: rune => if (r == '\n') break,
+ };
+ return lex(lexr);
+ };
+
+ // Comments are enabled: collect the text in a growable string buffer.
+ let buf = strio::dynamic();
+ for (true) match (next(lexr)?) {
+ io::EOF => break,
+ r: rune => {
+ // The rune is appended before the newline check, so the
+ // newline itself is part of the token value.
+ strio::appendrune(buf, r);
+ if (r == '\n') break;
+ },
+ };
+ return (ltok::COMMENT, strio::finish(buf), loc);
+};
+
fn lex2(lexr: *lexer, loc: location, r: rune) (token | error) = {
let n = next(lexr)?;
let tok: ltok = switch (r) {
@@ -248,16 +283,7 @@ fn lex2(lexr: *lexer, loc: location, r: rune) (token | error) = {
'/' => match (n) {
r: rune => switch (r) {
'=' => return (ltok::DIVEQ, void, loc),
- '/' => {
- // Comment
- for (true) match (next(lexr)?) {
- io::EOF => break,
- r: rune => if (r == '\n') {
- break;
- },
- };
- return lex(lexr);
- },
+ '/' => return lex_comment(lexr, loc),
* => ltok::DIV,
},
io::EOF => ltok::DIV,
diff --git a/hare/lex/token.ha b/hare/lex/token.ha
@@ -2,7 +2,7 @@ use encoding::utf8;
use strings;
// A token with no additional context, such as '+'
-export type ltok = enum {
+export type ltok = enum uint {
// Keep ordered with bmap
// Alpha sorted
ATTR_FINI,
@@ -136,10 +136,11 @@ export type ltok = enum {
LIT_FCONST,
LIT_RUNE,
LIT_STR,
- LAST_LITERAL = STR,
+ LAST_LITERAL = LIT_STR,
NAME,
LABEL,
+ COMMENT,
EOF,
};
@@ -295,6 +296,7 @@ export fn tokstr(tok: token) const str = {
ltok::LIT_STR => "str",
ltok::NAME => tok.1 as str,
ltok::LABEL => abort(), // TODO
+ ltok::COMMENT => abort(), // TODO
ltok::EOF => "EOF",
* => abort(),
};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -336,7 +336,7 @@ hare_lex() {
gensrcs_hare_lex \
+test.ha
fi
- gen_ssa hare::lex io bufio strings types fmt sort
+ gen_ssa hare::lex io bufio strings types fmt sort strio
}
hare_module() {