commit 5bbe0cab224f1712d46a5a1cc8016e6d3d23ba31
parent 2764ee679fe8d434b201ee88502660d4156211b3
Author: Alexey Yerin <yyp@disroot.org>
Date: Thu, 28 Apr 2022 20:02:40 +0300
hare::lex: improve handling of invalid UTF-8
Signed-off-by: Alexey Yerin <yyp@disroot.org>
Diffstat:
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -289,6 +289,19 @@ fn loc(line: uint, col: uint) location = location {
lextest(in, expected);
};
+@test fn invalid() void = {
+ // Raw bytes are used because \x80 within a string literal would yield an
+ // empty string; the stray 0x80 is what makes the input invalid UTF-8.
+ const in = ['1': u8, 0x80];
+
+ let buf = bufio::fixed(in, mode::READ);
+ let lexer = init(&buf, "<test>");
+
+ const s = lex(&lexer) as syntax;
+ assert(s.1 == "Source file is not valid UTF-8");
+};
+
+
// Small virtual machine for testing mkloc/prevloc.
// NEXT, UNGET, LEX, and UNLEX call the obvious functions (with UNGET and UNLEX
// pulling from a buffer that NEXT/LEX feed into).
diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha
@@ -247,6 +247,7 @@ fn lex_string(lex: *lexer, loc: location, delim: rune) (token | error) = {
};
};
line_comment(lex)?;
+
return (ltok::LIT_STR, strio::string(&buf), loc);
};
@@ -740,7 +741,7 @@ export fn unlex(lex: *lexer, tok: token) void = {
lex.un = tok;
};
-fn next(lex: *lexer) ((rune, location) | io::EOF | io::error) = {
+fn next(lex: *lexer) ((rune, location) | syntax | io::EOF | io::error) = {
match (lex.rb[0]) {
case void => void;
case let r: ((rune, location) | io::EOF) =>
@@ -759,6 +760,8 @@ fn next(lex: *lexer) ((rune, location) | io::EOF | io::error) = {
lex.prevrlocs[0] = loc;
lexloc(lex, r);
return (r, loc);
+ case utf8::invalid =>
+ return syntaxerr(mkloc(lex), "Source file is not valid UTF-8");
};
};
@@ -783,7 +786,10 @@ fn nextw(lex: *lexer) ((rune, location) | io::EOF | io::error) = {
abort();
};
-fn try(lex: *lexer, want: rune...) ((rune, location) | void | io::error) = {
+fn try(
+ lex: *lexer,
+ want: rune...
+) ((rune, location) | syntax | void | io::error) = {
let r = match (next(lex)?) {
case io::EOF =>
return;