hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 5bbe0cab224f1712d46a5a1cc8016e6d3d23ba31
parent 2764ee679fe8d434b201ee88502660d4156211b3
Author: Alexey Yerin <yyp@disroot.org>
Date:   Thu, 28 Apr 2022 20:02:40 +0300

hare::lex: improve handling of invalid UTF-8

Signed-off-by: Alexey Yerin <yyp@disroot.org>

Diffstat:
M hare/lex/+test.ha | 13 +++++++++++++
M hare/lex/lex.ha   | 10 ++++++++--
2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -289,6 +289,19 @@ fn loc(line: uint, col: uint) location = location {
 	lextest(in, expected);
 };
 
+@test fn invalid() void = {
+	// Using \x80 within a string literal will cause this to output an
+	// empty string
+	const in = ['1': u8, 0x80];
+
+	let buf = bufio::fixed(in, mode::READ);
+	let lexer = init(&buf, "<test>");
+
+	const s = lex(&lexer) as syntax;
+	assert(s.1 == "Source file is not valid UTF-8");
+};
+
+
 // Small virtual machine for testing mkloc/prevloc.
 // NEXT, UNGET, LEX, and UNLEX call the obvious functions (with UNGET and UNLEX
 // pulling from a buffer that NEXT/LEX feed into).
diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha
@@ -247,6 +247,7 @@ fn lex_string(lex: *lexer, loc: location, delim: rune) (token | error) = {
 		};
 	};
 	line_comment(lex)?;
+
 	return (ltok::LIT_STR, strio::string(&buf), loc);
 };
 
@@ -740,7 +741,7 @@ export fn unlex(lex: *lexer, tok: token) void = {
 	lex.un = tok;
 };
 
-fn next(lex: *lexer) ((rune, location) | io::EOF | io::error) = {
+fn next(lex: *lexer) ((rune, location) | syntax | io::EOF | io::error) = {
 	match (lex.rb[0]) {
 	case void => void;
 	case let r: ((rune, location) | io::EOF) =>
@@ -759,6 +760,8 @@ fn next(lex: *lexer) ((rune, location) | io::EOF | io::error) = {
 		lex.prevrlocs[0] = loc;
 		lexloc(lex, r);
 		return (r, loc);
+	case utf8::invalid =>
+		return syntaxerr(mkloc(lex), "Source file is not valid UTF-8");
 	};
 };
 
@@ -783,7 +786,10 @@ fn nextw(lex: *lexer) ((rune, location) | io::EOF | io::error) = {
 	abort();
 };
 
-fn try(lex: *lexer, want: rune...) ((rune, location) | void | io::error) = {
+fn try(
+	lex: *lexer,
+	want: rune...
+) ((rune, location) | syntax | void | io::error) = {
 	let r = match (next(lex)?) {
 	case io::EOF =>
 		return;