+test.ha (12242B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use fmt;
use io;
use memio;
use strings;

// Creates a lexer which reads from the given bytes, reporting "<test>" as the
// path. The stream, the scanner and the scanner's buffer are static locals
// which are overwritten on every call, so a lexer obtained from an earlier call
// should not be used after calling this function again.
fn initbuf(in: []u8, flags: flag = flag::NONE) lexer = {
	static let buf: [256]u8 = [0...];
	static let s = memio::stream {
		stream = null: io::stream,
		...
	};
	static let sc = bufio::scanner {
		stream = null: io::stream,
		src = 0,
		...
	};

	s = memio::fixed(in);
	sc = bufio::newscanner_static(&s, buf);
	return init(&sc, "<test>", flags);
};

// Like [[initbuf]], but reads from the UTF-8 encoding of a string.
fn initstr(in: str, flags: flag = flag::NONE) lexer = {
	return initbuf(strings::toutf8(in), flags);
};

// An unlexed token is handed back unchanged (kind, value and location) by the
// next call to lex, even though the underlying input is empty.
@test fn unlex() void = {
	let sc = bufio::newscanner_static(io::empty, []);
	let lexer = init(&sc, "<test>");
	unlex(&lexer, (ltok::IF, void, location {
		path = "<test>",
		line = 1234,
		col = 1234,
	}));
	let t = lex(&lexer) as token;
	assert(t.0 == ltok::IF);
	assert(t.1 is void);
	assert(t.2.path == "<test>");
	assert(t.2.line == 1234 && t.2.col == 1234);
};

// Asserts that "actual" holds the same kind of value as "expected" and that
// the two compare equal. A mismatch in kind trips the "as" type assertion.
fn vassert(expected: value, actual: value) void = {
	match (expected) {
	case let expected: str =>
		assert(actual as str == expected);
	case let expected: rune =>
		assert(actual as rune == expected);
	case let expected: u64 =>
		assert(actual as u64 == expected);
	case let expected: f64 =>
		assert(actual as f64 == expected);
	case void =>
		assert(actual is void);
	};
};

// Lexes "in" and checks every token against "expected": first the token kind,
// then the value, then the location (path, line and column). Once all expected
// tokens have been consumed, the next token must be EOF. Diagnostics are
// written to stderr before the test is aborted.
fn lextest(in: str, expected: []token) void = {
	let lexer = initstr(in);
	for (let i = 0z; i < len(expected); i += 1) {
		let etok = expected[i];
		let tl = match (lex(&lexer)) {
		case let tl: token =>
			yield tl;
		case let err: error =>
			fmt::errorfln("{}: {}", i, strerror(err))!;
			abort();
		};
		if (tl.0 != etok.0) {
			// Print both tokens before the assertion below fails
			fmt::errorfln("Expected {}, got {}",
				tokstr(etok), tokstr(tl))!;
		};
		assert(tl.0 == etok.0);
		// Arguments follow vassert's declared (expected, actual) order
		vassert(etok.1, tl.1);
		if (tl.2.line != etok.2.line || tl.2.col != etok.2.col
				|| tl.2.path != etok.2.path) {
			fmt::errorfln("{}:{}:{} != {}:{}:{}",
				tl.2.path, tl.2.line, tl.2.col,
				etok.2.path, etok.2.line, etok.2.col)!;
			abort();
		};
	};
	let t = lex(&lexer) as token;
	assert(t.0 == ltok::EOF);
};

// Shorthand for a location within the "<test>" path used by [[initbuf]].
fn loc(line: uint, col: uint) location = location {
	path = "<test>",
	line = line,
	col = col,
};

// Single-character punctuation without any separating whitespace.
@test fn lex1() void = {
	const in = "~,{[(}]);";
	const expected: [_]token = [
		(ltok::BNOT, void, loc(1, 1)),
		(ltok::COMMA, void, loc(1, 2)),
		(ltok::LBRACE, void, loc(1, 3)),
		(ltok::LBRACKET, void, loc(1, 4)),
		(ltok::LPAREN, void, loc(1, 5)),
		(ltok::RBRACE, void, loc(1, 6)),
		(ltok::RBRACKET, void, loc(1, 7)),
		(ltok::RPAREN, void, loc(1, 8)),
		(ltok::SEMICOLON, void, loc(1, 9)),
	];
	lextest(in, expected);
};

// Operators which share their first character with a longer operator.
@test fn lex2() void = {
	// Ends with = to test =, EOF
	const in = "* *= % %= + += - -= : :: = == / /= =";
	const expected: [_]token = [
		(ltok::TIMES, void, loc(1, 1)),
		(ltok::TIMESEQ, void, loc(1, 3)),
		(ltok::MODULO, void, loc(1, 6)),
		(ltok::MODEQ, void, loc(1, 8)),
		(ltok::PLUS, void, loc(1, 11)),
		(ltok::PLUSEQ, void, loc(1, 13)),
		(ltok::MINUS, void, loc(1, 16)),
		(ltok::MINUSEQ, void, loc(1, 18)),
		(ltok::COLON, void, loc(1, 21)),
		(ltok::DOUBLE_COLON, void, loc(1, 23)),
		(ltok::EQUAL, void, loc(1, 26)),
		(ltok::LEQUAL, void, loc(1, 28)),
		(ltok::DIV, void, loc(1, 31)),
		(ltok::DIVEQ, void, loc(1, 33)),
		(ltok::EQUAL, void, loc(1, 36)),
	];
	lextest(in, expected);
};

// Operators of up to three characters: dots, shifts and comparisons, then the
// bitwise and logical operators together with their assignment forms.
@test fn lex3() void = {
	const in = ". .. ... < << <= <<= > >> >= >>= >>";
	const expected: [_]token = [
		(ltok::DOT, void, loc(1, 1)),
		(ltok::DOUBLE_DOT, void, loc(1, 3)),
		(ltok::ELLIPSIS, void, loc(1, 6)),
		(ltok::LESS, void, loc(1, 10)),
		(ltok::LSHIFT, void, loc(1, 12)),
		(ltok::LESSEQ, void, loc(1, 15)),
		(ltok::LSHIFTEQ, void, loc(1, 18)),
		(ltok::GT, void, loc(1, 22)),
		(ltok::RSHIFT, void, loc(1, 24)),
		(ltok::GTEQ, void, loc(1, 27)),
		(ltok::RSHIFTEQ, void, loc(1, 30)),
		(ltok::RSHIFT, void, loc(1, 34)),
	];
	lextest(in, expected);

	const in = "& && &= &&= | || |= ||= ^ ^^ ^= ^^= ^";
	const expected: [_]token = [
		(ltok::BAND, void, loc(1, 1)),
		(ltok::LAND, void, loc(1, 3)),
		(ltok::BANDEQ, void, loc(1, 6)),
		(ltok::LANDEQ, void, loc(1, 9)),
		(ltok::BOR, void, loc(1, 13)),
		(ltok::LOR, void, loc(1, 15)),
		(ltok::BOREQ, void, loc(1, 18)),
		(ltok::LOREQ, void, loc(1, 21)),
		(ltok::BXOR, void, loc(1, 25)),
		(ltok::LXOR, void, loc(1, 27)),
		(ltok::BXOREQ, void, loc(1, 30)),
		(ltok::LXOREQ, void, loc(1, 33)),
		(ltok::BXOR, void, loc(1, 37)),
	];
	lextest(in, expected);
};

// Names carry their text as the token value; keywords carry void. A name
// directly following a colon is still lexed as COLON followed by NAME.
@test fn lexname() void = {
	const in = "hello world return void foobar :foobaz";
	const expected: [_]token = [
		(ltok::NAME, "hello", loc(1, 1)),
		(ltok::NAME, "world", loc(1, 7)),
		(ltok::RETURN, void, loc(1, 13)),
		(ltok::VOID, void, loc(1, 20)),
		(ltok::NAME, "foobar", loc(1, 25)),
		(ltok::COLON, void, loc(1, 32)),
		(ltok::NAME, "foobaz", loc(1, 33)),
	];
	lextest(in, expected);
};

// Lexes each of the first LAST_KEYWORD + 1 entries of bmap on its own and
// expects the token kind whose numeric value is that entry's index.
// NOTE(review): bmap is declared outside of this file; this presumably relies
// on it being ordered like the ltok enum - confirm.
@test fn keywords() void = {
	let keywords = bmap[..ltok::LAST_KEYWORD+1];
	for (let i = 0z; i < len(keywords); i += 1) {
		let lexer = initstr(keywords[i]);
		let tok = lex(&lexer) as token;
		assert(tok.0 == i: ltok);
	};
};

// Comments produce no tokens. With flag::COMMENTS set, comment() returns the
// text expected below after each token is lexed.
@test fn comments() void = {
	const in = "hello world // foo\nbar";
	const expected: [_]token = [
		(ltok::NAME, "hello", loc(1, 1)),
		(ltok::NAME, "world", loc(1, 7)),
		(ltok::NAME, "bar", loc(2, 1)),
	];
	lextest(in, expected);

	let lexer = initstr("// foo\n// bar\nhello world// baz\n\n// bad\ntest",
		flag::COMMENTS);
	assert(lex(&lexer) is token);
	assert(comment(&lexer) == " foo\n bar\n");
	assert(lex(&lexer) is token);
	assert(comment(&lexer) == " baz\n");
	assert(lex(&lexer) is token);
	assert(comment(&lexer) == " bad\n");
};

// Rune literals: plain characters, the single-character escapes, and the
// \x, \u and \U numeric escapes.
@test fn runes() void = {
	const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' "
		"'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U0010abcd'";
	const expected: [_]token = [
		(ltok::LIT_RCONST, 'a', loc(1, 1)),
		(ltok::LIT_RCONST, 'b', loc(1, 5)),
		(ltok::LIT_RCONST, '\a', loc(1, 9)),
		(ltok::LIT_RCONST, '\b', loc(1, 14)),
		(ltok::LIT_RCONST, '\f', loc(1, 19)),
		(ltok::LIT_RCONST, '\n', loc(1, 24)),
		(ltok::LIT_RCONST, '\r', loc(1, 29)),
		(ltok::LIT_RCONST, '\t', loc(1, 34)),
		(ltok::LIT_RCONST, '\v', loc(1, 39)),
		(ltok::LIT_RCONST, '\0', loc(1, 44)),
		(ltok::LIT_RCONST, '\\', loc(1, 49)),
		(ltok::LIT_RCONST, '\'', loc(1, 54)),
		(ltok::LIT_RCONST, '\x0A', loc(1, 59)),
		(ltok::LIT_RCONST, '\u1234', loc(1, 66)),
		(ltok::LIT_RCONST, '\U0010abcd', loc(1, 75)),
	];
	lextest(in, expected);
};

// String literals. Adjacent literals are expected to come back as a single
// LIT_STR token located at the first literal, including when they are
// separated by a newline and a comment.
@test fn strings() void = {
	const in = `"a" "b" "\a" "\b" "\f" "\n" "\r" "\t" "\v" "\0" "\\" "\'"`;
	const expected: [_]token = [
		(ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)),
	];
	lextest(in, expected);

	const in = `"ab\a\b\f\n\r\t\v\0\\\'"`;
	const expected: [_]token = [
		(ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)),
	];
	lextest(in, expected);

	const in = `"hello world", "こんにちは", "return", "foo"`;
	const expected: [_]token = [
		(ltok::LIT_STR, "hello world", loc(1, 1)),
		(ltok::COMMA, void, loc(1, 14)),
		(ltok::LIT_STR, "こんにちは", loc(1, 16)),
		(ltok::COMMA, void, loc(1, 23)),
		(ltok::LIT_STR, "return", loc(1, 25)),
		(ltok::COMMA, void, loc(1, 33)),
		(ltok::LIT_STR, "foo", loc(1, 35)),
	];
	lextest(in, expected);

	const in = "\"foo\"\n"
		"// bar\n"
		"\"baz\"";
	const expected: [_]token = [
		(ltok::LIT_STR, "foobaz", loc(1, 1)),
	];
	lextest(in, expected);

	const in = `"\x7f" "\x1b" "\uabcd" "\U0010abcd"`;
	const expected: [_]token = [
		(ltok::LIT_STR, "\x7f\x1b\uabcd\U0010abcd", loc(1, 1)),
	];
	lextest(in, expected);
};

// Numeric literals in every base, with and without type suffixes and
// exponents. A leading minus sign is expected as a separate MINUS token, and
// integer values are expected as u64 regardless of the suffix.
@test fn literals() void = {
	const in = "1e5 -1i32 9223372036854775809 1e2z 255u8 0o42u16\n"
		"0b1000101u32 0xDEADBEEFu64 -0b10i8 -5e0i16 -0o16i32\n"
		"0b00000010000001100000011100001111000000100000011000000111i64\n"
		"13.37 13.37f32 13.37f64 6.022e23 1.616255e-35f64 1e-1 0x1p-2";
	const expected: [_]token = [
		(ltok::LIT_ICONST, 1e5u64, loc(1, 1)),
		(ltok::MINUS, void, loc(1, 5)),
		(ltok::LIT_I32, 1u64, loc(1, 6)),
		(ltok::LIT_ICONST, 9223372036854775809u64, loc(1, 11)),
		(ltok::LIT_SIZE, 1e2u64, loc(1, 31)),
		(ltok::LIT_U8, 255u64, loc(1, 36)),
		(ltok::LIT_U16, 0o42u64, loc(1, 42)),
		(ltok::LIT_U32, 0b1000101u64, loc(2, 1)),
		(ltok::LIT_U64, 0xDEADBEEFu64, loc(2, 14)),
		(ltok::MINUS, void, loc(2, 28)),
		(ltok::LIT_I8, 0b10u64, loc(2, 29)),
		(ltok::MINUS, void, loc(2, 36)),
		(ltok::LIT_I16, 5e0u64, loc(2, 37)),
		(ltok::MINUS, void, loc(2, 44)),
		(ltok::LIT_I32, 0o16u64, loc(2, 45)),
		(ltok::LIT_I64, 0b00000010000001100000011100001111000000100000011000000111u64, loc(3, 1)),
		(ltok::LIT_FCONST, 13.37, loc(4, 1)),
		(ltok::LIT_F32, 13.37, loc(4, 7)),
		(ltok::LIT_F64, 13.37, loc(4, 16)),
		(ltok::LIT_FCONST, 6.022e23, loc(4, 25)),
		(ltok::LIT_F64, 1.616255e-35, loc(4, 34)),
		(ltok::LIT_FCONST, 1e-1, loc(4, 50)),
		(ltok::LIT_FCONST, 0x1p-2, loc(4, 55)),
	];
	lextest(in, expected);
};

// Malformed input must be reported as a syntax error with the expected
// message rather than crashing the lexer.
@test fn invalid() void = {
	// Using \x80 within a string literal will cause this to output an
	// empty string
	let lexer = initbuf(['1', 0x80]);
	const s = lex(&lexer) as error as syntax;
	assert(s.1 == "Source file is not valid UTF-8");

	// Regression: invalid UTF-8 at the beginning of a token used to cause
	// a crash in nextw
	let lexer = initbuf([0x80]);
	const s = lex(&lexer) as error as syntax;
	assert(s.1 == "Source file is not valid UTF-8");

	// Regression: invalid escape sequences such as "\^" used to cause a
	// crash
	let lexer = initstr(`"\^"`);
	const s = lex(&lexer) as error as syntax;
	assert(s.1 == "unknown escape sequence");

	// Regression: <X>e followed by another token used to cause a crash
	let lexer = initstr("0e)");
	const s = lex(&lexer) as error as syntax;
	assert(s.1 == "expected exponent");
};


// Small virtual machine for testing mkloc/prevloc.
// NEXT, UNGET, LEX, and UNLEX call the obvious functions (with UNGET and UNLEX
// pulling from a buffer that NEXT/LEX feed into).
// After each instruction, the results of mkloc/prevloc are checked against the
// next element of the test vector.
type op = enum {
	// Call lex and push the resulting token onto the token buffer
	LEX,
	// Call next and push the resulting rune onto the rune buffer
	NEXT,
	// Pop the most recent rune from the rune buffer and unget it
	UNGET,
	// Pop the most recent token from the token buffer and unlex it
	UNLEX,
};

// Runs a fixed program of next/unget/lex/unlex calls and checks the results of
// mkloc and prevloc after every step.
@test fn loc() void = {
	// The whitespace in this input is significant, so the tabs are written
	// as escapes rather than as literal characters. The expected columns
	// below jump from 3 to 9 and from 17 to 25, which plain spaces cannot
	// produce.
	// NOTE(review): this assumes the lexer advances a tab to the next
	// multiple-of-eight column - confirm against the lexer's tab handling.
	let lexer = initstr("h \tello: my\tname is\nInigo Montoya.");
	const ops: [_]op = [
		op::NEXT,
		op::NEXT,
		op::NEXT,
		op::UNGET,
		op::UNGET,
		op::NEXT,
		op::NEXT,
		op::LEX,
		op::LEX,
		op::UNLEX,
		op::LEX,
		op::LEX,
		op::UNLEX,
		op::LEX,
		op::LEX,
		op::LEX,
		op::LEX,
		// Lexes the final name, so that the last entry of the vector
		// is checked as well
		op::LEX,
	];
	// (expected mkloc, expected prevloc) after the operation at the same
	// index in ops
	const vector: [_](location, location) = [
		(loc(1, 2), loc(1, 1)),
		(loc(1, 3), loc(1, 2)),
		(loc(1, 9), loc(1, 3)),
		(loc(1, 3), loc(1, 2)),
		(loc(1, 2), loc(1, 1)),
		(loc(1, 3), loc(1, 2)),
		(loc(1, 9), loc(1, 3)),
		(loc(1, 13), loc(1, 12)),
		(loc(1, 14), loc(1, 13)),
		(loc(1, 13), loc(1, 12)),
		(loc(1, 14), loc(1, 13)),
		(loc(1, 17), loc(1, 16)),
		(loc(1, 14), loc(1, 13)),
		(loc(1, 17), loc(1, 16)),
		(loc(1, 29), loc(1, 28)),
		(loc(1, 32), loc(1, 31)),
		(loc(2, 6), loc(2, 5)),
		(loc(2, 14), loc(2, 13)),
	];
	// Every operation needs exactly one expectation, and vice versa
	assert(len(ops) == len(vector));

	// We could statically allocate r and t, but what's the point
	let r: [](rune, location) = [];
	defer free(r);
	let t: []token = [];
	defer free(t);
	for (let i = 0z; i < len(ops); i += 1) {
		switch (ops[i]) {
		case op::LEX =>
			append(t, lex(&lexer)!);
		case op::NEXT =>
			append(r, next(&lexer) as (rune, location));
		case op::UNGET =>
			unget(&lexer, r[len(r) - 1].0);
			delete(r[len(r) - 1]);
		case op::UNLEX =>
			unlex(&lexer, t[len(t) - 1]);
			delete(t[len(t) - 1]);
		};

		const want = vector[i];
		const cur = mkloc(&lexer);
		assert(cur.path == want.0.path
			&& cur.line == want.0.line
			&& cur.col == want.0.col);
		const prev = prevloc(&lexer);
		assert(prev.path == want.1.path
			&& prev.line == want.1.line
			&& prev.col == want.1.col);
	};
};

// Chained tuple field accesses: "0.1" following a dot must be lexed as two
// integer literals separated by DOT, not as a single floating point literal.
@test fn access_tuple() void = {
	const in = "((0, 1), 2).0.1";
	const expected: []token = [
		(ltok::LPAREN, void, loc(1, 1)),
		(ltok::LPAREN, void, loc(1, 2)),
		(ltok::LIT_ICONST, 0, loc(1, 3)),
		(ltok::COMMA, void, loc(1, 4)),
		(ltok::LIT_ICONST, 1, loc(1, 6)),
		(ltok::RPAREN, void, loc(1, 7)),
		(ltok::COMMA, void, loc(1, 8)),
		(ltok::LIT_ICONST, 2, loc(1, 10)),
		(ltok::RPAREN, void, loc(1, 11)),
		(ltok::DOT, void, loc(1, 12)),
		(ltok::LIT_ICONST, 0, loc(1, 13)),
		(ltok::DOT, void, loc(1, 14)),
		(ltok::LIT_ICONST, 1, loc(1, 15)),
	];
	lextest(in, expected);
};