+test.ha (13010B)
// License: MPL-2.0
// (c) 2022 Alexey Yerin <yyp@disroot.org>
// (c) 2021 Armin Weigl <tb46305@gmail.com>
// (c) 2021 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
// (c) 2021 Drew DeVault <sir@cmpwn.com>
// (c) 2021 Ember Sawady <ecs@d2evs.net>
// (c) 2021 Sudipto Mallick <smlckz@disroot.org>
use fmt;
use io;
use io::{mode};
use memio;
use strings;

// Runes pushed back with unget are handed out again by next in reverse order
// of insertion, carrying the locations they were pushed with, before any rune
// from the underlying stream. An io::EOF that was pushed back is returned as
// io::EOF again.
@test fn unget() void = {
	let buf = memio::fixed(strings::toutf8("z"));
	let lexer = init(&buf, "<test>");
	unget(&lexer, ('x', location { path = "<test>", line = 1, col = 2 }));
	unget(&lexer, ('y', location { path = "<test>", line = 1, col = 3 }));
	// Last pushed, first returned.
	let r = next(&lexer) as (rune, location);
	assert(r.0 == 'y');
	assert(r.1.path == "<test>" && r.1.line == 1 && r.1.col == 3);
	r = next(&lexer) as (rune, location);
	assert(r.0 == 'x');
	assert(r.1.path == "<test>" && r.1.line == 1 && r.1.col == 2);
	// Only now is the actual input consumed.
	r = next(&lexer) as (rune, location);
	assert(r.0 == 'z');
	assert(r.1.path == "<test>" && r.1.line == 1 && r.1.col == 1);
	assert(next(&lexer) is io::EOF);
	unget(&lexer, io::EOF);
	assert(next(&lexer) is io::EOF);
};

// A token pushed back with unlex is returned by the following call to lex
// with its type, value and location intact, even though the input is empty.
@test fn unlex() void = {
	let lexer = init(io::empty, "<test>");
	unlex(&lexer, (ltok::IF, void, location {
		path = "<test>",
		line = 1234,
		col = 1234,
	}));
	let t = lex(&lexer) as token;
	assert(t.0 == ltok::IF);
	assert(t.1 is void);
	assert(t.2.path == "<test>");
	assert(t.2.line == 1234 && t.2.col == 1234);
};

// Asserts that "actual" holds the same kind of value as "expected" (str, rune,
// u64, f64 or void) and, for the non-void kinds, that both payloads compare
// equal.
fn vassert(expected: value, actual: value) void = {
	match (expected) {
	case let expected: str =>
		assert(actual as str == expected);
	case let expected: rune =>
		assert(actual as rune == expected);
	case let expected: u64 =>
		assert(actual as u64 == expected);
	case let expected: f64 =>
		assert(actual as f64 == expected);
	case void =>
		assert(actual is void);
	};
};

// Lexes "in" and checks the resulting tokens against "expected", in order:
// token type, token value and location (path, line, column) must all match.
// Once every expected token has been seen, the lexer must produce ltok::EOF.
// Diagnostics are written with fmt::errorfln before the test is aborted.
fn lextest(in: str, expected: []token) void = {
	let buf = memio::fixed(strings::toutf8(in));
	let lexer = init(&buf, "<test>");
	for (let i = 0z; i < len(expected); i += 1) {
		let etok = expected[i];
		let tl = match (lex(&lexer)) {
		case let tl: token =>
			yield tl;
		case let err: error =>
			fmt::errorfln("{}: {}", i, strerror(err))!;
			abort();
		};
		// Print both tokens before the assertion below fires, so that
		// the failure is readable.
		if (tl.0 != etok.0) {
			fmt::errorfln("Expected {}, got {}",
				tokstr(etok), tokstr(tl))!;
		};
		assert(tl.0 == etok.0);
		// vassert takes (expected, actual).
		vassert(etok.1, tl.1);
		if (tl.2.line != etok.2.line || tl.2.col != etok.2.col
				|| tl.2.path != etok.2.path) {
			fmt::errorfln("{}:{}:{} != {}:{}:{}",
				tl.2.path, tl.2.line, tl.2.col,
				etok.2.path, etok.2.line, etok.2.col)!;
			abort();
		};
	};
	let t = lex(&lexer) as token;
	assert(t.0 == ltok::EOF);
};

// Builds a location in the "<test>" path at the given line and column.
fn loc(line: uint, col: uint) location = location {
	path = "<test>",
	line = line,
	col = col,
};

// Single-character punctuation, with no whitespace between tokens.
@test fn lex1() void = {
	const in = "~,{[(}]);";
	const expected: [_]token = [
		(ltok::BNOT, void, loc(1, 1)),
		(ltok::COMMA, void, loc(1, 2)),
		(ltok::LBRACE, void, loc(1, 3)),
		(ltok::LBRACKET, void, loc(1, 4)),
		(ltok::LPAREN, void, loc(1, 5)),
		(ltok::RBRACE, void, loc(1, 6)),
		(ltok::RBRACKET, void, loc(1, 7)),
		(ltok::RPAREN, void, loc(1, 8)),
		(ltok::SEMICOLON, void, loc(1, 9)),
	];
	lextest(in, expected);
};

// Operators that exist in a one-character form and a two-character form.
@test fn lex2() void = {
	// Ends with = to test =, EOF
	const in = "* *= % %= + += - -= : :: = == / /= =";
	const expected: [_]token = [
		(ltok::TIMES, void, loc(1, 1)),
		(ltok::TIMESEQ, void, loc(1, 3)),
		(ltok::MODULO, void, loc(1, 6)),
		(ltok::MODEQ, void, loc(1, 8)),
		(ltok::PLUS, void, loc(1, 11)),
		(ltok::PLUSEQ, void, loc(1, 13)),
		(ltok::MINUS, void, loc(1, 16)),
		(ltok::MINUSEQ, void, loc(1, 18)),
		(ltok::COLON, void, loc(1, 21)),
		(ltok::DOUBLE_COLON, void, loc(1, 23)),
		(ltok::EQUAL, void, loc(1, 26)),
		(ltok::LEQUAL, void, loc(1, 28)),
		(ltok::DIV, void, loc(1, 31)),
		(ltok::DIVEQ, void, loc(1, 33)),
		(ltok::EQUAL, void, loc(1, 36)),
	];
	lextest(in, expected);
};

// Operators of up to three characters: dots, shifts and comparisons first,
// then the bitwise/logical and, or and xor families. Each input ends with a
// short operator directly followed by EOF.
@test fn lex3() void = {
	const in = ". .. ... < << <= <<= > >> >= >>= >>";
	const expected: [_]token = [
		(ltok::DOT, void, loc(1, 1)),
		(ltok::SLICE, void, loc(1, 3)),
		(ltok::ELLIPSIS, void, loc(1, 6)),
		(ltok::LESS, void, loc(1, 10)),
		(ltok::LSHIFT, void, loc(1, 12)),
		(ltok::LESSEQ, void, loc(1, 15)),
		(ltok::LSHIFTEQ, void, loc(1, 18)),
		(ltok::GT, void, loc(1, 22)),
		(ltok::RSHIFT, void, loc(1, 24)),
		(ltok::GTEQ, void, loc(1, 27)),
		(ltok::RSHIFTEQ, void, loc(1, 30)),
		(ltok::RSHIFT, void, loc(1, 34)),
	];
	lextest(in, expected);

	const in = "& && &= &&= | || |= ||= ^ ^^ ^= ^^= ^";
	const expected: [_]token = [
		(ltok::BAND, void, loc(1, 1)),
		(ltok::LAND, void, loc(1, 3)),
		(ltok::BANDEQ, void, loc(1, 6)),
		(ltok::LANDEQ, void, loc(1, 9)),
		(ltok::BOR, void, loc(1, 13)),
		(ltok::LOR, void, loc(1, 15)),
		(ltok::BOREQ, void, loc(1, 18)),
		(ltok::LOREQ, void, loc(1, 21)),
		(ltok::BXOR, void, loc(1, 25)),
		(ltok::LXOR, void, loc(1, 27)),
		(ltok::BXOREQ, void, loc(1, 30)),
		(ltok::LXOREQ, void, loc(1, 33)),
		(ltok::BXOR, void, loc(1, 37)),
	];
	lextest(in, expected);
};

// Names carry their text as the token value, while keywords ("return",
// "void") get their own token type and a void value. A name directly after a
// colon is lexed as COLON followed by NAME.
@test fn lexname() void = {
	const in = "hello world return void foobar :foobaz";
	const expected: [_]token = [
		(ltok::NAME, "hello", loc(1, 1)),
		(ltok::NAME, "world", loc(1, 7)),
		(ltok::RETURN, void, loc(1, 13)),
		(ltok::VOID, void, loc(1, 20)),
		(ltok::NAME, "foobar", loc(1, 25)),
		(ltok::COLON, void, loc(1, 32)),
		(ltok::NAME, "foobaz", loc(1, 33)),
	];
	lextest(in, expected);
};

// Every bmap entry up to and including ltok::LAST_KEYWORD must lex to the
// ltok whose numeric value equals the entry's index in bmap.
@test fn keywords() void = {
	let keywords = bmap[..ltok::LAST_KEYWORD+1];
	for (let i = 0z; i < len(keywords); i += 1) {
		let buf = memio::fixed(strings::toutf8(keywords[i]));
		let lexer = init(&buf, "<test>");
		let tok = lex(&lexer) as token;
		assert(tok.0 == i: ltok);
	};
};

// Without flags, comments produce no tokens. With flag::COMMENTS, the text
// of the comment lines encountered while lexing a token is available from
// comment() after that token has been lexed.
@test fn comments() void = {
	const in = "hello world // foo\nbar";
	const expected: [_]token = [
		(ltok::NAME, "hello", loc(1, 1)),
		(ltok::NAME, "world", loc(1, 7)),
		(ltok::NAME, "bar", loc(2, 1)),
	];
	lextest(in, expected);

	let in = "// foo\n// bar\nhello world// baz\n\n// bad\ntest";
	let buf = memio::fixed(strings::toutf8(in));
	let lexer = init(&buf, "<input>", flag::COMMENTS);
	assert(lex(&lexer) is token);
	assert(comment(&lexer) == " foo\n bar\n");
	assert(lex(&lexer) is token);
	assert(comment(&lexer) == " baz\n");
	assert(lex(&lexer) is token);
	assert(comment(&lexer) == " bad\n");
};

// Rune literals: plain characters, every single-character escape, and the
// \x, \u and \U numeric escapes.
@test fn runes() void = {
	const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' "
		"'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U12345678'";
	const expected: [_]token = [
		(ltok::LIT_RUNE, 'a', loc(1, 1)),
		(ltok::LIT_RUNE, 'b', loc(1, 5)),
		(ltok::LIT_RUNE, '\a', loc(1, 9)),
		(ltok::LIT_RUNE, '\b', loc(1, 14)),
		(ltok::LIT_RUNE, '\f', loc(1, 19)),
		(ltok::LIT_RUNE, '\n', loc(1, 24)),
		(ltok::LIT_RUNE, '\r', loc(1, 29)),
		(ltok::LIT_RUNE, '\t', loc(1, 34)),
		(ltok::LIT_RUNE, '\v', loc(1, 39)),
		(ltok::LIT_RUNE, '\0', loc(1, 44)),
		(ltok::LIT_RUNE, '\\', loc(1, 49)),
		(ltok::LIT_RUNE, '\'', loc(1, 54)),
		(ltok::LIT_RUNE, '\x0A', loc(1, 59)),
		(ltok::LIT_RUNE, '\u1234', loc(1, 66)),
		(ltok::LIT_RUNE, '\U12345678', loc(1, 75)),
	];
	lextest(in, expected);
};

// String literals: escapes, non-ASCII text, keywords inside strings, and the
// merging of adjacent literals into one LIT_STR token (also across newlines
// and comments).
@test fn strings() void = {
	// Adjacent literals separated by whitespace yield a single token.
	const in = `"a" "b" "\a" "\b" "\f" "\n" "\r" "\t" "\v" "\0" "\\" "\'"`;
	const expected: [_]token = [
		(ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)),
	];
	// TODO: test \x and \u and \U
	lextest(in, expected);
	const in = `"ab\a\b\f\n\r\t\v\0\\\'"`;
	const expected: [_]token = [
		(ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)),
	];
	lextest(in, expected);
	const in = `"hello world", "こんにちは", "return", "foo"`;
	const expected: [_]token = [
		(ltok::LIT_STR, "hello world", loc(1, 1)),
		(ltok::COMMA, void, loc(1, 14)),
		(ltok::LIT_STR, "こんにちは", loc(1, 16)),
		(ltok::COMMA, void, loc(1, 23)),
		(ltok::LIT_STR, "return", loc(1, 25)),
		(ltok::COMMA, void, loc(1, 33)),
		(ltok::LIT_STR, "foo", loc(1, 35)),
	];
	lextest(in, expected);
	// A comment between two literals does not prevent them from merging.
	const in = "\"foo\"\n"
		"// bar\n"
		"\"baz\"";
	const expected: [_]token = [
		(ltok::LIT_STR, "foobaz", loc(1, 1)),
	];
	lextest(in, expected);
};

// Numeric literals: integers in bases 2, 8, 10 and 16, exponents, type
// suffixes and floats. A leading minus sign is lexed as a separate MINUS
// token, and integer values are always carried as u64.
@test fn literals() void = {
	const in = "1e5 -1i32 9223372036854775809 1e2z 255u8 0o42u16\n"
		"0b1000101u32 0xDEADBEEFu64 -0b10i8 -5e0i16 -0o16i32\n"
		"0b00000010000001100000011100001111000000100000011000000111i64\n"
		"13.37 13.37f32 13.37f64 6.022e23 1.616255e-35f64 1e-1";
	const expected: [_]token = [
		(ltok::LIT_ICONST, 1e5u64, loc(1, 1)),
		(ltok::MINUS, void, loc(1, 5)),
		(ltok::LIT_I32, 1u64, loc(1, 6)),
		(ltok::LIT_ICONST, 9223372036854775809u64, loc(1, 11)),
		(ltok::LIT_SIZE, 1e2u64, loc(1, 31)),
		(ltok::LIT_U8, 255u64, loc(1, 36)),
		(ltok::LIT_U16, 0o42u64, loc(1, 42)),
		(ltok::LIT_U32, 0b1000101u64, loc(2, 1)),
		(ltok::LIT_U64, 0xDEADBEEFu64, loc(2, 14)),
		(ltok::MINUS, void, loc(2, 28)),
		(ltok::LIT_I8, 0b10u64, loc(2, 29)),
		(ltok::MINUS, void, loc(2, 36)),
		(ltok::LIT_I16, 5e0u64, loc(2, 37)),
		(ltok::MINUS, void, loc(2, 44)),
		(ltok::LIT_I32, 0o16u64, loc(2, 45)),
		(ltok::LIT_I64, 0b00000010000001100000011100001111000000100000011000000111u64, loc(3, 1)),
		(ltok::LIT_FCONST, 13.37, loc(4, 1)),
		(ltok::LIT_F32, 13.37, loc(4, 7)),
		(ltok::LIT_F64, 13.37, loc(4, 16)),
		(ltok::LIT_FCONST, 6.022e23, loc(4, 25)),
		(ltok::LIT_F64, 1.616255e-35, loc(4, 34)),
		(ltok::LIT_FCONST, 1e-1, loc(4, 50)),
	];
	lextest(in, expected);
};

// Malformed input must be reported as a syntax error with a specific message
// instead of crashing the lexer. Inputs are given as byte arrays because
// some of them are not valid UTF-8.
@test fn invalid() void = {
	// Using \x80 within a string literal will cause this to output an
	// empty string
	const in = ['1': u8, 0x80];

	let buf = memio::fixed(in);
	let lexer = init(&buf, "<test>");

	const s = lex(&lexer) as error as syntax;
	assert(s.1 == "Source file is not valid UTF-8");

	// Regression: invalid UTF-8 at the beginning of a token used to cause
	// a crash in nextw
	const in = [0x80: u8];

	let buf = memio::fixed(in);
	let lexer = init(&buf, "<test>");

	const s = lex(&lexer) as error as syntax;
	assert(s.1 == "Source file is not valid UTF-8");

	// Regression: invalid escape sequences such as "\^" used to cause a
	// crash
	const in = ['"': u8, '\\': u8, '^': u8, '"': u8];

	let buf = memio::fixed(in);
	let lexer = init(&buf, "<test>");

	const s = lex(&lexer) as error as syntax;
	assert(s.1 == "unknown escape sequence");

	// Regression: <X>e followed by another token used to cause a crash
	const in = ['0': u8, 'e': u8, ')': u8];

	let buf = memio::fixed(in);
	let lexer = init(&buf, "<test>");

	const s = lex(&lexer) as error as syntax;
	assert(s.1 == "expected exponent");
};


// Small virtual machine for testing mkloc/prevloc.
// NEXT, UNGET, LEX, and UNLEX call the obvious functions (with UNGET and UNLEX
// pulling from a buffer that NEXT/LEX feed into).
// After each instruction, the results of mkloc/prevloc are checked against the
// next element of the test vector.
type op = enum {
	LEX,
	NEXT,
	UNGET,
	UNLEX,
};

// Runs the op program below against a fixed input and, after every
// instruction, compares mkloc (first element of each vector entry) and
// prevloc (second element) with the expected locations.
@test fn loc() void = {
	// The two tabs are spelled as escapes on purpose: the expected columns
	// below (3 -> 9 on the third NEXT, and "name" ending at column 28)
	// only hold if a tab precedes "ello" and "name", and literal tabs are
	// easily lost when whitespace gets rewritten.
	// NOTE(review): this presumes the lexer advances a tab to the next
	// 8-column tab stop -- confirm against the lexer's location tracking.
	const src = "h \tello: my\tname is Inigo Montoya";
	let buf = memio::fixed(strings::toutf8(src));
	let lexer = init(&buf, "<test>");
	const ops: [_]op = [
		op::NEXT,
		op::NEXT,
		op::NEXT,
		op::UNGET,
		op::UNGET,
		op::NEXT,
		op::NEXT,
		op::LEX,
		op::LEX,
		op::UNLEX,
		op::LEX,
		op::LEX,
		op::UNLEX,
		op::LEX,
		op::LEX,
		op::LEX,
		op::LEX,
	];
	// One (mkloc, prevloc) pair per entry of ops.
	const vector: [_](location, location) = [
		(loc(1, 2), loc(1, 1)),
		(loc(1, 3), loc(1, 2)),
		(loc(1, 9), loc(1, 3)),
		(loc(1, 3), loc(1, 2)),
		(loc(1, 2), loc(1, 1)),
		(loc(1, 3), loc(1, 2)),
		(loc(1, 9), loc(1, 3)),
		(loc(1, 13), loc(1, 12)),
		(loc(1, 14), loc(1, 13)),
		(loc(1, 13), loc(1, 12)),
		(loc(1, 14), loc(1, 13)),
		(loc(1, 17), loc(1, 16)),
		(loc(1, 14), loc(1, 13)),
		(loc(1, 17), loc(1, 16)),
		(loc(1, 29), loc(1, 28)),
		(loc(1, 32), loc(1, 31)),
		(loc(1, 38), loc(1, 37)),
	];

	// We could statically allocate r and t, but what's the point
	let r: [](rune, location) = [];
	defer free(r);
	let t: []token = [];
	defer free(t);
	for (let i = 0z; i < len(ops); i += 1) {
		switch (ops[i]) {
		case op::LEX =>
			append(t, lex(&lexer)!);
		case op::NEXT =>
			append(r, next(&lexer) as (rune, location));
		case op::UNGET =>
			// Push back the most recently read rune and drop it
			// from the buffer.
			unget(&lexer, r[len(r) - 1]);
			delete(r[len(r) - 1]);
		case op::UNLEX =>
			// Push back the most recently lexed token and drop it
			// from the buffer.
			unlex(&lexer, t[len(t) - 1]);
			delete(t[len(t) - 1]);
		};
		let loc = mkloc(&lexer);
		let ploc = prevloc(&lexer);
		// TODO: Aggregate equality
		assert(loc.path == vector[i].0.path
			&& loc.line == vector[i].0.line
			&& loc.col == vector[i].0.col);
		assert(ploc.path == vector[i].1.path
			&& ploc.line == vector[i].1.line
			&& ploc.col == vector[i].1.col);
	};
};

// In a tuple access chain such as ".0.1", the indices are lexed as separate
// integer literals split by DOT tokens rather than as one float literal.
@test fn access_tuple() void = {
	const in = "((0, 1), 2).0.1";
	const expected: []token = [
		(ltok::LPAREN, void, loc(1, 1)),
		(ltok::LPAREN, void, loc(1, 2)),
		(ltok::LIT_ICONST, 0, loc(1, 3)),
		(ltok::COMMA, void, loc(1, 4)),
		(ltok::LIT_ICONST, 1, loc(1, 6)),
		(ltok::RPAREN, void, loc(1, 7)),
		(ltok::COMMA, void, loc(1, 8)),
		(ltok::LIT_ICONST, 2, loc(1, 10)),
		(ltok::RPAREN, void, loc(1, 11)),
		(ltok::DOT, void, loc(1, 12)),
		(ltok::LIT_ICONST, 0, loc(1, 13)),
		(ltok::DOT, void, loc(1, 14)),
		(ltok::LIT_ICONST, 1, loc(1, 15)),
	];
	lextest(in, expected);
};