hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

+test.ha (13010B)


      1 // License: MPL-2.0
      2 // (c) 2022 Alexey Yerin <yyp@disroot.org>
      3 // (c) 2021 Armin Weigl <tb46305@gmail.com>
      4 // (c) 2021 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
      5 // (c) 2021 Drew DeVault <sir@cmpwn.com>
      6 // (c) 2021 Ember Sawady <ecs@d2evs.net>
      7 // (c) 2021 Sudipto Mallick <smlckz@disroot.org>
      8 use fmt;
      9 use io;
     10 use io::{mode};
     11 use memio;
     12 use strings;
     13 
     14 @test fn unget() void = {
     15 	let buf = memio::fixed(strings::toutf8("z"));
     16 	let lexer = init(&buf, "<test>");
     17 	unget(&lexer, ('x', location { path = "<test>", line = 1, col = 2 }));
     18 	unget(&lexer, ('y', location { path = "<test>", line = 1, col = 3 }));
     19 	let r = next(&lexer) as (rune, location);
     20 	assert(r.0 == 'y');
     21 	assert(r.1.path == "<test>" && r.1.line == 1 && r.1.col == 3);
     22 	r = next(&lexer) as (rune, location);
     23 	assert(r.0 == 'x');
     24 	assert(r.1.path == "<test>" && r.1.line == 1 && r.1.col == 2);
     25 	r = next(&lexer) as (rune, location);
     26 	assert(r.0 == 'z');
     27 	assert(r.1.path == "<test>" && r.1.line == 1 && r.1.col == 1);
     28 	assert(next(&lexer) is io::EOF);
     29 	unget(&lexer, io::EOF);
     30 	assert(next(&lexer) is io::EOF);
     31 };
     32 
     33 @test fn unlex() void = {
     34 	let lexer = init(io::empty, "<test>");
     35 	unlex(&lexer, (ltok::IF, void, location {
     36 		path = "<test>",
     37 		line = 1234,
     38 		col = 1234,
     39 	}));
     40 	let t = lex(&lexer) as token;
     41 	assert(t.0 == ltok::IF);
     42 	assert(t.1 is void);
     43 	assert(t.2.path == "<test>");
     44 	assert(t.2.line == 1234 && t.2.col == 1234);
     45 };
     46 
     47 fn vassert(expected: value, actual: value) void = {
     48 	match (expected) {
     49 	case let expected: str =>
     50 		assert(actual as str == expected);
     51 	case let expected: rune =>
     52 		assert(actual as rune == expected);
     53 	case let expected: u64 =>
     54 		assert(actual as u64 == expected);
     55 	case let expected: f64 =>
     56 		assert(actual as f64 == expected);
     57 	case void =>
     58 		assert(actual is void);
     59 	};
     60 };
     61 
     62 fn lextest(in: str, expected: []token) void = {
     63 	let buf = memio::fixed(strings::toutf8(in));
     64 	let lexer = init(&buf, "<test>");
     65 	for (let i = 0z; i < len(expected); i += 1) {
     66 		let etok = expected[i];
     67 		let tl = match (lex(&lexer)) {
     68 		case let tl: token =>
     69 			yield tl;
     70 		case let err: error =>
     71 			fmt::errorfln("{}: {}", i, strerror(err))!;
     72 			abort();
     73 		};
     74 		if (tl.0 != etok.0) {
     75 			fmt::errorfln("Expected {}, got {}",
     76 				tokstr(etok), tokstr(tl))!;
     77 		};
     78 		assert(tl.0 == etok.0);
     79 		vassert(tl.1, etok.1);
     80 		if (tl.2.line != etok.2.line || tl.2.col != etok.2.col
     81 				|| tl.2.path != etok.2.path) {
     82 			fmt::errorfln("{}:{}:{} != {}:{}:{}",
     83 				tl.2.path, tl.2.line, tl.2.col,
     84 				etok.2.path, etok.2.line, etok.2.col)!;
     85 			abort();
     86 		};
     87 	};
     88 	let t = lex(&lexer) as token;
     89 	assert(t.0 == ltok::EOF);
     90 };
     91 
     92 fn loc(line: uint, col: uint) location = location {
     93 	path = "<test>",
     94 	line = line,
     95 	col = col,
     96 };
     97 
     98 @test fn lex1() void = {
     99 	const in = "~,{[(}]);";
    100 	const expected: [_]token = [
    101 		(ltok::BNOT, void, loc(1, 1)),
    102 		(ltok::COMMA, void, loc(1, 2)),
    103 		(ltok::LBRACE, void, loc(1, 3)),
    104 		(ltok::LBRACKET, void, loc(1, 4)),
    105 		(ltok::LPAREN, void, loc(1, 5)),
    106 		(ltok::RBRACE, void, loc(1, 6)),
    107 		(ltok::RBRACKET, void, loc(1, 7)),
    108 		(ltok::RPAREN, void, loc(1, 8)),
    109 		(ltok::SEMICOLON, void, loc(1, 9)),
    110 	];
    111 	lextest(in, expected);
    112 };
    113 
    114 @test fn lex2() void = {
    115 	// Ends with = to test =, EOF
    116 	const in = "* *= % %= + += - -= : :: = == / /= =";
    117 	const expected: [_]token = [
    118 		(ltok::TIMES, void, loc(1, 1)),
    119 		(ltok::TIMESEQ, void, loc(1, 3)),
    120 		(ltok::MODULO, void, loc(1, 6)),
    121 		(ltok::MODEQ, void, loc(1, 8)),
    122 		(ltok::PLUS, void, loc(1, 11)),
    123 		(ltok::PLUSEQ, void, loc(1, 13)),
    124 		(ltok::MINUS, void, loc(1, 16)),
    125 		(ltok::MINUSEQ, void, loc(1, 18)),
    126 		(ltok::COLON, void, loc(1, 21)),
    127 		(ltok::DOUBLE_COLON, void, loc(1, 23)),
    128 		(ltok::EQUAL, void, loc(1, 26)),
    129 		(ltok::LEQUAL, void, loc(1, 28)),
    130 		(ltok::DIV, void, loc(1, 31)),
    131 		(ltok::DIVEQ, void, loc(1, 33)),
    132 		(ltok::EQUAL, void, loc(1, 36)),
    133 	];
    134 	lextest(in, expected);
    135 };
    136 
    137 @test fn lex3() void = {
    138 	const in = ". .. ... < << <= <<= > >> >= >>= >>";
    139 	const expected: [_]token = [
    140 		(ltok::DOT, void, loc(1, 1)),
    141 		(ltok::SLICE, void, loc(1, 3)),
    142 		(ltok::ELLIPSIS, void, loc(1, 6)),
    143 		(ltok::LESS, void, loc(1, 10)),
    144 		(ltok::LSHIFT, void, loc(1, 12)),
    145 		(ltok::LESSEQ, void, loc(1, 15)),
    146 		(ltok::LSHIFTEQ, void, loc(1, 18)),
    147 		(ltok::GT, void, loc(1, 22)),
    148 		(ltok::RSHIFT, void, loc(1, 24)),
    149 		(ltok::GTEQ, void, loc(1, 27)),
    150 		(ltok::RSHIFTEQ, void, loc(1, 30)),
    151 		(ltok::RSHIFT, void, loc(1, 34)),
    152 	];
    153 	lextest(in, expected);
    154 
    155 	const in = "& && &= &&= | || |= ||= ^ ^^ ^= ^^= ^";
    156 	const expected: [_]token = [
    157 		(ltok::BAND, void, loc(1, 1)),
    158 		(ltok::LAND, void, loc(1, 3)),
    159 		(ltok::BANDEQ, void, loc(1, 6)),
    160 		(ltok::LANDEQ, void, loc(1, 9)),
    161 		(ltok::BOR, void, loc(1, 13)),
    162 		(ltok::LOR, void, loc(1, 15)),
    163 		(ltok::BOREQ, void, loc(1, 18)),
    164 		(ltok::LOREQ, void, loc(1, 21)),
    165 		(ltok::BXOR, void, loc(1, 25)),
    166 		(ltok::LXOR, void, loc(1, 27)),
    167 		(ltok::BXOREQ, void, loc(1, 30)),
    168 		(ltok::LXOREQ, void, loc(1, 33)),
    169 		(ltok::BXOR, void, loc(1, 37)),
    170 	];
    171 	lextest(in, expected);
    172 };
    173 
    174 @test fn lexname() void = {
    175 	const in = "hello world return void foobar :foobaz";
    176 	const expected: [_]token = [
    177 		(ltok::NAME, "hello", loc(1, 1)),
    178 		(ltok::NAME, "world", loc(1, 7)),
    179 		(ltok::RETURN, void, loc(1, 13)),
    180 		(ltok::VOID, void, loc(1, 20)),
    181 		(ltok::NAME, "foobar", loc(1, 25)),
    182 		(ltok::COLON, void, loc(1, 32)),
    183 		(ltok::NAME, "foobaz", loc(1, 33)),
    184 	];
    185 	lextest(in, expected);
    186 };
    187 
    188 @test fn keywords() void = {
    189 	let keywords = bmap[..ltok::LAST_KEYWORD+1];
    190 	for (let i = 0z; i < len(keywords); i += 1) {
    191 		let buf = memio::fixed(strings::toutf8(keywords[i]));
    192 		let lexer = init(&buf, "<test>");
    193 		let tok = lex(&lexer) as token;
    194 		assert(tok.0 == i: ltok);
    195 	};
    196 };
    197 
    198 @test fn comments() void = {
    199 	const in = "hello world // foo\nbar";
    200 	const expected: [_]token = [
    201 		(ltok::NAME, "hello", loc(1, 1)),
    202 		(ltok::NAME, "world", loc(1, 7)),
    203 		(ltok::NAME, "bar", loc(2, 1)),
    204 	];
    205 	lextest(in, expected);
    206 
    207 	let in = "// foo\n// bar\nhello world// baz\n\n// bad\ntest";
    208 	let buf = memio::fixed(strings::toutf8(in));
    209 	let lexer = init(&buf, "<input>", flag::COMMENTS);
    210 	assert(lex(&lexer) is token);
    211 	assert(comment(&lexer) == " foo\n bar\n");
    212 	assert(lex(&lexer) is token);
    213 	assert(comment(&lexer) == " baz\n");
    214 	assert(lex(&lexer) is token);
    215 	assert(comment(&lexer) == " bad\n");
    216 };
    217 
    218 @test fn runes() void = {
    219 	const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' "
    220 		"'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U12345678'";
    221 	const expected: [_]token = [
    222 		(ltok::LIT_RUNE, 'a', loc(1, 1)),
    223 		(ltok::LIT_RUNE, 'b', loc(1, 5)),
    224 		(ltok::LIT_RUNE, '\a', loc(1, 9)),
    225 		(ltok::LIT_RUNE, '\b', loc(1, 14)),
    226 		(ltok::LIT_RUNE, '\f', loc(1, 19)),
    227 		(ltok::LIT_RUNE, '\n', loc(1, 24)),
    228 		(ltok::LIT_RUNE, '\r', loc(1, 29)),
    229 		(ltok::LIT_RUNE, '\t', loc(1, 34)),
    230 		(ltok::LIT_RUNE, '\v', loc(1, 39)),
    231 		(ltok::LIT_RUNE, '\0', loc(1, 44)),
    232 		(ltok::LIT_RUNE, '\\', loc(1, 49)),
    233 		(ltok::LIT_RUNE, '\'', loc(1, 54)),
    234 		(ltok::LIT_RUNE, '\x0A', loc(1, 59)),
    235 		(ltok::LIT_RUNE, '\u1234', loc(1, 66)),
    236 		(ltok::LIT_RUNE, '\U12345678', loc(1, 75)),
    237 	];
    238 	lextest(in, expected);
    239 };
    240 
    241 @test fn strings() void = {
    242 	const in = `"a" "b" "\a" "\b" "\f" "\n" "\r" "\t" "\v" "\0" "\\" "\'"`;
    243 	const expected: [_]token = [
    244 		(ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)),
    245 	];
    246 	// TODO: test \x and \u and \U
    247 	lextest(in, expected);
    248 	const in = `"ab\a\b\f\n\r\t\v\0\\\'"`;
    249 	const expected: [_]token = [
    250 		(ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)),
    251 	];
    252 	lextest(in, expected);
    253 	const in = `"hello world", "こんにちは", "return", "foo"`;
    254 	const expected: [_]token = [
    255 		(ltok::LIT_STR, "hello world", loc(1, 1)),
    256 		(ltok::COMMA, void, loc(1, 14)),
    257 		(ltok::LIT_STR, "こんにちは", loc(1, 16)),
    258 		(ltok::COMMA, void, loc(1, 23)),
    259 		(ltok::LIT_STR, "return", loc(1, 25)),
    260 		(ltok::COMMA, void, loc(1, 33)),
    261 		(ltok::LIT_STR, "foo", loc(1, 35)),
    262 	];
    263 	lextest(in, expected);
    264 	const in = "\"foo\"\n"
    265 		"// bar\n"
    266 		"\"baz\"";
    267 	const expected: [_]token = [
    268 		(ltok::LIT_STR, "foobaz", loc(1, 1)),
    269 	];
    270 	lextest(in, expected);
    271 };
    272 
    273 @test fn literals() void = {
    274 	const in = "1e5 -1i32 9223372036854775809 1e2z 255u8 0o42u16\n"
    275 		"0b1000101u32 0xDEADBEEFu64 -0b10i8 -5e0i16 -0o16i32\n"
    276 		"0b00000010000001100000011100001111000000100000011000000111i64\n"
    277 		"13.37 13.37f32 13.37f64 6.022e23 1.616255e-35f64 1e-1";
    278 	const expected: [_]token = [
    279 		(ltok::LIT_ICONST, 1e5u64, loc(1, 1)),
    280 		(ltok::MINUS, void, loc(1, 5)),
    281 		(ltok::LIT_I32, 1u64, loc(1, 6)),
    282 		(ltok::LIT_ICONST, 9223372036854775809u64, loc(1, 11)),
    283 		(ltok::LIT_SIZE, 1e2u64, loc(1, 31)),
    284 		(ltok::LIT_U8, 255u64, loc(1, 36)),
    285 		(ltok::LIT_U16, 0o42u64, loc(1, 42)),
    286 		(ltok::LIT_U32, 0b1000101u64, loc(2, 1)),
    287 		(ltok::LIT_U64, 0xDEADBEEFu64, loc(2, 14)),
    288 		(ltok::MINUS, void, loc(2, 28)),
    289 		(ltok::LIT_I8, 0b10u64, loc(2, 29)),
    290 		(ltok::MINUS, void, loc(2, 36)),
    291 		(ltok::LIT_I16, 5e0u64, loc(2, 37)),
    292 		(ltok::MINUS, void, loc(2, 44)),
    293 		(ltok::LIT_I32, 0o16u64, loc(2, 45)),
    294 		(ltok::LIT_I64, 0b00000010000001100000011100001111000000100000011000000111u64, loc(3, 1)),
    295 		(ltok::LIT_FCONST, 13.37, loc(4, 1)),
    296 		(ltok::LIT_F32, 13.37, loc(4, 7)),
    297 		(ltok::LIT_F64, 13.37, loc(4, 16)),
    298 		(ltok::LIT_FCONST, 6.022e23, loc(4, 25)),
    299 		(ltok::LIT_F64, 1.616255e-35, loc(4, 34)),
    300 		(ltok::LIT_FCONST, 1e-1, loc(4, 50)),
    301 	];
    302 	lextest(in, expected);
    303 };
    304 
    305 @test fn invalid() void = {
    306 	// Using \x80 within a string literal will cause this to output an
    307 	// empty string
    308 	const in = ['1': u8, 0x80];
    309 
    310 	let buf = memio::fixed(in);
    311 	let lexer = init(&buf, "<test>");
    312 
    313 	const s = lex(&lexer) as error as syntax;
    314 	assert(s.1 == "Source file is not valid UTF-8");
    315 
    316 	// Regression: invalid UTF-8 at the beginning of a token used to cause
    317 	// a crash in nextw
    318 	const in = [0x80: u8];
    319 
    320 	let buf = memio::fixed(in);
    321 	let lexer = init(&buf, "<test>");
    322 
    323 	const s = lex(&lexer) as error as syntax;
    324 	assert(s.1 == "Source file is not valid UTF-8");
    325 
    326 	// Regression: invalid escape sequences such as "\^" used to casue a
    327 	// crash
    328 	const in = ['"': u8, '\\': u8, '^': u8, '"': u8];
    329 
    330 	let buf = memio::fixed(in);
    331 	let lexer = init(&buf, "<test>");
    332 
    333 	const s = lex(&lexer) as error as syntax;
    334 	assert(s.1 == "unknown escape sequence");
    335 
    336 	// Regression: <X>e followed by another token used to cause a crash
    337 	const in = ['0': u8, 'e': u8, ')': u8];
    338 
    339 	let buf = memio::fixed(in);
    340 	let lexer = init(&buf, "<test>");
    341 
    342 	const s = lex(&lexer) as error as syntax;
    343 	assert(s.1 == "expected exponent");
    344 };
    345 
    346 
    347 // Small virtual machine for testing mkloc/prevloc.
    348 // NEXT, UNGET, LEX, and UNLEX call the obvious functions (with UNGET and UNLEX
    349 // pulling from a buffer that NEXT/LEX feed into).
    350 // After each instruction, the results of mkloc/prevloc are checked against the
    351 // next element of the test vector.
    352 type op = enum {
    353 	LEX,
    354 	NEXT,
    355 	UNGET,
    356 	UNLEX,
    357 };
    358 
    359 @test fn loc() void = {
    360 	const src = "h 	ello: my	name is Inigo Montoya";
    361 	let buf = memio::fixed(strings::toutf8(src));
    362 	let lexer = init(&buf, "<test>");
    363 	const ops: [_]op = [
    364 		op::NEXT,
    365 		op::NEXT,
    366 		op::NEXT,
    367 		op::UNGET,
    368 		op::UNGET,
    369 		op::NEXT,
    370 		op::NEXT,
    371 		op::LEX,
    372 		op::LEX,
    373 		op::UNLEX,
    374 		op::LEX,
    375 		op::LEX,
    376 		op::UNLEX,
    377 		op::LEX,
    378 		op::LEX,
    379 		op::LEX,
    380 		op::LEX,
    381 	];
    382 	const vector: [_](location, location) = [
    383 		(loc(1, 2), loc(1, 1)),
    384 		(loc(1, 3), loc(1, 2)),
    385 		(loc(1, 9), loc(1, 3)),
    386 		(loc(1, 3), loc(1, 2)),
    387 		(loc(1, 2), loc(1, 1)),
    388 		(loc(1, 3), loc(1, 2)),
    389 		(loc(1, 9), loc(1, 3)),
    390 		(loc(1, 13), loc(1, 12)),
    391 		(loc(1, 14), loc(1, 13)),
    392 		(loc(1, 13), loc(1, 12)),
    393 		(loc(1, 14), loc(1, 13)),
    394 		(loc(1, 17), loc(1, 16)),
    395 		(loc(1, 14), loc(1, 13)),
    396 		(loc(1, 17), loc(1, 16)),
    397 		(loc(1, 29), loc(1, 28)),
    398 		(loc(1, 32), loc(1, 31)),
    399 		(loc(1, 38), loc(1, 37)),
    400 	];
    401 
    402 	// We could statically allocate r and t, but what's the point
    403 	let r: [](rune, location) = [];
    404 	defer free(r);
    405 	let t: []token = [];
    406 	defer free(t);
    407 	for (let i = 0z; i < len(ops); i += 1) {
    408 		switch (ops[i]) {
    409 		case op::LEX =>
    410 			append(t, lex(&lexer)!);
    411 		case op::NEXT =>
    412 			append(r, next(&lexer) as (rune, location));
    413 		case op::UNGET =>
    414 			unget(&lexer, r[len(r) - 1]);
    415 			delete(r[len(r) - 1]);
    416 		case op::UNLEX =>
    417 			unlex(&lexer, t[len(t) - 1]);
    418 			delete(t[len(t) - 1]);
    419 		};
    420 		let loc = mkloc(&lexer);
    421 		let ploc = prevloc(&lexer);
    422 		// TODO: Aggregate equality
    423 		assert(loc.path == vector[i].0.path
    424 			&& loc.line == vector[i].0.line
    425 			&& loc.col == vector[i].0.col);
    426 		assert(ploc.path == vector[i].1.path
    427 			&& ploc.line == vector[i].1.line
    428 			&& ploc.col == vector[i].1.col);
    429 	};
    430 };
    431 
    432 @test fn access_tuple() void = {
    433 	const in = "((0, 1), 2).0.1";
    434 	const expected: []token = [
    435 		(ltok::LPAREN, void, loc(1, 1)),
    436 		(ltok::LPAREN, void, loc(1, 2)),
    437 		(ltok::LIT_ICONST, 0, loc(1, 3)),
    438 		(ltok::COMMA, void, loc(1, 4)),
    439 		(ltok::LIT_ICONST, 1, loc(1, 6)),
    440 		(ltok::RPAREN, void, loc(1, 7)),
    441 		(ltok::COMMA, void, loc(1, 8)),
    442 		(ltok::LIT_ICONST, 2, loc(1, 10)),
    443 		(ltok::RPAREN, void, loc(1, 11)),
    444 		(ltok::DOT, void, loc(1, 12)),
    445 		(ltok::LIT_ICONST, 0, loc(1, 13)),
    446 		(ltok::DOT, void, loc(1, 14)),
    447 		(ltok::LIT_ICONST, 1, loc(1, 15)),
    448 	];
    449 	lextest(in, expected);
    450 };