hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

+test.ha (12242B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use bufio;
      5 use fmt;
      6 use io;
      7 use memio;
      8 use strings;
      9 
     10 fn initbuf(in: []u8, flags: flag = flag::NONE) lexer = {
     11 	static let buf: [256]u8 = [0...];
     12 	static let s = memio::stream {
     13 		stream = null: io::stream,
     14 		...
     15 	};
     16 	static let sc = bufio::scanner {
     17 		stream = null: io::stream,
     18 		src = 0,
     19 		...
     20 	};
     21 
     22 	s = memio::fixed(in);
     23 	sc = bufio::newscanner_static(&s, buf);
     24 	return init(&sc, "<test>", flags);
     25 };
     26 
     27 fn initstr(in: str, flags: flag = flag::NONE) lexer = {
     28 	return initbuf(strings::toutf8(in), flags);
     29 };
     30 
     31 @test fn unlex() void = {
     32 	let sc = bufio::newscanner_static(io::empty, []);
     33 	let lexer = init(&sc, "<test>");
     34 	unlex(&lexer, (ltok::IF, void, location {
     35 		path = "<test>",
     36 		line = 1234,
     37 		col = 1234,
     38 	}));
     39 	let t = lex(&lexer) as token;
     40 	assert(t.0 == ltok::IF);
     41 	assert(t.1 is void);
     42 	assert(t.2.path == "<test>");
     43 	assert(t.2.line == 1234 && t.2.col == 1234);
     44 };
     45 
     46 fn vassert(expected: value, actual: value) void = {
     47 	match (expected) {
     48 	case let expected: str =>
     49 		assert(actual as str == expected);
     50 	case let expected: rune =>
     51 		assert(actual as rune == expected);
     52 	case let expected: u64 =>
     53 		assert(actual as u64 == expected);
     54 	case let expected: f64 =>
     55 		assert(actual as f64 == expected);
     56 	case void =>
     57 		assert(actual is void);
     58 	};
     59 };
     60 
     61 fn lextest(in: str, expected: []token) void = {
     62 	let lexer = initstr(in);
     63 	for (let i = 0z; i < len(expected); i += 1) {
     64 		let etok = expected[i];
     65 		let tl = match (lex(&lexer)) {
     66 		case let tl: token =>
     67 			yield tl;
     68 		case let err: error =>
     69 			fmt::errorfln("{}: {}", i, strerror(err))!;
     70 			abort();
     71 		};
     72 		if (tl.0 != etok.0) {
     73 			fmt::errorfln("Expected {}, got {}",
     74 				tokstr(etok), tokstr(tl))!;
     75 		};
     76 		assert(tl.0 == etok.0);
     77 		vassert(tl.1, etok.1);
     78 		if (tl.2.line != etok.2.line || tl.2.col != etok.2.col
     79 				|| tl.2.path != etok.2.path) {
     80 			fmt::errorfln("{}:{}:{} != {}:{}:{}",
     81 				tl.2.path, tl.2.line, tl.2.col,
     82 				etok.2.path, etok.2.line, etok.2.col)!;
     83 			abort();
     84 		};
     85 	};
     86 	let t = lex(&lexer) as token;
     87 	assert(t.0 == ltok::EOF);
     88 };
     89 
     90 fn loc(line: uint, col: uint) location = location {
     91 	path = "<test>",
     92 	line = line,
     93 	col = col,
     94 };
     95 
     96 @test fn lex1() void = {
     97 	const in = "~,{[(}]);";
     98 	const expected: [_]token = [
     99 		(ltok::BNOT, void, loc(1, 1)),
    100 		(ltok::COMMA, void, loc(1, 2)),
    101 		(ltok::LBRACE, void, loc(1, 3)),
    102 		(ltok::LBRACKET, void, loc(1, 4)),
    103 		(ltok::LPAREN, void, loc(1, 5)),
    104 		(ltok::RBRACE, void, loc(1, 6)),
    105 		(ltok::RBRACKET, void, loc(1, 7)),
    106 		(ltok::RPAREN, void, loc(1, 8)),
    107 		(ltok::SEMICOLON, void, loc(1, 9)),
    108 	];
    109 	lextest(in, expected);
    110 };
    111 
    112 @test fn lex2() void = {
    113 	// Ends with = to test =, EOF
    114 	const in = "* *= % %= + += - -= : :: = == / /= =";
    115 	const expected: [_]token = [
    116 		(ltok::TIMES, void, loc(1, 1)),
    117 		(ltok::TIMESEQ, void, loc(1, 3)),
    118 		(ltok::MODULO, void, loc(1, 6)),
    119 		(ltok::MODEQ, void, loc(1, 8)),
    120 		(ltok::PLUS, void, loc(1, 11)),
    121 		(ltok::PLUSEQ, void, loc(1, 13)),
    122 		(ltok::MINUS, void, loc(1, 16)),
    123 		(ltok::MINUSEQ, void, loc(1, 18)),
    124 		(ltok::COLON, void, loc(1, 21)),
    125 		(ltok::DOUBLE_COLON, void, loc(1, 23)),
    126 		(ltok::EQUAL, void, loc(1, 26)),
    127 		(ltok::LEQUAL, void, loc(1, 28)),
    128 		(ltok::DIV, void, loc(1, 31)),
    129 		(ltok::DIVEQ, void, loc(1, 33)),
    130 		(ltok::EQUAL, void, loc(1, 36)),
    131 	];
    132 	lextest(in, expected);
    133 };
    134 
    135 @test fn lex3() void = {
    136 	const in = ". .. ... < << <= <<= > >> >= >>= >>";
    137 	const expected: [_]token = [
    138 		(ltok::DOT, void, loc(1, 1)),
    139 		(ltok::DOUBLE_DOT, void, loc(1, 3)),
    140 		(ltok::ELLIPSIS, void, loc(1, 6)),
    141 		(ltok::LESS, void, loc(1, 10)),
    142 		(ltok::LSHIFT, void, loc(1, 12)),
    143 		(ltok::LESSEQ, void, loc(1, 15)),
    144 		(ltok::LSHIFTEQ, void, loc(1, 18)),
    145 		(ltok::GT, void, loc(1, 22)),
    146 		(ltok::RSHIFT, void, loc(1, 24)),
    147 		(ltok::GTEQ, void, loc(1, 27)),
    148 		(ltok::RSHIFTEQ, void, loc(1, 30)),
    149 		(ltok::RSHIFT, void, loc(1, 34)),
    150 	];
    151 	lextest(in, expected);
    152 
    153 	const in = "& && &= &&= | || |= ||= ^ ^^ ^= ^^= ^";
    154 	const expected: [_]token = [
    155 		(ltok::BAND, void, loc(1, 1)),
    156 		(ltok::LAND, void, loc(1, 3)),
    157 		(ltok::BANDEQ, void, loc(1, 6)),
    158 		(ltok::LANDEQ, void, loc(1, 9)),
    159 		(ltok::BOR, void, loc(1, 13)),
    160 		(ltok::LOR, void, loc(1, 15)),
    161 		(ltok::BOREQ, void, loc(1, 18)),
    162 		(ltok::LOREQ, void, loc(1, 21)),
    163 		(ltok::BXOR, void, loc(1, 25)),
    164 		(ltok::LXOR, void, loc(1, 27)),
    165 		(ltok::BXOREQ, void, loc(1, 30)),
    166 		(ltok::LXOREQ, void, loc(1, 33)),
    167 		(ltok::BXOR, void, loc(1, 37)),
    168 	];
    169 	lextest(in, expected);
    170 };
    171 
    172 @test fn lexname() void = {
    173 	const in = "hello world return void foobar :foobaz";
    174 	const expected: [_]token = [
    175 		(ltok::NAME, "hello", loc(1, 1)),
    176 		(ltok::NAME, "world", loc(1, 7)),
    177 		(ltok::RETURN, void, loc(1, 13)),
    178 		(ltok::VOID, void, loc(1, 20)),
    179 		(ltok::NAME, "foobar", loc(1, 25)),
    180 		(ltok::COLON, void, loc(1, 32)),
    181 		(ltok::NAME, "foobaz", loc(1, 33)),
    182 	];
    183 	lextest(in, expected);
    184 };
    185 
    186 @test fn keywords() void = {
    187 	let keywords = bmap[..ltok::LAST_KEYWORD+1];
    188 	for (let i = 0z; i < len(keywords); i += 1) {
    189 		let lexer = initstr(keywords[i]);
    190 		let tok = lex(&lexer) as token;
    191 		assert(tok.0 == i: ltok);
    192 	};
    193 };
    194 
    195 @test fn comments() void = {
    196 	const in = "hello world // foo\nbar";
    197 	const expected: [_]token = [
    198 		(ltok::NAME, "hello", loc(1, 1)),
    199 		(ltok::NAME, "world", loc(1, 7)),
    200 		(ltok::NAME, "bar", loc(2, 1)),
    201 	];
    202 	lextest(in, expected);
    203 
    204 	let lexer = initstr("// foo\n// bar\nhello world// baz\n\n// bad\ntest",
    205 		flag::COMMENTS);
    206 	assert(lex(&lexer) is token);
    207 	assert(comment(&lexer) == " foo\n bar\n");
    208 	assert(lex(&lexer) is token);
    209 	assert(comment(&lexer) == " baz\n");
    210 	assert(lex(&lexer) is token);
    211 	assert(comment(&lexer) == " bad\n");
    212 };
    213 
    214 @test fn runes() void = {
    215 	const in = "'a' 'b' '\\a' '\\b' '\\f' '\\n' '\\r' '\\t' '\\v' '\\0' "
    216 		"'\\\\' '\\\'' '\\x0A' '\\u1234' '\\U0010abcd'";
    217 	const expected: [_]token = [
    218 		(ltok::LIT_RCONST, 'a', loc(1, 1)),
    219 		(ltok::LIT_RCONST, 'b', loc(1, 5)),
    220 		(ltok::LIT_RCONST, '\a', loc(1, 9)),
    221 		(ltok::LIT_RCONST, '\b', loc(1, 14)),
    222 		(ltok::LIT_RCONST, '\f', loc(1, 19)),
    223 		(ltok::LIT_RCONST, '\n', loc(1, 24)),
    224 		(ltok::LIT_RCONST, '\r', loc(1, 29)),
    225 		(ltok::LIT_RCONST, '\t', loc(1, 34)),
    226 		(ltok::LIT_RCONST, '\v', loc(1, 39)),
    227 		(ltok::LIT_RCONST, '\0', loc(1, 44)),
    228 		(ltok::LIT_RCONST, '\\', loc(1, 49)),
    229 		(ltok::LIT_RCONST, '\'', loc(1, 54)),
    230 		(ltok::LIT_RCONST, '\x0A', loc(1, 59)),
    231 		(ltok::LIT_RCONST, '\u1234', loc(1, 66)),
    232 		(ltok::LIT_RCONST, '\U0010abcd', loc(1, 75)),
    233 	];
    234 	lextest(in, expected);
    235 };
    236 
    237 @test fn strings() void = {
    238 	const in = `"a" "b" "\a" "\b" "\f" "\n" "\r" "\t" "\v" "\0" "\\" "\'"`;
    239 	const expected: [_]token = [
    240 		(ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)),
    241 	];
    242 	lextest(in, expected);
    243 	const in = `"ab\a\b\f\n\r\t\v\0\\\'"`;
    244 	const expected: [_]token = [
    245 		(ltok::LIT_STR, "ab\a\b\f\n\r\t\v\0\\\'", loc(1, 1)),
    246 	];
    247 	lextest(in, expected);
    248 	const in = `"hello world", "こんにちは", "return", "foo"`;
    249 	const expected: [_]token = [
    250 		(ltok::LIT_STR, "hello world", loc(1, 1)),
    251 		(ltok::COMMA, void, loc(1, 14)),
    252 		(ltok::LIT_STR, "こんにちは", loc(1, 16)),
    253 		(ltok::COMMA, void, loc(1, 23)),
    254 		(ltok::LIT_STR, "return", loc(1, 25)),
    255 		(ltok::COMMA, void, loc(1, 33)),
    256 		(ltok::LIT_STR, "foo", loc(1, 35)),
    257 	];
    258 	lextest(in, expected);
    259 	const in = "\"foo\"\n"
    260 		"// bar\n"
    261 		"\"baz\"";
    262 	const expected: [_]token = [
    263 		(ltok::LIT_STR, "foobaz", loc(1, 1)),
    264 	];
    265 	lextest(in, expected);
    266 	const in = `"\x7f" "\x1b" "\uabcd" "\U0010abcd"`;
    267 	const expected: [_]token = [
    268 		(ltok::LIT_STR, "\x7f\x1b\uabcd\U0010abcd", loc(1, 1)),
    269 	];
    270 	lextest(in, expected);
    271 };
    272 
    273 @test fn literals() void = {
    274 	const in = "1e5 -1i32 9223372036854775809 1e2z 255u8 0o42u16\n"
    275 		"0b1000101u32 0xDEADBEEFu64 -0b10i8 -5e0i16 -0o16i32\n"
    276 		"0b00000010000001100000011100001111000000100000011000000111i64\n"
    277 		"13.37 13.37f32 13.37f64 6.022e23 1.616255e-35f64 1e-1 0x1p-2";
    278 	const expected: [_]token = [
    279 		(ltok::LIT_ICONST, 1e5u64, loc(1, 1)),
    280 		(ltok::MINUS, void, loc(1, 5)),
    281 		(ltok::LIT_I32, 1u64, loc(1, 6)),
    282 		(ltok::LIT_ICONST, 9223372036854775809u64, loc(1, 11)),
    283 		(ltok::LIT_SIZE, 1e2u64, loc(1, 31)),
    284 		(ltok::LIT_U8, 255u64, loc(1, 36)),
    285 		(ltok::LIT_U16, 0o42u64, loc(1, 42)),
    286 		(ltok::LIT_U32, 0b1000101u64, loc(2, 1)),
    287 		(ltok::LIT_U64, 0xDEADBEEFu64, loc(2, 14)),
    288 		(ltok::MINUS, void, loc(2, 28)),
    289 		(ltok::LIT_I8, 0b10u64, loc(2, 29)),
    290 		(ltok::MINUS, void, loc(2, 36)),
    291 		(ltok::LIT_I16, 5e0u64, loc(2, 37)),
    292 		(ltok::MINUS, void, loc(2, 44)),
    293 		(ltok::LIT_I32, 0o16u64, loc(2, 45)),
    294 		(ltok::LIT_I64, 0b00000010000001100000011100001111000000100000011000000111u64, loc(3, 1)),
    295 		(ltok::LIT_FCONST, 13.37, loc(4, 1)),
    296 		(ltok::LIT_F32, 13.37, loc(4, 7)),
    297 		(ltok::LIT_F64, 13.37, loc(4, 16)),
    298 		(ltok::LIT_FCONST, 6.022e23, loc(4, 25)),
    299 		(ltok::LIT_F64, 1.616255e-35, loc(4, 34)),
    300 		(ltok::LIT_FCONST, 1e-1, loc(4, 50)),
    301 		(ltok::LIT_FCONST, 0x1p-2, loc(4, 55)),
    302 	];
    303 	lextest(in, expected);
    304 };
    305 
    306 @test fn invalid() void = {
    307 	// Using \x80 within a string literal will cause this to output an
    308 	// empty string
    309 	let lexer = initbuf(['1', 0x80]);
    310 	const s = lex(&lexer) as error as syntax;
    311 	assert(s.1 == "Source file is not valid UTF-8");
    312 
    313 	// Regression: invalid UTF-8 at the beginning of a token used to cause
    314 	// a crash in nextw
    315 	let lexer = initbuf([0x80]);
    316 	const s = lex(&lexer) as error as syntax;
    317 	assert(s.1 == "Source file is not valid UTF-8");
    318 
    319 	// Regression: invalid escape sequences such as "\^" used to casue a
    320 	// crash
    321 	let lexer = initstr(`"\^"`);
    322 	const s = lex(&lexer) as error as syntax;
    323 	assert(s.1 == "unknown escape sequence");
    324 
    325 	// Regression: <X>e followed by another token used to cause a crash
    326 	let lexer = initstr("0e)");
    327 	const s = lex(&lexer) as error as syntax;
    328 	assert(s.1 == "expected exponent");
    329 };
    330 
    331 
// Instruction set of a small virtual machine used to test mkloc/prevloc.
//
// NEXT, UNGET, LEX and UNLEX call the lexer function of the same name. UNGET
// and UNLEX take their argument from a buffer that NEXT and LEX, respectively,
// feed into. After each instruction, the results of mkloc and prevloc are
// checked against the next element of the test vector.
type op = enum {
	LEX,
	NEXT,
	UNGET,
	UNLEX,
};
    343 
    344 @test fn loc() void = {
    345 	let lexer = initstr("h 	ello: my	name is\nInigo Montoya.");
    346 	const ops: [_]op = [
    347 		op::NEXT,
    348 		op::NEXT,
    349 		op::NEXT,
    350 		op::UNGET,
    351 		op::UNGET,
    352 		op::NEXT,
    353 		op::NEXT,
    354 		op::LEX,
    355 		op::LEX,
    356 		op::UNLEX,
    357 		op::LEX,
    358 		op::LEX,
    359 		op::UNLEX,
    360 		op::LEX,
    361 		op::LEX,
    362 		op::LEX,
    363 		op::LEX,
    364 	];
    365 	const vector: [_](location, location) = [
    366 		(loc(1, 2), loc(1, 1)),
    367 		(loc(1, 3), loc(1, 2)),
    368 		(loc(1, 9), loc(1, 3)),
    369 		(loc(1, 3), loc(1, 2)),
    370 		(loc(1, 2), loc(1, 1)),
    371 		(loc(1, 3), loc(1, 2)),
    372 		(loc(1, 9), loc(1, 3)),
    373 		(loc(1, 13), loc(1, 12)),
    374 		(loc(1, 14), loc(1, 13)),
    375 		(loc(1, 13), loc(1, 12)),
    376 		(loc(1, 14), loc(1, 13)),
    377 		(loc(1, 17), loc(1, 16)),
    378 		(loc(1, 14), loc(1, 13)),
    379 		(loc(1, 17), loc(1, 16)),
    380 		(loc(1, 29), loc(1, 28)),
    381 		(loc(1, 32), loc(1, 31)),
    382 		(loc(2, 6), loc(2, 5)),
    383 		(loc(2, 14), loc(2, 13)),
    384 	];
    385 
    386 	// We could statically allocate r and t, but what's the point
    387 	let r: [](rune, location) = [];
    388 	defer free(r);
    389 	let t: []token = [];
    390 	defer free(t);
    391 	for (let i = 0z; i < len(ops); i += 1) {
    392 		switch (ops[i]) {
    393 		case op::LEX =>
    394 			append(t, lex(&lexer)!);
    395 		case op::NEXT =>
    396 			append(r, next(&lexer) as (rune, location));
    397 		case op::UNGET =>
    398 			unget(&lexer, r[len(r) - 1].0);
    399 			delete(r[len(r) - 1]);
    400 		case op::UNLEX =>
    401 			unlex(&lexer, t[len(t) - 1]);
    402 			delete(t[len(t) - 1]);
    403 		};
    404 		let loc = mkloc(&lexer);
    405 		let ploc = prevloc(&lexer);
    406 		assert(loc.path == vector[i].0.path
    407 			&& loc.line == vector[i].0.line
    408 			&& loc.col == vector[i].0.col);
    409 		assert(ploc.path == vector[i].1.path
    410 			&& ploc.line == vector[i].1.line
    411 			&& ploc.col == vector[i].1.col);
    412 	};
    413 };
    414 
    415 @test fn access_tuple() void = {
    416 	const in = "((0, 1), 2).0.1";
    417 	const expected: []token = [
    418 		(ltok::LPAREN, void, loc(1, 1)),
    419 		(ltok::LPAREN, void, loc(1, 2)),
    420 		(ltok::LIT_ICONST, 0, loc(1, 3)),
    421 		(ltok::COMMA, void, loc(1, 4)),
    422 		(ltok::LIT_ICONST, 1, loc(1, 6)),
    423 		(ltok::RPAREN, void, loc(1, 7)),
    424 		(ltok::COMMA, void, loc(1, 8)),
    425 		(ltok::LIT_ICONST, 2, loc(1, 10)),
    426 		(ltok::RPAREN, void, loc(1, 11)),
    427 		(ltok::DOT, void, loc(1, 12)),
    428 		(ltok::LIT_ICONST, 0, loc(1, 13)),
    429 		(ltok::DOT, void, loc(1, 14)),
    430 		(ltok::LIT_ICONST, 1, loc(1, 15)),
    431 	];
    432 	lextest(in, expected);
    433 };