hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

lex.ha (18752B)


      1 // License: MPL-2.0
      2 // (c) 2021-2022 Alexey Yerin <yyp@disroot.org>
      3 // (c) 2021 Armin Weigl <tb46305@gmail.com>
      4 // (c) 2021 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
      5 // (c) 2021 Drew DeVault <sir@cmpwn.com>
      6 // (c) 2021 Ember Sawady <ecs@d2evs.net>
      7 // (c) 2021 Sudipto Mallick <smlckz@disroot.org>
      8 use ascii;
      9 use bufio;
     10 use encoding::utf8;
     11 use fmt;
     12 use io;
     13 use sort;
     14 use strconv;
     15 use strings;
     16 use strio;
     17 
// State of a lexer which reads Hare source from an I/O handle. Use [[init]] to
// create one.
export type lexer = struct {
	// Handle the source runes are scanned from
	in: io::handle,
	// Path stored in the locations built by this lexer
	path: str,
	// (line, column) assigned to the next rune scanned from "in"
	loc: (uint, uint),
	// Token handed back through [[unlex]], or void if there is none
	un: (token | void),
	// Runes pushed back by unget; rb[0] is handed out first
	rb: [2]((rune, location) | io::EOF | void),
	// 1 more than the size of un and rb respectively
	// prevunlocs: (prevloc, mkloc) pairs recorded by the two most recent
	// calls to lex which read from the input, newest first
	prevunlocs: [2](location, location),
	// prevrlocs: locations of the three most recently scanned runes, newest
	// first
	prevrlocs: [3]location,
	// Options given to [[init]], ORed together
	flags: flags,
	// Comment text collected so far; see [[comment]]
	comment: str,
	// Set after a lone '.' token; while set, a numeric literal stops in
	// front of a '.' instead of lexing a fractional part
	require_int: bool,
};
     31 
// Flags which apply to this lexer. The values are bit flags; [[init]] combines
// every flag it is given with bitwise OR.
export type flags = enum uint {
	// No optional behaviour is enabled
	NONE = 0,
	// Enables lexing comments
	COMMENTS = 1 << 0,
};
     38 
// A syntax error: the location it was detected at, and a message describing
// it. [[strerror]] renders both into a single string.
export type syntax = !(location, str);
     41 
// All possible lexer errors: either an I/O error or a [[syntax]] error.
export type error = !(io::error | syntax);
     44 
     45 // Returns a human-friendly string for a given error
     46 export fn strerror(err: error) const str = {
     47 	static let buf: [2048]u8 = [0...];
     48 	match (err) {
     49 	case let err: io::error =>
     50 		return io::strerror(err);
     51 	case let s: syntax =>
     52 		return fmt::bsprintf(buf, "{}:{},{}: Syntax error: {}",
     53 			s.0.path, s.0.line, s.0.col, s.1);
     54 	};
     55 };
     56 
     57 // Initializes a new lexer for the given input. The path is borrowed.
     58 export fn init(in: io::handle, path: str, flags: flags...) lexer = {
     59 	let f = flags::NONE;
     60 	for (let i = 0z; i < len(flags); i += 1) {
     61 		f |= flags[i];
     62 	};
     63 	const loc = location { path = path, line = 1, col = 1 };
     64 	return lexer {
     65 		in = in,
     66 		path = path,
     67 		loc = (1, 1),
     68 		un = void,
     69 		rb = [void...],
     70 		prevunlocs = [(loc, loc)...],
     71 		prevrlocs = [loc...],
     72 		flags = f,
     73 		...
     74 	};
     75 };
     76 
     77 // Returns the current value of the comment buffer, or empty string if unset (or
     78 // if [[flags::COMMENTS]] was not enabled for this lexer).
     79 export fn comment(lex: *lexer) str = lex.comment;
     80 
     81 // Returns the next token from the lexer.
     82 export fn lex(lex: *lexer) (token | error) = {
     83 	match (lex.un) {
     84 	case let tok: token =>
     85 		lex.un = void;
     86 		return tok;
     87 	case void => void;
     88 	};
     89 
     90 	defer {
     91 		lex.prevunlocs[1] = lex.prevunlocs[0];
     92 		lex.prevunlocs[0] = (prevloc(lex), mkloc(lex));
     93 	};
     94 
     95 	let r = match (nextw(lex)?) {
     96 	case io::EOF =>
     97 		return (ltok::EOF, void, mkloc(lex));
     98 	case let r: (rune, location) =>
     99 		yield r;
    100 	};
    101 
    102 	if (ascii::isdigit(r.0)) {
    103 		unget(lex, r);
    104 		return lex_literal(lex);
    105 	};
    106 
    107 	lex.require_int = false;
    108 	if (is_name(r.0, false)) {
    109 		unget(lex, r);
    110 		return lex_name(lex, r.1);
    111 	};
    112 
    113 	let tok = switch (r.0) {
    114 	case '"', '\'', '`' =>
    115 		unget(lex, r);
    116 		return lex_rn_str(lex);
    117 	case '.', '<', '>', '&', '|', '^' =>
    118 		unget(lex, r);
    119 		return lex3(lex);
    120 	case '*', '%', '/', '+', '-', ':', '!', '=' =>
    121 		unget(lex, r);
    122 		return lex2(lex);
    123 	case '~' =>
    124 		yield ltok::BNOT;
    125 	case ',' =>
    126 		yield ltok::COMMA;
    127 	case '{' =>
    128 		yield ltok::LBRACE;
    129 	case '[' =>
    130 		yield ltok::LBRACKET;
    131 	case '(' =>
    132 		yield ltok::LPAREN;
    133 	case '}' =>
    134 		yield ltok::RBRACE;
    135 	case ']' =>
    136 		yield ltok::RBRACKET;
    137 	case ')' =>
    138 		yield ltok::RPAREN;
    139 	case ';' =>
    140 		yield ltok::SEMICOLON;
    141 	case '?' =>
    142 		yield ltok::QUESTION;
    143 	case =>
    144 		return syntaxerr(r.1, "invalid character");
    145 	};
    146 
    147 	line_comment(lex)?;
    148 	return (tok, void, r.1);
    149 };
    150 
    151 fn is_name(r: rune, num: bool) bool =
    152 	ascii::isalpha(r) || r == '_' || r == '@' || (num && ascii::isdigit(r));
    153 
    154 fn lex_unicode(lex: *lexer, loc: location, n: size) (rune | error) = {
    155 	assert(n < 9);
    156 	let buf: [8]u8 = [0...];
    157 	for (let i = 0z; i < n; i += 1z) {
    158 		let r = match (next(lex)?) {
    159 		case io::EOF =>
    160 			return syntaxerr(loc,
    161 				"unexpected EOF scanning for escape");
    162 		case let r: (rune, location) =>
    163 			yield r.0;
    164 		};
    165 		if (!ascii::isxdigit(r)) {
    166 			return syntaxerr(loc,
    167 				"unexpected rune scanning for escape");
    168 		};
    169 		buf[i] = r: u32: u8;
    170 	};
    171 	let s = strings::fromutf8_unsafe(buf[..n]);
    172 	return strconv::stou32b(s, strconv::base::HEX) as u32: rune;
    173 };
    174 
    175 fn lex_rune(lex: *lexer, loc: location) (rune | error) = {
    176 	let r = match (next(lex)?) {
    177 	case io::EOF =>
    178 		return syntaxerr(loc, "unexpected EOF scanning for rune");
    179 	case let r: (rune, location) =>
    180 		yield r.0;
    181 	};
    182 	if (r != '\\') {
    183 		return r;
    184 	};
    185 	r = match (next(lex)?) {
    186 	case io::EOF =>
    187 		return syntaxerr(loc, "unexpected EOF scanning for escape");
    188 	case let r: (rune, location) =>
    189 		yield r.0;
    190 	};
    191 	switch (r) {
    192 	case '\\' =>
    193 		return '\\';
    194 	case '\'' =>
    195 		return '\'';
    196 	case '0' =>
    197 		return '\0';
    198 	case 'a' =>
    199 		return '\a';
    200 	case 'b' =>
    201 		return '\b';
    202 	case 'f' =>
    203 		return '\f';
    204 	case 'n' =>
    205 		return '\n';
    206 	case 'r' =>
    207 		return '\r';
    208 	case 't' =>
    209 		return '\t';
    210 	case 'v' =>
    211 		return '\v';
    212 	case '"' =>
    213 		return '\"';
    214 	case 'x' =>
    215 		return lex_unicode(lex, loc, 2);
    216 	case 'u' =>
    217 		return lex_unicode(lex, loc, 4);
    218 	case 'U' =>
    219 		return lex_unicode(lex, loc, 8);
    220 	};
    221 };
    222 
    223 fn lex_string(lex: *lexer, loc: location, delim: rune) (token | error) = {
    224 	let ret: token = (ltok::LIT_STR, "", loc);
    225 	let buf = strio::dynamic();
    226 	for (true) match (next(lex)?) {
    227 	case io::EOF =>
    228 		return syntaxerr(loc, "unexpected EOF scanning string literal");
    229 	case let r: (rune, location) =>
    230 		if (r.0 == delim) break
    231 		else if (delim == '"') {
    232 			unget(lex, r);
    233 			let r = lex_rune(lex, loc)?;
    234 			strio::appendrune(&buf, r)?;
    235 		} else {
    236 			strio::appendrune(&buf, r.0)?;
    237 		};
    238 	};
    239 	for (true) match (nextw(lex)?) {
    240 	case io::EOF =>
    241 		break;
    242 	case let r: (rune, location) =>
    243 		switch (r.0) {
    244 		case '"', '`' =>
    245 			const tok = lex_string(lex, loc, r.0)?;
    246 			const next = tok.1 as str;
    247 			strio::concat(&buf, next)!;
    248 			free(next);
    249 			break;
    250 		case '/' =>
    251 			match (nextw(lex)?) {
    252 			case io::EOF =>
    253 				unget(lex, r);
    254 			case let s: (rune, location) =>
    255 				if (s.0 == '/') {
    256 					lex_comment(lex)?;
    257 					continue;
    258 				} else {
    259 					unget(lex, s);
    260 					unget(lex, r);
    261 				};
    262 			};
    263 			break;
    264 		case =>
    265 			unget(lex, r);
    266 			break;
    267 		};
    268 	};
    269 	return (ltok::LIT_STR, strio::string(&buf), loc);
    270 };
    271 
    272 fn lex_rn_str(lex: *lexer) (token | error) = {
    273 	const loc = mkloc(lex);
    274 	let r = match (next(lex)) {
    275 	case let r: (rune, location) =>
    276 		yield r.0;
    277 	case (io::EOF | io::error) =>
    278 		abort();
    279 	};
    280 	switch (r) {
    281 	case '\'' => void;
    282 	case '\"', '`' =>
    283 		return lex_string(lex, loc, r);
    284 	case =>
    285 		abort(); // Invariant
    286 	};
    287 
    288 	// Rune literal
    289 	let ret: token = (ltok::LIT_RUNE, lex_rune(lex, loc)?, loc);
    290 	match (next(lex)?) {
    291 	case io::EOF =>
    292 		return syntaxerr(loc, "unexpected EOF");
    293 	case let n: (rune, location) =>
    294 		if (n.0 != '\'') {
    295 			return syntaxerr(n.1, "expected \"\'\"");
    296 		};
    297 	};
    298 	line_comment(lex)?;
    299 	return ret;
    300 };
    301 
    302 fn lex_name(lex: *lexer, loc: location) (token | error) = {
    303 	let buf = strio::dynamic();
    304 	match (next(lex)) {
    305 	case let r: (rune, location) =>
    306 		assert(is_name(r.0, false));
    307 		strio::appendrune(&buf, r.0)!;
    308 	case (io::EOF | io::error) =>
    309 		abort();
    310 	};
    311 
    312 	for (true) match (next(lex)?) {
    313 	case io::EOF => break;
    314 	case let r: (rune, location) =>
    315 		if (!is_name(r.0, true)) {
    316 			unget(lex, r);
    317 			break;
    318 		};
    319 		strio::appendrune(&buf, r.0)?;
    320 	};
    321 
    322 	line_comment(lex)?;
    323 
    324 	let n = strio::string(&buf);
    325 
    326 	match (sort::searchstrings(bmap[..ltok::LAST_KEYWORD+1], n)) {
    327 	case void =>
    328 		return (ltok::NAME, n, loc);
    329 	case let i: size =>
    330 		free(n);
    331 		return (i: ltok, void, loc);
    332 	};
    333 };
    334 
    335 fn line_comment(lex: *lexer) (void | error) = {
    336 	if (lex.flags & flags::COMMENTS != flags::COMMENTS) {
    337 		return;
    338 	};
    339 
    340 	let r: (rune, location) = ('\0', location { ... });
    341 	for (true) match (try(lex, '\t', ' ', '/')?) {
    342 	case void =>
    343 		return;
    344 	case let v: (rune, location) =>
    345 		switch (v.0) {
    346 		case '\t', ' ' => void;
    347 		case '/' =>
    348 			r = v;
    349 			break;
    350 		};
    351 	};
    352 
    353 	if (try(lex, '/')? is void) {
    354 		unget(lex, r);
    355 		return;
    356 	};
    357 
    358 	free(lex.comment);
    359 	lex.comment = "";
    360 	lex_comment(lex)?;
    361 };
    362 
    363 fn lex_comment(lexr: *lexer) (void | error) = {
    364 	if (lexr.flags & flags::COMMENTS != flags::COMMENTS) {
    365 		for (true) match (next(lexr)?) {
    366 		case io::EOF =>
    367 			break;
    368 		case let r: (rune, location) =>
    369 			if (r.0 == '\n') {
    370 				break;
    371 			};
    372 		};
    373 		return;
    374 	};
    375 
    376 	let buf = strio::dynamic();
    377 	defer io::close(&buf)!;
    378 	for (true) match (next(lexr)?) {
    379 	case io::EOF =>
    380 		break;
    381 	case let r: (rune, location) =>
    382 		strio::appendrune(&buf, r.0)!;
    383 		if (r.0 == '\n') {
    384 			break;
    385 		};
    386 	};
    387 	let bytes = strings::toutf8(lexr.comment);
    388 	append(bytes, strings::toutf8(strio::string(&buf))...);
    389 	lexr.comment = strings::fromutf8(bytes)!;
    390 };
    391 
    392 fn lex_literal(lex: *lexer) (token | error) = {
    393 	const loc = mkloc(lex);
    394 	let chars: []u8 = [];
    395 	let r = match (next(lex)?) {
    396 	case io::EOF =>
    397 		return (ltok::EOF, void, loc);
    398 	case let r: (rune, location) =>
    399 		yield r;
    400 	};
    401 
    402 	let started = false;
    403 	let base = 10u;
    404 	if (r.0 == '0') {
    405 		append(chars, utf8::encoderune(r.0)...);
    406 		r = match (next(lex)?) {
    407 		case io::EOF =>
    408 			return (ltok::LIT_ICONST, 0u64, loc);
    409 		case let r: (rune, location) =>
    410 			yield r;
    411 		};
    412 		switch (r.0) {
    413 		case 'b' =>
    414 			base = 2;
    415 		case 'o' =>
    416 			base = 8;
    417 		case 'x' =>
    418 			base = 16;
    419 		case =>
    420 			if (ascii::isdigit(r.0)) {
    421 				return syntaxerr(loc,
    422 					"Leading zeros in number literals aren't permitted (for octal, use the 0o prefix instead)");
    423 			};
    424 			started = true;
    425 			unget(lex, r);
    426 		};
    427 	} else unget(lex, r);
    428 	let basechrs = switch (base) {
    429 	case 2 =>
    430 		yield "01";
    431 	case 8 =>
    432 		yield "01234567";
    433 	case 10 =>
    434 		yield "0123456789";
    435 	case 16 =>
    436 		yield "0123456789ABCDEFabcdef";
    437 	};
    438 
    439 	let suff: (size | void) = void;
    440 	let exp: (size | void) = void;
    441 	let end = 0z;
    442 	let float = false;
    443 	for (true) {
    444 		r = match (next(lex)?) {
    445 		case io::EOF =>
    446 			break;
    447 		case let r: (rune, location) =>
    448 			yield r;
    449 		};
    450 		if (!strings::contains(basechrs, r.0)) switch (r.0) {
    451 		case '.' =>
    452 			if (!started) {
    453 				return syntaxerr(loc,
    454 					"Expected integer literal");
    455 			};
    456 			if (float || exp is size || suff is size
    457 					|| base != 10 || lex.require_int) {
    458 				unget(lex, r);
    459 				break;
    460 			} else {
    461 				r = match (next(lex)?) {
    462 				case io::EOF =>
    463 					break;
    464 				case let r: (rune, location) =>
    465 					yield r;
    466 				};
    467 				if (!strings::contains(basechrs, r.0)) {
    468 					unget(lex, r);
    469 					unget(lex, ('.', location {
    470 						path = r.1.path,
    471 						line = r.1.line,
    472 						col = r.1.col - 1,
    473 					}));
    474 					break;
    475 				};
    476 				unget(lex, r);
    477 				float = true;
    478 				append(chars, utf8::encoderune('.')...);
    479 			};
    480 		case 'e', 'E' =>
    481 			if (!started) {
    482 				return syntaxerr(loc,
    483 					"Expected integer literal");
    484 			};
    485 			if (exp is size || suff is size || base != 10) {
    486 				unget(lex, r);
    487 				break;
    488 			} else {
    489 				if (end == 0) end = len(chars);
    490 				append(chars, utf8::encoderune(r.0)...);
    491 				exp = len(chars);
    492 				r = match (next(lex)?) {
    493 				case io::EOF =>
    494 					break;
    495 				case let r: (rune, location) =>
    496 					yield r;
    497 				};
    498 				switch (r.0) {
    499 				case '+', '-' =>
    500 					append(chars, utf8::encoderune(r.0)...);
    501 				case =>
    502 					unget(lex, r);
    503 				};
    504 				basechrs = "0123456789";
    505 			};
    506 		case 'i', 'u', 'f', 'z' =>
    507 			if (!started) {
    508 				return syntaxerr(loc,
    509 					"Expected integer literal");
    510 			};
    511 			if (suff is size || r.0 != 'f' && float
    512 					|| r.0 == 'f' && base != 10) {
    513 				unget(lex, r);
    514 				break;
    515 			} else {
    516 				suff = len(chars);
    517 				if (end == 0) end = len(chars);
    518 				append(chars, utf8::encoderune(r.0)...);
    519 				basechrs = "0123456789";
    520 			};
    521 		case =>
    522 			unget(lex, r);
    523 			break;
    524 		} else append(chars, utf8::encoderune(r.0)...);
    525 		started = true;
    526 	};
    527 	if (!started) {
    528 		return syntaxerr(loc, "expected integer literal");
    529 	};
    530 	if (end == 0) end = len(chars);
    531 	lex.require_int = false;
    532 
    533 	let exp = match (exp) {
    534 	case void =>
    535 		yield "0";
    536 	case let exp: size =>
    537 		let end = match (suff) {
    538 		case void =>
    539 			yield len(chars);
    540 		case let suff: size =>
    541 			yield suff;
    542 		};
    543 		yield strings::fromutf8(chars[exp..end])!;
    544 	};
    545 	let exp = match (strconv::stoi(exp)) {
    546 	case let exp: int =>
    547 		yield exp;
    548 	case strconv::invalid =>
    549 		abort(); // Shouldn't be lexed in
    550 	case strconv::overflow =>
    551 		return syntaxerr(loc, "overflow in exponent");
    552 	};
    553 
    554 	let floatend = match (suff) {
    555 	case let suff: size =>
    556 		yield suff;
    557 	case void =>
    558 		yield len(chars);
    559 	};
    560 	let suff = match (suff) {
    561 	case let suff: size =>
    562 		yield strings::fromutf8(chars[suff..])!;
    563 	case void =>
    564 		yield "";
    565 	};
    566 	let suff = if (suff == "u8") ltok::LIT_U8
    567 		else if (suff == "u16") ltok::LIT_U16
    568 		else if (suff == "u32") ltok::LIT_U32
    569 		else if (suff == "u64") ltok::LIT_U64
    570 		else if (suff == "u") ltok::LIT_UINT
    571 		else if (suff == "z") ltok::LIT_SIZE
    572 		else if (suff == "i8") ltok::LIT_I8
    573 		else if (suff == "i16") ltok::LIT_I16
    574 		else if (suff == "i32") ltok::LIT_I32
    575 		else if (suff == "i64") ltok::LIT_I64
    576 		else if (suff == "i") ltok::LIT_INT
    577 		else if (suff == "" && !float && exp >= 0) ltok::LIT_ICONST
    578 		else if (suff == "f32") ltok::LIT_F32
    579 		else if (suff == "f64") ltok::LIT_F64
    580 		else if (suff == "" && (float || exp < 0)) ltok::LIT_FCONST
    581 		else return syntaxerr(loc, "invalid literal suffix");
    582 
    583 	let exp = if (exp < 0) switch (suff) {
    584 		case ltok::LIT_F32, ltok::LIT_F64, ltok::LIT_FCONST =>
    585 			yield exp: size;
    586 		case => return syntaxerr(loc,
    587 				"invalid negative exponent of integer");
    588 	} else exp: size;
    589 
    590 	let val = strings::fromutf8(chars[..end])!;
    591 	let val = switch (suff) {
    592 	case ltok::LIT_F32, ltok::LIT_F64, ltok::LIT_FCONST =>
    593 		val = strings::fromutf8(chars[..floatend])!;
    594 		yield strconv::stof64(val);
    595 	case =>
    596 		yield strconv::stou64b(val, base);
    597 	};
    598 	let val = match (val) {
    599 	case let val: u64 =>
    600 		for (let i = 0z; i < exp; i += 1) {
    601 			val *= 10;
    602 		};
    603 		yield val;
    604 	case let val: f64 =>
    605 		yield val;
    606 	case strconv::invalid =>
    607 		abort(); // Shouldn't be lexed in
    608 	case strconv::overflow =>
    609 		return syntaxerr(loc, "overflow in exponent");
    610 	};
    611 
    612 	line_comment(lex)?;
    613 	return (suff, val, loc);
    614 };
    615 
    616 fn lex2(lexr: *lexer) (token | error) = {
    617 	let first = next(lexr)? as (rune, location);
    618 	let tok: (ltok, [](rune, ltok)) = switch (first.0) {
    619 	case '*' =>
    620 		yield (ltok::TIMES, [('=', ltok::TIMESEQ)]);
    621 	case '%' =>
    622 		yield (ltok::MODULO, [('=', ltok::MODEQ)]);
    623 	case '/' =>
    624 		match (next(lexr)?) {
    625 		case let r: (rune, location) =>
    626 			switch (r.0) {
    627 			case '=' =>
    628 				line_comment(lexr)?;
    629 				return (ltok::DIVEQ, void, first.1);
    630 			case '/' =>
    631 				lex_comment(lexr)?;
    632 				return lex(lexr);
    633 			case =>
    634 				unget(lexr, r);
    635 				return (ltok::DIV, void, first.1);
    636 			};
    637 		case io::EOF =>
    638 			return (ltok::DIV, void, first.1);
    639 		};
    640 	case '+' =>
    641 		yield (ltok::PLUS, [('=', ltok::PLUSEQ)]);
    642 	case '-' =>
    643 		yield (ltok::MINUS, [('=', ltok::MINUSEQ)]);
    644 	case ':' =>
    645 		yield (ltok::COLON, [(':', ltok::DOUBLE_COLON)]);
    646 	case '!' =>
    647 		yield (ltok::LNOT, [('=', ltok::NEQUAL)]);
    648 	case '=' =>
    649 		yield (ltok::EQUAL, [('=', ltok::LEQUAL), ('>', ltok::ARROW)]);
    650 	case =>
    651 		return syntaxerr(first.1, "unknown token sequence");
    652 	};
    653 	match (next(lexr)?) {
    654 	case let r: (rune, location) =>
    655 		for (let i = 0z; i < len(tok.1); i += 1) {
    656 			if (tok.1[i].0 == r.0) {
    657 				line_comment(lexr)?;
    658 				return (tok.1[i].1, void, first.1);
    659 			};
    660 		};
    661 		unget(lexr, r);
    662 		line_comment(lexr)?;
    663 	case io::EOF => void;
    664 	};
    665 	return (tok.0, void, first.1);
    666 };
    667 
    668 fn lex3(lex: *lexer) (token | error) = {
    669 	let r = next(lex)? as (rune, location);
    670 	let toks = switch (r.0) {
    671 	case '.' =>
    672 		let tok = if (try(lex, '.')? is void) {
    673 			lex.require_int = true;
    674 			yield ltok::DOT;
    675 		} else if (try(lex, '.')? is void) {
    676 			yield ltok::SLICE;
    677 		} else ltok::ELLIPSIS;
    678 		line_comment(lex)?;
    679 		return (tok, void, r.1);
    680 	case '<' =>
    681 		yield [ltok::LESS, ltok::LESSEQ, ltok::LSHIFT, ltok::LSHIFTEQ];
    682 	case '>' =>
    683 		yield [ltok::GT, ltok::GTEQ, ltok::RSHIFT,
    684 			ltok::RSHIFTEQ];
    685 	case '&' =>
    686 		yield [ltok::BAND, ltok::BANDEQ, ltok::LAND, ltok::LANDEQ];
    687 	case '|' =>
    688 		yield [ltok::BOR, ltok::BOREQ, ltok::LOR, ltok::LOREQ];
    689 	case '^' =>
    690 		yield [ltok::BXOR, ltok::BXOREQ, ltok::LXOR, ltok::LXOREQ];
    691 	case =>
    692 		return syntaxerr(r.1, "unknown token sequence");
    693 	};
    694 	let idx = match (try(lex, r.0, '=')?) {
    695 	case void =>
    696 		yield 0; // X
    697 	case let n: (rune, location) =>
    698 		yield switch (n.0) {
    699 		case '=' =>
    700 			yield 1; // X=
    701 		case =>
    702 			yield match (try(lex, '=')?) {
    703 			case void =>
    704 				yield 2; // XX
    705 			case (rune, location) =>
    706 				yield 3; // XX=
    707 			};
    708 		};
    709 	};
    710 	line_comment(lex)?;
    711 	return (toks[idx], void, r.1);
    712 };
    713 
    714 // Unlex a single token. The next call to [[lex]] will return this token. Only one
    715 // unlex is supported at a time; you must call [[lex]] before calling [[unlex]]
    716 // again.
    717 export fn unlex(lex: *lexer, tok: token) void = {
    718 	assert(lex.un is void, "attempted to unlex more than one token");
    719 	lex.un = tok;
    720 };
    721 
    722 fn next(lex: *lexer) ((rune, location) | syntax | io::EOF | io::error) = {
    723 	match (lex.rb[0]) {
    724 	case void => void;
    725 	case let r: ((rune, location) | io::EOF) =>
    726 		lex.rb[0] = lex.rb[1];
    727 		lex.rb[1] = void;
    728 		return r;
    729 	};
    730 
    731 	match (bufio::scanrune(lex.in)) {
    732 	case let e: (io::EOF | io::error) =>
    733 		return e;
    734 	case let r: rune =>
    735 		const loc = mkloc(lex);
    736 		let tmp = lex.prevrlocs;
    737 		lex.prevrlocs[1..] = tmp[..len(tmp) - 1];
    738 		lex.prevrlocs[0] = loc;
    739 		lexloc(lex, r);
    740 		return (r, loc);
    741 	case utf8::invalid =>
    742 		return syntaxerr(mkloc(lex), "Source file is not valid UTF-8");
    743 	};
    744 };
    745 
    746 fn nextw(lex: *lexer) ((rune, location) | io::EOF | io::error) = {
    747 	for (true) match (next(lex)) {
    748 	case let e: (io::error | io::EOF) =>
    749 		return e;
    750 	case let r: (rune, location) =>
    751 		if (ascii::isspace(r.0)) {
    752 			if (r.0 == '\n') {
    753 				free(lex.comment);
    754 				lex.comment = "";
    755 			};
    756 			continue;
    757 		};
    758 		if (!is_name(r.0, true) && r.0 != '/') {
    759 			free(lex.comment);
    760 			lex.comment = "";
    761 		};
    762 		return r;
    763 	};
    764 	abort();
    765 };
    766 
    767 fn try(
    768 	lex: *lexer,
    769 	want: rune...
    770 ) ((rune, location) | syntax | void | io::error) = {
    771 	let r = match (next(lex)?) {
    772 	case io::EOF =>
    773 		return;
    774 	case let r: (rune, location) =>
    775 		yield r;
    776 	};
    777 	assert(len(want) > 0);
    778 	for (let i = 0z; i < len(want); i += 1) {
    779 		if (r.0 == want[i]) {
    780 			return r;
    781 		};
    782 	};
    783 	unget(lex, r);
    784 };
    785 
    786 fn lexloc(lex: *lexer, r: rune) void = {
    787 	switch (r) {
    788 	case '\n' =>
    789 		lex.loc.0 += 1;
    790 		lex.loc.1 = 1;
    791 	case '\t' =>
    792 		lex.loc.1 += 8 - lex.loc.1 % 8 + 1;
    793 	case =>
    794 		lex.loc.1 += 1;
    795 	};
    796 };
    797 
    798 fn unget(lex: *lexer, r: ((rune, location) | io::EOF)) void = {
    799 	if (!(lex.rb[0] is void)) {
    800 		assert(lex.rb[1] is void, "ungot too many runes");
    801 		lex.rb[1] = lex.rb[0];
    802 	};
    803 	lex.rb[0] = r;
    804 };
    805 
    806 export fn mkloc(lex: *lexer) location = {
    807 	match (lex.un) {
    808 	case let t: token =>
    809 		return lex.prevunlocs[1].1;
    810 	case void =>
    811 		match (lex.rb[0]) {
    812 		case let r: (rune, location) =>
    813 			return r.1;
    814 		case void =>
    815 			return location {
    816 				path = lex.path,
    817 				line = lex.loc.0,
    818 				col = lex.loc.1,
    819 			};
    820 		};
    821 	};
    822 };
    823 
    824 export fn prevloc(lex: *lexer) location = {
    825 	match (lex.un) {
    826 	case let t: token =>
    827 		return lex.prevunlocs[1].0;
    828 	case void =>
    829 		let i = 0z;
    830 		for (i < len(lex.rb); i += 1) if (lex.rb[i] is void) break;
    831 		return lex.prevrlocs[i];
    832 	};
    833 };
    834 
    835 fn syntaxerr(loc: location, why: str) error = (loc, why);