lex.ha (19407B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use ascii;
use bufio;
use encoding::utf8;
use fmt;
use io;
use memio;
use os;
use path;
use sort;
use sort::cmp;
use strconv;
use strings;
use types;

// Lexer state. Positions are stored as (line, column) pairs.
export type lexer = struct {
	// Scanner the runes are read from.
	in: *bufio::scanner,
	// Path reported in token and error locations.
	path: str,
	// Position of the next rune to be read.
	loc: (uint, uint),
	// Value of loc before the most recently read rune.
	prevrloc: (uint, uint),
	un: token, // ltok::EOF when no token was unlexed
	// (previous, current) positions recorded after each of the last two
	// calls to [[lex]]; index 0 is the most recent.
	prevunlocs: [2]((uint, uint), (uint, uint)),
	flags: flag,
	// Comment text gathered when flag::COMMENTS is set.
	comment: str,
	// Set after a lone '.' token so that the following numeric literal
	// does not consume a '.'.
	require_int: bool,
};

// Flags which apply to this lexer
export type flag = enum uint {
	NONE = 0,
	// Enables lexing comments
	COMMENTS = 1 << 0,
};

// A syntax error
export type syntax = !(location, str);

// All possible lexer errors
export type error = !(io::error | syntax);

// Converts a lexer error into a human-readable message. Syntax errors are
// formatted into a static buffer, so the result may be overwritten by a later
// call.
export fn strerror(err: error) const str = {
	static let buf: [2048]u8 = [0...];
	return match (err) {
	case let ioerr: io::error =>
		yield io::strerror(ioerr);
	case let s: syntax =>
		const where = s.0;
		const why = s.1;
		yield fmt::bsprintf(buf, "{}:{}:{}: syntax error: {}",
			where.path, where.line, where.col, why);
	};
};

// Creates a lexer which reads from the given [[bufio::scanner]], starting at
// line 1, column 1, with no unlexed token. The path is borrowed.
export fn init(
	in: *bufio::scanner,
	path: str,
	flags: flag = flag::NONE,
) lexer = {
	const origin: (uint, uint) = (1, 1);
	return lexer {
		in = in,
		path = path,
		loc = origin,
		prevrloc = origin,
		un = (ltok::EOF, void, location {
			path = path,
			line = 1,
			col = 1,
		}),
		prevunlocs = [(origin, origin)...],
		flags = flags,
		...
	};
};

// Returns the current contents of the comment buffer. This is the empty string
// if no comment has been collected (or if [[flag::COMMENTS]] was not enabled
// for this lexer).
export fn comment(lex: *lexer) str = {
	return lex.comment;
};

// Returns the next token from the lexer.
export fn lex(lex: *lexer) (token | error) = {
	// A token pushed back with unlex takes priority over new input.
	if (lex.un.0 != ltok::EOF) {
		defer lex.un.0 = ltok::EOF;
		return lex.un;
	};

	// On the way out, shift the location history so that mkloc and
	// prevloc can report positions while a token is unlexed.
	defer {
		lex.prevunlocs[1] = lex.prevunlocs[0];
		const prev = prevloc(lex);
		const loc = mkloc(lex);
		lex.prevunlocs[0] = (
			(prev.line, prev.col),
			(loc.line, loc.col),
		);
	};

	let r = match (nextw(lex)?) {
	case io::EOF =>
		return (ltok::EOF, void, mkloc(lex));
	case let r: (rune, location) =>
		yield r;
	};

	if (ascii::isdigit(r.0)) {
		unget(lex, r.0);
		return lex_literal(lex);
	};

	lex.require_int = false;
	if (is_name(r.0, false)) {
		unget(lex, r.0);
		return lex_name(lex, r.1);
	};

	let tok = switch (r.0) {
	case '"', '\'', '`' =>
		unget(lex, r.0);
		return lex_rn_str(lex);
	case '.', '<', '>', '&', '|', '^' =>
		unget(lex, r.0);
		return lex3(lex);
	case '*', '%', '/', '+', '-', ':', '!', '=' =>
		unget(lex, r.0);
		return lex2(lex);
	case '~' =>
		yield ltok::BNOT;
	case ',' =>
		yield ltok::COMMA;
	case '{' =>
		yield ltok::LBRACE;
	case '[' =>
		yield ltok::LBRACKET;
	case '(' =>
		yield ltok::LPAREN;
	case '}' =>
		yield ltok::RBRACE;
	case ']' =>
		yield ltok::RBRACKET;
	case ')' =>
		yield ltok::RPAREN;
	case ';' =>
		yield ltok::SEMICOLON;
	case '?' =>
		yield ltok::QUESTION;
	case =>
		return syntaxerr(r.1, "invalid character");
	};

	line_comment(lex)?;
	return (tok, void, r.1);
};

// Reports whether r may appear in a name: an ASCII letter, '_' or '@', and,
// when num is true, an ASCII digit as well.
fn is_name(r: rune, num: bool) bool =
	ascii::isalpha(r) || r == '_' || r == '@' || (num && ascii::isdigit(r));

// Reads exactly n (at most 8) hexadecimal digits and returns the rune with
// that code point. loc is the location used for error reporting.
fn lex_unicode(lex: *lexer, loc: location, n: size) (rune | error) = {
	assert(n < 9);
	let buf: [8]u8 = [0...];
	for (let i = 0z; i < n; i += 1z) {
		let r = match (next(lex)?) {
		case io::EOF =>
			return syntaxerr(loc,
				"unexpected EOF scanning for escape");
		case let r: (rune, location) =>
			yield r.0;
		};
		if (!ascii::isxdigit(r)) {
			return syntaxerr(loc,
				"unexpected rune scanning for escape");
		};
		buf[i] = r: u8;
	};
	// Every byte was checked to be an ASCII hex digit above.
	let s = strings::fromutf8_unsafe(buf[..n]);
	return strconv::stou32(s, strconv::base::HEX) as u32: rune;
};

// Reads one rune of a rune or string literal, decoding a backslash escape
// sequence if one is present. loc is the location used for error reporting.
fn lex_rune(lex: *lexer, loc: location) (rune | error) = {
	let r = match (next(lex)?) {
	case io::EOF =>
		return syntaxerr(loc, "unexpected EOF scanning for rune");
	case let r: (rune, location) =>
		yield r.0;
	};
	if (r != '\\') {
		return r;
	};
	r = match (next(lex)?) {
	case io::EOF =>
		return syntaxerr(loc, "unexpected EOF scanning for escape");
	case let r: (rune, location) =>
		yield r.0;
	};
	switch (r) {
	case '\\' =>
		return '\\';
	case '\'' =>
		return '\'';
	case '0' =>
		return '\0';
	case 'a' =>
		return '\a';
	case 'b' =>
		return '\b';
	case 'f' =>
		return '\f';
	case 'n' =>
		return '\n';
	case 'r' =>
		return '\r';
	case 't' =>
		return '\t';
	case 'v' =>
		return '\v';
	case '"' =>
		return '\"';
	case 'x' =>
		return lex_unicode(lex, loc, 2);
	case 'u' =>
		return lex_unicode(lex, loc, 4);
	case 'U' =>
		return lex_unicode(lex, loc, 8);
	case =>
		return syntaxerr(mkloc(lex), "unknown escape sequence");
	};
};

// Lexes the remainder of a string literal whose opening delimiter has already
// been consumed, including any directly following string literals, which are
// concatenated. The string in the returned token is heap allocated; the
// buffer is released here if scanning fails.
fn lex_string(lex: *lexer, loc: location, delim: rune) (token | error) = {
	let buf = memio::dynamic();
	match (scan_string_into(lex, loc, delim, &buf)) {
	case void => void;
	case let err: error =>
		// Do not leak the partially filled buffer on failure.
		io::close(&buf)!;
		return err;
	};
	return (ltok::LIT_STR, memio::string(&buf)!, loc);
};

// Appends the contents of a string literal (and of any adjacent literals) to
// buf. The caller owns buf and is responsible for releasing it on error.
fn scan_string_into(
	lex: *lexer,
	loc: location,
	delim: rune,
	buf: *memio::stream,
) (void | error) = {
	// Body of the literal, up to the closing delimiter.
	for (true) match (next(lex)?) {
	case io::EOF =>
		return syntaxerr(loc, "unexpected EOF scanning string literal");
	case let r: (rune, location) =>
		if (r.0 == delim) break
		else if (delim == '"' && r.0 == '\\') {
			// Escapes are only decoded in "..." strings.
			unget(lex, r.0);
			let esc = lex_rune(lex, loc)?;
			memio::appendrune(buf, esc)?;
		} else {
			memio::appendrune(buf, r.0)?;
		};
	};
	// Look past whitespace and // comments for an adjacent literal.
	for (true) match (nextw(lex)?) {
	case io::EOF =>
		break;
	case let r: (rune, location) =>
		switch (r.0) {
		case '"', '`' =>
			const tok = lex_string(lex, loc, r.0)?;
			const tail = tok.1 as str;
			memio::concat(buf, tail)!;
			free(tail);
			break;
		case '/' =>
			match (nextw(lex)?) {
			case io::EOF =>
				unget(lex, r.0);
			case let s: (rune, location) =>
				if (s.0 == '/') {
					lex_comment(lex)?;
					continue;
				} else {
					unget(lex, s.0);
					unget(lex, r.0);
				};
			};
			break;
		case =>
			unget(lex, r.0);
			break;
		};
	};
};

// Lexes a rune literal or a string literal. The caller must have ungot the
// opening quote, so the next rune is one of ', " or `.
fn lex_rn_str(lex: *lexer) (token | error) = {
	const loc = mkloc(lex);
	let r = match (next(lex)) {
	case let r: (rune, location) =>
		yield r.0;
	case (io::EOF | io::error) =>
		abort();
	};
	switch (r) {
	case '\'' => void;
	case '\"', '`' =>
		return lex_string(lex, loc, r);
	case =>
		abort(); // Invariant
	};

	// Rune literal
	let ret: token = (ltok::LIT_RCONST, lex_rune(lex, loc)?, loc);
	match (next(lex)?) {
	case io::EOF =>
		return syntaxerr(loc, "unexpected EOF");
	case let n: (rune, location) =>
		if (n.0 != '\'') {
			return syntaxerr(n.1, "expected \"\'\"");
		};
	};
	line_comment(lex)?;
	return ret;
};

// Lexes a name or keyword starting at loc. The caller must have ungot the
// first rune of the name. For ltok::NAME the string in the token is heap
// allocated; the buffer is released here if scanning fails.
fn lex_name(lex: *lexer, loc: location) (token | error) = {
	let buf = memio::dynamic();
	match (scan_name_into(lex, &buf)) {
	case void => void;
	case let err: error =>
		// Do not leak the partially filled buffer on failure.
		io::close(&buf)!;
		return err;
	};

	let n = memio::string(&buf)!;

	// Keywords occupy the first ltok::LAST_KEYWORD + 1 entries of bmap.
	match (sort::search(bmap[..ltok::LAST_KEYWORD+1],
		size(str), &n, &cmp::strs)) {
	case void =>
		return (ltok::NAME, n, loc);
	case let i: size =>
		free(n);
		return (i: ltok, void, loc);
	};
};

// Appends the runes of a name to buf, then processes any trailing line
// comment. The caller owns buf and is responsible for releasing it on error.
fn scan_name_into(lex: *lexer, buf: *memio::stream) (void | error) = {
	match (next(lex)) {
	case let r: (rune, location) =>
		assert(is_name(r.0, false));
		memio::appendrune(buf, r.0)!;
	case (io::EOF | io::error) =>
		abort();
	};

	for (true) match (next(lex)?) {
	case io::EOF => break;
	case let r: (rune, location) =>
		if (!is_name(r.0, true)) {
			unget(lex, r.0);
			break;
		};
		memio::appendrune(buf, r.0)?;
	};

	line_comment(lex)?;
};

// If comments are enabled, skips blanks after a token and, when a "//"
// comment follows on the same line, stores it as the lexer's comment.
fn line_comment(lex: *lexer) (void | error) = {
	if (lex.flags & flag::COMMENTS != flag::COMMENTS) {
		return;
	};

	let r: (rune, location) = ('\0', location { ... });
	for (true) match (try(lex, '\t', ' ', '/')?) {
	case void =>
		return;
	case let v: (rune, location) =>
		switch (v.0) {
		case '\t', ' ' => void;
		case '/' =>
			r = v;
			break;
		case => abort(); // unreachable
		};
	};

	// A single '/' is not a comment; put it back.
	if (try(lex, '/')? is void) {
		unget(lex, r.0);
		return;
	};

	free(lex.comment);
	lex.comment = "";
	lex_comment(lex)?;
};

// Consumes the rest of the current line after "//". When comments are enabled
// the text (including the newline, if any) is appended to the lexer's comment;
// otherwise it is discarded.
fn lex_comment(lexr: *lexer) (void | error) = {
	if (lexr.flags & flag::COMMENTS != flag::COMMENTS) {
		for (true) match (next(lexr)?) {
		case io::EOF =>
			break;
		case let r: (rune, location) =>
			if (r.0 == '\n') {
				break;
			};
		};
		return;
	};

	let buf = memio::dynamic();
	defer io::close(&buf)!;
	for (true) match (next(lexr)?) {
	case io::EOF =>
		break;
	case let r: (rune, location) =>
		memio::appendrune(&buf, r.0)!;
		if (r.0 == '\n') {
			break;
		};
	};
	let bytes = strings::toutf8(lexr.comment);
	append(bytes, strings::toutf8(memio::string(&buf)!)...);
	lexr.comment = strings::fromutf8(bytes)!;
};

// Lexes a numeric literal: an optional 0b/0o/0x prefix, digits, an optional
// fraction, an optional exponent and an optional type suffix. The caller must
// have ungot the first digit.
fn lex_literal(lex: *lexer) (token | error) = {
	const loc = mkloc(lex);
	// Raw bytes of the literal. Only needed while parsing; the returned
	// token carries a number, so the buffer is always released on exit.
	let chars: []u8 = [];
	defer free(chars);
	let r = match (next(lex)?) {
	case io::EOF =>
		return (ltok::EOF, void, loc);
	case let r: (rune, location) =>
		yield r;
	};

	let started = false;
	let base = strconv::base::DEC;
	if (r.0 == '0') {
		append(chars, utf8::encoderune(r.0)...);
		r = match (next(lex)?) {
		case io::EOF =>
			return (ltok::LIT_ICONST, 0u64, loc);
		case let r: (rune, location) =>
			yield r;
		};
		switch (r.0) {
		case 'b' =>
			base = strconv::base::BIN;
		case 'o' =>
			base = strconv::base::OCT;
		case 'x' =>
			base = strconv::base::HEX;
		case =>
			if (ascii::isdigit(r.0)) {
				return syntaxerr(loc,
					"Leading zeros in number literals aren't permitted (for octal, use the 0o prefix instead)");
			};
			started = true;
			unget(lex, r.0);
		};
	} else unget(lex, r.0);
	let basechrs = switch (base) {
	case strconv::base::BIN =>
		yield "01";
	case strconv::base::OCT =>
		yield "01234567";
	case strconv::base::DEC =>
		yield "0123456789";
	case strconv::base::HEX =>
		yield "0123456789ABCDEFabcdef";
	case => abort(); // unreachable
	};

	// suff and exp are offsets into chars where the suffix and exponent
	// digits begin; end is where the integer digits stop.
	let suff: (size | void) = void;
	let exp: (size | void) = void;
	let end = 0z;
	let float = false;
	for (true) {
		r = match (next(lex)?) {
		case io::EOF =>
			break;
		case let r: (rune, location) =>
			yield r;
		};
		if (!strings::contains(basechrs, r.0)) switch (r.0) {
		case '.' =>
			if (!started) {
				return syntaxerr(loc,
					"Expected integer literal");
			};
			if (float || exp is size || suff is size
					|| lex.require_int) {
				unget(lex, r.0);
				break;
			} else {
				r = match (next(lex)?) {
				case io::EOF =>
					break;
				case let r: (rune, location) =>
					yield r;
				};
				// Only a '.' followed by a digit starts a
				// fraction.
				if (!strings::contains(basechrs, r.0)) {
					unget(lex, r.0);
					unget(lex, '.');
					break;
				};
				unget(lex, r.0);
				float = true;
				append(chars, utf8::encoderune('.')...);
			};
		case 'e', 'E', 'p', 'P' =>
			if (!started) {
				return syntaxerr(loc,
					"Expected integer literal");
			};
			// 'e'/'E' is accepted only for decimal literals and
			// 'p'/'P' only for the other bases.
			if ((r.0 == 'e' || r.0 == 'E') !=
					(base == strconv::base::DEC)) {
				unget(lex, r.0);
				break;
			};
			if (exp is size || suff is size) {
				unget(lex, r.0);
				break;
			} else {
				if (end == 0) end = len(chars);
				append(chars, utf8::encoderune(r.0)...);
				exp = len(chars);
				r = match (next(lex)?) {
				case io::EOF =>
					break;
				case let r: (rune, location) =>
					yield r;
				};
				switch (r.0) {
				case '+', '-' =>
					append(chars, utf8::encoderune(r.0)...);
				case =>
					unget(lex, r.0);
				};
				basechrs = "0123456789";
			};
		case 'i', 'u', 'f', 'z' =>
			if (!started) {
				return syntaxerr(loc,
					"Expected integer literal");
			};
			if (suff is size || r.0 != 'f' && float
					|| r.0 == 'f'
					&& base != strconv::base::DEC) {
				unget(lex, r.0);
				break;
			} else {
				suff = len(chars);
				if (end == 0) end = len(chars);
				append(chars, utf8::encoderune(r.0)...);
				basechrs = "0123456789";
			};
		case =>
			unget(lex, r.0);
			break;
		} else append(chars, utf8::encoderune(r.0)...);
		started = true;
	};
	if (!started) {
		return syntaxerr(loc, "expected integer literal");
	};
	if (end == 0) end = len(chars);
	lex.require_int = false;

	let exp = match (exp) {
	case void =>
		yield "0";
	case let exp: size =>
		let end = match (suff) {
		case void =>
			yield len(chars);
		case let suff: size =>
			yield suff;
		};
		yield strings::fromutf8(chars[exp..end])!;
	};
	let exp = match (strconv::stoi(exp)) {
	case let exp: int =>
		yield exp;
	case strconv::invalid =>
		return syntaxerr(mkloc(lex), "expected exponent");
	case strconv::overflow =>
		return syntaxerr(loc, "overflow in exponent");
	};

	let floatend = match (suff) {
	case let suff: size =>
		yield suff;
	case void =>
		yield len(chars);
	};
	let suff = match (suff) {
	case let suff: size =>
		yield strings::fromutf8(chars[suff..])!;
	case void =>
		yield "";
	};
	let (suff, signed) = if (suff == "u8") (ltok::LIT_U8, false)
		else if (suff == "u16") (ltok::LIT_U16, false)
		else if (suff == "u32") (ltok::LIT_U32, false)
		else if (suff == "u64") (ltok::LIT_U64, false)
		else if (suff == "u") (ltok::LIT_UINT, false)
		else if (suff == "z") (ltok::LIT_SIZE, false)
		else if (suff == "i8") (ltok::LIT_I8, true)
		else if (suff == "i16") (ltok::LIT_I16, true)
		else if (suff == "i32") (ltok::LIT_I32, true)
		else if (suff == "i64") (ltok::LIT_I64, true)
		else if (suff == "i") (ltok::LIT_INT, true)
		else if (suff == "" && !float && exp >= 0) (ltok::LIT_ICONST, false)
		else if (suff == "f32") (ltok::LIT_F32, false)
		else if (suff == "f64") (ltok::LIT_F64, false)
		else if (suff == "" && (float || exp < 0)) (ltok::LIT_FCONST, false)
		else return syntaxerr(loc, "invalid literal suffix");

	let exp = if (exp < 0) switch (suff) {
	case ltok::LIT_F32, ltok::LIT_F64, ltok::LIT_FCONST =>
		yield exp: size;
	case => return syntaxerr(loc,
			"invalid negative exponent of integer");
	} else exp: size;

	let val = strings::fromutf8(chars[..end])!;
	let val = switch (suff) {
	case ltok::LIT_F32, ltok::LIT_F64, ltok::LIT_FCONST =>
		// Floats are parsed with their fraction and exponent.
		val = strings::fromutf8(chars[..floatend])!;
		yield strconv::stof64(val, base);
	case =>
		yield strconv::stou64(val, base);
	};
	let val = match (val) {
	case let val: u64 =>
		// Apply the exponent to integers by repeated multiplication,
		// detecting wrap-around after each step.
		for (let i = 0z; i < exp; i += 1) {
			let old = val;
			val *= 10;
			if (val / 10 != old) {
				return syntaxerr(loc, "overflow in exponent");
			};
		};
		if (signed && val > types::I64_MIN: u64) {
			return syntaxerr(loc, "overflow in exponent");
		};
		yield val;
	case let val: f64 =>
		yield val;
	case strconv::invalid =>
		abort(); // Shouldn't be lexed in
	case strconv::overflow =>
		return syntaxerr(loc, "literal overflow");
	};

	line_comment(lex)?;
	return (suff, val, loc);
};

// Lexes an operator of one or two runes starting with one of
// * % / + - : ! =. A "//" sequence is treated as a comment, after which the
// next token is lexed and returned instead.
fn lex2(lexr: *lexer) (token | error) = {
	let first = next(lexr)? as (rune, location);
	// tok.0 is the single-rune token; tok.1 lists the second runes which
	// form a two-rune token, with the resulting token.
	let tok: (ltok, [](rune, ltok)) = switch (first.0) {
	case '*' =>
		yield (ltok::TIMES, [('=', ltok::TIMESEQ)]);
	case '%' =>
		yield (ltok::MODULO, [('=', ltok::MODEQ)]);
	case '/' =>
		match (next(lexr)?) {
		case let r: (rune, location) =>
			switch (r.0) {
			case '=' =>
				line_comment(lexr)?;
				return (ltok::DIVEQ, void, first.1);
			case '/' =>
				lex_comment(lexr)?;
				return lex(lexr);
			case =>
				unget(lexr, r.0);
				return (ltok::DIV, void, first.1);
			};
		case io::EOF =>
			return (ltok::DIV, void, first.1);
		};
	case '+' =>
		yield (ltok::PLUS, [('=', ltok::PLUSEQ)]);
	case '-' =>
		yield (ltok::MINUS, [('=', ltok::MINUSEQ)]);
	case ':' =>
		yield (ltok::COLON, [(':', ltok::DOUBLE_COLON)]);
	case '!' =>
		yield (ltok::LNOT, [('=', ltok::NEQUAL)]);
	case '=' =>
		yield (ltok::EQUAL, [('=', ltok::LEQUAL), ('>', ltok::ARROW)]);
	case =>
		return syntaxerr(first.1, "unknown token sequence");
	};
	match (next(lexr)?) {
	case let r: (rune, location) =>
		for (let i = 0z; i < len(tok.1); i += 1) {
			if (tok.1[i].0 == r.0) {
				line_comment(lexr)?;
				return (tok.1[i].1, void, first.1);
			};
		};
		unget(lexr, r.0);
		line_comment(lexr)?;
	case io::EOF => void;
	};
	return (tok.0, void, first.1);
};

// Lexes an operator of up to three runes starting with one of . < > & | ^.
fn lex3(lex: *lexer) (token | error) = {
	let r = next(lex)? as (rune, location);
	// For the non-'.' operators: [X, X=, XX, XX=].
	let toks = switch (r.0) {
	case '.' =>
		let tok = if (try(lex, '.')? is void) {
			// A lone '.' must not be followed by a float literal.
			lex.require_int = true;
			yield ltok::DOT;
		} else if (try(lex, '.')? is void) {
			yield ltok::DOUBLE_DOT;
		} else ltok::ELLIPSIS;
		line_comment(lex)?;
		return (tok, void, r.1);
	case '<' =>
		yield [ltok::LESS, ltok::LESSEQ, ltok::LSHIFT, ltok::LSHIFTEQ];
	case '>' =>
		yield [ltok::GT, ltok::GTEQ, ltok::RSHIFT,
			ltok::RSHIFTEQ];
	case '&' =>
		yield [ltok::BAND, ltok::BANDEQ, ltok::LAND, ltok::LANDEQ];
	case '|' =>
		yield [ltok::BOR, ltok::BOREQ, ltok::LOR, ltok::LOREQ];
	case '^' =>
		yield [ltok::BXOR, ltok::BXOREQ, ltok::LXOR, ltok::LXOREQ];
	case =>
		return syntaxerr(r.1, "unknown token sequence");
	};
	let idx = match (try(lex, r.0, '=')?) {
	case void =>
		yield 0; // X
	case let n: (rune, location) =>
		yield switch (n.0) {
		case '=' =>
			yield 1; // X=
		case =>
			yield match (try(lex, '=')?) {
			case void =>
				yield 2; // XX
			case (rune, location) =>
				yield 3; // XX=
			};
		};
	};
	line_comment(lex)?;
	return (toks[idx], void, r.1);
};

// Unlex a single token. The next call to [[lex]] will return this token. Only one
// unlex is supported at a time; you must call [[lex]] before calling [[unlex]]
// again.
export fn unlex(lex: *lexer, tok: token) void = {
	assert(lex.un.0 == ltok::EOF, "attempted to unlex more than one token");
	lex.un = tok;
};

// Reads the next rune from the scanner and returns it together with the
// location it was read at, advancing the lexer's position. Invalid UTF-8 is
// reported as a syntax error.
fn next(lex: *lexer) ((rune, location) | syntax | io::EOF | io::error) = {
	match (bufio::scan_rune(lex.in)) {
	case let e: (io::EOF | io::error) =>
		return e;
	case let r: rune =>
		const loc = mkloc(lex);
		lexloc(lex, r);
		return (r, loc);
	case utf8::invalid =>
		return syntaxerr(mkloc(lex), "Source file is not valid UTF-8");
	};
};

// Like [[next]], but skips ASCII whitespace. The stored comment is cleared
// when a newline is skipped, and when the returned rune is neither a name
// rune nor '/'.
fn nextw(lex: *lexer) ((rune, location) | io::EOF | error) = {
	for (true) match (next(lex)?) {
	case io::EOF =>
		return io::EOF;
	case let r: (rune, location) =>
		if (ascii::isspace(r.0)) {
			if (r.0 == '\n') {
				free(lex.comment);
				lex.comment = "";
			};
			continue;
		};
		if (!is_name(r.0, true) && r.0 != '/') {
			free(lex.comment);
			lex.comment = "";
		};
		return r;
	};
};

// Reads the next rune and returns it if it is one of the wanted runes.
// Otherwise the rune is put back and void is returned; void is also returned
// at EOF. At least one wanted rune must be given.
fn try(
	lex: *lexer,
	want: rune...
) ((rune, location) | syntax | void | io::error) = {
	// Check the precondition before consuming any input.
	assert(len(want) > 0);
	let r = match (next(lex)?) {
	case io::EOF =>
		return;
	case let r: (rune, location) =>
		yield r;
	};
	for (let i = 0z; i < len(want); i += 1) {
		if (r.0 == want[i]) {
			return r;
		};
	};
	unget(lex, r.0);
};

// Pushes a rune back onto the scanner and rewinds the lexer's position.
fn unget(lex: *lexer, r: rune) void = {
	bufio::unreadrune(lex.in, r);

	// here, we set the current location to the previous location, then
	// subtract one from the previous location's column. this is always
	// correct, even for tabs and newlines, since a tab or newline will
	// never be ungot after a previous unget call. besides tabs and
	// newlines, the rune will always be a printable ASCII character
	assert(ascii::isprint(r) || r == '\t' || r == '\n');
	assert(r != '\n' || lex.prevrloc.0 == lex.loc.0 - 1);

	lex.loc = lex.prevrloc;
	lex.prevrloc.1 -= 1;
};

// Advances the lexer's position past the rune r, remembering the previous
// position in prevrloc.
fn lexloc(lex: *lexer, r: rune) void = {
	lex.prevrloc = lex.loc;
	switch (r) {
	case '\n' =>
		lex.loc.0 += 1;
		lex.loc.1 = 1;
	case '\t' =>
		// NOTE(review): presumably intended to move to the next
		// 8-column tab stop; when the column is a multiple of 8 this
		// advances by 9. Confirm the intended behaviour.
		lex.loc.1 += 8 - lex.loc.1 % 8 + 1;
	case =>
		lex.loc.1 += 1;
	};
};

// Returns the lexer's current location. While a token is unlexed, the location
// recorded before that token was lexed is returned instead.
export fn mkloc(lex: *lexer) location = {
	const loc = if (lex.un.0 == ltok::EOF) lex.loc
		else lex.prevunlocs[1].1;
	return location {
		path = lex.path,
		line = loc.0,
		col = loc.1,
	};
};

// Returns the location preceding the lexer's current location. While a token
// is unlexed, the previous location recorded before that token was lexed is
// returned instead.
export fn prevloc(lex: *lexer) location = {
	const loc = if (lex.un.0 == ltok::EOF) lex.prevrloc
		else lex.prevunlocs[1].0;
	return location {
		path = lex.path,
		line = loc.0,
		col = loc.1,
	};
};

// Builds a syntax error for the given location and message. The path is copied
// into a static buffer, so the path of a previously returned error is
// overwritten by the next call.
export fn syntaxerr(loc: location, why: str) error = {
	static let buf = path::buffer{...};
	path::set(&buf, loc.path)!;
	loc.path = path::string(&buf);
	return (loc, why);
};