parse.ha (12642B)
use ascii;
use bufio;
use io;
use regex;
use strconv;
use strings;

// Any error produced while parsing a command line.
type ParseError = !(
	UnknownCommand
	| InvalidSuffix
	| UnexpectedSuffix
	| TrailingCharacters
	| ExpectedArgument
	| ExpectedMark
	| InvalidDelimiter
	| ExpectedDelimiter
);

// The command rune is not one of the commands known to [[scan_cmdname]].
type UnknownCommand = !rune;

// 'w' was directly followed by a rune other than 'q'.
type InvalidSuffix = !rune;

// 'e', 'E', 'f' or 'r' was directly followed by a non-blank rune.
type UnexpectedSuffix = !rune;

// Non-blank characters remain after a complete command.
type TrailingCharacters = !void;

// The line ended where a delimiter (and thus an argument) was required.
type ExpectedArgument = !void;

// 'k' was not followed by a mark character.
type ExpectedMark = !void;

// A space was used as the regex delimiter.
type InvalidDelimiter = !void;

// 's' had no delimiter after its regex.
type ExpectedDelimiter = !void;

// Command name used for an empty command line.
def NUL = '\x00';

// Reads one command line from the scanner and parses it into a [[Command]]:
// addresses first, then the command name, then its arguments.
//
// Commands which take further input consume additional lines:
// - 'a', 'c', 'i': text lines up to a line consisting of "." (or EOF);
// - 'g', 'v': a command list continued over lines ending in a backslash;
// - 's': a replacement continued over lines ending in a backslash.
// The collected lines are stored in cmd.textinput.
//
// Returns io::EOF if no line could be read for the command itself.
fn parse(input: *bufio::scanner) (Command | ParseError | InteractionError) = {
	let inputline = match (bufio::scan_line(input)?) {
	case let s: const str =>
		yield s;
	case io::EOF =>
		return io::EOF;
	};

	let t = strings::iter(inputline);
	let cmd = Command{ ... };

	cmd.addrs = scan_addrs(&t);
	cmd.name = scan_cmdname(&t)?;
	// 'ready' is false when the command still needs more input lines.
	let ready = parse_cmdargs(&cmd, &t)?;

	switch :input (cmd.name) {
	case => void;
	case 'a', 'c', 'i' =>
		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};

			// A lone "." terminates input mode.
			if (inputline == ".")
				break;

			append(cmd.textinput, strings::dup(inputline));
		};
	case 'g', 'v' =>
		// Single-line command list: store it and stop.
		if (!strings::hassuffix(cmd.arg2, '\\')) {
			append(cmd.textinput, strings::dup(cmd.arg2));
			yield :input;
		};
		append(cmd.textinput,
			strings::dup(strings::rtrim(cmd.arg2, '\\')));

		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};
			// workaround scan_line undelimited last line.
			// note, GNU ed would use "p\n" instead.
			if (inputline == "") {
				append(cmd.textinput, strings::dup("\n"));
				break;
			};
			// A line without a trailing backslash ends the list.
			if (!strings::hassuffix(inputline, '\\')) {
				append(cmd.textinput, strings::dup(inputline));
				break;
			};
			append(cmd.textinput,
				strings::dup(strings::rtrim(inputline, '\\')));
		};
	case 's' =>
		if (ready)
			yield;

		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};

			let t = strings::iter(inputline);
			let (part, seen_delim) = scan_item(&t, cmd.delim);

			// cmd.textinput holds the replacement text. 'part' is
			// already a fresh allocation made by scan_item, so it is
			// stored directly rather than duplicated.
			append(cmd.textinput, part);

			if (!seen_delim && strings::hassuffix(inputline, '\\'))
				continue;

			strings::next(&t); // skip delim
			let (count, global, printmode) = scan_substitute_flags(&t);
			//debug("count={} global={}", count, global);
			cmd.count = count;
			cmd.flag_global = global;
			cmd.printmode = printmode;

			scan_end_assert(&t)?;

			break;
		};
	};

	for (let i = 0z; i < len(cmd.textinput); i += 1)
		debug("parse() cmd.textinput[{}]=<{}>", i, cmd.textinput[i]);

	return cmd;
};

// Parses the arguments following the command name into cmd. Returns true if
// the command is complete, or false if [[parse]] must read further lines
// (text input for 'a', 'c', 'i', or a backslash-continued 'g', 'v' or 's').
fn parse_cmdargs(cmd: *Command, t: *strings::iterator) (bool | ParseError) = {
	switch (cmd.name) {
	// [ ]
	case NUL =>
		return true;

	// (q|Q)
	case 'q', 'Q' =>
		scan_end_assert(t)?;
		return true;

	// .[ <file>]
	case 'e', 'E', 'f', 'r' =>
		if (scan_blanks(t) == 0)
			match (strings::next(t)) {
			case let r: rune =>
				return r: UnexpectedSuffix;
			case void =>
				return true;
			};
		cmd.arg1 = scan_rest(t);
		return true;

	// w(q|[ <file>])
	case 'w' =>
		if (scan_blanks(t) == 0)
			match (strings::next(t)) {
			case let r: rune =>
				if (r == 'q') {
					cmd.suffix = r;
					return true;
				} else {
					return r: InvalidSuffix;
				};
			case void =>
				return true;
			};
		cmd.arg1 = scan_rest(t);
		return true;

	// k<x>
	case 'k' =>
		match (strings::next(t)) {
		case let r: rune =>
			cmd.suffix = r;
		case void =>
			return ExpectedMark;
		};
		scan_end_assert(t)?;
		return true;

	// !<shellcmd>
	case '!' =>
		cmd.arg1 = scan_rest(t);
		return true;

	// .[s] where 's' is '(l|n|p)'
	case 'd', 'h', 'H', 'j', 'l', 'n', 'p', 'P', 'u', '=' =>
		cmd.printmode = scan_printmode(t);
		scan_end_assert(t)?;
		return true;

	// .[s]
	case 'a', 'c', 'i' =>
		cmd.printmode = scan_printmode(t);
		scan_end_assert(t)?;
		return false;

	// .[s][ ]<addr>
	case 'm', 't' =>
		cmd.printmode = scan_printmode(t);
		cmd.arg1 = scan_rest(t);
		return true;

	// ./<regex>[/] where delimiter '/' is arbitrary
	case 'G', 'V' =>
		cmd.delim = scan_cmd_delim(t)?;
		cmd.arg1 = scan_item(t, cmd.delim).0;
		strings::next(t); // scan delimiter if exists
		scan_end_assert(t)?;
		return true;

	// ./<regex>/<cmdlist...>
	case 'g', 'v' =>
		cmd.delim = scan_cmd_delim(t)?;
		cmd.arg1 = scan_item(t, cmd.delim).0;
		strings::next(t); // scan delimiter if exists
		cmd.arg2 = scan_rest(t);
		// scan_rest consumed the whole line, so stepping back yields
		// its last rune; a backslash means the list continues.
		return (strings::prev(t) as rune) != '\\';

	// s/<regex>/[<replace>[/[<flags>]]]
	case 's' =>
		cmd.delim = scan_cmd_delim(t)?;
		cmd.arg1 = scan_item(t, cmd.delim).0;
		match (strings::next(t)) {
		case rune => void;
		case void =>
			return ExpectedDelimiter;
		};
		append(cmd.textinput, scan_item(t, cmd.delim).0);
		match (strings::next(t)) {
		case rune => void;
		case void =>
			// No closing delimiter on this line: the replacement
			// continues on the next line only if this one ended
			// with a backslash.
			return (strings::prev(t) as rune) != '\\';
		};
		let (count, global, printmode) = scan_substitute_flags(t);
		cmd.count = count;
		cmd.flag_global = global;
		cmd.printmode = printmode;
		scan_end_assert(t)?;
		return true;

	case '&' =>
		scan_end_assert(t)?;
		return true;

	case =>
		abort();
	};
};

// Reads the delimiter rune that directly follows a 'g', 'G', 'v', 'V' or 's'
// command. Fails with [[ExpectedArgument]] at end of line and with
// [[InvalidDelimiter]] if the delimiter is a space.
fn scan_cmd_delim(
	t: *strings::iterator,
) (rune | ExpectedArgument | InvalidDelimiter) = {
	match (strings::next(t)) {
	case void =>
		return ExpectedArgument;
	case let r: rune =>
		if (r == ' ')
			return InvalidDelimiter;
		return r;
	};
};

// Scans the list of addresses at the start of a command line. Addresses are
// separated by ',' or ';'; a ';' marks the preceding address with
// setcurrentline. A leading ',' or ';' supplies an implicit first address
// (line 1 or the current line respectively). The iterator is left at the
// first rune which is not part of the address list.
fn scan_addrs(t: *strings::iterator) []Address = {
	let addrs: []Address = [];
	let specialfirst = false;

	scan_blanks(t);
	match (strings::next(t)) {
	case void =>
		return addrs;
	case let r: rune =>
		switch (r) {
		case ',' =>
			specialfirst = true;
			append(addrs, Address{
				addrform = 1z,
				lineoffset = 0,
				setcurrentline = false,
			});
		case ';' =>
			specialfirst = true;
			append(addrs, Address{
				addrform = CurrentLine,
				lineoffset = 0,
				setcurrentline = true,
			});
		case =>
			strings::prev(t);
		};
	};

	for (true) {
		let addr = match (scan_addr(t)) {
		case void =>
			// No explicit address here: after a leading separator
			// default to the last line, otherwise repeat the
			// previous address, or stop if there is none.
			yield if (specialfirst) {
				yield Address{
					addrform = LastLine,
					lineoffset = 0,
					...
				};
			} else if (len(addrs) > 0) {
				yield addrs[len(addrs)-1];
			} else {
				break;
			};
		case let a: Address =>
			yield a;
		};

		specialfirst = false;

		scan_blanks(t);
		match (strings::next(t)) {
		case void =>
			append(addrs, addr);
			break;
		case let r: rune =>
			switch (r) {
			case ',' =>
				append(addrs, addr);
			case ';' =>
				addr.setcurrentline = true;
				append(addrs, addr);
			case =>
				append(addrs, addr);
				strings::prev(t);
				break;
			};
		};
	};

	// debug("scan_addrs(): len(addrs)={}", len(addrs));

	return addrs;
};

// Scans a single address: an optional base form ('.', '$', a number, a
// 'x mark, or a /regex/ or ?regex? search) followed by any number of
// offsets. Returns void if neither a base form nor an offset is present.
// When only offsets are given, the base form is the current line.
fn scan_addr(t: *strings::iterator) (Address | void) = {
	scan_blanks(t);
	let r = match (strings::next(t)) {
	case void =>
		return void;
	case let r: rune =>
		yield r;
	};

	// debug("scan_addr(): r={}", r);

	const addrform: (AddressForm | void) = switch (r) {
	case '.' =>
		yield CurrentLine;
	case '$' =>
		yield LastLine;
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
		strings::prev(t);
		yield scan_size(t);
	case '\'' =>
		yield scan_mark(t);
	case '/' =>
		const rad = RegexAddr{
			expr = scan_item(t, '/').0,
			direction = true,
		};
		strings::next(t); // skip closing delimiter if present
		yield rad;
	case '?' =>
		const rad = RegexAddr{
			expr = scan_item(t, '?').0,
			direction = false,
		};
		strings::next(t); // skip closing delimiter if present
		yield rad;
	case =>
		strings::prev(t);
		yield void;
	};

	const offs = scan_offsets(t);
	// The offsets are summed below; the slice itself is not kept.
	defer free(offs);

	if (addrform is void && len(offs) == 0)
		return void;

	const addrform: AddressForm =
		if (addrform is void)
			CurrentLine
		else
			addrform as AddressForm;

	let addr = Address{
		addrform = addrform,
		lineoffset = 0,
		...
	};

	for (let i = 0z; i < len(offs); i += 1) {
		addr.lineoffset += offs[i];
	};

	return addr;
};

// Scans a run of address offsets: '+[n]', '-[n]' or a bare number, each
// optionally preceded by blanks. The caller must free the returned slice.
fn scan_offsets(t: *strings::iterator) []int = {
	let offs: []int = [];

	for (true) {
		scan_blanks(t);

		match (strings::next(t)) {
		case void =>
			return offs;
		case let r: rune =>
			if (r == '+') {
				append(offs, scan_offset(t));
			} else if (r == '-') {
				append(offs, -scan_offset(t));
			} else if (ascii::isdigit(r)) {
				strings::prev(t);
				append(offs, scan_size(t): int);
			} else {
				strings::prev(t);
				break;
			};
		};
	};

	return offs;
};

// Scans the magnitude following a '+' or '-'. Returns 1 when no digits
// follow the sign.
fn scan_offset(t: *strings::iterator) int = {
	match (strings::next(t)) {
	case void =>
		return 1;
	case let r: rune =>
		strings::prev(t);
		if (ascii::isdigit(r)) {
			return scan_size(t): int;
		} else {
			return 1;
		};
	};
};

// Scans the command name after skipping blanks. Returns [[NUL]] for an empty
// line and [[UnknownCommand]] for a rune which is not a known command.
fn scan_cmdname(t: *strings::iterator) (rune | UnknownCommand) = {
	scan_blanks(t);
	let r = match (strings::next(t)) {
	case void =>
		return NUL;
	case let r: rune =>
		yield r;
	};

	switch (r) {
	case
		'a', 'c', 'd', 'e', 'E', 'f', 'g', 'G', 'h', 'H',
		'i', 'j', 'k', 'l', 'm', 'n', 'p', 'P', 'q', 'Q',
		'r', 's', 't', 'u', 'v', 'V', 'w', '=', '!', '&',
	=>
		return r;
	case =>
		return r: UnknownCommand;
	};
};

// Scans an optional print suffix ('p', 'n' or 'l'). Any other rune is pushed
// back and [[PrintMode::NONE]] is returned.
fn scan_printmode(t: *strings::iterator) PrintMode = {
	let r = match (strings::next(t)) {
	case void =>
		return PrintMode::NONE;
	case let r: rune =>
		yield r;
	};

	switch (r) {
	case 'p' =>
		return PrintMode::PLAIN;
	case 'n' =>
		return PrintMode::NUMBER;
	case 'l' =>
		return PrintMode::LIST;
	case =>
		strings::prev(t);
		return PrintMode::NONE;
	};
};

// Consumes the remainder of the line and returns it with surrounding
// whitespace trimmed. The iterator is left at the end of the line. The
// result is a fresh allocation, independent of any temporary buffers.
fn scan_rest(t: *strings::iterator) str = {
	// TODO: just use [[strings::iterstr]]?
	let rs: []rune = [];
	defer free(rs);
	for (true) {
		match (strings::next(t)) {
		case void =>
			break;
		case let r: rune =>
			append(rs, r);
		};
	};

	// strings::trim returns a view into its input, so duplicate the
	// trimmed view and release the untrimmed string.
	const raw = strings::fromrunes(rs);
	defer free(raw);
	return strings::dup(strings::trim(raw));
};

// Scans text up to, but not including, the next unescaped delimiter.
// A backslash followed by the delimiter yields a literal delimiter; any other
// backslash sequence is kept verbatim; a trailing lone backslash is dropped.
//
// Returns the scanned text (a fresh allocation) and whether an unescaped,
// terminating delimiter was found. The delimiter itself is left unconsumed.
fn scan_item(t: *strings::iterator, delim: rune) (str, bool) = {
	let rs: []rune = [];
	defer free(rs);
	let seen_delim = false;
	for (true) {
		let r = match (strings::next(t)) {
		case void =>
			break;
		case let r: rune =>
			yield r;
		};
		if (r == '\\') {
			match (strings::next(t)) {
			case void =>
				break; // TODO: Error here? how?
			case let r: rune =>
				if (r == delim) {
					// Escaped delimiter: literal text, does
					// not terminate the item.
					append(rs, r);
				} else {
					append(rs, ['\\', r]...);
				};
				continue;
			};
		} else if (r == delim) {
			seen_delim = true;
			strings::prev(t);
			break;
		};
		append(rs, r);
	};
	return (strings::fromrunes(rs), seen_delim);
};

// Scans the mark character following a "'" address prefix.
// NOTE: aborts the program when the mark is missing or not an ASCII letter.
fn scan_mark(t: *strings::iterator) rune = {
	match (strings::next(t)) {
	case void =>
		abort(); // TODO: aborts?
	case let r: rune =>
		if (ascii::isalpha(r)) { // TODO: cover all mark chars
			return r;
		} else {
			abort();
		};
	};
};

// Scans the flags of an 's' command: a decimal count, 'g' for global, and
// the print suffixes 'p', 'n', 'l', in any order. Scanning stops at the first
// rune which is not a flag; that rune is left unconsumed so that the caller's
// [[scan_end_assert]] can report it.
fn scan_substitute_flags(t: *strings::iterator) (size, bool, PrintMode) = {
	let count = 0z;
	let global = false;
	let printmode = PrintMode::NONE;

	for (true) {
		let r = match (strings::next(t)) {
		case void =>
			break;
		case let r: rune =>
			yield r;
		};

		switch (r) {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
			strings::prev(t);
			count = scan_size(t);
		case 'g' =>
			global = true;
		case 'p' =>
			printmode = PrintMode::PLAIN;
		case 'n' =>
			printmode = PrintMode::NUMBER;
		case 'l' =>
			printmode = PrintMode::LIST;
		case =>
			// Not a flag: push it back instead of swallowing it.
			strings::prev(t);
			break;
		};
	};

	return (count, global, printmode);
};

// Scans a run of ASCII digits and returns its value. Returns 0 when no digit
// is present.
// NOTE: aborts the program if the number cannot be converted to a size.
fn scan_size(t: *strings::iterator) size = {
	// Remember the start so the digits can be sliced out afterwards.
	let begin = *t;
	for (true) {
		let r = match (strings::next(t)) {
		case void =>
			break;
		case let r: rune =>
			yield r;
		};

		if (!ascii::isdigit(r)) {
			strings::prev(t);
			break;
		};
	};

	let num = strings::slice(&begin, t);

	// TODO: return void instead?
	if (num == "") {
		return 0z;
	};

	match (strconv::stoz(num)) {
	case (strconv::invalid | strconv::overflow) =>
		abort("Invalid"); // TODO: propagate?
	case let z: size =>
		return z;
	};
};

// Skips blank characters and returns how many were skipped.
fn scan_blanks(t: *strings::iterator) size = {
	let sz = 0z; // runes, not bytes
	for (true) {
		match (strings::next(t)) {
		case void =>
			break;
		case let r: rune =>
			if (!ascii::isblank(r)) {
				strings::prev(t);
				break;
			};
			sz += 1;
		};
	};
	return sz;
};

// Skips blanks and fails with [[TrailingCharacters]] unless the end of the
// line has been reached.
fn scan_end_assert(t: *strings::iterator) (void | TrailingCharacters) = {
	scan_blanks(t);
	match (strings::next(t)) {
	case rune =>
		return TrailingCharacters;
	case void =>
		return void;
	};
};