parse.ha (12296B)
use ascii;
use bufio;
use io;
use regex;
use strconv;
use strings;

// Any error produced while parsing a command line.
type ParseError = !(
	UnknownCommand
	| InvalidSuffix
	| UnexpectedSuffix
	| TrailingCharacters
	| ExpectedArgument
	| ExpectedMark
	| InvalidDelimiter
	| ExpectedDelimiter
);

// The command name is not one of the runes accepted by [[scan_cmdname]].
// Carries the offending rune.
type UnknownCommand = !rune;

// 'w' was directly followed by a rune other than 'q'. Carries that rune.
type InvalidSuffix = !rune;

// 'e', 'E', 'f' or 'r' was directly followed by a non-blank rune. Carries
// that rune.
type UnexpectedSuffix = !rune;

// Non-blank input remained where the end of the command was expected.
type TrailingCharacters = !void;

// The line ended where the delimiter of 'g', 'G', 'v', 'V' or 's' was
// expected.
type ExpectedArgument = !void;

// 'k' was not followed by a mark rune.
type ExpectedMark = !void;

// A space was used as the delimiter of 'g', 'G', 'v', 'V' or 's'.
type InvalidDelimiter = !void;

// The line ended right after the regex of 's', before the delimiter that
// introduces the replacement.
type ExpectedDelimiter = !void;

// Command name used for a line that carries no command (see [[scan_cmdname]]).
def NUL = '\x00';

// Reads one command from 'input' and returns it as a Command.
//
// The first line supplies the addresses, the command name and its arguments.
// Depending on the command, further lines are consumed and stored in
// cmd.textinput:
//
// - 'a', 'c', 'i': every following line up to (not including) a line that is
//   exactly "." or until EOF.
// - 'g', 'v': the command list; a line ending in a backslash is continued on
//   the next line.
// - 's': continuation lines of the replacement text when the first line did
//   not complete the command.
//
// If the very first read hits EOF, io::EOF is returned.
// NOTE(review): io::EOF is presumably a member of InteractionError, which is
// declared elsewhere - confirm.
fn parse(input: *bufio::scanner) (Command | ParseError | InteractionError) = {
	let inputline = match (bufio::scan_line(input)?) {
	case let s: const str =>
		yield s;
	case io::EOF =>
		return io::EOF;
	};

	let t = strings::iter(inputline);
	let cmd = Command{ ... };

	cmd.addrs = scan_addrs(&t);
	cmd.name = scan_cmdname(&t)?;
	let ready = parse_cmdargs(&cmd, &t)?;

	switch :input (cmd.name) {
	case => void;
	case 'a', 'c', 'i' =>
		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};

			if (inputline == ".")
				break;

			append(cmd.textinput, strings::dup(inputline));
		};
	case 'g', 'v' =>
		if (!strings::hassuffix(cmd.arg2, '\\')) {
			append(cmd.textinput, strings::dup(cmd.arg2));
			yield :input;
		};
		// Drop only the single backslash that marks the continuation;
		// any backslashes before it belong to the command text.
		append(cmd.textinput,
			strings::dup(strings::trimsuffix(cmd.arg2, "\\")));

		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};
			// workaround scan_line undelimited last line.
			// note, GNU ed would use "p\n" instead.
			if (inputline == "") {
				append(cmd.textinput, strings::dup("\n"));
				break;
			};
			if (!strings::hassuffix(inputline, '\\')) {
				append(cmd.textinput, strings::dup(inputline));
				break;
			};
			append(cmd.textinput,
				strings::dup(strings::trimsuffix(inputline, "\\")));
		};
	case 's' =>
		if (ready)
			yield;

		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};

			let t = strings::iter(inputline);
			let (part, seen_delim) = scan_item(&t, cmd.delim);

			// cmd.textinput holds the replacement text. 'part' is
			// already a fresh allocation made by scan_item, so it
			// is stored as is rather than duplicated again.
			append(cmd.textinput, part);

			if (!seen_delim && strings::hassuffix(inputline, '\\'))
				continue;

			strings::next(&t); // skip delim
			let (count, global, printmode) = scan_substitute_flags(&t);
			//debug("count={} global={}", count, global);
			cmd.count = count;
			cmd.flag_global = global;
			cmd.printmode = printmode;

			scan_end_assert(&t)?;

			break;
		};
	};

	for (let i = 0z; i < len(cmd.textinput); i += 1)
		debug("parse() cmd.textinput[{}]=<{}>", i, cmd.textinput[i]);

	return cmd;
};

// Parses the arguments that follow the command name on the first line and
// stores them in 'cmd'. Returns true when the command is complete after this
// line and false when [[parse]] has to read more input for it.
//
// Aborts if cmd.name is not a rune accepted by [[scan_cmdname]].
fn parse_cmdargs(cmd: *Command, t: *strings::iterator) (bool | ParseError) = {
	switch (cmd.name) {
	// [ ]
	case NUL =>
		return true;

	// (q|Q)
	case 'q', 'Q' =>
		scan_end_assert(t)?;
		return true;

	// .[ <file>]
	case 'e', 'E', 'f', 'r' =>
		if (scan_blanks(t) == 0)
			match (strings::next(t)) {
			case let r: rune =>
				return r: UnexpectedSuffix;
			case done =>
				return true;
			};
		cmd.arg1 = scan_rest(t);
		return true;

	// w(q|[ <file>])
	case 'w' =>
		if (scan_blanks(t) == 0)
			match (strings::next(t)) {
			case let r: rune =>
				if (r == 'q') {
					cmd.suffix = r;
					return true;
				} else {
					return r: InvalidSuffix;
				};
			case done =>
				return true;
			};
		cmd.arg1 = scan_rest(t);
		return true;

	// k<x>
	case 'k' =>
		match (strings::next(t)) {
		case let r: rune =>
			cmd.suffix = r;
		case done =>
			return ExpectedMark;
		};
		scan_end_assert(t)?;
		return true;

	// !<shellcmd>
	case '!' =>
		cmd.arg1 = scan_rest(t);
		return true;

	// .[s] where 's' is '(l|n|p)'
	case 'd', 'h', 'H', 'j', 'l', 'n', 'p', 'P', 'u', '=' =>
		cmd.printmode = scan_printmode(t);
		scan_end_assert(t)?;
		return true;

	// .[s]
	case 'a', 'c', 'i' =>
		cmd.printmode = scan_printmode(t);
		scan_end_assert(t)?;
		// The text to insert follows on subsequent lines.
		return false;

	// .[s][ ]<addr>
	case 'm', 't' =>
		cmd.printmode = scan_printmode(t);
		cmd.arg1 = scan_rest(t);
		return true;

	// ./<regex>[/] where delimiter '/' is arbitrary
	case 'G', 'V' =>
		cmd.delim = scan_delim(t)?;
		cmd.arg1 = scan_item(t, cmd.delim).0;
		strings::next(t); // scan delimiter if exists
		scan_end_assert(t)?;
		return true;

	// ./<regex>/<cmdlist...>
	case 'g', 'v' =>
		cmd.delim = scan_delim(t)?;
		cmd.arg1 = scan_item(t, cmd.delim).0;
		strings::next(t); // scan delimiter if exists
		cmd.arg2 = scan_rest(t);
		// The iterator is at the end of the line now; a trailing
		// backslash means the command list continues on the next line.
		match (strings::prev(t)) {
		case let r: rune =>
			return r != '\\';
		case done =>
			return true;
		};

	// s/<regex>/[<replace>[/[<flags>]]]
	case 's' =>
		cmd.delim = scan_delim(t)?;
		cmd.arg1 = scan_item(t, cmd.delim).0;
		match (strings::next(t)) {
		case rune => void;
		case done =>
			return ExpectedDelimiter;
		};
		append(cmd.textinput, scan_item(t, cmd.delim).0);
		match (strings::next(t)) {
		case rune => void;
		case done =>
			// No closing delimiter: the command is complete unless
			// the line ends in a backslash, in which case the
			// replacement continues on the next line.
			match (strings::prev(t)) {
			case let r: rune =>
				return r != '\\';
			case done =>
				return true;
			};
		};
		let (count, global, printmode) = scan_substitute_flags(t);
		cmd.count = count;
		cmd.flag_global = global;
		cmd.printmode = printmode;
		scan_end_assert(t)?;
		return true;

	case '&' =>
		scan_end_assert(t)?;
		return true;

	case =>
		abort();
	};
};

// Reads the delimiter rune that follows 'g', 'G', 'v', 'V' or 's'. Returns
// ExpectedArgument at the end of the line and InvalidDelimiter for a space.
fn scan_delim(
	t: *strings::iterator,
) (rune | ExpectedArgument | InvalidDelimiter) = {
	match (strings::next(t)) {
	case done =>
		return ExpectedArgument;
	case let r: rune =>
		if (r == ' ')
			return InvalidDelimiter;
		return r;
	};
};

// Scans the list of addresses, separated by ',' or ';', at the start of a
// command line. The iterator is left on the first rune that is not part of
// the address list.
//
// A leading ',' contributes a first address of line 1 and a leading ';' a
// first address of the current line with setcurrentline set. A missing
// address right after such a leading separator becomes the last line; a
// missing address after any other separator repeats the previous address.
// An address followed by ';' has setcurrentline set.
fn scan_addrs(t: *strings::iterator) []Address = {
	let addrs: []Address = [];
	let specialfirst = false;

	scan_blanks(t);
	match (strings::next(t)) {
	case done =>
		return addrs;
	case let r: rune =>
		switch (r) {
		case ',' =>
			specialfirst = true;
			append(addrs, Address{
				addrform = 1z,
				lineoffset = 0,
				setcurrentline = false,
			});
		case ';' =>
			specialfirst = true;
			append(addrs, Address{
				addrform = CurrentLine,
				lineoffset = 0,
				setcurrentline = true,
			});
		case =>
			strings::prev(t);
		};
	};

	for (true) {
		let addr = match (scan_addr(t)) {
		case void =>
			yield if (specialfirst) {
				yield Address{
					addrform = LastLine,
					lineoffset = 0,
					...
				};
			} else if (len(addrs) > 0) {
				yield addrs[len(addrs)-1];
			} else {
				break;
			};
		case let a: Address =>
			yield a;
		};

		specialfirst = false;

		scan_blanks(t);
		match (strings::next(t)) {
		case done =>
			append(addrs, addr);
			break;
		case let r: rune =>
			switch (r) {
			case ',' =>
				append(addrs, addr);
			case ';' =>
				addr.setcurrentline = true;
				append(addrs, addr);
			case =>
				// Not a separator: put it back for the command
				// name scanner.
				append(addrs, addr);
				strings::prev(t);
				break;
			};
		};
	};

	// debug("scan_addrs(): len(addrs)={}", len(addrs));

	return addrs;
};

// Scans a single address: an optional base form ('.', '$', a line number,
// a mark introduced by "'", or a regex enclosed in '/' or '?') followed by
// any number of offsets. Returns void when neither a base form nor an offset
// is present. Offsets without a base form are relative to the current line.
fn scan_addr(t: *strings::iterator) (Address | void) = {
	scan_blanks(t);
	let r = match (strings::next(t)) {
	case done =>
		return void;
	case let r: rune =>
		yield r;
	};

	// debug("scan_addr(): r={}", r);

	const addrform: (AddressForm | void) = switch (r) {
	case '.' =>
		yield CurrentLine;
	case '$' =>
		yield LastLine;
	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
		strings::prev(t);
		yield scan_size(t);
	case '\'' =>
		yield scan_mark(t);
	case '/' =>
		const rad = RegexAddr{
			expr = scan_item(t, '/').0,
			direction = true,
		};
		strings::next(t);
		yield rad;
	case '?' =>
		const rad = RegexAddr{
			expr = scan_item(t, '?').0,
			direction = false,
		};
		strings::next(t);
		yield rad;
	case =>
		strings::prev(t);
		yield void;
	};

	const offs = scan_offsets(t);
	// The offsets are only summed up below; release the list on every
	// exit path.
	defer free(offs);

	if (addrform is void && len(offs) == 0)
		return void;

	const addrform: AddressForm =
		if (addrform is void)
			CurrentLine
		else
			addrform as AddressForm;

	let addr = Address{
		addrform = addrform,
		lineoffset = 0,
		...
	};

	for (let i = 0z; i < len(offs); i += 1) {
		addr.lineoffset += offs[i];
	};

	return addr;
};

// Scans a run of address offsets: '+' or '-' optionally followed by a
// number, or a bare number. The iterator is left on the first rune that does
// not start an offset. The caller must free the returned slice.
fn scan_offsets(t: *strings::iterator) []int = {
	let offs: []int = [];

	for (true) {
		scan_blanks(t);

		match (strings::next(t)) {
		case done =>
			return offs;
		case let r: rune =>
			if (r == '+') {
				append(offs, scan_offset(t));
			} else if (r == '-') {
				append(offs, -scan_offset(t));
			} else if (ascii::isdigit(r)) {
				strings::prev(t);
				append(offs, scan_size(t): int);
			} else {
				strings::prev(t);
				break;
			};
		};
	};

	return offs;
};

// Scans the magnitude that follows a '+' or '-' sign. Returns 1 when no
// digits follow the sign.
fn scan_offset(t: *strings::iterator) int = {
	match (strings::next(t)) {
	case done =>
		return 1;
	case let r: rune =>
		strings::prev(t);
		if (ascii::isdigit(r)) {
			return scan_size(t): int;
		} else {
			return 1;
		};
	};
};

// Skips blanks and scans the command name. Returns [[NUL]] when the line
// ends before a command name and UnknownCommand for an unsupported rune.
fn scan_cmdname(t: *strings::iterator) (rune | UnknownCommand) = {
	scan_blanks(t);
	let r = match (strings::next(t)) {
	case done =>
		return NUL;
	case let r: rune =>
		yield r;
	};

	switch (r) {
	case
		'a', 'c', 'd', 'e', 'E', 'f', 'g', 'G', 'h', 'H',
		'i', 'j', 'k', 'l', 'm', 'n', 'p', 'P', 'q', 'Q',
		'r', 's', 't', 'u', 'v', 'V', 'w', '=', '!', '&',
	=>
		return r;
	case =>
		return r: UnknownCommand;
	};
};

// Scans an optional print suffix ('p', 'n' or 'l'). Any other rune is put
// back and PrintMode::NONE is returned.
fn scan_printmode(t: *strings::iterator) PrintMode = {
	let r = match (strings::next(t)) {
	case done =>
		return PrintMode::NONE;
	case let r: rune =>
		yield r;
	};

	switch (r) {
	case 'p' =>
		return PrintMode::PLAIN;
	case 'n' =>
		return PrintMode::NUMBER;
	case 'l' =>
		return PrintMode::LIST;
	case =>
		strings::prev(t);
		return PrintMode::NONE;
	};
};

// Consumes the remainder of the line and returns it with the surrounding
// whitespace trimmed. The result is a fresh allocation.
fn scan_rest(t: *strings::iterator) str = {
	// TODO: just use [[strings::iterstr]]?
	let rs: []rune = [];
	defer free(rs);
	for (let r: rune => strings::next(t)) {
		append(rs, r);
	};

	// strings::trim returns a slice into its argument, so copy the trimmed
	// part and release the untrimmed string.
	const full = strings::fromrunes(rs);
	defer free(full);
	return strings::dup(strings::trim(full));
};

// Scans text up to, but not including, the next unescaped 'delim'. Returns
// the text as a fresh allocation, together with a flag telling whether a
// delimiter (escaped or not) was encountered.
//
// A backslash followed by 'delim' yields just the delimiter; a backslash
// followed by any other rune is kept as is. A backslash at the very end of
// the input is dropped.
fn scan_item(t: *strings::iterator, delim: rune) (str, bool) = {
	let rs: []rune = [];
	defer free(rs);
	let seen_delim = false;
	for (let r: rune => strings::next(t)) {
		if (r == '\\') {
			match (strings::next(t)) {
			case done =>
				break; // TODO: Error here? how?
			case let r: rune =>
				if (r == delim) {
					seen_delim = true;
					append(rs, r);
				} else {
					append(rs, ['\\', r]...);
				};
				continue;
			};
		} else if (r == delim) {
			seen_delim = true;
			// Leave the delimiter for the caller to consume.
			strings::prev(t);
			break;
		};
		append(rs, r);
	};
	return (strings::fromrunes(rs), seen_delim);
};

// Scans the mark name that follows "'". Aborts when the line ends or when
// the rune is not an ASCII letter.
fn scan_mark(t: *strings::iterator) rune = {
	match (strings::next(t)) {
	case done =>
		abort(); // TODO: aborts?
	case let r: rune =>
		if (ascii::isalpha(r)) { // TODO: cover all mark chars
			return r;
		} else {
			abort();
		};
	};
};

// Scans the flags of the 's' command: a count, 'g', and the print suffixes
// 'p', 'n' and 'l', in any order. Returns (count, global, printmode); the
// count is 0 when none is given. Scanning stops at the first rune that is
// not a flag, which is left in the iterator.
fn scan_substitute_flags(t: *strings::iterator) (size, bool, PrintMode) = {
	let count = 0z;
	let global = false;
	let printmode = PrintMode::NONE;

	for (let r => (strings::next(t))) {
		switch (r) {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
			strings::prev(t);
			count = scan_size(t);
		case 'g' =>
			global = true;
		case 'p' =>
			printmode = PrintMode::PLAIN;
		case 'n' =>
			printmode = PrintMode::NUMBER;
		case 'l' =>
			printmode = PrintMode::LIST;
		case =>
			// Put the rune back so that a following
			// scan_end_assert can report it instead of silently
			// swallowing it.
			strings::prev(t);
			break;
		};
	};

	return (count, global, printmode);
};

// Scans a run of ASCII digits and returns its value, or 0 when the iterator
// is not positioned on a digit. Aborts when the number does not fit a size.
fn scan_size(t: *strings::iterator) size = {
	let begin = *t;
	// reimplement this function using another iterator
	for (let r => (strings::next(t))) {
		if (!ascii::isdigit(r)) {
			strings::prev(t);
			break;
		};
	};

	let num = strings::slice(&begin, t);

	// TODO: return void instead?
	if (num == "") {
		return 0z;
	};

	match (strconv::stoz(num)) {
	case (strconv::invalid | strconv::overflow) =>
		abort("Invalid"); // TODO: propagate?
	case let z: size =>
		return z;
	};
};

// Skips blanks and returns how many were skipped.
fn scan_blanks(t: *strings::iterator) size = {
	let sz = 0z; // runes, not bytes
	for (let r: rune => strings::next(t)) {
		if (!ascii::isblank(r)) {
			strings::prev(t);
			break;
		};
		sz += 1;
	};
	return sz;
};

// Skips blanks and returns TrailingCharacters if anything is left on the
// line.
fn scan_end_assert(t: *strings::iterator) (void | TrailingCharacters) = {
	scan_blanks(t);
	match (strings::next(t)) {
	case rune =>
		return TrailingCharacters;
	case done =>
		return void;
	};
};