parse.ha (9113B)
use ascii;
use regex;
use strconv;
use strings;

// Any error [[parse]] can report for a malformed command line.
type ParseError = (
	UnknownCommand
	| UnexpectedSuffix
	| TrailingCharacters
	| ExpectedArgument
	| ExpectedMark
	| InvalidDelimiter
	| ExpectedDelimiter
);

// The command character is not one of the recognised commands. Carries the
// offending rune.
type UnknownCommand = !rune;

// A file command (e, E, f, r, w) was followed directly by a non-blank rune.
// Carries that rune.
type UnexpectedSuffix = !rune;

// Non-blank input remained after a complete command.
type TrailingCharacters = !void;

// The input ended where a delimiter-introduced argument was required
// (G, V, s).
type ExpectedArgument = !void;

// The 'k' command was not followed by a mark character.
type ExpectedMark = !void;

// A space was used as the regex delimiter.
type InvalidDelimiter = !void;

// The input ended after the regex of an 's' command, before the delimiter
// that introduces the replacement.
type ExpectedDelimiter = !void;

// Parses one line of command input into cmd. The addresses, command name and,
// depending on the command, the print mode and up to three string arguments
// are stored in cmd.
//
// Returns true when the command is ready. Returns false for 'a', 'c' and 'i'
// (presumably because they still need their text input; confirm with the
// caller). Returns a [[ParseError]] when the input is malformed.
//
// Strings stored in cmd.arg, cmd.arg2 and cmd.arg3 are heap allocated.
//
// NOTE: 'g' and 'v' are not implemented yet and abort the program.
fn parse(cmd: *Command, input: str) (bool | ParseError) = {
	const iter = strings::iter(input);

	cmd.addrs = scan_addrs(&iter);
	cmd.cmdname = scan_cmdname(&iter)?;

	switch (cmd.cmdname) {
	// [ ]
	// scan_cmdname yields '\x00' when nothing follows the addresses.
	case '\x00' =>
		return true;

	// (q|Q)
	case 'q', 'Q' =>
		scan_end_assert(&iter)?;
		return true;

	// .[ <file>]
	case 'e', 'E', 'f', 'r', 'w' =>
		if (scan_blanks(&iter) == 0) {
			// Without a separating blank only end of input is
			// acceptable.
			match (strings::next(&iter)) {
			case let r: rune =>
				return r: UnexpectedSuffix;
			case void =>
				return true;
			};
		} else {
			cmd.arg = scan_rest(&iter);
			return true;
		};

	// k<x>
	case 'k' =>
		match (strings::next(&iter)) {
		case let r: rune =>
			cmd.arg = strings::fromrunes([r]);
		case void =>
			return ExpectedMark;
		};
		scan_end_assert(&iter)?;
		return true;

	// !<shellcmd>
	case '!' =>
		cmd.arg = scan_rest(&iter);
		return true;

	// .[s] where 's' is '(l|n|p)'
	case 'd', 'h', 'H', 'j', 'l', 'n', 'p', 'P', 'u', '=' =>
		cmd.printmode = scan_suffix(&iter);
		scan_end_assert(&iter)?;
		return true;

	// .[s]
	case 'a', 'c', 'i' =>
		cmd.printmode = scan_suffix(&iter);
		scan_end_assert(&iter)?;
		return false;

	// .[s][ ]<addr>
	case 'm', 't' =>
		cmd.printmode = scan_suffix(&iter);
		cmd.arg = scan_rest(&iter);
		return true;

	// ./<regex>[/] where delimiter '/' is arbitrary
	case 'G', 'V' =>
		const delim = match (strings::next(&iter)) {
		case void =>
			return ExpectedArgument;
		case let r: rune =>
			yield if (r == ' ') {
				return InvalidDelimiter;
			} else {
				yield r;
			};
		};
		cmd.arg = scan_item(&iter, delim);
		strings::next(&iter); // scan delimiter if exists
		scan_end_assert(&iter)?;
		return true;

	// s/<regex>/[<replace>[/[<flags>]]]
	case 's' =>
		const delim = match (strings::next(&iter)) {
		case void =>
			return ExpectedArgument;
		case let r: rune =>
			yield if (r == ' ') {
				return InvalidDelimiter;
			} else {
				yield r;
			};
		};
		cmd.arg = scan_item(&iter, delim);
		// The delimiter after the regex is mandatory.
		match (strings::next(&iter)) {
		case rune => void;
		case void =>
			return ExpectedDelimiter;
		};
		cmd.arg2 = scan_item(&iter, delim);
		// The delimiter after the replacement is optional.
		match (strings::next(&iter)) {
		case rune => void;
		case void =>
			return true;
		};
		cmd.arg3 = scan_rest(&iter); // TODO: scan properly here?
		return true;

	// ./<regex>/<cmdlist...>
	case 'g', 'v' =>
		abort("TODO: parse: global, invglobal");

	case =>
		// Not reachable as long as scan_cmdname rejects every rune
		// that has no case above.
		abort();
	};
};

// Scans the address list at the start of a command line and returns it as a
// newly allocated slice (empty when no address is present).
//
// A leading ',' adds the address 1, a leading ';' adds the current line with
// setcurrentline set. Addresses are separated by ',' or ';'; an address
// followed by ';' gets setcurrentline set. When an address is missing after a
// separator, the previous address is repeated; directly after a leading
// ',' or ';' the last line is used instead.
fn scan_addrs(iter: *strings::iterator) []Address = {
	let addrs: []Address = [];
	let specialfirst = false;

	scan_blanks(iter);
	match (strings::next(iter)) {
	case void =>
		return addrs;
	case let r: rune =>
		switch (r) {
		case ',' =>
			specialfirst = true;
			append(addrs, Address {
				addrtype = 1z,
				lineoffset = 0,
				setcurrentline = false,
			});
		case ';' =>
			specialfirst = true;
			append(addrs, Address {
				addrtype = CurrentLine,
				lineoffset = 0,
				setcurrentline = true,
			});
		case =>
			// Not a leading separator; let scan_addr see it.
			strings::prev(iter);
		};
	};

	for (true) {
		let addr = match (scan_addr(iter)) {
		case void =>
			yield if (specialfirst) {
				yield Address {
					addrtype = LastLine,
					lineoffset = 0,
					...
				};
			} else if (len(addrs) > 0) {
				yield addrs[len(addrs)-1];
			} else {
				break;
			};
		case let a: Address =>
			yield a;
		};

		specialfirst = false;

		scan_blanks(iter);
		match (strings::next(iter)) {
		case void =>
			append(addrs, addr);
			break;
		case let r: rune =>
			switch (r) {
			case ',' =>
				append(addrs, addr);
			case ';' =>
				addr.setcurrentline = true;
				append(addrs, addr);
			case =>
				// Start of the command name: finish the list
				// and leave the rune for the caller.
				append(addrs, addr);
				strings::prev(iter);
				break;
			};
		};
	};

	// debug("scan_addrs(): len(addrs)={}", len(addrs));

	return addrs;
};

// Scans a single address: an optional base ('.', '$', a number, 'x for a
// mark, /re/ or ?re?) followed by any number of offsets, which are summed into
// lineoffset. Offsets without a base apply to the current line.
//
// Returns void, consuming nothing but blanks, when neither a base nor an
// offset is present.
fn scan_addr(iter: *strings::iterator) (Address | void) = {
	scan_blanks(iter);
	let r = match (strings::next(iter)) {
	case void =>
		return void;
	case let r: rune =>
		yield r;
	};

	// debug("scan_addr(): r={}", r);

	const scanned: (AddressType | void) =
		if (r == '.') {
			yield CurrentLine;
		} else if (r == '$') {
			yield LastLine;
		} else if (ascii::isdigit(r)) {
			strings::prev(iter);
			yield scan_uint(iter): size;
		} else if (r == '\'') {
			yield scan_mark(iter);
		} else if (r == '/') {
			const rad = RegexAddr {
				expr = scan_item(iter, '/'),
				direction = true,
			};
			strings::next(iter); // closing delimiter, if any
			yield rad;
		} else if (r == '?') {
			const rad = RegexAddr {
				expr = scan_item(iter, '?'),
				direction = false,
			};
			strings::next(iter); // closing delimiter, if any
			yield rad;
		} else {
			strings::prev(iter);
			yield void;
		};

	const offs = scan_offsets(iter);
	// Only the sum of the offsets is kept, so release the slice on every
	// exit path.
	defer free(offs);

	const addrtype: AddressType = match (scanned) {
	case void =>
		yield if (len(offs) == 0) {
			return void;
		} else {
			yield CurrentLine;
		};
	case =>
		yield scanned as AddressType;
	};

	let addr = Address {
		addrtype = addrtype,
		lineoffset = 0,
		...
	};

	for (let i = 0z; i < len(offs); i += 1) {
		addr.lineoffset += offs[i];
	};

	return addr;
};

// Scans a run of address offsets: "+[n]", "-[n]" or a bare number, optionally
// separated by blanks. Stops at, and does not consume, the first rune that
// cannot start an offset. The caller must free the returned slice.
fn scan_offsets(iter: *strings::iterator) []int = {
	let offs: []int = [];

	for (true) {
		scan_blanks(iter);

		match (strings::next(iter)) {
		case void =>
			return offs;
		case let r: rune =>
			if (r == '+') {
				append(offs, scan_offset(iter));
			} else if (r == '-') {
				append(offs, -scan_offset(iter));
			} else if (ascii::isdigit(r)) {
				strings::prev(iter);
				append(offs, scan_uint(iter): int);
			} else {
				strings::prev(iter);
				break;
			};
		};
	};

	return offs;
};

// Scans the magnitude that follows a '+' or '-'. Returns 1 when no digits
// follow the sign.
fn scan_offset(iter: *strings::iterator) int = {
	match (strings::next(iter)) {
	case void =>
		return 1;
	case let r: rune =>
		strings::prev(iter);
		if (ascii::isdigit(r)) {
			return scan_uint(iter): int;
		} else {
			return 1;
		};
	};
};

// Skips blanks and scans the command character. Returns '\x00' when the input
// is exhausted, or [[UnknownCommand]] carrying the rune when it is not a
// recognised command.
fn scan_cmdname(iter: *strings::iterator) (rune | UnknownCommand) = {
	scan_blanks(iter);
	let r = match (strings::next(iter)) {
	case void =>
		return '\x00';
	case let r: rune =>
		yield r;
	};

	switch (r) {
	case
		'a', 'c', 'd', 'e', 'E', 'f', 'g', 'G', 'h', 'H',
		'i', 'j', 'k', 'l', 'm', 'n', 'p', 'P', 'q', 'Q',
		'r', 's', 't', 'u', 'v', 'V', 'w', '=', '!',
	=>
		return r;
	case =>
		return r: UnknownCommand;
	};
};

// Scans an optional print suffix ('l', 'n' or 'p'). Any other rune is left
// unconsumed and PrintMode::NONE is returned.
fn scan_suffix(iter: *strings::iterator) PrintMode = {
	let r = match (strings::next(iter)) {
	case void =>
		return PrintMode::NONE;
	case let r: rune =>
		yield r;
	};

	switch (r) {
	case 'l' =>
		return PrintMode::LIST;
	case 'n' =>
		return PrintMode::NUMBER;
	case 'p' =>
		return PrintMode::PRINT;
	case =>
		strings::prev(iter);
		return PrintMode::NONE;
	};
};

// Consumes the remainder of the input and returns it with surrounding
// whitespace trimmed. The result is a separate allocation that the caller may
// free.
fn scan_rest(iter: *strings::iterator) str = {
	// TODO: just use [[strings::iterstr]]?
	let rs: []rune = [];
	defer free(rs);
	for (true) {
		match (strings::next(iter)) {
		case void =>
			break;
		case let r: rune =>
			append(rs, r);
		};
	};

	// strings::trim returns a borrowed sub-string which cannot be freed
	// when leading blanks were removed, so hand out a duplicate of the
	// trimmed text and release the untrimmed copy.
	const raw = strings::fromrunes(rs);
	defer free(raw);
	return strings::dup(strings::trim(raw));
};

// Scans up to, but not including, the next unescaped occurrence of end (or
// the end of input) and returns the scanned text as a newly allocated string.
//
// A backslash followed by end yields just end; a backslash followed by any
// other rune is kept verbatim together with that rune. A trailing lone
// backslash is dropped.
fn scan_item(iter: *strings::iterator, end: rune) str = {
	let rs: []rune = [];
	// strings::fromrunes copies the runes, so the buffer is ours to free.
	defer free(rs);
	for (true) {
		let r = match (strings::next(iter)) {
		case void =>
			break;
		case let r: rune =>
			yield r;
		};
		if (r == '\\') {
			match (strings::next(iter)) {
			case void =>
				break; // TODO: Error here? how?
			case let r: rune =>
				if (r == end) {
					append(rs, r);
				} else {
					append(rs, ['\\', r]...);
				};
				continue;
			};
		} else if (r == end) {
			// Leave the delimiter for the caller to consume.
			strings::prev(iter);
			break;
		};
		append(rs, r);
	};
	return strings::fromrunes(rs);
};

// Scans the mark character that follows a "'" in an address.
//
// NOTE: aborts the program when the input ends or the rune is not an ASCII
// letter; reporting this as an error would need a different return type.
fn scan_mark(iter: *strings::iterator) rune = {
	match (strings::next(iter)) {
	case void =>
		abort(); // TODO: aborts?
	case let r: rune =>
		if (ascii::isalpha(r)) { // TODO: cover all mark chars
			return r;
		} else {
			abort();
		};
	};
};

// TODO: rename and appropriate to "scan_size()"?
// Reads a run of ASCII digits at the iterator position and returns its value.
// The first non-digit rune is pushed back so the caller can scan it. Returns 0
// when no digit is present. Aborts when [[strconv::stou]] rejects the digits
// (e.g. on overflow).
fn scan_uint(iter: *strings::iterator) uint = {
	let digits: []u8 = [];
	defer free(digits);

	for (true) {
		match (strings::next(iter)) {
		case void =>
			break;
		case let c: rune =>
			if (!ascii::isdigit(c)) {
				strings::prev(iter);
				break;
			};
			// Digits are ASCII, so one byte per rune suffices.
			append(digits, c: u32: u8);
		};
	};

	if (len(digits) == 0) {
		return 0;
	};

	const text = strings::fromutf8(digits)!;
	match (strconv::stou(text)) {
	case let value: uint =>
		return value;
	case (strconv::invalid | strconv::overflow) =>
		abort("Invalid");
	};
};

// Skips consecutive blank runes (as classified by [[ascii::isblank]]) and
// returns how many were skipped, counted in runes rather than bytes. The first
// non-blank rune is left unconsumed.
fn scan_blanks(iter: *strings::iterator) size = {
	let skipped = 0z;
	for (true) {
		const c = match (strings::next(iter)) {
		case void =>
			break;
		case let c: rune =>
			yield c;
		};
		if (ascii::isblank(c)) {
			skipped += 1;
		} else {
			strings::prev(iter);
			break;
		};
	};
	return skipped;
};

// Skips blanks and checks that the input is exhausted. Returns
// [[TrailingCharacters]] when any other rune remains (that rune is consumed).
fn scan_end_assert(iter: *strings::iterator) (void | TrailingCharacters) = {
	scan_blanks(iter);
	if (strings::next(iter) is rune) {
		return TrailingCharacters;
	};
};