ed

[hare] The standard editor
Log | Files | Refs | README | LICENSE

parse.ha (12642B)


      1 use ascii;
      2 use bufio;
      3 use io;
      4 use regex;
      5 use strconv;
      6 use strings;
      7 
// Any error that can be produced while parsing a command line.
type ParseError = !(
	UnknownCommand
	| InvalidSuffix
	| UnexpectedSuffix
	| TrailingCharacters
	| ExpectedArgument
	| ExpectedMark
	| InvalidDelimiter
	| ExpectedDelimiter
);

// The rune found in command-name position is not a recognised command. Carries
// the offending rune.
type UnknownCommand = !rune;

// The 'w' command was directly followed by a rune other than 'q'. Carries the
// offending rune.
type InvalidSuffix = !rune;

// A file command ('e', 'E', 'f', 'r') was directly followed by a rune instead
// of a blank or the end of the line. Carries the offending rune.
type UnexpectedSuffix = !rune;

// Non-blank input remained where the end of the command was required.
type TrailingCharacters = !void;

// A command that requires a delimiter-introduced argument ('g', 'G', 'v',
// 'V', 's') ended right after the command name.
type ExpectedArgument = !void;

// The 'k' command was not followed by a mark rune.
type ExpectedMark = !void;

// A space was used as the delimiter of a regex command.
type InvalidDelimiter = !void;

// The 's' command ended after its regex, without the delimiter that
// introduces the replacement.
type ExpectedDelimiter = !void;

// Command name used for an empty command (no command rune after the
// addresses).
def NUL = '\x00';
     36 
// Reads one command from the input scanner and parses it into a Command.
//
// The first line supplies the addresses, the command name and its arguments.
// Some commands then consume further lines, which are collected in
// cmd.textinput:
//
// - 'a', 'c', 'i': text lines up to a line consisting of "." (or end of input).
// - 'g', 'v': the command list, continued over following lines while a line
//   ends with a backslash.
// - 's': replacement text continued over following lines when the first line
//   did not complete the command.
//
// Returns io::EOF when there is no line left to read for the command itself.
// NOTE(review): io::EOF and the errors propagated from bufio::scan_line are
// presumably members of InteractionError -- confirm against its declaration.
fn parse(input: *bufio::scanner) (Command | ParseError | InteractionError) = {
	let inputline = match (bufio::scan_line(input)?) {
	case let s: const str =>
		yield s;
	case io::EOF =>
		return io::EOF;
	};

	let t = strings::iter(inputline);
	let cmd = Command{ ... };

	cmd.addrs = scan_addrs(&t);
	cmd.name = scan_cmdname(&t)?;
	// ready is false when the command needs more input lines; it is only
	// consulted by the 's' branch below.
	let ready = parse_cmdargs(&cmd, &t)?;

	switch :input (cmd.name) {
	case => void;
	case 'a', 'c', 'i' =>
		// Input mode: collect lines until a lone "." or end of input.
		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};

			if (inputline == ".")
				break;

			// The scanner owns inputline, so keep a copy.
			append(cmd.textinput, strings::dup(inputline));
		};
	case 'g', 'v' =>
		// cmd.arg2 holds the first line of the command list. Without a
		// trailing backslash it is the whole list.
		if (!strings::hassuffix(cmd.arg2, '\\')) {
			append(cmd.textinput, strings::dup(cmd.arg2));
			yield :input;
		};
		// Note that rtrim removes every trailing backslash, not just one.
		append(cmd.textinput,
			strings::dup(strings::rtrim(cmd.arg2, '\\')));

		// Read continuation lines until one does not end in a backslash.
		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};
			// workaround scan_line undelimited last line.
			// note, GNU ed would use "p\n" instead.
			if (inputline == "") {
				append(cmd.textinput, strings::dup("\n"));
				break;
			};
			if (!strings::hassuffix(inputline, '\\')) {
				append(cmd.textinput, strings::dup(inputline));
				break;
			};
			append(cmd.textinput,
				strings::dup(strings::rtrim(inputline, '\\')));
		};
	case 's' =>
		// Nothing more to read when the first line was complete.
		if (ready)
			yield;

		// The replacement continues on following lines.
		for (true) {
			let inputline = match (bufio::scan_line(input)?) {
			case let s: const str =>
				yield s;
			case io::EOF =>
				break;
			};

			let t = strings::iter(inputline);
			let (part, seen_delim) = scan_item(&t, cmd.delim);

			// cmd.textinput holds the replacement text
			append(cmd.textinput, strings::dup(part));

			// No closing delimiter and a trailing backslash: the
			// replacement continues on yet another line.
			if (!seen_delim && strings::hassuffix(inputline, '\\'))
				continue;

			strings::next(&t); // skip delim
			let (count, global, printmode) = scan_substitute_flags(&t);
			//debug("count={} global={}", count, global);
			cmd.count = count;
			cmd.flag_global = global;
			cmd.printmode = printmode;

			scan_end_assert(&t)?;

			break;
		};
	};

	for (let i = 0z; i < len(cmd.textinput); i += 1)
		debug("parse() cmd.textinput[{}]=<{}>", i, cmd.textinput[i]);

	return cmd;
};
    136 
// Parses whatever follows the command name on the command line and stores the
// result in cmd. The iterator must be positioned right after the command
// name, and cmd.name must already be set.
//
// Returns true when the command is complete on this line, and false when
// further input lines are expected: always for 'a', 'c' and 'i', and for
// 'g', 'v' and 's' when the line ends with a backslash.
//
// Aborts when cmd.name is not one of the names handled below.
fn parse_cmdargs(cmd: *Command, t: *strings::iterator) (bool | ParseError) = {
	switch (cmd.name) {
	// [ ]
	// Empty command: no arguments to parse.
	case NUL =>
		return true;

	// (q|Q)
	case 'q', 'Q' =>
		scan_end_assert(t)?;
		return true;

	// .[ <file>]
	// The file name must be separated from the command name by at least
	// one blank; with no blank, only the end of the line is acceptable.
	case 'e', 'E', 'f', 'r' =>
		if (scan_blanks(t) == 0)
			match (strings::next(t)) {
			case let r: rune =>
				return r: UnexpectedSuffix;
			case void =>
				return true;
			};
		cmd.arg1 = scan_rest(t);
		return true;

	// w(q|[ <file>])
	// As above, except that a directly attached 'q' is stored as the
	// command suffix.
	case 'w' =>
		if (scan_blanks(t) == 0)
			match (strings::next(t)) {
			case let r: rune =>
				if (r == 'q') {
					cmd.suffix = r;
					return true;
				} else {
					return r: InvalidSuffix;
				};
			case void =>
				return true;
			};
		cmd.arg1 = scan_rest(t);
		return true;

	// k<x>
	// The mark rune is stored as the suffix without further validation.
	case 'k' =>
		match (strings::next(t)) {
		case let r: rune =>
			cmd.suffix = r;
		case void =>
			return ExpectedMark;
		};
		scan_end_assert(t)?;
		return true;

	// !<shellcmd>
	case '!' =>
		cmd.arg1 = scan_rest(t);
		return true;

	// .[s]   where 's' is '(l|n|p)'
	case 'd', 'h', 'H', 'j', 'l', 'n', 'p', 'P', 'u', '=' =>
		cmd.printmode = scan_printmode(t);
		scan_end_assert(t)?;
		return true;

	// .[s]
	// Input-mode commands: the text follows on subsequent lines, so the
	// command is never complete here.
	case 'a', 'c', 'i' =>
		cmd.printmode = scan_printmode(t);
		scan_end_assert(t)?;
		return false;

	// .[s][ ]<addr>
	// The destination address is kept as unparsed text in cmd.arg1.
	case 'm', 't' =>
		cmd.printmode = scan_printmode(t);
		cmd.arg1 = scan_rest(t);
		return true;

	// ./<regex>[/]   where delimiter '/' is arbitrary
	case 'G', 'V' =>
		cmd.delim = match (strings::next(t)) {
		case void =>
			return ExpectedArgument;
		case let r: rune =>
			yield if (r == ' ') {
				return InvalidDelimiter;
			} else {
				yield r;
			};
		};
		cmd.arg1 = scan_item(t, cmd.delim).0;
		strings::next(t); // scan delimiter if exists
		scan_end_assert(t)?;
		return true;

	// ./<regex>/<cmdlist...>
	case 'g', 'v' =>
		cmd.delim = match (strings::next(t)) {
		case void =>
			return ExpectedArgument;
		case let r: rune =>
			yield if (r == ' ') {
				return InvalidDelimiter;
			} else {
				yield r;
			};
		};
		cmd.arg1 = scan_item(t, cmd.delim).0;
		strings::next(t); // scan delimiter if exists
		cmd.arg2 = scan_rest(t);
		// scan_rest leaves the iterator at the end of the line, so
		// stepping back yields the last rune of the raw line. A
		// trailing backslash means the command list continues.
		if (strings::prev(t) as rune == '\\') {
			return false;
		};
		return true;

	// s/<regex>/[<replace>[/[<flags>]]]
	case 's' =>
		cmd.delim = match (strings::next(t)) {
		case void =>
			return ExpectedArgument;
		case let r: rune =>
			yield if (r == ' ') {
				return InvalidDelimiter;
			} else {
				yield r;
			};
		};
		cmd.arg1 = scan_item(t, cmd.delim).0;
		// The delimiter between regex and replacement is mandatory.
		match (strings::next(t)) {
		case rune => void;
		case void =>
			return ExpectedDelimiter;
		};
		// The replacement text is stored as the first textinput entry.
		append(cmd.textinput, scan_item(t, cmd.delim).0);
		match (strings::next(t)) {
		case rune => void;
		case void =>
			// No closing delimiter: the command is complete unless
			// the line ends with a backslash.
			// NOTE(review): this compares the (rune | void) result
			// directly, unlike the 'g'/'v' branch which uses
			// `as rune` -- confirm this is intended.
			if (strings::prev(t) == '\\')
				return false
			else
				return true;
		};
		let (count, global, printmode) = scan_substitute_flags(t);
		cmd.count = count;
		cmd.flag_global = global;
		cmd.printmode = printmode;
		scan_end_assert(t)?;
		return true;

	case '&' =>
		scan_end_assert(t)?;
		return true;

	case =>
		abort();
	};
};
    290 
    291 fn scan_addrs(t: *strings::iterator) []Address = {
    292 	let addrs: []Address = [];
    293 	let specialfirst = false;
    294 
    295 	scan_blanks(t);
    296 	match (strings::next(t)) {
    297 	case void =>
    298 		return addrs;
    299 	case let r: rune =>
    300 		switch (r) {
    301 		case ',' =>
    302 			specialfirst = true;
    303 			append(addrs, Address{
    304 				addrform = 1z,
    305 				lineoffset = 0,
    306 				setcurrentline = false,
    307 			});
    308 		case ';' =>
    309 			specialfirst = true;
    310 			append(addrs, Address{
    311 				addrform = CurrentLine,
    312 				lineoffset = 0,
    313 				setcurrentline = true,
    314 			});
    315 		case =>
    316 			strings::prev(t);
    317 		};
    318 	};
    319 
    320 	for (true) {
    321 		let addr = match (scan_addr(t)) {
    322 		case void =>
    323 			yield if (specialfirst) {
    324 				yield Address{
    325 					addrform = LastLine,
    326 					lineoffset = 0,
    327 					...
    328 				};
    329 			} else if (len(addrs) > 0) {
    330 				yield addrs[len(addrs)-1];
    331 			} else {
    332 				break;
    333 			};
    334 		case let a: Address =>
    335 			yield a;
    336 		};
    337 
    338 		specialfirst = false;
    339 
    340 		scan_blanks(t);
    341 		match (strings::next(t)) {
    342 		case void =>
    343 			append(addrs, addr);
    344 			break;
    345 		case let r: rune =>
    346 			switch (r) {
    347 			case ',' =>
    348 				append(addrs, addr);
    349 			case ';' =>
    350 				addr.setcurrentline = true;
    351 				append(addrs, addr);
    352 			case =>
    353 				append(addrs, addr);
    354 				strings::prev(t);
    355 				break;
    356 			};
    357 		};
    358 	};
    359 
    360 	// debug("scan_addrs(): len(addrs)={}", len(addrs));
    361 
    362 	return addrs;
    363 };
    364 
    365 fn scan_addr(t: *strings::iterator) (Address | void) = {
    366 	scan_blanks(t);
    367 	let r = match (strings::next(t)) {
    368 	case void =>
    369 		return void;
    370 	case let r: rune =>
    371 		yield r;
    372 	};
    373 
    374 	// debug("scan_addr(): r={}", r);
    375 
    376 	const addrform: (AddressForm | void) = switch (r) {
    377 	case '.' =>
    378 		yield CurrentLine;
    379 	case '$' =>
    380 		yield LastLine;
    381 	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
    382 		strings::prev(t);
    383 		yield scan_size(t);
    384 	case '\'' =>
    385 		yield scan_mark(t);
    386 	case '/' =>
    387 		const rad = RegexAddr{
    388 			expr = scan_item(t, '/').0,
    389 			direction = true,
    390 		};
    391 		strings::next(t);
    392 		yield rad;
    393 	case '?' =>
    394 		const rad = RegexAddr{
    395 			expr = scan_item(t, '?').0,
    396 			direction = false,
    397 		};
    398 		strings::next(t);
    399 		yield rad;
    400 	case =>
    401 		strings::prev(t);
    402 		yield void;
    403 	};
    404 
    405 	const offs = scan_offsets(t);
    406 
    407 	if (addrform is void && len(offs) == 0)
    408 		return void;
    409 
    410 	const addrform: AddressForm =
    411 		if (addrform is void)
    412 			CurrentLine
    413 		else
    414 			addrform as AddressForm;
    415 
    416 	let addr = Address{
    417 		addrform = addrform,
    418 		lineoffset = 0,
    419 		...
    420 	};
    421 
    422 	for (let i = 0z; i < len(offs); i += 1) {
    423 		addr.lineoffset += offs[i];
    424 	};
    425 
    426 	return addr;
    427 };
    428 
    429 fn scan_offsets(t: *strings::iterator) []int = {
    430 	let offs: []int = [];
    431 
    432 	for (true) {
    433 		scan_blanks(t);
    434 
    435 		match (strings::next(t)) {
    436 		case void =>
    437 			return offs;
    438 		case let r: rune =>
    439 			if (r == '+') {
    440 				append(offs, scan_offset(t));
    441 			} else if (r == '-') {
    442 				append(offs, -scan_offset(t));
    443 			} else if (ascii::isdigit(r)) {
    444 				strings::prev(t);
    445 				append(offs, scan_size(t): int);
    446 			} else {
    447 				strings::prev(t);
    448 				break;
    449 			};
    450 		};
    451 	};
    452 
    453 	return offs;
    454 };
    455 
    456 fn scan_offset(t: *strings::iterator) int = {
    457 	match (strings::next(t)) {
    458 	case void =>
    459 		return 1;
    460 	case let r: rune =>
    461 		strings::prev(t);
    462 		if (ascii::isdigit(r)) {
    463 			return scan_size(t): int;
    464 		} else {
    465 			return 1;
    466 		};
    467 	};
    468 };
    469 
    470 fn scan_cmdname(t: *strings::iterator) (rune | UnknownCommand) = {
    471 	scan_blanks(t);
    472 	let r = match (strings::next(t)) {
    473 	case void =>
    474 		return NUL;
    475 	case let r: rune =>
    476 		yield r;
    477 	};
    478 
    479 	switch (r) {
    480 	case
    481 	'a', 'c', 'd', 'e', 'E', 'f', 'g', 'G', 'h', 'H',
    482 	'i', 'j', 'k', 'l', 'm', 'n', 'p', 'P', 'q', 'Q',
    483 	'r', 's', 't', 'u', 'v', 'V', 'w', '=', '!', '&',
    484 	=>
    485 		return r;
    486 	case =>
    487 		return r: UnknownCommand;
    488 	};
    489 };
    490 
    491 fn scan_printmode(t: *strings::iterator) PrintMode = {
    492 	let r = match (strings::next(t)) {
    493 	case void =>
    494 		return PrintMode::NONE;
    495 	case let r: rune =>
    496 		yield r;
    497 	};
    498 
    499 	switch (r) {
    500 	case 'p' =>
    501 		return PrintMode::PLAIN;
    502 	case 'n' =>
    503 		return PrintMode::NUMBER;
    504 	case 'l' =>
    505 		return PrintMode::LIST;
    506 	case =>
    507 		strings::prev(t);
    508 		return PrintMode::NONE;
    509 	};
    510 };
    511 
    512 fn scan_rest(t: *strings::iterator) str = {
    513 	// TODO: just use [[strings::iterstr]]?
    514 	let rs: []rune = [];
    515 	for (true) {
    516 		match (strings::next(t)) {
    517 		case void =>
    518 			break;
    519 		case let r: rune =>
    520 			append(rs, r);
    521 		};
    522 	};
    523 	return strings::trim(strings::fromrunes(rs));
    524 };
    525 
    526 fn scan_item(t: *strings::iterator, delim: rune) (str, bool) = {
    527 	let rs: []rune = [];
    528 	let seen_delim = false;
    529 	for (true) {
    530 		let r = match (strings::next(t)) {
    531 		case void =>
    532 			break;
    533 		case let r: rune =>
    534 			yield r;
    535 		};
    536 		if (r == '\\') {
    537 			match (strings::next(t)) {
    538 			case void =>
    539 				break; // TODO: Error here? how?
    540 			case let r: rune =>
    541 				if (r == delim) {
    542 					seen_delim = true;
    543 					append(rs, r);
    544 				} else {
    545 					append(rs, ['\\', r]...);
    546 				};
    547 				continue;
    548 			};
    549 		} else if (r == delim) {
    550 			seen_delim = true;
    551 			strings::prev(t);
    552 			break;
    553 		};
    554 		append(rs, r);
    555 	};
    556 	return (strings::fromrunes(rs), seen_delim);
    557 };
    558 
    559 fn scan_mark(t: *strings::iterator) rune = {
    560 	match (strings::next(t)) {
    561 	case void =>
    562 		abort(); // TODO: aborts?
    563 	case let r: rune =>
    564 		if (ascii::isalpha(r)) { // TODO: cover all mark chars
    565 			return r;
    566 		} else {
    567 			abort();
    568 		};
    569 	};
    570 };
    571 
    572 fn scan_substitute_flags(t: *strings::iterator) (size, bool, PrintMode) = {
    573 	let count = 0z;
    574 	let global = false;
    575 	let printmode = PrintMode::NONE;
    576 
    577 	for (true) {
    578 		let r = match (strings::next(t)) {
    579 		case void =>
    580 			break;
    581 		case let r: rune =>
    582 			yield r;
    583 		};
    584 
    585 		switch (r) {
    586 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
    587 			strings::prev(t);
    588 			count = scan_size(t);
    589 		case 'g' =>
    590 			global = true;
    591 		case 'p' =>
    592 			printmode = PrintMode::PLAIN;
    593 		case 'n' =>
    594 			printmode = PrintMode::NUMBER;
    595 		case 'l' =>
    596 			printmode = PrintMode::LIST;
    597 		case =>
    598 			break;
    599 		};
    600 	};
    601 
    602 	return (count, global, printmode);
    603 };
    604 
    605 fn scan_size(t: *strings::iterator) size = {
    606 	let begin = *t;
    607 	// reimplement this function using another iterator
    608 	for (true) {
    609 		let r = match (strings::next(t)) {
    610 		case void =>
    611 			break;
    612 		case let r: rune =>
    613 			yield r;
    614 		};
    615 
    616 		if (!ascii::isdigit(r)) {
    617 			strings::prev(t);
    618 			break;
    619 		};
    620 	};
    621 
    622 	let num = strings::slice(&begin, t);
    623 
    624 	// TODO: return void instead?
    625 	if (num == "") {
    626 		return 0z;
    627 	};
    628 
    629 	match (strconv::stoz(num)) {
    630 	case (strconv::invalid | strconv::overflow) =>
    631 		abort("Invalid"); // TODO: propagate?
    632 	case let z: size =>
    633 		return z;
    634 	};
    635 };
    636 
    637 fn scan_blanks(t: *strings::iterator) size = {
    638 	let sz = 0z; // runes, not bytes
    639 	for (true) {
    640 		match (strings::next(t)) {
    641 		case void =>
    642 			break;
    643 		case let r: rune =>
    644 			if (!ascii::isblank(r)) {
    645 				strings::prev(t);
    646 				break;
    647 			};
    648 			sz += 1;
    649 		};
    650 	};
    651 	return sz;
    652 };
    653 
    654 fn scan_end_assert(t: *strings::iterator) (void | TrailingCharacters) = {
    655 	scan_blanks(t);
    656 	match (strings::next(t)) {
    657 	case rune =>
    658 		return TrailingCharacters;
    659 	case void =>
    660 		return void;
    661 	};
    662 };