ed

[hare] The standard editor
Log | Files | Refs | README | LICENSE

parse.ha (12296B)


      1 use ascii;
      2 use bufio;
      3 use io;
      4 use regex;
      5 use strconv;
      6 use strings;
      7 
        // Every error the command-line parser in this file can report. [[parse]] and
        // [[parse_cmdargs]] return it; the scan_* helpers return individual members.
      8 type ParseError = !(
      9 	UnknownCommand
     10 	| InvalidSuffix
     11 	| UnexpectedSuffix
     12 	| TrailingCharacters
     13 	| ExpectedArgument
     14 	| ExpectedMark
     15 	| InvalidDelimiter
     16 	| ExpectedDelimiter
     17 );
     18 
        // The command character is not one scan_cmdname accepts; carries that rune.
     19 type UnknownCommand = !rune;
     20 
        // 'w' was directly followed by a rune other than 'q'; carries that rune.
     21 type InvalidSuffix = !rune;
     22 
        // 'e', 'E', 'f' or 'r' was directly followed by a non-blank rune; carries it.
     23 type UnexpectedSuffix = !rune;
     24 
        // Non-blank input was found where the command should have ended.
     25 type TrailingCharacters = !void;
     26 
        // The line ended where the delimiter of a regex argument was required.
     27 type ExpectedArgument = !void;
     28 
        // 'k' was not followed by a mark character.
     29 type ExpectedMark = !void;
     30 
        // A space was given as the delimiter of a regex argument.
     31 type InvalidDelimiter = !void;
     32 
        // The line of an 's' command ended right after the regex, before the
        // delimiter that introduces the replacement.
     33 type ExpectedDelimiter = !void;
     34 
        // Command name reported by scan_cmdname when the line holds no command
        // character; parse_cmdargs treats it as a complete command.
     35 def NUL = '\x00';
     36 
     37 // Reads one command line from input and parses it into a Command, reading
        // additional input lines when the command needs them.
        //
        // The line is split into addresses (scan_addrs), a command name
        // (scan_cmdname) and arguments (parse_cmdargs). Depending on the name, more
        // lines are then collected in cmd.textinput:
        //
        // - 'a', 'c', 'i': every following line up to a lone "." or end of input.
        // - 'g', 'v': the command list held in cmd.arg2, plus continuation lines for
        //   as long as each line ends in a backslash.
        // - 's': further replacement lines, when parse_cmdargs reported the command
        //   as not yet complete.
        //
        // Returns io::EOF when no command line could be read. Errors from
        // bufio::scan_line, scan_cmdname, parse_cmdargs and scan_end_assert are
        // propagated with '?'.
        // NOTE(review): InteractionError is declared elsewhere; presumably it covers
        // io::EOF and the scan_line errors -- confirm.
     38 fn parse(input: *bufio::scanner) (Command | ParseError | InteractionError) = {
     39 	let inputline = match (bufio::scan_line(input)?) {
     40 	case let s: const str =>
     41 		yield s;
     42 	case io::EOF =>
     43 		return io::EOF;
     44 	};
     45 
     46 	let t = strings::iter(inputline);
     47 	let cmd = Command{ ... };
     48 	
     49 	cmd.addrs = scan_addrs(&t);
     50 	cmd.name = scan_cmdname(&t)?;
        	// ready is false when the command continues on following lines; it is
        	// only consulted by the 's' branch below.
     51 	let ready = parse_cmdargs(&cmd, &t)?;
     52 
     53 	switch :input (cmd.name) {
     54 	case => void;
     55 	case 'a', 'c', 'i' =>
        		// input mode: collect lines until a lone "." or end of input
     56 		for (true) {
     57 			let inputline = match (bufio::scan_line(input)?) {
     58 			case let s: const str =>
     59 				yield s;
     60 			case io::EOF =>
     61 				break;
     62 			};
     63 
     64 			if (inputline == ".")
     65 				break;
     66 
        			// duplicated before storing; the scanned line itself is not kept
     67 			append(cmd.textinput, strings::dup(inputline));
     68 		};
     69 	case 'g', 'v' =>
        		// no trailing backslash: the command list is this single line
     70 		if (!strings::hassuffix(cmd.arg2, '\\')) {
     71 			append(cmd.textinput, strings::dup(cmd.arg2));
     72 			yield :input;
     73 		};
        		// otherwise store the line without its trailing backslash(es) and
        		// keep reading continuation lines
     74 		append(cmd.textinput,
     75 			strings::dup(strings::rtrim(cmd.arg2, '\\')));
     76 
     77 		for (true) {
     78 			let inputline = match (bufio::scan_line(input)?) {
     79 			case let s: const str =>
     80 				yield s;
     81 			case io::EOF =>
     82 				break;
     83 			};
     84 			// workaround scan_line undelimited last line.
     85 			// note, GNU ed would use "p\n" instead.
     86 			if (inputline == "") {
     87 				append(cmd.textinput, strings::dup("\n"));
     88 				break;
     89 			};
        			// a line without a trailing backslash ends the command list
     90 			if (!strings::hassuffix(inputline, '\\')) {
     91 				append(cmd.textinput, strings::dup(inputline));
     92 				break;
     93 			};
     94 			append(cmd.textinput,
     95 				strings::dup(strings::rtrim(inputline, '\\')));
     96 		};
     97 	case 's' =>
        		// parse_cmdargs already saw the complete command on the first line
     98 		if (ready)
     99 			yield;
    100 
        		// the replacement continues: read lines until one is terminated by
        		// the delimiter or does not end in a backslash
    101 		for (true) {
    102 			let inputline = match (bufio::scan_line(input)?) {
    103 			case let s: const str =>
    104 				yield s;
    105 			case io::EOF =>
    106 				break;
    107 			};
    108 
    109 			let t = strings::iter(inputline);
    110 			let (part, seen_delim) = scan_item(&t, cmd.delim);
    111 
    112 			// cmd.textinput holds the replacement text
    113 			append(cmd.textinput, strings::dup(part));
    114 
    115 			if (!seen_delim && strings::hassuffix(inputline, '\\'))
    116 				continue;
    117 
        			// last replacement line: what follows the delimiter are the flags
    118 			strings::next(&t); // skip delim
    119 			let (count, global, printmode) = scan_substitute_flags(&t);
    120 			//debug("count={} global={}", count, global);
    121 			cmd.count = count;
    122 			cmd.flag_global = global;
    123 			cmd.printmode = printmode;
    124 
    125 			scan_end_assert(&t)?;
    126 
    127 			break;
    128 		};
    129 	};
    130 
    131 	for (let i = 0z; i < len(cmd.textinput); i += 1)
    132 		debug("parse() cmd.textinput[{}]=<{}>", i, cmd.textinput[i]);
    133 
    134 	return cmd;
    135 };
    136 
    137 // Parses whatever follows the command name on the command line and records it
    138 // in cmd. On entry t must sit just past the command character in cmd.name.
    139 //
    140 // Returns true when the command is complete and false when [[parse]] still has
    141 // to read more input lines: always for 'a', 'c' and 'i', for 'g' and 'v' when
    142 // the line ends in a backslash, and for 's' when the line ends in a backslash
    143 // before a delimiter closed the replacement. Malformed input yields a
    144 // [[ParseError]]. Aborts on a cmd.name that scan_cmdname would have rejected.
    145 fn parse_cmdargs(cmd: *Command, t: *strings::iterator) (bool | ParseError) = {
    146 	switch (cmd.name) {
    147 	// [ ]
    148 	case NUL =>
    149 		return true;
    150 
    151 	// (q|Q)
    152 	case 'q', 'Q' =>
    153 		scan_end_assert(t)?;
    154 		return true;
    155 
    156 	// .[ <file>]
    157 	case 'e', 'E', 'f', 'r' =>
    158 		// a file name must be separated from the command by blanks
    159 		if (scan_blanks(t) == 0)
    160 			match (strings::next(t)) {
    161 			case let r: rune =>
    162 				return r: UnexpectedSuffix;
    163 			case done =>
    164 				return true;
    165 			};
    166 		cmd.arg1 = scan_rest(t);
    167 		return true;
    168 
    169 	// w(q|[ <file>])
    170 	case 'w' =>
    171 		if (scan_blanks(t) == 0)
    172 			match (strings::next(t)) {
    173 			case let r: rune =>
    174 				if (r == 'q') {
    175 					cmd.suffix = r;
    176 					return true;
    177 				} else {
    178 					return r: InvalidSuffix;
    179 				};
    180 			case done =>
    181 				return true;
    182 			};
    183 		cmd.arg1 = scan_rest(t);
    184 		return true;
    185 
    186 	// k<x>
    187 	case 'k' =>
    188 		match (strings::next(t)) {
    189 		case let r: rune =>
    190 			cmd.suffix = r;
    191 		case done =>
    192 			return ExpectedMark;
    193 		};
    194 		scan_end_assert(t)?;
    195 		return true;
    196 
    197 	// !<shellcmd>
    198 	case '!' =>
    199 		cmd.arg1 = scan_rest(t);
    200 		return true;
    201 
    202 	// .[s]   where 's' is '(l|n|p)'
    203 	case 'd', 'h', 'H', 'j', 'l', 'n', 'p', 'P', 'u', '=' =>
    204 		cmd.printmode = scan_printmode(t);
    205 		scan_end_assert(t)?;
    206 		return true;
    207 
    208 	// .[s]
    209 	case 'a', 'c', 'i' =>
    210 		cmd.printmode = scan_printmode(t);
    211 		scan_end_assert(t)?;
    212 		return false;
    213 
    214 	// .[s][ ]<addr>
    215 	case 'm', 't' =>
    216 		cmd.printmode = scan_printmode(t);
    217 		cmd.arg1 = scan_rest(t);
    218 		return true;
    219 
    220 	// ./<regex>[/]   where delimiter '/' is arbitrary
    221 	case 'G', 'V' =>
    222 		cmd.delim = scan_delim(t)?;
    223 		cmd.arg1 = scan_item(t, cmd.delim).0;
    224 		strings::next(t); // scan delimiter if exists
    225 		scan_end_assert(t)?;
    226 		return true;
    227 
    228 	// ./<regex>/<cmdlist...>
    229 	case 'g', 'v' =>
    230 		cmd.delim = scan_delim(t)?;
    231 		cmd.arg1 = scan_item(t, cmd.delim).0;
    232 		strings::next(t); // scan delimiter if exists
    233 		cmd.arg2 = scan_rest(t);
    234 		// scan_rest consumed the line, so prev() yields its last rune
    235 		if (strings::prev(t) as rune == '\\') {
    236 			return false;
    237 		};
    238 		return true;
    239 
    240 	// s/<regex>/[<replace>[/[<flags>]]]
    241 	case 's' =>
    242 		cmd.delim = scan_delim(t)?;
    243 		cmd.arg1 = scan_item(t, cmd.delim).0;
    244 		match (strings::next(t)) {
    245 		case rune => void;
    246 		case done =>
    247 			return ExpectedDelimiter;
    248 		};
    249 		append(cmd.textinput, scan_item(t, cmd.delim).0);
    250 		match (strings::next(t)) {
    251 		case rune => void;
    252 		case done =>
    253 			// The replacement was not closed on this line. The
    254 			// delimiter has already been consumed from t, so prev()
    255 			// always yields a rune here.
    256 			if (strings::prev(t) as rune == '\\') {
    257 				return false;
    258 			};
    259 			return true;
    260 		};
    261 		let (count, global, printmode) = scan_substitute_flags(t);
    262 		cmd.count = count;
    263 		cmd.flag_global = global;
    264 		cmd.printmode = printmode;
    265 		scan_end_assert(t)?;
    266 		return true;
    267 
    268 	case '&' =>
    269 		scan_end_assert(t)?;
    270 		return true;
    271 
    272 	case =>
    273 		abort();
    274 	};
    275 };
    276 
    277 // Reads the rune that delimits a regex argument. Fails with ExpectedArgument
    278 // when the line has ended and with InvalidDelimiter when the rune is a space.
    279 fn scan_delim(t: *strings::iterator) (rune | ParseError) = {
    280 	match (strings::next(t)) {
    281 	case done =>
    282 		return ExpectedArgument;
    283 	case let r: rune =>
    284 		if (r == ' ') {
    285 			return InvalidDelimiter;
    286 		};
    287 		return r;
    288 	};
    289 };
    290 
    291 // Scans the address list in front of a command name and returns it; the list
    292 // is empty when the line carries no address. Scanning stops at the first rune
    293 // that is neither part of an address nor a separator, and that rune is left
    294 // unread.
    295 //
    296 // A leading ',' contributes line 1 and a leading ';' the current line (with
    297 // setcurrentline set) as first address. Where no address follows a separator,
    298 // the last line is used directly after such a leading separator, and the
    299 // previously scanned address is repeated otherwise. An address followed by
    300 // ';' gets setcurrentline set.
    301 fn scan_addrs(t: *strings::iterator) []Address = {
    302 	let list: []Address = [];
    303 	let leading_sep = false;
    304 
    305 	scan_blanks(t);
    306 	const first = match (strings::next(t)) {
    307 	case let r: rune =>
    308 		yield r;
    309 	case done =>
    310 		return list;
    311 	};
    312 
    313 	if (first == ',') {
    314 		leading_sep = true;
    315 		append(list, Address{
    316 			addrform = 1z,
    317 			lineoffset = 0,
    318 			setcurrentline = false,
    319 		});
    320 	} else if (first == ';') {
    321 		leading_sep = true;
    322 		append(list, Address{
    323 			addrform = CurrentLine,
    324 			lineoffset = 0,
    325 			setcurrentline = true,
    326 		});
    327 	} else {
    328 		// not a separator: hand the rune back to scan_addr
    329 		strings::prev(t);
    330 	};
    331 
    332 	for (true) {
    333 		let cur = match (scan_addr(t)) {
    334 		case let a: Address =>
    335 			yield a;
    336 		case void =>
    337 			// nothing scanned and nothing to fall back on: no more addresses
    338 			if (!leading_sep && len(list) == 0) {
    339 				break;
    340 			};
    341 			yield if (leading_sep) Address{
    342 				addrform = LastLine,
    343 				lineoffset = 0,
    344 				...
    345 			} else list[len(list) - 1];
    346 		};
    347 		leading_sep = false;
    348 
    349 		scan_blanks(t);
    350 		const sep = strings::next(t);
    351 		if (sep is rune && sep as rune == ';') {
    352 			cur.setcurrentline = true;
    353 		};
    354 		append(list, cur);
    355 
    356 		match (sep) {
    357 		case done =>
    358 			break;
    359 		case let r: rune =>
    360 			if (r != ',' && r != ';') {
    361 				strings::prev(t);
    362 				break;
    363 			};
    364 		};
    365 	};
    366 
    367 	return list;
    368 };
    364 
    365 // Scans a single address: an optional base ('.', '$', a line number, a mark
    366 // introduced by "'", /regex/ or ?regex?) followed by any number of offsets.
    367 // The offsets are summed into lineoffset. An address consisting of offsets
    368 // only is relative to the current line.
    369 //
    370 // Returns void when neither a base nor an offset is found; a rune that does
    371 // not start an address is left unread. Aborts on an invalid mark (see
    372 // scan_mark) or an out-of-range number (see scan_size).
    373 fn scan_addr(t: *strings::iterator) (Address | void) = {
    374 	scan_blanks(t);
    375 	let r = match (strings::next(t)) {
    376 	case done =>
    377 		return void;
    378 	case let r: rune =>
    379 		yield r;
    380 	};
    381 
    382 	// debug("scan_addr(): r={}", r);
    383 
    384 	const addrform: (AddressForm | void) = switch (r) {
    385 	case '.' =>
    386 		yield CurrentLine;
    387 	case '$' =>
    388 		yield LastLine;
    389 	case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
    390 		// scan_size needs to see the first digit as well
    391 		strings::prev(t);
    392 		yield scan_size(t);
    393 	case '\'' =>
    394 		yield scan_mark(t);
    395 	case '/' =>
    396 		const rad = RegexAddr{
    397 			expr = scan_item(t, '/').0,
    398 			direction = true,
    399 		};
    400 		strings::next(t); // skip the closing delimiter, if any
    401 		yield rad;
    402 	case '?' =>
    403 		const rad = RegexAddr{
    404 			expr = scan_item(t, '?').0,
    405 			direction = false,
    406 		};
    407 		strings::next(t); // skip the closing delimiter, if any
    408 		yield rad;
    409 	case =>
    410 		strings::prev(t);
    411 		yield void;
    412 	};
    413 
    414 	// scan_offsets hands back a heap-allocated slice that is only used in
    415 	// this function, so release it on every exit path.
    416 	const offs = scan_offsets(t);
    417 	defer free(offs);
    418 
    419 	if (addrform is void && len(offs) == 0)
    420 		return void;
    421 
    422 	const addrform: AddressForm =
    423 		if (addrform is void)
    424 			CurrentLine
    425 		else
    426 			addrform as AddressForm;
    427 
    428 	let addr = Address{
    429 		addrform = addrform,
    430 		lineoffset = 0,
    431 		...
    432 	};
    433 
    434 	for (let i = 0z; i < len(offs); i += 1) {
    435 		addr.lineoffset += offs[i];
    436 	};
    437 
    438 	return addr;
    439 };
    428 
    429 // Scans a run of address offsets and returns them as a newly allocated slice
    430 // which the caller must free. Each offset is '+' or '-' optionally followed by
    431 // a number (a bare sign counts as 1), or a plain number, which counts as
    432 // positive. Blanks between offsets are skipped. Scanning ends at the end of
    433 // input or at the first other rune, which is left unread.
    434 fn scan_offsets(t: *strings::iterator) []int = {
    435 	let offsets: []int = [];
    436 
    437 	for (true) {
    438 		scan_blanks(t);
    439 
    440 		const r = match (strings::next(t)) {
    441 		case let r: rune =>
    442 			yield r;
    443 		case done =>
    444 			break;
    445 		};
    446 
    447 		switch (r) {
    448 		case '+' =>
    449 			append(offsets, scan_offset(t));
    450 		case '-' =>
    451 			append(offsets, -scan_offset(t));
    452 		case =>
    453 			// either the first digit of a number or not ours at all
    454 			strings::prev(t);
    455 			if (!ascii::isdigit(r)) {
    456 				break;
    457 			};
    458 			append(offsets, scan_size(t): int);
    459 		};
    460 	};
    461 
    462 	return offsets;
    463 };
    455 
    456 // Scans the magnitude that follows a '+' or '-' sign: the number at the
    457 // current position, or 1 when no digit follows. Nothing is consumed in the
    458 // latter case.
    459 fn scan_offset(t: *strings::iterator) int = {
    460 	const digit_follows = match (strings::next(t)) {
    461 	case let r: rune =>
    462 		// only peeking: scan_size reads the digits itself
    463 		strings::prev(t);
    464 		yield ascii::isdigit(r);
    465 	case done =>
    466 		yield false;
    467 	};
    468 
    469 	if (!digit_follows) {
    470 		return 1;
    471 	};
    472 	return scan_size(t): int;
    473 };
    469 
    470 // Skips leading blanks and reads the command character. Returns NUL when the
    471 // line ends before any command character, and an UnknownCommand carrying the
    472 // rune when it is not one of the supported commands.
    473 fn scan_cmdname(t: *strings::iterator) (rune | UnknownCommand) = {
    474 	scan_blanks(t);
    475 
    476 	match (strings::next(t)) {
    477 	case done =>
    478 		return NUL;
    479 	case let name: rune =>
    480 		switch (name) {
    481 		case 'a', 'c', 'i' =>
    482 			return name;
    483 		case 'e', 'E', 'f', 'r', 'w' =>
    484 			return name;
    485 		case 'g', 'G', 'v', 'V', 's', '&' =>
    486 			return name;
    487 		case 'd', 'h', 'H', 'j', 'k', 'l', 'm', 'n' =>
    488 			return name;
    489 		case 'p', 'P', 'q', 'Q', 't', 'u', '=', '!' =>
    490 			return name;
    491 		case =>
    492 			return name: UnknownCommand;
    493 		};
    494 	};
    495 };
    490 
    491 // Scans an optional print suffix: 'p' gives PLAIN, 'n' gives NUMBER and 'l'
    492 // gives LIST. Any other rune is left unread and, like the end of input,
    493 // results in NONE.
    494 fn scan_printmode(t: *strings::iterator) PrintMode = {
    495 	match (strings::next(t)) {
    496 	case done =>
    497 		return PrintMode::NONE;
    498 	case let r: rune =>
    499 		if (r == 'p') {
    500 			return PrintMode::PLAIN;
    501 		} else if (r == 'n') {
    502 			return PrintMode::NUMBER;
    503 		} else if (r == 'l') {
    504 			return PrintMode::LIST;
    505 		};
    506 
    507 		// not a print suffix: hand the rune back
    508 		strings::prev(t);
    509 		return PrintMode::NONE;
    510 	};
    511 };
    511 
    512 // Consumes everything that is left in t and returns it with surrounding
    513 // whitespace trimmed. The result is a fresh allocation owned by the caller.
    514 fn scan_rest(t: *strings::iterator) str = {
    515 	// TODO: just use [[strings::iterstr]]?
    516 	let rs: []rune = [];
    517 	defer free(rs);
    518 	for (let r: rune => strings::next(t)) {
    519 		append(rs, r);
    520 	};
    521 
    522 	// strings::trim returns a sub-string of its argument. Returning that
    523 	// directly would hand out a pointer into the middle of an allocation,
    524 	// which the caller could not free. Return a copy of the trimmed text
    525 	// and release the scratch buffers instead.
    526 	const untrimmed = strings::fromrunes(rs);
    527 	defer free(untrimmed);
    528 	return strings::dup(strings::trim(untrimmed));
    529 };
    520 
    521 // Scans text up to, but not including, the next unescaped delim. Returns the
    522 // scanned text as a newly allocated string, and whether an unescaped
    523 // delimiter ended the scan (it is then left unread) rather than the end of
    524 // input.
    525 //
    526 // A backslash followed by delim produces the bare delimiter in the result;
    527 // any other backslash sequence is copied unchanged. A lone backslash at the
    528 // end of input is dropped.
    529 fn scan_item(t: *strings::iterator, delim: rune) (str, bool) = {
    530 	let rs: []rune = [];
    531 	// strings::fromrunes copies the runes, so the buffer can be released
    532 	defer free(rs);
    533 	let seen_delim = false;
    534 	for (let r: rune => strings::next(t)) {
    535 		if (r == '\\') {
    536 			match (strings::next(t)) {
    537 			case done =>
    538 				break; // TODO: Error here? how?
    539 			case let r: rune =>
    540 				if (r == delim) {
    541 					// An escaped delimiter is literal text; it
    542 					// must not count as the terminator, or a
    543 					// continued replacement line holding one
    544 					// would be taken for the last line.
    545 					append(rs, r);
    546 				} else {
    547 					append(rs, ['\\', r]...);
    548 				};
    549 				continue;
    550 			};
    551 		} else if (r == delim) {
    552 			seen_delim = true;
    553 			strings::prev(t);
    554 			break;
    555 		};
    556 		append(rs, r);
    557 	};
    558 	return (strings::fromrunes(rs), seen_delim);
    559 };
    547 
    548 // Reads the mark character that follows "'" in an address. Only ASCII letters
    549 // are accepted; end of input or any other rune aborts the program.
    550 fn scan_mark(t: *strings::iterator) rune = {
    551 	const mark = match (strings::next(t)) {
    552 	case let r: rune =>
    553 		yield r;
    554 	case done =>
    555 		abort(); // TODO: aborts?
    556 	};
    557 
    558 	if (!ascii::isalpha(mark)) { // TODO: cover all mark chars
    559 		abort();
    560 	};
    561 	return mark;
    562 };
    560 
    561 // Scans the flags that may follow the closing delimiter of an 's' command: a
    562 // decimal count, 'g', and the print suffixes 'p', 'n' and 'l', in any order.
    563 // Returns (count, global, printmode); count is 0 and printmode is NONE when
    564 // absent, and a later flag of the same kind overrides an earlier one.
    565 //
    566 // Scanning stops at the end of input or at the first rune that is not a
    567 // flag. That rune is left unread so that the caller's scan_end_assert can
    568 // reject it.
    569 fn scan_substitute_flags(t: *strings::iterator) (size, bool, PrintMode) = {
    570 	let count = 0z;
    571 	let global = false;
    572 	let printmode = PrintMode::NONE;
    573 
    574 	for (let r => (strings::next(t))) {
    575 		switch (r) {
    576 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' =>
    577 			// scan_size needs to see the first digit as well
    578 			strings::prev(t);
    579 			count = scan_size(t);
    580 		case 'g' =>
    581 			global = true;
    582 		case 'p' =>
    583 			printmode = PrintMode::PLAIN;
    584 		case 'n' =>
    585 			printmode = PrintMode::NUMBER;
    586 		case 'l' =>
    587 			printmode = PrintMode::LIST;
    588 		case =>
    589 			// Push the rune back: consuming it here would let one
    590 			// invalid trailing character (as in "s/a/b/x") slip past
    591 			// the end-of-command check.
    592 			strings::prev(t);
    593 			break;
    594 		};
    595 	};
    596 
    597 	return (count, global, printmode);
    598 };
    586 
    587 // Reads the run of ASCII digits at the current position and returns its
    588 // decimal value. Returns 0 without consuming anything when no digit is
    589 // present. Aborts when strconv::stoz rejects the digits, for instance because
    590 // the value does not fit in a size.
    591 fn scan_size(t: *strings::iterator) size = {
    592 	// remember where the number starts so it can be sliced out afterwards
    593 	const start = *t;
    594 
    595 	for (true) {
    596 		match (strings::next(t)) {
    597 		case done =>
    598 			break;
    599 		case let r: rune =>
    600 			if (!ascii::isdigit(r)) {
    601 				strings::prev(t);
    602 				break;
    603 			};
    604 		};
    605 	};
    606 
    607 	const digits = strings::slice(&start, t);
    608 
    609 	// TODO: return void instead?
    610 	if (len(digits) == 0) {
    611 		return 0z;
    612 	};
    613 
    614 	match (strconv::stoz(digits)) {
    615 	case let z: size =>
    616 		return z;
    617 	case (strconv::invalid | strconv::overflow) =>
    618 		abort("Invalid"); // TODO: propagate?
    619 	};
    620 };
    611 
    612 // Skips over blanks (as defined by ascii::isblank) and returns how many runes
    613 // were skipped. The first non-blank rune is left unread.
    614 fn scan_blanks(t: *strings::iterator) size = {
    615 	let skipped = 0z; // runes, not bytes
    616 	for (true) {
    617 		match (strings::next(t)) {
    618 		case done =>
    619 			break;
    620 		case let r: rune =>
    621 			if (!ascii::isblank(r)) {
    622 				strings::prev(t);
    623 				break;
    624 			};
    625 			skipped += 1;
    626 		};
    627 	};
    628 	return skipped;
    629 };
    623 
    624 // Checks that only blanks remain in t. Returns TrailingCharacters when any
    625 // other rune is found; that rune is consumed.
    626 fn scan_end_assert(t: *strings::iterator) (void | TrailingCharacters) = {
    627 	scan_blanks(t);
    628 	if (strings::next(t) is done) {
    629 		return void;
    630 	};
    631 	return TrailingCharacters;
    632 };