ed

[hare] The standard editor
Log | Files | Refs | README | LICENSE

parse.ha (9113B)


      1 use ascii;
      2 use regex;
      3 use strconv;
      4 use strings;
      5 
      6 type ParseError = (
      7 	UnknownCommand
      8 	| UnexpectedSuffix
      9 	| TrailingCharacters
     10 	| ExpectedArgument
     11 	| ExpectedMark
     12 	| InvalidDelimiter
     13 	| ExpectedDelimiter
     14 );
     15 
     16 type UnknownCommand = !rune;
     17 
     18 type UnexpectedSuffix = !rune;
     19 
     20 type TrailingCharacters = !void;
     21 
     22 type ExpectedArgument = !void;
     23 
     24 type ExpectedMark = !void;
     25 
     26 type InvalidDelimiter = !void;
     27 
     28 type ExpectedDelimiter = !void;
     29 
     30 // Parses inputted commands. Returns true when command is ready.
     31 fn parse(cmd: *Command, input: str) (bool | ParseError) = {
     32 	const iter = strings::iter(input);
     33 
     34 	cmd.addrs = scan_addrs(&iter);
     35 	cmd.cmdname = scan_cmdname(&iter)?;
     36 
     37 	switch (cmd.cmdname) {
     38 	// [ ]
     39 	case '\x00' =>
     40 		return true;
     41 
     42 	// (q|Q)
     43 	case 'q', 'Q' =>
     44 		scan_end_assert(&iter)?;
     45 		return true;
     46 
     47 	// .[ <file>]
     48 	case 'e', 'E', 'f', 'r', 'w' =>
     49 		if (scan_blanks(&iter) == 0) {
     50 			match (strings::next(&iter)) {
     51 			case let r: rune =>
     52 				return r: UnexpectedSuffix;
     53 			case void =>
     54 				return true;
     55 			};
     56 		} else {
     57 			cmd.arg = scan_rest(&iter);
     58 			return true;
     59 		};
     60 
     61 	// k<x>
     62 	case 'k' =>
     63 		match (strings::next(&iter)) {
     64 		case let r: rune =>
     65 			cmd.arg = strings::fromrunes([r]);
     66 		case void =>
     67 			return ExpectedMark;
     68 		};
     69 		scan_end_assert(&iter)?;
     70 		return true;
     71 
     72 	// !<shellcmd>
     73 	case '!' =>
     74 		cmd.arg = scan_rest(&iter);
     75 		return true;
     76 
     77 	// .[s]   where 's' is '(l|n|p)'
     78 	case 'd', 'h', 'H', 'j', 'l', 'n', 'p', 'P', 'u', '=' =>
     79 		cmd.printmode = scan_suffix(&iter);
     80 		scan_end_assert(&iter)?;
     81 		return true;
     82 
     83 	// .[s]
     84 	case 'a', 'c', 'i' =>
     85 		cmd.printmode = scan_suffix(&iter);
     86 		scan_end_assert(&iter)?;
     87 		return false;
     88 
     89 	// .[s][ ]<addr>
     90 	case 'm', 't' =>
     91 		cmd.printmode = scan_suffix(&iter);
     92 		cmd.arg = scan_rest(&iter);
     93 		return true;
     94 
     95 	// ./<regex>[/]   where delimiter '/' is arbitrary
     96 	case 'G', 'V' =>
     97 		const delim = match (strings::next(&iter)) {
     98 		case void =>
     99 			return ExpectedArgument;
    100 		case let r: rune =>
    101 			yield if (r == ' ') {
    102 				return InvalidDelimiter;
    103 			} else {
    104 				yield r;
    105 			};
    106 		};
    107 		cmd.arg = scan_item(&iter, delim);
    108 		strings::next(&iter); // scan delimiter if exists
    109 		scan_end_assert(&iter)?;
    110 		return true;
    111 
    112 	// s/<regex>/[<replace>[/[<flags>]]]
    113 	case 's' =>
    114 		const delim = match (strings::next(&iter)) {
    115 		case void =>
    116 			return ExpectedArgument;
    117 		case let r: rune =>
    118 			yield if (r == ' ') {
    119 				return InvalidDelimiter;
    120 			} else {
    121 				yield r;
    122 			};
    123 		};
    124 		cmd.arg = scan_item(&iter, delim);
    125 		match (strings::next(&iter)) {
    126 		case rune => void;
    127 		case void =>
    128 			return ExpectedDelimiter;
    129 		};
    130 		cmd.arg2 = scan_item(&iter, delim);
    131 		match (strings::next(&iter)) {
    132 		case rune => void;
    133 		case void =>
    134 			return true;
    135 		};
    136 		cmd.arg3 = scan_rest(&iter); // TODO: scan properly here?
    137 		return true;
    138 
    139 	// ./<regex>/<cmdlist...>
    140 	case 'g', 'v' =>
    141 		abort("TODO: parse: global, invglobal");
    142 
    143 	case =>
    144 		abort();
    145 	};
    146 };
    147 
    148 fn scan_addrs(iter: *strings::iterator) []Address = {
    149 	let addrs: []Address = [];
    150 	let specialfirst = false;
    151 
    152 	scan_blanks(iter);
    153 	match (strings::next(iter)) {
    154 	case void =>
    155 		return addrs;
    156 	case let r: rune =>
    157 		switch (r) {
    158 		case ',' =>
    159 			specialfirst = true;
    160 			append(addrs, Address {
    161 				addrtype = 1z,
    162 				lineoffset = 0,
    163 				setcurrentline = false,
    164 			});
    165 		case ';' =>
    166 			specialfirst = true;
    167 			append(addrs, Address {
    168 				addrtype = CurrentLine,
    169 				lineoffset = 0,
    170 				setcurrentline = true,
    171 			});
    172 		case =>
    173 			strings::prev(iter);
    174 		};
    175 	};
    176 
    177 	for (true) {
    178 		let addr = match (scan_addr(iter)) {
    179 		case void =>
    180 			yield if (specialfirst) {
    181 				yield Address {
    182 					addrtype = LastLine,
    183 					lineoffset = 0,
    184 					...
    185 				};
    186 			} else if (len(addrs) > 0) {
    187 				yield addrs[len(addrs)-1];
    188 			} else {
    189 				break;
    190 			};
    191 		case let a: Address =>
    192 			yield a;
    193 		};
    194 
    195 		specialfirst = false;
    196 
    197 		scan_blanks(iter);
    198 		match (strings::next(iter)) {
    199 		case void =>
    200 			append(addrs, addr);
    201 			break;
    202 		case let r: rune =>
    203 			switch (r) {
    204 			case ',' =>
    205 				append(addrs, addr);
    206 			case ';' =>
    207 				addr.setcurrentline = true;
    208 				append(addrs, addr);
    209 			case =>
    210 				append(addrs, addr);
    211 				strings::prev(iter);
    212 				break;
    213 			};
    214 		};
    215 	};
    216 
    217 	// debug("scan_addrs(): len(addrs)={}", len(addrs));
    218 
    219 	return addrs;
    220 };
    221 
    222 fn scan_addr(iter: *strings::iterator) (Address | void) = {
    223 	scan_blanks(iter);
    224 	let r = match (strings::next(iter)) {
    225 	case void =>
    226 		return void;
    227 	case let r: rune =>
    228 		yield r;
    229 	};
    230 
    231 	// debug("scan_addr(): r={}", r);
    232 
    233 	const addrtype: (AddressType | void) =
    234 		if (r == '.') {
    235 			yield CurrentLine;
    236 		} else if (r == '$') {
    237 			yield LastLine;
    238 		} else if (ascii::isdigit(r)) {
    239 			strings::prev(iter);
    240 			yield scan_uint(iter): size;
    241 		} else if (r == '\'') {
    242 			yield scan_mark(iter);
    243 		} else if (r == '/') {
    244 			const rad = RegexAddr {
    245 				expr = scan_item(iter, '/'),
    246 				direction = true,
    247 			};
    248 			strings::next(iter);
    249 			yield rad;
    250 		} else if (r == '?') {
    251 			const rad = RegexAddr {
    252 				expr = scan_item(iter, '?'),
    253 				direction = false,
    254 			};
    255 			strings::next(iter);
    256 			yield rad;
    257 		} else {
    258 			strings::prev(iter);
    259 			yield void;
    260 		};
    261 
    262 	const offs = scan_offsets(iter);
    263 
    264 	const addrtype: AddressType = match (addrtype) {
    265 	case void =>
    266 		yield if (len(offs) == 0) {
    267 			return void;
    268 		} else {
    269 			yield CurrentLine;
    270 		};
    271 	case =>
    272 		yield addrtype as AddressType;
    273 	};
    274 
    275 	let addr = Address {
    276 		addrtype = addrtype,
    277 		lineoffset = 0,
    278 		...
    279 	};
    280 
    281 	for (let i = 0z; i < len(offs); i += 1) {
    282 		addr.lineoffset += offs[i];
    283 	};
    284 
    285 	return addr;
    286 };
    287 
    288 fn scan_offsets(iter: *strings::iterator) []int = {
    289 	let offs: []int = [];
    290 
    291 	for (true) {
    292 		scan_blanks(iter);
    293 
    294 		match (strings::next(iter)) {
    295 		case void =>
    296 			return offs;
    297 		case let r: rune =>
    298 			if (r == '+') {
    299 				append(offs, scan_offset(iter));
    300 			} else if (r == '-') {
    301 				append(offs, -scan_offset(iter));
    302 			} else if (ascii::isdigit(r)) {
    303 				strings::prev(iter);
    304 				append(offs, scan_uint(iter): int);
    305 			} else {
    306 				strings::prev(iter);
    307 				break;
    308 			};
    309 		};
    310 	};
    311 
    312 	return offs;
    313 };
    314 
    315 fn scan_offset(iter: *strings::iterator) int = {
    316 	match (strings::next(iter)) {
    317 	case void =>
    318 		return 1;
    319 	case let r: rune =>
    320 		strings::prev(iter);
    321 		if (ascii::isdigit(r)) {
    322 			return scan_uint(iter): int;
    323 		} else {
    324 			return 1;
    325 		};
    326 	};
    327 };
    328 
    329 fn scan_cmdname(iter: *strings::iterator) (rune | UnknownCommand) = {
    330 	scan_blanks(iter);
    331 	let r = match (strings::next(iter)) {
    332 	case void =>
    333 		return '\x00';
    334 	case let r: rune =>
    335 		yield r;
    336 	};
    337 
    338 	switch (r) {
    339 	case
    340 	'a', 'c', 'd', 'e', 'E', 'f', 'g', 'G', 'h', 'H',
    341 	'i', 'j', 'k', 'l', 'm', 'n', 'p', 'P', 'q', 'Q',
    342 	'r', 's', 't', 'u', 'v', 'V', 'w', '=', '!',
    343 	=>
    344 		return r;
    345 	case =>
    346 		return r: UnknownCommand;
    347 	};
    348 };
    349 
    350 fn scan_suffix(iter: *strings::iterator) PrintMode = {
    351 	let r = match (strings::next(iter)) {
    352 	case void =>
    353 		return PrintMode::NONE;
    354 	case let r: rune =>
    355 		yield r;
    356 	};
    357 
    358 	switch (r) {
    359 	case 'l' =>
    360 		return PrintMode::LIST;
    361 	case 'n' =>
    362 		return PrintMode::NUMBER;
    363 	case 'p' =>
    364 		return PrintMode::PRINT;
    365 	case =>
    366 		strings::prev(iter);
    367 		return PrintMode::NONE;
    368 	};
    369 };
    370 
    371 fn scan_rest(iter: *strings::iterator) str = {
    372 	// TODO: just use [[strings::iterstr]]?
    373 	let rs: []rune = [];
    374 	for (true) {
    375 		match (strings::next(iter)) {
    376 		case void =>
    377 			break;
    378 		case let r: rune =>
    379 			append(rs, r);
    380 		};
    381 	};
    382 	return strings::trim(strings::fromrunes(rs));
    383 };
    384 
    385 fn scan_item(iter: *strings::iterator, end: rune) str = {
    386 	let rs: []rune = [];
    387 	for (true) {
    388 		let r = match (strings::next(iter)) {
    389 		case void =>
    390 			break;
    391 		case let r: rune =>
    392 			yield r;
    393 		};
    394 		if (r == '\\') {
    395 			match (strings::next(iter)) {
    396 			case void =>
    397 				break; // TODO: Error here? how?
    398 			case let r: rune =>
    399 				if (r == end) {
    400 					append(rs, r);
    401 				} else {
    402 					append(rs, ['\\', r]...);
    403 				};
    404 				continue;
    405 			};
    406 		} else if (r == end) {
    407 			strings::prev(iter);
    408 			break;
    409 		};
    410 		append(rs, r);
    411 	};
    412 	return strings::fromrunes(rs);
    413 };
    414 
    415 fn scan_mark(iter: *strings::iterator) rune = {
    416 	match (strings::next(iter)) {
    417 	case void =>
    418 		abort(); // TODO: aborts?
    419 	case let r: rune =>
    420 		if (ascii::isalpha(r)) { // TODO: cover all mark chars
    421 			return r;
    422 		} else {
    423 			abort();
    424 		};
    425 	};
    426 };
    427 
    428 // TODO: rename and appropriate to "scan_size()"?
    429 fn scan_uint(iter: *strings::iterator) uint = {
    430 	let num: []u8 = [];
    431 	defer free(num);
    432 	for (true) {
    433 		let r = match (strings::next(iter)) {
    434 		case void =>
    435 			break;
    436 		case let r: rune =>
    437 			yield r;
    438 		};
    439 
    440 		if (ascii::isdigit(r)) {
    441 			append(num, r: u32: u8);
    442 		} else {
    443 			strings::prev(iter);
    444 			break;
    445 		};
    446 	};
    447 
    448 	if (len(num) == 0) {
    449 		return 0;
    450 	};
    451 
    452 	match (strconv::stou(strings::fromutf8(num)!)) {
    453 	case (strconv::invalid | strconv::overflow) =>
    454 		abort("Invalid");
    455 	case let u: uint =>
    456 		return u;
    457 	};
    458 };
    459 
    460 fn scan_blanks(iter: *strings::iterator) size = {
    461 	let sz = 0z; // runes, not bytes
    462 	for (true) {
    463 		match (strings::next(iter)) {
    464 		case void =>
    465 			break;
    466 		case let r: rune =>
    467 			if (!ascii::isblank(r)) {
    468 				strings::prev(iter);
    469 				break;
    470 			};
    471 			sz += 1;
    472 		};
    473 	};
    474 	return sz;
    475 };
    476 
    477 fn scan_end_assert(iter: *strings::iterator) (void | TrailingCharacters) = {
    478 	scan_blanks(iter);
    479 	match (strings::next(iter)) {
    480 	case rune =>
    481 		return TrailingCharacters;
    482 	case void =>
    483 		return void;
    484 	};
    485 };