hautils

[hare] Set of POSIX utilities
Log | Files | Refs | README | LICENSE

uniq.ha (3790B)


      1 use encoding::utf8;
      2 use fmt;
      3 use strings;
      4 use bufio;
      5 use main;
      6 use os;
      7 use io;
      8 use getopt;
      9 use strconv;
     10 use fs;
     11 
// Run-time settings for one uniq invocation: the output filters selected on
// the command line plus the handles lines are read from and written to.
type config = struct {
	// Set by -c: print the repetition count in front of each line.
	showcount: bool,
	// Set by -d: do not print lines that occurred only once in a row.
	removesingletons: bool,
	// Set by -s: number of leading characters left out of comparisons.
	skipchars: uint,
	// Set by -u: do not print lines that were repeated.
	onlyunique: bool,

	// Where lines are read from.
	input: io::handle,
	// Where surviving lines are written.
	output: io::handle,
};
     21 
     22 fn maybeprint(cfg: *config, line: str, count: int) void = {
     23 	if (count == 1 && cfg.removesingletons) {
     24 		return;
     25 	};
     26 	if (count != 1 && cfg.onlyunique) {
     27 		return;
     28 	};
     29 	if (cfg.showcount) {
     30 		fmt::fprintln(cfg.output, count, line)!;
     31 		return;
     32 	};
     33 	fmt::fprintln(cfg.output, line)!;
     34 };
     35 
     36 fn comparisonstring(cfg: *config, line: str) str = {
     37 	if (cfg.skipchars > len(line)) {
     38 		return "";
     39 	};
     40 	return strings::sub(line, cfg.skipchars, strings::end);
     41 };
     42 
     43 fn readline(input: io::handle) (str | io::error | io::EOF) = {
     44 	const rawline = match (bufio::read_line(input)) {
     45 	case let err: io::error =>
     46 		return err;
     47 	case io::EOF =>
     48 		return io::EOF;
     49 	case let rawline: []u8 =>
     50 		yield rawline;
     51 	};
     52 	defer free(rawline);
     53 	match (strings::fromutf8(rawline)) {
     54 	case let s: str =>
     55 		return strings::dup(s);
     56 	case  encoding::utf8::invalid =>
     57 		fmt::fatal("Error: Invalid UTF-8 input");
     58 	};
     59 };
     60 
     61 fn uniq(cfg: *config)  (main::error | void) =  {
     62 	let lastline = match(readline(cfg.input)) {
     63 	case let err: io::error =>
     64 		return err;
     65 	case io::EOF =>
     66 		return;
     67 	case let s: str =>
     68 	     yield s;
     69 	};
     70 	defer free(lastline);
     71 	let count = 1;
     72 	for(true) {
     73 		const line = match(readline(cfg.input)) {
     74 		case let err: io::error =>
     75 			return err;
     76 		case io::EOF =>
     77 			maybeprint(cfg, lastline, count);
     78 			return;
     79 		case let s: str =>
     80 			yield s;
     81 		};
     82 
     83 		let oldcomparison = comparisonstring(cfg, lastline);
     84 		let newcomparison = comparisonstring(cfg, line);
     85 		if (oldcomparison != newcomparison) {
     86 			maybeprint(cfg, lastline, count);
     87 			free(lastline);
     88 			lastline = strings::dup(line);
     89 			count = 1;
     90 			continue;
     91 		};
     92 		count += 1;
     93 	};
     94 };
     95 
     96 export fn utilmain() (main::error | void) = {
     97 	const help: []getopt::help = [
     98 		"report or filter out repeated lines in a file",
     99 		('c', "prefix line with occurance count"),
    100 		('d', "remove lines without duplicates"),
    101 		('f', "fields", "skip comparing the first N fields"),
    102 		('s', "chars", "skip the first N characters"),
    103 		('u', "remove lines with duplicates"),
    104 		"[input file [output file]]",
    105 	];
    106 	const cmd = getopt::parse(os::args, help...);
    107 	defer getopt::finish(&cmd);
    108 
    109 	let cfg = config {
    110 		input = os::stdin,
    111 		output = os::stdout,
    112 		...
    113 	};
    114 
    115 	for (let i = 0z; i < len(cmd.opts); i += 1) {
    116 		const opt = cmd.opts[i];
    117 		switch (opt.0) {
    118 		case 'c' =>
    119 			cfg.showcount = true;
    120 		case 'd' =>
    121 			cfg.removesingletons = true;
    122 		case 'f' =>
    123 			fmt::fatal("Unimplemented");
    124 		case 's' =>
    125 			cfg.skipchars = match (strconv::stou(opt.1)) {
    126 			case (strconv::invalid | strconv::overflow) =>
    127 				main::usage(help, 's');
    128 			case let skipchars: uint =>
    129 				yield skipchars;
    130 			};
    131 		case 'u' =>
    132 			cfg.onlyunique = true;
    133 		case => abort();
    134 		};
    135 	};
    136 
    137 	static const stdin_rbuf: [os::BUFSZ]u8 = [0...];
    138 	static const stdin_wbuf: [os::BUFSZ]u8 = [0...];
    139 	if (len(cmd.args) >= 1 && cmd.args[0] != "-") {
    140 		match (os::open(cmd.args[0])) {
    141 		case let err: fs::error =>
    142 			fmt::fatalf("Error opening '{}': {}",
    143 				cmd.args[0], fs::strerror(err));
    144 		case let file: io::file =>
    145 			cfg.input = &bufio::init(file, stdin_rbuf, stdin_wbuf);
    146 		};
    147 	};
    148 	defer io::close(cfg.input)!;
    149 
    150 	static const stdout_rbuf: [os::BUFSZ]u8 = [0...];
    151 	static const stdout_wbuf: [os::BUFSZ]u8 = [0...];
    152 	if (len(cmd.args) == 2) {
    153 		match (os::create(cmd.args[1], 0o666, fs::flag::WRONLY)) {
    154 		case let err: fs::error =>
    155 			fmt::fatalf("Error opening '{}': {}",
    156 				cmd.args[1], fs::strerror(err));
    157 		case let file: io::file =>
    158 			cfg.output = &bufio::init(file, stdout_rbuf, stdout_wbuf);
    159 		};
    160 	};
    161 	defer io::close(cfg.output)!;
    162 
    163 	uniq(&cfg)?;
    164 };