uniq.ha (3790B)
use encoding::utf8;
use fmt;
use strings;
use bufio;
use main;
use os;
use io;
use getopt;
use strconv;
use fs;

// Options for one run of uniq, filled in by [[utilmain]] from the command
// line.
type config = struct {
	// Set by -c: print the repetition count in front of each line.
	showcount: bool,
	// Set by -d: do not print lines whose count is 1.
	removesingletons: bool,
	// Set by -s: number of leading characters left out of comparisons.
	skipchars: uint,
	// Set by -u: do not print lines whose count is not 1.
	onlyunique: bool,

	// Where lines are read from (stdin unless an input file is given).
	input: io::handle,
	// Where results are written (stdout unless an output file is given).
	output: io::handle,
};

// Writes one group of equal lines to cfg.output, honouring the filter flags.
// "line" is the first line of the group and "count" is the number of
// consecutive lines that compared equal to it. Nothing is written when the
// group is filtered out by removesingletons or onlyunique. Write errors abort
// the program.
fn maybeprint(cfg: *config, line: str, count: int) void = {
	if (count == 1 && cfg.removesingletons) {
		return;
	};
	if (count != 1 && cfg.onlyunique) {
		return;
	};
	if (cfg.showcount) {
		fmt::fprintln(cfg.output, count, line)!;
		return;
	};
	fmt::fprintln(cfg.output, line)!;
};

// Returns the part of "line" that takes part in comparisons, i.e. the line
// with the first cfg.skipchars characters removed. The result borrows from
// "line" and must not outlive it. A line shorter than the skip count compares
// as the empty string.
//
// NOTE(review): the guard uses len(line) while the skipping itself is done by
// strings::sub; confirm both measure in the same unit for non-ASCII input.
fn comparisonstring(cfg: *config, line: str) str = {
	if (cfg.skipchars > len(line)) {
		return "";
	};
	return strings::sub(line, cfg.skipchars, strings::end);
};

// Reads the next line from "input" and returns it as a freshly allocated
// string; the caller must free it. Returns io::EOF when bufio::read_line
// reports end of input and forwards I/O errors unchanged. Input that is not
// valid UTF-8 terminates the program with an error message.
fn readline(input: io::handle) (str | io::error | io::EOF) = {
	const rawline = match (bufio::read_line(input)) {
	case let err: io::error =>
		return err;
	case io::EOF =>
		return io::EOF;
	case let rawline: []u8 =>
		yield rawline;
	};
	// The raw buffer is released on every exit; the success path hands a
	// copy to the caller instead.
	defer free(rawline);
	match (strings::fromutf8(rawline)) {
	case let s: str =>
		return strings::dup(s);
	case encoding::utf8::invalid =>
		fmt::fatal("Error: Invalid UTF-8 input");
	};
};

// Core filter loop: reads cfg.input line by line, groups consecutive lines
// whose comparison strings are equal, and passes each finished group to
// [[maybeprint]]. Returns early without output on empty input and propagates
// read errors to the caller.
fn uniq(cfg: *config) (main::error | void) = {
	// lastline always owns the first line of the group being counted.
	let lastline = match (readline(cfg.input)) {
	case let err: io::error =>
		return err;
	case io::EOF =>
		return;
	case let s: str =>
		yield s;
	};
	defer free(lastline);
	let count = 1;
	for (true) {
		const line = match (readline(cfg.input)) {
		case let err: io::error =>
			return err;
		case io::EOF =>
			// Flush the group that was still being counted.
			maybeprint(cfg, lastline, count);
			return;
		case let s: str =>
			yield s;
		};

		const oldcomparison = comparisonstring(cfg, lastline);
		const newcomparison = comparisonstring(cfg, line);
		if (oldcomparison == newcomparison) {
			// Same group: the new line is not needed any more, so
			// release it right away instead of leaking it.
			free(line);
			count += 1;
			continue;
		};

		// A new group starts: emit the finished one and let lastline
		// take ownership of the line just read (no extra copy).
		maybeprint(cfg, lastline, count);
		free(lastline);
		lastline = line;
		count = 1;
	};
};

// Entry point of the uniq utility: parses the command line, opens the optional
// input and output files and runs [[uniq]].
//
// Usage: uniq [-c] [-d] [-f fields] [-s chars] [-u] [input file [output file]]
// An input file of "-" or no operand at all selects standard input. The -f
// option is accepted but terminates the program with "Unimplemented".
export fn utilmain() (main::error | void) = {
	const help: []getopt::help = [
		"report or filter out repeated lines in a file",
		('c', "prefix line with occurrence count"),
		('d', "remove lines without duplicates"),
		('f', "fields", "skip comparing the first N fields"),
		('s', "chars", "skip the first N characters"),
		('u', "remove lines with duplicates"),
		"[input file [output file]]",
	];
	const cmd = getopt::parse(os::args, help...);
	defer getopt::finish(&cmd);

	let cfg = config {
		input = os::stdin,
		output = os::stdout,
		...
	};

	for (let i = 0z; i < len(cmd.opts); i += 1) {
		const opt = cmd.opts[i];
		switch (opt.0) {
		case 'c' =>
			cfg.showcount = true;
		case 'd' =>
			cfg.removesingletons = true;
		case 'f' =>
			fmt::fatal("Unimplemented");
		case 's' =>
			cfg.skipchars = match (strconv::stou(opt.1)) {
			case (strconv::invalid | strconv::overflow) =>
				main::usage(help, 's');
			case let skipchars: uint =>
				yield skipchars;
			};
		case 'u' =>
			cfg.onlyunique = true;
		case => abort();
		};
	};

	// Backing storage for the buffered streams. These are written to by
	// bufio, so they are declared mutable.
	static let stdin_rbuf: [os::BUFSZ]u8 = [0...];
	static let stdin_wbuf: [os::BUFSZ]u8 = [0...];
	if (len(cmd.args) >= 1 && cmd.args[0] != "-") {
		match (os::open(cmd.args[0])) {
		case let err: fs::error =>
			fmt::fatalf("Error opening '{}': {}",
				cmd.args[0], fs::strerror(err));
		case let file: io::file =>
			cfg.input = &bufio::init(file, stdin_rbuf, stdin_wbuf);
		};
	};
	defer io::close(cfg.input)!;

	static let stdout_rbuf: [os::BUFSZ]u8 = [0...];
	static let stdout_wbuf: [os::BUFSZ]u8 = [0...];
	// The output file is only honoured when exactly two operands are given.
	if (len(cmd.args) == 2) {
		match (os::create(cmd.args[1], 0o666, fs::flag::WRONLY)) {
		case let err: fs::error =>
			fmt::fatalf("Error opening '{}': {}",
				cmd.args[1], fs::strerror(err));
		case let file: io::file =>
			cfg.output = &bufio::init(file, stdout_rbuf, stdout_wbuf);
		};
	};
	defer io::close(cfg.output)!;

	uniq(&cfg)?;
};