commit 44d5fc5b2da3c3b54bcd932869380d8a40213dcc
parent ee411a91e24bf048158f117c01e9e7f506347183
Author: Alex McGrath <amk@amk.ie>
Date: Mon, 25 Apr 2022 22:31:11 +0100
uniq: new command
This implements most of uniq excluding the `-f` flag
spec: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html
Diffstat:
M | .gitignore | | | 1 | + |
M | Makefile | | | 2 | ++ |
A | uniq.ha | | | 159 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 162 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,5 @@ sleep
tee
true
uname
+uniq
wc
diff --git a/Makefile b/Makefile
@@ -17,6 +17,7 @@ utils=\
tee \
true \
uname \
+ uniq \
wc
all: $(utils)
@@ -43,4 +44,5 @@ sleep: sleep.ha main/main.ha
tee: tee.ha main/main.ha
true: true.ha
uname: uname.ha main/main.ha
+uniq: uniq.ha main/main.ha
wc: wc.ha main/main.ha
diff --git a/uniq.ha b/uniq.ha
@@ -0,0 +1,159 @@
+use fmt;
+use strings;
+use bufio;
+use main;
+use os;
+use io;
+use getopt;
+use strconv;
+use fs;
+
+// Runtime options for a single uniq invocation.
+type config = struct {
+ // When set, each printed line is prefixed with its repeat count.
+ showcount: bool,
+ // When set, lines whose count is 1 are not printed.
+ removesingletons: bool,
+ // Number of leading characters skipped before two lines are compared.
+ skipchars: uint,
+ // When set, lines whose count is not 1 are not printed.
+ onlyunique: bool,
+
+ // Handle that lines are read from.
+ input: io::handle,
+ // Handle that surviving lines are written to.
+ output: io::handle,
+};
+
+// Writes line to cfg.output unless the active filters suppress it. count is
+// the number of consecutive occurrences of the line that were seen. A line
+// seen once is dropped when cfg.removesingletons is set; a line seen more than
+// once is dropped when cfg.onlyunique is set. With cfg.showcount the count is
+// printed in front of the line. Aborts if the write fails.
+fn maybeprint(cfg: *config, line: str, count: int) void = {
+	const repeated = count != 1;
+	if ((!repeated && cfg.removesingletons)
+			|| (repeated && cfg.onlyunique)) {
+		return;
+	};
+	if (!cfg.showcount) {
+		fmt::fprintln(cfg.output, line)!;
+	} else {
+		fmt::fprintln(cfg.output, count, line)!;
+	};
+};
+
+// Returns the part of line that takes part in duplicate detection: everything
+// after the first cfg.skipchars characters, or the empty string when
+// cfg.skipchars exceeds len(line). The result borrows from line.
+fn comparisonstring(cfg: *config, line: str) str = {
+	return if (cfg.skipchars > len(line)) ""
+		else strings::sub(line, cfg.skipchars, strings::end);
+};
+
+// Reads one line from input and returns it as a newly allocated string which
+// the caller must free. Returns io::EOF when no more lines are available and
+// passes any read error through unchanged.
+fn readline(input: io::handle) (str | io::error | io::EOF) = {
+	match (bufio::scanline(input)) {
+	case let bytes: []u8 =>
+		// The scanned buffer is released once its contents are copied.
+		defer free(bytes);
+		return strings::dup(strings::fromutf8(bytes));
+	case io::EOF =>
+		return io::EOF;
+	case let err: io::error =>
+		return err;
+	};
+};
+
+// Collapses runs of adjacent lines from cfg.input that compare equal (after
+// skipping cfg.skipchars leading characters) and hands the first line of each
+// run, together with the run length, to maybeprint. Returns early without
+// output on empty input and propagates read errors to the caller.
+fn uniq(cfg: *config) (main::error | void) = {
+	// lastline always holds the first line of the run currently being
+	// counted. It is heap-allocated by readline and owned by this function.
+	let lastline = match (readline(cfg.input)) {
+	case let err: io::error =>
+		return err;
+	case io::EOF =>
+		return;
+	case let s: str =>
+		yield s;
+	};
+	defer free(lastline);
+	let count = 1;
+	for (true) {
+		const line = match (readline(cfg.input)) {
+		case let err: io::error =>
+			return err;
+		case io::EOF =>
+			// Flush the final run before finishing.
+			maybeprint(cfg, lastline, count);
+			return;
+		case let s: str =>
+			yield s;
+		};
+
+		if (comparisonstring(cfg, lastline)
+				== comparisonstring(cfg, line)) {
+			// Same run: the new line is not needed any more, so
+			// release it instead of leaking it.
+			free(line);
+			count += 1;
+			continue;
+		};
+
+		// A new run starts. line is already an owned allocation, so
+		// take it over directly rather than duplicating it again.
+		maybeprint(cfg, lastline, count);
+		free(lastline);
+		lastline = line;
+		count = 1;
+	};
+};
+
+// Entry point of the uniq utility. Parses the command line, selects the input
+// and output handles and then runs uniq over them.
+//
+// Operands: [input file [output file]]. Without an input operand, or when it
+// is "-", lines are read from os::stdin; without an output operand they are
+// written to os::stdout. More than two operands is a usage error. The -f
+// option is listed in the help but is not implemented and terminates the
+// program.
+export fn utilmain() (main::error | void) = {
+	const help: []getopt::help = [
+		"report or filter out repeated lines in a file",
+		('c', "prefix line with occurrence count"),
+		('d', "remove lines without duplicates"),
+		('f', "fields", "skip comparing the first N fields"),
+		('s', "chars", "skip the first N characters"),
+		('u', "remove lines with duplicates"),
+		"[input file [output file]]",
+	];
+	const cmd = getopt::parse(os::args, help...);
+	defer getopt::finish(&cmd);
+
+	// Extra operands used to be ignored, which also silently dropped the
+	// requested output file. Treat them as a usage error instead.
+	if (len(cmd.args) > 2) {
+		getopt::printusage(os::stderr, os::args[0], help);
+		fmt::fatal("Error: too many arguments");
+	};
+
+	let cfg = config{...};
+
+	for (let i = 0z; i < len(cmd.opts); i += 1) {
+		const opt = cmd.opts[i];
+		switch (opt.0) {
+		case 'c' =>
+			cfg.showcount = true;
+		case 'd' =>
+			cfg.removesingletons = true;
+		case 'f' =>
+			fmt::fatal("Unimplemented");
+		case 's' =>
+			cfg.skipchars = match (strconv::stou(opt.1)) {
+			case (strconv::invalid | strconv::overflow) =>
+				getopt::printusage(os::stderr, os::args[0], help);
+				fmt::fatal("Error: invalid argument for -s");
+			case let skipchars: uint =>
+				yield skipchars;
+			};
+		case 'u' =>
+			cfg.onlyunique = true;
+		case => abort();
+		};
+	};
+
+	// Input: standard input unless a file operand other than "-" is given.
+	static const stdin_rbuf: [os::BUFSIZ]u8 = [0...];
+	static const stdin_wbuf: [os::BUFSIZ]u8 = [0...];
+	cfg.input = os::stdin;
+	if (len(cmd.args) >= 1 && cmd.args[0] != "-") {
+		match (os::open(cmd.args[0])) {
+		case let err: fs::error =>
+			getopt::printusage(os::stderr, os::args[0], help);
+			fmt::fatal("Error opening '{}': {}",
+				cmd.args[0], fs::strerror(err));
+		case let file: io::file =>
+			cfg.input = &bufio::buffered(file, stdin_rbuf, stdin_wbuf);
+		};
+	};
+	defer io::close(cfg.input)!;
+
+	// Output: standard output unless a second operand names a file.
+	cfg.output = os::stdout;
+	static const stdout_rbuf: [os::BUFSIZ]u8 = [0...];
+	static const stdout_wbuf: [os::BUFSIZ]u8 = [0...];
+	if (len(cmd.args) == 2) {
+		// Truncate on open so that an existing, longer file does not
+		// keep stale data after the newly written lines.
+		match (os::create(cmd.args[1], 0o666,
+				fs::flags::WRONLY | fs::flags::TRUNC)) {
+		case let err: fs::error =>
+			getopt::printusage(os::stderr, os::args[0], help);
+			fmt::fatal("Error opening '{}': {}",
+				cmd.args[1], fs::strerror(err));
+		case let file: io::file =>
+			cfg.output = &bufio::buffered(file, stdout_rbuf, stdout_wbuf);
+		};
+	};
+	defer io::close(cfg.output)!;
+
+	uniq(&cfg)?;
+};