hautils

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README | LICENSE

commit 44d5fc5b2da3c3b54bcd932869380d8a40213dcc
parent ee411a91e24bf048158f117c01e9e7f506347183
Author: Alex McGrath <amk@amk.ie>
Date:   Mon, 25 Apr 2022 22:31:11 +0100

uniq: new command

This implements most of uniq; the `-f` (skip fields) option is accepted but not yet implemented.

spec: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/uniq.html

Diffstat:
M.gitignore | 1+
MMakefile | 2++
Auniq.ha | 194+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 197 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,5 @@ sleep
 tee
 true
 uname
+uniq
 wc
diff --git a/Makefile b/Makefile
@@ -17,6 +17,7 @@ utils=\
 	tee \
 	true \
 	uname \
+	uniq \
 	wc
 
 all: $(utils)
@@ -43,4 +44,5 @@ sleep: sleep.ha main/main.ha
 tee: tee.ha main/main.ha
 true: true.ha
 uname: uname.ha main/main.ha
+uniq: uniq.ha main/main.ha
 wc: wc.ha main/main.ha
diff --git a/uniq.ha b/uniq.ha
@@ -0,0 +1,194 @@
+use fmt;
+use strings;
+use bufio;
+use main;
+use os;
+use io;
+use getopt;
+use strconv;
+use fs;
+
+// Options and streams for a single uniq run; filled in by utilmain.
+type config = struct {
+	// -c: print the repetition count in front of each line.
+	showcount: bool,
+	// -d: do not print lines that occurred only once.
+	removesingletons: bool,
+	// -s: number of leading characters ignored when comparing lines.
+	skipchars: uint,
+	// -u: do not print lines that occurred more than once.
+	onlyunique: bool,
+
+	input: io::handle,
+	output: io::handle,
+};
+
+// Prints one run of equal adjacent lines to cfg.output unless -d or -u
+// suppresses it. "line" is the text to print and "count" is the length of
+// the run, which is printed first when -c is set.
+fn maybeprint(cfg: *config, line: str, count: int) void = {
+	if (count == 1 && cfg.removesingletons) {
+		return;
+	};
+	if (count != 1 && cfg.onlyunique) {
+		return;
+	};
+	if (cfg.showcount) {
+		fmt::fprintln(cfg.output, count, line)!;
+		return;
+	};
+	fmt::fprintln(cfg.output, line)!;
+};
+
+// Returns the part of "line" that is compared against other lines: the line
+// without its first cfg.skipchars characters. The result may alias "line",
+// so it must not be used after "line" has been freed.
+fn comparisonstring(cfg: *config, line: str) str = {
+	// len() counts bytes, so a line with fewer bytes than skipchars has
+	// nothing left to compare.
+	if (cfg.skipchars > len(line)) {
+		return "";
+	};
+	return strings::sub(line, cfg.skipchars, strings::end);
+};
+
+// Reads one line from "input" and returns it as a newly allocated string
+// which the caller must free, or io::EOF once the input is exhausted.
+fn readline(input: io::handle) (str | io::error | io::EOF) = {
+	const rawline = match (bufio::scanline(input)) {
+	case let err: io::error =>
+		return err;
+	case io::EOF =>
+		return io::EOF;
+	case let rawline: []u8 =>
+		yield rawline;
+	};
+	defer free(rawline);
+	return strings::dup(strings::fromutf8(rawline));
+};
+
+// Collapses runs of adjacent lines of cfg.input whose comparison strings are
+// equal. Each run is handed to maybeprint as its first line together with the
+// number of lines in the run.
+fn uniq(cfg: *config) (main::error | void) = {
+	let lastline = match (readline(cfg.input)) {
+	case let err: io::error =>
+		return err;
+	case io::EOF =>
+		return;
+	case let s: str =>
+		yield s;
+	};
+	defer free(lastline);
+	let count = 1;
+	for (true) {
+		const line = match (readline(cfg.input)) {
+		case let err: io::error =>
+			return err;
+		case io::EOF =>
+			maybeprint(cfg, lastline, count);
+			return;
+		case let s: str =>
+			yield s;
+		};
+
+		const oldcomparison = comparisonstring(cfg, lastline);
+		const newcomparison = comparisonstring(cfg, line);
+		if (oldcomparison != newcomparison) {
+			maybeprint(cfg, lastline, count);
+			free(lastline);
+			// readline already returned an owned copy, so take it
+			// over instead of duplicating it and leaking "line".
+			lastline = line;
+			count = 1;
+			continue;
+		};
+		// Repeated line: only the count is kept, so release the text.
+		free(line);
+		count += 1;
+	};
+};
+
+// Entry point: parses the command line, opens the optional input and output
+// files (standard input and output by default) and runs uniq on them.
+export fn utilmain() (main::error | void) = {
+	const help: []getopt::help = [
+		"report or filter out repeated lines in a file",
+		('c', "prefix line with occurrence count"),
+		('d', "remove lines without duplicates"),
+		('f', "fields", "skip comparing the first N fields"),
+		('s', "chars", "skip the first N characters"),
+		('u', "remove lines with duplicates"),
+		"[input file [output file]]",
+	];
+	const cmd = getopt::parse(os::args, help...);
+	defer getopt::finish(&cmd);
+
+	let cfg = config{...};
+
+	for (let i = 0z; i < len(cmd.opts); i += 1) {
+		const opt = cmd.opts[i];
+		switch (opt.0) {
+		case 'c' =>
+			cfg.showcount = true;
+		case 'd' =>
+			cfg.removesingletons = true;
+		case 'f' =>
+			fmt::fatal("Unimplemented");
+		case 's' =>
+			cfg.skipchars = match (strconv::stou(opt.1)) {
+			case (strconv::invalid | strconv::overflow) =>
+				getopt::printusage(os::stderr, os::args[0], help);
+				fmt::fatal("Error: invalid argument for -s");
+			case let skipchars: uint =>
+				yield skipchars;
+			};
+		case 'u' =>
+			cfg.onlyunique = true;
+		case => abort();
+		};
+	};
+
+	// At most an input file and an output file are accepted; reject
+	// anything more instead of silently ignoring operands.
+	if (len(cmd.args) > 2) {
+		getopt::printusage(os::stderr, os::args[0], help);
+		fmt::fatal("Error: too many operands");
+	};
+
+	static const stdin_rbuf: [os::BUFSIZ]u8 = [0...];
+	static const stdin_wbuf: [os::BUFSIZ]u8 = [0...];
+	cfg.input = os::stdin;
+	// An input operand of "-" selects standard input.
+	if (len(cmd.args) >= 1 && cmd.args[0] != "-") {
+		match (os::open(cmd.args[0])) {
+		case let err: fs::error =>
+			getopt::printusage(os::stderr, os::args[0], help);
+			fmt::fatal("Error opening '{}': {}",
+				cmd.args[0], fs::strerror(err));
+		case let file: io::file =>
+			cfg.input = &bufio::buffered(file, stdin_rbuf, stdin_wbuf);
+		};
+	};
+	defer io::close(cfg.input)!;
+
+	cfg.output = os::stdout;
+	static const stdout_rbuf: [os::BUFSIZ]u8 = [0...];
+	static const stdout_wbuf: [os::BUFSIZ]u8 = [0...];
+	if (len(cmd.args) == 2) {
+		// TRUNC discards the old contents of an existing output file so
+		// that no stale data is left behind the new output.
+		match (os::create(cmd.args[1], 0o666,
+			fs::flags::WRONLY, fs::flags::TRUNC)) {
+		case let err: fs::error =>
+			getopt::printusage(os::stderr, os::args[0], help);
+			fmt::fatal("Error opening '{}': {}",
+				cmd.args[1], fs::strerror(err));
+		case let file: io::file =>
+			cfg.output = &bufio::buffered(file, stdout_rbuf, stdout_wbuf);
+		};
+	};
+	defer io::close(cfg.output)!;
+
+	uniq(&cfg)?;
+};