hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 871db194d529b70a9a09d0d4c671472bebf17ab4
parent 65449ddbbbf39659bfaf84a2cb78510409a4ab7a
Author: Drew DeVault <sir@cmpwn.com>
Date:   Wed, 29 Jun 2022 19:34:03 +0200

format::tar: new module

Just contains a reader for now.

Diffstat:
A format/tar/README | 8 ++++++++
A format/tar/reader.ha | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A format/tar/types.ha | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M scripts/gen-stdlib | 8 ++++++++
M stdlib.mk | 34 ++++++++++++++++++++++++++++++++++
5 files changed, 310 insertions(+), 0 deletions(-)

diff --git a/format/tar/README b/format/tar/README @@ -0,0 +1,8 @@ +This module provides an implementation of the tar archive format for Unix. The +specific format implemented is USTAR, however, it is capable of reading most tar +variants which are backwards-compatible with the original format (e.g. GNU tar). + +To read an archive, use [[read]] to create a reader, and [[next]] to enumerate +its entries. The return value from [[next]] contains the file metadata and is an +[[io::stream]] that you may read the file contents from. You may call [[skip]] +to skip an archive entry without reading it. diff --git a/format/tar/reader.ha b/format/tar/reader.ha @@ -0,0 +1,195 @@ +// License: MPL-2.0 +// (c) 2022 Drew DeVault <sir@cmpwn.com> +use bufio; +use bytes; +use endian; +use errors; +use io; +use strconv; +use strings; +use strio; + +export type reader = struct { + src: io::handle, + name: [255]u8, +}; + +// Creates a new reader for a tar file. Use [[next]] to iterate through entries +// present in the tar file. +export fn read(src: io::handle) reader = { + return reader { + src = src, + ... + }; +}; + +// Returns the next entry from a tar [[reader]]. Parts of this structure +// (specifically the file name) are borrowed from the reader itself and will not +// be valid after subsequent calls. +// +// If the return value is a file (i.e. entry.etype == entry_type::FILE), the +// caller must either call [[io::read]] using the return value until it returns +// [[io::EOF]], or call [[skip]] to seek to the next entry in the archive. +// +// Note that reading from the header will modify the file size. +export fn next(rd: *reader) (entry | error | io::EOF) = { + static let buf: [BLOCKSIZE]u8 = [0...]; + match (io::read(rd.src, buf)?) { + case let z: size => + if (z != len(buf)) { + return truncated; + }; + case io::EOF => + return truncated; + }; + + if (zeroed(buf)) { + match (io::read(rd.src, buf)?) 
{ + case let z: size => + if (z != len(buf)) { + return truncated; + }; + case io::EOF => + return truncated; + }; + if (!zeroed(buf)) { + return truncated; + }; + return io::EOF; + }; + + let ent = entry { ... }; + const reader = bufio::fixed(buf, io::mode::READ); + const name = readstr(&reader, 100); + ent.mode = readoct(&reader, 8)?; + ent.uid = readoct(&reader, 8)?; + ent.gid = readoct(&reader, 8)?; + ent.fsize = readsize(&reader, 12)?; + ent.mtime = readoct(&reader, 12)?; + ent.checksum = readoct(&reader, 8)?; + ent.etype = readoct(&reader, 1)?: entry_type; + ent.link = readstr(&reader, 100); + + if (ent.etype == entry_type::FILE) { + ent.vtable = &file_vtable; + ent.src = rd.src; + ent.orig = ent.fsize; + ent.remain = ent.orig; + }; + + const ustar = readstr(&reader, 6); + if (ustar != "ustar") { + ent.name = name; + return ent; + }; + + const version = readstr(&reader, 2); + // XXX: We could check the version here + ent.uname = readstr(&reader, 32); + ent.gname = readstr(&reader, 32); + ent.devmajor = readoct(&reader, 8)?; + ent.devminor = readoct(&reader, 8)?; + const prefix = readstr(&reader, 155); + let writer = strio::fixed(rd.name); + strio::join(&writer, prefix, name)!; + ent.name = strio::string(&writer); + return ent; +}; + +// Seeks the underlying tar file to the entry following this one. +export fn skip(ent: *entry) (void | io::error) = { + let amt = ent.remain; + if (amt % BLOCKSIZE != 0) { + amt += BLOCKSIZE - (amt % BLOCKSIZE); + }; + match (io::seek(ent.src, amt: io::off, io::whence::CUR)) { + case io::off => + return; + case io::error => + yield; + }; + io::copy(io::empty, ent)?; +}; + +const file_vtable: io::vtable = io::vtable { + reader = &file_read, + ... 
+}; + +fn file_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = { + let ent = s: *ent_reader; + assert(ent.vtable == &file_vtable); + if (ent.remain == 0) { + return io::EOF; + }; + + let z = len(buf); + if (z > ent.remain) { + z = ent.remain; + }; + z = match (io::read(ent.src, buf[..z])?) { + case let z: size => + yield z; + case io::EOF => + // TODO: Truncated flag + return io::EOF; + }; + ent.remain -= z; + + // Read until we reach the block size + static let buf: [BLOCKSIZE]u8 = [0...]; + if (ent.remain == 0 && ent.orig % BLOCKSIZE != 0) { + let remain = BLOCKSIZE - (ent.orig % BLOCKSIZE); + for (remain > 0) { + match (io::read(ent.src, buf[..remain])?) { + case let z: size => + remain -= z; + case io::EOF => + // TODO: Set a truncated flag or something + break; + }; + }; + }; + + return z; +}; + +fn readstr(rd: *bufio::memstream, ln: size) str = { + const buf = match (bufio::borrowedread(rd, ln)) { + case let buf: []u8 => + assert(len(buf) == ln); + yield buf; + case io::EOF => + abort(); + }; + return strings::fromc(buf: *[*]u8: *const char); +}; + +fn readoct(rd: *bufio::memstream, ln: size) (uint | invalid) = { + const string = readstr(rd, ln); + match (strconv::stoub(string, strconv::base::OCT)) { + case let u: uint => + return u; + case => + return invalid; + }; +}; + +fn readsize(rd: *bufio::memstream, ln: size) (size | invalid) = { + const string = readstr(rd, ln); + match (strconv::stozb(string, strconv::base::OCT)) { + case let z: size => + return z; + case => + return invalid; + }; +}; + +fn zeroed(buf: []u8) bool = { + for (let i = 0z; i < len(buf); i += 1) { + if (buf[i] != 0) { + return false; + }; + }; + return true; +}; diff --git a/format/tar/types.ha b/format/tar/types.ha @@ -0,0 +1,65 @@ +// License: MPL-2.0 +// (c) 2022 Drew DeVault <sir@cmpwn.com> +use io; + +// The size of each block in a tar file. +export def BLOCKSIZE: size = 512; + +// A file or directory in a tar file. 
+export type entry = struct { + ent_reader, + name: str, + mode: uint, + uid: uint, + gid: uint, + fsize: size, + mtime: uint, + checksum: uint, + etype: entry_type, + link: str, + uname: str, + gname: str, + devmajor: u64, + devminor: u64, +}; + +export type ent_reader = struct { + vtable: io::stream, + src: io::handle, + orig: size, + remain: size, +}; + +// A tar file entry. Note that some systems create tarballs with additional +// vendor-specific values for the entry type, so a default case is recommended +// when switching against this. +export type entry_type = enum u8 { + FILE, + HARDLINK, + SYMLINK, + CHARDEV, + BLOCKDEV, + DIRECTORY, + FIFO, +}; + +// Returned if the source file size is not aligned on [[BLOCKSIZE]]. +export type truncated = !void; + +// Returned if the source file does not contain a valid ustar archive. +export type invalid = !void; + +// Tagged union of all possible error types. +export type error = !(truncated | invalid | io::error); + +// Converts an [[error]] to a human-friendly representation. 
+export fn strerror(err: error) const str = { + match (err) { + case truncated => + return "Tar file is truncated"; + case invalid => + return "Tar file is invalid"; + case let err: io::error => + return io::strerror(err); + }; +}; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -621,6 +621,13 @@ format_ini() { gen_ssa format::ini bufio encoding::utf8 fmt io strings } +format_tar() { + gen_srcs format::tar \ + types.ha \ + reader.ha + gen_ssa format::tar +} + fs() { gen_srcs fs \ types.ha \ @@ -1433,6 +1440,7 @@ fmt fnmatch format::elf format::ini +format::tar fs getopt glob diff --git a/stdlib.mk b/stdlib.mk @@ -362,6 +362,12 @@ stdlib_deps_any += $(stdlib_format_ini_any) stdlib_format_ini_linux = $(stdlib_format_ini_any) stdlib_format_ini_freebsd = $(stdlib_format_ini_any) +# gen_lib format::tar (any) +stdlib_format_tar_any = $(HARECACHE)/format/tar/format_tar-any.o +stdlib_deps_any += $(stdlib_format_tar_any) +stdlib_format_tar_linux = $(stdlib_format_tar_any) +stdlib_format_tar_freebsd = $(stdlib_format_tar_any) + # gen_lib fs (any) stdlib_fs_any = $(HARECACHE)/fs/fs-any.o stdlib_deps_any += $(stdlib_fs_any) @@ -1177,6 +1183,17 @@ $(HARECACHE)/format/ini/format_ini-any.ssa: $(stdlib_format_ini_any_srcs) $(stdl @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nformat::ini \ -t$(HARECACHE)/format/ini/format_ini.td $(stdlib_format_ini_any_srcs) +# format::tar (+any) +stdlib_format_tar_any_srcs = \ + $(STDLIB)/format/tar/types.ha \ + $(STDLIB)/format/tar/reader.ha + +$(HARECACHE)/format/tar/format_tar-any.ssa: $(stdlib_format_tar_any_srcs) $(stdlib_rt) + @printf 'HAREC \t$@\n' + @mkdir -p $(HARECACHE)/format/tar + @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nformat::tar \ + -t$(HARECACHE)/format/tar/format_tar.td $(stdlib_format_tar_any_srcs) + # fs (+any) stdlib_fs_any_srcs = \ $(STDLIB)/fs/types.ha \ @@ -2479,6 +2496,12 @@ testlib_deps_any += $(testlib_format_ini_any) testlib_format_ini_linux = $(testlib_format_ini_any) 
testlib_format_ini_freebsd = $(testlib_format_ini_any) +# gen_lib format::tar (any) +testlib_format_tar_any = $(TESTCACHE)/format/tar/format_tar-any.o +testlib_deps_any += $(testlib_format_tar_any) +testlib_format_tar_linux = $(testlib_format_tar_any) +testlib_format_tar_freebsd = $(testlib_format_tar_any) + # gen_lib fs (any) testlib_fs_any = $(TESTCACHE)/fs/fs-any.o testlib_deps_any += $(testlib_fs_any) @@ -3321,6 +3344,17 @@ $(TESTCACHE)/format/ini/format_ini-any.ssa: $(testlib_format_ini_any_srcs) $(tes @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nformat::ini \ -t$(TESTCACHE)/format/ini/format_ini.td $(testlib_format_ini_any_srcs) +# format::tar (+any) +testlib_format_tar_any_srcs = \ + $(STDLIB)/format/tar/types.ha \ + $(STDLIB)/format/tar/reader.ha + +$(TESTCACHE)/format/tar/format_tar-any.ssa: $(testlib_format_tar_any_srcs) $(testlib_rt) + @printf 'HAREC \t$@\n' + @mkdir -p $(TESTCACHE)/format/tar + @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nformat::tar \ + -t$(TESTCACHE)/format/tar/format_tar.td $(testlib_format_tar_any_srcs) + # fs (+any) testlib_fs_any_srcs = \ $(STDLIB)/fs/types.ha \