commit 871db194d529b70a9a09d0d4c671472bebf17ab4
parent 65449ddbbbf39659bfaf84a2cb78510409a4ab7a
Author: Drew DeVault <sir@cmpwn.com>
Date: Wed, 29 Jun 2022 19:34:03 +0200
format::tar: new module
Just contains a reader for now.
Diffstat:
5 files changed, 310 insertions(+), 0 deletions(-)
diff --git a/format/tar/README b/format/tar/README
@@ -0,0 +1,8 @@
+This module provides an implementation of the tar archive format for Unix. The
+specific format implemented is USTAR; however, it is capable of reading most tar
+variants which are backwards-compatible with the original format (e.g. GNU tar).
+
+To read an archive, use [[read]] to create a reader, and [[next]] to enumerate
+its entries. The return value from [[next]] contains the file metadata and is an
+[[io::stream]] that you may read the file contents from. You may call [[skip]]
+to skip an archive entry without reading it.
diff --git a/format/tar/reader.ha b/format/tar/reader.ha
@@ -0,0 +1,195 @@
+// License: MPL-2.0
+// (c) 2022 Drew DeVault <sir@cmpwn.com>
+use bufio;
+use bytes;
+use endian;
+use errors;
+use io;
+use strconv;
+use strings;
+use strio;
+
+// State for reading a tar archive. Create one with [[read]].
+export type reader = struct {
+	// The archive being read.
+	src: io::handle,
+	// Backing storage for the name of the USTAR entry most recently
+	// returned by [[next]]. Sized for the longest USTAR path: a 155 byte
+	// prefix, a "/" separator and a 100 byte name.
+	name: [256]u8,
+};
+
+// Prepares a [[reader]] for the tar archive available from src. Call [[next]]
+// repeatedly to walk through the entries stored in the archive.
+export fn read(src: io::handle) reader = {
+	let rd = reader { ... };
+	rd.src = src;
+	return rd;
+};
+
+// Returns the next entry from a tar [[reader]], or [[io::EOF]] once the two
+// zeroed blocks which terminate the archive have been read. [[truncated]] is
+// returned if the archive ends before a complete header or terminator could be
+// read, and [[invalid]] if a header field cannot be parsed or the entry's path
+// does not fit in the reader's name buffer.
+//
+// The strings in the returned entry (name, link, uname, gname) are borrowed
+// from the reader or from an internal static buffer and will not be valid
+// after subsequent calls.
+//
+// If the return value is a file (i.e. entry.etype == entry_type::FILE), the
+// caller must either call [[io::read]] using the return value until it returns
+// [[io::EOF]], or call [[skip]] to seek to the next entry in the archive.
+//
+// Reading from the entry decrements its count of remaining bytes; the fsize
+// field is left untouched.
+export fn next(rd: *reader) (entry | error | io::EOF) = {
+	static let buf: [BLOCKSIZE]u8 = [0...];
+	readblock(rd.src, buf)?;
+
+	if (zeroed(buf)) {
+		// The end of the archive is marked by two zeroed blocks in a
+		// row; a lone zeroed block is treated as a truncated archive.
+		readblock(rd.src, buf)?;
+		if (!zeroed(buf)) {
+			return truncated;
+		};
+		return io::EOF;
+	};
+
+	// The header fields are consumed strictly in the order in which they
+	// are laid out in the block.
+	let ent = entry { ... };
+	const hdr = bufio::fixed(buf, io::mode::READ);
+	const name = readstr(&hdr, 100);
+	ent.mode = readoct(&hdr, 8)?;
+	ent.uid = readoct(&hdr, 8)?;
+	ent.gid = readoct(&hdr, 8)?;
+	ent.fsize = readsize(&hdr, 12)?;
+	ent.mtime = readoct(&hdr, 12)?;
+	ent.checksum = readoct(&hdr, 8)?;
+	ent.etype = readoct(&hdr, 1)?: entry_type;
+	ent.link = readstr(&hdr, 100);
+
+	if (ent.etype == entry_type::FILE) {
+		// Only file entries can be read from as a stream.
+		ent.vtable = &file_vtable;
+		ent.src = rd.src;
+		ent.orig = ent.fsize;
+		ent.remain = ent.orig;
+	};
+
+	const ustar = readstr(&hdr, 6);
+	if (ustar != "ustar") {
+		// Not a USTAR header: the remaining fields are not present.
+		ent.name = name;
+		return ent;
+	};
+
+	const version = readstr(&hdr, 2);
+	// XXX: We could check the version here
+	ent.uname = readstr(&hdr, 32);
+	ent.gname = readstr(&hdr, 32);
+	ent.devmajor = readoct(&hdr, 8)?;
+	ent.devminor = readoct(&hdr, 8)?;
+	const prefix = readstr(&hdr, 155);
+
+	// The full path of a USTAR entry is the prefix and the name separated
+	// by a slash, or the name alone when there is no prefix.
+	if (len(prefix) + len(name) + 1 > len(rd.name)) {
+		return invalid;
+	};
+	let writer = strio::fixed(rd.name);
+	if (len(prefix) == 0) {
+		strio::join(&writer, "/", name)!;
+	} else {
+		strio::join(&writer, "/", prefix, name)!;
+	};
+	ent.name = strio::string(&writer);
+	return ent;
+};
+
+// Fills buf with data from src, calling [[io::read]] again after short reads.
+// Returns [[truncated]] if the source ends before buf has been filled.
+fn readblock(src: io::handle, buf: []u8) (void | error) = {
+	let n = 0z;
+	for (n < len(buf)) {
+		match (io::read(src, buf[n..])?) {
+		case let z: size =>
+			n += z;
+		case io::EOF =>
+			return truncated;
+		};
+	};
+};
+
+// Advances the underlying tar file to the entry following this one, discarding
+// whatever part of this entry's contents has not been read yet. Does nothing
+// if the entry has no unread contents. After a successful call, reading from
+// the entry returns [[io::EOF]].
+export fn skip(ent: *entry) (void | io::error) = {
+	if (ent.remain == 0) {
+		// Either the entry never had contents to read, or it was read
+		// to the end, which already consumes the padding after it.
+		return;
+	};
+
+	// The padding depends on the total size of the file rather than on
+	// what is left of it, because the entry may have been partially read.
+	let amt = ent.remain;
+	if (ent.orig % BLOCKSIZE != 0) {
+		amt += BLOCKSIZE - (ent.orig % BLOCKSIZE);
+	};
+	match (io::seek(ent.src, amt: io::off, io::whence::CUR)) {
+	case io::off =>
+		ent.remain = 0;
+		return;
+	case io::error =>
+		yield;
+	};
+	// Seeking failed (the source may not support it), so read and discard
+	// the rest of the entry instead.
+	io::copy(io::empty, ent)?;
+};
+
+// Stream operations for file entries returned by [[next]]. Only the read
+// operation is set here; the other operations are left at their zero value.
+const file_vtable: io::vtable = io::vtable {
+ reader = &file_read,
+ ...
+};
+
+// Read operation for file entries. Reads at most the number of bytes which
+// remain in the entry, and returns [[io::EOF]] once the entry is exhausted or
+// the source ends early. When the final byte of the entry has been read, the
+// padding up to the next block boundary is consumed as well, which leaves the
+// source positioned at the following header.
+fn file_read(s: *io::stream, buf: []u8) (size | io::EOF | io::error) = {
+	let ent = s: *ent_reader;
+	assert(ent.vtable == &file_vtable);
+	if (ent.remain == 0) {
+		return io::EOF;
+	};
+
+	// Never read beyond the end of this entry's contents.
+	const want = if (len(buf) > ent.remain) ent.remain else len(buf);
+	const nread = match (io::read(ent.src, buf[..want])?) {
+	case let n: size =>
+		yield n;
+	case io::EOF =>
+		// TODO: Truncated flag
+		return io::EOF;
+	};
+	ent.remain -= nread;
+
+	if (ent.remain != 0 || ent.orig % BLOCKSIZE == 0) {
+		// Either more contents follow or there is no padding.
+		return nread;
+	};
+
+	// Read until we reach the block size
+	static let scratch: [BLOCKSIZE]u8 = [0...];
+	let pad = BLOCKSIZE - (ent.orig % BLOCKSIZE);
+	for (pad > 0) {
+		match (io::read(ent.src, scratch[..pad])?) {
+		case let n: size =>
+			pad -= n;
+		case io::EOF =>
+			// TODO: Set a truncated flag or something
+			break;
+		};
+	};
+
+	return nread;
+};
+
+// Consumes a field of ln bytes from the header and returns its contents as a
+// string borrowed from the header buffer. The string ends at the first NUL
+// byte in the field or, if there is none, at the end of the field. Aborts if
+// fewer than ln bytes are left in the header.
+fn readstr(rd: *bufio::memstream, ln: size) str = {
+	const buf = match (bufio::borrowedread(rd, ln)) {
+	case let buf: []u8 =>
+		assert(len(buf) == ln);
+		yield buf;
+	case io::EOF =>
+		abort();
+	};
+	// A field which uses all of its space has no NUL terminator, so the
+	// search for one must not continue into the fields which follow.
+	const end = match (bytes::index(buf, 0: u8)) {
+	case let i: size =>
+		yield i;
+	case void =>
+		yield len(buf);
+	};
+	return strings::fromutf8(buf[..end]);
+};
+
+// Consumes a field of ln bytes from the header and parses it as an octal
+// number. Spaces around the digits are ignored, since numeric fields may be
+// terminated or padded with spaces as well as NUL bytes. A field without any
+// digits is read as zero, which also covers the NUL type flag used for regular
+// files in old archives. Returns [[invalid]] if the field contains anything
+// else which is not an octal number.
+fn readoct(rd: *bufio::memstream, ln: size) (uint | invalid) = {
+	const string = strings::trim(readstr(rd, ln), ' ');
+	if (len(string) == 0) {
+		return 0u;
+	};
+	match (strconv::stoub(string, strconv::base::OCT)) {
+	case let u: uint =>
+		return u;
+	case =>
+		return invalid;
+	};
+};
+
+// Consumes a field of ln bytes from the header and parses it as an octal
+// number of type size. Spaces around the digits are ignored, since numeric
+// fields may be terminated or padded with spaces as well as NUL bytes, and a
+// field without any digits is read as zero. Returns [[invalid]] if the field
+// contains anything else which is not an octal number.
+fn readsize(rd: *bufio::memstream, ln: size) (size | invalid) = {
+	const string = strings::trim(readstr(rd, ln), ' ');
+	if (len(string) == 0) {
+		return 0z;
+	};
+	match (strconv::stozb(string, strconv::base::OCT)) {
+	case let z: size =>
+		return z;
+	case =>
+		return invalid;
+	};
+};
+
+// Reports whether every byte of buf is zero. An empty buffer counts as zeroed.
+fn zeroed(buf: []u8) bool = {
+	// Advance past the leading run of zero bytes; the buffer is zeroed
+	// exactly when that run covers all of it.
+	let i = 0z;
+	for (i < len(buf) && buf[i] == 0) {
+		i += 1;
+	};
+	return i == len(buf);
+};
diff --git a/format/tar/types.ha b/format/tar/types.ha
@@ -0,0 +1,65 @@
+// License: MPL-2.0
+// (c) 2022 Drew DeVault <sir@cmpwn.com>
+use io;
+
+// The size of each block in a tar file, in bytes. Entry headers are read one
+// block at a time, and file contents are padded to a multiple of this size.
+export def BLOCKSIZE: size = 512;
+
+// A file or directory in a tar file, as returned by [[next]]. The numeric
+// fields are parsed from octal text in the entry's header. The uname, gname,
+// devmajor and devminor fields are only filled in for USTAR headers.
+export type entry = struct {
+ // Stream state; for file entries the entry itself can be read from.
+ ent_reader,
+ // Name of the entry.
+ name: str,
+ mode: uint,
+ uid: uint,
+ gid: uint,
+ // Size of the entry's contents as recorded in the header.
+ fsize: size,
+ mtime: uint,
+ // Checksum as recorded in the header. NOTE(review): nothing visible here
+ // verifies it against the header contents.
+ checksum: uint,
+ etype: entry_type,
+ // Contents of the header's link name field.
+ link: str,
+ uname: str,
+ gname: str,
+ devmajor: u64,
+ devminor: u64,
+};
+
+// Stream state embedded at the start of every [[entry]]. It is only
+// initialized by [[next]] for entries of type entry_type::FILE.
+export type ent_reader = struct {
+ // Set to the file entry stream implementation for file entries.
+ vtable: io::stream,
+ // The archive which the entry's contents are read from.
+ src: io::handle,
+ // Total size of the entry's contents; determines the trailing padding.
+ orig: size,
+ // Number of bytes of the contents which have not been read yet.
+ remain: size,
+};
+
+// The type of a tar file entry. [[next]] obtains it by parsing the one-byte
+// type field of the header as an octal number and casting the result to this
+// type. Note that some systems create tarballs with additional
+// vendor-specific values for the entry type, so a default case is recommended
+// when switching against this.
+export type entry_type = enum u8 {
+ FILE,
+ HARDLINK,
+ SYMLINK,
+ CHARDEV,
+ BLOCKDEV,
+ DIRECTORY,
+ FIFO,
+};
+
+// Returned if the source file ends before a complete block could be read, or
+// if the zeroed block which starts the end-of-archive marker is not followed
+// by a second zeroed block.
+export type truncated = !void;
+
+// Returned if the source file does not contain a valid ustar archive, e.g.
+// when a numeric header field cannot be parsed as an octal number.
+export type invalid = !void;
+
+// Tagged union of all possible error types.
+export type error = !(truncated | invalid | io::error);
+
+// Returns a human-friendly description of an [[error]]. I/O errors are
+// described by [[io::strerror]].
+export fn strerror(err: error) const str = {
+	return match (err) {
+	case truncated =>
+		yield "Tar file is truncated";
+	case invalid =>
+		yield "Tar file is invalid";
+	case let err: io::error =>
+		yield io::strerror(err);
+	};
+};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -621,6 +621,13 @@ format_ini() {
gen_ssa format::ini bufio encoding::utf8 fmt io strings
}
+format_tar() {
+	gen_srcs format::tar \
+		types.ha \
+		reader.ha
+	# Every module imported by the sources above is listed as a dependency,
+	# so that it is built before format::tar.
+	gen_ssa format::tar bufio bytes endian errors io strconv strings strio
+}
+
fs() {
gen_srcs fs \
types.ha \
@@ -1433,6 +1440,7 @@ fmt
fnmatch
format::elf
format::ini
+format::tar
fs
getopt
glob
diff --git a/stdlib.mk b/stdlib.mk
@@ -362,6 +362,12 @@ stdlib_deps_any += $(stdlib_format_ini_any)
stdlib_format_ini_linux = $(stdlib_format_ini_any)
stdlib_format_ini_freebsd = $(stdlib_format_ini_any)
+# gen_lib format::tar (any)
+# The linux and freebsd variables below both refer to the single "any" object.
+stdlib_format_tar_any = $(HARECACHE)/format/tar/format_tar-any.o
+stdlib_deps_any += $(stdlib_format_tar_any)
+stdlib_format_tar_linux = $(stdlib_format_tar_any)
+stdlib_format_tar_freebsd = $(stdlib_format_tar_any)
+
# gen_lib fs (any)
stdlib_fs_any = $(HARECACHE)/fs/fs-any.o
stdlib_deps_any += $(stdlib_fs_any)
@@ -1177,6 +1183,17 @@ $(HARECACHE)/format/ini/format_ini-any.ssa: $(stdlib_format_ini_any_srcs) $(stdl
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nformat::ini \
-t$(HARECACHE)/format/ini/format_ini.td $(stdlib_format_ini_any_srcs)
+# format::tar (+any)
+stdlib_format_tar_any_srcs = \
+	$(STDLIB)/format/tar/types.ha \
+	$(STDLIB)/format/tar/reader.ha
+
+# The modules imported by the sources are prerequisites, so that they are built
+# before format::tar is compiled.
+$(HARECACHE)/format/tar/format_tar-any.ssa: $(stdlib_format_tar_any_srcs) $(stdlib_rt) $(stdlib_bufio_$(PLATFORM)) $(stdlib_bytes_$(PLATFORM)) $(stdlib_endian_$(PLATFORM)) $(stdlib_errors_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strconv_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM))
+	@printf 'HAREC \t$@\n'
+	@mkdir -p $(HARECACHE)/format/tar
+	@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nformat::tar \
+		-t$(HARECACHE)/format/tar/format_tar.td $(stdlib_format_tar_any_srcs)
+
# fs (+any)
stdlib_fs_any_srcs = \
$(STDLIB)/fs/types.ha \
@@ -2479,6 +2496,12 @@ testlib_deps_any += $(testlib_format_ini_any)
testlib_format_ini_linux = $(testlib_format_ini_any)
testlib_format_ini_freebsd = $(testlib_format_ini_any)
+# gen_lib format::tar (any)
+# The linux and freebsd variables below both refer to the single "any" object.
+testlib_format_tar_any = $(TESTCACHE)/format/tar/format_tar-any.o
+testlib_deps_any += $(testlib_format_tar_any)
+testlib_format_tar_linux = $(testlib_format_tar_any)
+testlib_format_tar_freebsd = $(testlib_format_tar_any)
+
# gen_lib fs (any)
testlib_fs_any = $(TESTCACHE)/fs/fs-any.o
testlib_deps_any += $(testlib_fs_any)
@@ -3321,6 +3344,17 @@ $(TESTCACHE)/format/ini/format_ini-any.ssa: $(testlib_format_ini_any_srcs) $(tes
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nformat::ini \
-t$(TESTCACHE)/format/ini/format_ini.td $(testlib_format_ini_any_srcs)
+# format::tar (+any)
+testlib_format_tar_any_srcs = \
+	$(STDLIB)/format/tar/types.ha \
+	$(STDLIB)/format/tar/reader.ha
+
+# The modules imported by the sources are prerequisites, so that they are built
+# before format::tar is compiled.
+$(TESTCACHE)/format/tar/format_tar-any.ssa: $(testlib_format_tar_any_srcs) $(testlib_rt) $(testlib_bufio_$(PLATFORM)) $(testlib_bytes_$(PLATFORM)) $(testlib_endian_$(PLATFORM)) $(testlib_errors_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strconv_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM))
+	@printf 'HAREC \t$@\n'
+	@mkdir -p $(TESTCACHE)/format/tar
+	@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nformat::tar \
+		-t$(TESTCACHE)/format/tar/format_tar.td $(testlib_format_tar_any_srcs)
+
# fs (+any)
testlib_fs_any_srcs = \
$(STDLIB)/fs/types.ha \