commit 069d28c2684701bf66bf53b577559f29bee0b3c0
parent a8e619879d75b301e07586e855d679c52b36cb14
Author: Byron Torres <b@torresjrjr.com>
Date: Tue, 25 Jan 2022 17:06:47 +0000
olson: new TZif parser
Signed-off-by: Byron Torres <b@torresjrjr.com>
Diffstat:
2 files changed, 296 insertions(+), 61 deletions(-)
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -1187,7 +1187,7 @@ time_chrono() {
time_olson() {
gen_srcs time::olson \
olson.ha
- gen_ssa time::olson time time::chrono datetime
+ gen_ssa time::olson endian errors fs io os path strings time time::chrono
}
types() {
diff --git a/time/olson/olson.ha b/time/olson/olson.ha
@@ -1,79 +1,314 @@
+use endian;
+use errors;
+use fs;
+use io;
+use os;
+use path;
+use strings;
use time;
use time::chrono;
-use datetime;
+
+// Returned when data does not conform to the TZif format. The parser in this
+// file uses it for a wrong magic number, an unsupported version byte,
+// inconsistent counts or indices, and input that ends early.
+export type invalidtzif = !void;
// Parses and retrieves a [[chrono::timezone]] from the system zoneinfo
-// database, or if applicable, from an internal selection of timezones.
-export fn tz(id: str) chrono::locality = {
-	switch (id) {
+// database, or if applicable, from an internal selection of timezones. All
+// Olson timezones default to using the [[chrono::UTC]] timescale and
+// [[chrono::EARTH_DAY]] daylength.
+//
+// The names "Local", "UTC", "TAI" and "MTC" return copies of the matching
+// [[chrono]] timezones without touching the filesystem. Any other name is
+// joined onto "/usr/share/zoneinfo/" and the file found there is parsed as
+// TZif data.
+//
+// Fails if the path cannot be assembled, if the file cannot be opened or
+// read, or if its contents are not valid TZif ([[invalidtzif]]).
+//
+// TODO: tidy up errors?
+// TODO: return locality instead? see below.
+export fn tz(name: str) (chrono::timezone | errors::overflow | fs::error | io::error | invalidtzif) = {
+	switch (name) {
	case "Local" =>
-		return chrono::local;
+		// Returning a timezone instead of a locality (pointer to
+		// timezone) avoids hidden allocation, but if we decide that
+		// chrono::local might change during the lifetime of a long
+		// running program (say, some daemon which is smart enough to
+		// update its zones as the months go by), we lose this
+		// ability.
+		return *chrono::local;
	case "UTC" =>
-		return chrono::UTC_Z;
+		return *chrono::UTC_Z;
	case "TAI" =>
-		return chrono::TAI_Z;
+		return *chrono::TAI_Z;
	case "MTC" =>
-		return chrono::MTC_Z;
+		return *chrono::MTC_Z;
	case =>
		void;
	};
-	// TODO: temporary
-	if (id == "Europe/Amsterdam") {
-		return TZ_Europe__Amsterdam;
+	// Try reading from a TZif file installed on the system.
+	//
+	// TODO: try various prefixes for various OSs, try reading from
+	// installed zip files, etc.
+	const prefix = "/usr/share/zoneinfo/";
+
+	// TODO: try names like "./nearby/tzif_file" or "/abs/path/tzif_file"?
+	// The path buffer is modified by path::add, so it is not const.
+	let filepath = path::init();
+	path::add(&filepath, prefix, name)?;
+	const fpath = path::string(&filepath);
+
+	const file = os::open(fpath)?;
+	// Release the handle on every exit path, including a failed parse;
+	// otherwise each call leaks one open file.
+	defer io::close(file);
+
+	const parsed = parse_tzif(file, chrono::timezone {
+		name = name,
+		timescale = &chrono::UTC,
+		daylength = chrono::EARTH_DAY,
+		...
+	})?;
+
+	return parsed;
+};
+
+// Parses data in the TZif format, and returns the given timezone with the
+// fields "zones", "transitions", and "posix_extend" filled.
+//
+// Versions 1, 2 and 3 are accepted. For versions 2 and 3 the leading 32-bit
+// block is skipped and the 64-bit block plus the footer are used. Version 1
+// data has no footer, so "posix_extend" is left empty for it.
+//
+// Returns [[invalidtzif]] if the magic, version, counts or indices are wrong,
+// or if the data ends before everything announced by the header was read. On
+// success the returned timezone owns the allocated "zones" and "transitions"
+// slices and the buffers backing the abbreviation and footer strings.
+//
+// See: https://datatracker.ietf.org/doc/html/rfc8536
+fn parse_tzif(
+	h: io::handle,
+	tz: chrono::timezone,
+) (chrono::timezone | invalidtzif | io::error) = {
+	// These buffers are written to by read(), so they are not const.
+	let buf1: [1]u8 = [0...];
+	let buf4: [4]u8 = [0...];
+	let buf15: [15]u8 = [0...];
+
+	// test for magic "TZif"; compared bytewise so that arbitrary binary
+	// input is never handed to the UTF-8 decoder
+	read(h, buf4)?;
+	if (buf4[0] != 0x54 || buf4[1] != 0x5A
+			|| buf4[2] != 0x69 || buf4[3] != 0x66) {
+		return invalidtzif;
+	};
+
+	// read version
+	read(h, buf1)?;
+	const version = switch (buf1[0]) {
+	case 0 =>
+		yield 1;
+	case '2' =>
+		yield 2;
+	case '3' =>
+		yield 3;
+	case =>
+		return invalidtzif;
+	};
+
+	// skip padding
+	read(h, buf15)?;
+
+	// read counts
+	read(h, buf4)?; let isutcnt = endian::begetu32(buf4);
+	read(h, buf4)?; let isstdcnt = endian::begetu32(buf4);
+	read(h, buf4)?; let leapcnt = endian::begetu32(buf4);
+	read(h, buf4)?; let timecnt = endian::begetu32(buf4);
+	read(h, buf4)?; let typecnt = endian::begetu32(buf4);
+	read(h, buf4)?; let charcnt = endian::begetu32(buf4);
+
+	const is64 = version > 1;
+	if (is64) {
+		// skip to the version 2 data
+		const skip = (
+			// size of version 1 data block
+			timecnt * 4
+			+ timecnt
+			+ typecnt * 6
+			+ charcnt
+			+ leapcnt * 8
+			+ isstdcnt
+			+ isutcnt
+			// magic, version and padding of the version 2 header
+			+ 20
+		);
+		for (let i = 0z; i < skip; i += 1) {
+			read(h, buf1)?;
+		};
+
+		// read version 2 counts
+		read(h, buf4)?; isutcnt = endian::begetu32(buf4);
+		read(h, buf4)?; isstdcnt = endian::begetu32(buf4);
+		read(h, buf4)?; leapcnt = endian::begetu32(buf4);
+		read(h, buf4)?; timecnt = endian::begetu32(buf4);
+		read(h, buf4)?; typecnt = endian::begetu32(buf4);
+		read(h, buf4)?; charcnt = endian::begetu32(buf4);
+	};
+
+	if (typecnt == 0 || charcnt == 0) {
+		return invalidtzif;
+	};
+
+	if (isutcnt != 0 && isutcnt != typecnt) {
+		return invalidtzif;
+	};
+
+	if (isstdcnt != 0 && isstdcnt != typecnt) {
+		return invalidtzif;
+	};
+
+	// TODO: if and how to handle the case where both isutcnt and isstdcnt
+	// are nonzero? check Olson's tz code for behaviour.
+
+	// Sizes are widened to size before multiplying so that large counts
+	// cannot wrap around in 32-bit arithmetic.
+	const timesz: size = if (is64) 8 else 4;
+	const ntime = timecnt: size;
+	const ntype = typecnt: size;
+	const nchar = charcnt: size;
+	const zonesz = ntype * 6;
+	const leapsz = leapcnt: size * (timesz + 4);
+
+	// Result buffers are handed to the caller on success and released
+	// here on any failure.
+	let ok = false;
+
+	// read data
+	//
+	// NOTE(review): the read helpers below are declared to return void, so
+	// a failed read is not visible here. Every buffer length is therefore
+	// compared against the expected count once the data block is read.
+
+	// Scratch buffers: everything needed from them is copied into the
+	// result, so they are released on every exit path.
+	let transition_times: []i64 = [];
+	defer free(transition_times);
+	if (is64) {
+		readitems8(h, &transition_times, ntime);
+	} else {
+		readitems4(h, &transition_times, ntime);
+	};
+
+	let zone_indicies: []u8 = [];
+	defer free(zone_indicies);
+	readbytes(h, &zone_indicies, ntime);
+
+	let zonedata: []u8 = [];
+	defer free(zonedata);
+	readbytes(h, &zonedata, zonesz);
+
+	// The zone abbreviation strings are slices of this buffer.
+	let abbrdata: []u8 = [];
+	defer if (!ok) free(abbrdata);
+	readbytes(h, &abbrdata, nchar);
+
+	// Leap second records and the two indicator arrays are not used, but
+	// must be consumed to reach the footer.
+	let leapdata: []u8 = [];
+	defer free(leapdata);
+	readbytes(h, &leapdata, leapsz);
+
+	let stdwalldata: []u8 = [];
+	defer free(stdwalldata);
+	readbytes(h, &stdwalldata, isstdcnt);
+
+	let normlocaldata: []u8 = [];
+	defer free(normlocaldata);
+	readbytes(h, &normlocaldata, isutcnt);
+
+	// reject truncated data before anything is indexed
+	if (len(transition_times) != ntime
+			|| len(zone_indicies) != ntime
+			|| len(zonedata) != zonesz
+			|| len(abbrdata) != nchar
+			|| len(leapdata) != leapsz
+			|| len(stdwalldata) != isstdcnt: size
+			|| len(normlocaldata) != isutcnt: size) {
+		return invalidtzif;
+	};
+
+	// read footer
+
+	// The footer only exists in version 2+ data; reading it from a
+	// version 1 file would hit EOF and reject a valid file.
+	let footerdata: []u8 = [];
+	defer if (!ok) free(footerdata);
+	if (is64) {
+		read(h, buf1)?;
+		if (buf1[0] != 0x0A) { // '\n' newline
+			return invalidtzif;
		};
+		for (true) {
+			read(h, buf1)?;
+			if (buf1[0] == 0x0A) { // '\n' newline
+				break;
+			};
+			if (buf1[0] == 0x0) { // cannot contain NUL
+				return invalidtzif;
+			};
+			append(footerdata, buf1...);
+		};
+	};
+	// NOTE(review): strings::fromutf8 is given unvalidated file contents
+	// here and for the abbreviations below -- confirm how it behaves on
+	// invalid UTF-8.
+	const posix_extend = strings::fromutf8(footerdata);
+
+	// assemble structured data
+
+	// assemble zones
+	let zones: []chrono::zone = [];
+	defer if (!ok) free(zones);
+	for (let i = 0z; i < ntype; i += 1) {
+		// each local time type record is 6 bytes:
+		// 4 byte offset, 1 byte DST flag, 1 byte abbreviation index
+		const idx = i * 6;
+		let zone = chrono::zone { ... };
+
+		// offset
+		const zoffset = endian::begetu32(zonedata[idx..idx + 4]): i32;
+		if (zoffset == -2147483648) { // -2^31
+			return invalidtzif;
+		};
+		zone.zoffset = zoffset * time::SECOND;
-	return chrono::local;
+		// daylight saving time indicator
+		zone.dst = switch (zonedata[idx + 4]) {
+		case 1u8 =>
+			yield true;
+		case 0u8 =>
+			yield false;
+		case =>
+			return invalidtzif;
+		};
+
+		// abbreviation
+		//
+		// The index is widened to size so the scan below cannot wrap
+		// around when the designation table is longer than 255 bytes.
+		const abbridx = zonedata[idx + 5]: size;
+		if (abbridx >= nchar) {
+			return invalidtzif;
+		};
+		let bytes: []u8 = [];
+		for (let j = abbridx; j < len(abbrdata); j += 1) {
+			if (abbrdata[j] == 0x0) {
+				bytes = abbrdata[abbridx..j];
+				break;
+			};
+		};
+		if (len(bytes) == 0) { // no NUL found, or empty abbreviation
+			return invalidtzif;
+		};
+		zone.abbr = strings::fromutf8(bytes);
+
+		append(zones, zone);
+	};
+
+	// assemble transitions
+	let transitions: []chrono::transition = [];
+	defer if (!ok) free(transitions);
+	for (let i = 0z; i < ntime; i += 1) {
+		const zoneindex = zone_indicies[i]: size;
+		if (zoneindex >= ntype) {
+			return invalidtzif;
+		};
+
+		// stdwalldata and normlocaldata have been omitted,
+		// until they show their utility.
+
+		append(transitions, chrono::transition {
+			when = time::instant {
+				sec = transition_times[i],
+				...
+			},
+			zoneindex = zoneindex: int,
+		});
+	};
+
+	// commit and return data
+	tz.zones = zones;
+	tz.transitions = transitions;
+	tz.posix_extend = posix_extend;
+	ok = true;
+	return tz;
+};
+
+// Error wrapper for [[io::read]] which fills buf completely.
+//
+// [[io::read]] is called again after a short read until len(buf) bytes have
+// been stored, so a source that delivers data in pieces is not mistaken for a
+// corrupt file. Reaching EOF before buf is full returns [[invalidtzif]]; any
+// [[io::error]] is passed through unchanged.
+fn read(h: io::handle, buf: []u8) (void | invalidtzif | io::error) = {
+	let filled = 0z;
+	for (filled < len(buf)) {
+		match (io::read(h, buf[filled..])) {
+		case let err: io::error =>
+			return err;
+		case io::EOF =>
+			return invalidtzif;
+		case let sz: size =>
+			if (sz == 0) {
+				// no progress was made; give up rather than spin
+				return invalidtzif;
+			};
+			filled += sz;
+		};
+	};
};
-// TODO: Here are some temporary timezones until a full parser is written
-
-// Europe/Amsterdam timezone
-export const TZ_Europe__Amsterdam: chrono::locality = &tz_europe__amsterdam;
-
-const tz_europe__amsterdam: chrono::timezone = chrono::timezone{
- name = "Europe/Amsterdam",
- timescale = &chrono::UTC,
- daylength = chrono::EARTH_DAY,
- zones = [
- chrono::zone {
- zoffset = 1 * time::HOUR,
- name = "Central European Time",
- abbr = "CET",
- dst = false,
- },
- chrono::zone {
- zoffset = 2 * time::HOUR,
- name = "Central European Summer Time",
- abbr = "CEST",
- dst = true,
- },
- ],
- transitions = [],
- posix_extend = "",
+// Reads n single bytes from h via [[read]], appending each one to the slice
+// that items points to.
+fn readbytes(h: io::handle, items: *[]u8, n: size) void = {
+	const octet: [1]u8 = [0];
+	for (let remaining = n; remaining > 0; remaining -= 1) {
+		read(h, octet)?;
+		append(items, octet[0]);
+	};
};
-// CET (Central European Time) timezone
-export const TZ_CET: chrono::locality = &tz_cet;
-
-const tz_cet: chrono::timezone = chrono::timezone{
- name = "CET",
- timescale = &chrono::UTC,
- daylength = chrono::EARTH_DAY,
- zones = [
- chrono::zone {
- zoffset = 1 * time::HOUR,
- name = "Central European Time",
- abbr = "CET",
- dst = false,
- },
- chrono::zone {
- zoffset = 2 * time::HOUR,
- name = "Central European Summer Time",
- abbr = "CEST",
- dst = true,
- },
- ],
- transitions = [],
- posix_extend = "",
+// Reads n 8-byte big-endian values from h via [[read]], appending each one,
+// cast to i64, to the slice that items points to.
+fn readitems8(h: io::handle, items: *[]i64, n: size) void = {
+	const raw: [8]u8 = [0...];
+	for (let remaining = n; remaining > 0; remaining -= 1) {
+		read(h, raw)?;
+		append(items, endian::begetu64(raw): i64);
+	};
+};
+
+// Reads n 4-byte big-endian values from h via [[read]], appending each one,
+// sign-extended to i64, to the slice that items points to.
+//
+// NOTE(review): the function is declared to return void yet uses "?" on
+// [[read]]; the signature is kept as is because the caller ignores the
+// result -- confirm how a failed read is meant to surface.
+fn readitems4(h: io::handle, items: *[]i64, n: size) void = {
+	const buf: [4]u8 = [0...];
+	for (let i = 0z; i < n; i += 1) {
+		read(h, buf)?;
+		// 32-bit TZif times are signed (RFC 8536). Go through i32 first
+		// so that instants before 1970 stay negative; casting the u32
+		// straight to i64 would zero-extend them into large positive
+		// values.
+		const it = endian::begetu32(buf): i32: i64;
+		append(items, it);
+	};
};