hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit 069d28c2684701bf66bf53b577559f29bee0b3c0
parent a8e619879d75b301e07586e855d679c52b36cb14
Author: Byron Torres <b@torresjrjr.com>
Date:   Tue, 25 Jan 2022 17:06:47 +0000

olson: new TZif parser

Signed-off-by: Byron Torres <b@torresjrjr.com>

Diffstat:
M scripts/gen-stdlib | 2 +-
M time/olson/olson.ha | 355 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------
2 files changed, 296 insertions(+), 61 deletions(-)

// time/olson/olson.ha
//
// Build note: scripts/gen-stdlib declares this module's dependencies as
//	gen_ssa time::olson endian errors fs io os path strings time time::chrono

use endian;
use errors;
use fs;
use io;
use os;
use path;
use strings;
use time;
use time::chrono;

// Some TZif data is invalid
export type invalidtzif = !void;

// Parses and retrieves a [[chrono::timezone]] from the system zoneinfo
// database, or if applicable, from an internal selection of timezones. All
// Olson timezones default to using the [[chrono::UTC]] timescale and
// [[chrono::EARTH_DAY]] daylength.
//
// The names "Local", "UTC", "TAI" and "MTC" are answered with copies of the
// corresponding [[chrono]] timezones. Any other name is joined onto the
// zoneinfo prefix, opened, and parsed as a TZif file. Failures to build the
// path, open the file, read it, or validate it are returned to the caller.
//
// TODO: tidy up errors?
// TODO: return locality instead? see below.
export fn tz(name: str) (chrono::timezone | errors::overflow | fs::error | io::error | invalidtzif) = {
	switch (name) {
	case "Local" =>
		// Returning a timezone instead of a locality (pointer to
		// timezone) avoids hidden allocation, but if we decide that
		// chrono::local might change during the lifetime of a
		// long-running program (say, some daemon which is smart
		// enough to update its zones as the months go by), we lose
		// this ability.
		return *chrono::local;
	case "UTC" =>
		return *chrono::UTC_Z;
	case "TAI" =>
		return *chrono::TAI_Z;
	case "MTC" =>
		return *chrono::MTC_Z;
	case => void;
	};

	// Try reading from a TZif file installed on the system.
	//
	// TODO: try various prefixes for various OSs, try reading from
	// installed zip files, etc.
	const prefix = "/usr/share/zoneinfo/";

	// TODO: try names like "./nearby/tzif_file" or "/abs/path/tzif_file"?
	let filepath = path::init();
	path::add(&filepath, prefix, name)?;
	const fpath = path::string(&filepath);

	const file = os::open(fpath)?;
	// The handle is only needed while parsing; release it on every exit
	// path, including the error returns below.
	// NOTE(review): if io::close reports errors in this tree, assert or
	// handle its result here.
	defer io::close(file);

	const parsed = parse_tzif(file, chrono::timezone {
		name = name,
		timescale = &chrono::UTC,
		daylength = chrono::EARTH_DAY,
		...
	})?;

	return parsed;
};

// Parses data in the TZif format, and returns the given timezone with the
// fields "zones", "transitions", and "posix_extend" filled.
//
// For version 2 and later files the 32-bit (version 1) data block is skipped
// and the 64-bit block and footer are used. Version 1 files have no footer,
// so "posix_extend" is left empty for them. Leap-second records and the
// standard/wall and UT/local indicators are read past but not used.
//
// Returns [[invalidtzif]] if the data is truncated or violates any of the
// checks below, and [[io::error]] if reading fails.
//
// See: https://datatracker.ietf.org/doc/html/rfc8536
fn parse_tzif(
	h: io::handle,
	tz: chrono::timezone,
) (chrono::timezone | invalidtzif | io::error) = {
	let buf1: [1]u8 = [0...];
	let buf4: [4]u8 = [0...];
	let buf15: [15]u8 = [0...];

	// Test for the magic "TZif". The bytes are compared directly so that
	// arbitrary file content is never handed to the UTF-8 string routines.
	read(h, buf4)?;
	if (buf4[0] != 0x54 || buf4[1] != 0x5A
			|| buf4[2] != 0x69 || buf4[3] != 0x66) {
		return invalidtzif;
	};

	// read version
	read(h, buf1)?;
	const version = switch (buf1[0]) {
	case 0x00 => // NUL
		yield 1;
	case 0x32 => // '2'
		yield 2;
	case 0x33 => // '3'
		yield 3;
	case =>
		return invalidtzif;
	};

	// skip padding
	read(h, buf15)?;

	// read counts
	let isutcnt = readu32(h)?;
	let isstdcnt = readu32(h)?;
	let leapcnt = readu32(h)?;
	let timecnt = readu32(h)?;
	let typecnt = readu32(h)?;
	let charcnt = readu32(h)?;

	const is64 = version > 1;
	if (is64) {
		// Skip to the version 2 counts. The sum is computed as size
		// so that large counts cannot wrap around in 32 bits.
		const skip =
			// size of version 1 data block
			(timecnt: size) * 4
			+ (timecnt: size)
			+ (typecnt: size) * 6
			+ (charcnt: size)
			+ (leapcnt: size) * 8
			+ (isstdcnt: size)
			+ (isutcnt: size)
			// magic, version and padding of the version 2 header
			+ 20;
		for (let i = 0z; i < skip; i += 1) {
			read(h, buf1)?;
		};

		// read version 2 counts
		isutcnt = readu32(h)?;
		isstdcnt = readu32(h)?;
		leapcnt = readu32(h)?;
		timecnt = readu32(h)?;
		typecnt = readu32(h)?;
		charcnt = readu32(h)?;
	};

	if (typecnt == 0 || charcnt == 0) {
		return invalidtzif;
	};

	if (isutcnt != 0 && isutcnt != typecnt) {
		return invalidtzif;
	};

	if (isstdcnt != 0 && isstdcnt != typecnt) {
		return invalidtzif;
	};

	// TODO: if and how to handle the case where both isutcnt and isstdcnt
	// are non-zero? check Olson's tz code for behaviour.

	// width in bytes of a transition time / leap-second occurrence
	const timesz: size = if (is64) 8 else 4;

	// Slices which end up in (or are borrowed by) the returned timezone.
	// They are released here only when parsing fails part-way.
	let ok = false;
	let abbrdata: []u8 = []; // borrowed by every zone.abbr
	let footerdata: []u8 = []; // borrowed by posix_extend
	let zones: []chrono::zone = [];
	let transitions: []chrono::transition = [];
	defer if (!ok) {
		free(abbrdata);
		free(footerdata);
		free(zones);
		free(transitions);
	};

	// read data; the order below is the on-disk order

	let transition_times: []i64 = [];
	defer free(transition_times);
	if (is64) {
		readitems8(h, &transition_times, timecnt: size)?;
	} else {
		readitems4(h, &transition_times, timecnt: size)?;
	};

	let zone_indices: []u8 = [];
	defer free(zone_indices);
	readbytes(h, &zone_indices, timecnt: size)?;

	let zonedata: []u8 = [];
	defer free(zonedata);
	readbytes(h, &zonedata, (typecnt: size) * 6)?;

	readbytes(h, &abbrdata, charcnt: size)?;

	// read past, unused
	let leapdata: []u8 = [];
	defer free(leapdata);
	readbytes(h, &leapdata, (leapcnt: size) * (timesz + 4))?;

	// read past, unused
	let stdwalldata: []u8 = [];
	defer free(stdwalldata);
	readbytes(h, &stdwalldata, isstdcnt: size)?;

	// read past, unused
	let normlocaldata: []u8 = [];
	defer free(normlocaldata);
	readbytes(h, &normlocaldata, isutcnt: size)?;

	// Read the footer: a newline, the extension string, a newline. Only
	// version 2+ files carry one; a version 1 file ends after its data
	// block, so demanding a footer there would reject valid files.
	if (is64) {
		read(h, buf1)?;
		if (buf1[0] != 0x0A) { // '\n' newline
			return invalidtzif;
		};
		for (true) {
			read(h, buf1)?;
			if (buf1[0] == 0x0A) { // '\n' newline
				break;
			};
			if (buf1[0] == 0x0) { // cannot contain NUL
				return invalidtzif;
			};
			append(footerdata, buf1[0]);
		};
	};
	// NOTE(review): footer and abbreviation bytes come from the file and
	// are passed to strings::fromutf8 as in the original; confirm how that
	// function treats invalid UTF-8 in this tree.
	const posix_extend = strings::fromutf8(footerdata);

	// assemble structured data

	// assemble zones; each record is 6 bytes:
	// 4-byte offset, 1-byte DST flag, 1-byte abbreviation index
	for (let i = 0z; i < typecnt; i += 1) {
		const idx = i * 6;
		let zone = chrono::zone { ... };

		// offset
		const zoffset = endian::begetu32(zonedata[idx..idx + 4]): i32;
		if (zoffset == -2147483648) { // -2^31
			return invalidtzif;
		};
		zone.zoffset = zoffset * time::SECOND;

		// daylight saving time indicator
		zone.dst = switch (zonedata[idx + 4]) {
		case 1u8 =>
			yield true;
		case 0u8 =>
			yield false;
		case =>
			return invalidtzif;
		};

		// Abbreviation: the NUL-terminated string starting at abbridx.
		// The scan index is a size, so it cannot wrap around on
		// tables longer than 255 bytes.
		const abbridx = zonedata[idx + 5]: size;
		if (abbridx >= len(abbrdata)) {
			return invalidtzif;
		};
		let nul = abbridx;
		for (nul < len(abbrdata); nul += 1) {
			if (abbrdata[nul] == 0x0) {
				break;
			};
		};
		if (nul == len(abbrdata)) { // no NUL encountered
			return invalidtzif;
		};
		// An empty abbreviation (NUL at abbridx) is accepted.
		zone.abbr = strings::fromutf8(abbrdata[abbridx..nul]);

		append(zones, zone);
	};

	// assemble transitions
	for (let i = 0z; i < timecnt; i += 1) {
		const zoneindex = zone_indices[i]: int;
		if (zoneindex >= typecnt: int) {
			return invalidtzif;
		};

		// stdwalldata and normlocaldata have been omitted,
		// until they show their utility.
		append(transitions, chrono::transition {
			when = time::instant {
				sec = transition_times[i],
				...
			},
			zoneindex = zoneindex,
		});
	};

	// commit and return data
	let out = tz;
	out.zones = zones;
	out.transitions = transitions;
	out.posix_extend = posix_extend;
	ok = true;
	return out;
};

// Error wrapper for [[io::read]]. Fills buf from h with a single call to
// io::read, and returns [[invalidtzif]] on EOF or if fewer than len(buf)
// bytes were returned.
fn read(h: io::handle, buf: []u8) (void | invalidtzif | io::error) = {
	match (io::read(h, buf)) {
	case let err: io::error =>
		return err;
	case io::EOF =>
		return invalidtzif;
	case let sz: size =>
		if (sz != len(buf)) {
			return invalidtzif;
		};
	};
};

// Reads one big-endian unsigned 32-bit integer from h.
fn readu32(h: io::handle) (u32 | invalidtzif | io::error) = {
	let buf: [4]u8 = [0...];
	read(h, buf)?;
	return endian::begetu32(buf);
};

// Reads n bytes from h, appending each to items. Items appended before a
// failure are left in place for the caller to free.
fn readbytes(
	h: io::handle,
	items: *[]u8,
	n: size,
) (void | invalidtzif | io::error) = {
	let buf: [1]u8 = [0];
	for (let i = 0z; i < n; i += 1) {
		read(h, buf)?;
		append(items, buf[0]);
	};
};

// Reads n big-endian signed 64-bit integers from h, appending each to items.
fn readitems8(
	h: io::handle,
	items: *[]i64,
	n: size,
) (void | invalidtzif | io::error) = {
	let buf: [8]u8 = [0...];
	for (let i = 0z; i < n; i += 1) {
		read(h, buf)?;
		append(items, endian::begetu64(buf): i64);
	};
};

// Reads n big-endian signed 32-bit integers from h, appending each to items
// widened to i64.
fn readitems4(
	h: io::handle,
	items: *[]i64,
	n: size,
) (void | invalidtzif | io::error) = {
	let buf: [4]u8 = [0...];
	for (let i = 0z; i < n; i += 1) {
		read(h, buf)?;
		// Go through i32 first so that values before the epoch keep
		// their sign instead of becoming large positive numbers.
		append(items, endian::begetu32(buf): i32: i64);
	};
};