hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

tzdb.ha (7420B)


      1 // License: MPL-2.0
      2 // (c) 2021-2022 Byron Torres <b@torresjrjr.com>
      3 use bufio;
      4 use bytes;
      5 use encoding::utf8;
      6 use endian;
      7 use errors;
      8 use fs;
      9 use io;
     10 use os;
     11 use path;
     12 use strings;
     13 use time;
     14 
     15 // Error concerning the Timezone database.
     16 export type tzdberror = !(invalidtzif | fs::error | io::error);
     17 
     18 // Invalid TZif data.
     19 export type invalidtzif = !void;
     20 
     21 // Finds, loads, and allocates a [[timezone]] from the system's Timezone
     22 // database, normally located at /usr/share/zoneinfo, and returns it as a
     23 // [[locality]]. Each call returns a new instance. The caller must free the
     24 // return value.
     25 //
     26 // All localities provided default to the [[utc]] [[timescale]] and
     27 // [[EARTH_DAY]] day-length.
     28 export fn tz(name: str) (locality | tzdberror) = {
     29 	const filepath = path::init(ZONEINFO_PREFIX, name)!;
     30 	const fpath = path::string(&filepath);
     31 	const file = os::open(fpath)?;
     32 
     33 	static let buf: [os::BUFSIZ]u8 = [0...];
     34 	const bufstrm = bufio::init(file, buf, []);
     35 
     36 	let loc = alloc(timezone {
     37 		name = strings::dup(name),
     38 		timescale = &utc,
     39 		daylength = EARTH_DAY,
     40 		...
     41 	});
     42 	match (load_tzif(&bufstrm, loc)) {
     43 	case void =>
     44 		io::close(&bufstrm)?;
     45 		io::close(file)?;
     46 		return loc;
     47 	case invalidtzif =>
     48 		io::close(&bufstrm): void;
     49 		io::close(file): void;
     50 		return invalidtzif;
     51 	case let err: io::error =>
     52 		io::close(&bufstrm): void;
     53 		io::close(file): void;
     54 		return err;
     55 	};
     56 };
     57 
     58 // Loads data of the TZif "Time Zone Information Format", and initialises the
     59 // fields "zones", "transitions", and "posix_extend" of the given [[timezone]].
     60 //
     61 // See: https://datatracker.ietf.org/doc/html/rfc8536
     62 fn load_tzif(h: io::handle, tz: *timezone) (void | invalidtzif | io::error) = {
     63 	const buf1: [1]u8 = [0...];
     64 	const buf4: [4]u8 = [0...];
     65 	const buf8: [8]u8 = [0...];
     66 	const buf15: [15]u8 = [0...];
     67 
     68 	// test for magic "TZif"
     69 	mustread(h, buf4)?;
     70 	if (!bytes::equal(buf4, ['T': u8, 'Z': u8, 'i': u8, 'f': u8])) {
     71 		return invalidtzif;
     72 	};
     73 
     74 	// read version
     75 	mustread(h, buf1)?;
     76 	const version = switch (buf1[0]) {
     77 	case 0 =>
     78 		yield 1;
     79 	case '2' =>
     80 		yield 2;
     81 	case '3' =>
     82 		yield 3;
     83 	case =>
     84 		return invalidtzif;
     85 	};
     86 
     87 	// skip padding
     88 	mustread(h, buf15)?;
     89 
     90 	// read counts
     91 	mustread(h, buf4)?; let isutcnt = endian::begetu32(buf4);
     92 	mustread(h, buf4)?; let isstdcnt = endian::begetu32(buf4);
     93 	mustread(h, buf4)?; let leapcnt = endian::begetu32(buf4);
     94 	mustread(h, buf4)?; let timecnt = endian::begetu32(buf4);
     95 	mustread(h, buf4)?; let typecnt = endian::begetu32(buf4);
     96 	mustread(h, buf4)?; let charcnt = endian::begetu32(buf4);
     97 
     98 	let is64 = false;
     99 	if (version > 1) {
    100 		is64 = true;
    101 
    102 		// skip to the version 2 data
    103 		const skip = (
    104 			// size of version 1 data block
    105 			timecnt * 4
    106 			+ timecnt
    107 			+ typecnt * 6
    108 			+ charcnt
    109 			+ leapcnt * 8
    110 			+ isstdcnt
    111 			+ isutcnt
    112 			// size of version 2 header
    113 			+ 20
    114 		);
    115 		for (let i = 0z; i < skip; i += 1) {
    116 			mustread(h, buf1)?;
    117 		};
    118 
    119 		// read version 2 counts
    120 		mustread(h, buf4)?; isutcnt = endian::begetu32(buf4);
    121 		mustread(h, buf4)?; isstdcnt = endian::begetu32(buf4);
    122 		mustread(h, buf4)?; leapcnt = endian::begetu32(buf4);
    123 		mustread(h, buf4)?; timecnt = endian::begetu32(buf4);
    124 		mustread(h, buf4)?; typecnt = endian::begetu32(buf4);
    125 		mustread(h, buf4)?; charcnt = endian::begetu32(buf4);
    126 	};
    127 
    128 	if (typecnt == 0 || charcnt == 0) {
    129 		return invalidtzif;
    130 	};
    131 
    132 	if (!(isutcnt == 0 || isutcnt == typecnt)
    133 			&& (isstdcnt == 0 && isstdcnt == typecnt)) {
    134 		return invalidtzif;
    135 	};
    136 
    137 	const timesz = if (is64) 8 else 4;
    138 
    139 	// read data
    140 
    141 	const transition_times: []i64 = [];
    142 	if (is64) {
    143 		readitems8(h, &transition_times, timecnt)?;
    144 	} else {
    145 		readitems4(h, &transition_times, timecnt)?;
    146 	};
    147 	defer free(transition_times);
    148 	const zone_indicies: []u8 = [];
    149 	readbytes(h, &zone_indicies, timecnt)?;
    150 	defer free(zone_indicies);
    151 	const zonedata: []u8 = [];
    152 	readbytes(h, &zonedata, typecnt * 6)?;
    153 	defer free(zonedata);
    154 	const abbrdata: []u8 = [];
    155 	readbytes(h, &abbrdata, charcnt)?;
    156 	defer free(abbrdata);
    157 	const leapdata: []u8 = [];
    158 	readbytes(h, &leapdata, leapcnt * (timesz: u32 + 4))?;
    159 	defer free(leapdata);
    160 	const stdwalldata: []u8 = [];
    161 	readbytes(h, &stdwalldata, isstdcnt)?;
    162 	defer free(stdwalldata);
    163 	const normlocaldata: []u8 = [];
    164 	readbytes(h, &normlocaldata, isutcnt)?;
    165 	defer free(normlocaldata);
    166 	// read footer
    167 
    168 	let footerdata: []u8 = [];
    169 	defer free(footerdata);
    170 	mustread(h, buf1)?;
    171 	if (buf1[0] != 0x0A) { // '\n' newline
    172 		return invalidtzif;
    173 	};
    174 	for (true) {
    175 		mustread(h, buf1)?;
    176 		if (buf1[0] == 0x0A) { // '\n' newline
    177 			break;
    178 		};
    179 		if (buf1[0] == 0x0) { // cannot contain NUL
    180 			return invalidtzif;
    181 		};
    182 		append(footerdata, buf1...);
    183 	};
    184 	const posix_extend = strings::dup(match (strings::fromutf8(footerdata)) {
    185 	case let s: str =>
    186 		yield s;
    187 	case encoding::utf8::invalid =>
    188 		return invalidtzif;
    189 	});
    190 
    191 	// assemble structured data
    192 
    193 	// assemble zones
    194 	let zones: []zone = [];
    195 	for (let i = 0z; i < typecnt; i += 1) {
    196 		const idx = i * 6;
    197 		const zone = zone { ... };
    198 
    199 		// offset
    200 		const zoff = endian::begetu32(zonedata[idx..idx + 4]): i32;
    201 		if (zoff == -2147483648) { // -2^31
    202 			return invalidtzif;
    203 		};
    204 		zone.zoff = zoff * time::SECOND;
    205 
    206 		// daylight saving time indicator
    207 		zone.dst = switch (zonedata[idx + 4]) {
    208 		case 1u8 =>
    209 			yield true;
    210 		case 0u8 =>
    211 			yield false;
    212 		case =>
    213 			return invalidtzif;
    214 		};
    215 
    216 		// abbreviation
    217 		const abbridx = zonedata[idx + 5];
    218 		if (abbridx < 0 || abbridx > (charcnt - 1)) {
    219 			return invalidtzif;
    220 		};
    221 		let bytes: []u8 = [];
    222 		for (let j = abbridx; j < len(abbrdata); j += 1) {
    223 			if (abbrdata[j] == 0x0) {
    224 				bytes = abbrdata[abbridx..j];
    225 				break;
    226 			};
    227 		};
    228 		if (len(bytes) == 0) { // no NUL encountered
    229 			return invalidtzif;
    230 		};
    231 		const abbr = match (strings::fromutf8(bytes)) {
    232 		case let s: str =>
    233 			yield s;
    234 		case encoding::utf8::invalid =>
    235 			return invalidtzif;
    236 		};
    237 		zone.abbr = strings::dup(abbr);
    238 
    239 		append(zones, zone);
    240 	};
    241 
    242 	// assemble transitions
    243 	let transitions: []transition = [];
    244 	for (let i = 0z; i < timecnt; i += 1) {
    245 		const zoneindex = zone_indicies[i]: int;
    246 		if (zoneindex < 0 || zoneindex > (typecnt: int - 1)) {
    247 			return invalidtzif;
    248 		};
    249 
    250 		const tx = transition {
    251 			when = time::instant {
    252 				sec = transition_times[i],
    253 				...
    254 			},
    255 			zoneindex = zoneindex,
    256 		};
    257 
    258 		// stdwalldata and normlocaldata have been omitted,
    259 		// until they show their utility.
    260 
    261 		append(transitions, tx);
    262 	};
    263 
    264 	// commit and return data
    265 	tz.zones = zones;
    266 	tz.transitions = transitions;
    267 	tz.posix_extend = posix_extend;
    268 };
    269 
    270 fn mustread(h: io::handle, buf: []u8) (void | invalidtzif | io::error) = {
    271 	match (io::readall(h, buf)) {
    272 	case let err: io::error =>
    273 		return err;
    274 	case io::EOF =>
    275 		return invalidtzif;
    276 	case size =>
    277 		return;
    278 	};
    279 };
    280 
    281 fn readbytes(
    282 	h: io::handle,
    283 	items: *[]u8,
    284 	n: size,
    285 ) (void | invalidtzif | io::error) = {
    286 	const buf: [1]u8 = [0];
    287 	for (let i = 0z; i < n; i += 1) {
    288 		mustread(h, buf)?;
    289 		const it = buf[0];
    290 		append(items, it);
    291 	};
    292 };
    293 
    294 fn readitems8(
    295 	h: io::handle,
    296 	items: *[]i64,
    297 	n: size,
    298 ) (void | invalidtzif | io::error) = {
    299 	const buf: [8]u8 = [0...];
    300 	for (let i = 0z; i < n; i += 1) {
    301 		mustread(h, buf)?;
    302 		const it = endian::begetu64(buf): i64;
    303 		append(items, it);
    304 	};
    305 };
    306 
    307 fn readitems4(
    308 	h: io::handle,
    309 	items: *[]i64,
    310 	n: size,
    311 ) (void | invalidtzif | io::error) = {
    312 	const buf: [4]u8 = [0...];
    313 	for (let i = 0z; i < n; i += 1) {
    314 		mustread(h, buf)?;
    315 		const it = endian::begetu32(buf): i64;
    316 		append(items, it);
    317 	};
    318 };