hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

tzdb.ha (7845B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use bufio;
      5 use bytes;
      6 use encoding::utf8;
      7 use endian;
      8 use errors;
      9 use fs;
     10 use io;
     11 use os;
     12 use path;
     13 use strings;
     14 use time;
     15 
     16 // Error concerning the Timezone database.
     17 export type tzdberror = !(invalidtzif | fs::error | io::error);
     18 
     19 // Invalid TZif data.
     20 export type invalidtzif = !void;
     21 
     22 // Finds, loads, and allocates a [[timezone]] from the system's Timezone
     23 // database (TZDB), and returns it as a [[locality]]. Each call returns a new
     24 // instance. The caller must free the return value; see [[timezone_free]].
     25 //
     26 // The system TZDB is normally located at [[TZDB_PATH]]. The timezone
     27 // filepath is resolved by appending the name argument to this prefix path.
     28 // If [name] is a full filepath (begins with '/'), it is used directly instead.
     29 //
     30 // All localities returned default to the [[utc]] [[timescale]] and
     31 // [[EARTH_DAY]] day-length.
     32 export fn tz(name: str) (locality | tzdberror) = {
     33 	const filepath = if (strings::hasprefix(name, "/")) {
     34 		yield path::init(name);
     35 	} else {
     36 		yield path::init(TZDB_PATH, name);
     37 	};
     38 	const filepath = match (filepath) {
     39 	case let buf: path::path =>
     40 		yield buf;
     41 	case let err: path::error =>
     42 		assert(err is path::too_long);
     43 		return errors::noentry: fs::error;
     44 	};
     45 	const file = os::open(path::string(&filepath))?;
     46 
     47 	static let buf: [os::BUFSZ]u8 = [0...];
     48 	const bufstrm = bufio::init(file, buf, []);
     49 
     50 	let loc = alloc(timezone {
     51 		name = strings::dup(name),
     52 		timescale = &utc,
     53 		daylength = EARTH_DAY,
     54 		...
     55 	})!;
     56 	match (load_tzif(&bufstrm, loc)) {
     57 	case void =>
     58 		io::close(&bufstrm)?;
     59 		io::close(file)?;
     60 		return loc;
     61 	case invalidtzif =>
     62 		io::close(&bufstrm): void;
     63 		io::close(file): void;
     64 		return invalidtzif;
     65 	case let err: io::error =>
     66 		io::close(&bufstrm): void;
     67 		io::close(file): void;
     68 		return err;
     69 	};
     70 };
     71 
     72 // Loads data of the TZif "Time Zone Information Format", and initialises the
     73 // fields "zones", "transitions", and "posix_extend" of the given [[timezone]].
     74 //
     75 // See: https://datatracker.ietf.org/doc/html/rfc8536
     76 fn load_tzif(h: io::handle, tz: *timezone) (void | invalidtzif | io::error) = {
     77 	const buf1: [1]u8 = [0...];
     78 	const buf4: [4]u8 = [0...];
     79 	const buf8: [8]u8 = [0...];
     80 	const buf15: [15]u8 = [0...];
     81 
     82 	// test for magic "TZif"
     83 	mustread(h, buf4)?;
     84 	if (!bytes::equal(buf4, ['T', 'Z', 'i', 'f'])) {
     85 		return invalidtzif;
     86 	};
     87 
     88 	// read version
     89 	mustread(h, buf1)?;
     90 	const version = switch (buf1[0]) {
     91 	case 0 =>
     92 		yield 1;
     93 	case '2' =>
     94 		yield 2;
     95 	case '3' =>
     96 		yield 3;
     97 	case =>
     98 		return invalidtzif;
     99 	};
    100 
    101 	// skip padding
    102 	mustread(h, buf15)?;
    103 
    104 	// read counts
    105 	mustread(h, buf4)?; let isutcnt = endian::begetu32(buf4);
    106 	mustread(h, buf4)?; let isstdcnt = endian::begetu32(buf4);
    107 	mustread(h, buf4)?; let leapcnt = endian::begetu32(buf4);
    108 	mustread(h, buf4)?; let timecnt = endian::begetu32(buf4);
    109 	mustread(h, buf4)?; let typecnt = endian::begetu32(buf4);
    110 	mustread(h, buf4)?; let charcnt = endian::begetu32(buf4);
    111 
    112 	let is64 = false;
    113 	if (version > 1) {
    114 		is64 = true;
    115 
    116 		// skip to the version 2 data
    117 		const skip = (
    118 			// size of version 1 data block
    119 			timecnt * 4
    120 			+ timecnt
    121 			+ typecnt * 6
    122 			+ charcnt
    123 			+ leapcnt * 8
    124 			+ isstdcnt
    125 			+ isutcnt
    126 			// size of version 2 header
    127 			+ 20
    128 		);
    129 		for (let i = 0z; i < skip; i += 1) {
    130 			mustread(h, buf1)?;
    131 		};
    132 
    133 		// read version 2 counts
    134 		mustread(h, buf4)?; isutcnt = endian::begetu32(buf4);
    135 		mustread(h, buf4)?; isstdcnt = endian::begetu32(buf4);
    136 		mustread(h, buf4)?; leapcnt = endian::begetu32(buf4);
    137 		mustread(h, buf4)?; timecnt = endian::begetu32(buf4);
    138 		mustread(h, buf4)?; typecnt = endian::begetu32(buf4);
    139 		mustread(h, buf4)?; charcnt = endian::begetu32(buf4);
    140 	};
    141 
    142 	if (typecnt == 0 || charcnt == 0) {
    143 		return invalidtzif;
    144 	};
    145 
    146 	if (!(isutcnt == 0 || isutcnt == typecnt)
    147 			&& (isstdcnt == 0 && isstdcnt == typecnt)) {
    148 		return invalidtzif;
    149 	};
    150 
    151 	const timesz = if (is64) 8 else 4;
    152 
    153 	// read data
    154 
    155 	const transition_times: []i64 = [];
    156 	if (is64) {
    157 		readitems8(h, &transition_times, timecnt)?;
    158 	} else {
    159 		readitems4(h, &transition_times, timecnt)?;
    160 	};
    161 	defer free(transition_times);
    162 	const zone_indicies: []u8 = [];
    163 	readbytes(h, &zone_indicies, timecnt)?;
    164 	defer free(zone_indicies);
    165 	const zonedata: []u8 = [];
    166 	readbytes(h, &zonedata, typecnt * 6)?;
    167 	defer free(zonedata);
    168 	const abbrdata: []u8 = [];
    169 	readbytes(h, &abbrdata, charcnt)?;
    170 	defer free(abbrdata);
    171 	const leapdata: []u8 = [];
    172 	readbytes(h, &leapdata, leapcnt * (timesz: u32 + 4))?;
    173 	defer free(leapdata);
    174 	const stdwalldata: []u8 = [];
    175 	readbytes(h, &stdwalldata, isstdcnt)?;
    176 	defer free(stdwalldata);
    177 	const normlocaldata: []u8 = [];
    178 	readbytes(h, &normlocaldata, isutcnt)?;
    179 	defer free(normlocaldata);
    180 	// read footer
    181 
    182 	let footerdata: []u8 = [];
    183 	defer free(footerdata);
    184 	mustread(h, buf1)?;
    185 	if (buf1[0] != 0x0A) { // '\n' newline
    186 		return invalidtzif;
    187 	};
    188 	for (true) {
    189 		mustread(h, buf1)?;
    190 		if (buf1[0] == 0x0A) { // '\n' newline
    191 			break;
    192 		};
    193 		if (buf1[0] == 0x0) { // cannot contain NUL
    194 			return invalidtzif;
    195 		};
    196 		append(footerdata, buf1...)!;
    197 	};
    198 	const posix_extend = strings::dup(match (strings::fromutf8(footerdata)) {
    199 	case let s: str =>
    200 		yield s;
    201 	case utf8::invalid =>
    202 		return invalidtzif;
    203 	});
    204 
    205 	// assemble structured data
    206 
    207 	// assemble zones
    208 	let zones: []zone = [];
    209 	for (let i = 0z; i < typecnt; i += 1) {
    210 		const idx = i * 6;
    211 		const zone = zone { ... };
    212 
    213 		// offset
    214 		const zoff = endian::begetu32(zonedata[idx..idx + 4]): i32;
    215 		if (zoff == -2147483648) { // -2^31
    216 			return invalidtzif;
    217 		};
    218 		zone.zoff = zoff * time::SECOND;
    219 
    220 		// daylight saving time indicator
    221 		zone.dst = switch (zonedata[idx + 4]) {
    222 		case 1u8 =>
    223 			yield true;
    224 		case 0u8 =>
    225 			yield false;
    226 		case =>
    227 			return invalidtzif;
    228 		};
    229 
    230 		// abbreviation
    231 		const abbridx = zonedata[idx + 5];
    232 		if (abbridx < 0 || abbridx > (charcnt - 1)) {
    233 			return invalidtzif;
    234 		};
    235 		let bytes: []u8 = [];
    236 		for (let j = abbridx; j < len(abbrdata); j += 1) {
    237 			if (abbrdata[j] == 0x0) {
    238 				bytes = abbrdata[abbridx..j];
    239 				break;
    240 			};
    241 		};
    242 		if (len(bytes) == 0) { // no NUL encountered
    243 			return invalidtzif;
    244 		};
    245 		const abbr = match (strings::fromutf8(bytes)) {
    246 		case let s: str =>
    247 			yield s;
    248 		case utf8::invalid =>
    249 			return invalidtzif;
    250 		};
    251 		zone.abbr = strings::dup(abbr);
    252 
    253 		append(zones, zone)!;
    254 	};
    255 
    256 	// assemble transitions
    257 	let transitions: []transition = [];
    258 	for (let i = 0z; i < timecnt; i += 1) {
    259 		const zoneindex = zone_indicies[i];
    260 		if (zoneindex < 0 || zoneindex > (typecnt - 1)) {
    261 			return invalidtzif;
    262 		};
    263 
    264 		const tx = transition {
    265 			when = time::instant {
    266 				sec = transition_times[i],
    267 				...
    268 			},
    269 			zoneindex = zoneindex,
    270 		};
    271 
    272 		// stdwalldata and normlocaldata have been omitted,
    273 		// until they show their utility.
    274 
    275 		append(transitions, tx)!;
    276 	};
    277 
    278 	// commit and return data
    279 	tz.zones = zones;
    280 	tz.transitions = transitions;
    281 	tz.posix_extend = posix_extend;
    282 };
    283 
    284 fn mustread(h: io::handle, buf: []u8) (void | invalidtzif | io::error) = {
    285 	match (io::readall(h, buf)) {
    286 	case let err: io::error =>
    287 		return err;
    288 	case io::EOF =>
    289 		return invalidtzif;
    290 	case size =>
    291 		return;
    292 	};
    293 };
    294 
    295 fn readbytes(
    296 	h: io::handle,
    297 	items: *[]u8,
    298 	n: size,
    299 ) (void | invalidtzif | io::error) = {
    300 	const buf: [1]u8 = [0];
    301 	for (let i = 0z; i < n; i += 1) {
    302 		mustread(h, buf)?;
    303 		const it = buf[0];
    304 		append(items, it)!;
    305 	};
    306 };
    307 
    308 fn readitems8(
    309 	h: io::handle,
    310 	items: *[]i64,
    311 	n: size,
    312 ) (void | invalidtzif | io::error) = {
    313 	const buf: [8]u8 = [0...];
    314 	for (let i = 0z; i < n; i += 1) {
    315 		mustread(h, buf)?;
    316 		const it = endian::begetu64(buf): i64;
    317 		append(items, it)!;
    318 	};
    319 };
    320 
    321 fn readitems4(
    322 	h: io::handle,
    323 	items: *[]i64,
    324 	n: size,
    325 ) (void | invalidtzif | io::error) = {
    326 	const buf: [4]u8 = [0...];
    327 	for (let i = 0z; i < n; i += 1) {
    328 		mustread(h, buf)?;
    329 		const it = endian::begetu32(buf): i64;
    330 		append(items, it)!;
    331 	};
    332 };