hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

tzdb.ha (7634B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use bufio;
      5 use bytes;
      6 use encoding::utf8;
      7 use endian;
      8 use fs;
      9 use io;
     10 use os;
     11 use path;
     12 use strings;
     13 use time;
     14 
     15 // Error concerning the Timezone database.
     16 export type tzdberror = !(invalidtzif | fs::error | io::error);
     17 
     18 // Invalid TZif data.
     19 export type invalidtzif = !void;
     20 
     21 // Finds, loads, and allocates a [[timezone]] from the system's Timezone
     22 // database (TZDB), and returns it as a [[locality]]. Each call returns a new
     23 // instance. The caller must free the return value; see [[timezone_free]].
     24 //
     25 // The system TZDB is normally located at [[TZDB_PATH]]. The timezone
     26 // filepath is resolved by appending the name argument to this prefix path.
     27 // If [name] is a full filepath (begins with '/'), it is used directly instead.
     28 //
     29 // All localities returned default to the [[utc]] [[timescale]] and
     30 // [[EARTH_DAY]] day-length.
     31 export fn tz(name: str) (locality | tzdberror) = {
     32 	const filepath =
     33 		if (!strings::hasprefix(name, TZDB_PATH))
     34 			path::init(TZDB_PATH, name)!
     35 		else
     36 			path::init(name)!;
     37 	const file = os::open(path::string(&filepath))?;
     38 
     39 	static let buf: [os::BUFSZ]u8 = [0...];
     40 	const bufstrm = bufio::init(file, buf, []);
     41 
     42 	let loc = alloc(timezone {
     43 		name = strings::dup(name),
     44 		timescale = &utc,
     45 		daylength = EARTH_DAY,
     46 		...
     47 	});
     48 	match (load_tzif(&bufstrm, loc)) {
     49 	case void =>
     50 		io::close(&bufstrm)?;
     51 		io::close(file)?;
     52 		return loc;
     53 	case invalidtzif =>
     54 		io::close(&bufstrm): void;
     55 		io::close(file): void;
     56 		return invalidtzif;
     57 	case let err: io::error =>
     58 		io::close(&bufstrm): void;
     59 		io::close(file): void;
     60 		return err;
     61 	};
     62 };
     63 
     64 // Loads data of the TZif "Time Zone Information Format", and initialises the
     65 // fields "zones", "transitions", and "posix_extend" of the given [[timezone]].
     66 //
     67 // See: https://datatracker.ietf.org/doc/html/rfc8536
     68 fn load_tzif(h: io::handle, tz: *timezone) (void | invalidtzif | io::error) = {
     69 	const buf1: [1]u8 = [0...];
     70 	const buf4: [4]u8 = [0...];
     71 	const buf8: [8]u8 = [0...];
     72 	const buf15: [15]u8 = [0...];
     73 
     74 	// test for magic "TZif"
     75 	mustread(h, buf4)?;
     76 	if (!bytes::equal(buf4, ['T', 'Z', 'i', 'f'])) {
     77 		return invalidtzif;
     78 	};
     79 
     80 	// read version
     81 	mustread(h, buf1)?;
     82 	const version = switch (buf1[0]) {
     83 	case 0 =>
     84 		yield 1;
     85 	case '2' =>
     86 		yield 2;
     87 	case '3' =>
     88 		yield 3;
     89 	case =>
     90 		return invalidtzif;
     91 	};
     92 
     93 	// skip padding
     94 	mustread(h, buf15)?;
     95 
     96 	// read counts
     97 	mustread(h, buf4)?; let isutcnt = endian::begetu32(buf4);
     98 	mustread(h, buf4)?; let isstdcnt = endian::begetu32(buf4);
     99 	mustread(h, buf4)?; let leapcnt = endian::begetu32(buf4);
    100 	mustread(h, buf4)?; let timecnt = endian::begetu32(buf4);
    101 	mustread(h, buf4)?; let typecnt = endian::begetu32(buf4);
    102 	mustread(h, buf4)?; let charcnt = endian::begetu32(buf4);
    103 
    104 	let is64 = false;
    105 	if (version > 1) {
    106 		is64 = true;
    107 
    108 		// skip to the version 2 data
    109 		const skip = (
    110 			// size of version 1 data block
    111 			timecnt * 4
    112 			+ timecnt
    113 			+ typecnt * 6
    114 			+ charcnt
    115 			+ leapcnt * 8
    116 			+ isstdcnt
    117 			+ isutcnt
    118 			// size of version 2 header
    119 			+ 20
    120 		);
    121 		for (let i = 0z; i < skip; i += 1) {
    122 			mustread(h, buf1)?;
    123 		};
    124 
    125 		// read version 2 counts
    126 		mustread(h, buf4)?; isutcnt = endian::begetu32(buf4);
    127 		mustread(h, buf4)?; isstdcnt = endian::begetu32(buf4);
    128 		mustread(h, buf4)?; leapcnt = endian::begetu32(buf4);
    129 		mustread(h, buf4)?; timecnt = endian::begetu32(buf4);
    130 		mustread(h, buf4)?; typecnt = endian::begetu32(buf4);
    131 		mustread(h, buf4)?; charcnt = endian::begetu32(buf4);
    132 	};
    133 
    134 	if (typecnt == 0 || charcnt == 0) {
    135 		return invalidtzif;
    136 	};
    137 
    138 	if (!(isutcnt == 0 || isutcnt == typecnt)
    139 			&& (isstdcnt == 0 && isstdcnt == typecnt)) {
    140 		return invalidtzif;
    141 	};
    142 
    143 	const timesz = if (is64) 8 else 4;
    144 
    145 	// read data
    146 
    147 	const transition_times: []i64 = [];
    148 	if (is64) {
    149 		readitems8(h, &transition_times, timecnt)?;
    150 	} else {
    151 		readitems4(h, &transition_times, timecnt)?;
    152 	};
    153 	defer free(transition_times);
    154 	const zone_indicies: []u8 = [];
    155 	readbytes(h, &zone_indicies, timecnt)?;
    156 	defer free(zone_indicies);
    157 	const zonedata: []u8 = [];
    158 	readbytes(h, &zonedata, typecnt * 6)?;
    159 	defer free(zonedata);
    160 	const abbrdata: []u8 = [];
    161 	readbytes(h, &abbrdata, charcnt)?;
    162 	defer free(abbrdata);
    163 	const leapdata: []u8 = [];
    164 	readbytes(h, &leapdata, leapcnt * (timesz: u32 + 4))?;
    165 	defer free(leapdata);
    166 	const stdwalldata: []u8 = [];
    167 	readbytes(h, &stdwalldata, isstdcnt)?;
    168 	defer free(stdwalldata);
    169 	const normlocaldata: []u8 = [];
    170 	readbytes(h, &normlocaldata, isutcnt)?;
    171 	defer free(normlocaldata);
    172 	// read footer
    173 
    174 	let footerdata: []u8 = [];
    175 	defer free(footerdata);
    176 	mustread(h, buf1)?;
    177 	if (buf1[0] != 0x0A) { // '\n' newline
    178 		return invalidtzif;
    179 	};
    180 	for (true) {
    181 		mustread(h, buf1)?;
    182 		if (buf1[0] == 0x0A) { // '\n' newline
    183 			break;
    184 		};
    185 		if (buf1[0] == 0x0) { // cannot contain NUL
    186 			return invalidtzif;
    187 		};
    188 		append(footerdata, buf1...);
    189 	};
    190 	const posix_extend = strings::dup(match (strings::fromutf8(footerdata)) {
    191 	case let s: str =>
    192 		yield s;
    193 	case utf8::invalid =>
    194 		return invalidtzif;
    195 	});
    196 
    197 	// assemble structured data
    198 
    199 	// assemble zones
    200 	let zones: []zone = [];
    201 	for (let i = 0z; i < typecnt; i += 1) {
    202 		const idx = i * 6;
    203 		const zone = zone { ... };
    204 
    205 		// offset
    206 		const zoff = endian::begetu32(zonedata[idx..idx + 4]): i32;
    207 		if (zoff == -2147483648) { // -2^31
    208 			return invalidtzif;
    209 		};
    210 		zone.zoff = zoff * time::SECOND;
    211 
    212 		// daylight saving time indicator
    213 		zone.dst = switch (zonedata[idx + 4]) {
    214 		case 1u8 =>
    215 			yield true;
    216 		case 0u8 =>
    217 			yield false;
    218 		case =>
    219 			return invalidtzif;
    220 		};
    221 
    222 		// abbreviation
    223 		const abbridx = zonedata[idx + 5];
    224 		if (abbridx < 0 || abbridx > (charcnt - 1)) {
    225 			return invalidtzif;
    226 		};
    227 		let bytes: []u8 = [];
    228 		for (let j = abbridx; j < len(abbrdata); j += 1) {
    229 			if (abbrdata[j] == 0x0) {
    230 				bytes = abbrdata[abbridx..j];
    231 				break;
    232 			};
    233 		};
    234 		if (len(bytes) == 0) { // no NUL encountered
    235 			return invalidtzif;
    236 		};
    237 		const abbr = match (strings::fromutf8(bytes)) {
    238 		case let s: str =>
    239 			yield s;
    240 		case utf8::invalid =>
    241 			return invalidtzif;
    242 		};
    243 		zone.abbr = strings::dup(abbr);
    244 
    245 		append(zones, zone);
    246 	};
    247 
    248 	// assemble transitions
    249 	let transitions: []transition = [];
    250 	for (let i = 0z; i < timecnt; i += 1) {
    251 		const zoneindex = zone_indicies[i];
    252 		if (zoneindex < 0 || zoneindex > (typecnt - 1)) {
    253 			return invalidtzif;
    254 		};
    255 
    256 		const tx = transition {
    257 			when = time::instant {
    258 				sec = transition_times[i],
    259 				...
    260 			},
    261 			zoneindex = zoneindex,
    262 		};
    263 
    264 		// stdwalldata and normlocaldata have been omitted,
    265 		// until they show their utility.
    266 
    267 		append(transitions, tx);
    268 	};
    269 
    270 	// commit and return data
    271 	tz.zones = zones;
    272 	tz.transitions = transitions;
    273 	tz.posix_extend = posix_extend;
    274 };
    275 
    276 fn mustread(h: io::handle, buf: []u8) (void | invalidtzif | io::error) = {
    277 	match (io::readall(h, buf)) {
    278 	case let err: io::error =>
    279 		return err;
    280 	case io::EOF =>
    281 		return invalidtzif;
    282 	case size =>
    283 		return;
    284 	};
    285 };
    286 
    287 fn readbytes(
    288 	h: io::handle,
    289 	items: *[]u8,
    290 	n: size,
    291 ) (void | invalidtzif | io::error) = {
    292 	const buf: [1]u8 = [0];
    293 	for (let i = 0z; i < n; i += 1) {
    294 		mustread(h, buf)?;
    295 		const it = buf[0];
    296 		append(items, it);
    297 	};
    298 };
    299 
    300 fn readitems8(
    301 	h: io::handle,
    302 	items: *[]i64,
    303 	n: size,
    304 ) (void | invalidtzif | io::error) = {
    305 	const buf: [8]u8 = [0...];
    306 	for (let i = 0z; i < n; i += 1) {
    307 		mustread(h, buf)?;
    308 		const it = endian::begetu64(buf): i64;
    309 		append(items, it);
    310 	};
    311 };
    312 
    313 fn readitems4(
    314 	h: io::handle,
    315 	items: *[]i64,
    316 	n: size,
    317 ) (void | invalidtzif | io::error) = {
    318 	const buf: [4]u8 = [0...];
    319 	for (let i = 0z; i < n; i += 1) {
    320 		mustread(h, buf)?;
    321 		const it = endian::begetu32(buf): i64;
    322 		append(items, it);
    323 	};
    324 };