hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

parse.ha (14995B)


      1 // License: MPL-2.0
      2 // (c) 2021-2022 Byron Torres <b@torresjrjr.com>
      3 // (c) 2022 Drew DeVault <sir@cmpwn.com>
      4 // (c) 2021-2022 Vlad-Stefan Harbuz <vlad@vladh.net>
      5 use ascii;
      6 use errors;
      7 use io;
      8 use strconv;
      9 use strings;
     10 use strio;
     11 use time;
     12 use time::chrono;
     13 
// Internal, payload-free error returned by [[parse_specifier]] and the scan
// helpers in this file when the input cannot be scanned. [[parse]] converts it
// into a [[parsefail]] carrying the specifier rune that was being processed.
type failure = !void;

// A parsing error occurred. If appropriate, the offending format specifier is
// stored. A null rune represents all other error cases.
export type parsefail = !rune;
     19 
     20 // Parses a date/time string into a [[virtual]], according to a layout format
     21 // string with specifiers as documented under [[format]]. Partial, sequential,
     22 // aggregative parsing is possible.
     23 //
     24 // 	datetime::parse(&v, "%Y-%m-%d",    "2019-12-27");
     25 // 	datetime::parse(&v, "%H:%M:%S.%N", "22:07:08.000000000");
     26 // 	datetime::parse(&v, "%z %Z %L",    "+0100 CET Europe/Amsterdam");
     27 //
     28 // Parse will return parsefail, if an invalid format specifier is encountered
     29 // or if given string 's' does not match the layout.
     30 export fn parse(v: *virtual, layout: str, s: str) (void | parsefail) = {
     31 	const liter = strings::iter(layout);
     32 	const siter = strings::iter(s);
     33 	let escaped = false;
     34 
     35 	for (true) {
     36 		const lr: rune = match (strings::next(&liter)) {
     37 		case void =>
     38 			break;
     39 		case let lr: rune =>
     40 			yield lr;
     41 		};
     42 
     43 		if (!escaped && lr == '%') {
     44 			escaped = true;
     45 			continue;
     46 		};
     47 
     48 		if (!escaped) {
     49 			const sr = match (strings::next(&siter)) {
     50 			case void =>
     51 				return '\x00';
     52 			case let sr: rune =>
     53 				yield sr;
     54 			};
     55 			if (sr != lr) {
     56 				return '\x00';
     57 			};
     58 			continue;
     59 		};
     60 
     61 		escaped = false;
     62 
     63 		match (parse_specifier(v, &siter, lr)) {
     64 		case void => void;
     65 		case failure =>
     66 			return lr;
     67 		};
     68 	};
     69 
     70 	return void;
     71 };
     72 
     73 fn parse_specifier(
     74 	v: *virtual,
     75 	iter: *strings::iterator,
     76 	lr: rune,
     77 ) (void | failure) = {
     78 	switch (lr) {
     79 	case 'a' => v.weekday =
     80 		scan_for(iter, WEEKDAYS_SHORT...)?;
     81 	case 'A' => v.weekday =
     82 		scan_for(iter, WEEKDAYS...)?;
     83 	case 'b' => v.month =
     84 		scan_for(iter, MONTHS_SHORT...)? + 1;
     85 	case 'B' => v.month =
     86 		scan_for(iter, MONTHS...)? + 1;
     87 	case 'd' => v.day =
     88 		scan_int(iter, 2, false)?;
     89 	case 'F' =>
     90 		v.year = scan_int(iter, 4, false)?;
     91 		eat_rune(iter, '-')?;
     92 		v.month = scan_int(iter, 2, false)?;
     93 		eat_rune(iter, '-')?;
     94 		v.day = scan_int(iter, 2, false)?;
     95 	case 'H' => v.hour =
     96 		scan_int(iter, 2, false)?;
     97 	case 'I' => v.halfhour =
     98 		scan_int(iter, 2, false)?;
     99 	case 'j' => v.yearday =
    100 		scan_int(iter, 3, false)?;
    101 	case 'L' => v.locname =
    102 		scan_str(iter)?;
    103 	case 'm' => v.month =
    104 		scan_int(iter, 2, false)?;
    105 	case 'M' => v.minute =
    106 		scan_int(iter, 2, false)?;
    107 	case 'N' => v.nanosecond =
    108 		scan_int(iter, 9, true)?;
    109 	case 'p' => v.ampm = // AM=false PM=true
    110 		scan_for(iter, "AM", "PM", "am", "pm")? % 2 == 1;
    111 	case 'S' => v.second =
    112 		scan_int(iter, 2, false)?;
    113 	case 'T' =>
    114 		v.hour = scan_int(iter, 2, false)?;
    115 		eat_rune(iter, ':')?;
    116 		v.minute = scan_int(iter, 2, false)?;
    117 		eat_rune(iter, ':')?;
    118 		v.second = scan_int(iter, 2, false)?;
    119 	case 'u' => v.weekday =
    120 		scan_int(iter, 1, false)? - 1;
    121 	case 'U' => v.week =
    122 		scan_int(iter, 2, false)?;
    123 	case 'w' => v.weekday =
    124 		scan_int(iter, 1, false)? - 1;
    125 	case 'W' => v.week =
    126 		scan_int(iter, 2, false)?;
    127 	case 'Y' => v.year =
    128 		scan_int(iter, 4, false)?;
    129 	case 'z' => v.zoff =
    130 		scan_zo(iter)?;
    131 	case 'Z' => v.zabbr =
    132 		scan_str(iter)?;
    133 	case '%' =>
    134 		eat_rune(iter, '%')?;
    135 	case =>
    136 		return failure;
    137 	};
    138 };
    139 
    140 fn eat_rune(iter: *strings::iterator, needle: rune) (uint | failure) = {
    141 	const rn = match (strings::next(iter)) {
    142 	case void =>
    143 		return failure;
    144 	case let rn: rune =>
    145 		yield rn;
    146 	};
    147 	if (rn == needle) {
    148 		return 1;
    149 	} else {
    150 		strings::prev(iter);
    151 		return 0;
    152 	};
    153 };
    154 
    155 // Scans the iterator for a given list of strings.
    156 // Returns the list index of the matched string.
    157 fn scan_for(iter: *strings::iterator, list: str...) (int | failure) = {
    158 	const name = strings::iterstr(iter);
    159 	if (len(name) == 0) {
    160 		return failure;
    161 	};
    162 	for(let i = 0z; i < len(list); i += 1) {
    163 		if (strings::hasprefix(name, list[i])) {
    164 			// Consume name
    165 			for (let j = 0z; j < len(list[i]); j += 1) {
    166 				strings::next(iter);
    167 			};
    168 			return i: int;
    169 		};
    170 	};
    171 	return failure;
    172 };
    173 
    174 // Scans the iterator upto n consecutive numeric digits.
    175 // Returns the resulting int.
    176 // If pad is true, the number is right-padded with zeroes upto n digits.
    177 fn scan_int(iter: *strings::iterator, n: size, pad: bool) (int | failure) = {
    178 	let copy = *iter;
    179 	for (let i = 0z; i < n; i += 1) {
    180 		let rn: rune = match (strings::next(iter)) {
    181 		case void =>
    182 			break;
    183 		case let rn: rune =>
    184 			yield rn;
    185 		};
    186 		if (!ascii::isdigit(rn)) {
    187 			strings::prev(iter);
    188 			break;
    189 		};
    190 	};
    191 	const s = strings::slice(&copy, iter);
    192 	match (strconv::stoi(s)) {
    193 	case let num: int =>
    194 		for (let i = 0z; i < n - len(s); i += 1) {
    195 			num *= 10;
    196 		};
    197 		return num;
    198 	case =>
    199 		return failure;
    200 	};
    201 };
    202 
    203 // Scans and parses zone offsets of the form:
    204 //
    205 // 	Z
    206 // 	z
    207 // 	+nn:nn
    208 // 	+nnnn
    209 // 	-nn:nn
    210 // 	-nnnn
    211 //
    212 fn scan_zo(iter: *strings::iterator) (time::duration | failure) = {
    213 	const r = match (strings::next(iter)) {
    214 	case void =>
    215 		return failure;
    216 	case let r: rune =>
    217 		yield r;
    218 	};
    219 	if (r == 'Z' || r == 'z') {
    220 		return 0;
    221 	};
    222 	let zo = scan_int(iter, 2, false)? * time::HOUR;
    223 	match (strings::next(iter)) {
    224 	case void => void;
    225 	case let r: rune =>
    226 		if (r == ':') {
    227 			strings::next(iter);
    228 		};
    229 	};
    230 	zo += scan_int(iter, 2, false)? * time::MINUTE;
    231 	if (r == '-') {
    232 		zo *= -1;
    233 	};
    234 	return zo;
    235 };
    236 
    237 // Scans and parses locality names, made of printable characters.
    238 fn scan_str(iter: *strings::iterator) (str | failure) = {
    239 	let copy = *iter;
    240 	for (true) {
    241 		match (strings::next(iter)) {
    242 		case void =>
    243 			break;
    244 		case let rn: rune =>
    245 			if (!ascii::isgraph(rn)) {
    246 				strings::prev(iter);
    247 				break;
    248 			};
    249 		};
    250 	};
    251 	return strings::slice(&copy, iter);
    252 };
    253 
    254 @test fn parse() void = {
    255 	let v = newvirtual();
    256 	assert(parse(&v, "foo", "foo") is void, "none: parsefail");
    257 	assert(v.zone        == null, "none: non-null zone");
    258 	assert(v.date        is void, "none: non-void date");
    259 	assert(v.time        is void, "none: non-void time");
    260 	assert(v.era         is void, "none: non-void era");
    261 	assert(v.year        is void, "none: non-void year");
    262 	assert(v.month       is void, "none: non-void month");
    263 	assert(v.day         is void, "none: non-void day");
    264 	assert(v.yearday     is void, "none: non-void yearday");
    265 	assert(v.isoweekyear is void, "none: non-void isoweekyear");
    266 	assert(v.isoweek     is void, "none: non-void isoweek");
    267 	assert(v.week        is void, "none: non-void week");
    268 	assert(v.sundayweek  is void, "none: non-void sundayweek");
    269 	assert(v.weekday     is void, "none: non-void weekday");
    270 	assert(v.hour        is void, "none: non-void hour");
    271 	assert(v.minute      is void, "none: non-void minute");
    272 	assert(v.second      is void, "none: non-void second");
    273 	assert(v.nanosecond  is void, "none: non-void nanosecond");
    274 	assert(v.vloc        is void, "none: non-void vloc");
    275 	assert(v.locname     is void, "none: non-void locname");
    276 	assert(v.zoff        is void, "none: non-void zoff");
    277 	assert(v.zabbr       is void, "none: non-void zabbr");
    278 	assert(v.halfhour    is void, "none: non-void halfhour");
    279 	assert(v.ampm        is void, "none: non-void ampm");
    280 
    281 	let v = newvirtual();
    282 	assert(parse(&v, "%a", "Fri") is void                , "%a: parsefail");
    283 	assert(v.weekday is int                              , "%a: void");
    284 	assert(v.weekday as int == 4                         , "%a: incorrect");
    285 
    286 	let v = newvirtual();
    287 	assert(parse(&v, "%A", "Friday") is void             , "%A: parsefail");
    288 	assert(v.weekday is int                              , "%A: void");
    289 	assert(v.weekday as int == 4                         , "%A: incorrect");
    290 
    291 	let v = newvirtual();
    292 	assert(parse(&v, "%b", "Jan") is void                , "%b: parsefail");
    293 	assert(v.month is int                                , "%b: void");
    294 	assert(v.month as int == 1                           , "%b: incorrect");
    295 
    296 	let v = newvirtual();
    297 	assert(parse(&v, "%B", "January") is void   ,         "%B: parsefail");
    298 	assert(v.month is int                                , "%B: void");
    299 	assert(v.month as int == 1                           , "%B: incorrect");
    300 
    301 	let v = newvirtual();
    302 	assert(parse(&v, "%d", "27") is void                 , "%d: parsefail");
    303 	assert(v.day is int                                  , "%d: void");
    304 	assert(v.day as int == 27                            , "%d: incorrect");
    305 
    306 	let v = newvirtual();
    307 	assert(parse(&v, "%F", "2012-10-01") is void         , "%d: parsefail");
    308 	assert(v.year is int                                 , "%d: void");
    309 	assert(v.year as int == 2012                         , "%d: incorrect");
    310 	assert(v.month is int                                , "%d: void");
    311 	assert(v.month as int == 10                          , "%d: incorrect");
    312 	assert(v.day is int                                  , "%d: void");
    313 	assert(v.day as int == 1                             , "%d: incorrect");
    314 
    315 	let v = newvirtual();
    316 	assert(parse(&v, "%H", "22") is void                 , "%H: parsefail");
    317 	assert(v.hour is int                                 , "%H: void");
    318 	assert(v.hour as int == 22                           , "%H: incorrect");
    319 
    320 	let v = newvirtual();
    321 	assert(parse(&v, "%I", "10") is void                 , "%I: parsefail");
    322 	assert(v.halfhour is int                             , "%I: void");
    323 	assert(v.halfhour as int == 10                       , "%I: incorrect");
    324 
    325 	let v = newvirtual();
    326 	assert(parse(&v, "%j", "361") is void                , "%j: parsefail");
    327 	assert(v.yearday is int                              , "%j: void");
    328 	assert(v.yearday as int == 361                       , "%j: incorrect");
    329 
    330 	let v = newvirtual();
    331 	assert(parse(&v, "%L", "Europe/Amsterdam") is void   , "%L: parsefail");
    332 	assert(v.locname is str                              , "%L: void");
    333 	assert(v.locname as str == "Europe/Amsterdam"        , "%L: incorrect");
    334 
    335 	let v = newvirtual();
    336 	assert(parse(&v, "%m", "12") is void                 , "%m: parsefail");
    337 	assert(v.month is int                                , "%m: void");
    338 	assert(v.month as int == 12                          , "%m: incorrect");
    339 
    340 	let v = newvirtual();
    341 	assert(parse(&v, "%M", "07") is void                 , "%M: parsefail");
    342 	assert(v.minute is int                               , "%M: void");
    343 	assert(v.minute as int == 7                          , "%M: incorrect");
    344 
    345 	let v = newvirtual();
    346 	assert(parse(&v, "%N", "123456789") is void          , "%N: parsefail");
    347 	assert(v.nanosecond is int                           , "%N: void");
    348 	assert(v.nanosecond as int == 123456789              , "%N: incorrect");
    349 
    350 	let v = newvirtual();
    351 	assert(parse(&v, "%p", "PM") is void                 , "%p: parsefail");
    352 	assert(v.ampm is bool                                , "%p: void");
    353 	assert(v.ampm as bool == true                        , "%p: incorrect");
    354 
    355 	let v = newvirtual();
    356 	assert(parse(&v, "%S", "08") is void                 , "%S: parsefail");
    357 	assert(v.second is int                               , "%S: void");
    358 	assert(v.second as int == 8                          , "%S: incorrect");
    359 
    360 	let v = newvirtual();
    361 	assert(parse(&v, "%T", "18:42:05") is void           , "%d: parsefail");
    362 	assert(v.hour is int                                 , "%d: void");
    363 	assert(v.hour as int == 18                           , "%d: incorrect");
    364 	assert(v.minute is int                               , "%d: void");
    365 	assert(v.minute as int == 42                         , "%d: incorrect");
    366 	assert(v.second is int                               , "%d: void");
    367 	assert(v.second as int == 5                          , "%d: incorrect");
    368 
    369 	let v = newvirtual();
    370 	assert(parse(&v, "%u", "5") is void                  , "%u: parsefail");
    371 	assert(v.weekday is int                              , "%u: void");
    372 	assert(v.weekday as int == 4                         , "%u: incorrect");
    373 
    374 	let v = newvirtual();
    375 	assert(parse(&v, "%U", "51") is void                 , "%U: parsefail");
    376 	assert(v.week is int                                 , "%U: void");
    377 	assert(v.week as int == 51                           , "%U: incorrect");
    378 
    379 	let v = newvirtual();
    380 	assert(parse(&v, "%w", "5") is void                  , "%w: parsefail");
    381 	assert(v.weekday is int                              , "%w: void");
    382 	assert(v.weekday as int == 4                         , "%w: incorrect");
    383 
    384 	let v = newvirtual();
    385 	assert(parse(&v, "%W", "51") is void                 , "%W: parsefail");
    386 	assert(v.week is int                                 , "%W: void");
    387 	assert(v.week as int == 51                           , "%W: incorrect");
    388 
    389 	let v = newvirtual();
    390 	assert(parse(&v, "%Y", "2019") is void               , "%Y: parsefail");
    391 	assert(v.year is int                                 , "%Y: void");
    392 	assert(v.year as int == 2019                         , "%Y: incorrect");
    393 
    394 	let v = newvirtual();
    395 	assert(parse(&v, "%z", "+0100") is void              , "%z: parsefail");
    396 	assert(v.zoff is i64                                 , "%z: void");
    397 	assert(v.zoff as i64 == 1 * time::HOUR               , "%z: incorrect");
    398 	let v = newvirtual();
    399 	assert(parse(&v, "%z", "+01:00") is void             , "%z: parsefail");
    400 	assert(v.zoff is i64                                 , "%z: void");
    401 	assert(v.zoff as i64 == 1 * time::HOUR               , "%z: incorrect");
    402 
    403 	let v = newvirtual();
    404 	assert(parse(&v, "%Z", "CET") is void                , "%Z: parsefail");
    405 	assert(v.zabbr is str                                , "%Z: void");
    406 	assert(v.zabbr as str == "CET"                       , "%Z: incorrect");
    407 
    408 	let v = newvirtual();
    409 	assert((
    410 		parse(&v,
    411 			"%Y-%m-%d %H:%M:%S.%N %z %Z %L",
    412 			"2038-01-19 03:14:07.000000000 +0000 UTC UTC",
    413 		)
    414 		is void
    415 	),
    416 		"test 1: parsefail"
    417 	);
    418 	assert(v.year       is int         , "test 1: year void");
    419 	assert(v.year       as int ==  2038, "test 1: year incorrect");
    420 	assert(v.month      is int         , "test 1: month void");
    421 	assert(v.month      as int ==     1, "test 1: month incorrect");
    422 	assert(v.day        is int         , "test 1: day void");
    423 	assert(v.day        as int ==    19, "test 1: day incorrect");
    424 	assert(v.hour       is int         , "test 1: hour void");
    425 	assert(v.hour       as int ==     3, "test 1: hour incorrect");
    426 	assert(v.minute     is int         , "test 1: minute void");
    427 	assert(v.minute     as int ==    14, "test 1: minute incorrect");
    428 	assert(v.second     is int         , "test 1: second void");
    429 	assert(v.second     as int ==     7, "test 1: second incorrect");
    430 	assert(v.nanosecond is int         , "test 1: nanosecond void");
    431 	assert(v.nanosecond as int ==     0, "test 1: nanosecond incorrect");
    432 	assert(v.zoff       is i64         , "test 1: zoff void");
    433 	assert(v.zoff       as i64 ==     0, "test 1: zoff incorrect");
    434 	assert(v.zabbr      is str         , "test 1: zabbr void");
    435 	assert(v.zabbr      as str == "UTC", "test 1: zabbr incorrect");
    436 	assert(v.locname    is str         , "test 1: locname void");
    437 	assert(v.locname    as str == "UTC", "test 1: locname incorrect");
    438 
    439 };