hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

parse.ha (17266B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use ascii;
      5 use io;
      6 use strconv;
      7 use strings;
      8 use time;
      9 use time::chrono;
     10 
     11 type failure = !void;
     12 
     13 // A parsing error occurred. This shall contain a byte index of, and the rune
     14 // from, the layout at the position where the parsing failure occurred.
     15 export type parsefail = !(size, rune);
     16 
     17 // Parses a datetime string into a [[virtual]] date, according to a layout
     18 // format string with specifiers as documented under [[format]]. Partial,
     19 // sequential, aggregative parsing is possible.
     20 //
     21 // 	date::parse(&v, "%Y-%m-%d",    "2019-12-27");
     22 // 	date::parse(&v, "%H:%M:%S.%N", "22:07:08.000000000");
     23 // 	date::parse(&v, "%z %Z %L",    "+0100 CET Europe/Amsterdam");
     24 //
     25 // Parse will return [[parsefail]] if an invalid format specifier is encountered
     26 // or if given string 's' does not match the layout.
     27 export fn parse(v: *virtual, layout: str, s: str) (void | parsefail) = {
     28 	const liter = strings::iter(layout);
     29 	const siter = strings::iter(s);
     30 	let escaped = false;
     31 
     32 	for (let lr => strings::next(&liter)) {
     33 		if (!escaped && lr == '%') {
     34 			escaped = true;
     35 			continue;
     36 		};
     37 
     38 		if (!escaped) {
     39 			const sr = match (strings::next(&siter)) {
     40 			case done =>
     41 				return (liter.dec.offs, lr);
     42 			case let sr: rune =>
     43 				yield sr;
     44 			};
     45 			if (sr != lr) {
     46 				return (liter.dec.offs, lr);
     47 			};
     48 			continue;
     49 		};
     50 
     51 		escaped = false;
     52 
     53 		match (parse_specifier(v, &siter, lr)) {
     54 		case void => void;
     55 		case failure =>
     56 			return (liter.dec.offs, lr);
     57 		};
     58 	};
     59 
     60 	return void;
     61 };
     62 
     63 fn parse_specifier(
     64 	v: *virtual,
     65 	iter: *strings::iterator,
     66 	lr: rune,
     67 ) (void | failure) = {
     68 	switch (lr) {
     69 	case 'a' =>
     70 		v.weekday = scan_for(iter, WEEKDAYS_SHORT...)?;
     71 	case 'A' =>
     72 		v.weekday = scan_for(iter, WEEKDAYS...)?;
     73 	case 'b' =>
     74 		v.month = scan_for(iter, MONTHS_SHORT...)? + 1;
     75 	case 'B' =>
     76 		v.month = scan_for(iter, MONTHS...)? + 1;
     77 	case 'C' =>
     78 		v.century = scan_int(iter, 2)?;
     79 	case 'd', 'e' =>
     80 		v.day = scan_int(iter, 2)?;
     81 	case 'F' =>
     82 		v.year = scan_int(iter, 4)?;
     83 		eat_rune(iter, '-')?;
     84 		v.month = scan_int(iter, 2)?;
     85 		eat_rune(iter, '-')?;
     86 		v.day = scan_int(iter, 2)?;
     87 	case 'H' =>
     88 		v.hour = scan_int(iter, 2)?;
     89 	case 'I' =>
     90 		v.hour12 = scan_int(iter, 2)?;
     91 	case 'j' =>
     92 		v.yearday = scan_int(iter, 3)?;
     93 	case 'L' =>
     94 		v.locname = scan_str(iter)?;
     95 	case 'm' =>
     96 		v.month = scan_int(iter, 2)?;
     97 	case 'M' =>
     98 		v.minute = scan_int(iter, 2)?;
     99 	case 'N' =>
    100 		let nsec = scan_decimal(iter, 9)?;
    101 		v.nanosecond = nsec: int;
    102 		v.vnsec = nsec;
    103 	case 'p' => // AM=false PM=true
    104 		v.ampm = scan_for(iter, "AM", "PM", "am", "pm")? % 2 == 1;
    105 	case 's' =>
    106 		v.vsec = scan_num(iter, 20)?;
    107 	case 'S' =>
    108 		v.second = scan_int(iter, 2)?;
    109 	case 'T' =>
    110 		v.hour = scan_int(iter, 2)?;
    111 		eat_rune(iter, ':')?;
    112 		v.minute = scan_int(iter, 2)?;
    113 		eat_rune(iter, ':')?;
    114 		v.second = scan_int(iter, 2)?;
    115 	case 'u' =>
    116 		v.weekday = scan_int(iter, 1)? - 1;
    117 	case 'U' =>
    118 		v.week = scan_int(iter, 2)?;
    119 	case 'w' =>
    120 		v.weekday = scan_int(iter, 1)? - 1;
    121 	case 'W' =>
    122 		v.week = scan_int(iter, 2)?;
    123 	case 'y' =>
    124 		v.year100 = scan_int(iter, 2)?;
    125 	case 'Y' =>
    126 		v.year = scan_int(iter, 4)?;
    127 	case 'z' =>
    128 		v.zoff = scan_zo(iter)?;
    129 	case 'Z' =>
    130 		v.zabbr = scan_str(iter)?;
    131 	case '%' =>
    132 		eat_rune(iter, '%')?;
    133 	case =>
    134 		return failure;
    135 	};
    136 };
    137 
    138 fn eat_rune(iter: *strings::iterator, needle: rune) (uint | failure) = {
    139 	const rn = match (strings::next(iter)) {
    140 	case done =>
    141 		return failure;
    142 	case let rn: rune =>
    143 		yield rn;
    144 	};
    145 	if (rn == needle) {
    146 		return 1;
    147 	} else {
    148 		strings::prev(iter);
    149 		return 0;
    150 	};
    151 };
    152 
    153 // Scans the iterator for a given list of strings.
    154 // Returns the list index of the matched string.
    155 fn scan_for(iter: *strings::iterator, list: str...) (int | failure) = {
    156 	const name = strings::iterstr(iter);
    157 	if (len(name) == 0) {
    158 		return failure;
    159 	};
    160 	for(let i = 0z; i < len(list); i += 1) {
    161 		if (strings::hasprefix(name, list[i])) {
    162 			// Consume name
    163 			for (let j = 0z; j < len(list[i]); j += 1) {
    164 				strings::next(iter);
    165 			};
    166 			return i: int;
    167 		};
    168 	};
    169 	return failure;
    170 };
    171 
    172 // Scans the iterator for consecutive numeric digits.
    173 // Left-padded whitespace and zeros are permitted.
    174 // Returns the resulting int.
    175 fn scan_int(iter: *strings::iterator, maxrunes: size) (int | failure) = {
    176 	let start = *iter;
    177 	let startfixed = false;
    178 	for (let i = 0z; i < maxrunes; i += 1) {
    179 		let rn: rune = match (strings::next(iter)) {
    180 		case done =>
    181 			break;
    182 		case let rn: rune =>
    183 			yield rn;
    184 		};
    185 		if (!ascii::isdigit(rn) && rn != ' ') {
    186 			return failure;
    187 		};
    188 		if (!startfixed) {
    189 			if (ascii::isdigit(rn)) {
    190 				startfixed = true;
    191 			} else {
    192 				strings::next(&start);
    193 			};
    194 		};
    195 	};
    196 	match (strconv::stoi(strings::slice(&start, iter))) {
    197 	case let num: int =>
    198 		return num;
    199 	case =>
    200 		return failure;
    201 	};
    202 };
    203 
    204 // Scans the iterator for consecutive numeric digits.
    205 // Left-padded whitespace and zeros are permitted.
    206 // Returns the resulting i64.
    207 fn scan_num(iter: *strings::iterator, maxrunes: size) (i64 | failure) = {
    208 	let start = *iter;
    209 	for (let i = 0z; i < maxrunes; i += 1) {
    210 		match (strings::next(iter)) {
    211 		case done =>
    212 			return failure;
    213 		case let rn: rune =>
    214 			if (!ascii::isdigit(rn)) {
    215 				strings::prev(iter);
    216 				break;
    217 			};
    218 		};
    219 	};
    220 
    221 	match (strconv::stoi64(strings::slice(&start, iter))) {
    222 	case let num: i64 =>
    223 		return num;
    224 	case =>
    225 		return failure;
    226 	};
    227 };
    228 
    229 // Scans the iterator for consecutive numeric digits.
    230 // Left-padded whitespace and zeros are NOT permitted.
    231 // The resulting decimal is right-padded with zeros.
    232 fn scan_decimal(iter: *strings::iterator, maxrunes: size) (i64 | failure) = {
    233 	let start = *iter;
    234 	for (let i = 0z; i < maxrunes; i += 1) {
    235 		let rn: rune = match (strings::next(iter)) {
    236 		case done =>
    237 			break;
    238 		case let rn: rune =>
    239 			yield rn;
    240 		};
    241 		if (!ascii::isdigit(rn)) {
    242 			strings::prev(iter);
    243 			break;
    244 		};
    245 	};
    246 	const s = strings::slice(&start, iter);
    247 	match (strconv::stoi64(s)) {
    248 	case let num: i64 =>
    249 		for (let i = 0z; i < maxrunes - len(s); i += 1) {
    250 			num *= 10;
    251 		};
    252 		return num;
    253 	case =>
    254 		return failure;
    255 	};
    256 };
    257 
    258 // Scans and parses zone offsets of the form:
    259 //
    260 // 	Z
    261 // 	z
    262 // 	+nn:nn
    263 // 	+nnnn
    264 // 	-nn:nn
    265 // 	-nnnn
    266 //
    267 fn scan_zo(iter: *strings::iterator) (time::duration | failure) = {
    268 	const first = match (strings::next(iter)) {
    269 	case done =>
    270 		return failure;
    271 	case let first: rune =>
    272 		yield first;
    273 	};
    274 	if (first == 'Z' || first == 'z') {
    275 		return 0;
    276 	};
    277 
    278 	let zo = scan_int(iter, 2)? * time::HOUR;
    279 
    280 	match (strings::next(iter)) {
    281 	case done =>
    282 		return failure;
    283 	case let sep: rune =>
    284 		if (sep != ':') {
    285 			strings::prev(iter);
    286 		};
    287 	};
    288 
    289 	zo += scan_int(iter, 2)? * time::MINUTE;
    290 
    291 	if (first == '-') {
    292 		zo *= -1;
    293 	};
    294 
    295 	return zo;
    296 };
    297 
    298 // Scans and parses locality names, made of printable characters.
    299 fn scan_str(iter: *strings::iterator) (str | failure) = {
    300 	let start = *iter;
    301 	for (let rn => strings::next(iter)) {
    302 		if (!ascii::isgraph(rn)) {
    303 			strings::prev(iter);
    304 			break;
    305 		};
    306 	};
    307 	return strings::slice(&start, iter);
    308 };
    309 
    310 @test fn parse() void = {
    311 	let v = newvirtual();
    312 	assert(parse(&v, "foo", "foo") is void, "none: parsefail");
    313 	assert(v.zone        == null, "none: non-null zone");
    314 	assert(v.daydate     is void, "none: non-void daydate");
    315 	assert(v.daytime     is void, "none: non-void daytime");
    316 	assert(v.era         is void, "none: non-void era");
    317 	assert(v.year        is void, "none: non-void year");
    318 	assert(v.month       is void, "none: non-void month");
    319 	assert(v.day         is void, "none: non-void day");
    320 	assert(v.yearday     is void, "none: non-void yearday");
    321 	assert(v.isoweekyear is void, "none: non-void isoweekyear");
    322 	assert(v.isoweek     is void, "none: non-void isoweek");
    323 	assert(v.week        is void, "none: non-void week");
    324 	assert(v.sundayweek  is void, "none: non-void sundayweek");
    325 	assert(v.weekday     is void, "none: non-void weekday");
    326 	assert(v.hour        is void, "none: non-void hour");
    327 	assert(v.minute      is void, "none: non-void minute");
    328 	assert(v.second      is void, "none: non-void second");
    329 	assert(v.nanosecond  is void, "none: non-void nanosecond");
    330 	assert(v.vloc        is void, "none: non-void vloc");
    331 	assert(v.locname     is void, "none: non-void locname");
    332 	assert(v.zoff        is void, "none: non-void zoff");
    333 	assert(v.zabbr       is void, "none: non-void zabbr");
    334 	assert(v.hour12      is void, "none: non-void hour12");
    335 	assert(v.ampm        is void, "none: non-void ampm");
    336 
    337 	let v = newvirtual();
    338 	assert(parse(&v, "%a", "Fri") is void                , "%a: parsefail");
    339 	assert(v.weekday is int                              , "%a: void");
    340 	assert(v.weekday as int == 4                         , "%a: incorrect");
    341 
    342 	let v = newvirtual();
    343 	assert(parse(&v, "%A", "Friday") is void             , "%A: parsefail");
    344 	assert(v.weekday is int                              , "%A: void");
    345 	assert(v.weekday as int == 4                         , "%A: incorrect");
    346 
    347 	let v = newvirtual();
    348 	assert(parse(&v, "%b", "Jan") is void                , "%b: parsefail");
    349 	assert(v.month is int                                , "%b: void");
    350 	assert(v.month as int == 1                           , "%b: incorrect");
    351 
    352 	let v = newvirtual();
    353 	assert(parse(&v, "%B", "January") is void   ,         "%B: parsefail");
    354 	assert(v.month is int                                , "%B: void");
    355 	assert(v.month as int == 1                           , "%B: incorrect");
    356 
    357 	let v = newvirtual();
    358 	assert(parse(&v, "%d", "27") is void                 , "%d: parsefail");
    359 	assert(v.day is int                                  , "%d: void");
    360 	assert(v.day as int == 27                            , "%d: incorrect");
    361 
    362 	let v = newvirtual();
    363 	assert(parse(&v, "%d", " 1") is void                 , "%d: parsefail");
    364 	assert(v.day is int                                  , "%d: void");
    365 	assert(v.day as int == 1                             , "%d: incorrect");
    366 
    367 	let v = newvirtual();
    368 	assert(parse(&v, "%d", "x1") is parsefail            , "%d: not parsefail");
    369 
    370 	let v = newvirtual();
    371 	assert(parse(&v, "%e", " 7") is void                 , "%d: parsefail");
    372 	assert(v.day is int                                  , "%d: void");
    373 	assert(v.day as int == 7                             , "%d: incorrect");
    374 
    375 	let v = newvirtual();
    376 	assert(parse(&v, "%F", "2012-10-01") is void         , "%d: parsefail");
    377 	assert(v.year is int                                 , "%d: void");
    378 	assert(v.year as int == 2012                         , "%d: incorrect");
    379 	assert(v.month is int                                , "%d: void");
    380 	assert(v.month as int == 10                          , "%d: incorrect");
    381 	assert(v.day is int                                  , "%d: void");
    382 	assert(v.day as int == 1                             , "%d: incorrect");
    383 
    384 	let v = newvirtual();
    385 	assert(parse(&v, "%H", "22") is void                 , "%H: parsefail");
    386 	assert(v.hour is int                                 , "%H: void");
    387 	assert(v.hour as int == 22                           , "%H: incorrect");
    388 
    389 	let v = newvirtual();
    390 	assert(parse(&v, "%I", "10") is void                 , "%I: parsefail");
    391 	assert(v.hour12 is int                               , "%I: void");
    392 	assert(v.hour12 as int == 10                         , "%I: incorrect");
    393 
    394 	let v = newvirtual();
    395 	assert(parse(&v, "%j", "361") is void                , "%j: parsefail");
    396 	assert(v.yearday is int                              , "%j: void");
    397 	assert(v.yearday as int == 361                       , "%j: incorrect");
    398 
    399 	let v = newvirtual();
    400 	assert(parse(&v, "%j", "  9") is void                , "%j: parsefail");
    401 	assert(v.yearday is int                              , "%j: void");
    402 	assert(v.yearday as int == 9                         , "%j: incorrect");
    403 
    404 	let v = newvirtual();
    405 	assert(parse(&v, "%L", "Europe/Amsterdam") is void   , "%L: parsefail");
    406 	assert(v.locname is str                              , "%L: void");
    407 	assert(v.locname as str == "Europe/Amsterdam"        , "%L: incorrect");
    408 
    409 	let v = newvirtual();
    410 	assert(parse(&v, "%m", "12") is void                 , "%m: parsefail");
    411 	assert(v.month is int                                , "%m: void");
    412 	assert(v.month as int == 12                          , "%m: incorrect");
    413 
    414 	let v = newvirtual();
    415 	assert(parse(&v, "%M", "07") is void                 , "%M: parsefail");
    416 	assert(v.minute is int                               , "%M: void");
    417 	assert(v.minute as int == 7                          , "%M: incorrect");
    418 
    419 	let v = newvirtual();
    420 	assert(parse(&v, "%N", "123456789") is void          , "%N: parsefail");
    421 	assert(v.nanosecond is int                           , "%N: void");
    422 	assert(v.nanosecond as int == 123456789              , "%N: incorrect");
    423 
    424 	let v = newvirtual();
    425 	assert(parse(&v, "%N", "123") is void                , "%N: parsefail");
    426 	assert(v.nanosecond is int                           , "%N: void");
    427 	assert(v.nanosecond as int == 123000000              , "%N: incorrect");
    428 
    429 	let v = newvirtual();
    430 	assert(parse(&v, "%p", "PM") is void                 , "%p: parsefail");
    431 	assert(v.ampm is bool                                , "%p: void");
    432 	assert(v.ampm as bool == true                        , "%p: incorrect");
    433 
    434 	let v = newvirtual();
    435 	assert(parse(&v, "%S", "08") is void                 , "%S: parsefail");
    436 	assert(v.second is int                               , "%S: void");
    437 	assert(v.second as int == 8                          , "%S: incorrect");
    438 
    439 	let v = newvirtual();
    440 	assert(parse(&v, "%T", "18:42:05") is void           , "%d: parsefail");
    441 	assert(v.hour is int                                 , "%d: void");
    442 	assert(v.hour as int == 18                           , "%d: incorrect");
    443 	assert(v.minute is int                               , "%d: void");
    444 	assert(v.minute as int == 42                         , "%d: incorrect");
    445 	assert(v.second is int                               , "%d: void");
    446 	assert(v.second as int == 5                          , "%d: incorrect");
    447 
    448 	let v = newvirtual();
    449 	assert(parse(&v, "%u", "5") is void                  , "%u: parsefail");
    450 	assert(v.weekday is int                              , "%u: void");
    451 	assert(v.weekday as int == 4                         , "%u: incorrect");
    452 
    453 	let v = newvirtual();
    454 	assert(parse(&v, "%U", "51") is void                 , "%U: parsefail");
    455 	assert(v.week is int                                 , "%U: void");
    456 	assert(v.week as int == 51                           , "%U: incorrect");
    457 
    458 	let v = newvirtual();
    459 	assert(parse(&v, "%w", "5") is void                  , "%w: parsefail");
    460 	assert(v.weekday is int                              , "%w: void");
    461 	assert(v.weekday as int == 4                         , "%w: incorrect");
    462 
    463 	let v = newvirtual();
    464 	assert(parse(&v, "%W", "51") is void                 , "%W: parsefail");
    465 	assert(v.week is int                                 , "%W: void");
    466 	assert(v.week as int == 51                           , "%W: incorrect");
    467 
    468 	let v = newvirtual();
    469 	assert(parse(&v, "%Y", "2019") is void               , "%Y: parsefail");
    470 	assert(v.year is int                                 , "%Y: void");
    471 	assert(v.year as int == 2019                         , "%Y: incorrect");
    472 
    473 	let v = newvirtual();
    474 	assert(parse(&v, "%z", "+0100") is void              , "%z: parsefail");
    475 	assert(v.zoff is i64                                 , "%z: void");
    476 	assert(v.zoff as i64 == 1 * time::HOUR               , "%z: incorrect");
    477 	let v = newvirtual();
    478 	assert(parse(&v, "%z", "+01:00") is void             , "%z: parsefail");
    479 	assert(v.zoff is i64                                 , "%z: void");
    480 	assert(v.zoff as i64 == 1 * time::HOUR               , "%z: incorrect");
    481 
    482 	let v = newvirtual();
    483 	assert(parse(&v, "%Z", "CET") is void                , "%Z: parsefail");
    484 	assert(v.zabbr is str                                , "%Z: void");
    485 	assert(v.zabbr as str == "CET"                       , "%Z: incorrect");
    486 
    487 	let v = newvirtual();
    488 	assert((
    489 		parse(&v,
    490 			"%Y-%m-%d %H:%M:%S.%N %z %Z %L",
    491 			"2038-01-19 03:14:07.000000000 +0000 UTC UTC",
    492 		)
    493 		is void
    494 	),
    495 		"test 1: parsefail"
    496 	);
    497 	assert(v.year       is int         , "test 1: year void");
    498 	assert(v.year       as int ==  2038, "test 1: year incorrect");
    499 	assert(v.month      is int         , "test 1: month void");
    500 	assert(v.month      as int ==     1, "test 1: month incorrect");
    501 	assert(v.day        is int         , "test 1: day void");
    502 	assert(v.day        as int ==    19, "test 1: day incorrect");
    503 	assert(v.hour       is int         , "test 1: hour void");
    504 	assert(v.hour       as int ==     3, "test 1: hour incorrect");
    505 	assert(v.minute     is int         , "test 1: minute void");
    506 	assert(v.minute     as int ==    14, "test 1: minute incorrect");
    507 	assert(v.second     is int         , "test 1: second void");
    508 	assert(v.second     as int ==     7, "test 1: second incorrect");
    509 	assert(v.nanosecond is int         , "test 1: nanosecond void");
    510 	assert(v.nanosecond as int ==     0, "test 1: nanosecond incorrect");
    511 	assert(v.zoff       is i64         , "test 1: zoff void");
    512 	assert(v.zoff       as i64 ==     0, "test 1: zoff incorrect");
    513 	assert(v.zabbr      is str         , "test 1: zabbr void");
    514 	assert(v.zabbr      as str == "UTC", "test 1: zabbr incorrect");
    515 	assert(v.locname    is str         , "test 1: locname void");
    516 	assert(v.locname    as str == "UTC", "test 1: locname incorrect");
    517 
    518 };