hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

parse.ha (17338B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use ascii;
      5 use strconv;
      6 use strings;
      7 use time;
      8 
// Internal error raised by [[parse_specifier]] and the scan_* helpers when the
// input does not match. [[parse]] translates it into a [[parsefail]].
type failure = !void;
     10 
// A parsing error occurred. This contains a byte index into the layout (the
// layout iterator's offset when the failure was detected) and the layout rune
// which was being processed when the parsing failure occurred.
export type parsefail = !(size, rune);
     14 
     15 // Parses a datetime string into a [[virtual]] date, according to a layout
     16 // format string with specifiers as documented under [[format]]. Partial,
     17 // sequential, aggregative parsing is possible.
     18 //
     19 // 	date::parse(&v, "%Y-%m-%d",    "2019-12-27");
     20 // 	date::parse(&v, "%H:%M:%S.%N", "22:07:08.000000000");
     21 // 	date::parse(&v, "%z %Z %L",    "+0100 CET Europe/Amsterdam");
     22 //
     23 // Parse will return [[parsefail]] if an invalid format specifier is encountered
     24 // or if given string 's' does not match the layout.
     25 export fn parse(v: *virtual, layout: str, s: str) (void | parsefail) = {
     26 	const liter = strings::iter(layout);
     27 	const siter = strings::iter(s);
     28 	let escaped = false;
     29 
     30 	for (let lr => strings::next(&liter)) {
     31 		if (!escaped && lr == '%') {
     32 			escaped = true;
     33 			continue;
     34 		};
     35 
     36 		if (!escaped) {
     37 			const sr = match (strings::next(&siter)) {
     38 			case done =>
     39 				return (liter.dec.offs, lr);
     40 			case let sr: rune =>
     41 				yield sr;
     42 			};
     43 			if (sr != lr) {
     44 				return (liter.dec.offs, lr);
     45 			};
     46 			continue;
     47 		};
     48 
     49 		escaped = false;
     50 
     51 		match (parse_specifier(v, &siter, lr)) {
     52 		case void => void;
     53 		case failure =>
     54 			return (liter.dec.offs, lr);
     55 		};
     56 	};
     57 
     58 	return void;
     59 };
     60 
     61 fn parse_specifier(
     62 	v: *virtual,
     63 	iter: *strings::iterator,
     64 	lr: rune,
     65 ) (void | failure) = {
     66 	switch (lr) {
     67 	case 'a' =>
     68 		v.weekday = scan_for(iter, WEEKDAYS_SHORT...)?;
     69 	case 'A' =>
     70 		v.weekday = scan_for(iter, WEEKDAYS...)?;
     71 	case 'b' =>
     72 		v.month = scan_for(iter, MONTHS_SHORT...)? + 1;
     73 	case 'B' =>
     74 		v.month = scan_for(iter, MONTHS...)? + 1;
     75 	case 'C' =>
     76 		v.century = scan_int(iter, 2)?;
     77 	case 'd', 'e' =>
     78 		v.day = scan_int(iter, 2)?;
     79 	case 'F' =>
     80 		v.year = scan_int(iter, 4)?;
     81 		eat_rune(iter, '-')?;
     82 		v.month = scan_int(iter, 2)?;
     83 		eat_rune(iter, '-')?;
     84 		v.day = scan_int(iter, 2)?;
     85 	case 'G' =>
     86 		v.isoweekyear = scan_int(iter, 4)?;
     87 	case 'H' =>
     88 		v.hour = scan_int(iter, 2)?;
     89 	case 'I' =>
     90 		v.hour12 = scan_int(iter, 2)?;
     91 	case 'j' =>
     92 		v.yearday = scan_int(iter, 3)?;
     93 	case 'L' =>
     94 		v.locname = scan_str(iter)?;
     95 	case 'm' =>
     96 		v.month = scan_int(iter, 2)?;
     97 	case 'M' =>
     98 		v.minute = scan_int(iter, 2)?;
     99 	case 'N' =>
    100 		let nsec = scan_decimal(iter, 9)?;
    101 		v.nanosecond = nsec: int;
    102 		v.vnsec = nsec;
    103 	case 'p' => // AM=false PM=true
    104 		v.ampm = scan_for(iter, "AM", "PM", "am", "pm")? % 2 == 1;
    105 	case 's' =>
    106 		v.vsec = scan_num(iter, 20)?;
    107 	case 'S' =>
    108 		v.second = scan_int(iter, 2)?;
    109 	case 'T' =>
    110 		v.hour = scan_int(iter, 2)?;
    111 		eat_rune(iter, ':')?;
    112 		v.minute = scan_int(iter, 2)?;
    113 		eat_rune(iter, ':')?;
    114 		v.second = scan_int(iter, 2)?;
    115 	case 'u' =>
    116 		v.weekday = scan_int(iter, 1)? - 1;
    117 	case 'U' =>
    118 		v.week = scan_int(iter, 2)?;
    119 	case 'V' =>
    120 		v.isoweek = scan_int(iter, 2)?;
    121 	case 'w' =>
    122 		v.weekday = scan_int(iter, 1)? - 1;
    123 	case 'W' =>
    124 		v.week = scan_int(iter, 2)?;
    125 	case 'y' =>
    126 		v.year100 = scan_int(iter, 2)?;
    127 	case 'Y' =>
    128 		v.year = scan_int(iter, 4)?;
    129 	case 'z' =>
    130 		v.zoff = scan_zo(iter)?;
    131 	case 'Z' =>
    132 		v.zabbr = scan_str(iter)?;
    133 	case '%' =>
    134 		eat_rune(iter, '%')?;
    135 	case =>
    136 		return failure;
    137 	};
    138 };
    139 
    140 fn eat_rune(iter: *strings::iterator, needle: rune) (uint | failure) = {
    141 	const rn = match (strings::next(iter)) {
    142 	case done =>
    143 		return failure;
    144 	case let rn: rune =>
    145 		yield rn;
    146 	};
    147 	if (rn == needle) {
    148 		return 1;
    149 	} else {
    150 		strings::prev(iter);
    151 		return 0;
    152 	};
    153 };
    154 
    155 // Scans the iterator for a given list of strings.
    156 // Returns the list index of the matched string.
    157 fn scan_for(iter: *strings::iterator, list: str...) (int | failure) = {
    158 	const name = strings::iterstr(iter);
    159 	if (len(name) == 0) {
    160 		return failure;
    161 	};
    162 	for(let i = 0z; i < len(list); i += 1) {
    163 		if (strings::hasprefix(name, list[i])) {
    164 			// Consume name
    165 			for (let j = 0z; j < len(list[i]); j += 1) {
    166 				strings::next(iter);
    167 			};
    168 			return i: int;
    169 		};
    170 	};
    171 	return failure;
    172 };
    173 
    174 // Scans the iterator for consecutive numeric digits.
    175 // Left-padded whitespace and zeros are permitted.
    176 // Returns the resulting int.
    177 fn scan_int(iter: *strings::iterator, maxrunes: size) (int | failure) = {
    178 	let start = *iter;
    179 	let startfixed = false;
    180 	for (let i = 0z; i < maxrunes; i += 1) {
    181 		let rn: rune = match (strings::next(iter)) {
    182 		case done =>
    183 			break;
    184 		case let rn: rune =>
    185 			yield rn;
    186 		};
    187 		if (!ascii::isdigit(rn) && rn != ' ') {
    188 			return failure;
    189 		};
    190 		if (!startfixed) {
    191 			if (ascii::isdigit(rn)) {
    192 				startfixed = true;
    193 			} else {
    194 				strings::next(&start);
    195 			};
    196 		};
    197 	};
    198 	match (strconv::stoi(strings::slice(&start, iter))) {
    199 	case let num: int =>
    200 		return num;
    201 	case =>
    202 		return failure;
    203 	};
    204 };
    205 
    206 // Scans the iterator for consecutive numeric digits.
    207 // Left-padded whitespace and zeros are permitted.
    208 // Returns the resulting i64.
    209 fn scan_num(iter: *strings::iterator, maxrunes: size) (i64 | failure) = {
    210 	let start = *iter;
    211 	for (let i = 0z; i < maxrunes; i += 1) {
    212 		match (strings::next(iter)) {
    213 		case done =>
    214 			return failure;
    215 		case let rn: rune =>
    216 			if (!ascii::isdigit(rn)) {
    217 				strings::prev(iter);
    218 				break;
    219 			};
    220 		};
    221 	};
    222 
    223 	match (strconv::stoi64(strings::slice(&start, iter))) {
    224 	case let num: i64 =>
    225 		return num;
    226 	case =>
    227 		return failure;
    228 	};
    229 };
    230 
    231 // Scans the iterator for consecutive numeric digits.
    232 // Left-padded whitespace and zeros are NOT permitted.
    233 // The resulting decimal is right-padded with zeros.
    234 fn scan_decimal(iter: *strings::iterator, maxrunes: size) (i64 | failure) = {
    235 	let start = *iter;
    236 	for (let i = 0z; i < maxrunes; i += 1) {
    237 		let rn: rune = match (strings::next(iter)) {
    238 		case done =>
    239 			break;
    240 		case let rn: rune =>
    241 			yield rn;
    242 		};
    243 		if (!ascii::isdigit(rn)) {
    244 			strings::prev(iter);
    245 			break;
    246 		};
    247 	};
    248 	const s = strings::slice(&start, iter);
    249 	match (strconv::stoi64(s)) {
    250 	case let num: i64 =>
    251 		for (let i = 0z; i < maxrunes - len(s); i += 1) {
    252 			num *= 10;
    253 		};
    254 		return num;
    255 	case =>
    256 		return failure;
    257 	};
    258 };
    259 
    260 // Scans and parses zone offsets of the form:
    261 //
    262 // 	Z
    263 // 	z
    264 // 	+nn:nn
    265 // 	+nnnn
    266 // 	-nn:nn
    267 // 	-nnnn
    268 //
    269 fn scan_zo(iter: *strings::iterator) (time::duration | failure) = {
    270 	const first = match (strings::next(iter)) {
    271 	case done =>
    272 		return failure;
    273 	case let first: rune =>
    274 		yield first;
    275 	};
    276 	if (first == 'Z' || first == 'z') {
    277 		return 0;
    278 	};
    279 
    280 	let zo = scan_int(iter, 2)? * time::HOUR;
    281 
    282 	match (strings::next(iter)) {
    283 	case done =>
    284 		return failure;
    285 	case let sep: rune =>
    286 		if (sep != ':') {
    287 			strings::prev(iter);
    288 		};
    289 	};
    290 
    291 	zo += scan_int(iter, 2)? * time::MINUTE;
    292 
    293 	if (first == '-') {
    294 		zo *= -1;
    295 	};
    296 
    297 	return zo;
    298 };
    299 
    300 // Scans and parses locality names, made of printable characters.
    301 fn scan_str(iter: *strings::iterator) (str | failure) = {
    302 	let start = *iter;
    303 	for (let rn => strings::next(iter)) {
    304 		if (!ascii::isgraph(rn)) {
    305 			strings::prev(iter);
    306 			break;
    307 		};
    308 	};
    309 	return strings::slice(&start, iter);
    310 };
    311 
    312 @test fn parse() void = {
    313 	let v = newvirtual();
    314 	assert(parse(&v, "foo", "foo") is void, "none: parsefail");
    315 	assert(v.zone        == null, "none: non-null zone");
    316 	assert(v.daydate     is void, "none: non-void daydate");
    317 	assert(v.daytime     is void, "none: non-void daytime");
    318 	assert(v.era         is void, "none: non-void era");
    319 	assert(v.year        is void, "none: non-void year");
    320 	assert(v.month       is void, "none: non-void month");
    321 	assert(v.day         is void, "none: non-void day");
    322 	assert(v.yearday     is void, "none: non-void yearday");
    323 	assert(v.isoweekyear is void, "none: non-void isoweekyear");
    324 	assert(v.isoweek     is void, "none: non-void isoweek");
    325 	assert(v.week        is void, "none: non-void week");
    326 	assert(v.sundayweek  is void, "none: non-void sundayweek");
    327 	assert(v.weekday     is void, "none: non-void weekday");
    328 	assert(v.hour        is void, "none: non-void hour");
    329 	assert(v.minute      is void, "none: non-void minute");
    330 	assert(v.second      is void, "none: non-void second");
    331 	assert(v.nanosecond  is void, "none: non-void nanosecond");
    332 	assert(v.vloc        is void, "none: non-void vloc");
    333 	assert(v.locname     is void, "none: non-void locname");
    334 	assert(v.zoff        is void, "none: non-void zoff");
    335 	assert(v.zabbr       is void, "none: non-void zabbr");
    336 	assert(v.hour12      is void, "none: non-void hour12");
    337 	assert(v.ampm        is void, "none: non-void ampm");
    338 
    339 	let v = newvirtual();
    340 	assert(parse(&v, "%a", "Fri") is void                , "%a: parsefail");
    341 	assert(v.weekday is int                              , "%a: void");
    342 	assert(v.weekday as int == 4                         , "%a: incorrect");
    343 
    344 	let v = newvirtual();
    345 	assert(parse(&v, "%A", "Friday") is void             , "%A: parsefail");
    346 	assert(v.weekday is int                              , "%A: void");
    347 	assert(v.weekday as int == 4                         , "%A: incorrect");
    348 
    349 	let v = newvirtual();
    350 	assert(parse(&v, "%b", "Jan") is void                , "%b: parsefail");
    351 	assert(v.month is int                                , "%b: void");
    352 	assert(v.month as int == 1                           , "%b: incorrect");
    353 
    354 	let v = newvirtual();
    355 	assert(parse(&v, "%B", "January") is void   ,         "%B: parsefail");
    356 	assert(v.month is int                                , "%B: void");
    357 	assert(v.month as int == 1                           , "%B: incorrect");
    358 
    359 	let v = newvirtual();
    360 	assert(parse(&v, "%d", "27") is void                 , "%d: parsefail");
    361 	assert(v.day is int                                  , "%d: void");
    362 	assert(v.day as int == 27                            , "%d: incorrect");
    363 
    364 	let v = newvirtual();
    365 	assert(parse(&v, "%d", " 1") is void                 , "%d: parsefail");
    366 	assert(v.day is int                                  , "%d: void");
    367 	assert(v.day as int == 1                             , "%d: incorrect");
    368 
    369 	let v = newvirtual();
    370 	assert(parse(&v, "%d", "x1") is parsefail            , "%d: not parsefail");
    371 
    372 	let v = newvirtual();
    373 	assert(parse(&v, "%e", " 7") is void                 , "%d: parsefail");
    374 	assert(v.day is int                                  , "%d: void");
    375 	assert(v.day as int == 7                             , "%d: incorrect");
    376 
    377 	let v = newvirtual();
    378 	assert(parse(&v, "%F", "2012-10-01") is void         , "%d: parsefail");
    379 	assert(v.year is int                                 , "%d: void");
    380 	assert(v.year as int == 2012                         , "%d: incorrect");
    381 	assert(v.month is int                                , "%d: void");
    382 	assert(v.month as int == 10                          , "%d: incorrect");
    383 	assert(v.day is int                                  , "%d: void");
    384 	assert(v.day as int == 1                             , "%d: incorrect");
    385 
    386 	let v = newvirtual();
    387 	assert(parse(&v, "%H", "22") is void                 , "%H: parsefail");
    388 	assert(v.hour is int                                 , "%H: void");
    389 	assert(v.hour as int == 22                           , "%H: incorrect");
    390 
    391 	let v = newvirtual();
    392 	assert(parse(&v, "%I", "10") is void                 , "%I: parsefail");
    393 	assert(v.hour12 is int                               , "%I: void");
    394 	assert(v.hour12 as int == 10                         , "%I: incorrect");
    395 
    396 	let v = newvirtual();
    397 	assert(parse(&v, "%j", "361") is void                , "%j: parsefail");
    398 	assert(v.yearday is int                              , "%j: void");
    399 	assert(v.yearday as int == 361                       , "%j: incorrect");
    400 
    401 	let v = newvirtual();
    402 	assert(parse(&v, "%j", "  9") is void                , "%j: parsefail");
    403 	assert(v.yearday is int                              , "%j: void");
    404 	assert(v.yearday as int == 9                         , "%j: incorrect");
    405 
    406 	let v = newvirtual();
    407 	assert(parse(&v, "%L", "Europe/Amsterdam") is void   , "%L: parsefail");
    408 	assert(v.locname is str                              , "%L: void");
    409 	assert(v.locname as str == "Europe/Amsterdam"        , "%L: incorrect");
    410 
    411 	let v = newvirtual();
    412 	assert(parse(&v, "%m", "12") is void                 , "%m: parsefail");
    413 	assert(v.month is int                                , "%m: void");
    414 	assert(v.month as int == 12                          , "%m: incorrect");
    415 
    416 	let v = newvirtual();
    417 	assert(parse(&v, "%M", "07") is void                 , "%M: parsefail");
    418 	assert(v.minute is int                               , "%M: void");
    419 	assert(v.minute as int == 7                          , "%M: incorrect");
    420 
    421 	let v = newvirtual();
    422 	assert(parse(&v, "%N", "123456789") is void          , "%N: parsefail");
    423 	assert(v.nanosecond is int                           , "%N: void");
    424 	assert(v.nanosecond as int == 123456789              , "%N: incorrect");
    425 
    426 	let v = newvirtual();
    427 	assert(parse(&v, "%N", "123") is void                , "%N: parsefail");
    428 	assert(v.nanosecond is int                           , "%N: void");
    429 	assert(v.nanosecond as int == 123000000              , "%N: incorrect");
    430 
    431 	let v = newvirtual();
    432 	assert(parse(&v, "%p", "PM") is void                 , "%p: parsefail");
    433 	assert(v.ampm is bool                                , "%p: void");
    434 	assert(v.ampm as bool == true                        , "%p: incorrect");
    435 
    436 	let v = newvirtual();
    437 	assert(parse(&v, "%S", "08") is void                 , "%S: parsefail");
    438 	assert(v.second is int                               , "%S: void");
    439 	assert(v.second as int == 8                          , "%S: incorrect");
    440 
    441 	let v = newvirtual();
    442 	assert(parse(&v, "%T", "18:42:05") is void           , "%d: parsefail");
    443 	assert(v.hour is int                                 , "%d: void");
    444 	assert(v.hour as int == 18                           , "%d: incorrect");
    445 	assert(v.minute is int                               , "%d: void");
    446 	assert(v.minute as int == 42                         , "%d: incorrect");
    447 	assert(v.second is int                               , "%d: void");
    448 	assert(v.second as int == 5                          , "%d: incorrect");
    449 
    450 	let v = newvirtual();
    451 	assert(parse(&v, "%u", "5") is void                  , "%u: parsefail");
    452 	assert(v.weekday is int                              , "%u: void");
    453 	assert(v.weekday as int == 4                         , "%u: incorrect");
    454 
    455 	let v = newvirtual();
    456 	assert(parse(&v, "%U", "51") is void                 , "%U: parsefail");
    457 	assert(v.week is int                                 , "%U: void");
    458 	assert(v.week as int == 51                           , "%U: incorrect");
    459 
    460 	let v = newvirtual();
    461 	assert(parse(&v, "%w", "5") is void                  , "%w: parsefail");
    462 	assert(v.weekday is int                              , "%w: void");
    463 	assert(v.weekday as int == 4                         , "%w: incorrect");
    464 
    465 	let v = newvirtual();
    466 	assert(parse(&v, "%W", "51") is void                 , "%W: parsefail");
    467 	assert(v.week is int                                 , "%W: void");
    468 	assert(v.week as int == 51                           , "%W: incorrect");
    469 
    470 	let v = newvirtual();
    471 	assert(parse(&v, "%Y", "2019") is void               , "%Y: parsefail");
    472 	assert(v.year is int                                 , "%Y: void");
    473 	assert(v.year as int == 2019                         , "%Y: incorrect");
    474 
    475 	let v = newvirtual();
    476 	assert(parse(&v, "%z", "+0100") is void              , "%z: parsefail");
    477 	assert(v.zoff is i64                                 , "%z: void");
    478 	assert(v.zoff as i64 == 1 * time::HOUR               , "%z: incorrect");
    479 	let v = newvirtual();
    480 	assert(parse(&v, "%z", "+01:00") is void             , "%z: parsefail");
    481 	assert(v.zoff is i64                                 , "%z: void");
    482 	assert(v.zoff as i64 == 1 * time::HOUR               , "%z: incorrect");
    483 
    484 	let v = newvirtual();
    485 	assert(parse(&v, "%Z", "CET") is void                , "%Z: parsefail");
    486 	assert(v.zabbr is str                                , "%Z: void");
    487 	assert(v.zabbr as str == "CET"                       , "%Z: incorrect");
    488 
    489 	let v = newvirtual();
    490 	assert((
    491 		parse(&v,
    492 			"%Y-%m-%d %H:%M:%S.%N %z %Z %L",
    493 			"2038-01-19 03:14:07.000000000 +0000 UTC UTC",
    494 		)
    495 		is void
    496 	),
    497 		"test 1: parsefail"
    498 	);
    499 	assert(v.year       is int         , "test 1: year void");
    500 	assert(v.year       as int ==  2038, "test 1: year incorrect");
    501 	assert(v.month      is int         , "test 1: month void");
    502 	assert(v.month      as int ==     1, "test 1: month incorrect");
    503 	assert(v.day        is int         , "test 1: day void");
    504 	assert(v.day        as int ==    19, "test 1: day incorrect");
    505 	assert(v.hour       is int         , "test 1: hour void");
    506 	assert(v.hour       as int ==     3, "test 1: hour incorrect");
    507 	assert(v.minute     is int         , "test 1: minute void");
    508 	assert(v.minute     as int ==    14, "test 1: minute incorrect");
    509 	assert(v.second     is int         , "test 1: second void");
    510 	assert(v.second     as int ==     7, "test 1: second incorrect");
    511 	assert(v.nanosecond is int         , "test 1: nanosecond void");
    512 	assert(v.nanosecond as int ==     0, "test 1: nanosecond incorrect");
    513 	assert(v.zoff       is i64         , "test 1: zoff void");
    514 	assert(v.zoff       as i64 ==     0, "test 1: zoff incorrect");
    515 	assert(v.zabbr      is str         , "test 1: zabbr void");
    516 	assert(v.zabbr      as str == "UTC", "test 1: zabbr incorrect");
    517 	assert(v.locname    is str         , "test 1: locname void");
    518 	assert(v.locname    as str == "UTC", "test 1: locname incorrect");
    519 
    520 };