parse.ha (17338B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use ascii;
use strconv;
use strings;
use time;

// Internal error: a scanner could not match the input.
type failure = !void;

// A parsing error occurred. This contains a byte index into the layout and the
// layout rune at the position where the parsing failure occurred.
export type parsefail = !(size, rune);

// Parses a datetime string into a [[virtual]] date, according to a layout
// format string with specifiers as documented under [[format]]. Partial,
// sequential, aggregative parsing is possible.
//
// 	date::parse(&v, "%Y-%m-%d", "2019-12-27");
// 	date::parse(&v, "%H:%M:%S.%N", "22:07:08.000000000");
// 	date::parse(&v, "%z %Z %L", "+0100 CET Europe/Amsterdam");
//
// Parse will return [[parsefail]] if an invalid format specifier is encountered
// (including a lone '%' at the end of the layout) or if given string 's' does
// not match the layout. Input remaining in 's' after the layout is exhausted is
// ignored.
export fn parse(v: *virtual, layout: str, s: str) (void | parsefail) = {
	const liter = strings::iter(layout);
	const siter = strings::iter(s);
	let escaped = false;

	for (let lr => strings::next(&liter)) {
		if (!escaped && lr == '%') {
			escaped = true;
			continue;
		};

		if (!escaped) {
			// Literal layout rune: the input must carry the same
			// rune at this position.
			match (strings::next(&siter)) {
			case done =>
				return (liter.dec.offs, lr);
			case let sr: rune =>
				if (sr != lr) {
					return (liter.dec.offs, lr);
				};
			};
			continue;
		};

		escaped = false;

		match (parse_specifier(v, &siter, lr)) {
		case void => void;
		case failure =>
			return (liter.dec.offs, lr);
		};
	};

	// The layout ended right after a '%': there is no specifier rune to
	// act upon, so this is an invalid format specifier.
	if (escaped) {
		return (liter.dec.offs, '%');
	};

	return void;
};

// Parses the input at 'iter' according to the single format specifier 'lr'
// (the rune following a '%' in the layout) and stores the result in the
// corresponding field(s) of 'v'. Returns failure if the specifier is unknown
// or if the input does not match.
fn parse_specifier(
	v: *virtual,
	iter: *strings::iterator,
	lr: rune,
) (void | failure) = {
	switch (lr) {
	case 'a' =>
		v.weekday = scan_for(iter, WEEKDAYS_SHORT...)?;
	case 'A' =>
		v.weekday = scan_for(iter, WEEKDAYS...)?;
	case 'b' =>
		// List index is zero-based, months are one-based.
		v.month = scan_for(iter, MONTHS_SHORT...)? + 1;
	case 'B' =>
		v.month = scan_for(iter, MONTHS...)? + 1;
	case 'C' =>
		v.century = scan_int(iter, 2)?;
	case 'd', 'e' =>
		v.day = scan_int(iter, 2)?;
	case 'F' =>
		// eat_rune yields 0 when the next rune is not the needle;
		// the separators are mandatory, so treat that as a failure.
		v.year = scan_int(iter, 4)?;
		if (eat_rune(iter, '-')? == 0) {
			return failure;
		};
		v.month = scan_int(iter, 2)?;
		if (eat_rune(iter, '-')? == 0) {
			return failure;
		};
		v.day = scan_int(iter, 2)?;
	case 'G' =>
		v.isoweekyear = scan_int(iter, 4)?;
	case 'H' =>
		v.hour = scan_int(iter, 2)?;
	case 'I' =>
		v.hour12 = scan_int(iter, 2)?;
	case 'j' =>
		v.yearday = scan_int(iter, 3)?;
	case 'L' =>
		v.locname = scan_str(iter)?;
	case 'm' =>
		v.month = scan_int(iter, 2)?;
	case 'M' =>
		v.minute = scan_int(iter, 2)?;
	case 'N' =>
		let nsec = scan_decimal(iter, 9)?;
		v.nanosecond = nsec: int;
		v.vnsec = nsec;
	case 'p' => // AM=false PM=true
		// Odd list indices are the "PM"/"pm" entries.
		v.ampm = scan_for(iter, "AM", "PM", "am", "pm")? % 2 == 1;
	case 's' =>
		v.vsec = scan_num(iter, 20)?;
	case 'S' =>
		v.second = scan_int(iter, 2)?;
	case 'T' =>
		v.hour = scan_int(iter, 2)?;
		if (eat_rune(iter, ':')? == 0) {
			return failure;
		};
		v.minute = scan_int(iter, 2)?;
		if (eat_rune(iter, ':')? == 0) {
			return failure;
		};
		v.second = scan_int(iter, 2)?;
	case 'u' =>
		// Input is one-based, the stored weekday is zero-based.
		v.weekday = scan_int(iter, 1)? - 1;
	case 'U' =>
		// NOTE(review): this stores into v.week, same as %W, although
		// the virtual type also has a sundayweek field -- confirm
		// which field %U is meant to populate.
		v.week = scan_int(iter, 2)?;
	case 'V' =>
		v.isoweek = scan_int(iter, 2)?;
	case 'w' =>
		// Yields the same result as "n - 1" for inputs 1 through 7,
		// but maps 0 to 6 rather than to the invalid weekday -1.
		// NOTE(review): assumes %w counts from Sunday = 0 -- confirm
		// against the [[format]] documentation.
		v.weekday = (scan_int(iter, 1)? + 6) % 7;
	case 'W' =>
		v.week = scan_int(iter, 2)?;
	case 'y' =>
		v.year100 = scan_int(iter, 2)?;
	case 'Y' =>
		v.year = scan_int(iter, 4)?;
	case 'z' =>
		v.zoff = scan_zo(iter)?;
	case 'Z' =>
		v.zabbr = scan_str(iter)?;
	case '%' =>
		// "%%" stands for a literal '%', which must be present.
		if (eat_rune(iter, '%')? == 0) {
			return failure;
		};
	case =>
		return failure;
	};
};

// Consumes the next rune of the iterator if it equals 'needle'.
// Returns 1 if the rune was consumed, 0 if the next rune is a different one
// (in which case the iterator is left where it was), or failure if the
// iterator is exhausted.
fn eat_rune(iter: *strings::iterator, needle: rune) (uint | failure) = {
	const rn = match (strings::next(iter)) {
	case done =>
		return failure;
	case let rn: rune =>
		yield rn;
	};
	if (rn == needle) {
		return 1;
	} else {
		strings::prev(iter);
		return 0;
	};
};

// Scans the iterator for a given list of strings.
// Returns the list index of the first string which prefixes the remaining
// input, and consumes that string from the iterator. Returns failure if the
// input is exhausted or if no string matches.
fn scan_for(iter: *strings::iterator, list: str...) (int | failure) = {
	const name = strings::iterstr(iter);
	if (len(name) == 0) {
		return failure;
	};
	for (let i = 0z; i < len(list); i += 1) {
		if (!strings::hasprefix(name, list[i])) {
			continue;
		};
		// Consume the match rune by rune. len() counts bytes, which
		// would over-consume the input for non-ASCII list entries.
		let matched = strings::iter(list[i]);
		for (let r => strings::next(&matched)) {
			strings::next(iter);
		};
		return i: int;
	};
	return failure;
};

// Scans the iterator for consecutive numeric digits.
// Left-padded whitespace and zeros are permitted.
// Returns the resulting int.
fn scan_int(iter: *strings::iterator, maxrunes: size) (int | failure) = {
	// 'start' trails 'iter': it is advanced past leading spaces only, so
	// that the slice handed to strconv begins at the first digit.
	let start = *iter;
	let startfixed = false;
	for (let i = 0z; i < maxrunes; i += 1) {
		let rn: rune = match (strings::next(iter)) {
		case done =>
			break;
		case let rn: rune =>
			yield rn;
		};
		// Any rune other than a digit or a space is a mismatch.
		if (!ascii::isdigit(rn) && rn != ' ') {
			return failure;
		};
		if (!startfixed) {
			if (ascii::isdigit(rn)) {
				startfixed = true;
			} else {
				strings::next(&start);
			};
		};
	};
	// An empty slice, or one with a space after a digit, is rejected here.
	match (strconv::stoi(strings::slice(&start, iter))) {
	case let num: int =>
		return num;
	case =>
		return failure;
	};
};

// Scans the iterator for up to 'maxrunes' consecutive numeric digits.
// Scanning stops at the first non-digit rune, which is left in the iterator,
// or at the end of the input. Leading zeros are permitted; leading whitespace
// is not. Returns the resulting i64, or failure if no digits were found or if
// the number could not be converted.
fn scan_num(iter: *strings::iterator, maxrunes: size) (i64 | failure) = {
	let start = *iter;
	for (let i = 0z; i < maxrunes; i += 1) {
		match (strings::next(iter)) {
		case done =>
			// Running out of input merely ends the number; whether
			// any digits were seen is decided by the conversion.
			break;
		case let rn: rune =>
			if (!ascii::isdigit(rn)) {
				strings::prev(iter);
				break;
			};
		};
	};

	match (strconv::stoi64(strings::slice(&start, iter))) {
	case let num: i64 =>
		return num;
	case =>
		return failure;
	};
};

// Scans the iterator for consecutive numeric digits.
// Left-padded whitespace is NOT permitted; leading zeros are significant
// digits rather than padding.
// The resulting decimal is right-padded with zeros.
fn scan_decimal(iter: *strings::iterator, maxrunes: size) (i64 | failure) = {
	let start = *iter;
	for (let i = 0z; i < maxrunes; i += 1) {
		let rn: rune = match (strings::next(iter)) {
		case done =>
			break;
		case let rn: rune =>
			yield rn;
		};
		if (!ascii::isdigit(rn)) {
			// Leave the non-digit rune for the caller.
			strings::prev(iter);
			break;
		};
	};
	const s = strings::slice(&start, iter);
	match (strconv::stoi64(s)) {
	case let num: i64 =>
		// Scale the value up as though the digits missing on the
		// right, up to 'maxrunes', had been zeros.
		for (let i = 0z; i < maxrunes - len(s); i += 1) {
			num *= 10;
		};
		return num;
	case =>
		return failure;
	};
};

// Scans and parses zone offsets of the form:
//
// 	Z
// 	z
// 	+nn:nn
// 	+nnnn
// 	-nn:nn
// 	-nnnn
//
// Returns failure for any other leading rune, or if the hour or minute
// digits are missing.
fn scan_zo(iter: *strings::iterator) (time::duration | failure) = {
	const first = match (strings::next(iter)) {
	case done =>
		return failure;
	case let first: rune =>
		yield first;
	};
	if (first == 'Z' || first == 'z') {
		return 0;
	};
	// Only an explicit sign may introduce a numeric offset.
	if (first != '+' && first != '-') {
		return failure;
	};

	let zo = scan_int(iter, 2)? * time::HOUR;

	// The ':' between hours and minutes is optional.
	match (strings::next(iter)) {
	case done =>
		return failure;
	case let sep: rune =>
		if (sep != ':') {
			strings::prev(iter);
		};
	};

	zo += scan_int(iter, 2)? * time::MINUTE;

	if (first == '-') {
		zo *= -1;
	};

	return zo;
};

// Scans and parses locality names, made of printable characters.
fn scan_str(iter: *strings::iterator) (str | failure) = {
	let start = *iter;
	for (let rn => strings::next(iter)) {
		if (!ascii::isgraph(rn)) {
			// Leave the terminating rune for the caller.
			strings::prev(iter);
			break;
		};
	};
	// The result is empty if the input does not start with a matching
	// rune; no code path here returns failure.
	return strings::slice(&start, iter);
};

// Exercises parse() with plain literals, each tested specifier on its own, and
// one combined layout. Each assertion message is prefixed with the specifier
// under test.
@test fn parse() void = {
	let v = newvirtual();
	assert(parse(&v, "foo", "foo") is void, "none: parsefail");
	assert(v.zone == null, "none: non-null zone");
	assert(v.daydate is void, "none: non-void daydate");
	assert(v.daytime is void, "none: non-void daytime");
	assert(v.era is void, "none: non-void era");
	assert(v.year is void, "none: non-void year");
	assert(v.month is void, "none: non-void month");
	assert(v.day is void, "none: non-void day");
	assert(v.yearday is void, "none: non-void yearday");
	assert(v.isoweekyear is void, "none: non-void isoweekyear");
	assert(v.isoweek is void, "none: non-void isoweek");
	assert(v.week is void, "none: non-void week");
	assert(v.sundayweek is void, "none: non-void sundayweek");
	assert(v.weekday is void, "none: non-void weekday");
	assert(v.hour is void, "none: non-void hour");
	assert(v.minute is void, "none: non-void minute");
	assert(v.second is void, "none: non-void second");
	assert(v.nanosecond is void, "none: non-void nanosecond");
	assert(v.vloc is void, "none: non-void vloc");
	assert(v.locname is void, "none: non-void locname");
	assert(v.zoff is void, "none: non-void zoff");
	assert(v.zabbr is void, "none: non-void zabbr");
	assert(v.hour12 is void, "none: non-void hour12");
	assert(v.ampm is void, "none: non-void ampm");

	let v = newvirtual();
	assert(parse(&v, "%a", "Fri") is void, "%a: parsefail");
	assert(v.weekday is int, "%a: void");
	assert(v.weekday as int == 4, "%a: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%A", "Friday") is void, "%A: parsefail");
	assert(v.weekday is int, "%A: void");
	assert(v.weekday as int == 4, "%A: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%b", "Jan") is void, "%b: parsefail");
	assert(v.month is int, "%b: void");
	assert(v.month as int == 1, "%b: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%B", "January") is void, "%B: parsefail");
	assert(v.month is int, "%B: void");
	assert(v.month as int == 1, "%B: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%d", "27") is void, "%d: parsefail");
	assert(v.day is int, "%d: void");
	assert(v.day as int == 27, "%d: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%d", " 1") is void, "%d: parsefail");
	assert(v.day is int, "%d: void");
	assert(v.day as int == 1, "%d: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%d", "x1") is parsefail, "%d: not parsefail");

	let v = newvirtual();
	assert(parse(&v, "%e", " 7") is void, "%e: parsefail");
	assert(v.day is int, "%e: void");
	assert(v.day as int == 7, "%e: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%F", "2012-10-01") is void, "%F: parsefail");
	assert(v.year is int, "%F: year void");
	assert(v.year as int == 2012, "%F: year incorrect");
	assert(v.month is int, "%F: month void");
	assert(v.month as int == 10, "%F: month incorrect");
	assert(v.day is int, "%F: day void");
	assert(v.day as int == 1, "%F: day incorrect");

	let v = newvirtual();
	assert(parse(&v, "%H", "22") is void, "%H: parsefail");
	assert(v.hour is int, "%H: void");
	assert(v.hour as int == 22, "%H: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%I", "10") is void, "%I: parsefail");
	assert(v.hour12 is int, "%I: void");
	assert(v.hour12 as int == 10, "%I: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%j", "361") is void, "%j: parsefail");
	assert(v.yearday is int, "%j: void");
	assert(v.yearday as int == 361, "%j: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%j", " 9") is void, "%j: parsefail");
	assert(v.yearday is int, "%j: void");
	assert(v.yearday as int == 9, "%j: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%L", "Europe/Amsterdam") is void, "%L: parsefail");
	assert(v.locname is str, "%L: void");
	assert(v.locname as str == "Europe/Amsterdam", "%L: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%m", "12") is void, "%m: parsefail");
	assert(v.month is int, "%m: void");
	assert(v.month as int == 12, "%m: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%M", "07") is void, "%M: parsefail");
	assert(v.minute is int, "%M: void");
	assert(v.minute as int == 7, "%M: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%N", "123456789") is void, "%N: parsefail");
	assert(v.nanosecond is int, "%N: void");
	assert(v.nanosecond as int == 123456789, "%N: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%N", "123") is void, "%N: parsefail");
	assert(v.nanosecond is int, "%N: void");
	assert(v.nanosecond as int == 123000000, "%N: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%p", "PM") is void, "%p: parsefail");
	assert(v.ampm is bool, "%p: void");
	assert(v.ampm as bool == true, "%p: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%S", "08") is void, "%S: parsefail");
	assert(v.second is int, "%S: void");
	assert(v.second as int == 8, "%S: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%T", "18:42:05") is void, "%T: parsefail");
	assert(v.hour is int, "%T: hour void");
	assert(v.hour as int == 18, "%T: hour incorrect");
	assert(v.minute is int, "%T: minute void");
	assert(v.minute as int == 42, "%T: minute incorrect");
	assert(v.second is int, "%T: second void");
	assert(v.second as int == 5, "%T: second incorrect");

	let v = newvirtual();
	assert(parse(&v, "%u", "5") is void, "%u: parsefail");
	assert(v.weekday is int, "%u: void");
	assert(v.weekday as int == 4, "%u: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%U", "51") is void, "%U: parsefail");
	assert(v.week is int, "%U: void");
	assert(v.week as int == 51, "%U: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%w", "5") is void, "%w: parsefail");
	assert(v.weekday is int, "%w: void");
	assert(v.weekday as int == 4, "%w: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%W", "51") is void, "%W: parsefail");
	assert(v.week is int, "%W: void");
	assert(v.week as int == 51, "%W: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%Y", "2019") is void, "%Y: parsefail");
	assert(v.year is int, "%Y: void");
	assert(v.year as int == 2019, "%Y: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%z", "+0100") is void, "%z: parsefail");
	assert(v.zoff is i64, "%z: void");
	assert(v.zoff as i64 == 1 * time::HOUR, "%z: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%z", "+01:00") is void, "%z: parsefail");
	assert(v.zoff is i64, "%z: void");
	assert(v.zoff as i64 == 1 * time::HOUR, "%z: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%Z", "CET") is void, "%Z: parsefail");
	assert(v.zabbr is str, "%Z: void");
	assert(v.zabbr as str == "CET", "%Z: incorrect");

	let v = newvirtual();
	assert((
		parse(&v,
			"%Y-%m-%d %H:%M:%S.%N %z %Z %L",
			"2038-01-19 03:14:07.000000000 +0000 UTC UTC",
		)
		is void
	),
		"test 1: parsefail"
	);
	assert(v.year is int, "test 1: year void");
	assert(v.year as int == 2038, "test 1: year incorrect");
	assert(v.month is int, "test 1: month void");
	assert(v.month as int == 1, "test 1: month incorrect");
	assert(v.day is int, "test 1: day void");
	assert(v.day as int == 19, "test 1: day incorrect");
	assert(v.hour is int, "test 1: hour void");
	assert(v.hour as int == 3, "test 1: hour incorrect");
	assert(v.minute is int, "test 1: minute void");
	assert(v.minute as int == 14, "test 1: minute incorrect");
	assert(v.second is int, "test 1: second void");
	assert(v.second as int == 7, "test 1: second incorrect");
	assert(v.nanosecond is int, "test 1: nanosecond void");
	assert(v.nanosecond as int == 0, "test 1: nanosecond incorrect");
	assert(v.zoff is i64, "test 1: zoff void");
	assert(v.zoff as i64 == 0, "test 1: zoff incorrect");
	assert(v.zabbr is str, "test 1: zabbr void");
	assert(v.zabbr as str == "UTC", "test 1: zabbr incorrect");
	assert(v.locname is str, "test 1: locname void");
	assert(v.locname as str == "UTC", "test 1: locname incorrect");
};