parse.ha (17266B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use ascii;
use io;
use strconv;
use strings;
use time;
use time::chrono;

// Internal error: a scanner could not match the input. [[parse]] converts it
// into a [[parsefail]].
type failure = !void;

// A parsing error occurred. This shall contain a byte index of, and rune from,
// the layout at the position where the parsing failure occurred.
export type parsefail = !(size, rune);

// Parses a datetime string into a [[virtual]] date, according to a layout
// format string with specifiers as documented under [[format]]. Partial,
// sequential, aggregative parsing is possible.
//
// 	date::parse(&v, "%Y-%m-%d", "2019-12-27");
// 	date::parse(&v, "%H:%M:%S.%N", "22:07:08.000000000");
// 	date::parse(&v, "%z %Z %L", "+0100 CET Europe/Amsterdam");
//
// Parse will return [[parsefail]] if an invalid format specifier is encountered
// or if given string 's' does not match the layout. A layout which ends in a
// lone '%' is treated as an invalid format specifier.
export fn parse(v: *virtual, layout: str, s: str) (void | parsefail) = {
	let liter = strings::iter(layout);
	let siter = strings::iter(s);
	// True when the previous layout rune was an unescaped '%'.
	let escaped = false;

	for (let lr => strings::next(&liter)) {
		if (!escaped && lr == '%') {
			escaped = true;
			continue;
		};

		if (!escaped) {
			// Literal layout rune: the input must carry the same
			// rune at this position.
			const sr = match (strings::next(&siter)) {
			case done =>
				return (liter.dec.offs, lr);
			case let sr: rune =>
				yield sr;
			};
			if (sr != lr) {
				return (liter.dec.offs, lr);
			};
			continue;
		};

		escaped = false;

		match (parse_specifier(v, &siter, lr)) {
		case void => void;
		case failure =>
			return (liter.dec.offs, lr);
		};
	};

	// The layout ended right after a '%': the specifier is incomplete.
	if (escaped) {
		return (liter.dec.offs, '%': rune);
	};

	return void;
};

// Scans the input iterator according to the single format specifier 'lr' and
// stores the result in the matching field(s) of 'v'. Returns failure if the
// specifier is unknown or if the input does not match it.
fn parse_specifier(
	v: *virtual,
	iter: *strings::iterator,
	lr: rune,
) (void | failure) = {
	switch (lr) {
	case 'a' =>
		v.weekday = scan_for(iter, WEEKDAYS_SHORT...)?;
	case 'A' =>
		v.weekday = scan_for(iter, WEEKDAYS...)?;
	case 'b' =>
		// List indices are zero-based, months are stored one-based.
		v.month = scan_for(iter, MONTHS_SHORT...)? + 1;
	case 'B' =>
		v.month = scan_for(iter, MONTHS...)? + 1;
	case 'C' =>
		v.century = scan_int(iter, 2)?;
	case 'd', 'e' =>
		v.day = scan_int(iter, 2)?;
	case 'F' =>
		// The result of eat_rune is ignored, so a missing '-' separator
		// is tolerated here.
		v.year = scan_int(iter, 4)?;
		eat_rune(iter, '-')?;
		v.month = scan_int(iter, 2)?;
		eat_rune(iter, '-')?;
		v.day = scan_int(iter, 2)?;
	case 'H' =>
		v.hour = scan_int(iter, 2)?;
	case 'I' =>
		v.hour12 = scan_int(iter, 2)?;
	case 'j' =>
		v.yearday = scan_int(iter, 3)?;
	case 'L' =>
		v.locname = scan_str(iter)?;
	case 'm' =>
		v.month = scan_int(iter, 2)?;
	case 'M' =>
		v.minute = scan_int(iter, 2)?;
	case 'N' =>
		let nsec = scan_decimal(iter, 9)?;
		v.nanosecond = nsec: int;
		v.vnsec = nsec;
	case 'p' => // AM=false PM=true
		// Odd list indices are the "PM"/"pm" entries.
		v.ampm = scan_for(iter, "AM", "PM", "am", "pm")? % 2 == 1;
	case 's' =>
		v.vsec = scan_num(iter, 20)?;
	case 'S' =>
		v.second = scan_int(iter, 2)?;
	case 'T' =>
		// As with %F, a missing ':' separator is tolerated.
		v.hour = scan_int(iter, 2)?;
		eat_rune(iter, ':')?;
		v.minute = scan_int(iter, 2)?;
		eat_rune(iter, ':')?;
		v.second = scan_int(iter, 2)?;
	case 'u' =>
		v.weekday = scan_int(iter, 1)? - 1;
	case 'U' =>
		v.week = scan_int(iter, 2)?;
	case 'w' =>
		// Same result as "- 1" for the inputs 1..7, but maps 0 to 6
		// instead of the out-of-range -1.
		// NOTE(review): assumes %w numbers Sunday as 0, as in POSIX;
		// confirm against [[format]].
		v.weekday = (scan_int(iter, 1)? + 6) % 7;
	case 'W' =>
		v.week = scan_int(iter, 2)?;
	case 'y' =>
		v.year100 = scan_int(iter, 2)?;
	case 'Y' =>
		v.year = scan_int(iter, 4)?;
	case 'z' =>
		v.zoff = scan_zo(iter)?;
	case 'Z' =>
		v.zabbr = scan_str(iter)?;
	case '%' =>
		// A literal percent sign is mandatory: eat_rune reports a
		// mismatch as 0 rather than as a failure.
		if (eat_rune(iter, '%')? == 0) {
			return failure;
		};
	case =>
		return failure;
	};
};

// Consumes the next rune of the iterator if it equals 'needle'. Returns 1 if
// the rune was consumed, 0 if the next rune is a different one (which is then
// put back), or failure if the iterator is exhausted.
fn eat_rune(iter: *strings::iterator, needle: rune) (uint | failure) = {
	const rn = match (strings::next(iter)) {
	case done =>
		return failure;
	case let rn: rune =>
		yield rn;
	};
	if (rn == needle) {
		return 1;
	} else {
		strings::prev(iter);
		return 0;
	};
};

// Scans the iterator for a given list of strings.
// Returns the list index of the matched string.
//
// The first list entry which is a prefix of the remaining input wins, and the
// iterator is advanced past it. Fails if the input is exhausted or if no entry
// matches.
fn scan_for(iter: *strings::iterator, list: str...) (int | failure) = {
	const name = strings::iterstr(iter);
	if (len(name) == 0) {
		return failure;
	};
	for (let i = 0z; i < len(list); i += 1) {
		if (!strings::hasprefix(name, list[i])) {
			continue;
		};
		// Consume the matched name rune by rune. len() counts bytes,
		// which would over-consume for names with multi-byte runes.
		let matched = strings::iter(list[i]);
		for (strings::next(&matched) is rune) {
			strings::next(iter);
		};
		return i: int;
	};
	return failure;
};

// Scans the iterator for consecutive numeric digits.
// Left-padded whitespace and zeros are permitted.
// Returns the resulting int.
//
// Up to 'maxrunes' runes are consumed, fewer only if the input ends first. Any
// rune among them which is neither an ASCII digit nor a space is a failure.
fn scan_int(iter: *strings::iterator, maxrunes: size) (int | failure) = {
	// 'start' trails behind 'iter' and is moved past the leading spaces, so
	// that the slice given to strconv begins at the first digit.
	let start = *iter;
	let startfixed = false;
	for (let i = 0z; i < maxrunes; i += 1) {
		let rn: rune = match (strings::next(iter)) {
		case done =>
			break;
		case let rn: rune =>
			yield rn;
		};
		if (!ascii::isdigit(rn) && rn != ' ') {
			return failure;
		};
		if (!startfixed) {
			if (ascii::isdigit(rn)) {
				startfixed = true;
			} else {
				strings::next(&start);
			};
		};
	};
	match (strconv::stoi(strings::slice(&start, iter))) {
	case let num: int =>
		return num;
	case =>
		return failure;
	};
};

// Scans the iterator for consecutive numeric digits.
// Left-padded zeros are permitted, whitespace is not.
// Returns the resulting i64.
//
// Scanning stops at the first non-digit rune (which is left unconsumed), at the
// end of the input, or after 'maxrunes' runes. Fails if strconv::stoi64 rejects
// the scanned text.
fn scan_num(iter: *strings::iterator, maxrunes: size) (i64 | failure) = {
	let start = *iter;
	for (let i = 0z; i < maxrunes; i += 1) {
		match (strings::next(iter)) {
		case done =>
			// Reaching the end of the input merely ends the number;
			// the digits scanned so far are still converted below.
			break;
		case let rn: rune =>
			if (!ascii::isdigit(rn)) {
				strings::prev(iter);
				break;
			};
		};
	};

	match (strconv::stoi64(strings::slice(&start, iter))) {
	case let num: i64 =>
		return num;
	case =>
		return failure;
	};
};

// Scans the iterator for consecutive numeric digits.
// Left-padded whitespace is NOT permitted; leading zeros are significant.
// The resulting decimal is right-padded with zeros.
//
// At most 'maxrunes' digits are consumed; the first non-digit rune is left
// unconsumed. The value is scaled up as if the scanned digits were followed by
// zeros up to 'maxrunes' digits, so "123" with maxrunes 9 yields 123000000.
fn scan_decimal(iter: *strings::iterator, maxrunes: size) (i64 | failure) = {
	let start = *iter;
	for (let i = 0z; i < maxrunes; i += 1) {
		let rn: rune = match (strings::next(iter)) {
		case done =>
			break;
		case let rn: rune =>
			yield rn;
		};
		if (!ascii::isdigit(rn)) {
			strings::prev(iter);
			break;
		};
	};
	const s = strings::slice(&start, iter);
	match (strconv::stoi64(s)) {
	case let num: i64 =>
		// One factor of ten for every digit position left unfilled.
		for (let i = 0z; i < maxrunes - len(s); i += 1) {
			num *= 10;
		};
		return num;
	case =>
		return failure;
	};
};

// Scans and parses zone offsets of the form:
//
// 	Z
// 	z
// 	+nn:nn
// 	+nnnn
// 	-nn:nn
// 	-nnnn
//
// Any other leading rune is a failure.
fn scan_zo(iter: *strings::iterator) (time::duration | failure) = {
	const first = match (strings::next(iter)) {
	case done =>
		return failure;
	case let first: rune =>
		yield first;
	};
	if (first == 'Z' || first == 'z') {
		return 0;
	};
	// Only an explicit sign may introduce a numeric offset.
	if (first != '+' && first != '-') {
		return failure;
	};

	let zo = scan_int(iter, 2)? * time::HOUR;

	// The ':' between hours and minutes is optional.
	match (strings::next(iter)) {
	case done =>
		return failure;
	case let sep: rune =>
		if (sep != ':') {
			strings::prev(iter);
		};
	};

	zo += scan_int(iter, 2)? * time::MINUTE;

	if (first == '-') {
		zo *= -1;
	};

	return zo;
};

// Scans and parses locality names, made of printable characters.
fn scan_str(iter: *strings::iterator) (str | failure) = {
	let start = *iter;
	// Stop at the first rune for which ascii::isgraph is false and leave
	// it unconsumed.
	for (let rn => strings::next(iter)) {
		if (!ascii::isgraph(rn)) {
			strings::prev(iter);
			break;
		};
	};
	return strings::slice(&start, iter);
};

@test fn parse() void = {
	// A layout without specifiers must leave every field untouched.
	let v = newvirtual();
	assert(parse(&v, "foo", "foo") is void, "none: parsefail");
	assert(v.zone == null, "none: non-null zone");
	assert(v.daydate is void, "none: non-void daydate");
	assert(v.daytime is void, "none: non-void daytime");
	assert(v.era is void, "none: non-void era");
	assert(v.year is void, "none: non-void year");
	assert(v.month is void, "none: non-void month");
	assert(v.day is void, "none: non-void day");
	assert(v.yearday is void, "none: non-void yearday");
	assert(v.isoweekyear is void, "none: non-void isoweekyear");
	assert(v.isoweek is void, "none: non-void isoweek");
	assert(v.week is void, "none: non-void week");
	assert(v.sundayweek is void, "none: non-void sundayweek");
	assert(v.weekday is void, "none: non-void weekday");
	assert(v.hour is void, "none: non-void hour");
	assert(v.minute is void, "none: non-void minute");
	assert(v.second is void, "none: non-void second");
	assert(v.nanosecond is void, "none: non-void nanosecond");
	assert(v.vloc is void, "none: non-void vloc");
	assert(v.locname is void, "none: non-void locname");
	assert(v.zoff is void, "none: non-void zoff");
	assert(v.zabbr is void, "none: non-void zabbr");
	assert(v.hour12 is void, "none: non-void hour12");
	assert(v.ampm is void, "none: non-void ampm");

	let v = newvirtual();
	assert(parse(&v, "%a", "Fri") is void, "%a: parsefail");
	assert(v.weekday is int, "%a: void");
	assert(v.weekday as int == 4, "%a: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%A", "Friday") is void, "%A: parsefail");
	assert(v.weekday is int, "%A: void");
	assert(v.weekday as int == 4, "%A: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%b", "Jan") is void, "%b: parsefail");
	assert(v.month is int, "%b: void");
	assert(v.month as int == 1, "%b: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%B", "January") is void, "%B: parsefail");
	assert(v.month is int, "%B: void");
	assert(v.month as int == 1, "%B: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%d", "27") is void, "%d: parsefail");
	assert(v.day is int, "%d: void");
	assert(v.day as int == 27, "%d: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%d", " 1") is void, "%d: parsefail");
	assert(v.day is int, "%d: void");
	assert(v.day as int == 1, "%d: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%d", "x1") is parsefail, "%d: not parsefail");

	let v = newvirtual();
	assert(parse(&v, "%e", " 7") is void, "%e: parsefail");
	assert(v.day is int, "%e: void");
	assert(v.day as int == 7, "%e: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%F", "2012-10-01") is void, "%F: parsefail");
	assert(v.year is int, "%F: year void");
	assert(v.year as int == 2012, "%F: year incorrect");
	assert(v.month is int, "%F: month void");
	assert(v.month as int == 10, "%F: month incorrect");
	assert(v.day is int, "%F: day void");
	assert(v.day as int == 1, "%F: day incorrect");

	let v = newvirtual();
	assert(parse(&v, "%H", "22") is void, "%H: parsefail");
	assert(v.hour is int, "%H: void");
	assert(v.hour as int == 22, "%H: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%I", "10") is void, "%I: parsefail");
	assert(v.hour12 is int, "%I: void");
	assert(v.hour12 as int == 10, "%I: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%j", "361") is void, "%j: parsefail");
	assert(v.yearday is int, "%j: void");
	assert(v.yearday as int == 361, "%j: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%j", "  9") is void, "%j: parsefail");
	assert(v.yearday is int, "%j: void");
	assert(v.yearday as int == 9, "%j: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%L", "Europe/Amsterdam") is void, "%L: parsefail");
	assert(v.locname is str, "%L: void");
	assert(v.locname as str == "Europe/Amsterdam", "%L: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%m", "12") is void, "%m: parsefail");
	assert(v.month is int, "%m: void");
	assert(v.month as int == 12, "%m: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%M", "07") is void, "%M: parsefail");
	assert(v.minute is int, "%M: void");
	assert(v.minute as int == 7, "%M: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%N", "123456789") is void, "%N: parsefail");
	assert(v.nanosecond is int, "%N: void");
	assert(v.nanosecond as int == 123456789, "%N: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%N", "123") is void, "%N: parsefail");
	assert(v.nanosecond is int, "%N: void");
	assert(v.nanosecond as int == 123000000, "%N: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%p", "PM") is void, "%p: parsefail");
	assert(v.ampm is bool, "%p: void");
	assert(v.ampm as bool == true, "%p: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%S", "08") is void, "%S: parsefail");
	assert(v.second is int, "%S: void");
	assert(v.second as int == 8, "%S: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%T", "18:42:05") is void, "%T: parsefail");
	assert(v.hour is int, "%T: hour void");
	assert(v.hour as int == 18, "%T: hour incorrect");
	assert(v.minute is int, "%T: minute void");
	assert(v.minute as int == 42, "%T: minute incorrect");
	assert(v.second is int, "%T: second void");
	assert(v.second as int == 5, "%T: second incorrect");

	let v = newvirtual();
	assert(parse(&v, "%u", "5") is void, "%u: parsefail");
	assert(v.weekday is int, "%u: void");
	assert(v.weekday as int == 4, "%u: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%U", "51") is void, "%U: parsefail");
	assert(v.week is int, "%U: void");
	assert(v.week as int == 51, "%U: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%w", "5") is void, "%w: parsefail");
	assert(v.weekday is int, "%w: void");
	assert(v.weekday as int == 4, "%w: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%W", "51") is void, "%W: parsefail");
	assert(v.week is int, "%W: void");
	assert(v.week as int == 51, "%W: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%Y", "2019") is void, "%Y: parsefail");
	assert(v.year is int, "%Y: void");
	assert(v.year as int == 2019, "%Y: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%z", "+0100") is void, "%z: parsefail");
	assert(v.zoff is i64, "%z: void");
	assert(v.zoff as i64 == 1 * time::HOUR, "%z: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%z", "+01:00") is void, "%z: parsefail");
	assert(v.zoff is i64, "%z: void");
	assert(v.zoff as i64 == 1 * time::HOUR, "%z: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%z", "-01:30") is void, "%z: negative parsefail");
	assert(v.zoff is i64, "%z: negative void");
	assert(v.zoff as i64 == -90 * time::MINUTE, "%z: negative incorrect");

	let v = newvirtual();
	assert(parse(&v, "%Z", "CET") is void, "%Z: parsefail");
	assert(v.zabbr is str, "%Z: void");
	assert(v.zabbr as str == "CET", "%Z: incorrect");

	let v = newvirtual();
	assert(parse(&v, "100%%", "100%") is void, "%%: parsefail");

	let v = newvirtual();
	assert((
		parse(&v,
			"%Y-%m-%d %H:%M:%S.%N %z %Z %L",
			"2038-01-19 03:14:07.000000000 +0000 UTC UTC",
		)
		is void
	),
		"test 1: parsefail"
	);
	assert(v.year is int, "test 1: year void");
	assert(v.year as int == 2038, "test 1: year incorrect");
	assert(v.month is int, "test 1: month void");
	assert(v.month as int == 1, "test 1: month incorrect");
	assert(v.day is int, "test 1: day void");
	assert(v.day as int == 19, "test 1: day incorrect");
	assert(v.hour is int, "test 1: hour void");
	assert(v.hour as int == 3, "test 1: hour incorrect");
	assert(v.minute is int, "test 1: minute void");
	assert(v.minute as int == 14, "test 1: minute incorrect");
	assert(v.second is int, "test 1: second void");
	assert(v.second as int == 7, "test 1: second incorrect");
	assert(v.nanosecond is int, "test 1: nanosecond void");
	assert(v.nanosecond as int == 0, "test 1: nanosecond incorrect");
	assert(v.zoff is i64, "test 1: zoff void");
	assert(v.zoff as i64 == 0, "test 1: zoff incorrect");
	assert(v.zabbr is str, "test 1: zabbr void");
	assert(v.zabbr as str == "UTC", "test 1: zabbr incorrect");
	assert(v.locname is str, "test 1: locname void");
	assert(v.locname as str == "UTC", "test 1: locname incorrect");

};