parse.ha (14995B)
// License: MPL-2.0
// (c) 2021-2022 Byron Torres <b@torresjrjr.com>
// (c) 2022 Drew DeVault <sir@cmpwn.com>
// (c) 2021-2022 Vlad-Stefan Harbuz <vlad@vladh.net>
use ascii;
use errors;
use io;
use strconv;
use strings;
use strio;
use time;
use time::chrono;

// Internal error raised by the scanning helpers in this file. [[parse]]
// converts it into a [[parsefail]] carrying the specifier being processed.
type failure = !void;

// A parsing error occurred. If appropriate, the offending format specifier is
// stored. A null rune represents all other error cases.
export type parsefail = !rune;

// Parses a date/time string into a [[virtual]], according to a layout format
// string with specifiers as documented under [[format]]. Partial, sequential,
// aggregative parsing is possible.
//
// 	datetime::parse(&v, "%Y-%m-%d", "2019-12-27");
// 	datetime::parse(&v, "%H:%M:%S.%N", "22:07:08.000000000");
// 	datetime::parse(&v, "%z %Z %L", "+0100 CET Europe/Amsterdam");
//
// Parse will return parsefail if an invalid format specifier is encountered,
// if the layout ends with an unfinished '%', or if the given string 's' does
// not match the layout. Input remaining in 's' after the layout has been
// exhausted is ignored.
export fn parse(v: *virtual, layout: str, s: str) (void | parsefail) = {
	let liter = strings::iter(layout);
	let siter = strings::iter(s);
	// True while the previous layout rune was an unconsumed '%'.
	let escaped = false;

	for (true) {
		const lr: rune = match (strings::next(&liter)) {
		case void =>
			break;
		case let lr: rune =>
			yield lr;
		};

		if (!escaped && lr == '%') {
			escaped = true;
			continue;
		};

		if (!escaped) {
			// Literal layout runes must match the input exactly.
			const sr = match (strings::next(&siter)) {
			case void =>
				return '\x00';
			case let sr: rune =>
				yield sr;
			};
			if (sr != lr) {
				return '\x00';
			};
			continue;
		};

		escaped = false;

		match (parse_specifier(v, &siter, lr)) {
		case void => void;
		case failure =>
			return lr;
		};
	};

	// The layout ended right after a '%': there is no specifier rune to
	// report, so signal the generic error instead of silently succeeding.
	if (escaped) {
		return '\x00';
	};

	return void;
};

// Parses the input for a single format specifier 'lr' (the rune following a
// '%' in the layout) and stores the result in the matching field of 'v'.
// Returns failure if the specifier is unknown or the input does not match.
fn parse_specifier(
	v: *virtual,
	iter: *strings::iterator,
	lr: rune,
) (void | failure) = {
	switch (lr) {
	case 'a' => v.weekday =
		scan_for(iter, WEEKDAYS_SHORT...)?;
	case 'A' => v.weekday =
		scan_for(iter, WEEKDAYS...)?;
	case 'b' => v.month =
		scan_for(iter, MONTHS_SHORT...)? + 1;
	case 'B' => v.month =
		scan_for(iter, MONTHS...)? + 1;
	case 'd' => v.day =
		scan_int(iter, 2, false)?;
	case 'F' =>
		// Same as "%Y-%m-%d"; the separators are mandatory, just as
		// they would be when written literally in the layout.
		v.year = scan_int(iter, 4, false)?;
		expect_rune(iter, '-')?;
		v.month = scan_int(iter, 2, false)?;
		expect_rune(iter, '-')?;
		v.day = scan_int(iter, 2, false)?;
	case 'H' => v.hour =
		scan_int(iter, 2, false)?;
	case 'I' => v.halfhour =
		scan_int(iter, 2, false)?;
	case 'j' => v.yearday =
		scan_int(iter, 3, false)?;
	case 'L' => v.locname =
		scan_str(iter)?;
	case 'm' => v.month =
		scan_int(iter, 2, false)?;
	case 'M' => v.minute =
		scan_int(iter, 2, false)?;
	case 'N' => v.nanosecond =
		scan_int(iter, 9, true)?;
	case 'p' => v.ampm = // AM=false PM=true
		// Odd list indices are the "PM"/"pm" entries.
		scan_for(iter, "AM", "PM", "am", "pm")? % 2 == 1;
	case 'S' => v.second =
		scan_int(iter, 2, false)?;
	case 'T' =>
		// Same as "%H:%M:%S"; the separators are mandatory.
		v.hour = scan_int(iter, 2, false)?;
		expect_rune(iter, ':')?;
		v.minute = scan_int(iter, 2, false)?;
		expect_rune(iter, ':')?;
		v.second = scan_int(iter, 2, false)?;
	// NOTE(review): %u and %w are parsed identically (digit - 1); confirm
	// this agrees with how [[format]] defines %w.
	case 'u' => v.weekday =
		scan_int(iter, 1, false)? - 1;
	case 'U' => v.week =
		scan_int(iter, 2, false)?;
	case 'w' => v.weekday =
		scan_int(iter, 1, false)? - 1;
	case 'W' => v.week =
		scan_int(iter, 2, false)?;
	case 'Y' => v.year =
		scan_int(iter, 4, false)?;
	case 'z' => v.zoff =
		scan_zo(iter)?;
	case 'Z' => v.zabbr =
		scan_str(iter)?;
	case '%' =>
		// "%%" stands for a literal '%' which must be present.
		expect_rune(iter, '%')?;
	case =>
		return failure;
	};
};

// Consumes the rune 'needle' from the iterator. Returns failure if the input
// is exhausted or the next rune is anything else.
fn expect_rune(iter: *strings::iterator, needle: rune) (void | failure) = {
	if (eat_rune(iter, needle)? == 0) {
		return failure;
	};
};

// Optionally consumes the rune 'needle' from the iterator. Returns 1 if it
// was consumed, or 0 if the next rune differs, in which case the iterator is
// rewound to where it was. Returns failure if the input is exhausted.
fn eat_rune(iter: *strings::iterator, needle: rune) (uint | failure) = {
	const rn = match (strings::next(iter)) {
	case void =>
		return failure;
	case let rn: rune =>
		yield rn;
	};
	if (rn == needle) {
		return 1;
	} else {
		strings::prev(iter);
		return 0;
	};
};

// Scans the iterator for a given list of strings.
// Returns the list index of the first string which prefixes the remaining
// input, consuming it. Returns failure if the input is exhausted or no string
// matches.
fn scan_for(iter: *strings::iterator, list: str...) (int | failure) = {
	const name = strings::iterstr(iter);
	if (len(name) == 0) {
		return failure;
	};
	for (let i = 0z; i < len(list); i += 1) {
		if (!strings::hasprefix(name, list[i])) {
			continue;
		};
		// Consume the match one rune at a time. len() counts bytes,
		// which would skip too far for names with multi-byte runes.
		let matched = strings::iter(list[i]);
		for (strings::next(&matched) is rune) {
			strings::next(iter);
		};
		return i: int;
	};
	return failure;
};

// Scans the iterator up to n consecutive numeric digits.
// Returns the resulting int.
// If pad is true, the number is right-padded with zeroes up to n digits.
fn scan_int(iter: *strings::iterator, n: size, pad: bool) (int | failure) = {
	// Remember where the number starts so the digits can be sliced out.
	let start = *iter;
	for (let i = 0z; i < n; i += 1) {
		let rn: rune = match (strings::next(iter)) {
		case void =>
			break;
		case let rn: rune =>
			yield rn;
		};
		if (!ascii::isdigit(rn)) {
			// Not part of the number; leave it for the caller.
			strings::prev(iter);
			break;
		};
	};
	const digits = strings::slice(&start, iter);
	let num = match (strconv::stoi(digits)) {
	case let num: int =>
		yield num;
	case =>
		// No digits were found, or the value does not fit an int.
		return failure;
	};
	// Only fractional fields (e.g. nanoseconds) are scaled: "5" read with
	// n = 9 becomes 500000000. Other fields keep their value, so "7" read
	// as a day stays 7.
	if (pad) {
		for (let i = len(digits); i < n; i += 1) {
			num *= 10;
		};
	};
	return num;
};

// Scans and parses zone offsets of the form:
//
// 	Z
// 	z
// 	+nn:nn
// 	+nnnn
// 	-nn:nn
// 	-nnnn
//
// Returns failure if the input is exhausted, does not start with one of
// 'Z', 'z', '+' or '-', or lacks the hour or minute digits.
fn scan_zo(iter: *strings::iterator) (time::duration | failure) = {
	const sign = match (strings::next(iter)) {
	case void =>
		return failure;
	case let r: rune =>
		yield r;
	};
	if (sign == 'Z' || sign == 'z') {
		return 0;
	};
	if (sign != '+' && sign != '-') {
		return failure;
	};
	let zo = scan_int(iter, 2, false)? * time::HOUR;
	// The ':' between hours and minutes is optional. Anything else that
	// was read here belongs to the minutes and must be put back.
	match (strings::next(iter)) {
	case void => void;
	case let sep: rune =>
		if (sep != ':') {
			strings::prev(iter);
		};
	};
	zo += scan_int(iter, 2, false)? * time::MINUTE;
	if (sign == '-') {
		zo *= -1;
	};
	return zo;
};

// Scans and parses locality names, made of printable characters.
fn scan_str(iter: *strings::iterator) (str | failure) = {
	// Remember where the name starts so it can be sliced out afterwards.
	let copy = *iter;
	for (true) {
		match (strings::next(iter)) {
		case void =>
			break;
		case let rn: rune =>
			if (!ascii::isgraph(rn)) {
				// Not part of the name; leave it for the caller.
				strings::prev(iter);
				break;
			};
		};
	};
	// The result may be empty if no graphical rune was found.
	return strings::slice(&copy, iter);
};

// Exercises parse: a literal-only layout leaves every field unset, each
// specifier sets its own field, and a combined layout sets all its fields.
@test fn parse() void = {
	let v = newvirtual();
	assert(parse(&v, "foo", "foo") is void, "none: parsefail");
	assert(v.zone == null, "none: non-null zone");
	assert(v.date is void, "none: non-void date");
	assert(v.time is void, "none: non-void time");
	assert(v.era is void, "none: non-void era");
	assert(v.year is void, "none: non-void year");
	assert(v.month is void, "none: non-void month");
	assert(v.day is void, "none: non-void day");
	assert(v.yearday is void, "none: non-void yearday");
	assert(v.isoweekyear is void, "none: non-void isoweekyear");
	assert(v.isoweek is void, "none: non-void isoweek");
	assert(v.week is void, "none: non-void week");
	assert(v.sundayweek is void, "none: non-void sundayweek");
	assert(v.weekday is void, "none: non-void weekday");
	assert(v.hour is void, "none: non-void hour");
	assert(v.minute is void, "none: non-void minute");
	assert(v.second is void, "none: non-void second");
	assert(v.nanosecond is void, "none: non-void nanosecond");
	assert(v.vloc is void, "none: non-void vloc");
	assert(v.locname is void, "none: non-void locname");
	assert(v.zoff is void, "none: non-void zoff");
	assert(v.zabbr is void, "none: non-void zabbr");
	assert(v.halfhour is void, "none: non-void halfhour");
	assert(v.ampm is void, "none: non-void ampm");

	let v = newvirtual();
	assert(parse(&v, "%a", "Fri") is void, "%a: parsefail");
	assert(v.weekday is int, "%a: void");
	assert(v.weekday as int == 4, "%a: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%A", "Friday") is void, "%A: parsefail");
	assert(v.weekday is int, "%A: void");
	assert(v.weekday as int == 4, "%A: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%b", "Jan") is void, "%b: parsefail");
	assert(v.month is int, "%b: void");
	assert(v.month as int == 1, "%b: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%B", "January") is void, "%B: parsefail");
	assert(v.month is int, "%B: void");
	assert(v.month as int == 1, "%B: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%d", "27") is void, "%d: parsefail");
	assert(v.day is int, "%d: void");
	assert(v.day as int == 27, "%d: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%F", "2012-10-01") is void, "%F: parsefail");
	assert(v.year is int, "%F: year void");
	assert(v.year as int == 2012, "%F: year incorrect");
	assert(v.month is int, "%F: month void");
	assert(v.month as int == 10, "%F: month incorrect");
	assert(v.day is int, "%F: day void");
	assert(v.day as int == 1, "%F: day incorrect");

	let v = newvirtual();
	assert(parse(&v, "%H", "22") is void, "%H: parsefail");
	assert(v.hour is int, "%H: void");
	assert(v.hour as int == 22, "%H: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%I", "10") is void, "%I: parsefail");
	assert(v.halfhour is int, "%I: void");
	assert(v.halfhour as int == 10, "%I: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%j", "361") is void, "%j: parsefail");
	assert(v.yearday is int, "%j: void");
	assert(v.yearday as int == 361, "%j: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%L", "Europe/Amsterdam") is void, "%L: parsefail");
	assert(v.locname is str, "%L: void");
	assert(v.locname as str == "Europe/Amsterdam", "%L: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%m", "12") is void, "%m: parsefail");
	assert(v.month is int, "%m: void");
	assert(v.month as int == 12, "%m: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%M", "07") is void, "%M: parsefail");
	assert(v.minute is int, "%M: void");
	assert(v.minute as int == 7, "%M: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%N", "123456789") is void, "%N: parsefail");
	assert(v.nanosecond is int, "%N: void");
	assert(v.nanosecond as int == 123456789, "%N: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%p", "PM") is void, "%p: parsefail");
	assert(v.ampm is bool, "%p: void");
	assert(v.ampm as bool == true, "%p: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%S", "08") is void, "%S: parsefail");
	assert(v.second is int, "%S: void");
	assert(v.second as int == 8, "%S: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%T", "18:42:05") is void, "%T: parsefail");
	assert(v.hour is int, "%T: hour void");
	assert(v.hour as int == 18, "%T: hour incorrect");
	assert(v.minute is int, "%T: minute void");
	assert(v.minute as int == 42, "%T: minute incorrect");
	assert(v.second is int, "%T: second void");
	assert(v.second as int == 5, "%T: second incorrect");

	let v = newvirtual();
	assert(parse(&v, "%u", "5") is void, "%u: parsefail");
	assert(v.weekday is int, "%u: void");
	assert(v.weekday as int == 4, "%u: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%U", "51") is void, "%U: parsefail");
	assert(v.week is int, "%U: void");
	assert(v.week as int == 51, "%U: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%w", "5") is void, "%w: parsefail");
	assert(v.weekday is int, "%w: void");
	assert(v.weekday as int == 4, "%w: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%W", "51") is void, "%W: parsefail");
	assert(v.week is int, "%W: void");
	assert(v.week as int == 51, "%W: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%Y", "2019") is void, "%Y: parsefail");
	assert(v.year is int, "%Y: void");
	assert(v.year as int == 2019, "%Y: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%z", "+0100") is void, "%z: parsefail");
	assert(v.zoff is i64, "%z: void");
	assert(v.zoff as i64 == 1 * time::HOUR, "%z: incorrect");
	let v = newvirtual();
	assert(parse(&v, "%z", "+01:00") is void, "%z: parsefail");
	assert(v.zoff is i64, "%z: void");
	assert(v.zoff as i64 == 1 * time::HOUR, "%z: incorrect");

	let v = newvirtual();
	assert(parse(&v, "%Z", "CET") is void, "%Z: parsefail");
	assert(v.zabbr is str, "%Z: void");
	assert(v.zabbr as str == "CET", "%Z: incorrect");

	let v = newvirtual();
	assert((
			parse(&v,
				"%Y-%m-%d %H:%M:%S.%N %z %Z %L",
				"2038-01-19 03:14:07.000000000 +0000 UTC UTC",
			)
			is void
		),
		"test 1: parsefail"
	);
	assert(v.year is int, "test 1: year void");
	assert(v.year as int == 2038, "test 1: year incorrect");
	assert(v.month is int, "test 1: month void");
	assert(v.month as int == 1, "test 1: month incorrect");
	assert(v.day is int, "test 1: day void");
	assert(v.day as int == 19, "test 1: day incorrect");
	assert(v.hour is int, "test 1: hour void");
	assert(v.hour as int == 3, "test 1: hour incorrect");
	assert(v.minute is int, "test 1: minute void");
	assert(v.minute as int == 14, "test 1: minute incorrect");
	assert(v.second is int, "test 1: second void");
	assert(v.second as int == 7, "test 1: second incorrect");
	assert(v.nanosecond is int, "test 1: nanosecond void");
	assert(v.nanosecond as int == 0, "test 1: nanosecond incorrect");
	assert(v.zoff is i64, "test 1: zoff void");
	assert(v.zoff as i64 == 0, "test 1: zoff incorrect");
	assert(v.zabbr is str, "test 1: zabbr void");
	assert(v.zabbr as str == "UTC", "test 1: zabbr incorrect");
	assert(v.locname is str, "test 1: locname void");
	assert(v.locname as str == "UTC", "test 1: locname incorrect");

};