+test.ha (28015B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use fmt;
use strings;
use types;

// The outcome a test case expects: the expression compiles and matches the
// input (MATCH), compiles but does not match (NOMATCH), or is rejected by
// [[compile]] (ERROR).
type matchres = enum { MATCH, NOMATCH, ERROR };

// Compiles expr, runs [[find]] against string and aborts with a diagnostic if
// the outcome differs from expected. When a match is expected and found, the
// start and end of the main capture (result[0]) are additionally compared
// against the start and end arguments.
fn run_find_case(
	expr: str,
	string: str,
	expected: matchres,
	start: int,
	end: int
) void = {
	const re = match (compile(expr)) {
	case let re: regex => yield re;
	case let e: error =>
		if (expected == matchres::MATCH) {
			fmt::errorln(e)!;
			fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it errored",
				expr, string)!;
			abort();
		};
		if (expected == matchres::NOMATCH) {
			fmt::errorln(e)!;
			fmt::errorfln("Expected expression /{}/ to not match string \"{}\", but it errored",
				expr, string)!;
			abort();
		};
		// expected == matchres::ERROR: the compilation failure is the
		// outcome this case asked for.
		return;
	};

	if (expected == matchres::ERROR) {
		fmt::errorfln("Expected expression /{}/ to have error caught during compilation, but it did not",
			expr)!;
		abort();
	};
	defer finish(&re);

	const result = find(&re, string);
	defer result_free(result);
	if (len(result) == 0) {
		if (expected == matchres::MATCH) {
			fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it did not",
				expr, string)!;
			abort();
		};
		return;
	} else if (expected == matchres::NOMATCH) {
		fmt::errorfln("Expected expression /{}/ to not match string \"{}\", but it did",
			expr, string)!;
		abort();
	};

	if (start: size != result[0].start) {
		fmt::errorfln("Expected start of main capture to be {} but it was {}",
			start, result[0].start)!;
		abort();
	};
	if (end: size != result[0].end) {
		fmt::errorfln("Expected end of main capture to be {} but it was {}",
			end, result[0].end)!;
		abort();
	};
};

// Compiles expr, runs [[find]] against string and asserts that the content of
// every returned capture equals the corresponding entry of targets.
//
// The expected parameter is accepted so that the signature lines up with the
// other case runners, but it is not consulted: a compilation failure always
// aborts through the error assertion.
fn run_submatch_case(
	expr: str,
	string: str,
	expected: matchres,
	targets: []str
) void = {
	const re = compile(expr)!;
	defer finish(&re);

	const result = find(&re, string);
	defer result_free(result);
	assert(len(result) == len(targets), "Invalid number of captures");
	for (let i = 0z; i < len(targets); i += 1) {
		assert(targets[i] == result[i].content, "Invalid capture");
	};
};

// Compiles expr, runs [[findall]] against string and aborts with a diagnostic
// if the outcome differs from expected. When matches are expected, the number
// of results must equal len(targets) and the main capture of each result must
// equal the corresponding target.
fn run_findall_case(
	expr: str,
	string: str,
	expected: matchres,
	targets: []str
) void = {
	const re = match (compile(expr)) {
	case let re: regex => yield re;
	case let e: error =>
		if (expected != matchres::ERROR) {
			fmt::errorln(e)!;
			fmt::errorfln("Expected expression /{}/ to compile, but it errored",
				expr)!;
			abort();
		};
		return;
	};
	defer finish(&re);

	if (expected == matchres::ERROR) {
		fmt::errorfln("Expected expression /{}/ to have error caught during compilation, but it did not",
			expr)!;
		abort();
	};

	const results = findall(&re, string);
	defer result_freeall(results);
	if (len(results) == 0) {
		if (expected == matchres::MATCH) {
			fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it did not",
				expr, string)!;
			abort();
		};
		// An empty result set is exactly what a NOMATCH case asks for.
		// Without this early return the check below would report a
		// spurious "but it did" failure for every NOMATCH case.
		return;
	};

	if (expected == matchres::NOMATCH) {
		fmt::errorfln("Expected expression /{}/ to not match string \"{}\", but it did",
			expr, string)!;
		abort();
	};
	if (len(targets) != len(results)) {
		fmt::errorfln("Expected expression /{}/ to find {} results but found {}",
			expr, len(targets), len(results))!;
		abort();
	};
	for (let i = 0z; i < len(results); i += 1) {
		if (results[i][0].content != targets[i]) {
			fmt::errorfln("Expected submatch of expression /{}/ to be {} but it was {}",
				expr, targets[i], results[i][0].content)!;
			abort();
		};
	};
};

// Compiles expr (which must succeed) and runs [[replacen]] on string with the
// given target and replacement limit n. If expected is a str, the call must
// succeed and return exactly that string; if expected is void, the call must
// fail. Any other outcome prints a diagnostic and aborts.
fn run_replace_case(
	expr: str,
	string: str,
	target: str,
	n: size,
	expected: (str | void),
) void = {
	const re = match (compile(expr)) {
	case let re: regex => yield re;
	case let e: error =>
		fmt::errorln(e)!;
		fmt::errorfln("Expected expression /{}/ to compile, but it errored",
			expr)!;
		abort();
	};
	defer finish(&re);

	match (replacen(&re, string, target, n)) {
	case let e: error =>
		// An error is only a failure when a result string was expected.
		if (expected is str) {
			fmt::errorln(e)!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\"",
				expr, string, target, n, expected as str)!;
			abort();
		};
	case let s: str =>
		defer free(s);
		if (expected is void) {
			fmt::errorln("Expected replace to fail, but it did not")!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} return=\"{}\"",
				expr, string, target, n, s)!;
			abort();
		};
		if (expected as str != s) {
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"",
				expr, string, target, n, expected as str, s)!;
			abort();
		};
	};
};

// Compiles expr (which must succeed), runs [[rawreplacen]] on string with the
// given target and replacement limit n, and aborts with a diagnostic unless
// the returned string equals expected.
fn run_rawreplace_case(
	expr: str,
	string: str,
	target: str,
	n: size,
	expected: str,
) void = {
	const re = match (compile(expr)) {
	case let re: regex => yield re;
	case let e: error =>
		fmt::errorln(e)!;
		fmt::errorfln("Expected expression /{}/ to compile, but it errored",
			expr)!;
		abort();
	};
	defer finish(&re);

	const s = rawreplacen(&re, string, target, n);
	defer free(s);
	if (expected != s) {
		fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"",
			expr, string, target, n, expected, s)!;
		abort();
	};
};

// Table-driven test of [[find]]. Each case is (expression, input, expected
// outcome, start, end), where start and end describe the main capture. An end
// of -1 is a sentinel that the driver loop below replaces with the length of
// the input in codepoints.
@test fn find() void = {
	const cases = [
		// literals
		(`^$`, "", matchres::MATCH, 0, 0),
		(``, "", matchres::MATCH, 0, -1),
		(`abcd`, "abcd", matchres::MATCH, 0, -1),
		(`abc`, "abcd", matchres::MATCH, 0, 3),
		(`bcd`, "abcd", matchres::MATCH, 1, 4),
		(`^abc$`, "abc", matchres::MATCH, 0, -1),
		(`^abc$`, "axc", matchres::NOMATCH, 0, -1),
		// .
		(`^.$`, "x", matchres::MATCH, 0, 1),
		(`^.$`, "y", matchres::MATCH, 0, 1),
		(`^.$`, "", matchres::NOMATCH, 0, 1),
		// +
		(`^a+$`, "a", matchres::MATCH, 0, 1),
		(`^a+$`, "aaa", matchres::MATCH, 0, 3),
		(`^a+$`, "", matchres::NOMATCH, 0, 0),
		(`^(abc)+$`, "abc", matchres::MATCH, 0, 3),
		(`^(abc)+$`, "abcabc", matchres::MATCH, 0, 6),
		(`^(abc)+$`, "", matchres::NOMATCH, 0, 0),
		// *
		(`^a*$`, "", matchres::MATCH, 0, 0),
		(`^a*$`, "aaaa", matchres::MATCH, 0, 4),
		(`^a*$`, "b", matchres::NOMATCH, 0, 0),
		(`^(abc)*$`, "", matchres::MATCH, 0, 0),
		(`^(abc)*$`, "abc", matchres::MATCH, 0, 3),
		(`^(abc)*$`, "abcabc", matchres::MATCH, 0, 6),
		(`^(abc)*$`, "bbb", matchres::NOMATCH, 0, 3),
		// ?
		(`^a?$`, "", matchres::MATCH, 0, 0),
		(`^a?$`, "a", matchres::MATCH, 0, 1),
		(`^a?$`, "b", matchres::NOMATCH, 0, 0),
		(`^(abc)?$`, "", matchres::MATCH, 0, 0),
		(`^(abc)?$`, "abc", matchres::MATCH, 0, 3),
		(`^(abc)?$`, "bbb", matchres::NOMATCH, 0, 0),
		// ^ and $
		(`^a*`, "aaaa", matchres::MATCH, 0, 4),
		(`a*$`, "aaaa", matchres::MATCH, 0, 4),
		(`^a*$`, "aaaa", matchres::MATCH, 0, 4),
		(`a*`, "aaaa", matchres::MATCH, 0, 4),
		(`b*`, "aaaabbbb", matchres::MATCH, 4, 8),
		(`^b*`, "aaaabbbb", matchres::MATCH, 0, 0),
		(`b*$`, "aaaabbbb", matchres::MATCH, 4, 8),
		// (a|b)
		(`^(cafe|b)x$`, "cafex", matchres::MATCH, 0, 5),
		(`^(cafe|b)x$`, "bx", matchres::MATCH, 0, 2),
		(`^(cafe|b)x$`, "XXXx", matchres::NOMATCH, 0, 0),
		(`^(cafe|b)x$`, "bx", matchres::MATCH, 0, 2),
		(
			`^(Privat|Jagd)(haftpflicht|schaden)versicherungs(police|betrag)$`,
			"Jagdhaftpflichtversicherungsbetrag",
			matchres::MATCH, 0, -1
		),
		(
			`^(Privat|Jagd)(haftpflicht|schaden)versicherungs(police|betrag)$`,
			"Jagdhaftpflichtversicherungsbetrug",
			matchres::NOMATCH, 0, -1
		),
		(
			`^(Privat|Jagd)(haftpflicht|schaden)versicherungs(police|betrag)$`,
			"Jagdversicherungspolice",
			matchres::NOMATCH, 0, -1
		),
		(`)`, "", matchres::ERROR, 0, 0),
		// [abc]
		(`^test[abc]$`, "testa", matchres::MATCH, 0, -1),
		(`^test[abc]$`, "testb", matchres::MATCH, 0, -1),
		(`^test[abc]$`, "testc", matchres::MATCH, 0, -1),
		(`^test[abc]$`, "testd", matchres::NOMATCH, 0, -1),
		(`^test[abc]*$`, "test", matchres::MATCH, 0, -1),
		(`^test[abc]*$`, "testa", matchres::MATCH, 0, -1),
		(`^test[abc]*$`, "testaaa", matchres::MATCH, 0, -1),
		(`^test[abc]*$`, "testabc", matchres::MATCH, 0, -1),
		(`^test[abc]?$`, "test", matchres::MATCH, 0, -1),
		(`^test[abc]?$`, "testa", matchres::MATCH, 0, -1),
		(`^test[abc]+$`, "testa", matchres::MATCH, 0, -1),
		(`^test[abc]+$`, "test", matchres::NOMATCH, 0, -1),
		(`^test[]abc]$`, "test]", matchres::MATCH, 0, -1),
		(`^test[[abc]$`, "test[", matchres::MATCH, 0, -1),
		(`^test[^abc]$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^abc]$`, "test!", matchres::MATCH, 0, -1),
		(`^test[^abc]$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[^abc]$`, "testb", matchres::NOMATCH, 0, -1),
		(`^test[^abc]$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^]abc]$`, "test]", matchres::NOMATCH, 0, -1),
		(`^test[^abc[]$`, "test[", matchres::NOMATCH, 0, -1),
		(`^test[^abc]*$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^abc]*$`, "testqqqqq", matchres::MATCH, 0, -1),
		(`^test[^abc]*$`, "test", matchres::MATCH, 0, -1),
		(`^test[^abc]*$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^abc]?$`, "test", matchres::MATCH, 0, -1),
		(`^test[^abc]?$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^abc]?$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^abc]+$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^abc]+$`, "testddd", matchres::MATCH, 0, -1),
		(`^test[^abc]+$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^abc]+$`, "testcccc", matchres::NOMATCH, 0, -1),
		(`^test[a-c]$`, "testa", matchres::MATCH, 0, -1),
		(`^test[a-c]$`, "testb", matchres::MATCH, 0, -1),
		(`^test[a-c]$`, "testc", matchres::MATCH, 0, -1),
		(`^test[a-c]$`, "testd", matchres::NOMATCH, 0, -1),
		(`^test[a-c]$`, "test!", matchres::NOMATCH, 0, -1),
		(`^test[a-c]$`, "test-", matchres::NOMATCH, 0, -1),
		(`^test[-a-c]$`, "test-", matchres::MATCH, 0, -1),
		(`^test[a-c-]$`, "test-", matchres::MATCH, 0, -1),
		(`^test[a-c]*$`, "test", matchres::MATCH, 0, -1),
		(`^test[a-c]*$`, "testa", matchres::MATCH, 0, -1),
		(`^test[a-c]*$`, "testabb", matchres::MATCH, 0, -1),
		(`^test[a-c]*$`, "testddd", matchres::NOMATCH, 0, -1),
		(`^test[a-c]?$`, "test", matchres::MATCH, 0, -1),
		(`^test[a-c]?$`, "testb", matchres::MATCH, 0, -1),
		(`^test[a-c]?$`, "testd", matchres::NOMATCH, 0, -1),
		(`^test[a-c]+$`, "test", matchres::NOMATCH, 0, -1),
		(`^test[a-c]+$`, "testbcbc", matchres::MATCH, 0, -1),
		(`^test[a-c]+$`, "testd", matchres::NOMATCH, 0, -1),
		(`^test[^a-c]$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[^a-c]$`, "testb", matchres::NOMATCH, 0, -1),
		(`^test[^a-c]$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^a-c]$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^a-c]$`, "test!", matchres::MATCH, 0, -1),
		(`^test[^a-c]$`, "test-", matchres::MATCH, 0, -1),
		(`^test[^-a-c]$`, "test-", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]$`, "test-", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]*$`, "test", matchres::MATCH, 0, -1),
		(`^test[^a-c-]*$`, "test--", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]*$`, "testq", matchres::MATCH, 0, -1),
		(`^test[^a-c-]?$`, "test", matchres::MATCH, 0, -1),
		(`^test[^a-c-]?$`, "testq", matchres::MATCH, 0, -1),
		(`^test[^a-c-]?$`, "test-", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]+$`, "test", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]+$`, "testb", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]+$`, "testddd", matchres::MATCH, 0, -1),
		(`([a-z][a-z0-9]*,)+`, "a5,b7,c9,", matchres::MATCH, 0, -1),
		// [:alpha:] etc.
		(`^test[[:alnum:]]+$`, "testaA1", matchres::MATCH, 0, -1),
		(`^test[[:alnum:]]+$`, "testa_1", matchres::NOMATCH, 0, -1),
		(`^test[[:alpha:]]+$`, "testa", matchres::MATCH, 0, -1),
		(`^test[[:alpha:]]+$`, "testa1", matchres::NOMATCH, 0, -1),
		(`^test[[:blank:]]+$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[[:blank:]]+$`, "test ", matchres::MATCH, 0, -1),
		(`^test[^[:blank:]]+$`, "testx", matchres::MATCH, 0, -1),
		(`^test[[:blank:]]+$`, "test ", matchres::MATCH, 0, -1),
		(`^test[^[:cntrl:]]+$`, "testa", matchres::MATCH, 0, -1),
		(`^test[[:digit:]]$`, "test1", matchres::MATCH, 0, -1),
		(`^test[[:digit:]]$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[[:graph:]]+$`, "test\t", matchres::NOMATCH, 0, -1),
		(`^test[[:lower:]]+$`, "testa", matchres::MATCH, 0, -1),
		(`^test[[:lower:]]+$`, "testA", matchres::NOMATCH, 0, -1),
		(`^test[[:print:]]+$`, "test\t", matchres::NOMATCH, 0, -1),
		(`^test[[:punct:]]+$`, "testA", matchres::NOMATCH, 0, -1),
		(`^test[[:punct:]]+$`, "test!", matchres::MATCH, 0, -1),
		(`^test[[:space:]]+$`, "test ", matchres::MATCH, 0, -1),
		(`^test[[:upper:]]+$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[[:upper:]]+$`, "testA", matchres::MATCH, 0, -1),
		(`^test[[:xdigit:]]+$`, "testCAFE", matchres::MATCH, 0, -1),
		// range expressions
		(`[a-z]+`, "onlylatinletters", matchres::MATCH, 0, -1),
		(`[x-z]+`, "xyz", matchres::MATCH, 0, -1),
		(`[x-z]+`, "wxyz", matchres::MATCH, 1, 4),
		(`[a-e]+`, "-abcdefg", matchres::MATCH, 1, 6),
		(`[a-z]`, "-1234567890@#$%^&*(!)-+=", matchres::NOMATCH, 0, -1),
		(`[0-9]+`, "9246", matchres::MATCH, 0, -1),
		// # Cyrillic
		(`[а-я]+`, "кирилица", matchres::MATCH, 0, -1),
		(`[а-д]`, "е", matchres::NOMATCH, 0, -1),
		(`[я-ф]`, "-", matchres::ERROR, 0, -1),
		(`[А-Я]+`, "АБВГд", matchres::MATCH, 0, 4),
		// because Macedonian uses Cyrillic, the broad range does
		// not include special symbols
		(`[а-ш]+`, "ѓљњќ", matchres::NOMATCH, 0, -1),
		// # Polish Alphabet
		(`[a-ż]+`, "polskialfabet", matchres::MATCH, 0, -1),
		(`[a-ż]+`, "źśółęćą", matchres::MATCH, 0, -1),
		// because Polish alphabet uses Latin with special characters,
		// other characters can be accepted
		(`[a-ż]+`, "englishspeak", matchres::MATCH, 0, -1),
		(`[a-ż]+`, "{|}~", matchres::MATCH, 0, -1),
		// # Thai Alphabet
		(`[ก-ฮ]+`, "ศอผจข", matchres::MATCH, 0, -1),
		// [:alpha:] etc. plus extra characters
		(`^test[[:digit:]][[:alpha:]]$`, "test1a", matchres::MATCH, 0, -1),
		(`^test[[:digit:]][[:alpha:]]$`, "testa1", matchres::NOMATCH, 0, -1),
		(`^test[[:alnum:]!]+$`, "testa!1", matchres::MATCH, 0, -1),
		(`^test[@[:alnum:]!]+$`, "testa!@1", matchres::MATCH, 0, -1),
		// Escaped characters such as \+
		(`^a\+b$`, "a+b", matchres::MATCH, 0, -1),
		(`^a\?b$`, "a?b", matchres::MATCH, 0, -1),
		(`^a\*b$`, "a*b", matchres::MATCH, 0, -1),
		(`^a\^b$`, "a^b", matchres::MATCH, 0, -1),
		(`^a\$b$`, "a$b", matchres::MATCH, 0, -1),
		(`^a\[b$`, "a[b", matchres::MATCH, 0, -1),
		(`^a\]b$`, "a]b", matchres::MATCH, 0, -1),
		(`^a\(b$`, "a(b", matchres::MATCH, 0, -1),
		(`^a\)b$`, "a)b", matchres::MATCH, 0, -1),
		(`^a\|b$`, "a|b", matchres::MATCH, 0, -1),
		(`^a\.b$`, "a.b", matchres::MATCH, 0, -1),
		(`^a\\b$`, "a\\b", matchres::MATCH, 0, -1),
		(`^x(abc)\{,2\}$`, "xabc{,2}", matchres::MATCH, 0, -1),
		(`^x(abc)\{,2\}$`, "xabcabc{,2}", matchres::NOMATCH, 0, -1),
		(`^[\\]+$`, "\\", matchres::MATCH, 0, -1),
		(`^[\]]+$`, "]", matchres::MATCH, 0, -1),
		(`^[A-Za-z\[\]]+$`, "foo[bar]baz", matchres::MATCH, 0, -1),
		// {m,n}
		(`^x(abc){2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){3}$`, "xabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){1,2}$`, "xabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){,2}$`, "xabc", matchres::MATCH, 0, -1),
		(`^x(abc){,2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){,2}`, "xabcabcabc", matchres::MATCH, 0, 7),
		(`^x(abc){,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){1,}$`, "xabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){3,}$`, "xabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){3,}$`, "xabcabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){2,2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){2,2}$`, "xabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){2,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){-1,2}$`, "xabcabcabc", matchres::ERROR, 0, -1),
		(`^x(abc){x,2}$`, "xabcabcabc", matchres::ERROR, 0, -1),
		(`^x(abc){0,-2}$`, "xabcabcabc", matchres::ERROR, 0, -1),
		// various
		(
			`^.(1024)?(face)*(1024)*ca*(f+e?cafe)(babe)+$`,
			"X1024facefacecaaaaafffcafebabebabe",
			matchres::MATCH, 0, -1,
		),
		(
			`.(1024)?(face)*(1024)*ca*(f+e?cafe)(babe)+`,
			"X1024facefacecaaaaafffcafebabebabe",
			matchres::MATCH, 0, -1,
		),
		(
			`^.(1024)?(face)*(1024)*ca*(f+e?cafe)(babe)+$`,
			"1024facefacecaaaaafffcafebabebabe",
			matchres::NOMATCH, 0, 0,
		),
		(
			`.(1024)?(face)*(1024)*ca*(f+e?cafe)(babe)+`,
			"1024facefacecaaaaafffcafebabebabe",
			matchres::MATCH, 3, -1,
		),
		(
			`^([a-zA-Z]{1,2}[[:digit:]]{1,2})[[:space:]]*([[:digit:]][a-zA-Z]{2})$`,
			"M15 4QN",
			matchres::MATCH, 0, -1
		),
		(`^[^-a]`, "-bcd", matchres::NOMATCH, 0, 0),
		(`^[-a]`, "-bcd", matchres::MATCH, 0, 1),
		(`[^ac-]`, "bde", matchres::MATCH, 0, 1),
		(`[-ac]`, "foo-de", matchres::MATCH, 3, 4),
		(`[-ac]`, "def", matchres::NOMATCH, 0, 0),
		(`foo[-ac]bar`, "foo-bar", matchres::MATCH, 0, 7),
		(`[ac-]$`, "bde-", matchres::MATCH, 3, 4),
		(`^[A-Za-z_-]+$`, "foo", matchres::MATCH, 0, 3),
		// tests from perl
		(`abc`, "abc", matchres::MATCH, 0, -1),
		(`abc`, "xbc", matchres::NOMATCH, 0, 0),
		(`abc`, "axc", matchres::NOMATCH, 0, 0),
		(`abc`, "abx", matchres::NOMATCH, 0, 0),
		(`abc`, "xabcy", matchres::MATCH, 1, 4),
		(`abc`, "ababc", matchres::MATCH, 2, -1),
		(`ab*c`, "abc", matchres::MATCH, 0, -1),
		(`ab*bc`, "abc", matchres::MATCH, 0, -1),
		(`ab*bc`, "abbc", matchres::MATCH, 0, -1),
		(`ab*bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{0,}bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab+bc`, "abbc", matchres::MATCH, 0, -1),
		(`ab+bc`, "abc", matchres::NOMATCH, 0, 0),
		(`ab+bc`, "abq", matchres::NOMATCH, 0, 0),
		(`ab{1,}bc`, "abq", matchres::NOMATCH, 0, 0),
		(`ab+bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{1,}bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{1,3}bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{3,4}bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{4,5}bc`, "abbbbc", matchres::NOMATCH, 0, 0),
		(`ab?bc`, "abbc", matchres::MATCH, 0, -1),
		(`ab?bc`, "abc", matchres::MATCH, 0, -1),
		(`ab{0,1}bc`, "abc", matchres::MATCH, 0, -1),
		(`ab?bc`, "abbbbc", matchres::NOMATCH, 0, 0),
		(`ab?c`, "abc", matchres::MATCH, 0, -1),
		(`ab{0,1}c`, "abc", matchres::MATCH, 0, -1),
		(`^abc$`, "abc", matchres::MATCH, 0, -1),
		(`^abc$`, "abcc", matchres::NOMATCH, 0, 0),
		(`^abc`, "abcc", matchres::MATCH, 0, 3),
		(`^abc$`, "aabc", matchres::NOMATCH, 0, 0),
		(`abc$`, "aabc", matchres::MATCH, 1, -1),
		(`^`, "abc", matchres::MATCH, 0, 0),
		(`$`, "abc", matchres::MATCH, 3, 3),
		(`a.c`, "abc", matchres::MATCH, 0, -1),
		(`a.c`, "axc", matchres::MATCH, 0, -1),
		(`a.*c`, "axyzc", matchres::MATCH, 0, -1),
		(`a.*c`, "axyzd", matchres::NOMATCH, 0, 0),
		(`a[bc]d`, "abc", matchres::NOMATCH, 0, 0),
		(`a[bc]d`, "abd", matchres::MATCH, 0, -1),
		(`a[b-d]e`, "abd", matchres::NOMATCH, 0, 0),
		(`a[b-d]e`, "ace", matchres::MATCH, 0, -1),
		(`a[b-d]`, "aac", matchres::MATCH, 1, -1),
		(`a[-b]`, "a-", matchres::MATCH, 0, -1),
		(`a[b-]`, "a-", matchres::MATCH, 0, -1),
		(`a[b-a]`, "-", matchres::ERROR, 0, 0),
		(`a[]b`, "-", matchres::ERROR, 0, 0),
		(`a[`, "-", matchres::ERROR, 0, 0),
		(`a]`, "a]", matchres::MATCH, 0, -1),
		(`a[]]b`, "a]b", matchres::MATCH, 0, -1),
		(`a[^bc]d`, "aed", matchres::MATCH, 0, -1),
		(`a[^bc]d`, "abd", matchres::NOMATCH, 0, 0),
		(`a[^-b]c`, "adc", matchres::MATCH, 0, -1),
		(`a[^-b]c`, "a-c", matchres::NOMATCH, 0, 0),
		(`a[^]b]c`, "a]c", matchres::NOMATCH, 0, 0),
		(`a[^]b]c`, "adc", matchres::MATCH, 0, -1),
		(`()ef`, "def", matchres::MATCH, 1, -1),
		(`*a`, "-", matchres::ERROR, 0, 0),
		(`(*)b`, "-", matchres::ERROR, 0, 0),
		(`$b`, "b", matchres::ERROR, 0, 0),
		(`a\`, "-", matchres::ERROR, 0, 0),
		(`a\(b`, "a(b", matchres::MATCH, 0, -1),
		(`a\(*b`, "ab", matchres::MATCH, 0, -1),
		(`a\(*b`, "a((b", matchres::MATCH, 0, -1),
		(`a\\b`, `a\b`, matchres::MATCH, 0, -1),
		(`abc)`, "-", matchres::ERROR, 0, 0),
		(`(abc`, "-", matchres::ERROR, 0, 0),
		(`(a)b(c)`, "abc", matchres::MATCH, 0, -1),
		(`a+b+c`, "aabbabc", matchres::MATCH, 4, -1),
		(`a{1,}b{1,}c`, "aabbabc", matchres::MATCH, 4, -1),
		(`a**`, "-", matchres::ERROR, 0, 0),
		(`)(`, "-", matchres::ERROR, 0, 0),
		(`[^ab]*`, "cde", matchres::MATCH, 0, -1),
		(`abc`, "", matchres::NOMATCH, 0, 0),
		(`a*`, "", matchres::MATCH, 0, -1),
		(`([abc])*d`, "abbbcd", matchres::MATCH, 0, -1),
		(`([abc])*bcd`, "abcd", matchres::MATCH, 0, -1),
		(`abcd*efg`, "abcdefg", matchres::MATCH, 0, -1),
		(`ab*`, "xabyabbbz", matchres::MATCH, 1, 3),
		(`ab*`, "xayabbbz", matchres::MATCH, 1, 2),
		(`(ab|cd)e`, "abcde", matchres::MATCH, 2, -1),
		(`[abhgefdc]ij`, "hij", matchres::MATCH, 0, -1),
		(`^(ab|cd)e`, "abcde", matchres::NOMATCH, 0, 0),
		(`(abc|)ef`, "abcdef", matchres::MATCH, 4, -1),
		(`(a|b)c*d`, "abcd", matchres::MATCH, 1, -1),
		(`(ab|ab*)bc`, "abc", matchres::MATCH, 0, -1),
		(`a([bc]*)c*`, "abc", matchres::MATCH, 0, -1),
		(`a([bc]*)(c*d)`, "abcd", matchres::MATCH, 0, -1),
		(`a([bc]+)(c*d)`, "abcd", matchres::MATCH, 0, -1),
		(`a([bc]*)(c+d)`, "abcd", matchres::MATCH, 0, -1),
		(`a[bcd]*dcdcde`, "adcdcde", matchres::MATCH, 0, -1),
		(`a[bcd]+dcdcde`, "adcdcde", matchres::NOMATCH, 0, 0),
		(`(ab|a)b*c`, "abc", matchres::MATCH, 0, -1),
		(`[a-zA-Z_][a-zA-Z0-9_]*`, "alpha", matchres::MATCH, 0, -1),
		(`^a(bc+|b[eh])g|.h$`, "abh", matchres::MATCH, 0, -1),
		(`multiple words of text`, "uh-uh", matchres::NOMATCH, 0, 0),
		(`multiple words`, "multiple words, yeah", matchres::MATCH, 0, 14),
		(`(.*)c(.*)`, "abcde", matchres::MATCH, 0, -1),
		(`\((.*), (.*)\)`, "(a, b)", matchres::MATCH, 0, -1),
		(`[k]`, "ab", matchres::NOMATCH, 0, 0),
		(`a[-]?c`, "ac", matchres::MATCH, 0, -1),
		(`.*d`, "abc\nabd", matchres::MATCH, 0, -1),
		(`(`, "", matchres::ERROR, 0, 0),
		(`(x?)?`, "x", matchres::MATCH, 0, -1),
		(`^*`, "", matchres::ERROR, 0, 0),
		// Submatch handling
		(`(a|ab)(c|bcd)(d*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(a|ab)(bcd|c)(d*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(ab|a)(c|bcd)(d*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(ab|a)(bcd|c)(d*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(a*)(b|abc)(c*)`, "abc", matchres::MATCH, 0, -1), // POSIX: (0,3)(0,1)(1,2)(2,3)
		(`(a*)(abc|b)(c*)`, "abc", matchres::MATCH, 0, -1), // POSIX: (0,3)(0,1)(1,2)(2,3)
		(`(a*)(b|abc)(c*)`, "abc", matchres::MATCH, 0, -1), // POSIX: (0,3)(0,1)(1,2)(2,3)
		(`(a*)(abc|b)(c*)`, "abc", matchres::MATCH, 0, -1), // POSIX: (0,3)(0,1)(1,2)(2,3)
		(`(a|ab)(c|bcd)(d|.*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(a|ab)(bcd|c)(d|.*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(ab|a)(c|bcd)(d|.*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(ab|a)(bcd|c)(d|.*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		// whole-expression alternation
		(`ab|cd`, "cd", matchres::MATCH, 0, 2),
		(`ab|cd`, "abc", matchres::MATCH, 0, 2),
		(`ab|cd`, "abcd", matchres::MATCH, 0, 2),
		// multiple alternation
		(`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
		(`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1),
		// TODO: nested capture groups
		(`((a))`, "abc", matchres::ERROR, 0, -1),
		// (`((a))`, "abc", matchres::MATCH, 0, -1),
		// (`((a)(b)c)(d)`, "abcd", matchres::MATCH, 0, -1),
		// (`(bc+d$|ef*g.|h?i(j|k))`, "effgz", matchres::MATCH, 0, -1),
		// (`(bc+d$|ef*g.|h?i(j|k))`, "ij", matchres::MATCH, 0, -1),
		// (`(bc+d$|ef*g.|h?i(j|k))`, "effg", matchres::NOMATCH, 0, 0),
		// (`(bc+d$|ef*g.|h?i(j|k))`, "bcdd", matchres::NOMATCH, 0, 0),
		// (`(bc+d$|ef*g.|h?i(j|k))`, "reffgz", matchres::MATCH, 0, -1),
		// (`((((((((((a))))))))))`, "a", matchres::MATCH, 0, -1),
		// (`(((((((((a)))))))))`, "a", matchres::MATCH, 0, -1),
		// (`(([a-z]+):)?([a-z]+)$`, "smil", matchres::MATCH, 0, -1),
		// (`^((a)c)?(ab)$`, "ab", matchres::MATCH, 0, -1),
		// TODO: multiple simultaneous capture groups
		// (`(a+|b)*`, "ab", matchres::MATCH, 0, -1),
		// (`(a+|b){0,}`, "ab", matchres::MATCH, 0, -1),
		// (`(a+|b)+`, "ab", matchres::MATCH, 0, -1),
		// (`(a+|b){1,}`, "ab", matchres::MATCH, 0, -1),
		// (`(a+|b)?`, "ab", matchres::MATCH, 0, -1),
		// (`(a+|b){0,1}`, "ab", matchres::MATCH, 0, -1),
		// NOTE: character sequences not currently supported
		// (`\0`, "\0", matchres::MATCH, 0, -1),
		// (`[\0a]`, "\0", matchres::MATCH, 0, -1),
		// (`[a\0]`, "\0", matchres::MATCH, 0, -1),
		// (`[^a\0]`, "\0", matchres::NOMATCH, 0, 0),
		// NOTE: octal sequences not currently supported
		// (`[\1]`, "\1", matchres::MATCH, 0, -1),
		// (`\09`, "\0(separate-me)9", matchres::MATCH, 0, -1),
		// (`\141`, "a", matchres::MATCH, 0, -1),
		// (`[\41]`, "!", matchres::MATCH, 0, -1),
		// NOTE: hex sequences not currently supported
		// (`\xff`, "\377", matchres::MATCH, 0, -1),
		// NOTE: non-greedy matching not currently supported
		// (`a.+?c`, "abcabc", matchres::MATCH, 0, -1),
		// (`.*?\S *:`, "xx:", matchres::MATCH, 0, -1),
		// (`a[ ]*?\ (\d+).*`, "a 10", matchres::MATCH, 0, -1),
		// (`a[ ]*?\ (\d+).*`, "a 10", matchres::MATCH, 0, -1),
		// (`"(\\"|[^"])*?"`, `"\""`, matchres::MATCH, 0, -1),
		// (`^.*?$`, "one\ntwo\nthree\n", matchres::NOMATCH, 0, 0),
		// (`a[^>]*?b`, "a>b", matchres::NOMATCH, 0, 0),
		// (`^a*?$`, "foo", matchres::NOMATCH, 0, 0),
		// (`^([ab]*?)(?=(b)?)c`, "abc", matchres::MATCH, 0, -1),
		// (`^([ab]*?)(?!(b))c`, "abc", matchres::MATCH, 0, -1),
		// (`^([ab]*?)(?<!(a))c`, "abc", matchres::MATCH, 0, -1),
	];

	for (let (expr, string, should_match, start, end) .. cases) {
		if (end == -1) {
			// workaround to get the length in codepoints
			let runes = strings::torunes(string);
			defer free(runes);
			end = len(runes): int;
		};
		run_find_case(expr, string, should_match, start, end);
	};

	// Each submatch case lists the expected content of every capture, in
	// order, starting with the main capture.
	const submatch_cases = [
		// literals
		(`aaa ([^ ]*) (...)`, "aaa bbb ccc", matchres::MATCH,
			["aaa bbb ccc", "bbb", "ccc"]: []str),
	];

	for (let (expr, string, should_match, targets) .. submatch_cases) {
		run_submatch_case(expr, string, should_match, targets);
	};
};

// Table-driven test of [[findall]]. Each case is (expression, input, expected
// outcome, expected main-capture content of every result in order).
@test fn findall() void = {
	const cases = [
		(`ab.`, "hello abc and abあ test abq thanks", matchres::MATCH,
			["abc", "abあ", "abq"]: []str),
		(`a`, "aa", matchres::MATCH,
			["a", "a"]: []str),
		(`fo{2,}`, "fo foo fooofoof oofoo", matchres::MATCH,
			["foo", "fooo", "foo", "foo"]: []str),
		(``, "abc", matchres::MATCH,
			["", "", "", ""]: []str),
		(`a*`, "aaa", matchres::MATCH,
			["aaa", ""]: []str),
	];

	for (let (expr, string, should_match, targets) .. cases) {
		run_findall_case(expr, string, should_match, targets);
	};
};

// Table-driven test of [[replacen]]. Each case is (expression, input, target,
// replacement limit, expected), where expected is the resulting string, or
// void when the replacement is expected to fail.
@test fn replace() void = {
	const cases: [_](str, str, str, size, (str | void)) = [
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			types::SIZE_MAX, "hello xyz and xyz test xyz thanks"),
		(`([Hh])ello`, "Hello world and hello Hare.", `\1owdy`,
			types::SIZE_MAX, "Howdy world and howdy Hare."),
		(`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`,
			types::SIZE_MAX, "fo foobar fooobarfoobarf oofoobar"),
		(`(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)`, "12345678910", `\10`,
			types::SIZE_MAX, "10"),
		(`...?`, "abcdefgh", `\7\0\8`,
			types::SIZE_MAX, "abcdefgh"),
		(`...?`, "abcdefgh", `\7\0\`, types::SIZE_MAX, void),
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			2, "hello xyz and xyz test abq thanks"),
		(`.`, "blablabla", `x`, 0, "blablabla"),
		(`([[:digit:]])([[:digit:]])`, "1234", `\2`, 1, "234"),
	];

	for (let (expr, string, target, n, expected) .. cases) {
		run_replace_case(expr, string, target, n, expected);
	};
};

// Table-driven test of [[rawreplacen]]. Each case is (expression, input,
// target, replacement limit, expected resulting string).
@test fn rawreplace() void = {
	const cases = [
		(`ab.`, "hello abc and abあ test abq thanks", "xyz",
			types::SIZE_MAX, "hello xyz and xyz test xyz thanks"),
		(`([Hh])ello`, "Hello world and hello Hare.", `\howdy\`,
			types::SIZE_MAX, `\howdy\ world and \howdy\ Hare.`),
		(`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`,
			types::SIZE_MAX, `fo \0bar \0bar\0barf oo\0bar`),
		(`\\\\`, `\\\\\\\\`, `\00\1`,
			types::SIZE_MAX, `\00\1\00\1\00\1\00\1`),
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			2, "hello xyz and xyz test abq thanks"),
		(`.`, "blablabla", `x`, 0, "blablabla"),
	];

	for (let (expr, string, target, n, expected) .. cases) {
		run_rawreplace_case(expr, string, target, n, expected);
	};
};