+test.ha (29072B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use fmt;
use strings;
use types;

// The outcome a test case expects: the expression compiles and matches
// (MATCH), compiles but does not match (NOMATCH), or fails to compile (ERROR).
type matchres = enum { MATCH, NOMATCH, ERROR };

// Compiles expr, runs [[find]] against string and aborts with a diagnostic
// on stderr if the outcome differs from expected. For a successful match the
// bounds of the main capture (result[0]) must equal start and end.
fn run_find_case(
	expr: str,
	string: str,
	expected: matchres,
	start: int,
	end: int
) void = {
	const re = match (compile(expr)) {
	case let re: regex => yield re;
	case let e: error =>
		if (expected == matchres::MATCH) {
			fmt::errorln(e)!;
			fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it errored",
				expr, string)!;
			abort();
		};
		if (expected == matchres::NOMATCH) {
			fmt::errorln(e)!;
			fmt::errorfln("Expected expression /{}/ to not match string \"{}\", but it errored",
				expr, string)!;
			abort();
		};
		// expected == matchres::ERROR: the compile error is the
		// desired outcome.
		return;
	};
	defer finish(&re);

	if (expected == matchres::ERROR) {
		fmt::errorfln("Expected expression /{}/ to have error caught during compilation, but it did not",
			expr)!;
		abort();
	};

	const result = find(&re, string);
	defer result_free(result);
	if (len(result) == 0) {
		if (expected == matchres::MATCH) {
			fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it did not",
				expr, string)!;
			abort();
		};
		return;
	} else if (expected == matchres::NOMATCH) {
		fmt::errorfln("Expected expression /{}/ to not match string \"{}\", but it did",
			expr, string)!;
		abort();
	};

	if (start: size != result[0].start) {
		fmt::errorfln("Expected start of main capture to be {} but it was {}",
			start, result[0].start)!;
		abort();
	};
	if (end: size != result[0].end) {
		fmt::errorfln("Expected end of main capture to be {} but it was {}",
			end, result[0].end)!;
		abort();
	};
};

// Compiles expr (asserting that compilation succeeds), runs [[find]] against
// string and asserts that the content of every capture equals the
// corresponding entry of targets.
//
// The expected parameter is accepted so that the signature parallels the
// other helpers, but it is not consulted here.
fn run_submatch_case(
	expr: str,
	string: str,
	expected: matchres,
	targets: []str
) void = {
	const re = compile(expr)!;
	defer finish(&re);

	const result = find(&re, string);
	defer result_free(result);
	assert(len(result) == len(targets), "Invalid number of captures");
	for (let i = 0z; i < len(targets); i += 1) {
		assert(targets[i] == result[i].content, "Invalid capture");
	};
};

// Compiles expr, runs [[findall]] against string and aborts with a diagnostic
// on stderr if the outcome differs from expected. For matchres::MATCH the
// main capture of the i-th result must equal targets[i], and the number of
// results must equal len(targets).
fn run_findall_case(
	expr: str,
	string: str,
	expected: matchres,
	targets: []str
) void = {
	const re = match (compile(expr)) {
	case let re: regex => yield re;
	case let e: error =>
		if (expected != matchres::ERROR) {
			fmt::errorln(e)!;
			fmt::errorfln("Expected expression /{}/ to compile, but it errored",
				expr)!;
			abort();
		};
		return;
	};
	defer finish(&re);

	if (expected == matchres::ERROR) {
		fmt::errorfln("Expected expression /{}/ to have error caught during compilation, but it did not",
			expr)!;
		abort();
	};

	const results = findall(&re, string);
	defer result_freeall(results);

	if (expected == matchres::NOMATCH) {
		// Only fail when something was actually found. Previously this
		// branch aborted unconditionally, so a NOMATCH case could
		// never pass.
		if (len(results) != 0) {
			fmt::errorfln("Expected expression /{}/ to not match string \"{}\", but it did",
				expr, string)!;
			abort();
		};
		return;
	};

	// From here on expected is matchres::MATCH.
	if (len(results) == 0) {
		fmt::errorfln("Expected expression /{}/ to match string \"{}\", but it did not",
			expr, string)!;
		abort();
	};
	if (len(targets) != len(results)) {
		fmt::errorfln("Expected expression /{}/ to find {} results but found {}",
			expr, len(targets), len(results))!;
		abort();
	};
	for (let i = 0z; i < len(results); i += 1) {
		if (results[i][0].content != targets[i]) {
			fmt::errorfln("Expected submatch of expression /{}/ to be {} but it was {}",
				expr, targets[i], results[i][0].content)!;
			abort();
		};
	};
};

// Compiles expr (which must succeed), calls [[replacen]] with string, target
// and n, and aborts with a diagnostic on stderr if the outcome differs from
// expected. When expected is a str, the returned string must equal it; when
// expected is void, replacen must return an error.
fn run_replace_case(
	expr: str,
	string: str,
	target: str,
	n: size,
	expected: (str | void),
) void = {
	const re = match (compile(expr)) {
	case let re: regex => yield re;
	case let e: error =>
		fmt::errorln(e)!;
		fmt::errorfln("Expected expression /{}/ to compile, but it errored",
			expr)!;
		abort();
	};
	defer finish(&re);

	match (replacen(&re, string, target, n)) {
	case let e: error =>
		if (expected is str) {
			fmt::errorln(e)!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\"",
				expr, string, target, n, expected as str)!;
			abort();
		};
	case let s: str =>
		defer free(s);
		if (expected is void) {
			fmt::errorln("Expected replace to fail, but it did not")!;
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} return=\"{}\"",
				expr, string, target, n, s)!;
			abort();
		};
		if (expected as str != s) {
			fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"",
				expr, string, target, n, expected as str, s)!;
			abort();
		};
	};
};

// Compiles expr (which must succeed), calls [[rawreplacen]] with string,
// target and n, and aborts with a diagnostic on stderr if the returned string
// differs from expected.
fn run_rawreplace_case(
	expr: str,
	string: str,
	target: str,
	n: size,
	expected: str,
) void = {
	const re = match (compile(expr)) {
	case let re: regex => yield re;
	case let e: error =>
		fmt::errorln(e)!;
		fmt::errorfln("Expected expression /{}/ to compile, but it errored",
			expr)!;
		abort();
	};
	defer finish(&re);

	const s = rawreplacen(&re, string, target, n);
	defer free(s);
	if (expected != s) {
		fmt::errorfln("expr=/{}/ string=\"{}\" target=\"{}\" n={} expected=\"{}\" return=\"{}\"",
			expr, string, target, n, expected, s)!;
		abort();
	};
};

// Table-driven test of [[find]]. Each case is
// (expression, input, expected outcome, start, end), where start and end are
// the expected bounds of the main capture. An end of -1 is a sentinel that
// the driver loop below replaces with the length of the input in codepoints.
@test fn find() void = {
	const cases = [
		// Literals
		(`^$`, "", matchres::MATCH, 0, 0),
		(``, "", matchres::MATCH, 0, -1),
		(`abcd`, "abcd", matchres::MATCH, 0, -1),
		(`abc`, "abcd", matchres::MATCH, 0, 3),
		(`bcd`, "abcd", matchres::MATCH, 1, 4),
		(`^abc$`, "abc", matchres::MATCH, 0, -1),
		(`^abc$`, "axc", matchres::NOMATCH, 0, -1),
		// .
		(`^.$`, "x", matchres::MATCH, 0, 1),
		(`^.$`, "y", matchres::MATCH, 0, 1),
		(`^.$`, "", matchres::NOMATCH, 0, 1),
		// +
		(`^a+$`, "a", matchres::MATCH, 0, 1),
		(`^a+$`, "aaa", matchres::MATCH, 0, 3),
		(`^a+$`, "", matchres::NOMATCH, 0, 0),
		(`^(abc)+$`, "abc", matchres::MATCH, 0, 3),
		(`^(abc)+$`, "abcabc", matchres::MATCH, 0, 6),
		(`^(abc)+$`, "", matchres::NOMATCH, 0, 0),
		// *
		(`^a*$`, "", matchres::MATCH, 0, 0),
		(`^a*$`, "aaaa", matchres::MATCH, 0, 4),
		(`^a*$`, "b", matchres::NOMATCH, 0, 0),
		(`^(abc)*$`, "", matchres::MATCH, 0, 0),
		(`^(abc)*$`, "abc", matchres::MATCH, 0, 3),
		(`^(abc)*$`, "abcabc", matchres::MATCH, 0, 6),
		(`^(abc)*$`, "bbb", matchres::NOMATCH, 0, 3),
		// ?
		(`^a?$`, "", matchres::MATCH, 0, 0),
		(`^a?$`, "a", matchres::MATCH, 0, 1),
		(`^a?$`, "b", matchres::NOMATCH, 0, 0),
		(`^(abc)?$`, "", matchres::MATCH, 0, 0),
		(`^(abc)?$`, "abc", matchres::MATCH, 0, 3),
		(`^(abc)?$`, "bbb", matchres::NOMATCH, 0, 0),
		// ^ and $
		(`^a*`, "aaaa", matchres::MATCH, 0, 4),
		(`a*$`, "aaaa", matchres::MATCH, 0, 4),
		(`^a*$`, "aaaa", matchres::MATCH, 0, 4),
		(`a*`, "aaaa", matchres::MATCH, 0, 4),
		(`b*`, "aaaabbbb", matchres::MATCH, 4, 8),
		(`^b*`, "aaaabbbb", matchres::MATCH, 0, 0),
		(`b*$`, "aaaabbbb", matchres::MATCH, 4, 8),
		// (a|b)
		(`^(cafe|b)x$`, "cafex", matchres::MATCH, 0, 5),
		(`^(cafe|b)x$`, "bx", matchres::MATCH, 0, 2),
		(`^(cafe|b)x$`, "XXXx", matchres::NOMATCH, 0, 0),
		(
			`^(Privat|Jagd)(haftpflicht|schaden)versicherungs(police|betrag)$`,
			"Jagdhaftpflichtversicherungsbetrag",
			matchres::MATCH, 0, -1
		),
		(
			`^(Privat|Jagd)(haftpflicht|schaden)versicherungs(police|betrag)$`,
			"Jagdhaftpflichtversicherungsbetrug",
			matchres::NOMATCH, 0, -1
		),
		(
			`^(Privat|Jagd)(haftpflicht|schaden)versicherungs(police|betrag)$`,
			"Jagdversicherungspolice",
			matchres::NOMATCH, 0, -1
		),
		(`)`, "", matchres::ERROR, 0, 0),
		// [abc]
		(`^test[abc]$`, "testa", matchres::MATCH, 0, -1),
		(`^test[abc]$`, "testb", matchres::MATCH, 0, -1),
		(`^test[abc]$`, "testc", matchres::MATCH, 0, -1),
		(`^test[abc]$`, "testd", matchres::NOMATCH, 0, -1),
		(`^test[abc]*$`, "test", matchres::MATCH, 0, -1),
		(`^test[abc]*$`, "testa", matchres::MATCH, 0, -1),
		(`^test[abc]*$`, "testaaa", matchres::MATCH, 0, -1),
		(`^test[abc]*$`, "testabc", matchres::MATCH, 0, -1),
		(`^test[abc]?$`, "test", matchres::MATCH, 0, -1),
		(`^test[abc]?$`, "testa", matchres::MATCH, 0, -1),
		(`^test[abc]+$`, "testa", matchres::MATCH, 0, -1),
		(`^test[abc]+$`, "test", matchres::NOMATCH, 0, -1),
		(`^test[]abc]$`, "test]", matchres::MATCH, 0, -1),
		(`^test[[abc]$`, "test[", matchres::MATCH, 0, -1),
		(`^test[^abc]$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^abc]$`, "test!", matchres::MATCH, 0, -1),
		(`^test[^abc]$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[^abc]$`, "testb", matchres::NOMATCH, 0, -1),
		(`^test[^abc]$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^]abc]$`, "test]", matchres::NOMATCH, 0, -1),
		(`^test[^abc[]$`, "test[", matchres::NOMATCH, 0, -1),
		(`^test[^abc]*$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^abc]*$`, "testqqqqq", matchres::MATCH, 0, -1),
		(`^test[^abc]*$`, "test", matchres::MATCH, 0, -1),
		(`^test[^abc]*$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^abc]?$`, "test", matchres::MATCH, 0, -1),
		(`^test[^abc]?$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^abc]?$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^abc]+$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^abc]+$`, "testddd", matchres::MATCH, 0, -1),
		(`^test[^abc]+$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^abc]+$`, "testcccc", matchres::NOMATCH, 0, -1),
		(`^test[a-c]$`, "testa", matchres::MATCH, 0, -1),
		(`^test[a-c]$`, "testb", matchres::MATCH, 0, -1),
		(`^test[a-c]$`, "testc", matchres::MATCH, 0, -1),
		(`^test[a-c]$`, "testd", matchres::NOMATCH, 0, -1),
		(`^test[a-c]$`, "test!", matchres::NOMATCH, 0, -1),
		(`^test[a-c]$`, "test-", matchres::NOMATCH, 0, -1),
		(`^test[-a-c]$`, "test-", matchres::MATCH, 0, -1),
		(`^test[a-c-]$`, "test-", matchres::MATCH, 0, -1),
		(`^test[a-c]*$`, "test", matchres::MATCH, 0, -1),
		(`^test[a-c]*$`, "testa", matchres::MATCH, 0, -1),
		(`^test[a-c]*$`, "testabb", matchres::MATCH, 0, -1),
		(`^test[a-c]*$`, "testddd", matchres::NOMATCH, 0, -1),
		(`^test[a-c]?$`, "test", matchres::MATCH, 0, -1),
		(`^test[a-c]?$`, "testb", matchres::MATCH, 0, -1),
		(`^test[a-c]?$`, "testd", matchres::NOMATCH, 0, -1),
		(`^test[a-c]+$`, "test", matchres::NOMATCH, 0, -1),
		(`^test[a-c]+$`, "testbcbc", matchres::MATCH, 0, -1),
		(`^test[a-c]+$`, "testd", matchres::NOMATCH, 0, -1),
		(`^test[^a-c]$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[^a-c]$`, "testb", matchres::NOMATCH, 0, -1),
		(`^test[^a-c]$`, "testc", matchres::NOMATCH, 0, -1),
		(`^test[^a-c]$`, "testd", matchres::MATCH, 0, -1),
		(`^test[^a-c]$`, "test!", matchres::MATCH, 0, -1),
		(`^test[^a-c]$`, "test-", matchres::MATCH, 0, -1),
		(`^test[^-a-c]$`, "test-", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]$`, "test-", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]*$`, "test", matchres::MATCH, 0, -1),
		(`^test[^a-c-]*$`, "test--", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]*$`, "testq", matchres::MATCH, 0, -1),
		(`^test[^a-c-]?$`, "test", matchres::MATCH, 0, -1),
		(`^test[^a-c-]?$`, "testq", matchres::MATCH, 0, -1),
		(`^test[^a-c-]?$`, "test-", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]+$`, "test", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]+$`, "testb", matchres::NOMATCH, 0, -1),
		(`^test[^a-c-]+$`, "testddd", matchres::MATCH, 0, -1),
		(`([a-z][a-z0-9]*,)+`, "a5,b7,c9,", matchres::MATCH, 0, -1),
		// [:alpha:] etc.
		(`^test[[:alnum:]]+$`, "testaA1", matchres::MATCH, 0, -1),
		(`^test[[:alnum:]]+$`, "testa_1", matchres::NOMATCH, 0, -1),
		(`^test[[:alpha:]]+$`, "testa", matchres::MATCH, 0, -1),
		(`^test[[:alpha:]]+$`, "testa1", matchres::NOMATCH, 0, -1),
		(`^test[[:blank:]]+$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[[:blank:]]+$`, "test ", matchres::MATCH, 0, -1),
		(`^test[^[:blank:]]+$`, "testx", matchres::MATCH, 0, -1),
		(`^test[^[:cntrl:]]+$`, "testa", matchres::MATCH, 0, -1),
		(`^test[[:digit:]]$`, "test1", matchres::MATCH, 0, -1),
		(`^test[[:digit:]]$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[[:graph:]]+$`, "test\t", matchres::NOMATCH, 0, -1),
		(`^test[[:lower:]]+$`, "testa", matchres::MATCH, 0, -1),
		(`^test[[:lower:]]+$`, "testA", matchres::NOMATCH, 0, -1),
		(`^test[[:print:]]+$`, "test\t", matchres::NOMATCH, 0, -1),
		(`^test[[:punct:]]+$`, "testA", matchres::NOMATCH, 0, -1),
		(`^test[[:punct:]]+$`, "test!", matchres::MATCH, 0, -1),
		(`^test[[:space:]]+$`, "test ", matchres::MATCH, 0, -1),
		(`^test[[:upper:]]+$`, "testa", matchres::NOMATCH, 0, -1),
		(`^test[[:upper:]]+$`, "testA", matchres::MATCH, 0, -1),
		(`^test[[:xdigit:]]+$`, "testCAFE", matchres::MATCH, 0, -1),
		// Range expressions
		(`[a-z]+`, "onlylatinletters", matchres::MATCH, 0, -1),
		(`[x-z]+`, "xyz", matchres::MATCH, 0, -1),
		(`[x-z]+`, "wxyz", matchres::MATCH, 1, 4),
		(`[a-e]+`, "-abcdefg", matchres::MATCH, 1, 6),
		(`[a-z]`, "-1234567890@#$%^&*(!)-+=", matchres::NOMATCH, 0, -1),
		(`[0-9]+`, "9246", matchres::MATCH, 0, -1),
		// Cyrillic
		(`[а-я]+`, "кирилица", matchres::MATCH, 0, -1),
		(`[а-д]`, "е", matchres::NOMATCH, 0, -1),
		(`[я-ф]`, "-", matchres::ERROR, 0, -1),
		(`[А-Я]+`, "АБВГд", matchres::MATCH, 0, 4),
		// Because Macedonian uses Cyrillic, the broad range does
		// not include special symbols
		(`[а-ш]+`, "ѓљњќ", matchres::NOMATCH, 0, -1),
		// Polish alphabet
		(`[a-ż]+`, "polskialfabet", matchres::MATCH, 0, -1),
		(`[a-ż]+`, "źśółęćą", matchres::MATCH, 0, -1),
		// Because the Polish alphabet uses Latin with special
		// characters, other characters can be accepted
		(`[a-ż]+`, "englishspeak", matchres::MATCH, 0, -1),
		(`[a-ż]+`, "{|}~", matchres::MATCH, 0, -1),
		// Thai alphabet
		(`[ก-ฮ]+`, "ศอผจข", matchres::MATCH, 0, -1),
		// [:alpha:] etc. plus extra characters
		(`^test[[:digit:]][[:alpha:]]$`, "test1a", matchres::MATCH, 0, -1),
		(`^test[[:digit:]][[:alpha:]]$`, "testa1", matchres::NOMATCH, 0, -1),
		(`^test[[:alnum:]!]+$`, "testa!1", matchres::MATCH, 0, -1),
		(`^test[@[:alnum:]!]+$`, "testa!@1", matchres::MATCH, 0, -1),
		// Escaped characters such as \+
		(`^a\+b$`, "a+b", matchres::MATCH, 0, -1),
		(`^a\?b$`, "a?b", matchres::MATCH, 0, -1),
		(`^a\*b$`, "a*b", matchres::MATCH, 0, -1),
		(`^a\^b$`, "a^b", matchres::MATCH, 0, -1),
		(`^a\$b$`, "a$b", matchres::MATCH, 0, -1),
		(`^a\[b$`, "a[b", matchres::MATCH, 0, -1),
		(`^a\]b$`, "a]b", matchres::MATCH, 0, -1),
		(`^a\(b$`, "a(b", matchres::MATCH, 0, -1),
		(`^a\)b$`, "a)b", matchres::MATCH, 0, -1),
		(`^a\|b$`, "a|b", matchres::MATCH, 0, -1),
		(`^a\.b$`, "a.b", matchres::MATCH, 0, -1),
		(`^a\\b$`, "a\\b", matchres::MATCH, 0, -1),
		(`^x(abc)\{,2\}$`, "xabc{,2}", matchres::MATCH, 0, -1),
		(`^x(abc)\{,2\}$`, "xabcabc{,2}", matchres::NOMATCH, 0, -1),
		(`^[\\]+$`, "\\", matchres::MATCH, 0, -1),
		(`^[\]]+$`, "]", matchres::MATCH, 0, -1),
		(`^[A-Za-z\[\]]+$`, "foo[bar]baz", matchres::MATCH, 0, -1),
		// {m,n}
		(`^x(abc){2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){3}$`, "xabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){1,2}$`, "xabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){,2}$`, "xabc", matchres::MATCH, 0, -1),
		(`^x(abc){,2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){,2}`, "xabcabcabc", matchres::MATCH, 0, 7),
		(`^x(abc){,0}de`, "xde", matchres::MATCH, 0, -1),
		(`^x(abc){,0}de`, "xe", matchres::NOMATCH, 0, -1),
		(`^x(abc){,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){1,}$`, "xabc", matchres::MATCH, 0, -1),
		(`^x(abc){1,}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){3,}$`, "xabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){3,}$`, "xabcabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){2,2}$`, "xabcabc", matchres::MATCH, 0, -1),
		(`^x(abc){2,2}$`, "xabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){2,2}$`, "xabcabcabc", matchres::NOMATCH, 0, -1),
		(`^x(abc){-1,2}$`, "xabcabcabc", matchres::ERROR, 0, -1),
		(`^x(abc){x,2}$`, "xabcabcabc", matchres::ERROR, 0, -1),
		(`^x(abc){0,-2}$`, "xabcabcabc", matchres::ERROR, 0, -1),
		// Various
		(
			`^.(1024)?(face)*(1024)*ca*(f+e?cafe)(babe)+$`,
			"X1024facefacecaaaaafffcafebabebabe",
			matchres::MATCH, 0, -1,
		),
		(
			`.(1024)?(face)*(1024)*ca*(f+e?cafe)(babe)+`,
			"X1024facefacecaaaaafffcafebabebabe",
			matchres::MATCH, 0, -1,
		),
		(
			`^.(1024)?(face)*(1024)*ca*(f+e?cafe)(babe)+$`,
			"1024facefacecaaaaafffcafebabebabe",
			matchres::NOMATCH, 0, 0,
		),
		(
			`.(1024)?(face)*(1024)*ca*(f+e?cafe)(babe)+`,
			"1024facefacecaaaaafffcafebabebabe",
			matchres::MATCH, 3, -1,
		),
		(
			`^([a-zA-Z]{1,2}[[:digit:]]{1,2})[[:space:]]*([[:digit:]][a-zA-Z]{2})$`,
			"M15 4QN",
			matchres::MATCH, 0, -1
		),
		(`^[^-a]`, "-bcd", matchres::NOMATCH, 0, 0),
		(`^[-a]`, "-bcd", matchres::MATCH, 0, 1),
		(`[^ac-]`, "bde", matchres::MATCH, 0, 1),
		(`[-ac]`, "foo-de", matchres::MATCH, 3, 4),
		(`[-ac]`, "def", matchres::NOMATCH, 0, 0),
		(`foo[-ac]bar`, "foo-bar", matchres::MATCH, 0, 7),
		(`[ac-]$`, "bde-", matchres::MATCH, 3, 4),
		(`^[A-Za-z_-]+$`, "foo", matchres::MATCH, 0, 3),
		// Tests for jump bugs
		(`ab?c`, "ac", matchres::MATCH, 0, -1),
		(`ab?c|z`, "ac", matchres::MATCH, 0, -1),
		(`(ab?c){,1}`, "ac", matchres::MATCH, 0, -1),
		(`(ab?c)?`, "ac", matchres::MATCH, 0, -1),
		(`(ab?c)*`, "ac", matchres::MATCH, 0, -1),
		// Tests from perl
		(`abc`, "abc", matchres::MATCH, 0, -1),
		(`abc`, "xbc", matchres::NOMATCH, 0, 0),
		(`abc`, "axc", matchres::NOMATCH, 0, 0),
		(`abc`, "abx", matchres::NOMATCH, 0, 0),
		(`abc`, "xabcy", matchres::MATCH, 1, 4),
		(`abc`, "ababc", matchres::MATCH, 2, -1),
		(`ab*c`, "abc", matchres::MATCH, 0, -1),
		(`ab*bc`, "abc", matchres::MATCH, 0, -1),
		(`ab*bc`, "abbc", matchres::MATCH, 0, -1),
		(`ab*bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{0,}bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab+bc`, "abbc", matchres::MATCH, 0, -1),
		(`ab+bc`, "abc", matchres::NOMATCH, 0, 0),
		(`ab+bc`, "abq", matchres::NOMATCH, 0, 0),
		(`ab{1,}bc`, "abq", matchres::NOMATCH, 0, 0),
		(`ab+bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{1,}bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{1,3}bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{3,4}bc`, "abbbbc", matchres::MATCH, 0, -1),
		(`ab{4,5}bc`, "abbbbc", matchres::NOMATCH, 0, 0),
		(`ab?bc`, "abbc", matchres::MATCH, 0, -1),
		(`ab?bc`, "abc", matchres::MATCH, 0, -1),
		(`ab{0,1}bc`, "abc", matchres::MATCH, 0, -1),
		(`ab?bc`, "abbbbc", matchres::NOMATCH, 0, 0),
		(`ab?c`, "abc", matchres::MATCH, 0, -1),
		(`ab{0,1}c`, "abc", matchres::MATCH, 0, -1),
		(`^abc$`, "abc", matchres::MATCH, 0, -1),
		(`^abc$`, "abcc", matchres::NOMATCH, 0, 0),
		(`^abc`, "abcc", matchres::MATCH, 0, 3),
		(`^abc$`, "aabc", matchres::NOMATCH, 0, 0),
		(`abc$`, "aabc", matchres::MATCH, 1, -1),
		(`^`, "abc", matchres::MATCH, 0, 0),
		(`$`, "abc", matchres::MATCH, 3, 3),
		(`a.c`, "abc", matchres::MATCH, 0, -1),
		(`a.c`, "axc", matchres::MATCH, 0, -1),
		(`a.*c`, "axyzc", matchres::MATCH, 0, -1),
		(`a.*c`, "axyzd", matchres::NOMATCH, 0, 0),
		(`a[bc]d`, "abc", matchres::NOMATCH, 0, 0),
		(`a[bc]d`, "abd", matchres::MATCH, 0, -1),
		(`a[b-d]e`, "abd", matchres::NOMATCH, 0, 0),
		(`a[b-d]e`, "ace", matchres::MATCH, 0, -1),
		(`a[b-d]`, "aac", matchres::MATCH, 1, -1),
		(`a[-b]`, "a-", matchres::MATCH, 0, -1),
		(`a[b-]`, "a-", matchres::MATCH, 0, -1),
		(`a[b-a]`, "-", matchres::ERROR, 0, 0),
		(`a[]b`, "-", matchres::ERROR, 0, 0),
		(`a[`, "-", matchres::ERROR, 0, 0),
		(`a]`, "a]", matchres::MATCH, 0, -1),
		(`a[]]b`, "a]b", matchres::MATCH, 0, -1),
		(`a[^bc]d`, "aed", matchres::MATCH, 0, -1),
		(`a[^bc]d`, "abd", matchres::NOMATCH, 0, 0),
		(`a[^-b]c`, "adc", matchres::MATCH, 0, -1),
		(`a[^-b]c`, "a-c", matchres::NOMATCH, 0, 0),
		(`a[^]b]c`, "a]c", matchres::NOMATCH, 0, 0),
		(`a[^]b]c`, "adc", matchres::MATCH, 0, -1),
		(`()ef`, "def", matchres::MATCH, 1, -1),
		(`*a`, "-", matchres::ERROR, 0, 0),
		(`(*)b`, "-", matchres::ERROR, 0, 0),
		(`$b`, "b", matchres::ERROR, 0, 0),
		(`a\`, "-", matchres::ERROR, 0, 0),
		(`a\(b`, "a(b", matchres::MATCH, 0, -1),
		(`a\(*b`, "ab", matchres::MATCH, 0, -1),
		(`a\(*b`, "a((b", matchres::MATCH, 0, -1),
		(`a\\b`, `a\b`, matchres::MATCH, 0, -1),
		(`abc)`, "-", matchres::ERROR, 0, 0),
		(`(abc`, "-", matchres::ERROR, 0, 0),
		(`(a)b(c)`, "abc", matchres::MATCH, 0, -1),
		(`a+b+c`, "aabbabc", matchres::MATCH, 4, -1),
		(`a{1,}b{1,}c`, "aabbabc", matchres::MATCH, 4, -1),
		(`a**`, "-", matchres::ERROR, 0, 0),
		(`)(`, "-", matchres::ERROR, 0, 0),
		(`[^ab]*`, "cde", matchres::MATCH, 0, -1),
		(`abc`, "", matchres::NOMATCH, 0, 0),
		(`a*`, "", matchres::MATCH, 0, -1),
		(`([abc])*d`, "abbbcd", matchres::MATCH, 0, -1),
		(`([abc])*bcd`, "abcd", matchres::MATCH, 0, -1),
		(`abcd*efg`, "abcdefg", matchres::MATCH, 0, -1),
		(`ab*`, "xabyabbbz", matchres::MATCH, 1, 3),
		(`ab*`, "xayabbbz", matchres::MATCH, 1, 2),
		(`(ab|cd)e`, "abcde", matchres::MATCH, 2, -1),
		(`[abhgefdc]ij`, "hij", matchres::MATCH, 0, -1),
		(`^(ab|cd)e`, "abcde", matchres::NOMATCH, 0, 0),
		(`(abc|)ef`, "abcdef", matchres::MATCH, 4, -1),
		(`(a|b)c*d`, "abcd", matchres::MATCH, 1, -1),
		(`(ab|ab*)bc`, "abc", matchres::MATCH, 0, -1),
		(`a([bc]*)c*`, "abc", matchres::MATCH, 0, -1),
		(`a([bc]*)(c*d)`, "abcd", matchres::MATCH, 0, -1),
		(`a([bc]+)(c*d)`, "abcd", matchres::MATCH, 0, -1),
		(`a([bc]*)(c+d)`, "abcd", matchres::MATCH, 0, -1),
		(`a[bcd]*dcdcde`, "adcdcde", matchres::MATCH, 0, -1),
		(`a[bcd]+dcdcde`, "adcdcde", matchres::NOMATCH, 0, 0),
		(`(ab|a)b*c`, "abc", matchres::MATCH, 0, -1),
		(`[a-zA-Z_][a-zA-Z0-9_]*`, "alpha", matchres::MATCH, 0, -1),
		(`^a(bc+|b[eh])g|.h$`, "abh", matchres::MATCH, 1, -1),
		(`multiple words of text`, "uh-uh", matchres::NOMATCH, 0, 0),
		(`multiple words`, "multiple words, yeah", matchres::MATCH, 0, 14),
		(`(.*)c(.*)`, "abcde", matchres::MATCH, 0, -1),
		(`\((.*), (.*)\)`, "(a, b)", matchres::MATCH, 0, -1),
		(`[k]`, "ab", matchres::NOMATCH, 0, 0),
		(`a[-]?c`, "ac", matchres::MATCH, 0, -1),
		(`.*d`, "abc\nabd", matchres::MATCH, 0, -1),
		(`(`, "", matchres::ERROR, 0, 0),
		(`(x?)?`, "x", matchres::MATCH, 0, -1),
		(`^*`, "", matchres::ERROR, 0, 0),
		// Submatch handling
		(`(a|ab)(c|bcd)(d*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(a|ab)(bcd|c)(d*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(ab|a)(c|bcd)(d*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(ab|a)(bcd|c)(d*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(a*)(b|abc)(c*)`, "abc", matchres::MATCH, 0, -1), // POSIX: (0,3)(0,1)(1,2)(2,3)
		(`(a*)(abc|b)(c*)`, "abc", matchres::MATCH, 0, -1), // POSIX: (0,3)(0,1)(1,2)(2,3)
		(`(a*)(b|abc)(c*)`, "abc", matchres::MATCH, 0, -1), // POSIX: (0,3)(0,1)(1,2)(2,3)
		(`(a*)(abc|b)(c*)`, "abc", matchres::MATCH, 0, -1), // POSIX: (0,3)(0,1)(1,2)(2,3)
		(`(a|ab)(c|bcd)(d|.*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(a|ab)(bcd|c)(d|.*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(ab|a)(c|bcd)(d|.*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		(`(ab|a)(bcd|c)(d|.*)`, "abcd", matchres::MATCH, 0, -1), // POSIX: (0,4)(0,2)(2,3)(3,4)
		// Whole-expression alternation
		(`ab|cd`, "cd", matchres::MATCH, 0, 2),
		(`ab|cd`, "abc", matchres::MATCH, 0, 2),
		(`ab|cd`, "abcd", matchres::MATCH, 0, 2),
		(`ab|cd`, "bcd", matchres::MATCH, 1, 3),
		(`^ab|cd`, "bcd", matchres::MATCH, 1, 3),
		(`^ab|cd`, "zab", matchres::NOMATCH, 0, 0),
		(`ab$|cd`, "ab", matchres::MATCH, 0, 2),
		(`ab$|cd`, "abc", matchres::NOMATCH, 0, 0),
		(`ab|cd$`, "cde", matchres::NOMATCH, 0, 0),
		(`ab|^cd`, "bcd", matchres::NOMATCH, 0, 0),
		(`ab|^cd`, "cde", matchres::MATCH, 0, 2),
		(`ab\|^cd`, "cde", matchres::ERROR, 0, 0),
		(`a|(b)`, "a", matchres::MATCH, 0, -1),
		(`(a|(b?|c*){,1}|d+|e)`, "e", matchres::MATCH, 0, -1),
		// Multiple alternation
		(`a|b|c|d|e`, "e", matchres::MATCH, 0, -1),
		(`a|b|c|d|e`, "xe", matchres::MATCH, 1, -1),
		(`(a|b|c|d|e)f`, "ef", matchres::MATCH, 0, -1),
		(`a|b$|c$|d$|e`, "cd", matchres::MATCH, 1, -1),
		(`a|b$|c$|d$|e`, "ax", matchres::MATCH, 0, 1),
		(`a|b$|c$|d$|e`, "cx", matchres::NOMATCH, 0, 0),
		(`a|b$|c$|d$|e`, "ex", matchres::MATCH, 0, 1),
		(`a|^b|^c|^d|e`, "cd", matchres::MATCH, 0, 1),
		(`a|^b|^c|^d|e`, "xa", matchres::MATCH, 1, 2),
		(`a|^b|^c|^d|e`, "xc", matchres::NOMATCH, 0, 0),
		(`a|^b|^c|^d|e`, "xe", matchres::MATCH, 1, 2),
		(`((a))`, "abc", matchres::MATCH, 0, 1),
		(`((a)(b)c)(d)`, "abcd", matchres::MATCH, 0, -1),
		// TODO: anchor in capture groups
		//(`(bc+d$|ef*g.|h?i(j|k))`, "effgz", matchres::MATCH, 0, -1),
		//(`(bc+d$|ef*g.|h?i(j|k))`, "ij", matchres::MATCH, 0, -1),
		//(`(bc+d$|ef*g.|h?i(j|k))`, "effg", matchres::NOMATCH, 0, 0),
		//(`(bc+d$|ef*g.|h?i(j|k))`, "bcdd", matchres::NOMATCH, 0, 0),
		//(`(bc+d$|ef*g.|h?i(j|k))`, "reffgz", matchres::MATCH, 0, -1),
		(`((((((((((a))))))))))`, "a", matchres::MATCH, 0, -1),
		(`(((((((((a)))))))))`, "a", matchres::MATCH, 0, -1),
		(`(([a-z]+):)?([a-z]+)$`, "smil", matchres::MATCH, 0, -1),
		(`^((a)c)?(ab)$`, "ab", matchres::MATCH, 0, -1),
		(`(a+|b)*`, "ab", matchres::MATCH, 0, -1),
		(`(a+|b){0,}`, "ab", matchres::MATCH, 0, -1),
		(`(a+|b)+`, "ab", matchres::MATCH, 0, -1),
		(`(a+|b){1,}`, "ab", matchres::MATCH, 0, -1),
		(`(a+|b)?`, "ab", matchres::MATCH, 0, 1),
		(`(a+|b){0,1}`, "ab", matchres::MATCH, 0, 1),
		// NOTE: character sequences not currently supported
		// (`\0`, "\0", matchres::MATCH, 0, -1),
		// (`[\0a]`, "\0", matchres::MATCH, 0, -1),
		// (`[a\0]`, "\0", matchres::MATCH, 0, -1),
		// (`[^a\0]`, "\0", matchres::NOMATCH, 0, 0),
		// NOTE: octal sequences not currently supported
		// (`[\1]`, "\1", matchres::MATCH, 0, -1),
		// (`\09`, "\0(separate-me)9", matchres::MATCH, 0, -1),
		// (`\141`, "a", matchres::MATCH, 0, -1),
		// (`[\41]`, "!", matchres::MATCH, 0, -1),
		// NOTE: hex sequences not currently supported
		// (`\xff`, "\377", matchres::MATCH, 0, -1),
		// NOTE: non-greedy matching not currently supported
		// (`a.+?c`, "abcabc", matchres::MATCH, 0, -1),
		// (`.*?\S *:`, "xx:", matchres::MATCH, 0, -1),
		// (`a[ ]*?\ (\d+).*`, "a 10", matchres::MATCH, 0, -1),
		// (`a[ ]*?\ (\d+).*`, "a 10", matchres::MATCH, 0, -1),
		// (`"(\\"|[^"])*?"`, `"\""`, matchres::MATCH, 0, -1),
		// (`^.*?$`, "one\ntwo\nthree\n", matchres::NOMATCH, 0, 0),
		// (`a[^>]*?b`, "a>b", matchres::NOMATCH, 0, 0),
		// (`^a*?$`, "foo", matchres::NOMATCH, 0, 0),
		// (`^([ab]*?)(?=(b)?)c`, "abc", matchres::MATCH, 0, -1),
		// (`^([ab]*?)(?!(b))c`, "abc", matchres::MATCH, 0, -1),
		// (`^([ab]*?)(?<!(a))c`, "abc", matchres::MATCH, 0, -1),
	];

	for (let (expr, string, should_match, start, end) .. cases) {
		if (end == -1) {
			// workaround to get the length in codepoints
			let runes = strings::torunes(string);
			defer free(runes);
			end = len(runes): int;
		};
		run_find_case(expr, string, should_match, start, end);
	};

	// Each case is (expression, input, expected outcome, expected capture
	// contents in order, starting with the main capture).
	const submatch_cases = [
		// literals
		(`aaa ([^ ]*) (...)`, "aaa bbb ccc", matchres::MATCH,
			["aaa bbb ccc", "bbb", "ccc"]: []str),
	];

	for (let (expr, string, should_match, targets) .. submatch_cases) {
		run_submatch_case(expr, string, should_match, targets);
	};
};

// Table-driven test of [[findall]]. Each case is
// (expression, input, expected outcome, expected main-capture contents of
// every result, in order).
@test fn findall() void = {
	const cases = [
		(`ab.`, "hello abc and abあ test abq thanks", matchres::MATCH,
			["abc", "abあ", "abq"]: []str),
		(`a`, "aa", matchres::MATCH,
			["a", "a"]: []str),
		(`fo{2,}`, "fo foo fooofoof oofoo", matchres::MATCH,
			["foo", "fooo", "foo", "foo"]: []str),
		(``, "abc", matchres::MATCH,
			["", "", "", ""]: []str),
		(`a*`, "aaa", matchres::MATCH,
			["aaa", ""]: []str),
	];

	for (let (expr, string, should_match, targets) .. cases) {
		run_findall_case(expr, string, should_match, targets);
	};
};

// Table-driven test of [[replacen]]. Each case is
// (expression, input, replacement target, n, expected), where expected is
// either the resulting string or void when replacen must return an error.
@test fn replace() void = {
	const cases: [_](str, str, str, size, (str | void)) = [
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			types::SIZE_MAX, "hello xyz and xyz test xyz thanks"),
		(`([Hh])ello`, "Hello world and hello Hare.", `\1owdy`,
			types::SIZE_MAX, "Howdy world and howdy Hare."),
		(`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`,
			types::SIZE_MAX, "fo foobar fooobarfoobarf oofoobar"),
		(`(1)(2)(3)(4)(5)(6)(7)(8)(9)(10)`, "12345678910", `\10`,
			types::SIZE_MAX, "10"),
		(`...?`, "abcdefgh", `\7\0\8`,
			types::SIZE_MAX, "abcdefgh"),
		(`...?`, "abcdefgh", `\7\0\`, types::SIZE_MAX, void),
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			2, "hello xyz and xyz test abq thanks"),
		(`.`, "blablabla", `x`, 0, "blablabla"),
		(`([[:digit:]])([[:digit:]])`, "1234", `\2`, 1, "234"),
	];

	for (let (expr, string, target, n, expected) .. cases) {
		run_replace_case(expr, string, target, n, expected);
	};
};

// Table-driven test of [[rawreplacen]]. Each case is
// (expression, input, replacement target, n, expected resulting string).
@test fn rawreplace() void = {
	const cases = [
		(`ab.`, "hello abc and abあ test abq thanks", "xyz",
			types::SIZE_MAX, "hello xyz and xyz test xyz thanks"),
		(`([Hh])ello`, "Hello world and hello Hare.", `\howdy\`,
			types::SIZE_MAX, `\howdy\ world and \howdy\ Hare.`),
		(`fo{2,}`, "fo foo fooofoof oofoo", `\0bar`,
			types::SIZE_MAX, `fo \0bar \0bar\0barf oo\0bar`),
		(`\\\\`, `\\\\\\\\`, `\00\1`,
			types::SIZE_MAX, `\00\1\00\1\00\1\00\1`),
		(`ab.`, "hello abc and abあ test abq thanks", `xyz`,
			2, "hello xyz and xyz test abq thanks"),
		(`.`, "blablabla", `x`, 0, "blablabla"),
	];

	for (let (expr, string, target, n, expected) .. cases) {
		run_rawreplace_case(expr, string, target, n, expected);
	};
};