scan.ha (11949B)
1 // License: MPL-2.0 2 // (c) 2021-2022 Alexey Yerin <yyp@disroot.org> 3 // (c) 2022 Bor Grošelj Simić <bor.groseljsimic@telemach.net> 4 // (c) 2021-2022 Drew DeVault <sir@cmpwn.com> 5 // (c) 2021 Ember Sawady <ecs@d2evs.net> 6 // (c) 2021 Kiëd Llaentenn <kiedtl@tilde.team> 7 // (c) 2021 Thomas Bracht Laumann Jespersen <t@laumann.xyz> 8 use ascii; 9 use crypto::sha256; 10 use fs; 11 use hare::ast; 12 use hare::lex; 13 use hare::parse; 14 use hash; 15 use io; 16 use memio; 17 use path; 18 use sort; 19 use strings; 20 use bufio; 21 use os; 22 23 def ABI_VERSION: u8 = 6; 24 25 // Scans the files in a directory for eligible build inputs and returns a 26 // [[version]] which includes all applicable files and their dependencies. 27 export fn scan(ctx: *context, path: str) (version | error) = { 28 // TODO: Incorporate defines into the hash 29 let sha = sha256::sha256(); 30 for (let i = 0z; i < len(ctx.tags); i += 1) { 31 const tag = &ctx.tags[i]; 32 hash::write(&sha, if (tag.mode == tag_mode::INCLUSIVE) { 33 yield [1]; 34 } else { 35 yield [0]; 36 }); 37 hash::write(&sha, strings::toutf8(tag.name)); 38 }; 39 let iter = match (fs::iter(ctx.fs, path)) { 40 case fs::wrongtype => 41 // Single file case 42 let inputs: []input = []; 43 let deps: []ast::ident = []; 44 let ft = match (type_for_ext(path)) { 45 case void => 46 return notfound; 47 case let ft: filetype => 48 yield ft; 49 }; 50 let path = fs::resolve(ctx.fs, path); 51 let st = fs::stat(ctx.fs, path)?; 52 let in = input { 53 path = strings::dup(path), 54 stat = st, 55 ft = ft, 56 hash = scan_file(ctx, path, ft, &deps)?, 57 ... 58 }; 59 append(inputs, in); 60 61 let sumbuf: [sha256::SIZE]u8 = [0...]; 62 hash::write(&sha, in.hash); 63 hash::sum(&sha, sumbuf); 64 65 return version { 66 hash = sumbuf, 67 basedir = strings::dup(path::dirname(path)), 68 depends = deps, 69 inputs = inputs, 70 tags = tags_dup(ctx.tags), 71 ... 72 }; 73 case let err: fs::error => 74 return err; 75 case let iter: *fs::iterator => 76 yield iter; 77 }; 78 defer fs::finish(iter); 79 let ver = version { 80 basedir = strings::dup(path), 81 tags = tags_dup(ctx.tags), 82 ... 83 }; 84 scan_directory(ctx, &ver, &sha, path, iter)?; 85 86 let buf = path::init(path, "README")!; 87 if (len(ver.inputs) == 0 && !fs::exists(ctx.fs, path::string(&buf))) { 88 // TODO: HACK: README is a workaround for haredoc issues 89 return notfound; 90 }; 91 92 let tmp: [sha256::SIZE]u8 = [0...]; 93 hash::sum(&sha, tmp); 94 ver.hash = alloc([], sha.sz); 95 append(ver.hash, tmp...); 96 return ver; 97 }; 98 99 // Given a file or directory name, parses it into the basename, extension, and 100 // tag set. 101 export fn parsename(name: str) (str, str, []tag) = { 102 static let buf = path::buffer {...}; 103 path::set(&buf, name)!; 104 let ext = match (path::pop_ext(&buf)) { 105 case void => yield ""; 106 case let s: str => yield strings::dup(s); 107 }; 108 let base = match (path::peek(&buf)) { 109 case void => yield ""; 110 case let s: str => yield strings::dup(s); 111 }; 112 113 let p = strings::index(base, '+'); 114 let m = strings::index(base, '-'); 115 if (p is void && m is void) { 116 return (base, ext, []); 117 }; 118 let i: size = 119 if (p is void && m is size) m: size 120 else if (m is void && p is size) p: size 121 else if (m: size < p: size) m: size 122 else p: size; 123 let tags = strings::sub(base, i, strings::end); 124 let tags = match (parsetags(tags)) { 125 case void => 126 return (base, ext, []); 127 case let t: []tag => 128 yield t; 129 }; 130 let base = strings::sub(base, 0, i); 131 return (base, ext, tags); 132 }; 133 134 fn scan_directory( 135 ctx: *context, 136 ver: *version, 137 sha: *hash::hash, 138 path: str, 139 iter: *fs::iterator, 140 ) (void | error) = { 141 let files: []str = [], dirs: []str = []; 142 defer { 143 strings::freeall(files); 144 strings::freeall(dirs); 145 }; 146 147 let pathbuf = path::init()!; 148 for (true) { 149 const ent = match (fs::next(iter)) { 150 case void => 151 break; 152 case let ent: fs::dirent => 153 yield ent; 154 }; 155 156 switch (ent.ftype) { 157 case fs::mode::LINK => 158 let linkpath = path::set(&pathbuf, path, ent.name)!; 159 linkpath = fs::readlink(ctx.fs, linkpath)?; 160 if (!path::abs(linkpath)) { 161 linkpath = path::set(&pathbuf, path, linkpath)!; 162 }; 163 164 const st = fs::stat(ctx.fs, linkpath)?; 165 if (fs::isfile(st.mode)) { 166 append(files, strings::dup(ent.name)); 167 } else if (fs::isdir(st.mode)) { 168 append(dirs, strings::dup(ent.name)); 169 } else if (fs::islink(st.mode)) { 170 abort(); // TODO: Resolve recursively 171 }; 172 case fs::mode::DIR => 173 append(dirs, strings::dup(ent.name)); 174 case fs::mode::REG => 175 append(files, strings::dup(ent.name)); 176 case => void; 177 }; 178 }; 179 180 // Sorted to keep the hash consistent 181 sort::strings(dirs); 182 sort::strings(files); 183 184 // Tuple of is_directory, basename, tags, and path to a candidate input. 185 let inputs: [](bool, str, []tag, str) = []; 186 defer for (let i = 0z; i < len(inputs); i += 1) { 187 // For file paths, these are assigned to the input, which 188 // assumes ownership over them. 189 if (inputs[i].0) { 190 free(inputs[i].1); 191 tags_free(inputs[i].2); 192 free(inputs[i].3); 193 }; 194 }; 195 196 // For a given basename, only the most specific path (i.e. with the most 197 // tags) is used. 198 // 199 // foo.ha 200 // foo+linux.ha 201 // foo+linux+x86_64/ 202 // bar.ha 203 // baz.ha 204 // 205 // In this case, foo+linux+x86_64 is the most specific, and so its used 206 // as the build input and the other two files are discarded. 207 208 for (let i = 0z; i < len(dirs); i += 1) { 209 let name = dirs[i]; 210 let parsed = parsename(name); 211 let base = parsed.0, tags = parsed.2; 212 213 if (!strings::hasprefix(name, "+") 214 && !strings::hasprefix(name, "-")) { 215 if (!strings::hasprefix(name, ".")) { 216 append(ver.subdirs, strings::dup(name)); 217 }; 218 continue; 219 }; 220 if (!tagcompat(ctx.tags, tags)) { 221 continue; 222 }; 223 224 const buf = path::init(path, name)!; 225 let path = strings::dup(path::string(&buf)); 226 let tuple = (true, strings::dup(base), tags, path); 227 let superceded = false; 228 for (let j = 0z; j < len(inputs); j += 1) { 229 if (inputs[j].1 != base) { 230 continue; 231 }; 232 let theirs = inputs[j].2; 233 if (len(theirs) < len(tags)) { 234 free(inputs[j].1); 235 tags_free(inputs[j].2); 236 free(inputs[j].3); 237 inputs[j] = tuple; 238 superceded = true; 239 break; 240 } else if (len(theirs) > len(tags)) { 241 // They are more specific 242 superceded = true; 243 break; 244 } else if (len(base) != 0) { 245 return (path, inputs[j].3): ambiguous; 246 }; 247 }; 248 if (!superceded) { 249 append(inputs, tuple); 250 }; 251 }; 252 253 for (let i = 0z; i < len(files); i += 1) { 254 let name = files[i]; 255 let parsed = parsename(name); 256 let base = parsed.0, ext = parsed.1, tags = parsed.2; 257 258 let eligible = false; 259 static const exts = ["ha", "s"]; 260 for (let i = 0z; i < len(exts); i += 1) { 261 if (exts[i] == ext) { 262 eligible = true; 263 break; 264 }; 265 }; 266 if (!eligible || !tagcompat(ctx.tags, tags)) { 267 tags_free(tags); 268 continue; 269 }; 270 271 const buf = path::init(path, name)!; 272 let path = strings::dup(path::string(&buf)); 273 let tuple = (false, strings::dup(base), tags, path); 274 let superceded = false; 275 for (let j = 0z; j < len(inputs); j += 1) { 276 if (inputs[j].1 != base) { 277 continue; 278 }; 279 let theirs = inputs[j].2; 280 if (len(theirs) < len(tags)) { 281 // We are more specific 282 free(inputs[j].1); 283 tags_free(inputs[j].2); 284 free(inputs[j].3); 285 inputs[j] = tuple; 286 superceded = true; 287 break; 288 } else if (len(theirs) > len(tags)) { 289 // They are more specific 290 superceded = true; 291 break; 292 } else if (len(base) != 0) { 293 return (path, inputs[j].3): ambiguous; 294 }; 295 }; 296 if (!superceded) { 297 append(inputs, tuple); 298 }; 299 }; 300 301 for (let i = 0z; i < len(inputs); i += 1) { 302 let isdir = inputs[i].0, path = inputs[i].3; 303 if (isdir) { 304 let iter = fs::iter(ctx.fs, path)?; 305 defer fs::finish(iter); 306 scan_directory(ctx, ver, sha, path, iter)?; 307 } else { 308 let path = fs::resolve(ctx.fs, path); 309 let st = fs::stat(ctx.fs, path)?; 310 let ftype = type_for_ext(path) as filetype; 311 let in = input { 312 path = strings::dup(path), 313 stat = st, 314 ft = ftype, 315 hash = scan_file(ctx, path, ftype, &ver.depends)?, 316 basename = inputs[i].1, 317 tags = inputs[i].2, 318 ... 319 }; 320 append(ver.inputs, in); 321 hash::write(sha, in.hash); 322 }; 323 }; 324 }; 325 326 // Looks up a module by its identifier from HAREPATH, and returns a [[version]] 327 // which includes all eligible build inputs. 328 export fn lookup(ctx: *context, name: ast::ident) (version | error) = { 329 let ipath = identpath(name); 330 defer free(ipath); 331 for (let i = len(ctx.paths); i > 0; i -= 1) { 332 let cand = path::init(ctx.paths[i - 1], ipath)!; 333 match (scan(ctx, path::string(&cand))) { 334 case let v: version => 335 return v; 336 case error => void; 337 }; 338 }; 339 return notfound; 340 }; 341 342 fn type_for_ext(name: str) (filetype | void) = { 343 static let buf = path::buffer {...}; 344 path::set(&buf, name)!; 345 const ext = path::peek_ext(&buf) as str; 346 return 347 if (ext == "ha") filetype::HARE 348 else if (ext == "s") filetype::ASSEMBLY 349 else void; 350 }; 351 352 fn scan_file( 353 ctx: *context, 354 path: str, 355 ftype: filetype, 356 deps: *[]ast::ident, 357 ) ([]u8 | error) = { 358 let truef = fs::open(ctx.fs, path)?; 359 defer io::close(truef)!; 360 let rbuf: [os::BUFSIZ]u8 = [0...]; 361 let f = &bufio::init(truef, rbuf, []); 362 let sha = sha256::sha256(); 363 hash::write(&sha, strings::toutf8(path)); 364 hash::write(&sha, [ABI_VERSION]); 365 366 if (ftype == filetype::HARE) { 367 let tee = io::tee(f, &sha); 368 let lexer = lex::init(&tee, path); 369 defer lex::finish(&lexer); 370 let imports = match (parse::imports(&lexer)) { 371 case let im: []ast::import => 372 yield im; 373 case let err: parse::error => 374 return err; 375 }; 376 for (let i = 0z; i < len(imports); i += 1) { 377 if (!have_ident(deps, imports[i].ident)) { 378 append(deps, imports[i].ident); 379 }; 380 }; 381 // Finish spooling out the file for the SHA 382 match (io::copy(io::empty, &tee)) { 383 case size => void; 384 case let err: io::error => 385 return err; 386 }; 387 } else { 388 match (io::copy(&sha, f)) { 389 case size => void; 390 case let err: io::error => 391 return err; 392 }; 393 }; 394 395 let tmp: [sha256::SIZE]u8 = [0...]; 396 hash::sum(&sha, tmp); 397 398 let checksum: []u8 = alloc([], sha.sz); 399 append(checksum, tmp...); 400 return checksum; 401 }; 402 403 fn have_ident(sl: *[]ast::ident, id: ast::ident) bool = { 404 for (let i = 0z; i < len(sl); i += 1) { 405 if (ast::ident_eq(sl[i], id)) { 406 return true; 407 }; 408 }; 409 return false; 410 }; 411 412 // Parses a set of build tags, returning void if the string is an invalid tag 413 // set. The caller must free the return value with [[tags_free]]. 414 export fn parsetags(in: str) ([]tag | void) = { 415 let tags: []tag = []; 416 let iter = strings::iter(in); 417 for (true) { 418 let t = tag { ... }; 419 let m = match (strings::next(&iter)) { 420 case void => 421 break; 422 case let r: rune => 423 yield r; 424 }; 425 t.mode = switch (m) { 426 case => 427 tags_free(tags); 428 return; 429 case '+' => 430 yield tag_mode::INCLUSIVE; 431 case '-' => 432 yield tag_mode::EXCLUSIVE; 433 }; 434 let buf = memio::dynamic(); 435 for (true) match (strings::next(&iter)) { 436 case void => 437 break; 438 case let r: rune => 439 if (ascii::isalnum(r) || r == '_') { 440 memio::appendrune(&buf, r)!; 441 } else { 442 strings::prev(&iter); 443 break; 444 }; 445 }; 446 t.name = memio::string(&buf)!; 447 append(tags, t); 448 }; 449 return tags; 450 }; 451 452 // Frees a set of tags. 453 export fn tags_free(tags: []tag) void = { 454 for (let i = 0z; i < len(tags); i += 1) { 455 free(tags[i].name); 456 }; 457 free(tags); 458 }; 459 460 // Duplicates a set of tags. 461 export fn tags_dup(tags: []tag) []tag = { 462 let new: []tag = alloc([], len(tags)); 463 for (let i = 0z; i < len(tags); i += 1) { 464 append(new, tag { 465 name = strings::dup(tags[i].name), 466 mode = tags[i].mode, 467 }); 468 }; 469 return new; 470 }; 471 472 // Compares two tag sets and tells you if they are compatible. 473 export fn tagcompat(have: []tag, want: []tag) bool = { 474 // XXX: O(n²), lame 475 for (let i = 0z; i < len(want); i += 1) { 476 let present = false; 477 for (let j = 0z; j < len(have); j += 1) { 478 if (have[j].name == want[i].name) { 479 present = have[j].mode == tag_mode::INCLUSIVE; 480 break; 481 }; 482 }; 483 switch (want[i].mode) { 484 case tag_mode::INCLUSIVE => 485 if (!present) return false; 486 case tag_mode::EXCLUSIVE => 487 if (present) return false; 488 }; 489 }; 490 return true; 491 };