scan.ha (11513B)
1 // License: MPL-2.0 2 // (c) 2021-2022 Alexey Yerin <yyp@disroot.org> 3 // (c) 2022 Bor Grošelj Simić <bor.groseljsimic@telemach.net> 4 // (c) 2021-2022 Drew DeVault <sir@cmpwn.com> 5 // (c) 2021 Ember Sawady <ecs@d2evs.net> 6 // (c) 2021 Kiëd Llaentenn <kiedtl@tilde.team> 7 // (c) 2021 Thomas Bracht Laumann Jespersen <t@laumann.xyz> 8 use ascii; 9 use crypto::sha256; 10 use fs; 11 use hare::ast; 12 use hare::lex; 13 use hare::parse; 14 use hash; 15 use io; 16 use path; 17 use sort; 18 use strings; 19 use strio; 20 21 def ABI_VERSION: u8 = 5; 22 23 // Scans the files in a directory for eligible build inputs and returns a 24 // [[version]] which includes all applicable files and their dependencies. 25 export fn scan(ctx: *context, path: str) (version | error) = { 26 // TODO: Incorporate defines into the hash 27 let sha = sha256::sha256(); 28 for (let i = 0z; i < len(ctx.tags); i += 1) { 29 const tag = &ctx.tags[i]; 30 hash::write(&sha, if (tag.mode == tag_mode::INCLUSIVE) { 31 yield [1]; 32 } else { 33 yield [0]; 34 }); 35 hash::write(&sha, strings::toutf8(tag.name)); 36 }; 37 let iter = match (fs::iter(ctx.fs, path)) { 38 case fs::wrongtype => 39 // Single file case 40 let inputs: []input = []; 41 let deps: []ast::ident = []; 42 let ft = match (type_for_ext(path)) { 43 case void => 44 return notfound; 45 case let ft: filetype => 46 yield ft; 47 }; 48 let path = fs::resolve(ctx.fs, path); 49 let st = fs::stat(ctx.fs, path)?; 50 let in = input { 51 path = strings::dup(path), 52 stat = st, 53 ft = ft, 54 hash = scan_file(ctx, path, ft, &deps)?, 55 ... 56 }; 57 append(inputs, in); 58 59 let sumbuf: [sha256::SIZE]u8 = [0...]; 60 hash::write(&sha, in.hash); 61 hash::sum(&sha, sumbuf); 62 63 return version { 64 hash = sumbuf, 65 basedir = strings::dup(path::dirname(path)), 66 depends = deps, 67 inputs = inputs, 68 tags = tags_dup(ctx.tags), 69 ... 70 }; 71 case let err: fs::error => 72 return err; 73 case let iter: *fs::iterator => 74 yield iter; 75 }; 76 defer fs::finish(iter); 77 let ver = version { 78 basedir = strings::dup(path), 79 tags = tags_dup(ctx.tags), 80 ... 81 }; 82 scan_directory(ctx, &ver, &sha, path, iter)?; 83 84 let readme = path::join(path, "README"); 85 defer free(readme); 86 if (len(ver.inputs) == 0 && !fs::exists(ctx.fs, readme)) { 87 // TODO: HACK: README is a workaround for haredoc issues 88 return notfound; 89 }; 90 91 let tmp: [sha256::SIZE]u8 = [0...]; 92 hash::sum(&sha, tmp); 93 ver.hash = alloc([], sha.sz); 94 append(ver.hash, tmp...); 95 return ver; 96 }; 97 98 // Given a file or directory name, parses it into the basename, extension, and 99 // tag set. 100 export fn parsename(name: str) (str, str, []tag) = { 101 let ext = path::extension(name); 102 let base = ext.0, ext = ext.1; 103 104 let p = strings::index(base, '+'); 105 let m = strings::index(base, '-'); 106 if (p is void && m is void) { 107 return (base, ext, []); 108 }; 109 let i: size = 110 if (p is void && m is size) m: size 111 else if (m is void && p is size) p: size 112 else if (m: size < p: size) m: size 113 else p: size; 114 let tags = strings::sub(base, i, strings::end); 115 let tags = match (parsetags(tags)) { 116 case void => 117 return (base, ext, []); 118 case let t: []tag => 119 yield t; 120 }; 121 let base = strings::sub(base, 0, i); 122 return (base, ext, tags); 123 }; 124 125 fn scan_directory( 126 ctx: *context, 127 ver: *version, 128 sha: *hash::hash, 129 path: str, 130 iter: *fs::iterator, 131 ) (void | error) = { 132 let files: []str = [], dirs: []str = []; 133 defer { 134 strings::freeall(files); 135 strings::freeall(dirs); 136 }; 137 138 let pathbuf = path::init(); 139 for (true) { 140 const ent = match (fs::next(iter)) { 141 case void => 142 break; 143 case let ent: fs::dirent => 144 yield ent; 145 }; 146 147 switch (ent.ftype) { 148 case fs::mode::LINK => 149 let linkpath = path::set(&pathbuf, path, ent.name)!; 150 linkpath = fs::readlink(ctx.fs, linkpath)?; 151 if (!path::abs(linkpath)) { 152 linkpath = path::set(&pathbuf, path, linkpath)!; 153 }; 154 155 const st = fs::stat(ctx.fs, linkpath)?; 156 if (fs::isfile(st.mode)) { 157 append(files, strings::dup(ent.name)); 158 } else if (fs::isdir(st.mode)) { 159 append(dirs, strings::dup(ent.name)); 160 } else if (fs::islink(st.mode)) { 161 abort(); // TODO: Resolve recursively 162 }; 163 case fs::mode::DIR => 164 append(dirs, strings::dup(ent.name)); 165 case fs::mode::REG => 166 append(files, strings::dup(ent.name)); 167 case => void; 168 }; 169 }; 170 171 // Sorted to keep the hash consistent 172 sort::strings(dirs); 173 sort::strings(files); 174 175 // Tuple of is_directory, basename, tags, and path to a candidate input. 176 let inputs: [](bool, str, []tag, str) = []; 177 defer for (let i = 0z; i < len(inputs); i += 1) { 178 // For file paths, these are assigned to the input, which 179 // assumes ownership over them. 180 if (inputs[i].0) { 181 free(inputs[i].1); 182 tags_free(inputs[i].2); 183 free(inputs[i].3); 184 }; 185 }; 186 187 // For a given basename, only the most specific path (i.e. with the most 188 // tags) is used. 189 // 190 // foo.ha 191 // foo+linux.ha 192 // foo+linux+x86_64/ 193 // bar.ha 194 // baz.ha 195 // 196 // In this case, foo+linux+x86_64 is the most specific, and so its used 197 // as the build input and the other two files are discarded. 198 199 for (let i = 0z; i < len(dirs); i += 1) { 200 let name = dirs[i]; 201 let parsed = parsename(name); 202 let base = parsed.0, tags = parsed.2; 203 204 if (!strings::hasprefix(name, "+") 205 && !strings::hasprefix(name, "-")) { 206 if (!strings::hasprefix(name, ".")) { 207 append(ver.subdirs, strings::dup(name)); 208 }; 209 continue; 210 }; 211 if (!tagcompat(ctx.tags, tags)) { 212 continue; 213 }; 214 215 let path = path::join(path, name); 216 let tuple = (true, strings::dup(base), tags, path); 217 let superceded = false; 218 for (let j = 0z; j < len(inputs); j += 1) { 219 if (inputs[j].1 != base) { 220 continue; 221 }; 222 let theirs = inputs[j].2; 223 if (len(theirs) < len(tags)) { 224 free(inputs[j].1); 225 tags_free(inputs[j].2); 226 free(inputs[j].3); 227 inputs[j] = tuple; 228 superceded = true; 229 break; 230 } else if (len(theirs) > len(tags)) { 231 // They are more specific 232 superceded = true; 233 break; 234 } else if (len(base) != 0) { 235 return (path, inputs[j].3): ambiguous; 236 }; 237 }; 238 if (!superceded) { 239 append(inputs, tuple); 240 }; 241 }; 242 243 for (let i = 0z; i < len(files); i += 1) { 244 let name = files[i]; 245 let parsed = parsename(name); 246 let base = parsed.0, ext = parsed.1, tags = parsed.2; 247 248 let eligible = false; 249 static const exts = [".ha", ".s"]; 250 for (let i = 0z; i < len(exts); i += 1) { 251 if (exts[i] == ext) { 252 eligible = true; 253 break; 254 }; 255 }; 256 if (!eligible || !tagcompat(ctx.tags, tags)) { 257 tags_free(tags); 258 continue; 259 }; 260 261 let path = path::join(path, name); 262 let tuple = (false, strings::dup(base), tags, path); 263 let superceded = false; 264 for (let j = 0z; j < len(inputs); j += 1) { 265 if (inputs[j].1 != base) { 266 continue; 267 }; 268 let theirs = inputs[j].2; 269 if (len(theirs) < len(tags)) { 270 // We are more specific 271 free(inputs[j].1); 272 tags_free(inputs[j].2); 273 free(inputs[j].3); 274 inputs[j] = tuple; 275 superceded = true; 276 break; 277 } else if (len(theirs) > len(tags)) { 278 // They are more specific 279 superceded = true; 280 break; 281 } else if (len(base) != 0) { 282 return (path, inputs[j].3): ambiguous; 283 }; 284 }; 285 if (!superceded) { 286 append(inputs, tuple); 287 }; 288 }; 289 290 for (let i = 0z; i < len(inputs); i += 1) { 291 let isdir = inputs[i].0, path = inputs[i].3; 292 if (isdir) { 293 let iter = fs::iter(ctx.fs, path)?; 294 defer fs::finish(iter); 295 scan_directory(ctx, ver, sha, path, iter)?; 296 } else { 297 let path = fs::resolve(ctx.fs, path); 298 let st = fs::stat(ctx.fs, path)?; 299 let ftype = type_for_ext(path) as filetype; 300 let in = input { 301 path = strings::dup(path), 302 stat = st, 303 ft = ftype, 304 hash = scan_file(ctx, path, ftype, &ver.depends)?, 305 basename = inputs[i].1, 306 tags = inputs[i].2, 307 ... 308 }; 309 append(ver.inputs, in); 310 hash::write(sha, in.hash); 311 }; 312 }; 313 }; 314 315 // Looks up a module by its identifier from HAREPATH, and returns a [[version]] 316 // which includes all eligible build inputs. 317 export fn lookup(ctx: *context, name: ast::ident) (version | error) = { 318 let ipath = identpath(name); 319 defer free(ipath); 320 for (let i = len(ctx.paths); i > 0; i -= 1) { 321 let cand = path::join(ctx.paths[i - 1], ipath); 322 defer free(cand); 323 match (scan(ctx, cand)) { 324 case let v: version => 325 return v; 326 case error => void; 327 }; 328 }; 329 return notfound; 330 }; 331 332 fn type_for_ext(name: str) (filetype | void) = { 333 const ext = path::extension(name).1; 334 return 335 if (ext == ".ha") filetype::HARE 336 else if (ext == ".s") filetype::ASSEMBLY 337 else void; 338 }; 339 340 fn scan_file( 341 ctx: *context, 342 path: str, 343 ftype: filetype, 344 deps: *[]ast::ident, 345 ) ([]u8 | error) = { 346 let f = fs::open(ctx.fs, path)?; 347 let sha = sha256::sha256(); 348 hash::write(&sha, strings::toutf8(path)); 349 hash::write(&sha, [ABI_VERSION]); 350 351 if (ftype == filetype::HARE) { 352 let tee = io::tee(f, &sha); 353 let lexer = lex::init(&tee, path); 354 let imports = match (parse::imports(&lexer)) { 355 case let im: []ast::import => 356 yield im; 357 case let err: parse::error => 358 io::close(f): void; 359 return err; 360 }; 361 for (let i = 0z; i < len(imports); i += 1) { 362 if (!have_ident(deps, imports[i].ident)) { 363 append(deps, imports[i].ident); 364 }; 365 }; 366 // Finish spooling out the file for the SHA 367 match (io::copy(io::empty, &tee)) { 368 case size => void; 369 case let err: io::error => 370 io::close(f): void; 371 return err; 372 }; 373 } else { 374 match (io::copy(&sha, f)) { 375 case size => void; 376 case let err: io::error => 377 io::close(f): void; 378 return err; 379 }; 380 }; 381 io::close(f)?; 382 383 let tmp: [sha256::SIZE]u8 = [0...]; 384 hash::sum(&sha, tmp); 385 386 let checksum: []u8 = alloc([], sha.sz); 387 append(checksum, tmp...); 388 return checksum; 389 }; 390 391 fn have_ident(sl: *[]ast::ident, id: ast::ident) bool = { 392 for (let i = 0z; i < len(sl); i += 1) { 393 if (ast::ident_eq(sl[i], id)) { 394 return true; 395 }; 396 }; 397 return false; 398 }; 399 400 // Parses a set of build tags, returning void if the string is an invalid tag 401 // set. The caller must free the return value with [[tags_free]]. 402 export fn parsetags(in: str) ([]tag | void) = { 403 let tags: []tag = []; 404 let iter = strings::iter(in); 405 for (true) { 406 let t = tag { ... }; 407 let m = match (strings::next(&iter)) { 408 case void => 409 break; 410 case let r: rune => 411 yield r; 412 }; 413 t.mode = switch (m) { 414 case => 415 tags_free(tags); 416 return; 417 case '+' => 418 yield tag_mode::INCLUSIVE; 419 case '-' => 420 yield tag_mode::EXCLUSIVE; 421 }; 422 let buf = strio::dynamic(); 423 for (true) match (strings::next(&iter)) { 424 case void => 425 break; 426 case let r: rune => 427 if (ascii::isalnum(r) || r == '_') { 428 strio::appendrune(&buf, r)!; 429 } else { 430 strings::prev(&iter); 431 break; 432 }; 433 }; 434 t.name = strio::string(&buf); 435 append(tags, t); 436 }; 437 return tags; 438 }; 439 440 // Frees a set of tags. 441 export fn tags_free(tags: []tag) void = { 442 for (let i = 0z; i < len(tags); i += 1) { 443 free(tags[i].name); 444 }; 445 free(tags); 446 }; 447 448 // Duplicates a set of tags. 449 export fn tags_dup(tags: []tag) []tag = { 450 let new: []tag = alloc([], len(tags)); 451 for (let i = 0z; i < len(tags); i += 1) { 452 append(new, tag { 453 name = strings::dup(tags[i].name), 454 mode = tags[i].mode, 455 }); 456 }; 457 return new; 458 }; 459 460 // Compares two tag sets and tells you if they are compatible. 461 export fn tagcompat(have: []tag, want: []tag) bool = { 462 // XXX: O(n²), lame 463 for (let i = 0z; i < len(want); i += 1) { 464 let present = false; 465 for (let j = 0z; j < len(have); j += 1) { 466 if (have[j].name == want[i].name) { 467 present = have[j].mode == tag_mode::INCLUSIVE; 468 break; 469 }; 470 }; 471 switch (want[i].mode) { 472 case tag_mode::INCLUSIVE => 473 if (!present) return false; 474 case tag_mode::EXCLUSIVE => 475 if (present) return false; 476 }; 477 }; 478 return true; 479 };