hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

scan.ha (11513B)


      1 // License: MPL-2.0
      2 // (c) 2021-2022 Alexey Yerin <yyp@disroot.org>
      3 // (c) 2022 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
      4 // (c) 2021-2022 Drew DeVault <sir@cmpwn.com>
      5 // (c) 2021 Ember Sawady <ecs@d2evs.net>
      6 // (c) 2021 Kiëd Llaentenn <kiedtl@tilde.team>
      7 // (c) 2021 Thomas Bracht Laumann Jespersen <t@laumann.xyz>
      8 use ascii;
      9 use crypto::sha256;
     10 use fs;
     11 use hare::ast;
     12 use hare::lex;
     13 use hare::parse;
     14 use hash;
     15 use io;
     16 use path;
     17 use sort;
     18 use strings;
     19 use strio;
     20 
     21 def ABI_VERSION: u8 = 5;
     22 
     23 // Scans the files in a directory for eligible build inputs and returns a
     24 // [[version]] which includes all applicable files and their dependencies.
     25 export fn scan(ctx: *context, path: str) (version | error) = {
     26 	// TODO: Incorporate defines into the hash
     27 	let sha = sha256::sha256();
     28 	for (let i = 0z; i < len(ctx.tags); i += 1) {
     29 		const tag = &ctx.tags[i];
     30 		hash::write(&sha, if (tag.mode == tag_mode::INCLUSIVE) {
     31 			yield [1];
     32 		} else {
     33 			yield [0];
     34 		});
     35 		hash::write(&sha, strings::toutf8(tag.name));
     36 	};
     37 	let iter = match (fs::iter(ctx.fs, path)) {
     38 	case fs::wrongtype =>
     39 		// Single file case
     40 		let inputs: []input = [];
     41 		let deps: []ast::ident = [];
     42 		let ft = match (type_for_ext(path)) {
     43 		case void =>
     44 			return notfound;
     45 		case let ft: filetype =>
     46 			yield ft;
     47 		};
     48 		let path = fs::resolve(ctx.fs, path);
     49 		let st = fs::stat(ctx.fs, path)?;
     50 		let in = input {
     51 			path = strings::dup(path),
     52 			stat = st,
     53 			ft = ft,
     54 			hash = scan_file(ctx, path, ft, &deps)?,
     55 			...
     56 		};
     57 		append(inputs, in);
     58 
     59 		let sumbuf: [sha256::SIZE]u8 = [0...];
     60 		hash::write(&sha, in.hash);
     61 		hash::sum(&sha, sumbuf);
     62 
     63 		return version {
     64 			hash = sumbuf,
     65 			basedir = strings::dup(path::dirname(path)),
     66 			depends = deps,
     67 			inputs = inputs,
     68 			tags = tags_dup(ctx.tags),
     69 			...
     70 		};
     71 	case let err: fs::error =>
     72 		return err;
     73 	case let iter: *fs::iterator =>
     74 		yield iter;
     75 	};
     76 	defer fs::finish(iter);
     77 	let ver = version {
     78 		basedir = strings::dup(path),
     79 		tags = tags_dup(ctx.tags),
     80 		...
     81 	};
     82 	scan_directory(ctx, &ver, &sha, path, iter)?;
     83 
     84 	let readme = path::join(path, "README");
     85 	defer free(readme);
     86 	if (len(ver.inputs) == 0 && !fs::exists(ctx.fs, readme)) {
     87 		// TODO: HACK: README is a workaround for haredoc issues
     88 		return notfound;
     89 	};
     90 
     91 	let tmp: [sha256::SIZE]u8 = [0...];
     92 	hash::sum(&sha, tmp);
     93 	ver.hash = alloc([], sha.sz);
     94 	append(ver.hash, tmp...);
     95 	return ver;
     96 };
     97 
     98 // Given a file or directory name, parses it into the basename, extension, and
     99 // tag set.
    100 export fn parsename(name: str) (str, str, []tag) = {
    101 	let ext = path::extension(name);
    102 	let base = ext.0, ext = ext.1;
    103 
    104 	let p = strings::index(base, '+');
    105 	let m = strings::index(base, '-');
    106 	if (p is void && m is void) {
    107 		return (base, ext, []);
    108 	};
    109 	let i: size =
    110 		if (p is void && m is size) m: size
    111 		else if (m is void && p is size) p: size
    112 		else if (m: size < p: size) m: size
    113 		else p: size;
    114 	let tags = strings::sub(base, i, strings::end);
    115 	let tags = match (parsetags(tags)) {
    116 	case void =>
    117 		return (base, ext, []);
    118 	case let t: []tag =>
    119 		yield t;
    120 	};
    121 	let base = strings::sub(base, 0, i);
    122 	return (base, ext, tags);
    123 };
    124 
    125 fn scan_directory(
    126 	ctx: *context,
    127 	ver: *version,
    128 	sha: *hash::hash,
    129 	path: str,
    130 	iter: *fs::iterator,
    131 ) (void | error) = {
    132 	let files: []str = [], dirs: []str = [];
    133 	defer {
    134 		strings::freeall(files);
    135 		strings::freeall(dirs);
    136 	};
    137 
    138 	let pathbuf = path::init();
    139 	for (true) {
    140 		const ent = match (fs::next(iter)) {
    141 		case void =>
    142 			break;
    143 		case let ent: fs::dirent =>
    144 			yield ent;
    145 		};
    146 
    147 		switch (ent.ftype) {
    148 		case fs::mode::LINK =>
    149 			let linkpath = path::set(&pathbuf, path, ent.name)!;
    150 			linkpath = fs::readlink(ctx.fs, linkpath)?;
    151 			if (!path::abs(linkpath)) {
    152 				linkpath = path::set(&pathbuf, path, linkpath)!;
    153 			};
    154 
    155 			const st = fs::stat(ctx.fs, linkpath)?;
    156 			if (fs::isfile(st.mode)) {
    157 				append(files, strings::dup(ent.name));
    158 			} else if (fs::isdir(st.mode)) {
    159 				append(dirs, strings::dup(ent.name));
    160 			} else if (fs::islink(st.mode)) {
    161 				abort(); // TODO: Resolve recursively
    162 			};
    163 		case fs::mode::DIR =>
    164 			append(dirs, strings::dup(ent.name));
    165 		case fs::mode::REG =>
    166 			append(files, strings::dup(ent.name));
    167 		case => void;
    168 		};
    169 	};
    170 
    171 	// Sorted to keep the hash consistent
    172 	sort::strings(dirs);
    173 	sort::strings(files);
    174 
    175 	// Tuple of is_directory, basename, tags, and path to a candidate input.
    176 	let inputs: [](bool, str, []tag, str) = [];
    177 	defer for (let i = 0z; i < len(inputs); i += 1) {
    178 		// For file paths, these are assigned to the input, which
    179 		// assumes ownership over them.
    180 		if (inputs[i].0) {
    181 			free(inputs[i].1);
    182 			tags_free(inputs[i].2);
    183 			free(inputs[i].3);
    184 		};
    185 	};
    186 
    187 	// For a given basename, only the most specific path (i.e. with the most
    188 	// tags) is used.
    189 	//
    190 	// foo.ha
    191 	// foo+linux.ha
    192 	// foo+linux+x86_64/
    193 	// 	bar.ha
    194 	// 	baz.ha
    195 	//
    196 	// In this case, foo+linux+x86_64 is the most specific, and so its used
    197 	// as the build input and the other two files are discarded.
    198 
    199 	for (let i = 0z; i < len(dirs); i += 1) {
    200 		let name = dirs[i];
    201 		let parsed = parsename(name);
    202 		let base = parsed.0, tags = parsed.2;
    203 
    204 		if (!strings::hasprefix(name, "+")
    205 				&& !strings::hasprefix(name, "-")) {
    206 			if (!strings::hasprefix(name, ".")) {
    207 				append(ver.subdirs, strings::dup(name));
    208 			};
    209 			continue;
    210 		};
    211 		if (!tagcompat(ctx.tags, tags)) {
    212 			continue;
    213 		};
    214 
    215 		let path = path::join(path, name);
    216 		let tuple = (true, strings::dup(base), tags, path);
    217 		let superceded = false;
    218 		for (let j = 0z; j < len(inputs); j += 1) {
    219 			if (inputs[j].1 != base) {
    220 				continue;
    221 			};
    222 			let theirs = inputs[j].2;
    223 			if (len(theirs) < len(tags)) {
    224 				free(inputs[j].1);
    225 				tags_free(inputs[j].2);
    226 				free(inputs[j].3);
    227 				inputs[j] = tuple;
    228 				superceded = true;
    229 				break;
    230 			} else if (len(theirs) > len(tags)) {
    231 				// They are more specific
    232 				superceded = true;
    233 				break;
    234 			} else if (len(base) != 0) {
    235 				return (path, inputs[j].3): ambiguous;
    236 			};
    237 		};
    238 		if (!superceded) {
    239 			append(inputs, tuple);
    240 		};
    241 	};
    242 
    243 	for (let i = 0z; i < len(files); i += 1) {
    244 		let name = files[i];
    245 		let parsed = parsename(name);
    246 		let base = parsed.0, ext = parsed.1, tags = parsed.2;
    247 
    248 		let eligible = false;
    249 		static const exts = [".ha", ".s"];
    250 		for (let i = 0z; i < len(exts); i += 1) {
    251 			if (exts[i] == ext) {
    252 				eligible = true;
    253 				break;
    254 			};
    255 		};
    256 		if (!eligible || !tagcompat(ctx.tags, tags)) {
    257 			tags_free(tags);
    258 			continue;
    259 		};
    260 
    261 		let path = path::join(path, name);
    262 		let tuple = (false, strings::dup(base), tags, path);
    263 		let superceded = false;
    264 		for (let j = 0z; j < len(inputs); j += 1) {
    265 			if (inputs[j].1 != base) {
    266 				continue;
    267 			};
    268 			let theirs = inputs[j].2;
    269 			if (len(theirs) < len(tags)) {
    270 				// We are more specific
    271 				free(inputs[j].1);
    272 				tags_free(inputs[j].2);
    273 				free(inputs[j].3);
    274 				inputs[j] = tuple;
    275 				superceded = true;
    276 				break;
    277 			} else if (len(theirs) > len(tags)) {
    278 				// They are more specific
    279 				superceded = true;
    280 				break;
    281 			} else if (len(base) != 0) {
    282 				return (path, inputs[j].3): ambiguous;
    283 			};
    284 		};
    285 		if (!superceded) {
    286 			append(inputs, tuple);
    287 		};
    288 	};
    289 
    290 	for (let i = 0z; i < len(inputs); i += 1) {
    291 		let isdir = inputs[i].0, path = inputs[i].3;
    292 		if (isdir) {
    293 			let iter = fs::iter(ctx.fs, path)?;
    294 			defer fs::finish(iter);
    295 			scan_directory(ctx, ver, sha, path, iter)?;
    296 		} else {
    297 			let path = fs::resolve(ctx.fs, path);
    298 			let st = fs::stat(ctx.fs, path)?;
    299 			let ftype = type_for_ext(path) as filetype;
    300 			let in = input {
    301 				path = strings::dup(path),
    302 				stat = st,
    303 				ft = ftype,
    304 				hash = scan_file(ctx, path, ftype, &ver.depends)?,
    305 				basename = inputs[i].1,
    306 				tags = inputs[i].2,
    307 				...
    308 			};
    309 			append(ver.inputs, in);
    310 			hash::write(sha, in.hash);
    311 		};
    312 	};
    313 };
    314 
    315 // Looks up a module by its identifier from HAREPATH, and returns a [[version]]
    316 // which includes all eligible build inputs.
    317 export fn lookup(ctx: *context, name: ast::ident) (version | error) = {
    318 	let ipath = identpath(name);
    319 	defer free(ipath);
    320 	for (let i = len(ctx.paths); i > 0; i -= 1) {
    321 		let cand = path::join(ctx.paths[i - 1], ipath);
    322 		defer free(cand);
    323 		match (scan(ctx, cand)) {
    324 		case let v: version =>
    325 			return v;
    326 		case error => void;
    327 		};
    328 	};
    329 	return notfound;
    330 };
    331 
    332 fn type_for_ext(name: str) (filetype | void) = {
    333 	const ext = path::extension(name).1;
    334 	return
    335 		if (ext == ".ha") filetype::HARE
    336 		else if (ext == ".s") filetype::ASSEMBLY
    337 		else void;
    338 };
    339 
    340 fn scan_file(
    341 	ctx: *context,
    342 	path: str,
    343 	ftype: filetype,
    344 	deps: *[]ast::ident,
    345 ) ([]u8 | error) = {
    346 	let f = fs::open(ctx.fs, path)?;
    347 	let sha = sha256::sha256();
    348 	hash::write(&sha, strings::toutf8(path));
    349 	hash::write(&sha, [ABI_VERSION]);
    350 
    351 	if (ftype == filetype::HARE) {
    352 		let tee = io::tee(f, &sha);
    353 		let lexer = lex::init(&tee, path);
    354 		let imports = match (parse::imports(&lexer)) {
    355 		case let im: []ast::import =>
    356 			yield im;
    357 		case let err: parse::error =>
    358 			io::close(f): void;
    359 			return err;
    360 		};
    361 		for (let i = 0z; i < len(imports); i += 1) {
    362 			if (!have_ident(deps, imports[i].ident)) {
    363 				append(deps, imports[i].ident);
    364 			};
    365 		};
    366 		// Finish spooling out the file for the SHA
    367 		match (io::copy(io::empty, &tee)) {
    368 		case size => void;
    369 		case let err: io::error =>
    370 			io::close(f): void;
    371 			return err;
    372 		};
    373 	} else {
    374 		match (io::copy(&sha, f)) {
    375 		case size => void;
    376 		case let err: io::error =>
    377 			io::close(f): void;
    378 			return err;
    379 		};
    380 	};
    381 	io::close(f)?;
    382 
    383 	let tmp: [sha256::SIZE]u8 = [0...];
    384 	hash::sum(&sha, tmp);
    385 
    386 	let checksum: []u8 = alloc([], sha.sz);
    387 	append(checksum, tmp...);
    388 	return checksum;
    389 };
    390 
    391 fn have_ident(sl: *[]ast::ident, id: ast::ident) bool = {
    392 	for (let i = 0z; i < len(sl); i += 1) {
    393 		if (ast::ident_eq(sl[i], id)) {
    394 			return true;
    395 		};
    396 	};
    397 	return false;
    398 };
    399 
    400 // Parses a set of build tags, returning void if the string is an invalid tag
    401 // set. The caller must free the return value with [[tags_free]].
    402 export fn parsetags(in: str) ([]tag | void) = {
    403 	let tags: []tag = [];
    404 	let iter = strings::iter(in);
    405 	for (true) {
    406 		let t = tag { ... };
    407 		let m = match (strings::next(&iter)) {
    408 		case void =>
    409 			break;
    410 		case let r: rune =>
    411 			yield r;
    412 		};
    413 		t.mode = switch (m) {
    414 		case =>
    415 			tags_free(tags);
    416 			return;
    417 		case '+' =>
    418 			yield tag_mode::INCLUSIVE;
    419 		case '-' =>
    420 			yield tag_mode::EXCLUSIVE;
    421 		};
    422 		let buf = strio::dynamic();
    423 		for (true) match (strings::next(&iter)) {
    424 		case void =>
    425 			break;
    426 		case let r: rune =>
    427 			if (ascii::isalnum(r) || r == '_') {
    428 				strio::appendrune(&buf, r)!;
    429 			} else {
    430 				strings::prev(&iter);
    431 				break;
    432 			};
    433 		};
    434 		t.name = strio::string(&buf);
    435 		append(tags, t);
    436 	};
    437 	return tags;
    438 };
    439 
    440 // Frees a set of tags.
    441 export fn tags_free(tags: []tag) void = {
    442 	for (let i = 0z; i < len(tags); i += 1) {
    443 		free(tags[i].name);
    444 	};
    445 	free(tags);
    446 };
    447 
    448 // Duplicates a set of tags.
    449 export fn tags_dup(tags: []tag) []tag = {
    450 	let new: []tag = alloc([], len(tags));
    451 	for (let i = 0z; i < len(tags); i += 1) {
    452 		append(new, tag {
    453 			name = strings::dup(tags[i].name),
    454 			mode = tags[i].mode,
    455 		});
    456 	};
    457 	return new;
    458 };
    459 
    460 // Compares two tag sets and tells you if they are compatible.
    461 export fn tagcompat(have: []tag, want: []tag) bool = {
    462 	// XXX: O(n²), lame
    463 	for (let i = 0z; i < len(want); i += 1) {
    464 		let present = false;
    465 		for (let j = 0z; j < len(have); j += 1) {
    466 			if (have[j].name == want[i].name) {
    467 				present = have[j].mode == tag_mode::INCLUSIVE;
    468 				break;
    469 			};
    470 		};
    471 		switch (want[i].mode) {
    472 		case tag_mode::INCLUSIVE =>
    473 			if (!present) return false;
    474 		case tag_mode::EXCLUSIVE =>
    475 			if (present) return false;
    476 		};
    477 	};
    478 	return true;
    479 };