hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

scan.ha (11949B)


      1 // License: MPL-2.0
      2 // (c) 2021-2022 Alexey Yerin <yyp@disroot.org>
      3 // (c) 2022 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
      4 // (c) 2021-2022 Drew DeVault <sir@cmpwn.com>
      5 // (c) 2021 Ember Sawady <ecs@d2evs.net>
      6 // (c) 2021 Kiëd Llaentenn <kiedtl@tilde.team>
      7 // (c) 2021 Thomas Bracht Laumann Jespersen <t@laumann.xyz>
      8 use ascii;
      9 use crypto::sha256;
     10 use fs;
     11 use hare::ast;
     12 use hare::lex;
     13 use hare::parse;
     14 use hash;
     15 use io;
     16 use memio;
     17 use path;
     18 use sort;
     19 use strings;
     20 use bufio;
     21 use os;
     22 
     23 def ABI_VERSION: u8 = 6;
     24 
     25 // Scans the files in a directory for eligible build inputs and returns a
     26 // [[version]] which includes all applicable files and their dependencies.
     27 export fn scan(ctx: *context, path: str) (version | error) = {
     28 	// TODO: Incorporate defines into the hash
     29 	let sha = sha256::sha256();
     30 	for (let i = 0z; i < len(ctx.tags); i += 1) {
     31 		const tag = &ctx.tags[i];
     32 		hash::write(&sha, if (tag.mode == tag_mode::INCLUSIVE) {
     33 			yield [1];
     34 		} else {
     35 			yield [0];
     36 		});
     37 		hash::write(&sha, strings::toutf8(tag.name));
     38 	};
     39 	let iter = match (fs::iter(ctx.fs, path)) {
     40 	case fs::wrongtype =>
     41 		// Single file case
     42 		let inputs: []input = [];
     43 		let deps: []ast::ident = [];
     44 		let ft = match (type_for_ext(path)) {
     45 		case void =>
     46 			return notfound;
     47 		case let ft: filetype =>
     48 			yield ft;
     49 		};
     50 		let path = fs::resolve(ctx.fs, path);
     51 		let st = fs::stat(ctx.fs, path)?;
     52 		let in = input {
     53 			path = strings::dup(path),
     54 			stat = st,
     55 			ft = ft,
     56 			hash = scan_file(ctx, path, ft, &deps)?,
     57 			...
     58 		};
     59 		append(inputs, in);
     60 
     61 		let sumbuf: [sha256::SIZE]u8 = [0...];
     62 		hash::write(&sha, in.hash);
     63 		hash::sum(&sha, sumbuf);
     64 
     65 		return version {
     66 			hash = sumbuf,
     67 			basedir = strings::dup(path::dirname(path)),
     68 			depends = deps,
     69 			inputs = inputs,
     70 			tags = tags_dup(ctx.tags),
     71 			...
     72 		};
     73 	case let err: fs::error =>
     74 		return err;
     75 	case let iter: *fs::iterator =>
     76 		yield iter;
     77 	};
     78 	defer fs::finish(iter);
     79 	let ver = version {
     80 		basedir = strings::dup(path),
     81 		tags = tags_dup(ctx.tags),
     82 		...
     83 	};
     84 	scan_directory(ctx, &ver, &sha, path, iter)?;
     85 
     86 	let buf = path::init(path, "README")!;
     87 	if (len(ver.inputs) == 0 && !fs::exists(ctx.fs, path::string(&buf))) {
     88 		// TODO: HACK: README is a workaround for haredoc issues
     89 		return notfound;
     90 	};
     91 
     92 	let tmp: [sha256::SIZE]u8 = [0...];
     93 	hash::sum(&sha, tmp);
     94 	ver.hash = alloc([], sha.sz);
     95 	append(ver.hash, tmp...);
     96 	return ver;
     97 };
     98 
     99 // Given a file or directory name, parses it into the basename, extension, and
    100 // tag set.
    101 export fn parsename(name: str) (str, str, []tag) = {
    102 	static let buf = path::buffer {...};
    103 	path::set(&buf, name)!;
    104 	let ext = match (path::pop_ext(&buf)) {
    105 	case void => yield "";
    106 	case let s: str => yield strings::dup(s);
    107 	};
    108 	let base = match (path::peek(&buf)) {
    109 	case void => yield "";
    110 	case let s: str => yield strings::dup(s);
    111 	};
    112 
    113 	let p = strings::index(base, '+');
    114 	let m = strings::index(base, '-');
    115 	if (p is void && m is void) {
    116 		return (base, ext, []);
    117 	};
    118 	let i: size =
    119 		if (p is void && m is size) m: size
    120 		else if (m is void && p is size) p: size
    121 		else if (m: size < p: size) m: size
    122 		else p: size;
    123 	let tags = strings::sub(base, i, strings::end);
    124 	let tags = match (parsetags(tags)) {
    125 	case void =>
    126 		return (base, ext, []);
    127 	case let t: []tag =>
    128 		yield t;
    129 	};
    130 	let base = strings::sub(base, 0, i);
    131 	return (base, ext, tags);
    132 };
    133 
    134 fn scan_directory(
    135 	ctx: *context,
    136 	ver: *version,
    137 	sha: *hash::hash,
    138 	path: str,
    139 	iter: *fs::iterator,
    140 ) (void | error) = {
    141 	let files: []str = [], dirs: []str = [];
    142 	defer {
    143 		strings::freeall(files);
    144 		strings::freeall(dirs);
    145 	};
    146 
    147 	let pathbuf = path::init()!;
    148 	for (true) {
    149 		const ent = match (fs::next(iter)) {
    150 		case void =>
    151 			break;
    152 		case let ent: fs::dirent =>
    153 			yield ent;
    154 		};
    155 
    156 		switch (ent.ftype) {
    157 		case fs::mode::LINK =>
    158 			let linkpath = path::set(&pathbuf, path, ent.name)!;
    159 			linkpath = fs::readlink(ctx.fs, linkpath)?;
    160 			if (!path::abs(linkpath)) {
    161 				linkpath = path::set(&pathbuf, path, linkpath)!;
    162 			};
    163 
    164 			const st = fs::stat(ctx.fs, linkpath)?;
    165 			if (fs::isfile(st.mode)) {
    166 				append(files, strings::dup(ent.name));
    167 			} else if (fs::isdir(st.mode)) {
    168 				append(dirs, strings::dup(ent.name));
    169 			} else if (fs::islink(st.mode)) {
    170 				abort(); // TODO: Resolve recursively
    171 			};
    172 		case fs::mode::DIR =>
    173 			append(dirs, strings::dup(ent.name));
    174 		case fs::mode::REG =>
    175 			append(files, strings::dup(ent.name));
    176 		case => void;
    177 		};
    178 	};
    179 
    180 	// Sorted to keep the hash consistent
    181 	sort::strings(dirs);
    182 	sort::strings(files);
    183 
    184 	// Tuple of is_directory, basename, tags, and path to a candidate input.
    185 	let inputs: [](bool, str, []tag, str) = [];
    186 	defer for (let i = 0z; i < len(inputs); i += 1) {
    187 		// For file paths, these are assigned to the input, which
    188 		// assumes ownership over them.
    189 		if (inputs[i].0) {
    190 			free(inputs[i].1);
    191 			tags_free(inputs[i].2);
    192 			free(inputs[i].3);
    193 		};
    194 	};
    195 
    196 	// For a given basename, only the most specific path (i.e. with the most
    197 	// tags) is used.
    198 	//
    199 	// foo.ha
    200 	// foo+linux.ha
    201 	// foo+linux+x86_64/
    202 	// 	bar.ha
    203 	// 	baz.ha
    204 	//
    205 	// In this case, foo+linux+x86_64 is the most specific, and so its used
    206 	// as the build input and the other two files are discarded.
    207 
    208 	for (let i = 0z; i < len(dirs); i += 1) {
    209 		let name = dirs[i];
    210 		let parsed = parsename(name);
    211 		let base = parsed.0, tags = parsed.2;
    212 
    213 		if (!strings::hasprefix(name, "+")
    214 				&& !strings::hasprefix(name, "-")) {
    215 			if (!strings::hasprefix(name, ".")) {
    216 				append(ver.subdirs, strings::dup(name));
    217 			};
    218 			continue;
    219 		};
    220 		if (!tagcompat(ctx.tags, tags)) {
    221 			continue;
    222 		};
    223 
    224 		const buf = path::init(path, name)!;
    225 		let path = strings::dup(path::string(&buf));
    226 		let tuple = (true, strings::dup(base), tags, path);
    227 		let superceded = false;
    228 		for (let j = 0z; j < len(inputs); j += 1) {
    229 			if (inputs[j].1 != base) {
    230 				continue;
    231 			};
    232 			let theirs = inputs[j].2;
    233 			if (len(theirs) < len(tags)) {
    234 				free(inputs[j].1);
    235 				tags_free(inputs[j].2);
    236 				free(inputs[j].3);
    237 				inputs[j] = tuple;
    238 				superceded = true;
    239 				break;
    240 			} else if (len(theirs) > len(tags)) {
    241 				// They are more specific
    242 				superceded = true;
    243 				break;
    244 			} else if (len(base) != 0) {
    245 				return (path, inputs[j].3): ambiguous;
    246 			};
    247 		};
    248 		if (!superceded) {
    249 			append(inputs, tuple);
    250 		};
    251 	};
    252 
    253 	for (let i = 0z; i < len(files); i += 1) {
    254 		let name = files[i];
    255 		let parsed = parsename(name);
    256 		let base = parsed.0, ext = parsed.1, tags = parsed.2;
    257 
    258 		let eligible = false;
    259 		static const exts = ["ha", "s"];
    260 		for (let i = 0z; i < len(exts); i += 1) {
    261 			if (exts[i] == ext) {
    262 				eligible = true;
    263 				break;
    264 			};
    265 		};
    266 		if (!eligible || !tagcompat(ctx.tags, tags)) {
    267 			tags_free(tags);
    268 			continue;
    269 		};
    270 
    271 		const buf = path::init(path, name)!;
    272 		let path = strings::dup(path::string(&buf));
    273 		let tuple = (false, strings::dup(base), tags, path);
    274 		let superceded = false;
    275 		for (let j = 0z; j < len(inputs); j += 1) {
    276 			if (inputs[j].1 != base) {
    277 				continue;
    278 			};
    279 			let theirs = inputs[j].2;
    280 			if (len(theirs) < len(tags)) {
    281 				// We are more specific
    282 				free(inputs[j].1);
    283 				tags_free(inputs[j].2);
    284 				free(inputs[j].3);
    285 				inputs[j] = tuple;
    286 				superceded = true;
    287 				break;
    288 			} else if (len(theirs) > len(tags)) {
    289 				// They are more specific
    290 				superceded = true;
    291 				break;
    292 			} else if (len(base) != 0) {
    293 				return (path, inputs[j].3): ambiguous;
    294 			};
    295 		};
    296 		if (!superceded) {
    297 			append(inputs, tuple);
    298 		};
    299 	};
    300 
    301 	for (let i = 0z; i < len(inputs); i += 1) {
    302 		let isdir = inputs[i].0, path = inputs[i].3;
    303 		if (isdir) {
    304 			let iter = fs::iter(ctx.fs, path)?;
    305 			defer fs::finish(iter);
    306 			scan_directory(ctx, ver, sha, path, iter)?;
    307 		} else {
    308 			let path = fs::resolve(ctx.fs, path);
    309 			let st = fs::stat(ctx.fs, path)?;
    310 			let ftype = type_for_ext(path) as filetype;
    311 			let in = input {
    312 				path = strings::dup(path),
    313 				stat = st,
    314 				ft = ftype,
    315 				hash = scan_file(ctx, path, ftype, &ver.depends)?,
    316 				basename = inputs[i].1,
    317 				tags = inputs[i].2,
    318 				...
    319 			};
    320 			append(ver.inputs, in);
    321 			hash::write(sha, in.hash);
    322 		};
    323 	};
    324 };
    325 
    326 // Looks up a module by its identifier from HAREPATH, and returns a [[version]]
    327 // which includes all eligible build inputs.
    328 export fn lookup(ctx: *context, name: ast::ident) (version | error) = {
    329 	let ipath = identpath(name);
    330 	defer free(ipath);
    331 	for (let i = len(ctx.paths); i > 0; i -= 1) {
    332 		let cand = path::init(ctx.paths[i - 1], ipath)!;
    333 		match (scan(ctx, path::string(&cand))) {
    334 		case let v: version =>
    335 			return v;
    336 		case error => void;
    337 		};
    338 	};
    339 	return notfound;
    340 };
    341 
    342 fn type_for_ext(name: str) (filetype | void) = {
    343 	static let buf = path::buffer {...};
    344 	path::set(&buf, name)!;
    345 	const ext = path::peek_ext(&buf) as str;
    346 	return
    347 		if (ext == "ha") filetype::HARE
    348 		else if (ext == "s") filetype::ASSEMBLY
    349 		else void;
    350 };
    351 
    352 fn scan_file(
    353 	ctx: *context,
    354 	path: str,
    355 	ftype: filetype,
    356 	deps: *[]ast::ident,
    357 ) ([]u8 | error) = {
    358 	let truef = fs::open(ctx.fs, path)?;
    359 	defer io::close(truef)!;
    360 	let rbuf: [os::BUFSIZ]u8 = [0...];
    361 	let f = &bufio::init(truef, rbuf, []);
    362 	let sha = sha256::sha256();
    363 	hash::write(&sha, strings::toutf8(path));
    364 	hash::write(&sha, [ABI_VERSION]);
    365 
    366 	if (ftype == filetype::HARE) {
    367 		let tee = io::tee(f, &sha);
    368 		let lexer = lex::init(&tee, path);
    369 		defer lex::finish(&lexer);
    370 		let imports = match (parse::imports(&lexer)) {
    371 		case let im: []ast::import =>
    372 			yield im;
    373 		case let err: parse::error =>
    374 			return err;
    375 		};
    376 		for (let i = 0z; i < len(imports); i += 1) {
    377 			if (!have_ident(deps, imports[i].ident)) {
    378 				append(deps, imports[i].ident);
    379 			};
    380 		};
    381 		// Finish spooling out the file for the SHA
    382 		match (io::copy(io::empty, &tee)) {
    383 		case size => void;
    384 		case let err: io::error =>
    385 			return err;
    386 		};
    387 	} else {
    388 		match (io::copy(&sha, f)) {
    389 		case size => void;
    390 		case let err: io::error =>
    391 			return err;
    392 		};
    393 	};
    394 
    395 	let tmp: [sha256::SIZE]u8 = [0...];
    396 	hash::sum(&sha, tmp);
    397 
    398 	let checksum: []u8 = alloc([], sha.sz);
    399 	append(checksum, tmp...);
    400 	return checksum;
    401 };
    402 
    403 fn have_ident(sl: *[]ast::ident, id: ast::ident) bool = {
    404 	for (let i = 0z; i < len(sl); i += 1) {
    405 		if (ast::ident_eq(sl[i], id)) {
    406 			return true;
    407 		};
    408 	};
    409 	return false;
    410 };
    411 
    412 // Parses a set of build tags, returning void if the string is an invalid tag
    413 // set. The caller must free the return value with [[tags_free]].
    414 export fn parsetags(in: str) ([]tag | void) = {
    415 	let tags: []tag = [];
    416 	let iter = strings::iter(in);
    417 	for (true) {
    418 		let t = tag { ... };
    419 		let m = match (strings::next(&iter)) {
    420 		case void =>
    421 			break;
    422 		case let r: rune =>
    423 			yield r;
    424 		};
    425 		t.mode = switch (m) {
    426 		case =>
    427 			tags_free(tags);
    428 			return;
    429 		case '+' =>
    430 			yield tag_mode::INCLUSIVE;
    431 		case '-' =>
    432 			yield tag_mode::EXCLUSIVE;
    433 		};
    434 		let buf = memio::dynamic();
    435 		for (true) match (strings::next(&iter)) {
    436 		case void =>
    437 			break;
    438 		case let r: rune =>
    439 			if (ascii::isalnum(r) || r == '_') {
    440 				memio::appendrune(&buf, r)!;
    441 			} else {
    442 				strings::prev(&iter);
    443 				break;
    444 			};
    445 		};
    446 		t.name = memio::string(&buf)!;
    447 		append(tags, t);
    448 	};
    449 	return tags;
    450 };
    451 
    452 // Frees a set of tags.
    453 export fn tags_free(tags: []tag) void = {
    454 	for (let i = 0z; i < len(tags); i += 1) {
    455 		free(tags[i].name);
    456 	};
    457 	free(tags);
    458 };
    459 
    460 // Duplicates a set of tags.
    461 export fn tags_dup(tags: []tag) []tag = {
    462 	let new: []tag = alloc([], len(tags));
    463 	for (let i = 0z; i < len(tags); i += 1) {
    464 		append(new, tag {
    465 			name = strings::dup(tags[i].name),
    466 			mode = tags[i].mode,
    467 		});
    468 	};
    469 	return new;
    470 };
    471 
    472 // Compares two tag sets and tells you if they are compatible.
    473 export fn tagcompat(have: []tag, want: []tag) bool = {
    474 	// XXX: O(n²), lame
    475 	for (let i = 0z; i < len(want); i += 1) {
    476 		let present = false;
    477 		for (let j = 0z; j < len(have); j += 1) {
    478 			if (have[j].name == want[i].name) {
    479 				present = have[j].mode == tag_mode::INCLUSIVE;
    480 				break;
    481 			};
    482 		};
    483 		switch (want[i].mode) {
    484 		case tag_mode::INCLUSIVE =>
    485 			if (!present) return false;
    486 		case tag_mode::EXCLUSIVE =>
    487 			if (present) return false;
    488 		};
    489 	};
    490 	return true;
    491 };