hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

srcs.ha (10540B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use bytes;
      5 use fs;
      6 use hare::ast;
      7 use os;
      8 use path;
      9 use sort;
     10 use sort::cmp;
     11 use strings;
     12 use time;
     13 
     14 // A file tag, e.g. +x86_64, or -libc.
     15 export type tag = struct {
     16 	// The name of the tag.
     17 	name: str,
     18 	// Whether the tag is inclusive (+tag) or not (-tag).
     19 	include: bool,
     20 };
     21 
     22 // A set of sources for a module, filtered by a set of tags.
     23 export type srcset = struct {
     24 	// The last time the list of source files changed. Note that this is not
     25 	// the last time that the source files themselves changed.
     26 	mtime: time::instant,
     27 	// Source directories traversed while finding these source files.
     28 	dirs: []str,
     29 	// The list of tags that were actually encountered while finding these
     30 	// source files. These are sorted alphabetically, and are the set of
     31 	// tags that should be used to find this module in the cache.
     32 	seentags: []str,
     33 	// hare source files (.ha)
     34 	ha: []str,
     35 	// assembly source files (.s)
     36 	s: []str,
     37 	// object source files (.o)
     38 	o: []str,
     39 	// linker scripts (.sc)
     40 	sc: []str,
     41 };
     42 
     43 // Frees the resources associated with a [[srcset]].
     44 export fn finish_srcset(srcs: *srcset) void = {
     45 	strings::freeall(srcs.dirs);
     46 	strings::freeall(srcs.seentags);
     47 	strings::freeall(srcs.ha);
     48 	strings::freeall(srcs.s);
     49 	strings::freeall(srcs.o);
     50 	strings::freeall(srcs.sc);
     51 };
     52 
     53 // Find the on-disk path and set of source files for a given module. The path is
     54 // statically allocated and may be overwritten on subsequent calls.
     55 export fn find(ctx: *context, loc: location) ((str, srcset) | error) = {
     56 	match (loc) {
     57 	case let buf: *path::buffer =>
     58 		match (path_find(ctx, buf)) {
     59 		case let s: srcset =>
     60 			return (path::string(buf), s);
     61 		case not_found =>
     62 			return attach(locstr(loc), not_found);
     63 		case let e: error =>
     64 			return e;
     65 		};
     66 	case let ident: ast::ident =>
     67 		let tok = strings::tokenize(ctx.harepath, ":");
     68 		let next: (str | done) = ".";
     69 		for (next is str; next = strings::next_token(&tok)) {
     70 			if (!os::exists(next as str)) {
     71 				continue;
     72 			};
     73 
     74 			static let buf = path::buffer { ... };
     75 			path::set(&buf, os::realpath(next as str)?)?;
     76 			for (let part .. ident) {
     77 				path::push(&buf, part)?;
     78 			};
     79 
     80 			match (path_find(ctx, &buf)) {
     81 			case let s: srcset =>
     82 				return (path::string(&buf), s);
     83 			case not_found => void;
     84 			case let e: error =>
     85 				return attach(strings::dup(path::string(&buf)), e);
     86 			};
     87 		};
     88 		return attach(locstr(ident), not_found);
     89 	};
     90 };
     91 
     92 fn path_find(ctx: *context, buf: *path::buffer) (srcset | error) = {
     93 	// list of sources to return, with 3 extra fields prepended to allow
     94 	// quick lookup and comparison. each item is e.g.:
     95 	// ("basename", "ha", 2 (# of tags), ["path/-tag1/basename+tag2.ha"])
     96 	// if len(srcs.3) != 1 at the end of _findsrcs() then there's a conflict
     97 	let srcs: [](str, str, size, []str) = [];
     98 	defer {
     99 		for (let i = 0z; i < len(srcs); i += 1) {
    100 			free(srcs[i].0);
    101 			free(srcs[i].1);
    102 			free(srcs[i].3);
    103 		};
    104 		free(srcs);
    105 	};
    106 	let mtime = time::INSTANT_MIN;
    107 	let res = srcset { mtime = time::INSTANT_MIN, ... };
    108 
    109 	_findsrcs(buf, ctx.tags, &srcs, &res, 0)?;
    110 	for (let i = 0z; i < len(srcs); i += 1) {
    111 		if (len(srcs[i].3) != 1) {
    112 			return alloc(srcs[i].3...): file_conflict;
    113 		};
    114 		let out = switch (srcs[i].1) {
    115 		case "ha" =>
    116 			yield &res.ha;
    117 		case "s" =>
    118 			yield &res.s;
    119 		case "o" =>
    120 			yield &res.o;
    121 		case "sc" =>
    122 			yield &res.sc;
    123 		case => abort();
    124 		};
    125 		append(out, srcs[i].3[0]);
    126 	};
    127 
    128 	// module needs either a hare source file or a README in order to be
    129 	// valid. used to allow eg. shadowing foo::bar:: without accidentally
    130 	// shadowing foo::
    131 	if (len(res.ha) == 0) {
    132 		path::push(buf, "README")?;
    133 		defer path::pop(buf);
    134 		if (!os::exists(path::string(buf))) {
    135 			finish_srcset(&res);
    136 			return not_found;
    137 		};
    138 	};
    139 
    140 	sort::sort(res.dirs, size(str), &cmp::strs);
    141 	sort::sort(res.ha, size(str), &cmp::strs);
    142 	sort::sort(res.s, size(str), &cmp::strs);
    143 	sort::sort(res.o, size(str), &cmp::strs);
    144 	sort::sort(res.sc, size(str), &cmp::strs);
    145 	return res;
    146 };
    147 
    148 // simple implementation but the reasons for it are delicate
    149 //
    150 // finding the right sources is conceptually simple: just collect all the
    151 // files compatible with the tagset and then pick the best ones for each
    152 // conflicting filename
    153 //
    154 // the mtime is the first weird part: you want to find the last time a file
    155 // was added, moved, or deleted, but only for parts of the module relevant to
    156 // the input tags. the edge-case here is "what if i renamed a subdirectory so
    157 // that it's tags don't match". the solution is that you can just find the
    158 // latest mtime of directories that get traversed, i.e. have matching tags.
    159 // this is because the tag-compatible subset of the filetree constitutes
    160 // a filetree in its own right, where a file being renamed to no longer be
    161 // part of the tag-filetree is equivalent to it being deleted from the
    162 // tag-filetree. the mtime-checking does not distinguish between renames
    163 // and deletions, so we get this for free by checking mtimes in the underlying
    164 // filesystem
    165 //
    166 // the second weird part is the seentags: the goal here is finding the subset
    167 // of the input tags which were actually used while finding the srcset,
    168 // so that the cache can be reused for two sets of input tags which don't
    169 // produce different srcsets. the method used here is just to take note of
    170 // the tags which were encountered while traversing the tree, and not to
    171 // continue down a file path beyond the first incompatible tag. exploring
    172 // this method, you could look at e.g "mod/+linux/+x86_64.ha". you might think
    173 // that this should, in theory, produce 4 different cache versions, since
    174 // there are 2 tags, each of which has 2 states; and using the method here
    175 // would produce only 3: none, +linux, and +linux+x86_64. however, there are
    176 // actually only 2 options: none, and +linux+x86_64, and the method here adds
    177 // one redundant slot for +linux. this is because either tag on their own
    178 // doesn't change whether the path matches, only both together. in practice,
    179 // the redundancy in the method used here will cause minimal overhead, because
    180 // it's likely that you do actually have a file with just one of the tags
    181 // somewhere else in your module, or else you would have combined them into
    182 // one tag. in any case, the method used here is fast because it gets to stop
    183 // searching as soon as it can
    184 fn _findsrcs(
    185 	buf: *path::buffer,
    186 	in_tags: []str,
    187 	srcs: *[](str, str, size, []str),
    188 	res: *srcset,
    189 	tagdepth: size,
    190 ) (void | error) = {
    191 	const pathstr = path::string(buf);
    192 	const stat = match (os::stat(pathstr)) {
    193 	case let stat: fs::filestat =>
    194 		yield stat;
    195 	case fs::error =>
    196 		return;
    197 	};
    198 
    199 	let tmp = pathstr;
    200 	for (fs::islink(stat.mode)) {
    201 		if (time::compare(res.mtime, stat.mtime) < 0) {
    202 			res.mtime = stat.mtime;
    203 		};
    204 		tmp = os::realpath(tmp)?;
    205 		stat = os::stat(tmp)?;
    206 	};
    207 
    208 	if (fs::isfile(stat.mode)) {
    209 		let ext = match (path::pop_ext(buf)) {
    210 		case void =>
    211 			return;
    212 		case let ext: str =>
    213 			yield ext;
    214 		};
    215 		switch (ext) {
    216 		case "ha", "s", "o", "sc" => void;
    217 		case =>
    218 			return;
    219 		};
    220 		let filebytes = strings::toutf8(path::peek(buf) as str);
    221 		path::push_ext(buf, ext)?;
    222 
    223 		let split = tagindex(filebytes);
    224 		let (base, tags) = (
    225 			strings::fromutf8_unsafe(filebytes[..split]),
    226 			strings::fromutf8_unsafe(filebytes[split..]),
    227 		);
    228 
    229 		let wanttags = match (parse_tags(tags)) {
    230 		case let tags: []tag =>
    231 			yield tags;
    232 		case let e: error =>
    233 			return attach(strings::dup(path::string(buf)), e);
    234 		};
    235 		defer free(wanttags);
    236 		if (!seentags_compat(in_tags, wanttags, &res.seentags)) {
    237 			return;
    238 		};
    239 
    240 		let ntags = tagdepth + len(wanttags);
    241 		let bufstr = path::string(buf);
    242 		for (let i = 0z; i < len(srcs); i += 1) {
    243 			if (srcs[i].0 == base && srcs[i].1 == ext) {
    244 				if (srcs[i].2 > ntags) {
    245 					return;
    246 				};
    247 				if (srcs[i].2 < ntags) {
    248 					srcs[i].2 = ntags;
    249 					strings::freeall(srcs[i].3);
    250 					srcs[i].3 = [];
    251 				};
    252 				append(srcs[i].3, strings::dup(bufstr));
    253 				return;
    254 			};
    255 		};
    256 
    257 		append(srcs, (
    258 			strings::dup(base),
    259 			strings::dup(ext),
    260 			ntags,
    261 			alloc([strings::dup(bufstr)]),
    262 		));
    263 		return;
    264 	};
    265 
    266 	if (!fs::isdir(stat.mode)) return; // ignore special files
    267 
    268 	append(res.dirs, strings::dup(pathstr));
    269 	if (time::compare(res.mtime, stat.mtime) < 0) {
    270 		res.mtime = stat.mtime;
    271 	};
    272 
    273 	let iter = match (os::iter(pathstr)) {
    274 	case let i: *fs::iterator =>
    275 		yield i;
    276 	case let e: fs::error =>
    277 		return attach(strings::dup(pathstr), e);
    278 	};
    279 	defer fs::finish(iter);
    280 
    281 	for (let d => fs::next(iter)?) {
    282 		path::push(buf, d.name)?;
    283 		defer path::pop(buf);
    284 
    285 		if (fs::isdir(d.ftype)) {
    286 			if (tagindex(strings::toutf8(d.name)) != 0) {
    287 				continue;
    288 			};
    289 			let wanttags = match (parse_tags(d.name)) {
    290 			case let tags: []tag =>
    291 				yield tags;
    292 			case let e: error =>
    293 				return attach(strings::dup(path::string(buf)), e);
    294 			};
    295 			defer free(wanttags);
    296 			if (!seentags_compat(in_tags, wanttags, &res.seentags)) {
    297 				continue;
    298 			};
    299 
    300 			_findsrcs(buf, in_tags, srcs, res,
    301 				tagdepth+len(wanttags))?;
    302 		} else if (fs::isfile(d.ftype)) {
    303 			_findsrcs(buf, in_tags, srcs, res, tagdepth)?;
    304 		};
    305 	};
    306 };
    307 
    308 fn tagindex(bs: []u8) size = {
    309 	let i = 0z;
    310 	for (i < len(bs) && bs[i] != '+' && bs[i] != '-'; i += 1) void;
    311 	return i;
    312 };
    313 
    314 // Parses tags from a string. The tag themselves are borrowed from the input,
    315 // but the caller must free the slice returned.
    316 export fn parse_tags(s: str) ([]tag | error) = {
    317 	let bs = strings::toutf8(s);
    318 	if (bytes::contains(bs, '.')) {
    319 		return tag_has_dot;
    320 	};
    321 	let tags: []tag = [];
    322 	let start = tagindex(bs);
    323 	if (start != 0) {
    324 		return tag_bad_format;
    325 	};
    326 	for (start < len(bs)) {
    327 		const end = start + 1 + tagindex(bs[start+1..]);
    328 		append(tags, tag {
    329 			name = strings::fromutf8_unsafe(bs[start+1..end]),
    330 			include = bs[start] == '+',
    331 		});
    332 		start = end;
    333 	};
    334 	return tags;
    335 };
    336 
    337 // Checks if a set of tags are compatible with a tag requirement.
    338 export fn tags_compat(have: []str, want: []tag) bool = {
    339 	for (let t .. want) {
    340 		let found = false;
    341 
    342 		for (let ht .. have) {
    343 			if (ht == t.name) {
    344 				found = true;
    345 				break;
    346 			};
    347 		};
    348 
    349 		if (t.include ^^ found) {
    350 			return false;
    351 		};
    352 	};
    353 	return true;
    354 };
    355 
    356 // same as tags_compat, but also adds any relevant tags to a seentags list
    357 // for use in _findsrcs.
    358 fn seentags_compat(have: []str, want: []tag, seen: *[]str) bool = {
    359 	for (let t .. want) {
    360 		let found = false;
    361 
    362 		for (let ht .. have) {
    363 			if (ht == t.name) {
    364 				insert_uniq(seen, t.name);
    365 				found = true;
    366 				break;
    367 			};
    368 		};
    369 
    370 		if (t.include ^^ found) {
    371 			return false;
    372 		};
    373 	};
    374 	return true;
    375 };