srcs.ha (10540B)
1 // SPDX-License-Identifier: MPL-2.0 2 // (c) Hare authors <https://harelang.org> 3 4 use bytes; 5 use fs; 6 use hare::ast; 7 use os; 8 use path; 9 use sort; 10 use sort::cmp; 11 use strings; 12 use time; 13 14 // A file tag, e.g. +x86_64, or -libc. 15 export type tag = struct { 16 // The name of the tag. 17 name: str, 18 // Whether the tag is inclusive (+tag) or not (-tag). 19 include: bool, 20 }; 21 22 // A set of sources for a module, filtered by a set of tags. 23 export type srcset = struct { 24 // The last time the list of source files changed. Note that this is not 25 // the last time that the source files themselves changed. 26 mtime: time::instant, 27 // Source directories traversed while finding these source files. 28 dirs: []str, 29 // The list of tags that were actually encountered while finding these 30 // source files. These are sorted alphabetically, and are the set of 31 // tags that should be used to find this module in the cache. 32 seentags: []str, 33 // hare source files (.ha) 34 ha: []str, 35 // assembly source files (.s) 36 s: []str, 37 // object source files (.o) 38 o: []str, 39 // linker scripts (.sc) 40 sc: []str, 41 }; 42 43 // Frees the resources associated with a [[srcset]]. 44 export fn finish_srcset(srcs: *srcset) void = { 45 strings::freeall(srcs.dirs); 46 strings::freeall(srcs.seentags); 47 strings::freeall(srcs.ha); 48 strings::freeall(srcs.s); 49 strings::freeall(srcs.o); 50 strings::freeall(srcs.sc); 51 }; 52 53 // Find the on-disk path and set of source files for a given module. The path is 54 // statically allocated and may be overwritten on subsequent calls. 55 export fn find(ctx: *context, loc: location) ((str, srcset) | error) = { 56 match (loc) { 57 case let buf: *path::buffer => 58 match (path_find(ctx, buf)) { 59 case let s: srcset => 60 return (path::string(buf), s); 61 case not_found => 62 return attach(locstr(loc), not_found); 63 case let e: error => 64 return e; 65 }; 66 case let ident: ast::ident => 67 let tok = strings::tokenize(ctx.harepath, ":"); 68 let next: (str | done) = "."; 69 for (next is str; next = strings::next_token(&tok)) { 70 if (!os::exists(next as str)) { 71 continue; 72 }; 73 74 static let buf = path::buffer { ... }; 75 path::set(&buf, os::realpath(next as str)?)?; 76 for (let part .. ident) { 77 path::push(&buf, part)?; 78 }; 79 80 match (path_find(ctx, &buf)) { 81 case let s: srcset => 82 return (path::string(&buf), s); 83 case not_found => void; 84 case let e: error => 85 return attach(strings::dup(path::string(&buf)), e); 86 }; 87 }; 88 return attach(locstr(ident), not_found); 89 }; 90 }; 91 92 fn path_find(ctx: *context, buf: *path::buffer) (srcset | error) = { 93 // list of sources to return, with 3 extra fields prepended to allow 94 // quick lookup and comparison. each item is e.g.: 95 // ("basename", "ha", 2 (# of tags), ["path/-tag1/basename+tag2.ha"]) 96 // if len(srcs.3) != 1 at the end of _findsrcs() then there's a conflict 97 let srcs: [](str, str, size, []str) = []; 98 defer { 99 for (let i = 0z; i < len(srcs); i += 1) { 100 free(srcs[i].0); 101 free(srcs[i].1); 102 free(srcs[i].3); 103 }; 104 free(srcs); 105 }; 106 let mtime = time::INSTANT_MIN; 107 let res = srcset { mtime = time::INSTANT_MIN, ... }; 108 109 _findsrcs(buf, ctx.tags, &srcs, &res, 0)?; 110 for (let i = 0z; i < len(srcs); i += 1) { 111 if (len(srcs[i].3) != 1) { 112 return alloc(srcs[i].3...): file_conflict; 113 }; 114 let out = switch (srcs[i].1) { 115 case "ha" => 116 yield &res.ha; 117 case "s" => 118 yield &res.s; 119 case "o" => 120 yield &res.o; 121 case "sc" => 122 yield &res.sc; 123 case => abort(); 124 }; 125 append(out, srcs[i].3[0]); 126 }; 127 128 // module needs either a hare source file or a README in order to be 129 // valid. used to allow eg. shadowing foo::bar:: without accidentally 130 // shadowing foo:: 131 if (len(res.ha) == 0) { 132 path::push(buf, "README")?; 133 defer path::pop(buf); 134 if (!os::exists(path::string(buf))) { 135 finish_srcset(&res); 136 return not_found; 137 }; 138 }; 139 140 sort::sort(res.dirs, size(str), &cmp::strs); 141 sort::sort(res.ha, size(str), &cmp::strs); 142 sort::sort(res.s, size(str), &cmp::strs); 143 sort::sort(res.o, size(str), &cmp::strs); 144 sort::sort(res.sc, size(str), &cmp::strs); 145 return res; 146 }; 147 148 // simple implementation but the reasons for it are delicate 149 // 150 // finding the right sources is conceptually simple: just collect all the 151 // files compatible with the tagset and then pick the best ones for each 152 // conflicting filename 153 // 154 // the mtime is the first weird part: you want to find the last time a file 155 // was added, moved, or deleted, but only for parts of the module relevant to 156 // the input tags. the edge-case here is "what if i renamed a subdirectory so 157 // that it's tags don't match". the solution is that you can just find the 158 // latest mtime of directories that get traversed, i.e. have matching tags. 159 // this is because the tag-compatible subset of the filetree constitutes 160 // a filetree in its own right, where a file being renamed to no longer be 161 // part of the tag-filetree is equivalent to it being deleted from the 162 // tag-filetree. the mtime-checking does not distinguish between renames 163 // and deletions, so we get this for free by checking mtimes in the underlying 164 // filesystem 165 // 166 // the second weird part is the seentags: the goal here is finding the subset 167 // of the input tags which were actually used while finding the srcset, 168 // so that the cache can be reused for two sets of input tags which don't 169 // produce different srcsets. the method used here is just to take note of 170 // the tags which were encountered while traversing the tree, and not to 171 // continue down a file path beyond the first incompatible tag. exploring 172 // this method, you could look at e.g "mod/+linux/+x86_64.ha". you might think 173 // that this should, in theory, produce 4 different cache versions, since 174 // there are 2 tags, each of which has 2 states; and using the method here 175 // would produce only 3: none, +linux, and +linux+x86_64. however, there are 176 // actually only 2 options: none, and +linux+x86_64, and the method here adds 177 // one redundant slot for +linux. this is because either tag on their own 178 // doesn't change whether the path matches, only both together. in practice, 179 // the redundancy in the method used here will cause minimal overhead, because 180 // it's likely that you do actually have a file with just one of the tags 181 // somewhere else in your module, or else you would have combined them into 182 // one tag. in any case, the method used here is fast because it gets to stop 183 // searching as soon as it can 184 fn _findsrcs( 185 buf: *path::buffer, 186 in_tags: []str, 187 srcs: *[](str, str, size, []str), 188 res: *srcset, 189 tagdepth: size, 190 ) (void | error) = { 191 const pathstr = path::string(buf); 192 const stat = match (os::stat(pathstr)) { 193 case let stat: fs::filestat => 194 yield stat; 195 case fs::error => 196 return; 197 }; 198 199 let tmp = pathstr; 200 for (fs::islink(stat.mode)) { 201 if (time::compare(res.mtime, stat.mtime) < 0) { 202 res.mtime = stat.mtime; 203 }; 204 tmp = os::realpath(tmp)?; 205 stat = os::stat(tmp)?; 206 }; 207 208 if (fs::isfile(stat.mode)) { 209 let ext = match (path::pop_ext(buf)) { 210 case void => 211 return; 212 case let ext: str => 213 yield ext; 214 }; 215 switch (ext) { 216 case "ha", "s", "o", "sc" => void; 217 case => 218 return; 219 }; 220 let filebytes = strings::toutf8(path::peek(buf) as str); 221 path::push_ext(buf, ext)?; 222 223 let split = tagindex(filebytes); 224 let (base, tags) = ( 225 strings::fromutf8_unsafe(filebytes[..split]), 226 strings::fromutf8_unsafe(filebytes[split..]), 227 ); 228 229 let wanttags = match (parse_tags(tags)) { 230 case let tags: []tag => 231 yield tags; 232 case let e: error => 233 return attach(strings::dup(path::string(buf)), e); 234 }; 235 defer free(wanttags); 236 if (!seentags_compat(in_tags, wanttags, &res.seentags)) { 237 return; 238 }; 239 240 let ntags = tagdepth + len(wanttags); 241 let bufstr = path::string(buf); 242 for (let i = 0z; i < len(srcs); i += 1) { 243 if (srcs[i].0 == base && srcs[i].1 == ext) { 244 if (srcs[i].2 > ntags) { 245 return; 246 }; 247 if (srcs[i].2 < ntags) { 248 srcs[i].2 = ntags; 249 strings::freeall(srcs[i].3); 250 srcs[i].3 = []; 251 }; 252 append(srcs[i].3, strings::dup(bufstr)); 253 return; 254 }; 255 }; 256 257 append(srcs, ( 258 strings::dup(base), 259 strings::dup(ext), 260 ntags, 261 alloc([strings::dup(bufstr)]), 262 )); 263 return; 264 }; 265 266 if (!fs::isdir(stat.mode)) return; // ignore special files 267 268 append(res.dirs, strings::dup(pathstr)); 269 if (time::compare(res.mtime, stat.mtime) < 0) { 270 res.mtime = stat.mtime; 271 }; 272 273 let iter = match (os::iter(pathstr)) { 274 case let i: *fs::iterator => 275 yield i; 276 case let e: fs::error => 277 return attach(strings::dup(pathstr), e); 278 }; 279 defer fs::finish(iter); 280 281 for (let d => fs::next(iter)?) { 282 path::push(buf, d.name)?; 283 defer path::pop(buf); 284 285 if (fs::isdir(d.ftype)) { 286 if (tagindex(strings::toutf8(d.name)) != 0) { 287 continue; 288 }; 289 let wanttags = match (parse_tags(d.name)) { 290 case let tags: []tag => 291 yield tags; 292 case let e: error => 293 return attach(strings::dup(path::string(buf)), e); 294 }; 295 defer free(wanttags); 296 if (!seentags_compat(in_tags, wanttags, &res.seentags)) { 297 continue; 298 }; 299 300 _findsrcs(buf, in_tags, srcs, res, 301 tagdepth+len(wanttags))?; 302 } else if (fs::isfile(d.ftype)) { 303 _findsrcs(buf, in_tags, srcs, res, tagdepth)?; 304 }; 305 }; 306 }; 307 308 fn tagindex(bs: []u8) size = { 309 let i = 0z; 310 for (i < len(bs) && bs[i] != '+' && bs[i] != '-'; i += 1) void; 311 return i; 312 }; 313 314 // Parses tags from a string. The tag themselves are borrowed from the input, 315 // but the caller must free the slice returned. 316 export fn parse_tags(s: str) ([]tag | error) = { 317 let bs = strings::toutf8(s); 318 if (bytes::contains(bs, '.')) { 319 return tag_has_dot; 320 }; 321 let tags: []tag = []; 322 let start = tagindex(bs); 323 if (start != 0) { 324 return tag_bad_format; 325 }; 326 for (start < len(bs)) { 327 const end = start + 1 + tagindex(bs[start+1..]); 328 append(tags, tag { 329 name = strings::fromutf8_unsafe(bs[start+1..end]), 330 include = bs[start] == '+', 331 }); 332 start = end; 333 }; 334 return tags; 335 }; 336 337 // Checks if a set of tags are compatible with a tag requirement. 338 export fn tags_compat(have: []str, want: []tag) bool = { 339 for (let t .. want) { 340 let found = false; 341 342 for (let ht .. have) { 343 if (ht == t.name) { 344 found = true; 345 break; 346 }; 347 }; 348 349 if (t.include ^^ found) { 350 return false; 351 }; 352 }; 353 return true; 354 }; 355 356 // same as tags_compat, but also adds any relevant tags to a seentags list 357 // for use in _findsrcs. 358 fn seentags_compat(have: []str, want: []tag, seen: *[]str) bool = { 359 for (let t .. want) { 360 let found = false; 361 362 for (let ht .. have) { 363 if (ht == t.name) { 364 insert_uniq(seen, t.name); 365 found = true; 366 break; 367 }; 368 }; 369 370 if (t.include ^^ found) { 371 return false; 372 }; 373 }; 374 return true; 375 };