doc.ha (6862B)
// SPDX-License-Identifier: MPL-2.0
// (c) Hare authors <https://harelang.org>

use bufio;
use encoding::utf8;
use hare::ast;
use hare::lex;
use hare::parse;
use io;
use memio;
use strings;

// A representation of a complete haredoc document.
export type doc = [](paragraph | list | code_sample);

// A paragraph of text.
export type paragraph = [](str | decl_ref | mod_ref);

// A bulleted list.
export type list = []paragraph;

// A code sample.
export type code_sample = str;

// A reference to a declaration.
export type decl_ref = ast::ident;

// A reference to a module.
export type mod_ref = ast::ident;

// All possible error types.
export type error = !lex::error;

// Converts an error into a human-friendly string. The result may be statically
// allocated.
export fn strerror(err: error) const str = lex::strerror(err);

// Parses a haredoc document from an [[io::handle]]. 'start' is the location of
// the top-left corner of the document, for accurate locations in error messages
// (e.g. declaration documentation starts at col=3; READMEs start at col=1).
//
// On success, the caller owns the returned document and should pass it to
// [[freeall]] when done with it.
export fn parse(in: io::handle, start: lex::location) (doc | error) = {
	let sc = bufio::newscanner(in);
	defer bufio::finish(&sc);

	match (_parse(&sc)) {
	case let doc: doc =>
		return doc;
	case let err: lex::syntax =>
		// _parse reports locations relative to the start of the
		// document; translate them using the caller-provided origin.
		err.0.path = start.path;
		err.0.line += start.line;
		err.0.col += start.col;
		return err;
	case let err: io::error =>
		return err;
	case utf8::invalid =>
		// XXX: the location for this error is inaccurate
		return lex::syntaxerr(start, "Invalid UTF-8");
	};
};

// Scans the whole document, dispatching on the first rune of each block: a tab
// starts a code sample, '-' starts a list, a newline is skipped, and anything
// else starts a paragraph. A single leading space is consumed before
// dispatching.
//
// If any nested scanner fails, everything collected so far is freed before the
// error is propagated.
fn _parse(sc: *bufio::scanner) (doc | ...error | utf8::invalid) = {
	let loc = lex::location { ... };
	let doc: doc = [];
	// Release the partially built document on any early (error) return.
	let ok = false;
	defer if (!ok) freeall(doc);

	for (let r => bufio::scan_rune(sc)?) {
		if (r == ' ') {
			r = match (bufio::scan_rune(sc)?) {
			case io::EOF =>
				break;
			case let r: rune =>
				loc.col = 1;
				yield r;
			};
		};

		switch (r) {
		case '\t' =>
			loc.col = 8;
			append(doc, scan_code_sample(sc, &loc)?);
		case '\n' =>
			loc.line += 1;
			loc.col = 0;
		case '-' =>
			loc.col += 1;
			append(doc, scan_list(sc, &loc)?);
		case =>
			bufio::unreadrune(sc, r);
			append(doc, scan_paragraph(sc, &loc)?);
		};
	};

	ok = true;
	return doc;
};

// Scans a code sample. The caller has already consumed the tab which
// introduces the first line. Each following line must begin with a tab
// (optionally preceded by one space) to continue the sample; empty lines are
// kept as newlines. The first rune which does not continue the sample is
// pushed back onto the scanner, together with the space preceding it, if any.
//
// The returned string is the buffer of the underlying [[memio]] stream and is
// owned by the caller. If reading fails, the buffer is released before the
// error is propagated.
fn scan_code_sample(
	sc: *bufio::scanner,
	loc: *lex::location,
) (code_sample | ...error | utf8::invalid) = {
	let s = memio::dynamic();
	// Only the success path hands the buffer to the caller; on errors it
	// has to be closed here or it would leak.
	let ok = false;
	defer if (!ok) io::close(&s)!;

	for (let r => bufio::scan_rune(sc)?) {
		switch (r) {
		case '\t' =>
			// advance to the next multiple of eight
			loc.col += 8 - loc.col % 8;
			memio::appendrune(&s, r)!;
		case '\n' =>
			loc.line += 1;
			loc.col = 0;

			// Look at the start of the next line, skipping at most
			// one leading space, to decide whether the sample
			// continues.
			let (r, space) = match (bufio::scan_rune(sc)?) {
			case io::EOF =>
				break;
			case let r: rune =>
				if (r != ' ') yield (r, false);
				yield match (bufio::scan_rune(sc)?) {
				case io::EOF =>
					break;
				case let r: rune =>
					yield (r, true);
				};
			};

			switch (r) {
			case '\t' =>
				loc.col = 8;
				memio::appendrune(&s, '\n')!;
			case '\n' =>
				// empty line: keep it, and let the outer loop
				// process the newline again
				memio::appendrune(&s, '\n')!;
				bufio::unreadrune(sc, '\n');
			case =>
				// end of the sample: restore what was consumed
				bufio::unreadrune(sc, r);
				if (space) {
					bufio::unreadrune(sc, ' ');
				};
				break;
			};
		case =>
			loc.col += 1;
			memio::appendrune(&s, r)!;
		};
	};

	ok = true;
	return memio::string(&s)!;
};

// Scans a bulleted list. The caller has already consumed the '-' which
// introduces the first item. Each item is scanned as a paragraph, after
// skipping one optional space; the list continues for as long as the rune
// following an item is another '-'.
//
// If scanning an item fails, the items collected so far are freed before the
// error is propagated.
fn scan_list(
	sc: *bufio::scanner,
	loc: *lex::location,
) (list | ...error | utf8::invalid) = {
	let li: list = [];
	// Release the already-scanned items on any early (error) return.
	let ok = false;
	defer if (!ok) free_list(li);

	for (true) {
		match (bufio::scan_rune(sc)?) {
		case io::EOF =>
			// '-' at the very end of the input: empty item
			append(li, []);
			break;
		case let r: rune =>
			if (r != ' ') {
				bufio::unreadrune(sc, r);
			};
		};

		append(li, scan_paragraph(sc, loc)?);

		match (bufio::scan_rune(sc)?) {
		case io::EOF =>
			break;
		case let r: rune =>
			if (r != '-') {
				bufio::unreadrune(sc, r);
				break;
			};
		};
	};

	ok = true;
	return li;
};

// XXX: should be local to scan_paragraph, once that's possible
//
// Whitespace state of [[scan_paragraph]]: NORMAL after a non-whitespace rune,
// SPACE after a space or tab, NEWLINE after a newline.
type state = enum {
	NORMAL,
	SPACE,
	NEWLINE,
};

// Scans a paragraph. Any run of whitespace between two other runes is
// collapsed into a single space. The paragraph ends at EOF, at an empty line,
// or when a line begins with a tab or with '-'; in the latter two cases the
// rune is pushed back onto the scanner for the caller to handle.
//
// "[[" begins a reference, which is parsed as an identifier using
// [[hare::parse::ident_trailing]] and must be terminated by "]]". The text
// before and after a reference is stored as separate, heap-allocated strings.
//
// If an error occurs, everything collected so far is freed before the error is
// returned.
fn scan_paragraph(
	sc: *bufio::scanner,
	loc: *lex::location,
) (paragraph | ...error | utf8::invalid) = {
	let p: paragraph = [];
	// Release the strings and identifiers collected so far on any early
	// (error) return.
	let ok = false;
	defer if (!ok) free_paragraph(p);
	// Accumulates the text of the current string part.
	let s = memio::dynamic();
	defer io::close(&s)!;
	let state = state::NORMAL;

	for (let r => bufio::scan_rune(sc)?) {
		switch (r) {
		case '\t' =>
			if (state == state::NEWLINE && loc.col <= 1) {
				// a tab at the start of a line ends the
				// paragraph
				bufio::unreadrune(sc, r);
				break;
			};
			// advance to the next multiple of eight
			loc.col += 8 - loc.col % 8;
			if (state == state::NORMAL) {
				state = state::SPACE;
			};
			continue;
		case '\n' =>
			loc.line += 1;
			loc.col = 0;
			if (state == state::NEWLINE) {
				// empty line: end of paragraph
				break;
			};
			state = state::NEWLINE;
			continue;
		case ' ' =>
			loc.col += 1;
			if (state == state::NORMAL) {
				state = state::SPACE;
			};
			continue;
		case '-' =>
			// only a '-' at the start of a line is special
			if (state != state::NEWLINE || loc.col > 1) yield;
			// XXX: we may want to reconsider if recognizing '-'
			// here is too lenient (what if a line begins with a
			// negative number?)
			bufio::unreadrune(sc, r);
			break;
		case => void;
		};

		// r is part of the text: emit a single space for any
		// whitespace which preceded it.
		if (state != state::NORMAL) {
			memio::appendrune(&s, ' ')!;
		};
		state = state::NORMAL;
		loc.col += 1;

		if (r != '[') {
			memio::appendrune(&s, r)!;
			continue;
		};

		// One '[' has been seen; a second one begins a reference.
		r = match (bufio::scan_rune(sc)?) {
		case io::EOF =>
			memio::appendrune(&s, '[')!;
			break;
		case let r: rune =>
			yield r;
		};
		if (r != '[') {
			memio::appendrune(&s, '[')!;
			bufio::unreadrune(sc, r);
			continue;
		};

		// Flush the text preceding the reference as its own part.
		loc.col += 1;
		const part = memio::string(&s)!;
		if (part != "") {
			append(p, strings::dup(part));
			memio::reset(&s);
		};

		let lexer = lex::init(sc, loc.path);
		const (ident, mod) = match (parse::ident_trailing(&lexer)) {
		case let id: (ast::ident, bool) =>
			yield id;
		case let err: lex::syntax =>
			// The lexer counts from line 1, column 1; translate
			// its location into document coordinates.
			if (err.0.line == 1) {
				err.0.col += loc.col - 1;
			};
			err.0.line += loc.line - 1;
			return err;
		case let err: io::error =>
			return err;
		};

		// intentionally not using lex::mkloc, so whitespace is
		// accounted for
		if (lexer.loc.0 == 1) {
			loc.col += lexer.loc.1 - 1;
		} else {
			loc.col = 0;
		};
		loc.line += lexer.loc.0 - 1;

		append(p, if (mod) ident: mod_ref else ident: decl_ref);

		// The lexer holds the first ']' as an unlexed token; the
		// second one is read directly from the scanner.
		if (lexer.un.0 == lex::ltok::RBRACKET) {
			match (bufio::scan_rune(sc)?) {
			case io::EOF => void;
			case let r: rune =>
				if (r == ']') {
					loc.col += 1;
					continue;
				};
			};
		};
		return lex::syntaxerr(*loc, "Unterminated reference");
	};

	// Flush any trailing text.
	const part = memio::string(&s)!;
	if (part != "") {
		append(p, strings::dup(part));
	};
	ok = true;
	return p;
};

// Frees resources associated with a [[doc]].
export fn freeall(doc: doc) void = {
	for (let d .. doc) {
		match (d) {
		case let p: paragraph =>
			free_paragraph(p);
		case let l: list =>
			free_list(l);
		case let c: code_sample =>
			free(c);
		};
	};
	free(doc);
};

// Frees a [[list]] along with all of its items.
fn free_list(l: list) void = {
	for (let p .. l) {
		free_paragraph(p);
	};
	free(l);
};

// Frees a [[paragraph]] along with all of its strings and references.
fn free_paragraph(p: paragraph) void = {
	for (let entry .. p) {
		match (entry) {
		case let s: str =>
			free(s);
		case let d: decl_ref =>
			ast::ident_free(d);
		case let m: mod_ref =>
			ast::ident_free(m);
		};
	};
	free(p);
};