hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

doc.ha (6862B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use bufio;
      5 use encoding::utf8;
      6 use hare::ast;
      7 use hare::lex;
      8 use hare::parse;
      9 use io;
     10 use memio;
     11 use strings;
     12 
// A representation of a complete haredoc document: its top-level blocks
// (paragraphs, bulleted lists and code samples) in the order in which they
// were parsed. Release it with [[freeall]].
export type doc = [](paragraph | list | code_sample);

// A paragraph of text: runs of literal text interleaved with the
// double-bracketed references found between them. Every str entry is a
// heap-allocated copy which is released by [[freeall]].
export type paragraph = [](str | decl_ref | mod_ref);

// A bulleted list, holding one [[paragraph]] for each "-" item.
export type list = []paragraph;

// A code sample: the text of a run of tab-indented lines, with the tab that
// opens each line removed. Heap-allocated; released by [[freeall]].
export type code_sample = str;

// A reference to a declaration. The parser produces this when the flag
// returned alongside the identifier by [[hare::parse::ident_trailing]] is
// false.
export type decl_ref = ast::ident;

// A reference to a module. The parser produces this when the flag returned
// alongside the identifier by [[hare::parse::ident_trailing]] is true
// (presumably a trailing "::"; confirm against hare::parse).
export type mod_ref = ast::ident;

// All possible error types.
export type error = !lex::error;
     33 
     34 // Converts an error into a human-friendly string. The result may be statically
     35 // allocated.
     36 export fn strerror(err: error) const str = lex::strerror(err);
     37 
     38 // Parses a haredoc document from an [[io::handle]]. 'start' is the location of
     39 // the top-left corner of the document, for accurate locations in error messages
     40 // (e.g. declaration documentation starts at col=3; READMEs start at col=1).
     41 export fn parse(in: io::handle, start: lex::location) (doc | error) = {
     42 	let sc = bufio::newscanner(in);
     43 	defer bufio::finish(&sc);
     44 
     45 	match (_parse(&sc)) {
     46 	case let doc: doc =>
     47 		return doc;
     48 	case let err: lex::syntax =>
     49 		err.0.path = start.path;
     50 		err.0.line += start.line;
     51 		err.0.col += start.col;
     52 		return err;
     53 	case let err: io::error =>
     54 		return err;
     55 	case utf8::invalid =>
     56 		// XXX: the location for this error is inaccurate
     57 		return lex::syntaxerr(start, "Invalid UTF-8");
     58 	};
     59 };
     60 
     61 fn _parse(sc: *bufio::scanner) (doc | ...error | utf8::invalid) = {
     62 	let loc = lex::location { ... };
     63 	let doc: doc = [];
     64 
     65 	for (let r => bufio::scan_rune(sc)?) {
     66 		if (r == ' ') {
     67 			r = match (bufio::scan_rune(sc)?) {
     68 			case io::EOF =>
     69 				break;
     70 			case let r: rune =>
     71 				loc.col = 1;
     72 				yield r;
     73 			};
     74 		};
     75 
     76 		switch (r) {
     77 		case '\t' =>
     78 			loc.col = 8;
     79 			append(doc, scan_code_sample(sc, &loc)?);
     80 		case '\n' =>
     81 			loc.line += 1;
     82 			loc.col = 0;
     83 		case '-' =>
     84 			loc.col += 1;
     85 			append(doc, scan_list(sc, &loc)?);
     86 		case =>
     87 			bufio::unreadrune(sc, r);
     88 			append(doc, scan_paragraph(sc, &loc)?);
     89 		};
     90 	};
     91 
     92 	return doc;
     93 };
     94 
     95 fn scan_code_sample(
     96 	sc: *bufio::scanner,
     97 	loc: *lex::location,
     98 ) (code_sample | ...error | utf8::invalid) = {
     99 	let s = memio::dynamic();
    100 	for (let r => bufio::scan_rune(sc)?) {
    101 		switch (r) {
    102 		case '\t' =>
    103 			loc.col += 8 - loc.col % 8;
    104 			memio::appendrune(&s, r)!;
    105 		case '\n' =>
    106 			loc.line += 1;
    107 			loc.col = 0;
    108 
    109 			let (r, space) = match (bufio::scan_rune(sc)?) {
    110 			case io::EOF =>
    111 				break;
    112 			case let r: rune =>
    113 				if (r != ' ') yield (r, false);
    114 				yield match (bufio::scan_rune(sc)?) {
    115 				case io::EOF =>
    116 					break;
    117 				case let r: rune =>
    118 					yield (r, true);
    119 				};
    120 			};
    121 
    122 			switch (r) {
    123 			case '\t' =>
    124 				loc.col = 8;
    125 				memio::appendrune(&s, '\n')!;
    126 			case '\n' =>
    127 				memio::appendrune(&s, '\n')!;
    128 				bufio::unreadrune(sc, '\n');
    129 			case =>
    130 				bufio::unreadrune(sc, r);
    131 				if (space) {
    132 					bufio::unreadrune(sc, ' ');
    133 				};
    134 				break;
    135 			};
    136 		case =>
    137 			loc.col += 1;
    138 			memio::appendrune(&s, r)!;
    139 		};
    140 	};
    141 
    142 	return memio::string(&s)!;
    143 };
    144 
    145 fn scan_list(
    146 	sc: *bufio::scanner,
    147 	loc: *lex::location,
    148 ) (list | ...error | utf8::invalid) = {
    149 	let li: list = [];
    150 	for (true) {
    151 		match (bufio::scan_rune(sc)?) {
    152 		case io::EOF =>
    153 			append(li, []);
    154 			break;
    155 		case let r: rune =>
    156 			if (r != ' ') {
    157 				bufio::unreadrune(sc, r);
    158 			};
    159 		};
    160 
    161 		append(li, scan_paragraph(sc, loc)?);
    162 
    163 		match (bufio::scan_rune(sc)?) {
    164 		case io::EOF =>
    165 			break;
    166 		case let r: rune =>
    167 			if (r != '-') {
    168 				bufio::unreadrune(sc, r);
    169 				break;
    170 			};
    171 		};
    172 	};
    173 
    174 	return li;
    175 };
    176 
// Tracks the whitespace seen by scan_paragraph since the last rune of text.
// XXX: should be local to scan_paragraph, once that's possible
type state = enum {
	// No whitespace is pending. This is also the initial state.
	NORMAL,
	// A space or tab was seen after text; a single ' ' is emitted before
	// the next rune of text.
	SPACE,
	// A newline was seen and no text has followed it yet. A second
	// newline ends the paragraph, as does a tab or '-' at the start of the
	// line; otherwise a single ' ' is emitted before the next rune of
	// text.
	NEWLINE,
};
    183 
    184 fn scan_paragraph(
    185 	sc: *bufio::scanner,
    186 	loc: *lex::location,
    187 ) (paragraph | ...error | utf8::invalid) = {
    188 	let p: paragraph = [];
    189 	let s = memio::dynamic();
    190 	defer io::close(&s)!;
    191 	let state = state::NORMAL;
    192 
    193 	for (let r => bufio::scan_rune(sc)?) {
    194 		switch (r) {
    195 		case '\t' =>
    196 			if (state == state::NEWLINE && loc.col <= 1) {
    197 				bufio::unreadrune(sc, r);
    198 				break;
    199 			};
    200 			loc.col += 8 - loc.col % 8;
    201 			if (state == state::NORMAL) {
    202 				state = state::SPACE;
    203 			};
    204 			continue;
    205 		case '\n' =>
    206 			loc.line += 1;
    207 			loc.col = 0;
    208 			if (state == state::NEWLINE) {
    209 				break;
    210 			};
    211 			state = state::NEWLINE;
    212 			continue;
    213 		case ' ' =>
    214 			loc.col += 1;
    215 			if (state == state::NORMAL) {
    216 				state = state::SPACE;
    217 			};
    218 			continue;
    219 		case '-' =>
    220 			if (state != state::NEWLINE || loc.col > 1) yield;
    221 			// XXX: we may want to reconsider if recognizing '-'
    222 			// here is too lenient (what if a line begins with a
    223 			// negative number?)
    224 			bufio::unreadrune(sc, r);
    225 			break;
    226 		case => void;
    227 		};
    228 
    229 		if (state != state::NORMAL) {
    230 			memio::appendrune(&s, ' ')!;
    231 		};
    232 		state = state::NORMAL;
    233 		loc.col += 1;
    234 
    235 		if (r != '[') {
    236 			memio::appendrune(&s, r)!;
    237 			continue;
    238 		};
    239 
    240 		r = match (bufio::scan_rune(sc)?) {
    241 		case io::EOF =>
    242 			memio::appendrune(&s, '[')!;
    243 			break;
    244 		case let r: rune =>
    245 			yield r;
    246 		};
    247 		if (r != '[') {
    248 			memio::appendrune(&s, '[')!;
    249 			bufio::unreadrune(sc, r);
    250 			continue;
    251 		};
    252 
    253 		loc.col += 1;
    254 		const part = memio::string(&s)!;
    255 		if (part != "") {
    256 			append(p, strings::dup(part));
    257 			memio::reset(&s);
    258 		};
    259 
    260 		let lexer = lex::init(sc, loc.path);
    261 		const (ident, mod) = match (parse::ident_trailing(&lexer)) {
    262 		case let id: (ast::ident, bool) =>
    263 			yield id;
    264 		case let err: lex::syntax =>
    265 			if (err.0.line == 1) {
    266 				err.0.col += loc.col - 1;
    267 			};
    268 			err.0.line += loc.line - 1;
    269 			return err;
    270 		case let err: io::error =>
    271 			return err;
    272 		};
    273 
    274 		// intentionally not using lex::mkloc, so whitespace is
    275 		// accounted for
    276 		if (lexer.loc.0 == 1) {
    277 			loc.col += lexer.loc.1 - 1;
    278 		} else {
    279 			loc.col = 0;
    280 		};
    281 		loc.line += lexer.loc.0 - 1;
    282 
    283 		append(p, if (mod) ident: mod_ref else ident: decl_ref);
    284 
    285 		if (lexer.un.0 == lex::ltok::RBRACKET) {
    286 			match (bufio::scan_rune(sc)?) {
    287 			case io::EOF => void;
    288 			case let r: rune =>
    289 				if (r == ']') {
    290 					loc.col += 1;
    291 					continue;
    292 				};
    293 			};
    294 		};
    295 		return lex::syntaxerr(*loc, "Unterminated reference");
    296 	};
    297 
    298 	const part = memio::string(&s)!;
    299 	if (part != "") {
    300 		append(p, strings::dup(part));
    301 	};
    302 	return p;
    303 };
    304 
    305 // Frees resources associated with a [[doc]].
    306 export fn freeall(doc: doc) void = {
    307 	for (let d .. doc) {
    308 		match (d) {
    309 		case let p: paragraph =>
    310 			free_paragraph(p);
    311 		case let l: list =>
    312 			for (let p .. l) {
    313 				free_paragraph(p);
    314 			};
    315 			free(l);
    316 		case let c: code_sample =>
    317 			free(c);
    318 		};
    319 	};
    320 	free(doc);
    321 };
    322 
    323 fn free_paragraph(p: paragraph) void = {
    324 	for (let entry .. p) {
    325 		match (entry) {
    326 		case let s: str =>
    327 			free(s);
    328 		case let d: decl_ref =>
    329 			ast::ident_free(d);
    330 		case let m: mod_ref =>
    331 			ast::ident_free(m);
    332 		};
    333 	};
    334 	free(p);
    335 };