hare

The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

commit f8048538f650a7560853d51b619cd93a7c239786
parent 066afc4a432fa935705b157827e49aaa388712d1
Author: Sebastian <sebastian@sebsite.pw>
Date:   Sat,  2 Apr 2022 00:03:27 -0400

lex, parse, unparse, haredoc: parse comments in structs, unions, and enums

The hare and tty backends of haredoc are supported. Support for
haredoc's html backend isn't included in this commit.

Signed-off-by: Sebastian <sebastian@sebsite.pw>

Diffstat:
M cmd/haredoc/html.ha | 3 +++
M cmd/haredoc/tty.ha | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
M hare/ast/expr.ha | 3 +++
M hare/ast/type.ha | 5 +++++
M hare/lex/+test.ha | 2 +-
M hare/lex/lex.ha | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M hare/lex/token.ha | 2 ++
M hare/parse/+test/types.ha | 7 +++++--
M hare/parse/type.ha | 24 +++++++++++++++++++++---
M hare/unparse/type.ha | 40 ++++++++++++++++++++++++++++++++++++----
M scripts/gen-stdlib | 2 +-
M stdlib.mk | 4 ++--
12 files changed, 201 insertions(+), 54 deletions(-)

diff --git a/cmd/haredoc/html.ha b/cmd/haredoc/html.ha @@ -478,6 +478,9 @@ fn newline(out: io::handle, indent: size) (size | io::error) = { return n; }; +fn multiline_comment(s: str) bool = + strings::byteindex(s, '\n') as size != len(s) - 1; + fn enum_html( out: io::handle, indent: size, diff --git a/cmd/haredoc/tty.ha b/cmd/haredoc/tty.ha @@ -24,11 +24,10 @@ fn emit_tty(ctx: *context) (void | error) = { for (true) match (bufio::scanline(readme)?) { case io::EOF => break; case let b: []u8 => + defer free(b); firstline = false; - fmt::fprintfln(ctx.out, - "\x1b[1m" "// {}" "\x1b[0m", - strings::fromutf8(b))?; - free(b); + insert(b[0], ' '); + comment_tty(ctx.out, strings::fromutf8(b))?; }; case void => void; }; @@ -51,6 +50,27 @@ fn emit_tty(ctx: *context) (void | error) = { }; }; +fn comment_tty(out: io::handle, s: str) (size | io::error) = { + return fmt::fprintfln(out, "\x1b[1m" "//{}" "\x1b[0m", s)?; +}; + +fn docs_tty(out: io::handle, s: str, indent: size) (size | io::error) = { + const iter = strings::tokenize(s, "\n"); + let z = 0z; + for (true) match (strings::next_token(&iter)) { + case let s: str => + if (!(strings::peek_token(&iter) is void)) { + z += comment_tty(out, s)?; + for (let i = 0z; i < indent; i += 1) { + z += fmt::fprint(out, "\t")?; + }; + }; + case void => break; + }; + + return z; +}; + fn isws(s: str) bool = { const iter = strings::iter(s); for (true) { @@ -75,18 +95,7 @@ fn details_tty(ctx: *context, decl: ast::decl) (void | error) = { }; firstline = false; - const iter = strings::tokenize(decl.docs, "\n"); - for (true) { - match (strings::next_token(&iter)) { - case let s: str => - if (!(strings::peek_token(&iter) is void)) { - fmt::fprintfln(ctx.out, - "\x1b[1m" "//{}" "\x1b[0m", s)?; - }; - case void => break; - }; - }; - + docs_tty(ctx.out, decl.docs, 0)?; unparse_tty(ctx.out, decl)?; fmt::fprintln(ctx.out)?; }; @@ -194,7 +203,7 @@ fn prototype_tty( return n; }; -// newline() and builtin_type() are from cmd/haredoc/html.ha +// 
newline(), builtin_type(), and multiline_comment() are from html.ha // Forked from [[hare::unparse]] fn struct_union_type_tty( @@ -217,6 +226,9 @@ fn struct_union_type_tty( indent += 1z; for (let i = 0z; i < len(membs); i += 1) { z += newline(out, indent)?; + if (membs[i].docs != "") { + z += docs_tty(out, membs[i].docs, indent)?; + }; match (membs[i]._offset) { case null => void; @@ -278,11 +290,22 @@ fn type_tty( "\x1b[36m" "{} " "\x1b[0m", unparse::builtin_type(e.storage))?; }; - n += fmt::fprint(out, "{")?; + n += fmt::fprintln(out, "{")?; indent += 1; for (let i = 0z; i < len(e.values); i += 1) { - n += newline(out, indent)?; + for (let i = 0z; i < indent; i += 1) { + n += fmt::fprint(out, "\t")?; + }; let value = e.values[i]; + let wrotedocs = false; + if (value.docs != "") { + // Check if comment should go above or next to + // field + if (multiline_comment(value.docs)) { + n += docs_tty(out, value.docs, indent)?; + wrotedocs = true; + }; + }; n += fmt::fprint(out, value.name)?; match (value.value) { case null => void; @@ -291,9 +314,17 @@ fn type_tty( n += unparse::expr(out, indent, *e)?; }; n += fmt::fprint(out, ",")?; + if (value.docs != "" && !wrotedocs) { + n += fmt::fprint(out, " ")?; + n += docs_tty(out, value.docs, 0)?; + } else { + n += fmt::fprintln(out)?; + }; }; indent -= 1; - n += newline(out, indent)?; + for (let i = 0z; i < indent; i += 1) { + n += fmt::fprint(out, "\t")?; + }; n += fmt::fprint(out, "}")?; case let f: ast::func_type => if (f.attrs & ast::func_attrs::NORETURN != 0) { diff --git a/hare/ast/expr.ha b/hare/ast/expr.ha @@ -92,6 +92,7 @@ export type assign_expr = struct { // A binary arithmetic operator export type binarithm_op = enum { // TODO: Rehome this with the checked AST? + BAND, // & BOR, // | DIV, // / @@ -157,6 +158,7 @@ export type call_expr = struct { // The kind of cast expression being used. export type cast_kind = enum { // TODO: Should this be rehomed with the checked AST? 
+ CAST, ASSERTION, TEST, @@ -363,6 +365,7 @@ export type switch_expr = struct { // A unary operator export type unarithm_op = enum { // TODO: Should this be rehomed with the checked AST? + ADDR, // & BNOT, // ~ DEREF, // * diff --git a/hare/ast/type.ha b/hare/ast/type.ha @@ -21,6 +21,7 @@ export type enum_field = struct { name: str, value: nullable *expr, loc: lex::location, + docs: str, }; // enum { FOO = 0, BAR, ... } @@ -100,6 +101,9 @@ export type struct_alias = ident; export type struct_member = struct { _offset: nullable *expr, member: (struct_field | struct_embedded | struct_alias), + + // Only valid if the lexer has comments enabled + docs: str, }; // struct { ... } @@ -139,6 +143,7 @@ fn struct_type_free(t: (struct_type | union_type)) void = { yield u: []struct_member; }; for (let i = 0z; i < len(membs); i += 1) { + free(membs[i].docs); match (membs[i]._offset) { case null => void; case let e: *expr => diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha @@ -207,7 +207,7 @@ fn loc(line: uint, col: uint) location = location { assert(lex(&lexer) is token); assert(comment(&lexer) == " foo\n bar\n"); assert(lex(&lexer) is token); - assert(comment(&lexer) == ""); + assert(comment(&lexer) == " baz\n"); assert(lex(&lexer) is token); assert(comment(&lexer) == " bad\n"); }; diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha @@ -141,6 +141,8 @@ export fn lex(lex: *lexer) (token | error) = { case => return syntaxerr(r.1, "invalid character"); }; + + line_comment(lex)?; return (tok, void, r.1); }; @@ -249,6 +251,7 @@ fn lex_string(lex: *lexer, loc: location, delim: rune) (token | error) = { unget(lex, r); }; }; + line_comment(lex)?; return (ltok::LIT_STR, strio::string(&buf), loc); }; @@ -278,6 +281,7 @@ fn lex_rn_str(lex: *lexer) (token | error) = { return syntaxerr(n.1, "expected \"\'\""); }; }; + line_comment(lex)?; return ret; }; @@ -301,6 +305,8 @@ fn lex_name(lex: *lexer, loc: location, label: bool) (token | error) = { strio::appendrune(&buf, r.0)?; }; + 
line_comment(lex)?; + let n = strio::string(&buf); if (label) { return (ltok::LABEL, n, loc); @@ -318,7 +324,35 @@ fn lex_name(lex: *lexer, loc: location, label: bool) (token | error) = { }; }; -fn lex_comment(lexr: *lexer) (token | error) = { +fn line_comment(lex: *lexer) (void | error) = { + if (lex.flags & flags::COMMENTS != flags::COMMENTS) { + return; + }; + + let r: (rune, location) = ('\0', location { ... }); + for (true) match (try(lex, '\t', ' ', '/')?) { + case void => + return; + case let v: (rune, location) => + switch (v.0) { + case '\t', ' ' => void; + case '/' => + r = v; + break; + }; + }; + + if (try(lex, '/')? is void) { + unget(lex, r); + return; + }; + + free(lex.comment); + lex.comment = ""; + lex_comment(lex)?; +}; + +fn lex_comment(lexr: *lexer) (void | error) = { if (lexr.flags & flags::COMMENTS != flags::COMMENTS) { for (true) match (next(lexr)?) { case io::EOF => @@ -328,7 +362,7 @@ fn lex_comment(lexr: *lexer) (token | error) = { break; }; }; - return lex(lexr); + return; }; let buf = strio::dynamic(); @@ -345,7 +379,6 @@ fn lex_comment(lexr: *lexer) (token | error) = { let new = strings::concat(lexr.comment, strio::string(&buf)); free(lexr.comment); lexr.comment = new; - return lex(lexr); }; fn lex_literal(lex: *lexer) (token | error) = { @@ -574,26 +607,29 @@ fn lex_literal(lex: *lexer) (token | error) = { return syntaxerr(loc, "overflow in exponent"); }; + line_comment(lex)?; return (suff, val, loc); }; -fn lex2(lex: *lexer) (token | error) = { - let first = next(lex)? as (rune, location); +fn lex2(lexr: *lexer) (token | error) = { + let first = next(lexr)? as (rune, location); let tok: (ltok, [](rune, ltok)) = switch (first.0) { case '*' => yield (ltok::TIMES, [('=', ltok::TIMESEQ)]); case '%' => yield (ltok::MODULO, [('=', ltok::MODEQ)]); case '/' => - match (next(lex)?) { + match (next(lexr)?) 
{ case let r: (rune, location) => switch (r.0) { case '=' => + line_comment(lexr)?; return (ltok::DIVEQ, void, first.1); case '/' => - return lex_comment(lex); + lex_comment(lexr)?; + return lex(lexr); case => - unget(lex, r); + unget(lexr, r); return (ltok::DIV, void, first.1); }; case io::EOF => @@ -602,18 +638,20 @@ fn lex2(lex: *lexer) (token | error) = { case '+' => yield (ltok::PLUS, [('=', ltok::PLUSEQ)]); case '-' => - match (next(lex)?) { + match (next(lexr)?) { case let r: (rune, location) => switch (r.0) { case '=' => + line_comment(lexr)?; return (ltok::MINUSEQ, void, first.1); case => if (ascii::isdigit(r.0)) { - unget(lex, r); - unget(lex, first); - return lex_literal(lex); + unget(lexr, r); + unget(lexr, first); + return lex_literal(lexr); } else { - unget(lex, r); + unget(lexr, r); + line_comment(lexr)?; return (ltok::MINUS, void, first.1); }; }; @@ -621,15 +659,17 @@ fn lex2(lex: *lexer) (token | error) = { return (ltok::MINUS, void, first.1); }; case ':' => - match (next(lex)?) { + match (next(lexr)?) { case let r: (rune, location) => switch (r.0) { case ':' => + line_comment(lexr)?; return (ltok::DOUBLE_COLON, void, first.1); case => - unget(lex, r); + unget(lexr, r); + line_comment(lexr)?; return if (is_name(r.0, false)) { - yield lex_name(lex, first.1, true)?; + yield lex_name(lexr, first.1, true)?; } else (ltok::COLON, void, first.1); }; case io::EOF => @@ -642,14 +682,16 @@ fn lex2(lex: *lexer) (token | error) = { case => return syntaxerr(first.1, "unknown token sequence"); }; - match (next(lex)?) { + match (next(lexr)?) { case let r: (rune, location) => for (let i = 0z; i < len(tok.1); i += 1) { if (tok.1[i].0 == r.0) { + line_comment(lexr)?; return (tok.1[i].1, void, first.1); }; }; - unget(lex, r); + unget(lexr, r); + line_comment(lexr)?; case io::EOF => void; }; return (tok.0, void, first.1); @@ -662,6 +704,7 @@ fn lex3(lex: *lexer) (token | error) = { let tok = if (try(lex, '.')? is void) ltok::DOT else if (try(lex, '.')? 
is void) ltok::SLICE else ltok::ELLIPSIS; + line_comment(lex)?; return (tok, void, r.1); case '<' => yield [ltok::LESS, ltok::LESSEQ, ltok::LSHIFT, ltok::LSHIFTEQ]; @@ -693,6 +736,7 @@ fn lex3(lex: *lexer) (token | error) = { }; }; }; + line_comment(lex)?; return (toks[idx], void, r.1); }; @@ -731,12 +775,18 @@ fn nextw(lex: *lexer) ((rune, location) | io::EOF | io::error) = { case let e: (io::error | io::EOF) => return e; case let r: (rune, location) => - if (!ascii::isspace(r.0)) { - return r; - } else { + if (ascii::isspace(r.0)) { + if (r.0 == '\n') { + free(lex.comment); + lex.comment = ""; + }; + continue; + }; + if (!is_name(r.0, true) && r.0 != '/') { free(lex.comment); lex.comment = ""; }; + return r; }; abort(); }; diff --git a/hare/lex/token.ha b/hare/lex/token.ha @@ -9,6 +9,7 @@ use strings; export type ltok = enum uint { // Keep ordered with bmap // Alpha sorted + ATTR_FINI, ATTR_INIT, ATTR_NORETURN, @@ -75,6 +76,7 @@ export type ltok = enum uint { LAST_KEYWORD = YIELD, // Operators + ARROW, BAND, BANDEQ, diff --git a/hare/parse/+test/types.ha b/hare/parse/+test/types.ha @@ -8,6 +8,7 @@ }; export type bar = union { x: int, + // docs docs docs y: int, }; export type baz = struct { @@ -31,9 +32,11 @@ export type bat = [void]int; @test fn enum_type() void = { roundtrip("export type foo = enum { X = void, + // foo + // bar Y = void, - Z, - Q, + Z, // foo + Q, // bar }; export type bar = enum uint { X = void, diff --git a/hare/parse/type.ha b/hare/parse/type.ha @@ -6,6 +6,7 @@ use hare::ast; use hare::ast::{builtin_type}; use hare::lex; use hare::lex::{ltok}; +use strings; fn prototype(lexer: *lex::lexer) (ast::func_type | error) = { let variadism = ast::variadism::NONE; @@ -246,10 +247,13 @@ fn struct_union_type(lexer: *lex::lexer) (ast::_type | error) = { break; }; + let comment = ""; + let offs: nullable *ast::expr = match (try(lexer, ltok::ATTR_OFFSET)?) 
{ case void => yield null; case lex::token => + comment = strings::dup(lex::comment(lexer)); want(lexer, ltok::LPAREN)?; let ex = expr(lexer)?; want(lexer, ltok::RPAREN)?; @@ -257,10 +261,13 @@ fn struct_union_type(lexer: *lex::lexer) (ast::_type | error) = { }; let tok = want(lexer, ltok::NAME, ltok::STRUCT, ltok::UNION)?; + if (comment == "") { + comment = strings::dup(lex::comment(lexer)); + }; switch (tok.0) { case ltok::NAME => lex::unlex(lexer, tok); - let memb = struct_embed_or_field(lexer, offs)?; + let memb = struct_embed_or_field(lexer, offs, comment)?; append(membs, memb); case ltok::STRUCT, ltok::UNION => lex::unlex(lexer, tok); @@ -268,6 +275,7 @@ fn struct_union_type(lexer: *lex::lexer) (ast::_type | error) = { append(membs, ast::struct_member { _offset = offs, member = alloc(subtype), + docs = comment, }); case => abort(); }; @@ -296,6 +304,7 @@ fn struct_union_type(lexer: *lex::lexer) (ast::_type | error) = { fn struct_embed_or_field( lexer: *lex::lexer, offs: nullable *ast::expr, + comment: str, ) (ast::struct_member | error) = { // Disambiguates between `name: type` and `identifier` // @@ -321,6 +330,7 @@ fn struct_embed_or_field( return ast::struct_member { _offset = offs, member = field, + docs = comment, }; case ltok::DOUBLE_COLON => let id = ident(lexer)?; @@ -333,6 +343,7 @@ fn struct_embed_or_field( return ast::struct_member { _offset = offs, member = id: ast::struct_alias, + docs = comment, }; }; @@ -399,19 +410,26 @@ fn enum_type(lexer: *lex::lexer) (ast::_type | error) = { const loc = lex::mkloc(lexer); let name = want(lexer, ltok::NAME)?; + let comment = strings::dup(lex::comment(lexer)); let value: nullable *ast::expr = if (try(lexer, ltok::EQUAL) is lex::token) alloc(expr(lexer)?) 
else null; - append(membs, ast::enum_field { + defer append(membs, ast::enum_field { name = name.1 as str, value = value, loc = loc, + docs = comment, }); switch (want(lexer, ltok::COMMA, ltok::RBRACE)?.0) { - case ltok::COMMA => void; + case ltok::COMMA => + const linecomment = lex::comment(lexer); + if (linecomment != "") { + free(comment); + comment = strings::dup(linecomment); + }; case ltok::RBRACE => break; case => abort(); }; diff --git a/hare/unparse/type.ha b/hare/unparse/type.ha @@ -6,6 +6,7 @@ use fmt; use io; use hare::ast; use hare::lex; +use strings; use strio; // Returns a builtin type as a string. @@ -101,7 +102,13 @@ fn struct_union_type( indent += 1z; for (let i = 0z; i < len(membs); i += 1) { - z += newline(out, indent)?; + z += fmt::fprintln(out)?; + if (membs[i].docs != "") { + z += comment(out, membs[i].docs, indent)?; + }; + for (let i = 0z; i < indent; i += 1) { + z += fmt::fprint(out, "\t")?; + }; match (membs[i]._offset) { case null => void; @@ -130,7 +137,10 @@ fn struct_union_type( return z; }; -// Unparses a [[hare::ast::_type]]. +fn multiline_comment(s: str) bool = + strings::byteindex(s, '\n') as size != len(s) - 1; + +// Unparses an [[ast::_type]]. 
export fn _type( out: io::handle, indent: size, @@ -160,9 +170,21 @@ export fn _type( n += fmt::fprint(out, "enum {")?; }; indent += 1; + n += fmt::fprintln(out)?; for (let i = 0z; i < len(e.values); i += 1) { - n += newline(out, indent)?; let value = e.values[i]; + let wrotedocs = false; + if (value.docs != "") { + // Check if comment should go above or next to + // field + if (multiline_comment(value.docs)) { + n += comment(out, value.docs, indent)?; + wrotedocs = true; + }; + }; + for (let i = 0z; i < indent; i += 1) { + n += fmt::fprint(out, "\t")?; + }; n += fmt::fprint(out, value.name)?; match (value.value) { case null => void; @@ -171,9 +193,17 @@ export fn _type( n += expr(out, indent, *e)?; }; n += fmt::fprint(out, ",")?; + if (value.docs != "" && !wrotedocs) { + n += fmt::fprint(out, " ")?; + n += comment(out, value.docs, 0)?; + } else { + n += fmt::fprintln(out)?; + }; }; indent -= 1; - n += newline(out, indent)?; + for (let i = 0z; i < indent; i += 1) { + n += fmt::fprint(out, "\t")?; + }; n += fmt::fprint(out, "}")?; case let f: ast::func_type => if (f.attrs & ast::func_attrs::NORETURN != 0) { @@ -311,11 +341,13 @@ fn type_test(t: ast::_type, expected: str) bool = { name = "FOO", value = null, loc = loc, + docs = "", }, ast::enum_field { name = "BAR", value = &expr_void, loc = loc, + docs = "", }, ], }; diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib @@ -613,7 +613,7 @@ hare_unparse() { type.ha \ unit.ha \ util.ha - gen_ssa hare::unparse fmt io strio hare::ast + gen_ssa hare::unparse fmt io strings strio hare::ast } gensrcs_hare_lex() { diff --git a/stdlib.mk b/stdlib.mk @@ -1166,7 +1166,7 @@ stdlib_hare_unparse_any_srcs= \ $(STDLIB)/hare/unparse/unit.ha \ $(STDLIB)/hare/unparse/util.ha -$(HARECACHE)/hare/unparse/hare_unparse-any.ssa: $(stdlib_hare_unparse_any_srcs) $(stdlib_rt) $(stdlib_fmt_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_hare_ast_$(PLATFORM)) +$(HARECACHE)/hare/unparse/hare_unparse-any.ssa: 
$(stdlib_hare_unparse_any_srcs) $(stdlib_rt) $(stdlib_fmt_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_hare_ast_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(HARECACHE)/hare/unparse @HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nhare::unparse \ @@ -3075,7 +3075,7 @@ testlib_hare_unparse_any_srcs= \ $(STDLIB)/hare/unparse/unit.ha \ $(STDLIB)/hare/unparse/util.ha -$(TESTCACHE)/hare/unparse/hare_unparse-any.ssa: $(testlib_hare_unparse_any_srcs) $(testlib_rt) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_hare_ast_$(PLATFORM)) +$(TESTCACHE)/hare/unparse/hare_unparse-any.ssa: $(testlib_hare_unparse_any_srcs) $(testlib_rt) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_hare_ast_$(PLATFORM)) @printf 'HAREC \t$@\n' @mkdir -p $(TESTCACHE)/hare/unparse @HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nhare::unparse \