commit f8048538f650a7560853d51b619cd93a7c239786
parent 066afc4a432fa935705b157827e49aaa388712d1
Author: Sebastian <sebastian@sebsite.pw>
Date: Sat, 2 Apr 2022 00:03:27 -0400
lex, parse, unparse, haredoc: parse comments in structs, unions, and enums
The hare and tty output backends of haredoc are supported; support for
haredoc's html backend is not included in this commit.
Signed-off-by: Sebastian <sebastian@sebsite.pw>
Diffstat:
12 files changed, 201 insertions(+), 54 deletions(-)
diff --git a/cmd/haredoc/html.ha b/cmd/haredoc/html.ha
@@ -478,6 +478,9 @@ fn newline(out: io::handle, indent: size) (size | io::error) = {
return n;
};
+fn multiline_comment(s: str) bool =
+ strings::byteindex(s, '\n') as size != len(s) - 1;
+
fn enum_html(
out: io::handle,
indent: size,
diff --git a/cmd/haredoc/tty.ha b/cmd/haredoc/tty.ha
@@ -24,11 +24,10 @@ fn emit_tty(ctx: *context) (void | error) = {
for (true) match (bufio::scanline(readme)?) {
case io::EOF => break;
case let b: []u8 =>
+ defer free(b);
firstline = false;
- fmt::fprintfln(ctx.out,
- "\x1b[1m" "// {}" "\x1b[0m",
- strings::fromutf8(b))?;
- free(b);
+ insert(b[0], ' ');
+ comment_tty(ctx.out, strings::fromutf8(b))?;
};
case void => void;
};
@@ -51,6 +50,27 @@ fn emit_tty(ctx: *context) (void | error) = {
};
};
+fn comment_tty(out: io::handle, s: str) (size | io::error) = {
+ return fmt::fprintfln(out, "\x1b[1m" "//{}" "\x1b[0m", s)?;
+};
+
+fn docs_tty(out: io::handle, s: str, indent: size) (size | io::error) = {
+ const iter = strings::tokenize(s, "\n");
+ let z = 0z;
+ for (true) match (strings::next_token(&iter)) {
+ case let s: str =>
+ if (!(strings::peek_token(&iter) is void)) {
+ z += comment_tty(out, s)?;
+ for (let i = 0z; i < indent; i += 1) {
+ z += fmt::fprint(out, "\t")?;
+ };
+ };
+ case void => break;
+ };
+
+ return z;
+};
+
fn isws(s: str) bool = {
const iter = strings::iter(s);
for (true) {
@@ -75,18 +95,7 @@ fn details_tty(ctx: *context, decl: ast::decl) (void | error) = {
};
firstline = false;
- const iter = strings::tokenize(decl.docs, "\n");
- for (true) {
- match (strings::next_token(&iter)) {
- case let s: str =>
- if (!(strings::peek_token(&iter) is void)) {
- fmt::fprintfln(ctx.out,
- "\x1b[1m" "//{}" "\x1b[0m", s)?;
- };
- case void => break;
- };
- };
-
+ docs_tty(ctx.out, decl.docs, 0)?;
unparse_tty(ctx.out, decl)?;
fmt::fprintln(ctx.out)?;
};
@@ -194,7 +203,7 @@ fn prototype_tty(
return n;
};
-// newline() and builtin_type() are from cmd/haredoc/html.ha
+// newline(), builtin_type(), and multiline_comment() are from html.ha
// Forked from [[hare::unparse]]
fn struct_union_type_tty(
@@ -217,6 +226,9 @@ fn struct_union_type_tty(
indent += 1z;
for (let i = 0z; i < len(membs); i += 1) {
z += newline(out, indent)?;
+ if (membs[i].docs != "") {
+ z += docs_tty(out, membs[i].docs, indent)?;
+ };
match (membs[i]._offset) {
case null => void;
@@ -278,11 +290,22 @@ fn type_tty(
"\x1b[36m" "{} " "\x1b[0m",
unparse::builtin_type(e.storage))?;
};
- n += fmt::fprint(out, "{")?;
+ n += fmt::fprintln(out, "{")?;
indent += 1;
for (let i = 0z; i < len(e.values); i += 1) {
- n += newline(out, indent)?;
+ for (let i = 0z; i < indent; i += 1) {
+ n += fmt::fprint(out, "\t")?;
+ };
let value = e.values[i];
+ let wrotedocs = false;
+ if (value.docs != "") {
+ // Check if comment should go above or next to
+ // field
+ if (multiline_comment(value.docs)) {
+ n += docs_tty(out, value.docs, indent)?;
+ wrotedocs = true;
+ };
+ };
n += fmt::fprint(out, value.name)?;
match (value.value) {
case null => void;
@@ -291,9 +314,17 @@ fn type_tty(
n += unparse::expr(out, indent, *e)?;
};
n += fmt::fprint(out, ",")?;
+ if (value.docs != "" && !wrotedocs) {
+ n += fmt::fprint(out, " ")?;
+ n += docs_tty(out, value.docs, 0)?;
+ } else {
+ n += fmt::fprintln(out)?;
+ };
};
indent -= 1;
- n += newline(out, indent)?;
+ for (let i = 0z; i < indent; i += 1) {
+ n += fmt::fprint(out, "\t")?;
+ };
n += fmt::fprint(out, "}")?;
case let f: ast::func_type =>
if (f.attrs & ast::func_attrs::NORETURN != 0) {
diff --git a/hare/ast/expr.ha b/hare/ast/expr.ha
@@ -92,6 +92,7 @@ export type assign_expr = struct {
// A binary arithmetic operator
export type binarithm_op = enum {
// TODO: Rehome this with the checked AST?
+
BAND, // &
BOR, // |
DIV, // /
@@ -157,6 +158,7 @@ export type call_expr = struct {
// The kind of cast expression being used.
export type cast_kind = enum {
// TODO: Should this be rehomed with the checked AST?
+
CAST,
ASSERTION,
TEST,
@@ -363,6 +365,7 @@ export type switch_expr = struct {
// A unary operator
export type unarithm_op = enum {
// TODO: Should this be rehomed with the checked AST?
+
ADDR, // &
BNOT, // ~
DEREF, // *
diff --git a/hare/ast/type.ha b/hare/ast/type.ha
@@ -21,6 +21,7 @@ export type enum_field = struct {
name: str,
value: nullable *expr,
loc: lex::location,
+ docs: str,
};
// enum { FOO = 0, BAR, ... }
@@ -100,6 +101,9 @@ export type struct_alias = ident;
export type struct_member = struct {
_offset: nullable *expr,
member: (struct_field | struct_embedded | struct_alias),
+
+ // Only valid if the lexer has comments enabled
+ docs: str,
};
// struct { ... }
@@ -139,6 +143,7 @@ fn struct_type_free(t: (struct_type | union_type)) void = {
yield u: []struct_member;
};
for (let i = 0z; i < len(membs); i += 1) {
+ free(membs[i].docs);
match (membs[i]._offset) {
case null => void;
case let e: *expr =>
diff --git a/hare/lex/+test.ha b/hare/lex/+test.ha
@@ -207,7 +207,7 @@ fn loc(line: uint, col: uint) location = location {
assert(lex(&lexer) is token);
assert(comment(&lexer) == " foo\n bar\n");
assert(lex(&lexer) is token);
- assert(comment(&lexer) == "");
+ assert(comment(&lexer) == " baz\n");
assert(lex(&lexer) is token);
assert(comment(&lexer) == " bad\n");
};
diff --git a/hare/lex/lex.ha b/hare/lex/lex.ha
@@ -141,6 +141,8 @@ export fn lex(lex: *lexer) (token | error) = {
case =>
return syntaxerr(r.1, "invalid character");
};
+
+ line_comment(lex)?;
return (tok, void, r.1);
};
@@ -249,6 +251,7 @@ fn lex_string(lex: *lexer, loc: location, delim: rune) (token | error) = {
unget(lex, r);
};
};
+ line_comment(lex)?;
return (ltok::LIT_STR, strio::string(&buf), loc);
};
@@ -278,6 +281,7 @@ fn lex_rn_str(lex: *lexer) (token | error) = {
return syntaxerr(n.1, "expected \"\'\"");
};
};
+ line_comment(lex)?;
return ret;
};
@@ -301,6 +305,8 @@ fn lex_name(lex: *lexer, loc: location, label: bool) (token | error) = {
strio::appendrune(&buf, r.0)?;
};
+ line_comment(lex)?;
+
let n = strio::string(&buf);
if (label) {
return (ltok::LABEL, n, loc);
@@ -318,7 +324,35 @@ fn lex_name(lex: *lexer, loc: location, label: bool) (token | error) = {
};
};
-fn lex_comment(lexr: *lexer) (token | error) = {
+fn line_comment(lex: *lexer) (void | error) = {
+ if (lex.flags & flags::COMMENTS != flags::COMMENTS) {
+ return;
+ };
+
+ let r: (rune, location) = ('\0', location { ... });
+ for (true) match (try(lex, '\t', ' ', '/')?) {
+ case void =>
+ return;
+ case let v: (rune, location) =>
+ switch (v.0) {
+ case '\t', ' ' => void;
+ case '/' =>
+ r = v;
+ break;
+ };
+ };
+
+ if (try(lex, '/')? is void) {
+ unget(lex, r);
+ return;
+ };
+
+ free(lex.comment);
+ lex.comment = "";
+ lex_comment(lex)?;
+};
+
+fn lex_comment(lexr: *lexer) (void | error) = {
if (lexr.flags & flags::COMMENTS != flags::COMMENTS) {
for (true) match (next(lexr)?) {
case io::EOF =>
@@ -328,7 +362,7 @@ fn lex_comment(lexr: *lexer) (token | error) = {
break;
};
};
- return lex(lexr);
+ return;
};
let buf = strio::dynamic();
@@ -345,7 +379,6 @@ fn lex_comment(lexr: *lexer) (token | error) = {
let new = strings::concat(lexr.comment, strio::string(&buf));
free(lexr.comment);
lexr.comment = new;
- return lex(lexr);
};
fn lex_literal(lex: *lexer) (token | error) = {
@@ -574,26 +607,29 @@ fn lex_literal(lex: *lexer) (token | error) = {
return syntaxerr(loc, "overflow in exponent");
};
+ line_comment(lex)?;
return (suff, val, loc);
};
-fn lex2(lex: *lexer) (token | error) = {
- let first = next(lex)? as (rune, location);
+fn lex2(lexr: *lexer) (token | error) = {
+ let first = next(lexr)? as (rune, location);
let tok: (ltok, [](rune, ltok)) = switch (first.0) {
case '*' =>
yield (ltok::TIMES, [('=', ltok::TIMESEQ)]);
case '%' =>
yield (ltok::MODULO, [('=', ltok::MODEQ)]);
case '/' =>
- match (next(lex)?) {
+ match (next(lexr)?) {
case let r: (rune, location) =>
switch (r.0) {
case '=' =>
+ line_comment(lexr)?;
return (ltok::DIVEQ, void, first.1);
case '/' =>
- return lex_comment(lex);
+ lex_comment(lexr)?;
+ return lex(lexr);
case =>
- unget(lex, r);
+ unget(lexr, r);
return (ltok::DIV, void, first.1);
};
case io::EOF =>
@@ -602,18 +638,20 @@ fn lex2(lex: *lexer) (token | error) = {
case '+' =>
yield (ltok::PLUS, [('=', ltok::PLUSEQ)]);
case '-' =>
- match (next(lex)?) {
+ match (next(lexr)?) {
case let r: (rune, location) =>
switch (r.0) {
case '=' =>
+ line_comment(lexr)?;
return (ltok::MINUSEQ, void, first.1);
case =>
if (ascii::isdigit(r.0)) {
- unget(lex, r);
- unget(lex, first);
- return lex_literal(lex);
+ unget(lexr, r);
+ unget(lexr, first);
+ return lex_literal(lexr);
} else {
- unget(lex, r);
+ unget(lexr, r);
+ line_comment(lexr)?;
return (ltok::MINUS, void, first.1);
};
};
@@ -621,15 +659,17 @@ fn lex2(lex: *lexer) (token | error) = {
return (ltok::MINUS, void, first.1);
};
case ':' =>
- match (next(lex)?) {
+ match (next(lexr)?) {
case let r: (rune, location) =>
switch (r.0) {
case ':' =>
+ line_comment(lexr)?;
return (ltok::DOUBLE_COLON, void, first.1);
case =>
- unget(lex, r);
+ unget(lexr, r);
+ line_comment(lexr)?;
return if (is_name(r.0, false)) {
- yield lex_name(lex, first.1, true)?;
+ yield lex_name(lexr, first.1, true)?;
} else (ltok::COLON, void, first.1);
};
case io::EOF =>
@@ -642,14 +682,16 @@ fn lex2(lex: *lexer) (token | error) = {
case =>
return syntaxerr(first.1, "unknown token sequence");
};
- match (next(lex)?) {
+ match (next(lexr)?) {
case let r: (rune, location) =>
for (let i = 0z; i < len(tok.1); i += 1) {
if (tok.1[i].0 == r.0) {
+ line_comment(lexr)?;
return (tok.1[i].1, void, first.1);
};
};
- unget(lex, r);
+ unget(lexr, r);
+ line_comment(lexr)?;
case io::EOF => void;
};
return (tok.0, void, first.1);
@@ -662,6 +704,7 @@ fn lex3(lex: *lexer) (token | error) = {
let tok = if (try(lex, '.')? is void) ltok::DOT
else if (try(lex, '.')? is void) ltok::SLICE
else ltok::ELLIPSIS;
+ line_comment(lex)?;
return (tok, void, r.1);
case '<' =>
yield [ltok::LESS, ltok::LESSEQ, ltok::LSHIFT, ltok::LSHIFTEQ];
@@ -693,6 +736,7 @@ fn lex3(lex: *lexer) (token | error) = {
};
};
};
+ line_comment(lex)?;
return (toks[idx], void, r.1);
};
@@ -731,12 +775,18 @@ fn nextw(lex: *lexer) ((rune, location) | io::EOF | io::error) = {
case let e: (io::error | io::EOF) =>
return e;
case let r: (rune, location) =>
- if (!ascii::isspace(r.0)) {
- return r;
- } else {
+ if (ascii::isspace(r.0)) {
+ if (r.0 == '\n') {
+ free(lex.comment);
+ lex.comment = "";
+ };
+ continue;
+ };
+ if (!is_name(r.0, true) && r.0 != '/') {
free(lex.comment);
lex.comment = "";
};
+ return r;
};
abort();
};
diff --git a/hare/lex/token.ha b/hare/lex/token.ha
@@ -9,6 +9,7 @@ use strings;
export type ltok = enum uint {
// Keep ordered with bmap
// Alpha sorted
+
ATTR_FINI,
ATTR_INIT,
ATTR_NORETURN,
@@ -75,6 +76,7 @@ export type ltok = enum uint {
LAST_KEYWORD = YIELD,
// Operators
+
ARROW,
BAND,
BANDEQ,
diff --git a/hare/parse/+test/types.ha b/hare/parse/+test/types.ha
@@ -8,6 +8,7 @@
};
export type bar = union {
x: int,
+ // docs docs docs
y: int,
};
export type baz = struct {
@@ -31,9 +32,11 @@ export type bat = [void]int;
@test fn enum_type() void = {
roundtrip("export type foo = enum {
X = void,
+ // foo
+ // bar
Y = void,
- Z,
- Q,
+ Z, // foo
+ Q, // bar
};
export type bar = enum uint {
X = void,
diff --git a/hare/parse/type.ha b/hare/parse/type.ha
@@ -6,6 +6,7 @@ use hare::ast;
use hare::ast::{builtin_type};
use hare::lex;
use hare::lex::{ltok};
+use strings;
fn prototype(lexer: *lex::lexer) (ast::func_type | error) = {
let variadism = ast::variadism::NONE;
@@ -246,10 +247,13 @@ fn struct_union_type(lexer: *lex::lexer) (ast::_type | error) = {
break;
};
+ let comment = "";
+
let offs: nullable *ast::expr = match (try(lexer, ltok::ATTR_OFFSET)?) {
case void =>
yield null;
case lex::token =>
+ comment = strings::dup(lex::comment(lexer));
want(lexer, ltok::LPAREN)?;
let ex = expr(lexer)?;
want(lexer, ltok::RPAREN)?;
@@ -257,10 +261,13 @@ fn struct_union_type(lexer: *lex::lexer) (ast::_type | error) = {
};
let tok = want(lexer, ltok::NAME, ltok::STRUCT, ltok::UNION)?;
+ if (comment == "") {
+ comment = strings::dup(lex::comment(lexer));
+ };
switch (tok.0) {
case ltok::NAME =>
lex::unlex(lexer, tok);
- let memb = struct_embed_or_field(lexer, offs)?;
+ let memb = struct_embed_or_field(lexer, offs, comment)?;
append(membs, memb);
case ltok::STRUCT, ltok::UNION =>
lex::unlex(lexer, tok);
@@ -268,6 +275,7 @@ fn struct_union_type(lexer: *lex::lexer) (ast::_type | error) = {
append(membs, ast::struct_member {
_offset = offs,
member = alloc(subtype),
+ docs = comment,
});
case => abort();
};
@@ -296,6 +304,7 @@ fn struct_union_type(lexer: *lex::lexer) (ast::_type | error) = {
fn struct_embed_or_field(
lexer: *lex::lexer,
offs: nullable *ast::expr,
+ comment: str,
) (ast::struct_member | error) = {
// Disambiguates between `name: type` and `identifier`
//
@@ -321,6 +330,7 @@ fn struct_embed_or_field(
return ast::struct_member {
_offset = offs,
member = field,
+ docs = comment,
};
case ltok::DOUBLE_COLON =>
let id = ident(lexer)?;
@@ -333,6 +343,7 @@ fn struct_embed_or_field(
return ast::struct_member {
_offset = offs,
member = id: ast::struct_alias,
+ docs = comment,
};
};
@@ -399,19 +410,26 @@ fn enum_type(lexer: *lex::lexer) (ast::_type | error) = {
const loc = lex::mkloc(lexer);
let name = want(lexer, ltok::NAME)?;
+ let comment = strings::dup(lex::comment(lexer));
let value: nullable *ast::expr =
if (try(lexer, ltok::EQUAL) is lex::token)
alloc(expr(lexer)?)
else null;
- append(membs, ast::enum_field {
+ defer append(membs, ast::enum_field {
name = name.1 as str,
value = value,
loc = loc,
+ docs = comment,
});
switch (want(lexer, ltok::COMMA, ltok::RBRACE)?.0) {
- case ltok::COMMA => void;
+ case ltok::COMMA =>
+ const linecomment = lex::comment(lexer);
+ if (linecomment != "") {
+ free(comment);
+ comment = strings::dup(linecomment);
+ };
case ltok::RBRACE => break;
case => abort();
};
diff --git a/hare/unparse/type.ha b/hare/unparse/type.ha
@@ -6,6 +6,7 @@ use fmt;
use io;
use hare::ast;
use hare::lex;
+use strings;
use strio;
// Returns a builtin type as a string.
@@ -101,7 +102,13 @@ fn struct_union_type(
indent += 1z;
for (let i = 0z; i < len(membs); i += 1) {
- z += newline(out, indent)?;
+ z += fmt::fprintln(out)?;
+ if (membs[i].docs != "") {
+ z += comment(out, membs[i].docs, indent)?;
+ };
+ for (let i = 0z; i < indent; i += 1) {
+ z += fmt::fprint(out, "\t")?;
+ };
match (membs[i]._offset) {
case null => void;
@@ -130,7 +137,10 @@ fn struct_union_type(
return z;
};
-// Unparses a [[hare::ast::_type]].
+fn multiline_comment(s: str) bool =
+ strings::byteindex(s, '\n') as size != len(s) - 1;
+
+// Unparses an [[ast::_type]].
export fn _type(
out: io::handle,
indent: size,
@@ -160,9 +170,21 @@ export fn _type(
n += fmt::fprint(out, "enum {")?;
};
indent += 1;
+ n += fmt::fprintln(out)?;
for (let i = 0z; i < len(e.values); i += 1) {
- n += newline(out, indent)?;
let value = e.values[i];
+ let wrotedocs = false;
+ if (value.docs != "") {
+ // Check if comment should go above or next to
+ // field
+ if (multiline_comment(value.docs)) {
+ n += comment(out, value.docs, indent)?;
+ wrotedocs = true;
+ };
+ };
+ for (let i = 0z; i < indent; i += 1) {
+ n += fmt::fprint(out, "\t")?;
+ };
n += fmt::fprint(out, value.name)?;
match (value.value) {
case null => void;
@@ -171,9 +193,17 @@ export fn _type(
n += expr(out, indent, *e)?;
};
n += fmt::fprint(out, ",")?;
+ if (value.docs != "" && !wrotedocs) {
+ n += fmt::fprint(out, " ")?;
+ n += comment(out, value.docs, 0)?;
+ } else {
+ n += fmt::fprintln(out)?;
+ };
};
indent -= 1;
- n += newline(out, indent)?;
+ for (let i = 0z; i < indent; i += 1) {
+ n += fmt::fprint(out, "\t")?;
+ };
n += fmt::fprint(out, "}")?;
case let f: ast::func_type =>
if (f.attrs & ast::func_attrs::NORETURN != 0) {
@@ -311,11 +341,13 @@ fn type_test(t: ast::_type, expected: str) bool = {
name = "FOO",
value = null,
loc = loc,
+ docs = "",
},
ast::enum_field {
name = "BAR",
value = &expr_void,
loc = loc,
+ docs = "",
},
],
};
diff --git a/scripts/gen-stdlib b/scripts/gen-stdlib
@@ -613,7 +613,7 @@ hare_unparse() {
type.ha \
unit.ha \
util.ha
- gen_ssa hare::unparse fmt io strio hare::ast
+ gen_ssa hare::unparse fmt io strings strio hare::ast
}
gensrcs_hare_lex() {
diff --git a/stdlib.mk b/stdlib.mk
@@ -1166,7 +1166,7 @@ stdlib_hare_unparse_any_srcs= \
$(STDLIB)/hare/unparse/unit.ha \
$(STDLIB)/hare/unparse/util.ha
-$(HARECACHE)/hare/unparse/hare_unparse-any.ssa: $(stdlib_hare_unparse_any_srcs) $(stdlib_rt) $(stdlib_fmt_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_hare_ast_$(PLATFORM))
+$(HARECACHE)/hare/unparse/hare_unparse-any.ssa: $(stdlib_hare_unparse_any_srcs) $(stdlib_rt) $(stdlib_fmt_$(PLATFORM)) $(stdlib_io_$(PLATFORM)) $(stdlib_strings_$(PLATFORM)) $(stdlib_strio_$(PLATFORM)) $(stdlib_hare_ast_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(HARECACHE)/hare/unparse
@HARECACHE=$(HARECACHE) $(HAREC) $(HAREFLAGS) -o $@ -Nhare::unparse \
@@ -3075,7 +3075,7 @@ testlib_hare_unparse_any_srcs= \
$(STDLIB)/hare/unparse/unit.ha \
$(STDLIB)/hare/unparse/util.ha
-$(TESTCACHE)/hare/unparse/hare_unparse-any.ssa: $(testlib_hare_unparse_any_srcs) $(testlib_rt) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_hare_ast_$(PLATFORM))
+$(TESTCACHE)/hare/unparse/hare_unparse-any.ssa: $(testlib_hare_unparse_any_srcs) $(testlib_rt) $(testlib_fmt_$(PLATFORM)) $(testlib_io_$(PLATFORM)) $(testlib_strings_$(PLATFORM)) $(testlib_strio_$(PLATFORM)) $(testlib_hare_ast_$(PLATFORM))
@printf 'HAREC \t$@\n'
@mkdir -p $(TESTCACHE)/hare/unparse
@HARECACHE=$(TESTCACHE) $(HAREC) $(TESTHAREFLAGS) -o $@ -Nhare::unparse \