hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

token.ha (4103B)


      1 // License: MPL-2.0
      2 // (c) 2021 Bor Grošelj Simić <bor.groseljsimic@telemach.net>
      3 // (c) 2021 Drew DeVault <sir@cmpwn.com>
      4 // (c) 2021 Ember Sawady <ecs@d2evs.net>
      5 use encoding::utf8;
      6 use strings;
      7 
      8 // A lexical token class: attributes and keywords, then operators, then literals, NAME and EOF.
      9 export type ltok = enum uint {
     10 	// Keep ordered with bmap: tokstr uses the enum value as an index into bmap
     11 	// Attributes and keywords are sorted by their source text ("@..." and "_" sort before letters)
     12 
     13 	ATTR_FINI,
     14 	ATTR_INIT,
     15 	ATTR_NORETURN,
     16 	ATTR_OFFSET,
     17 	ATTR_PACKED,
     18 	ATTR_SYMBOL,
     19 	ATTR_TEST,
     20 	ATTR_THREADLOCAL,
     21 	UNDERSCORE,
     22 	ABORT,
     23 	ALIGN,
     24 	ALLOC,
     25 	APPEND,
     26 	AS,
     27 	ASSERT,
     28 	BOOL,
     29 	BREAK,
     30 	CASE,
     31 	CONST,
     32 	CONTINUE,
     33 	DEF,
     34 	DEFER,
     35 	DELETE,
     36 	ELSE,
     37 	ENUM,
     38 	EXPORT,
     39 	F32,
     40 	F64,
     41 	FALSE,
     42 	FN,
     43 	FOR,
     44 	FREE,
     45 	I16,
     46 	I32,
     47 	I64,
     48 	I8,
     49 	IF,
     50 	INSERT,
     51 	INT,
     52 	IS,
     53 	LEN,
     54 	LET,
     55 	MATCH,
     56 	NULL,
     57 	NULLABLE,
     58 	OFFSET,
     59 	RETURN,
     60 	RUNE,
     61 	SIZE,
     62 	STATIC,
     63 	STR,
     64 	STRUCT,
     65 	SWITCH,
     66 	TRUE,
     67 	TYPE,
     68 	U16,
     69 	U32,
     70 	U64,
     71 	U8,
     72 	UINT,
     73 	UINTPTR,
     74 	UNION,
     75 	USE,
     76 	VAARG,
     77 	VAEND,
     78 	VALIST,
     79 	VASTART,
     80 	VOID,
     81 	YIELD,
     82 	LAST_KEYWORD = YIELD, // same value as YIELD; marks the end of the keyword range
     83 
     84 	// Operators and punctuation, sorted by enum name
     85 
     86 	ARROW,
     87 	BAND,
     88 	BANDEQ,
     89 	BNOT,
     90 	BOR,
     91 	BOREQ,
     92 	BXOR,
     93 	BXOREQ,
     94 	COLON,
     95 	COMMA,
     96 	DIV,
     97 	DIVEQ,
     98 	DOT,
     99 	DOUBLE_COLON,
    100 	ELLIPSIS,
    101 	EQUAL,
    102 	GT,
    103 	GTEQ,
    104 	LAND,
    105 	LANDEQ,
    106 	LBRACE,
    107 	LBRACKET,
    108 	LEQUAL,
    109 	LESS,
    110 	LESSEQ,
    111 	LNOT,
    112 	LOR,
    113 	LOREQ,
    114 	LPAREN,
    115 	LSHIFT,
    116 	LSHIFTEQ,
    117 	LXOR,
    118 	LXOREQ,
    119 	MINUS,
    120 	MINUSEQ,
    121 	MODEQ,
    122 	MODULO,
    123 	NEQUAL,
    124 	PLUS,
    125 	PLUSEQ,
    126 	QUESTION,
    127 	RBRACE,
    128 	RBRACKET,
    129 	RPAREN,
    130 	RSHIFT,
    131 	RSHIFTEQ,
    132 	SEMICOLON,
    133 	SLICE,
    134 	TIMES,
    135 	TIMESEQ,
    136 	LAST_BTOK = TIMESEQ, // same value as TIMESEQ; last class that has an entry in bmap
    137 
    138 	LIT_U8,
    139 	LIT_U16,
    140 	LIT_U32,
    141 	LIT_U64,
    142 	LIT_UINT,
    143 	LIT_SIZE,
    144 	LIT_I8,
    145 	LIT_I16,
    146 	LIT_I32,
    147 	LIT_I64,
    148 	LIT_INT,
    149 	LIT_ICONST,
    150 	LIT_F32,
    151 	LIT_F64,
    152 	LIT_FCONST,
    153 	LIT_RUNE,
    154 	LIT_STR,
    155 	LAST_LITERAL = LIT_STR, // same value as LIT_STR; marks the end of the literal range
    156 
    157 	NAME,
    158 	EOF,
    159 };
    160 
    161 const bmap: [_]str = [
    162 	// Keep ordered with ltok: entry i is the source text of the token class whose value is i
    163 	"@fini", // ltok::ATTR_FINI, the first class
    164 	"@init",
    165 	"@noreturn",
    166 	"@offset",
    167 	"@packed",
    168 	"@symbol",
    169 	"@test",
    170 	"@threadlocal",
    171 	"_",
    172 	"abort",
    173 	"align",
    174 	"alloc",
    175 	"append",
    176 	"as",
    177 	"assert",
    178 	"bool",
    179 	"break",
    180 	"case",
    181 	"const",
    182 	"continue",
    183 	"def",
    184 	"defer",
    185 	"delete",
    186 	"else",
    187 	"enum",
    188 	"export",
    189 	"f32",
    190 	"f64",
    191 	"false",
    192 	"fn",
    193 	"for",
    194 	"free",
    195 	"i16",
    196 	"i32",
    197 	"i64",
    198 	"i8",
    199 	"if",
    200 	"insert",
    201 	"int",
    202 	"is",
    203 	"len",
    204 	"let",
    205 	"match",
    206 	"null",
    207 	"nullable",
    208 	"offset",
    209 	"return",
    210 	"rune",
    211 	"size",
    212 	"static",
    213 	"str",
    214 	"struct",
    215 	"switch",
    216 	"true",
    217 	"type",
    218 	"u16",
    219 	"u32",
    220 	"u64",
    221 	"u8",
    222 	"uint",
    223 	"uintptr",
    224 	"union",
    225 	"use",
    226 	"vaarg",
    227 	"vaend",
    228 	"valist",
    229 	"vastart",
    230 	"void",
    231 	"yield", // ltok::YIELD, which is also ltok::LAST_KEYWORD
    232 	"=>", // ltok::ARROW, the first operator
    233 	"&",
    234 	"&=",
    235 	"~",
    236 	"|",
    237 	"|=",
    238 	"^",
    239 	"^=",
    240 	":",
    241 	",",
    242 	"/",
    243 	"/=",
    244 	".",
    245 	"::",
    246 	"...",
    247 	"=",
    248 	">",
    249 	">=",
    250 	"&&",
    251 	"&&=",
    252 	"{",
    253 	"[",
    254 	"==",
    255 	"<",
    256 	"<=",
    257 	"!",
    258 	"||",
    259 	"||=",
    260 	"(",
    261 	"<<",
    262 	"<<=",
    263 	"^^",
    264 	"^^=",
    265 	"-",
    266 	"-=",
    267 	"%=", // ltok::MODEQ comes before ltok::MODULO in the enum
    268 	"%",
    269 	"!=",
    270 	"+",
    271 	"+=",
    272 	"?",
    273 	"}",
    274 	"]",
    275 	")",
    276 	">>",
    277 	">>=",
    278 	";",
    279 	"..", // ltok::SLICE
    280 	"*",
    281 	"*=", // ltok::TIMESEQ, which is also ltok::LAST_BTOK
    282 ];
    283 
    284 // Checks that bmap has one entry per ltok up to LAST_BTOK. XXX: drop the @test fn once top-level static assertions are supported
    285 @test fn bmaplen() void = static assert(ltok::LAST_BTOK: size + 1 == len(bmap));
    286 
    287 // A token value, used for tokens such as '1337' (an integer). For NAME tokens, tokstr reads it as a str.
    288 export type value = (str | rune | u64 | f64 | void);
    289 
    290 // A location within a source file.
    291 // The path is borrowed from the file name given to the lexer.
    292 export type location = struct {
    293 	path: str, // file name as given to the lexer; borrowed
    294 	line: uint, // line number; NOTE(review): whether it is 0- or 1-based is not visible here, confirm
    295 	col: uint // column number; NOTE(review): base and unit (bytes or runes) are not visible here, confirm
    296 };
    297 
    298 // A single lexical token: its class (.0), its value (.1) and a source location (.2).
    299 export type token = (ltok, value, location);
    300 
    301 // Converts a token to its string representation: attributes, keywords and operators
    302 // give their bmap text, literals a short label such as "u8", and NAME its identifier.
    303 export fn tokstr(tok: token) const str = {
    304 	const kind = tok.0;
    305 	// Every class up to LAST_BTOK has a fixed spelling stored in bmap.
    306 	if (kind <= ltok::LAST_BTOK) {
    307 		return bmap[kind: int];
    308 	};
    309 	return switch (kind) {
    310 	case ltok::LIT_U8 =>
    311 		yield "u8";
    312 	case ltok::LIT_U16 =>
    313 		yield "u16";
    314 	case ltok::LIT_U32 =>
    315 		yield "u32";
    316 	case ltok::LIT_U64 =>
    317 		yield "u64";
    318 	case ltok::LIT_UINT =>
    319 		yield "uint";
    320 	case ltok::LIT_SIZE =>
    321 		yield "size";
    322 	case ltok::LIT_I8 =>
    323 		yield "i8";
    324 	case ltok::LIT_I16 =>
    325 		yield "i16";
    326 	case ltok::LIT_I32 =>
    327 		yield "i32";
    328 	case ltok::LIT_I64 =>
    329 		yield "i64";
    330 	case ltok::LIT_INT =>
    331 		yield "int";
    332 	case ltok::LIT_ICONST =>
    333 		yield "iconst";
    334 	case ltok::LIT_F32 =>
    335 		yield "f32";
    336 	case ltok::LIT_F64 =>
    337 		yield "f64";
    338 	case ltok::LIT_FCONST =>
    339 		yield "fconst";
    340 	case ltok::LIT_RUNE =>
    341 		yield "rune";
    342 	case ltok::LIT_STR =>
    343 		yield "str";
    344 	case ltok::NAME =>
    345 		yield tok.1 as str;
    346 	case ltok::EOF =>
    347 		yield "EOF";
    348 	case =>
    349 		abort();
    350 	};
    351 };