hare

[hare] The Hare programming language
git clone https://git.torresjrjr.com/hare.git
Log | Files | Refs | README | LICENSE

token.ha (3951B)


      1 // SPDX-License-Identifier: MPL-2.0
      2 // (c) Hare authors <https://harelang.org>
      3 
      4 use encoding::utf8;
      5 use strings;
      6 
      7 // A lexical token class. Every class up to LAST_BTOK has a fixed spelling in bmap.
      8 export type ltok = enum uint {
      9 	// Keep ordered with bmap: tokstr indexes bmap directly by enum value.
     10 	// Alpha sorted: keywords by spelling ("_" sorts between "@..." and "a..."), operators by member name.
     11 
     12 	ATTR_FINI,
     13 	ATTR_INIT,
     14 	ATTR_OFFSET,
     15 	ATTR_PACKED,
     16 	ATTR_SYMBOL,
     17 	ATTR_TEST,
     18 	ATTR_THREADLOCAL,
     19 	UNDERSCORE,
     20 	ABORT,
     21 	ALIGN,
     22 	ALLOC,
     23 	APPEND,
     24 	AS,
     25 	ASSERT,
     26 	BOOL,
     27 	BREAK,
     28 	CASE,
     29 	CONST,
     30 	CONTINUE,
     31 	DEF,
     32 	DEFER,
     33 	DELETE,
     34 	DONE,
     35 	ELSE,
     36 	ENUM,
     37 	EXPORT,
     38 	F32,
     39 	F64,
     40 	FALSE,
     41 	FN,
     42 	FOR,
     43 	FREE,
     44 	I16,
     45 	I32,
     46 	I64,
     47 	I8,
     48 	IF,
     49 	INSERT,
     50 	INT,
     51 	IS,
     52 	LEN,
     53 	LET,
     54 	MATCH,
     55 	NEVER,
     56 	NULL,
     57 	NULLABLE,
     58 	OFFSET,
     59 	OPAQUE,
     60 	RETURN,
     61 	RUNE,
     62 	SIZE,
     63 	STATIC,
     64 	STR,
     65 	STRUCT,
     66 	SWITCH,
     67 	TRUE,
     68 	TYPE,
     69 	U16,
     70 	U32,
     71 	U64,
     72 	U8,
     73 	UINT,
     74 	UINTPTR,
     75 	UNION,
     76 	USE,
     77 	VAARG,
     78 	VAEND,
     79 	VALIST,
     80 	VASTART,
     81 	VOID,
     82 	YIELD,
     83 	LAST_KEYWORD = YIELD, // alias of YIELD (takes no new value); marks the end of the keyword range
     84 
     85 	// Operators
     86 
     87 	ARROW,
     88 	BAND,
     89 	BANDEQ,
     90 	BNOT,
     91 	BOR,
     92 	BOREQ,
     93 	BXOR,
     94 	BXOREQ,
     95 	COLON,
     96 	COMMA,
     97 	DIV,
     98 	DIVEQ,
     99 	DOT,
    100 	DOUBLE_COLON,
    101 	DOUBLE_DOT,
    102 	ELLIPSIS,
    103 	EQUAL,
    104 	GT,
    105 	GTEQ,
    106 	LAND,
    107 	LANDEQ,
    108 	LBRACE,
    109 	LBRACKET,
    110 	LEQUAL,
    111 	LESS,
    112 	LESSEQ,
    113 	LNOT,
    114 	LOR,
    115 	LOREQ,
    116 	LPAREN,
    117 	LSHIFT,
    118 	LSHIFTEQ,
    119 	LXOR,
    120 	LXOREQ,
    121 	MINUS,
    122 	MINUSEQ,
    123 	MODEQ,
    124 	MODULO,
    125 	NEQUAL,
    126 	PLUS,
    127 	PLUSEQ,
    128 	QUESTION,
    129 	RBRACE,
    130 	RBRACKET,
    131 	RPAREN,
    132 	RSHIFT,
    133 	RSHIFTEQ,
    134 	SEMICOLON,
    135 	TIMES,
    136 	TIMESEQ,
    137 	LAST_BTOK = TIMESEQ, // alias of TIMESEQ; last class that has an entry in bmap
    138 	// Literals (tokstr renders these as the name of the literal class)
    139 	LIT_U8,
    140 	LIT_U16,
    141 	LIT_U32,
    142 	LIT_U64,
    143 	LIT_UINT,
    144 	LIT_SIZE,
    145 	LIT_I8,
    146 	LIT_I16,
    147 	LIT_I32,
    148 	LIT_I64,
    149 	LIT_INT,
    150 	LIT_ICONST,
    151 	LIT_F32,
    152 	LIT_F64,
    153 	LIT_FCONST,
    154 	LIT_RCONST,
    155 	LIT_STR,
    156 	LAST_LITERAL = LIT_STR, // alias of LIT_STR; marks the end of the literal range
    157 
    158 	NAME,
    159 	EOF,
    160 };
    161 
    162 const bmap: [_]str = [
    163 	// Keep ordered with ltok: entry i is the spelling of the token class whose value is i.
    164 	"@fini",
    165 	"@init",
    166 	"@offset",
    167 	"@packed",
    168 	"@symbol",
    169 	"@test",
    170 	"@threadlocal",
    171 	"_",
    172 	"abort",
    173 	"align",
    174 	"alloc",
    175 	"append",
    176 	"as",
    177 	"assert",
    178 	"bool",
    179 	"break",
    180 	"case",
    181 	"const",
    182 	"continue",
    183 	"def",
    184 	"defer",
    185 	"delete",
    186 	"done",
    187 	"else",
    188 	"enum",
    189 	"export",
    190 	"f32",
    191 	"f64",
    192 	"false",
    193 	"fn",
    194 	"for",
    195 	"free",
    196 	"i16",
    197 	"i32",
    198 	"i64",
    199 	"i8",
    200 	"if",
    201 	"insert",
    202 	"int",
    203 	"is",
    204 	"len",
    205 	"let",
    206 	"match",
    207 	"never",
    208 	"null",
    209 	"nullable",
    210 	"offset",
    211 	"opaque",
    212 	"return",
    213 	"rune",
    214 	"size",
    215 	"static",
    216 	"str",
    217 	"struct",
    218 	"switch",
    219 	"true",
    220 	"type",
    221 	"u16",
    222 	"u32",
    223 	"u64",
    224 	"u8",
    225 	"uint",
    226 	"uintptr",
    227 	"union",
    228 	"use",
    229 	"vaarg",
    230 	"vaend",
    231 	"valist",
    232 	"vastart",
    233 	"void",
    234 	"yield",
    235 	"=>", // first operator spelling (ltok::ARROW)
    236 	"&",
    237 	"&=",
    238 	"~",
    239 	"|",
    240 	"|=",
    241 	"^",
    242 	"^=",
    243 	":",
    244 	",",
    245 	"/",
    246 	"/=",
    247 	".",
    248 	"::",
    249 	"..",
    250 	"...",
    251 	"=",
    252 	">",
    253 	">=",
    254 	"&&",
    255 	"&&=",
    256 	"{",
    257 	"[",
    258 	"==",
    259 	"<",
    260 	"<=",
    261 	"!",
    262 	"||",
    263 	"||=",
    264 	"(",
    265 	"<<",
    266 	"<<=",
    267 	"^^",
    268 	"^^=",
    269 	"-",
    270 	"-=",
    271 	"%=",
    272 	"%",
    273 	"!=",
    274 	"+",
    275 	"+=",
    276 	"?",
    277 	"}",
    278 	"]",
    279 	")",
    280 	">>",
    281 	">>=",
    282 	";",
    283 	"*",
    284 	"*=", // last entry (ltok::TIMESEQ, i.e. ltok::LAST_BTOK)
    285 ];
    286 // Compile-time check: bmap has exactly one spelling per class up to and including LAST_BTOK.
    287 static assert(len(bmap) == ltok::LAST_BTOK: size + 1);
    288 
    289 // The value carried by a token, e.g. the integer of the literal '1337' or the str of a NAME (see tokstr).
    290 export type value = (str | rune | u64 | f64 | void);
    291 
    292 // A location within a source file, identified by path, line and column.
    293 // The path is borrowed from the file name given to the lexer.
    294 export type location = struct {
    295 	path: str,
    296 	line: uint, // NOTE(review): numbering origin (0 or 1) is not visible here; confirm
    297 	col: uint // NOTE(review): unit (bytes, runes or display columns) is not visible here; confirm
    298 };
    299 
    300 // A single lexical token: its class (.0), its value (.1) and its location (.2).
    301 export type token = (ltok, value, location);
    302 
    303 // Returns the string form of a token. Keyword and operator classes (anything
    304 // up to and including ltok::LAST_BTOK) map to their spelling in bmap. Literal
    305 // classes map to the name of the class, not to the literal's value. NAME maps
    306 // to the token's value, which must be a str, and EOF maps to "EOF". Any other
    307 // class aborts.
    308 export fn tokstr(tok: token) const str = {
    309 	const kind = tok.0;
    310 	if (kind <= ltok::LAST_BTOK) {
    311 		// bmap holds one spelling per class in this range.
    312 		return bmap[kind: int];
    313 	};
    314 
    315 	return switch (kind) {
    316 	case ltok::LIT_U8 => yield "u8";
    317 	case ltok::LIT_U16 => yield "u16";
    318 	case ltok::LIT_U32 => yield "u32";
    319 	case ltok::LIT_U64 => yield "u64";
    320 	case ltok::LIT_UINT => yield "uint";
    321 	case ltok::LIT_SIZE => yield "size";
    322 	case ltok::LIT_I8 => yield "i8";
    323 	case ltok::LIT_I16 => yield "i16";
    324 	case ltok::LIT_I32 => yield "i32";
    325 	case ltok::LIT_I64 => yield "i64";
    326 	case ltok::LIT_INT => yield "int";
    327 	case ltok::LIT_ICONST => yield "iconst";
    328 	case ltok::LIT_F32 => yield "f32";
    329 	case ltok::LIT_F64 => yield "f64";
    330 	case ltok::LIT_FCONST => yield "fconst";
    331 	case ltok::LIT_RCONST => yield "rconst";
    332 	case ltok::LIT_STR => yield "str";
    333 	case ltok::NAME => yield tok.1 as str;
    334 	case ltok::EOF => yield "EOF";
    335 	case => abort();
    336 	};
    337 };