lex.h (2401B)
1 #ifndef HAREC_LEX_H 2 #define HAREC_LEX_H 3 #include <stdint.h> 4 #include <stdio.h> 5 #include "types.h" 6 #include "utf8.h" 7 8 #define C_EOF UTF8_INVALID 9 10 // Keep sorted 11 enum lexical_token { 12 T_ATTR_FINI, 13 T_ATTR_INIT, 14 T_ATTR_NORETURN, 15 T_ATTR_OFFSET, 16 T_ATTR_PACKED, 17 T_ATTR_SYMBOL, 18 T_ATTR_TEST, 19 T_ATTR_THREADLOCAL, 20 T_UNDERSCORE, 21 T_ABORT, 22 T_ALIGN, 23 T_ALLOC, 24 T_APPEND, 25 T_AS, 26 T_ASSERT, 27 T_BOOL, 28 T_BREAK, 29 T_CASE, 30 T_CHAR, 31 T_CONST, 32 T_CONTINUE, 33 T_DEF, 34 T_DEFER, 35 T_DELETE, 36 T_ELSE, 37 T_ENUM, 38 T_EXPORT, 39 T_F32, 40 T_F64, 41 T_FALSE, 42 T_FN, 43 T_FOR, 44 T_FREE, 45 T_I16, 46 T_I32, 47 T_I64, 48 T_I8, 49 T_IF, 50 T_INSERT, 51 T_INT, 52 T_IS, 53 T_LEN, 54 T_LET, 55 T_MATCH, 56 T_NULL, 57 T_NULLABLE, 58 T_OFFSET, 59 T_RETURN, 60 T_RUNE, 61 T_SIZE, 62 T_STATIC, 63 T_STR, 64 T_STRUCT, 65 T_SWITCH, 66 T_TRUE, 67 T_TYPE, 68 T_U16, 69 T_U32, 70 T_U64, 71 T_U8, 72 T_UINT, 73 T_UINTPTR, 74 T_UNION, 75 T_USE, 76 T_VAARG, 77 T_VAEND, 78 T_VALIST, 79 T_VASTART, 80 T_VOID, 81 T_YIELD, 82 T_LAST_KEYWORD = T_YIELD, 83 84 // Operators 85 T_ARROW, 86 T_BANDEQ, 87 T_BAND, 88 T_BNOT, 89 T_BOR, 90 T_COLON, 91 T_COMMA, 92 T_DIV, 93 T_DIVEQ, 94 T_DOT, 95 T_DOUBLE_COLON, 96 T_ELLIPSIS, 97 T_EQUAL, 98 T_GREATER, 99 T_GREATEREQ, 100 T_LAND, 101 T_LANDEQ, 102 T_LBRACE, 103 T_LBRACKET, 104 T_LEQUAL, 105 T_LESS, 106 T_LESSEQ, 107 T_LNOT, 108 T_LOR, 109 T_LOREQ, 110 T_LPAREN, 111 T_LSHIFT, 112 T_LSHIFTEQ, 113 T_LXOR, 114 T_LXOREQ, 115 T_MINUS, 116 T_MINUSEQ, 117 T_MODEQ, 118 T_MODULO, 119 T_NEQUAL, 120 T_BOREQ, 121 T_PLUS, 122 T_PLUSEQ, 123 T_QUESTION, 124 T_RBRACE, 125 T_RBRACKET, 126 T_RPAREN, 127 T_RSHIFT, 128 T_RSHIFTEQ, 129 T_SEMICOLON, 130 T_SLICE, 131 T_TIMES, 132 T_TIMESEQ, 133 T_BXOR, 134 T_BXOREQ, 135 T_LAST_OPERATOR = T_BXOREQ, 136 137 // Tokens with additional information 138 T_LITERAL, 139 T_NAME, 140 141 // Magic tokens 142 T_EOF, 143 T_NONE, 144 }; 145 146 struct location { 147 int file; 148 int lineno, colno; 149 }; 150 151 struct token { 152 struct location loc; 153 enum lexical_token token; 154 enum type_storage storage; 155 union { 156 char *name; 157 uint32_t rune; 158 intmax_t ival; 159 uintmax_t uval; 160 double fval; 161 struct { 162 size_t len; 163 char *value; 164 } string; 165 }; 166 }; 167 168 struct lexer { 169 FILE *in; 170 char *buf; 171 size_t bufsz, buflen; 172 uint32_t c[2]; 173 struct token un; 174 struct location loc; 175 bool require_int; 176 }; 177 178 void lex_init(struct lexer *lexer, FILE *f, int fileid); 179 void lex_finish(struct lexer *lexer); 180 enum lexical_token lex(struct lexer *lexer, struct token *out); 181 void unlex(struct lexer *lexer, struct token *in); 182 183 void token_finish(struct token *tok); 184 const char *token_str(const struct token *tok); 185 const char *lexical_token_str(enum lexical_token tok); 186 187 #endif