harec

[hare] Hare compiler, written in C11 for POSIX OSs
Log | Files | Refs | README | LICENSE

lex.h (2401B)


      1 #ifndef HAREC_LEX_H
      2 #define HAREC_LEX_H
      3 #include <stdint.h>
      4 #include <stdio.h>
      5 #include "types.h"
      6 #include "utf8.h"
      7 
      8 #define C_EOF UTF8_INVALID // Alias for UTF8_INVALID. NOTE(review): presumably the end-of-input code point sentinel, with UTF8_INVALID supplied by utf8.h - confirm
      9 
     10 // Keep sorted. NOTE(review): entries are not alphabetical by enumerator name, so the sort key is presumably the token's source spelling - confirm against the lexer implementation before adding entries
     11 enum lexical_token { // Token kinds returned by lex(): keywords first, then operators, then payload-carrying tokens, then magic markers
     12 	T_ATTR_FINI, // first keyword entry; the keyword range runs through T_LAST_KEYWORD
     13 	T_ATTR_INIT,
     14 	T_ATTR_NORETURN,
     15 	T_ATTR_OFFSET,
     16 	T_ATTR_PACKED,
     17 	T_ATTR_SYMBOL,
     18 	T_ATTR_TEST,
     19 	T_ATTR_THREADLOCAL,
     20 	T_UNDERSCORE, // sits inside the keyword range, between the T_ATTR_* entries and T_ABORT
     21 	T_ABORT,
     22 	T_ALIGN,
     23 	T_ALLOC,
     24 	T_APPEND,
     25 	T_AS,
     26 	T_ASSERT,
     27 	T_BOOL,
     28 	T_BREAK,
     29 	T_CASE,
     30 	T_CHAR,
     31 	T_CONST,
     32 	T_CONTINUE,
     33 	T_DEF,
     34 	T_DEFER,
     35 	T_DELETE,
     36 	T_ELSE,
     37 	T_ENUM,
     38 	T_EXPORT,
     39 	T_F32,
     40 	T_F64,
     41 	T_FALSE,
     42 	T_FN,
     43 	T_FOR,
     44 	T_FREE,
     45 	T_I16,
     46 	T_I32,
     47 	T_I64,
     48 	T_I8,
     49 	T_IF,
     50 	T_INSERT,
     51 	T_INT,
     52 	T_IS,
     53 	T_LEN,
     54 	T_LET,
     55 	T_MATCH,
     56 	T_NULL,
     57 	T_NULLABLE,
     58 	T_OFFSET,
     59 	T_RETURN,
     60 	T_RUNE,
     61 	T_SIZE,
     62 	T_STATIC,
     63 	T_STR,
     64 	T_STRUCT,
     65 	T_SWITCH,
     66 	T_TRUE,
     67 	T_TYPE,
     68 	T_U16,
     69 	T_U32,
     70 	T_U64,
     71 	T_U8,
     72 	T_UINT,
     73 	T_UINTPTR,
     74 	T_UNION,
     75 	T_USE,
     76 	T_VAARG,
     77 	T_VAEND,
     78 	T_VALIST,
     79 	T_VASTART,
     80 	T_VOID,
     81 	T_YIELD,
     82 	T_LAST_KEYWORD = T_YIELD, // alias with the same value as T_YIELD, not a new token; marks the end of the keyword range
     83 
     84 	// Operators
     85 	T_ARROW, // first operator entry; its value is T_LAST_KEYWORD + 1
     86 	T_BANDEQ,
     87 	T_BAND,
     88 	T_BNOT,
     89 	T_BOR,
     90 	T_COLON,
     91 	T_COMMA,
     92 	T_DIV,
     93 	T_DIVEQ,
     94 	T_DOT,
     95 	T_DOUBLE_COLON,
     96 	T_ELLIPSIS,
     97 	T_EQUAL,
     98 	T_GREATER,
     99 	T_GREATEREQ,
    100 	T_LAND,
    101 	T_LANDEQ,
    102 	T_LBRACE,
    103 	T_LBRACKET,
    104 	T_LEQUAL,
    105 	T_LESS,
    106 	T_LESSEQ,
    107 	T_LNOT,
    108 	T_LOR,
    109 	T_LOREQ,
    110 	T_LPAREN,
    111 	T_LSHIFT,
    112 	T_LSHIFTEQ,
    113 	T_LXOR,
    114 	T_LXOREQ,
    115 	T_MINUS,
    116 	T_MINUSEQ,
    117 	T_MODEQ,
    118 	T_MODULO,
    119 	T_NEQUAL,
    120 	T_BOREQ, // NOTE(review): out of name order here (as are T_BXOR/T_BXOREQ below); numeric values may be relied on elsewhere, so do not reorder without checking every user
    121 	T_PLUS,
    122 	T_PLUSEQ,
    123 	T_QUESTION,
    124 	T_RBRACE,
    125 	T_RBRACKET,
    126 	T_RPAREN,
    127 	T_RSHIFT,
    128 	T_RSHIFTEQ,
    129 	T_SEMICOLON,
    130 	T_SLICE,
    131 	T_TIMES,
    132 	T_TIMESEQ,
    133 	T_BXOR,
    134 	T_BXOREQ,
    135 	T_LAST_OPERATOR = T_BXOREQ, // alias with the same value as T_BXOREQ, not a new token; marks the end of the operator range
    136 
    137 	// Tokens with additional information
    138 	T_LITERAL, // NOTE(review): presumably pairs with the storage field and value members of struct token - confirm
    139 	T_NAME, // NOTE(review): presumably pairs with the name member of struct token - confirm
    140 
    141 	// Magic tokens
    142 	T_EOF,
    143 	T_NONE,
    144 };
    145 
    146 struct location { // A source position: a file identifier plus line and column numbers
    147 	int file; // integer file identifier. NOTE(review): presumably the fileid passed to lex_init - confirm
    148 	int lineno, colno; // line and column. NOTE(review): whether these are 0- or 1-based is not visible in this header
    149 };
    150 
    151 struct token { // One token: its position, its kind, and a payload held in an anonymous union
    152 	struct location loc; // file/line/column associated with this token
    153 	enum lexical_token token; // the token kind
    154 	enum type_storage storage; // enum type_storage is not declared in this header. NOTE(review): presumably from types.h and describes a literal's type - confirm
    155 	union { // anonymous union: every member below shares storage, so only one is meaningful at a time. NOTE(review): the active member is presumably chosen by token/storage - confirm
    156 		char *name;
    157 		uint32_t rune;
    158 		intmax_t ival;
    159 		uintmax_t uval;
    160 		double fval;
    161 		struct { // length-plus-pointer pair. NOTE(review): whether value is NUL-terminated or owned by the token is not visible here
    162 			size_t len;
    163 			char *value;
    164 		} string;
    165 	};
    166 };
    167 
    168 struct lexer { // Lexer state passed to lex_init, lex_finish, lex and unlex
    169 	FILE *in; // stdio stream. NOTE(review): presumably the f handed to lex_init - confirm
    170 	char *buf; // NOTE(review): presumably scratch storage for the token being assembled - confirm
    171 	size_t bufsz, buflen; // NOTE(review): presumably capacity and used length of buf - confirm
    172 	uint32_t c[2]; // two 32-bit slots. NOTE(review): presumably pushed-back or lookahead code points - confirm
    173 	struct token un; // NOTE(review): presumably the token saved by unlex() - confirm
    174 	struct location loc; // NOTE(review): presumably the current read position - confirm
    175 	bool require_int; // bool is not supplied by <stdint.h> or <stdio.h>; it presumably arrives via types.h or utf8.h - confirm
    176 };
    177 
    178 void lex_init(struct lexer *lexer, FILE *f, int fileid); // NOTE(review): presumably initializes *lexer to read from f and records fileid for locations - confirm in the implementation
    179 void lex_finish(struct lexer *lexer); // NOTE(review): presumably releases lexer resources; whether it closes the FILE is not visible here
    180 enum lexical_token lex(struct lexer *lexer, struct token *out); // returns a token kind. NOTE(review): presumably also fills *out with the next token - confirm
    181 void unlex(struct lexer *lexer, struct token *in); // NOTE(review): presumably pushes *in back so a later lex() yields it again - confirm
    182 
    183 void token_finish(struct token *tok); // NOTE(review): presumably releases any payload owned by *tok - confirm
    184 const char *token_str(const struct token *tok); // returns a read-only string for a token. NOTE(review): lifetime of the returned pointer (static buffer or not) is not visible here
    185 const char *lexical_token_str(enum lexical_token tok); // returns a read-only string for a token kind. NOTE(review): behaviour for T_LITERAL/T_NAME and the magic tokens is not visible here
    186 
    187 #endif