harec

[hare] Hare compiler, written in C11 for POSIX OSs
Log | Files | Refs | README | LICENSE

lex.h (2309B)


      1 #ifndef HAREC_LEX_H
      2 #define HAREC_LEX_H
      3 #include <stdint.h>
      4 #include <stdio.h>
      5 #include "types.h"
      6 
      7 // Keep sorted. NOTE(review): the order is not alphabetical by enumerator name (see T_UNDERSCORE, T_BANDEQ, T_BOREQ, T_BXOR); the sort key is presumably whatever the lexer implementation relies on -- confirm before reordering.
      8 enum lexical_token { // Every kind of token that lex() can return
      9 	T_ATTR_FINI, // Start of the keyword range (value 0); the range ends at T_LAST_KEYWORD
     10 	T_ATTR_INIT,
     11 	T_ATTR_NORETURN,
     12 	T_ATTR_OFFSET,
     13 	T_ATTR_SYMBOL,
     14 	T_ATTR_TEST,
     15 	T_UNDERSCORE,
     16 	T_ABORT,
     17 	T_ALLOC,
     18 	T_APPEND,
     19 	T_AS,
     20 	T_ASSERT,
     21 	T_BOOL,
     22 	T_BREAK,
     23 	T_CASE,
     24 	T_CHAR,
     25 	T_CONST,
     26 	T_CONTINUE,
     27 	T_DEF,
     28 	T_DEFER,
     29 	T_DELETE,
     30 	T_ELSE,
     31 	T_ENUM,
     32 	T_EXPORT,
     33 	T_F32,
     34 	T_F64,
     35 	T_FALSE,
     36 	T_FN,
     37 	T_FOR,
     38 	T_FREE,
     39 	T_I16,
     40 	T_I32,
     41 	T_I64,
     42 	T_I8,
     43 	T_IF,
     44 	T_INSERT,
     45 	T_INT,
     46 	T_IS,
     47 	T_LEN,
     48 	T_LET,
     49 	T_MATCH,
     50 	T_NULL,
     51 	T_NULLABLE,
     52 	T_OFFSET,
     53 	T_RETURN,
     54 	T_RUNE,
     55 	T_SIZE,
     56 	T_STATIC,
     57 	T_STR,
     58 	T_STRUCT,
     59 	T_SWITCH,
     60 	T_TRUE,
     61 	T_TYPE,
     62 	T_U16,
     63 	T_U32,
     64 	T_U64,
     65 	T_U8,
     66 	T_UINT,
     67 	T_UINTPTR,
     68 	T_UNION,
     69 	T_USE,
     70 	T_VAARG,
     71 	T_VAEND,
     72 	T_VALIST,
     73 	T_VASTART,
     74 	T_VOID,
     75 	T_YIELD,
     76 	T_LAST_KEYWORD = T_YIELD, // Alias for the last keyword, not a new value: T_ARROW below is T_YIELD + 1
     77 
     78 	// Operators
     79 	T_ARROW,
     80 	T_BANDEQ,
     81 	T_BAND,
     82 	T_BNOT,
     83 	T_BOR,
     84 	T_COLON,
     85 	T_COMMA,
     86 	T_DIV,
     87 	T_DIVEQ,
     88 	T_DOT,
     89 	T_DOUBLE_COLON,
     90 	T_ELLIPSIS,
     91 	T_EQUAL,
     92 	T_GREATER,
     93 	T_GREATEREQ,
     94 	T_LAND,
     95 	T_LANDEQ,
     96 	T_LBRACE,
     97 	T_LBRACKET,
     98 	T_LEQUAL,
     99 	T_LESS,
    100 	T_LESSEQ,
    101 	T_LNOT,
    102 	T_LOR,
    103 	T_LOREQ,
    104 	T_LPAREN,
    105 	T_LSHIFT,
    106 	T_LSHIFTEQ,
    107 	T_LXOR,
    108 	T_LXOREQ,
    109 	T_MINUS,
    110 	T_MINUSEQ,
    111 	T_MODEQ,
    112 	T_MODULO,
    113 	T_NEQUAL,
    114 	T_BOREQ,
    115 	T_PLUS,
    116 	T_PLUSEQ,
    117 	T_QUESTION,
    118 	T_RBRACE,
    119 	T_RBRACKET,
    120 	T_RPAREN,
    121 	T_RSHIFT,
    122 	T_RSHIFTEQ,
    123 	T_SEMICOLON,
    124 	T_SLICE,
    125 	T_TIMES,
    126 	T_TIMESEQ,
    127 	T_BXOR,
    128 	T_BXOREQ,
    129 	T_LAST_OPERATOR = T_BXOREQ, // Alias for the last operator, not a new value: T_LITERAL below is T_BXOREQ + 1
    130 
    131 	// Tokens with additional information
    132 	T_LITERAL, // presumably described by struct token's storage field and value union -- confirm
    133 	T_NAME, // presumably carries its text in struct token's name member -- confirm
    134 
    135 	// Magic tokens
    136 	T_EOF,
    137 	T_ERROR,
    138 };
    139 
    140 struct location { // Source position; embedded in both struct token and struct lexer
    141 	int file; // File identifier; presumably the fileid passed to lex_init() -- confirm
    142 	int lineno, colno; // Line and column numbers (whether they start at 0 or 1 is not visible in this header)
    143 };
    144 
    145 struct token { // One lexed token: its kind, its position, and an optional payload
    146 	struct location loc; // Position associated with this token
    147 	enum lexical_token token; // Token kind
    148 	enum type_storage storage; // Type not declared in this header (presumably from types.h); presumably the literal's type when token is T_LITERAL -- confirm
    149 	union { // Anonymous union (C11): members overlap; which one is valid presumably depends on token/storage -- confirm
    150 		char *name; // presumably the identifier text for T_NAME; ownership is not visible here
    151 		uint32_t rune; // presumably a code point for rune literals -- confirm
    152 		intmax_t ival; // Signed integer payload
    153 		uintmax_t uval; // Unsigned integer payload
    154 		double fval; // Floating-point payload
    155 		struct {
    156 			size_t len; // Length of value; unit and NUL-termination are not visible here
    157 			char *value; // String data
    158 		} string; // String payload
    159 	};
    160 };
    161 
    162 struct lexer { // Lexer state for a single input stream
    163 	FILE *in; // Input stream; presumably the FILE passed to lex_init() -- confirm
    164 	char *buf; // Character buffer; presumably accumulates the text of the token being read -- confirm
    165 	size_t bufsz, buflen; // presumably buf's capacity and the amount currently in use -- confirm
    166 	uint32_t c[2]; // Two 32-bit slots; presumably pushed-back/lookahead characters -- confirm
    167 	struct token un; // presumably the token stored by unlex() for the next lex() call -- confirm
    168 	struct location loc; // Position tracked by the lexer
    169 	bool require_int; // NOTE(review): purpose not visible in this header; bool is not provided by <stdint.h>/<stdio.h>, so it presumably comes via types.h
    170 };
    171 
    172 void lex_init(struct lexer *lexer, FILE *f, int fileid); // Sets up *lexer; presumably to read from f and tag locations with fileid -- confirm
    173 void lex_finish(struct lexer *lexer); // Counterpart to lex_init(); presumably releases the lexer's resources (whether it closes the FILE is not visible here)
    174 enum lexical_token lex(struct lexer *lexer, struct token *out); // presumably reads the next token into *out and returns its kind -- confirm
    175 void unlex(struct lexer *lexer, struct token *in); // presumably pushes *in back for the next lex() call -- confirm; note struct lexer has room for only one such token (un)
    176 
    177 void token_finish(struct token *tok); // presumably releases memory owned by *tok (e.g. name or string.value) -- confirm
    178 const char *token_str(const struct token *tok); // presumably a printable form of *tok; lifetime of the returned string is not visible here
    179 const char *lexical_token_str(enum lexical_token tok); // presumably the printable name or spelling of a token kind -- confirm
    180 
    181 #endif