lex: handle string and rune constants - harec - Unnamed repository; edit this file 'description' to name the repository.

commit e85e5df216cbd26cc9a9b2ab79bf7638251d6ebd
parent 576fe8c851fb6eaa01b37c214752aa83e6cd86ad
Author: Eyal Sawady <ecs@d2evs.net>
Date:   Sat, 24 Oct 2020 15:28:05 -0400

lex: handle string and rune constants

Diffstat:
M include/lex.h  | 8 ++++++++
M src/lex.c  | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M src/main.c  | 10 ++++++++++

3 files changed, 125 insertions(+), 2 deletions(-)
diff --git a/include/lex.h b/include/lex.h
@@ -1,5 +1,6 @@
 #ifndef HAREC_LEX_H
 #define HAREC_LEX_H
+#include <stdint.h>
 
 // Keep sorted
 enum lexical_token {
@@ -103,6 +104,8 @@ enum lexical_token {
 	// Tokens with additional information
 	T_NAME,
 	T_LITERAL,
+	T_RUNE,
+	T_STRING,
 
 	// Magic tokens
 	T_EOF,
@@ -114,6 +117,11 @@ struct token {
 	union {
 		char *name;
 		char *literal;
+		uint32_t rune;
+		struct {
+			size_t len;
+			char *value;
+		} string;
 	};
 };
 
diff --git a/src/lex.c b/src/lex.c
@@ -1,6 +1,7 @@
 #include <assert.h>
 #include <ctype.h>
 #include <stdbool.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -297,10 +298,111 @@ finalize:
 	return c;
 }
 
-static int
+static uint32_t
+lex_rune(struct lexer *lexer)
+{
+	char buf[5];
+	char *endptr;
+	uint32_t c = next(lexer, false);
+	assert(c != UTF8_INVALID);
+
+	switch (c) {
+	case '\\':
+		c = next(lexer, false);
+		switch (c) {
+		case '0':
+			return '\0';
+		case 'a':
+			return '\a';
+		case 'b':
+			return '\b';
+		case 'f':
+			return '\f';
+		case 'n':
+			return '\n';
+		case 'r':
+			return '\r';
+		case 't':
+			return '\t';
+		case 'v':
+			return '\v';
+		case '\\':
+			return '\\';
+		case '\'':
+			return '\'';
+		case '"':
+			return '\"';
+		case 'x':
+			buf[0] = next(lexer, false);
+			buf[1] = next(lexer, false);
+			buf[2] = '\0';
+			c = strtoul(&buf[0], &endptr, 16);
+			assert(*endptr == '\0');
+			return c;
+		case 'u':
+			buf[0] = next(lexer, false);
+			buf[1] = next(lexer, false);
+			buf[2] = next(lexer, false);
+			buf[3] = next(lexer, false);
+			buf[4] = '\0';
+			c = strtoul(&buf[0], &endptr, 16);
+			assert(*endptr == '\0');
+			return c;
+		default:
+			assert(0); // Invariant
+		}
+		assert(0);
+	default:
+		return c;
+	}
+	assert(0);
+}
+
+static uint32_t
 lex_string(struct lexer *lexer, struct token *out)
 {
-	assert(0); // TODO
+	uint32_t c = next(lexer, false);
+	assert(c != UTF8_INVALID);
+
+	switch (c) {
+	case '"':
+		while ((c = next(lexer, false)) != UTF8_INVALID) {
+			switch (c) {
+			case '"':;
+				char *buf = malloc(lexer->buflen);
+				memcpy(buf, lexer->buf, lexer->buflen);
+				out->token = T_STRING;
+				out->string.len = lexer->buflen;
+				out->string.value = buf;
+				consume(lexer, -1);
+				return c;
+			default:
+				push(lexer, c, false);
+				push(lexer, lex_rune(lexer), false);
+				next(lexer, true);
+			}
+		}
+		assert(0); // Invariant
+	case '\'':
+		c = next(lexer, false);
+		switch (c) {
+		case '\'':
+			assert(0); // Invariant
+		case '\\':
+			push(lexer, c, false);
+			out->rune = lex_rune(lexer);
+			break;
+		default:
+			out->rune = c;
+		}
+		c = next(lexer, false);
+		assert(c == '\'');
+		out->token = T_RUNE;
+		return c;
+	default:
+		assert(0); // Invariant
+	}
+	assert(0);
 }
 
 static uint32_t
@@ -609,6 +711,9 @@ token_finish(struct token *tok)
 	case T_NAME:
 		free(tok->name);
 		break;
+	case T_STRING:
+		free(tok->string.value);
+		break;
 	default:
 		break;
 	}
diff --git a/src/main.c b/src/main.c
@@ -19,6 +19,16 @@ main(int argc, char *argv[])
 		case T_LITERAL:
 			fprintf(stderr, "(%s)\n", tok.literal);
 			break;
+		case T_RUNE:
+			putc('\'', stderr);
+			utf8_fputch(stderr, tok.rune);
+			putc('\'', stderr);
+			putc('\n', stderr);
+			break;
+		case T_STRING:
+			fprintf(stderr, "\"%*s\"\n", (int)tok.string.len,
+				tok.string.value);
+			break;
 		case T_ERROR:
 			fprintf(stderr, "ERROR\n");
 			break;

	harec Unnamed repository; edit this file 'description' to name the repository.
	Log | Files | Refs | README | LICENSE

M	include/lex.h	|	8	++++++++
M	src/lex.c	|	109	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M	src/main.c	|	10	++++++++++