Use infix-to-postfix to parse expressions

This commit is contained in:
2024-01-29 14:54:23 +01:00
parent 33d07bea37
commit 2f7fc1f1d9
10 changed files with 345 additions and 185 deletions

View File

@@ -1,3 +1,6 @@
all: CC = gcc
gcc -o lang parser.c tokenizer.c error.c file_stream.c CFLAGS = -Wall -Wextra -g -O0
lang : parser.c tokenizer.c error.c file_stream.c | tokenizer.h error.h common.h file_stream.h
$(CC) $(CFLAGS) -o $@ $^

6
src/common.h Normal file
View File

@@ -0,0 +1,6 @@
#pragma once
/* Process exit status used when the parser reaches a feature that is not
 * implemented yet (passed to exit() for identifier tokens in expressions). */
#define FATAL_NOT_IMPLEMENTED 123
/* NOTE(review): presumably the exit status for an expression that exceeds a
 * fixed capacity; no use of it is visible in this change - confirm. */
#define FATAL_EXPR_TOO_BIG 101

View File

@@ -83,9 +83,9 @@ static void error_msg_free(struct error_msg* msg)
return; return;
error_msg_free(msg->next); error_msg_free(msg->next);
free(msg->message); free(msg->message);
free(msg);
msg->message = NULL; msg->message = NULL;
msg->next = NULL; msg->next = NULL;
free(msg);
} }
void error_clear(Error* err) void error_clear(Error* err)

View File

@@ -11,8 +11,6 @@
#include "error.h" #include "error.h"
#include "file_stream.h" #include "file_stream.h"
char* mfile_overflow_slope = NULL;
Mfile* mfile_open(Error* err, char* filename) Mfile* mfile_open(Error* err, char* filename)
{ {
Mfile* s = malloc(sizeof *s); Mfile* s = malloc(sizeof *s);
@@ -58,51 +56,54 @@ void mfile_close(Error* err, Mfile* s)
ok = munmap(s->data, s->size); ok = munmap(s->data, s->size);
if (ok == -1) { if (ok == -1) {
error_push(err, "failed to munmap file: %s", strerror(errno)); error_push(err, "failed to munmap file: %s", strerror(errno));
free(s);
return;
} }
close(s->fd); close(s->fd);
if (ok == -1) { if (ok == -1) {
error_push(err, "failed to close file: %s", strerror(errno)); error_push(err, "failed to close file: %s", strerror(errno));
free(s);
return;
} }
free(s); free(s);
} }
/*
 * Advance the read position by one byte.
 * Returns the position held BEFORE the advance. No bounds check is done;
 * callers are expected to test mfile_eof() first.
 */
inline size_t mfile_inc_pos(Mfile* m)
{
    return m->pos++;
}

/*
 * Step the read position back by one byte.
 * Returns the position held BEFORE the step. At position 0 the position is
 * left unchanged instead of wrapping around to SIZE_MAX.
 */
inline size_t mfile_decr_pos(Mfile* m)
{
    size_t before = m->pos;
    if (m->pos > 0) {
        m->pos--;
    }
    return before;
}

/*
 * Read the byte at the current position and advance past it.
 * Returns the byte as an unsigned char converted to int (0..255), or EOF
 * once the position has reached the end of the data. Going through
 * unsigned char keeps a 0xFF data byte distinguishable from EOF and makes
 * the result safe to hand to the <ctype.h> predicates.
 */
inline int mfile_get(Mfile* m)
{
    if (m->pos >= m->size) {
        return EOF;
    }
    return (unsigned char)m->data[mfile_inc_pos(m)];
}

/* True once the read position has reached (or passed) the end of the data. */
inline bool mfile_eof(Mfile* m)
{
    return m->pos >= m->size;
}

/*
 * Pointer to the byte at the current position.
 * At end of data a pointer to a private sentinel byte is returned so that
 * dereferencing the result is always valid. Note that this sentinel does
 * not belong to the file buffer, so it must not be used in pointer
 * arithmetic against pointers into the data.
 */
inline char* mfile_cur(Mfile* m)
{
    static char eof = EOF;
    if (m->pos >= m->size) {
        return &eof;
    }
    return m->data + m->pos;
}

/*
 * Peek at the byte at the current position without consuming it.
 * Same value convention as mfile_get(): 0..255 for data, EOF at the end.
 */
inline int mfile_curchar(Mfile* m)
{
    if (m->pos >= m->size) {
        return EOF;
    }
    return (unsigned char)m->data[m->pos];
}

/*
 * Advance the position while the predicate f accepts the current byte.
 * f follows the <ctype.h> convention (isspace, isdigit, ...). Scanning
 * always stops at end of data, whatever f answers for EOF.
 */
void mfile_skip(Mfile* m, int (*f)(int))
{
    while (!mfile_eof(m) && f(mfile_curchar(m))) {
        mfile_inc_pos(m);
    }
}

View File

@@ -8,7 +8,7 @@ extern char* mfile_overflow_slope;
typedef struct mfile { typedef struct mfile {
char* data; char* data;
off_t size; size_t size;
size_t pos; size_t pos;
// internal // internal
@@ -37,3 +37,8 @@ void mfile_skip(Mfile* s, int (*f)(int));
/* Get current char */ /* Get current char */
int mfile_curchar(Mfile* s); int mfile_curchar(Mfile* s);
/* Skip the current char */
size_t mfile_inc_pos(Mfile* m);
size_t mfile_decr_pos(Mfile* m);

View File

@@ -1,8 +1,15 @@
/*
* TODO:
* line 312
* */
#include "error.h" #include "error.h"
#include "file_stream.h" #include "file_stream.h"
#include "tokenizer.h" #include "tokenizer.h"
#include "printable.h" #include "printable.h"
#include "common.h"
#include "stack.h"
#include <assert.h> #include <assert.h>
#include <ctype.h> #include <ctype.h>
@@ -11,6 +18,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <inttypes.h>
#include <unistd.h> #include <unistd.h>
#include <string.h> #include <string.h>
#include <errno.h> #include <errno.h>
@@ -18,6 +26,7 @@
#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b))
/* ======= Grammar Rules ======= /* ======= Grammar Rules =======
statement: statement:
@@ -35,37 +44,51 @@ expr
| FLOAT OPERATOR FLOAT {$$ = binary_op_float($1, $3, $2);}; | FLOAT OPERATOR FLOAT {$$ = binary_op_float($1, $3, $2);};
| INT | INT
| FLOAT | FLOAT
============================= */ ============================= */
enum value_type { enum value_type {
VALUE_INTEGER, VALUE_INTEGER,
VALUE_OPERATOR,
VALUE_FLOATING, VALUE_FLOATING,
VALUE_TYPE_COUNT, VALUE_TYPE_COUNT,
}; };
static const char* value_type_str[VALUE_TYPE_COUNT] = { static const char* value_type_str[VALUE_TYPE_COUNT] = {
[VALUE_INTEGER] = "VALUE_INTEGER", [VALUE_INTEGER] = "VALUE_INTEGER",
[VALUE_OPERATOR] = "VALUE_OPERATOR",
[VALUE_FLOATING] = "VALUE_FLOATING", [VALUE_FLOATING] = "VALUE_FLOATING",
}; };
static const int operator_precedence[256] = {
['+'] = 0,
['-'] = 0,
['*'] = 10,
['/'] = 10,
};
typedef struct parser { typedef struct parser {
Token* cur; Token* cur;
Token* next; Token* next;
Mfile* m; Mfile* m;
} Parser; } ParserState;
bool parser_advance(Error* err, Parser* p)
/*
 * Report on stderr where the parser currently is in its input.
 *
 * A copy of the parser's Mfile struct is rewound to position 0 and read
 * forward up to and including the parser's current position, counting
 * newlines on the way. The parser's own Mfile is not modified.
 * Both numbers are printed as counted here: the line starts at 0 and the
 * column is the number of bytes read since the last newline.
 */
void parser_print_position(ParserState* p)
{
    Mfile scan = *(p->m);
    int line = 0;
    int column = 0;

    for (scan.pos = 0; !mfile_eof(&scan) && scan.pos <= p->m->pos; ) {
        char ch = mfile_get(&scan);
        if (ch != '\n') {
            column++;
            continue;
        }
        /* a newline starts the next line and resets the column */
        line++;
        column = 0;
    }

    fprintf(stderr, "\nLine: %d\nCol: %d\n", line, column);
}
bool parser_advance(Error* err, ParserState* p)
{ {
p->cur = p->next; p->cur = p->next;
//if (p->cur)
// token_print(NULL, p->cur);
mfile_skip(p->m, isspace); mfile_skip(p->m, isspace);
p->next = token_read(err, p->m); p->next = token_read(err, p->m);
if (!error_empty(err)) { if (!error_empty(err)) {
@@ -85,31 +108,31 @@ typedef struct value {
}; };
} Value; } Value;
static Value* parse_operator(Error* err, Token* t) static void value_print(FILE* out, Value* v)
{ {
if (t->type != TOKEN_OPERATOR) { switch (v->type) {
error_push(err, "%s: unexpected token type: %s", __func__, token_type_str[t->type]); case VALUE_INTEGER:
return NULL; fprintf(out, "%" PRId64, v->i64);
break;
case VALUE_FLOATING:
fprintf(out, "%lf", v->f64);
break;
default:
fprintf(out, "(bad value)");
break;
} }
Value* v = calloc(1, sizeof *v);
if (!v) {
error_push(err, "%s: failed to allocate value: %s", __func__, strerror(errno));
return NULL;
}
v->type = VALUE_OPERATOR;
strncpy(v->op, t->start, MIN(t->end - t->start, sizeof(v->op)));
return v;
} }
static Value* parse_int(Error* err, Token* t) static Value* parse_int(Error* err, ParserState* p)
{ {
Token* t = p->cur;
if (t->type != TOKEN_INTEGER) { if (t->type != TOKEN_INTEGER) {
error_push(err, "%s: unexpected token type: %s", __func__, token_type_str[t->type]); error_push(err, "(%s) unexpected token type: %s", __func__, token_type_str[t->type]);
return NULL; return NULL;
} }
Value* v = calloc(1, sizeof *v); Value* v = calloc(1, sizeof *v);
if (!v) { if (!v) {
error_push(err, "%s: failed to allocate value: %s", __func__, strerror(errno)); error_push(err, "(%s) failed to allocate value: %s", __func__, strerror(errno));
return NULL; return NULL;
} }
v->type = VALUE_INTEGER; v->type = VALUE_INTEGER;
@@ -117,14 +140,16 @@ static Value* parse_int(Error* err, Token* t)
errno = 0; errno = 0;
v->i64= strtol(t->start, NULL, 10); v->i64= strtol(t->start, NULL, 10);
if (errno != 0) { if (errno != 0) {
error_push(err, "%s: failed to parse int: %s", __func__, strerror(errno)); error_push(err, "(%s) failed to parse int: %s", __func__, strerror(errno));
return NULL; return NULL;
} }
parser_advance(err, p);
return v; return v;
} }
static Value* parse_floating(Error* err, Token* t) static Value* parse_floating(Error* err, ParserState* p)
{ {
Token* t = p->cur;
if (t->type != TOKEN_FLOATING) { if (t->type != TOKEN_FLOATING) {
error_push(err, "(%s) unexpected token type: %s", __func__, token_type_str[t->type]); error_push(err, "(%s) unexpected token type: %s", __func__, token_type_str[t->type]);
return NULL; return NULL;
@@ -142,20 +167,12 @@ static Value* parse_floating(Error* err, Token* t)
error_push(err, "(%s) failed to parse float: %s", __func__, strerror(errno)); error_push(err, "(%s) failed to parse float: %s", __func__, strerror(errno));
return NULL; return NULL;
} }
return v; parser_advance(err, p);
} if (!error_empty(err)) {
error_push(err, "(%s) couldn't advance parser", __func__);
static Value* parse_number(Error* err, Token* t)
{
switch (t->type) {
case TOKEN_FLOATING:
return parse_floating(err, t);
case TOKEN_INTEGER:
return parse_int(err, t);
default:
error_push(err, "(%s) unexpected token type %s", __func__, token_type_str[t->type]);
return NULL; return NULL;
} }
return v;
} }
static void conv_int_to_float(Error* err, Value* val) static void conv_int_to_float(Error* err, Value* val)
@@ -169,15 +186,17 @@ static void conv_int_to_float(Error* err, Value* val)
val->type = VALUE_FLOATING; val->type = VALUE_FLOATING;
} }
static Value* parse_binary_expr(Error* err, Value* lval, Value* rval, Value* op) static Value* binary_op(Error* err, Value* lval, Value* rval, Token* op)
{ {
if ((lval->type != VALUE_INTEGER && lval->type != VALUE_FLOATING) if ((lval->type != VALUE_INTEGER && lval->type != VALUE_FLOATING)
|| (rval->type != VALUE_INTEGER && rval->type != VALUE_FLOATING) || (rval->type != VALUE_INTEGER && rval->type != VALUE_FLOATING)
|| op->type != VALUE_OPERATOR) || op->type != TOKEN_OPERATOR)
{ {
error_push(err, "%s: unexpected token types: %s %s %s", error_push(err, "%s: unexpected token types: %s %s %s",
__func__, value_type_str[lval->type], __func__, value_type_str[lval->type],
value_type_str[rval->type], value_type_str[op->type]); value_type_str[rval->type], token_type_str[op->type]);
fprintf(stderr, "\n####\n");
token_print(err, op);
return NULL; return NULL;
} }
@@ -197,10 +216,10 @@ static Value* parse_binary_expr(Error* err, Value* lval, Value* rval, Value* op)
return NULL; return NULL;
} }
fprintf(stderr, "\ndoing op: %s %c %s", value_type_str[lval->type], op->op[0], value_type_str[rval->type]); //fprintf(stderr, "\ndoing op: %s %c %s", value_type_str[lval->type], op->start[0], value_type_str[rval->type]);
if (rval->type == VALUE_INTEGER && lval->type == VALUE_INTEGER) { if (rval->type == VALUE_INTEGER && lval->type == VALUE_INTEGER) {
result->type = VALUE_INTEGER; result->type = VALUE_INTEGER;
switch (op->op[0]) { switch (op->start[0]) {
case '+': case '+':
result->i64 = lval->i64 + rval->i64; result->i64 = lval->i64 + rval->i64;
break; break;
@@ -214,11 +233,11 @@ static Value* parse_binary_expr(Error* err, Value* lval, Value* rval, Value* op)
result->i64 = lval->i64 / rval->i64; result->i64 = lval->i64 / rval->i64;
break; break;
} }
printf("\nCALCULATED EXPRESSION %ld %c %ld = %ld\n", lval->i64, //fprintf(stderr, "\nCALCULATED EXPRESSION %ld %c %ld = %ld\n", lval->i64,
op->op[0], rval->i64, result->i64); // op->start[0], rval->i64, result->i64);
} else if (rval->type == VALUE_FLOATING && lval->type == VALUE_FLOATING) { } else if (rval->type == VALUE_FLOATING && lval->type == VALUE_FLOATING) {
result->type = VALUE_FLOATING; result->type = VALUE_FLOATING;
switch (op->op[0]) { switch (op->start[0]) {
case '+': case '+':
result->f64 = lval->f64 + rval->f64; result->f64 = lval->f64 + rval->f64;
break; break;
@@ -232,69 +251,128 @@ static Value* parse_binary_expr(Error* err, Value* lval, Value* rval, Value* op)
result->f64 = lval->f64 / rval->f64; result->f64 = lval->f64 / rval->f64;
break; break;
} }
printf("\nCALCULATED EXPRESSION %lf %c %lf = %lf\n", lval->f64, //fprintf(stderr, "\nCALCULATED EXPRESSION %lf %c %lf = %lf\n", lval->f64,
op->op[0], rval->f64, result->f64); // op->start[0], rval->f64, result->f64);
} }
return result; return result;
} }
static Value* parse_expr(Error* err, Parser* p)
/*
 * Binding strength of an operator token; a larger value binds tighter.
 *
 * Only the first character of the token is looked at. This is wrapped in a
 * function because operators longer than one character might be
 * implemented later.
 *
 * Parentheses get the lowest representable value so that an incoming
 * operator never compares lower than a '(' waiting on the operator stack.
 * The previous initializer 128 does not fit in int8_t; it only behaved this
 * way because the out-of-range conversion typically yields -128. INT8_MIN
 * states the same value explicitly.
 *
 * Characters without an entry yield 0, which is higher than every listed
 * operator.
 */
static inline int8_t operator_precedence(Token* op)
{
    static const int8_t lookup[256] = {
        ['+'] = -20, ['-'] = -20,
        ['*'] = -10, ['/'] = -10,
        ['('] = INT8_MIN, [')'] = INT8_MIN
    };
    /* index through unsigned char: a negative char would land outside the table */
    return lookup[(unsigned char)(op->start[0])];
}
if (!error_empty(err)) { static Value* parse_expr(Error* err, ParserState* p)
goto generic_error; {
} else if (p->next->type != TOKEN_OPERATOR) { FixedStack op_stack = STACK_INIT;
FixedStack value_stack = STACK_INIT;
fprintf(stderr, "PRATT START\n");
while (1) {
token_print(NULL, p->cur);
switch (p->cur->type) {
case TOKEN_INTEGER:
stack_push(&value_stack, parse_int(err, p));
if (!error_empty(err))
goto fail;
break;
case TOKEN_FLOATING:
stack_push(&value_stack, parse_floating(err, p));
if (!error_empty(err))
goto fail;
break;
case TOKEN_IDENTIFIER:
fprintf(stderr, "identifiers not implemented yet\n");
exit(FATAL_NOT_IMPLEMENTED);
break;
case TOKEN_PAREN_OPEN:
stack_push(&op_stack, p->cur);
parser_advance(err, p); parser_advance(err, p);
if (!error_empty(err)) { if (!error_empty(err))
goto syntax_error; goto fail;
} break;
return lval;
}
Value* op = parse_operator(err, p->next); case TOKEN_PAREN_CLOSE:
if (!error_empty(err)) { while (!stack_empty(&op_stack) && ((Token*)stack_top(&op_stack))->type != TOKEN_PAREN_OPEN) {
goto generic_error; Value* rval = stack_pop(&value_stack);
Value* lval = stack_pop(&value_stack);
Token* op = stack_pop(&op_stack);
Value* result = binary_op(err, lval, rval, op);
if (!error_empty(err))
goto fail;
stack_push(&value_stack, result);
}
if (((Token*)stack_top(&op_stack))->type != TOKEN_PAREN_OPEN) {
error_push(err, "%s: mismatched parentheses", __func__);
return NULL;
} else {
stack_pop(&op_stack);
} }
parser_advance(err, p); parser_advance(err, p);
if (!error_empty(err)) { if (!error_empty(err))
goto generic_error; goto fail;
break;
case TOKEN_OPERATOR: {
Token* new_op = p->cur;
if (!stack_empty(&op_stack)) {
while (operator_precedence(new_op) < operator_precedence(stack_top(&op_stack))) {
Value* rval = stack_pop(&value_stack);
Value* lval = stack_pop(&value_stack);
Token* op = stack_pop(&op_stack);
Value* result = binary_op(err, lval, rval, op);
if (!error_empty(err))
goto fail;
stack_push(&value_stack, result);
}
} }
parser_advance(err, p); parser_advance(err, p);
if (!error_empty(err)) { if (!error_empty(err))
goto generic_error; goto fail;
} stack_push(&op_stack, new_op);
break;}
if (!error_empty(err)) { default:
goto generic_error; goto end;
} else if (p->cur->type != TOKEN_INTEGER
&& p->cur->type != TOKEN_FLOATING)
{
goto syntax_error;
} }
Value* rval = parse_expr(err, p);
if (!error_empty(err)) {
goto generic_error;
} }
end:
while (!stack_empty(&value_stack) && !stack_empty(&op_stack)) {
Token* op = stack_pop(&op_stack);
Value* rval = stack_pop(&value_stack);
Value* lval = stack_pop(&value_stack);
Value* result = binary_op(err, lval, rval, op);
if (!error_empty(err))
goto fail;
stack_push(&value_stack, result);
}
if (stack_len(&value_stack) != 1 && stack_len(&op_stack) != 0) {
error_push(err, "(%s) bad expression", __func__);
goto fail;
}
fprintf(stderr, "PRATT END\n");
return stack_top(&value_stack);
Value* result = parse_binary_expr(err, lval, rval, op); fail:
if (!error_empty(err)) {
goto generic_error;
}
return result;
syntax_error:
error_push(err, "%s: syntax error, expected binary expression", __func__);
generic_error:
return NULL; return NULL;
} }
static Value* parser_next(Error* err, Parser* p) static Value* parser_next(Error* err, ParserState* p)
{ {
if (p->cur->type == EOF || !error_empty(err)) { if (p->cur->type == TOKEN_EOF || !error_empty(err)) {
return NULL; return NULL;
} }
@@ -302,20 +380,28 @@ static Value* parser_next(Error* err, Parser* p)
switch (p->cur->type) { switch (p->cur->type) {
case TOKEN_INTEGER: case TOKEN_INTEGER:
case TOKEN_FLOATING: case TOKEN_FLOATING:
{
result = parse_expr(err, p); result = parse_expr(err, p);
if (!error_empty(err)) { if (!error_empty(err) || result == NULL) {
goto syntax_error; goto syntax_error;
} }
fprintf(stderr, "result: ");
value_print(stderr, result);
fprintf(stderr, "\n");
break; break;
}
default: default: syntax_error:
syntax_error: error_push(err, "(%s) syntax error: unexpected token %s (%s)", __func__,
{ token_type_str[p->cur->type], token_str(p->cur));
error_push(err, "(%s) syntax error: unexpected token %s", __func__, token_type_str[p->cur->type]);
return NULL; return NULL;
} }
if (p->cur->type != TOKEN_STATEMENT_END) {
error_push(err, "(%s) expected semicolon", __func__);
return NULL;
} }
parser_advance(err, p);
return result; return result;
} }
@@ -330,12 +416,6 @@ int main(int argc, char** argv)
return EXIT_FAILURE; return EXIT_FAILURE;
} }
// create a protected page for debugging purposes
size_t pagesize = sysconf(_SC_PAGESIZE);
void* protected_page = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
mfile_overflow_slope = protected_page + 0;
mprotect(protected_page, pagesize, PROT_NONE);
Error err = ERROR_INIT; Error err = ERROR_INIT;
Mfile* m = mfile_open(&err, argv[1]); Mfile* m = mfile_open(&err, argv[1]);
if (!error_empty(&err)) { if (!error_empty(&err)) {
@@ -344,7 +424,7 @@ int main(int argc, char** argv)
return EXIT_FAILURE; return EXIT_FAILURE;
} }
Parser p = { ParserState p = {
.cur = NULL, .cur = NULL,
.next = NULL, .next = NULL,
.m = m, .m = m,
@@ -356,6 +436,7 @@ int main(int argc, char** argv)
parser_next(&err, &p); parser_next(&err, &p);
if (!error_empty(&err)) { if (!error_empty(&err)) {
error_print(&err); error_print(&err);
parser_print_position(&p);
return EXIT_FAILURE; return EXIT_FAILURE;
} }
} }

44
src/stack.h Normal file
View File

@@ -0,0 +1,44 @@
#pragma once
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Maximum number of entries a FixedStack can hold. */
#define STACK_MAX 256

/*
 * Fixed-capacity LIFO stack of untyped pointers.
 * The stack stores the pointers only; it never allocates, copies or frees
 * what they point to.
 */
typedef struct fixed_stack {
    void* vals[STACK_MAX];
    size_t top; /* number of entries currently stored */
} FixedStack;

/* Initializer for an empty stack: FixedStack s = STACK_INIT; */
#define STACK_INIT { 0 }

/* Push any pointer without an explicit cast at the call site. */
#define stack_push(s, v) stack_push_((s), (void*)(v))

/*
 * Push val onto the stack.
 * Exceeding STACK_MAX entries is treated as fatal: a message is written to
 * stderr and the process exits with EXIT_FAILURE.
 */
static inline void stack_push_(FixedStack* s, void* val)
{
    if (s->top >= STACK_MAX) {
        fprintf(stderr, "static stack capacity exceeded");
        exit(EXIT_FAILURE);
    }
    s->vals[s->top++] = val;
}

/*
 * Return the most recently pushed entry without removing it.
 * Returns NULL when the stack is empty instead of reading before the
 * start of the array.
 */
static inline void* stack_top(FixedStack* s)
{
    if (s->top == 0) {
        return NULL;
    }
    return s->vals[s->top - 1];
}

/*
 * Remove and return the most recently pushed entry.
 * Returns NULL and leaves the stack untouched when it is empty, so the
 * entry count can never wrap around.
 */
static inline void* stack_pop(FixedStack* s)
{
    if (s->top == 0) {
        return NULL;
    }
    return s->vals[--s->top];
}

/* True when the stack holds no entries. */
static inline bool stack_empty(FixedStack* s)
{
    return s->top == 0;
}

/* Number of entries currently on the stack. */
static inline size_t stack_len(FixedStack* s)
{
    return s->top;
}

View File

@@ -1,4 +1,2 @@
1+2 1+2*2;
1 + 4 + 3 2 * (3+4);
5 / 2
1.3 * 2

View File

@@ -24,27 +24,29 @@ static const bool is_operator[256] = {
static void token_read_operator(Error* err, Mfile* m, Token* t) static void token_read_operator(Error* err, Mfile* m, Token* t)
{ {
(void)err;
t->type = TOKEN_OPERATOR; t->type = TOKEN_OPERATOR;
t->start = mfile_cur(m); t->start = mfile_cur(m);
while (!mfile_eof(m) && is_operator[(unsigned char)mfile_get(m)]) while (!mfile_eof(m) && is_operator[(unsigned char)mfile_get(m)])
/* NOOP */; /* NOOP */;
m->pos -= 1; mfile_decr_pos(m);
t->end = mfile_cur(m); t->end = mfile_cur(m);
} }
static void token_read_number(Error* err, Mfile* m, Token* t) static void token_read_number(Error* err, Mfile* m, Token* t)
{ {
(void)err;
t->start = mfile_cur(m); t->start = mfile_cur(m);
if (mfile_curchar(m) == '-') { if (mfile_curchar(m) == '-') {
m->pos += 1; mfile_inc_pos(m);
} }
mfile_skip(m, isdigit); mfile_skip(m, isdigit);
if (mfile_curchar(m) == '.') { if (mfile_curchar(m) == '.') {
t->type = TOKEN_FLOATING; t->type = TOKEN_FLOATING;
m->pos += 1; mfile_inc_pos(m);
mfile_skip(m, isdigit); mfile_skip(m, isdigit);
} else { } else {
t->type = TOKEN_INTEGER; t->type = TOKEN_INTEGER;
@@ -82,7 +84,7 @@ static void token_read_string(Error* err, Mfile* m, Token* t)
t->type = TOKEN_STRING; t->type = TOKEN_STRING;
t->start = mfile_cur(m); t->start = mfile_cur(m);
m->pos += 1; mfile_inc_pos(m);
bool escaped = false; bool escaped = false;
while (!mfile_eof(m) && (mfile_curchar(m) != '"' || escaped)) { while (!mfile_eof(m) && (mfile_curchar(m) != '"' || escaped)) {
escaped = mfile_get(m) == '\\'; escaped = mfile_get(m) == '\\';
@@ -91,7 +93,7 @@ static void token_read_string(Error* err, Mfile* m, Token* t)
error_push(err, "%s: expected '\"', got %c", __func__, PRINTABLE(*(mfile_cur(m)))); error_push(err, "%s: expected '\"', got %c", __func__, PRINTABLE(*(mfile_cur(m))));
return; return;
} }
m->pos += 1; mfile_inc_pos(m);
t->end = mfile_cur(m); t->end = mfile_cur(m);
} }
@@ -102,20 +104,14 @@ static void token_read_identifier(Error* err, Mfile* m, Token* t)
t->start = mfile_cur(m); t->start = mfile_cur(m);
if (!isalpha(*(t->start))) { if (!isalpha(*(t->start))) {
error_push(err, "%s, expected identifier, got %c", __func__, PRINTABLE(*(t->start))); error_push(err, "(%s), expected alphanumeric character, got %c",
__func__, PRINTABLE(*(t->start)));
return; return;
} }
mfile_skip(m, isalnum); mfile_skip(m, isalnum);
t->end = mfile_cur(m); t->end = mfile_cur(m);
return;
}
static void token_eval_identifier(Token* t)
{
} }
Token* token_read(Error* err, Mfile* m) Token* token_read(Error* err, Mfile* m)
@@ -132,12 +128,27 @@ Token* token_read(Error* err, Mfile* m)
token_read_identifier(err, m, t); token_read_identifier(err, m, t);
} else if (c == '"') { } else if (c == '"') {
token_read_string(err, m, t); token_read_string(err, m, t);
} else if (isdigit(c) || c == '-') { } else if (c == ';') {
t->type = TOKEN_STATEMENT_END;
t->start = mfile_cur(m);
mfile_inc_pos(m);
t->end = mfile_cur(m);
} else if (c == '(') {
t->type = TOKEN_PAREN_OPEN;
t->start = mfile_cur(m);
mfile_inc_pos(m);
t->end = mfile_cur(m);
} else if (c == ')') {
t->type = TOKEN_PAREN_CLOSE;
t->start = mfile_cur(m);
mfile_inc_pos(m);
t->end = mfile_cur(m);
} else if (isdigit(c)) { // signs are handled by the parser grammar
token_read_number(err, m, t); token_read_number(err, m, t);
} else if (is_operator[c]) { } else if (is_operator[c]) {
token_read_operator(err, m, t); token_read_operator(err, m, t);
} else if ( c == EOF ) { } else if ( c == EOF ) {
t->type = EOF; t->type = TOKEN_EOF;
} else { } else {
error_push(err, "unexpected character: %s (0x%02x)", PRINTABLE(c), c); error_push(err, "unexpected character: %s (0x%02x)", PRINTABLE(c), c);
} }
@@ -149,7 +160,7 @@ void token_print(Error* err, Token* t)
{ {
int ok; int ok;
char* start = t->start; char* start = t->start;
ok = fprintf(stderr, "{\n\t\""); ok = fprintf(stderr, "[%s:%ld \"", token_type_str[t->type], t->type);
if (ok < 0) { if (ok < 0) {
error_push(err, "failed to print token: %s", strerror(errno)); error_push(err, "failed to print token: %s", strerror(errno));
return; return;
@@ -162,10 +173,14 @@ void token_print(Error* err, Token* t)
} }
start += 1; start += 1;
} }
ok = fprintf(stderr, "\"\n\ttype: %s\n}\n", token_type_str[t->type]); fprintf(stderr, "\"]\n");
if (ok < 0) { }
error_push(err, "failed to print token: %s", strerror(errno));
/*
 * Return the text of token t as a NUL-terminated string.
 *
 * The result lives in a per-thread static buffer: it is overwritten by the
 * next call on the same thread and must not be freed. Text longer than the
 * buffer is truncated. A token without a valid [start, end) range yields
 * an empty string.
 */
char* token_str(Token* t)
{
    static __thread char buf[256];
    size_t len = 0;

    if (t->start != NULL && t->end != NULL && t->end > t->start) {
        len = (size_t)(t->end - t->start);
        /* keep one byte for the terminator */
        if (len > sizeof buf - 1) {
            len = sizeof buf - 1;
        }
        memcpy(buf, t->start, len);
    }
    /* terminate at the copied length (was start - end, a negative index) */
    buf[len] = '\0';
    return buf;
}

View File

@@ -6,24 +6,30 @@
#include <stdint.h> #include <stdint.h>
enum token_type { enum token_type {
TOKEN_IDENTIFIER = 0, // [a-zA-Z][a-zA-Z0-9_]* TOKEN_IDENTIFIER, // [a-zA-Z][a-zA-Z0-9_]*
TOKEN_STRING = 1, // "[^"]*" TOKEN_STRING, // "[^"]*"
TOKEN_INTEGER = 2, // [0-9]+ TOKEN_INTEGER, // [0-9]+
TOKEN_FLOATING = 3, // [0-9]+\.[0-9]* TOKEN_FLOATING, // [0-9]+\.[0-9]*
TOKEN_OPERATOR = 4, // + - * / TOKEN_OPERATOR, // '+' | '-' | '*' | '/'
TOKEN_EOF = 5, TOKEN_STATEMENT_END, // ';'
TOKEN_UNKNOWN = 6, TOKEN_PAREN_OPEN,
TOKEN_TYPE_COUNT = 7 TOKEN_PAREN_CLOSE,
TOKEN_EOF,
TOKEN_UNKNOWN,
TOKEN_TYPE_COUNT
}; };
static const char* token_type_str[TOKEN_TYPE_COUNT] = { static const char* token_type_str[TOKEN_TYPE_COUNT] = {
"TOKEN_IDENTIFIER", [TOKEN_IDENTIFIER] = "TOKEN_IDENTIFIER",
"TOKEN_STRING", [TOKEN_STRING] = "TOKEN_STRING",
"TOKEN_INTEGER", [TOKEN_INTEGER] = "TOKEN_INTEGER",
"TOKEN_FLOATING", [TOKEN_FLOATING] = "TOKEN_FLOATING",
"TOKEN_OPERATOR", [TOKEN_OPERATOR] = "TOKEN_OPERATOR",
"TOKEN_EOF", [TOKEN_PAREN_OPEN] = "TOKEN_PAREN_OPEN",
"TOKEN_UNKNOWN", [TOKEN_PAREN_CLOSE] = "TOKEN_PAREN_CLOSE",
[TOKEN_STATEMENT_END] = "TOKEN_STATEMENT_END",
[TOKEN_EOF] = "TOKEN_EOF",
[TOKEN_UNKNOWN] = "TOKEN_UNKNOWN",
}; };
typedef struct token { typedef struct token {
@@ -42,4 +48,5 @@ void token_print(Error* err, Token* t);
int64_t token_eval_int(Error* err, Token* t); int64_t token_eval_int(Error* err, Token* t);
double token_eval_float(Error* err, Token* t); double token_eval_float(Error* err, Token* t);
char* token_str(Token* t);