diff --git a/src/error.c b/src/error.c index 827a098..c346b87 100644 --- a/src/error.c +++ b/src/error.c @@ -13,12 +13,12 @@ bool error_empty(Error* err) return err->msg == NULL; } -void error_push(Error* err, const char* fmt, ...) +void error_push_(Error* err, const char* fmt, ...) { - if (!err) - return; + if (!err) + return; - #define MSG_SIZE 128 + #define MSG_SIZE 128 // first list element is a dummy element if (err->msg == NULL) { @@ -31,37 +31,35 @@ void error_push(Error* err, const char* fmt, ...) struct error_msg* m = err->msg; - while (m->next) { - m = m->next; - } - m->next = malloc(sizeof *m); - if (m->next == NULL) { - perror("malloc"); + while (m->next) { + m = m->next; + } + m->next = malloc(sizeof *m); + if (m->next == NULL) { + perror("malloc"); exit(EXIT_FAILURE); - } - m->next->message = malloc(MSG_SIZE); - if (!(m->next->message)) { - perror("malloc"); + } + m->next->message = malloc(MSG_SIZE); + if (!(m->next->message)) { + perror("malloc"); exit(EXIT_FAILURE); - } - va_list args; - va_start(args, fmt); - vsnprintf(m->next->message, MSG_SIZE, fmt, args); - va_end(args); + } + va_list args; + va_start(args, fmt); + vsnprintf(m->next->message, MSG_SIZE, fmt, args); + va_end(args); - #undef MSG_SIZE + #undef MSG_SIZE } static void error_msg_print(struct error_msg* msg, bool print_colon) { - if (!msg) - return; + if (!msg) + return; error_msg_print(msg->next, true); - - if (msg->message) { - fprintf(stderr, "%s", msg->message); - } - + if (msg->message) { + fprintf(stderr, "%s", msg->message); + } if (print_colon) { fprintf(stderr, "\n - "); } diff --git a/src/error.h b/src/error.h index 01b9d87..93e73f4 100644 --- a/src/error.h +++ b/src/error.h @@ -5,8 +5,8 @@ #include struct error_msg { - struct error_msg* next; - char* message; + struct error_msg* next; + char* message; }; typedef struct error { @@ -17,7 +17,8 @@ typedef struct error { bool error_empty(Error* err); /* Add message to error */ -void error_push(Error* err, const char* fmt, ...); +void error_push_(Error* err, const char* fmt, ...); +#define error_push(err, fmt, args...) error_push_(err, "(%s) " fmt, __func__ __VA_OPT__(,) args) /* Print error */ void error_print(Error* err); diff --git a/src/parser.c b/src/parser.c index 7fff86f..d34a689 100644 --- a/src/parser.c +++ b/src/parser.c @@ -36,12 +36,6 @@ expr : ================================ */ -typedef struct parser { - Token* cur; - Token* next; - Mfile* m; -} TokenStream; - void parser_print_position(TokenStream* ts) { Mfile m = *(ts->m); @@ -62,18 +56,6 @@ void parser_print_position(TokenStream* ts) fprintf(stderr, "\nLine: %d\nCol: %d\n", linecount, col); } -bool tokenstream_advance(Error* err, TokenStream* ts) -{ - ts->cur = ts->next; - mfile_skip(ts->m, isspace); - ts->next = token_read(err, ts->m); - if (!error_empty(err)) { - error_push(err, "%s failed", __func__); - return false; - } - return true; -} - enum value_type { VALUE_INTEGER, VALUE_FLOATING, @@ -96,28 +78,31 @@ typedef struct value { static void value_print(FILE* out, Value* v) { switch (v->type) { - case VALUE_INTEGER: - fprintf(out, "%" PRId64, v->i64); - break; - case VALUE_FLOATING: - fprintf(out, "%lf", v->f64); - break; - default: - fprintf(out, "(bad value)"); - break; + case VALUE_INTEGER: + fprintf(out, "%" PRId64, v->i64); + break; + case VALUE_FLOATING: + fprintf(out, "%lf", v->f64); + break; + default: + fprintf(out, "(bad value)"); + break; } } static Value* parse_int(Error* err, TokenStream* ts) { - Token* t = ts->cur; + Token* t = tokenstream_get(err, ts); + if (!error_empty(err)) { + return NULL; + } if (t->type != TOKEN_INTEGER) { - error_push(err, "(%s) unexpected token type: %s", __func__, token_type_str[t->type]); + error_push(err, "unexpected token type: %s", token_type_str[t->type]); return NULL; } Value* v = calloc(1, sizeof *v); if (!v) { - error_push(err, "(%s) failed to allocate value: %s", __func__, strerror(errno)); + error_push(err, "failed to allocate value: %s", strerror(errno)); return NULL; } v->type = VALUE_INTEGER; @@ -125,23 +110,24 @@ static Value* parse_int(Error* err, TokenStream* ts) errno = 0; v->i64= strtol(t->start, NULL, 10); if (errno != 0) { - error_push(err, "(%s) failed to parse int: %s", __func__, strerror(errno)); + error_push(err, "failed to parse int: %s", strerror(errno)); return NULL; } - tokenstream_advance(err, ts); return v; } static Value* parse_floating(Error* err, TokenStream* ts) { - Token* t = ts->cur; + Token* t = tokenstream_get(err, ts); + if (!error_empty(err)) + return NULL; if (t->type != TOKEN_FLOATING) { - error_push(err, "(%s) unexpected token type: %s", __func__, token_type_str[t->type]); + error_push(err, "unexpected token type: %s", token_type_str[t->type]); return NULL; } Value* v = calloc(1, sizeof *v); if (!v) { - error_push(err, "(%s) failed to allocate value: %s", __func__, strerror(errno)); + error_push(err, "failed to allocate value: %s", strerror(errno)); return NULL; } v->type = VALUE_FLOATING; @@ -149,12 +135,7 @@ static Value* parse_floating(Error* err, TokenStream* ts) errno = 0; v->f64= strtod(t->start, NULL); if (errno != 0) { - error_push(err, "(%s) failed to parse float: %s", __func__, strerror(errno)); - return NULL; - } - tokenstream_advance(err, ts); - if (!error_empty(err)) { - error_push(err, "(%s) couldn't advance parser", __func__); + error_push(err, "failed to parse float: %s", strerror(errno)); return NULL; } return v; @@ -163,10 +144,9 @@ static Value* parse_floating(Error* err, TokenStream* ts) static void conv_int_to_float(Error* err, Value* val) { if (val->type != VALUE_INTEGER) { - error_push(err, "(%s) conversion from %s to float not implemented", __func__, value_type_str[val->type]); + error_push(err, "conversion from %s to float not implemented", value_type_str[val->type]); return; } - fprintf(stderr, "converting %ld to %lf", val->i64, (double)val->i64); val->f64 = (double)val->i64; val->type = VALUE_FLOATING; } @@ -177,11 +157,9 @@ static Value* binary_op(Error* err, Value* lval, Value* rval, Token* op) || (rval->type != VALUE_INTEGER && rval->type != VALUE_FLOATING) || op->type != TOKEN_OPERATOR) { - error_push(err, "%s: unexpected token types: %s %s %s", - __func__, value_type_str[lval->type], - value_type_str[rval->type], token_type_str[op->type]); - fprintf(stderr, "\n####\n"); - token_print(err, op); + error_push(err, "unexpected token types: %s %s %s", + value_type_str[lval->type], value_type_str[rval->type], + token_type_str[op->type]); goto fail; } @@ -197,7 +175,7 @@ static Value* binary_op(Error* err, Value* lval, Value* rval, Token* op) Value* result = calloc(1, sizeof *result); if (!result) { - error_push(err, "%s: failed to allocate value: %s", __func__, strerror(errno)); + error_push(err, "failed to allocate value: %s", strerror(errno)); goto fail; } @@ -235,11 +213,11 @@ static Value* binary_op(Error* err, Value* lval, Value* rval, Token* op) break; } } - free(op); + free(op); return result; fail: - return NULL; + return NULL; } static inline int8_t operator_precedence(Token* op) @@ -259,11 +237,16 @@ static Value* parse_expr(Error* err, TokenStream* ts) FixedStack op_stack = STACK_INIT; FixedStack value_stack = STACK_INIT; - fprintf(stderr, "PRATT START\n"); + fprintf(stderr, "EXPR START\n"); while (1) { - token_print(NULL, ts->cur); - switch (ts->cur->type) { + Token* cur = tokenstream_cur(ts); + token_print(err, cur); + if (!error_empty(err)) { + fprintf(stderr, "failed to print error"); + exit(1); + } + switch (cur->type) { case TOKEN_INTEGER: stack_push(&value_stack, parse_int(err, ts)); if (!error_empty(err)) @@ -282,14 +265,15 @@ static Value* parse_expr(Error* err, TokenStream* ts) break; case TOKEN_PAREN_OPEN: - stack_push(&op_stack, ts->cur); - tokenstream_advance(err, ts); + stack_push(&op_stack, tokenstream_get(err, ts)); if (!error_empty(err)) goto fail; break; case TOKEN_PAREN_CLOSE: - while (!stack_empty(&op_stack) && ((Token*)stack_top(&op_stack))->type != TOKEN_PAREN_OPEN) { + while (!stack_empty(&op_stack) + && ((Token*)stack_top(&op_stack))->type != TOKEN_PAREN_OPEN) + { Value* rval = stack_pop(&value_stack); Value* lval = stack_pop(&value_stack); Token* op = stack_pop(&op_stack); @@ -299,7 +283,7 @@ static Value* parse_expr(Error* err, TokenStream* ts) stack_push(&value_stack, result); } if (((Token*)stack_top(&op_stack))->type != TOKEN_PAREN_OPEN) { - error_push(err, "%s: mismatched parentheses", __func__); + error_push(err, "mismatched parentheses"); return NULL; } else { stack_pop(&op_stack); @@ -311,9 +295,13 @@ static Value* parse_expr(Error* err, TokenStream* ts) break; case TOKEN_OPERATOR: { - Token* new_op = ts->cur; + Token* new_op = tokenstream_get(err, ts); + if (!error_empty(err)) + goto fail; if (!stack_empty(&op_stack)) { - while (operator_precedence(new_op) < operator_precedence(stack_top(&op_stack))) { + while (operator_precedence(new_op) + < operator_precedence(stack_top(&op_stack))) + { Value* rval = stack_pop(&value_stack); Value* lval = stack_pop(&value_stack); Token* op = stack_pop(&op_stack); @@ -323,9 +311,6 @@ static Value* parse_expr(Error* err, TokenStream* ts) stack_push(&value_stack, result); } } - tokenstream_advance(err, ts); - if (!error_empty(err)) - goto fail; stack_push(&op_stack, new_op); break;} @@ -344,26 +329,28 @@ end: stack_push(&value_stack, result); } if (stack_len(&value_stack) != 1 && stack_len(&op_stack) != 0) { - error_push(err, "(%s) bad expression", __func__); + error_push(err, "bad expression"); goto fail; } - fprintf(stderr, "PRATT END\n"); + fprintf(stderr, "EXPR END\n"); return stack_top(&value_stack); fail: return NULL; } -static Value* parser_next(Error* err, TokenStream* ts) +static Value* parse_statement(Error* err, TokenStream* ts) { - if (ts->cur->type == TOKEN_EOF || !error_empty(err)) { + if (tokenstream_cur(ts)->type == TOKEN_EOF || !error_empty(err)) { return NULL; } Value* result; - switch (ts->cur->type) { + Token* t = tokenstream_cur(ts); + switch (t->type) { case TOKEN_INTEGER: case TOKEN_FLOATING: + case TOKEN_IDENTIFIER: result = parse_expr(err, ts); if (!error_empty(err) || result == NULL) { goto syntax_error; @@ -371,17 +358,22 @@ static Value* parser_next(Error* err, TokenStream* ts) fprintf(stderr, "result: "); value_print(stderr, result); fprintf(stderr, "\n"); - + break; + + case TOKEN_IF: + fprintf(stderr, "if statements not implemented"); + exit(FATAL_NOT_IMPLEMENTED); break; default: syntax_error: - error_push(err, "(%s) syntax error: unexpected token %s (%s)", __func__, - token_type_str[ts->cur->type], token_str(ts->cur)); + error_push(err, "syntax error: unexpected token %s (%s)", + token_type_str[tokenstream_cur(ts)->type], + token_str(tokenstream_cur(ts))); return NULL; } - if (ts->cur->type != TOKEN_STATEMENT_END) { - error_push(err, "(%s) expected semicolon", __func__); + if (tokenstream_cur(ts)->type != TOKEN_STATEMENT_END) { + error_push(err, "expected semicolon"); return NULL; } tokenstream_advance(err, ts); @@ -408,16 +400,15 @@ int main(int argc, char** argv) return EXIT_FAILURE; } - TokenStream ts = { - .cur = NULL, - .next = NULL, - .m = m, - }; - tokenstream_advance(&err, &ts); - tokenstream_advance(&err, &ts); + TokenStream ts = tokenstream_attach(&err, m); + if (!error_empty(&err)) { + error_push(&err, "tokenstream_attach"); + error_print(&err); + return EXIT_FAILURE; + } while (!mfile_eof(m)) { - parser_next(&err, &ts); + parse_statement(&err, &ts); if (!error_empty(&err)) { error_print(&err); parser_print_position(&ts); @@ -425,7 +416,6 @@ int main(int argc, char** argv) } } - mfile_close(&err, m); if (!error_empty(&err)) { error_push(&err, "mfile_close"); diff --git a/src/test/expressions.txt b/src/test/expressions.txt index 147203c..e3414b5 100644 --- a/src/test/expressions.txt +++ b/src/test/expressions.txt @@ -1,2 +1,4 @@ 1+2*2; 2 * (3+4); +2.4 + 1 * 2; +if 1; diff --git a/src/tokenizer.c b/src/tokenizer.c index df48d77..39375c2 100644 --- a/src/tokenizer.c +++ b/src/tokenizer.c @@ -4,6 +4,7 @@ #include "tokenizer.h" #include "printable.h" +#include #include #include #include @@ -38,12 +39,10 @@ static void token_read_number(Error* err, Mfile* m, Token* t) (void)err; t->start = mfile_cur(m); - if (mfile_curchar(m) == '-') { + if (mfile_curchar(m) == '-') { mfile_inc_pos(m); - } - + } mfile_skip(m, isdigit); - if (mfile_curchar(m) == '.') { t->type = TOKEN_FLOATING; mfile_inc_pos(m); @@ -54,31 +53,6 @@ static void token_read_number(Error* err, Mfile* m, Token* t) t->end = mfile_cur(m); } -int64_t token_eval_int(Error* err, Token* t) -{ - char* endptr; - int64_t n = strtoll(t->start, &endptr, 10); - if (endptr != t->end) { - error_push(err, "%s: invalid integer: %s", __func__, t->start); - return 0; - } - return n; -} - -double token_eval_float(Error* err, Token* t) -{ - char* endptr; - double n = strtod(t->start, &endptr); - if (endptr != t->end) { - error_push(err, "%s: invalid floating point: %s", __func__, t->start); - return 0; - } - if (n == HUGE_VAL || n == -HUGE_VAL) { - error_push(err, "%s: invalid floating point: %s", __func__, t->start); - } - return n; -} - static void token_read_string(Error* err, Mfile* m, Token* t) { t->type = TOKEN_STRING; @@ -90,7 +64,7 @@ static void token_read_string(Error* err, Mfile* m, Token* t) escaped = mfile_get(m) == '\\'; } if (mfile_curchar(m) != '"') { - error_push(err, "%s: expected '\"', got %c", __func__, PRINTABLE(*(mfile_cur(m)))); + error_push(err, "expected '\"', got %c", PRINTABLE(*(mfile_cur(m)))); return; } mfile_inc_pos(m); @@ -98,20 +72,29 @@ static void token_read_string(Error* err, Mfile* m, Token* t) t->end = mfile_cur(m); } -static void token_read_identifier(Error* err, Mfile* m, Token* t) +static void token_read_keyword_or_identifier(Error* err, Mfile* m, Token* t) { - t->type = TOKEN_IDENTIFIER; + (void)err; t->start = mfile_cur(m); - if (!isalpha(*(t->start))) { - error_push(err, "(%s), expected alphanumeric character, got %c", - __func__, PRINTABLE(*(t->start))); - return; - } + assert(isalpha(*(t->start))); mfile_skip(m, isalnum); t->end = mfile_cur(m); + +#define IS_KEYWORD(s) \ + (memcmp(t->start, s, \ + MIN((ssize_t)(sizeof(s)-1), (ssize_t)(t->end - t->start))) == 0) + if (IS_KEYWORD("if")) { + t->type = TOKEN_IF; + } else if (IS_KEYWORD("while")) { + fprintf(stderr, "while statements not implemented\n"); + exit(1); + } else { + t->type = TOKEN_IDENTIFIER; + } +#undef IS_KEYWORD } Token* token_read(Error* err, Mfile* m) @@ -121,11 +104,12 @@ Token* token_read(Error* err, Mfile* m) error_push(err, "failed to allocate token: %s", strerror(errno)); return NULL; } - mfile_skip(m, isspace); + + mfile_skip(m, isspace); const int c = mfile_curchar(m); if (isalpha(c)) { - token_read_identifier(err, m, t); + token_read_keyword_or_identifier(err, m, t); } else if (c == '"') { token_read_string(err, m, t); } else if (c == ';') { @@ -143,12 +127,12 @@ Token* token_read(Error* err, Mfile* m) t->start = mfile_cur(m); mfile_inc_pos(m); t->end = mfile_cur(m); - } else if (isdigit(c)) { // signs are handled by the parser grammar + } else if (isdigit(c)) { // signs are handled by parser.c token_read_number(err, m, t); } else if (is_operator[c]) { token_read_operator(err, m, t); - } else if ( c == EOF ) { - t->type = TOKEN_EOF; + } else if (c == EOF) { + t->type = TOKEN_EOF; } else { error_push(err, "unexpected character: %s (0x%02x)", PRINTABLE(c), c); } @@ -158,19 +142,19 @@ Token* token_read(Error* err, Mfile* m) void token_print(Error* err, Token* t) { - int ok; + int ok; char* start = t->start; - ok = fprintf(stderr, "[%s:%ld \"", token_type_str[t->type], t->type); - if (ok < 0) { - error_push(err, "failed to print token: %s", strerror(errno)); - return; - } + ok = fprintf(stderr, "[%s:%d \"", token_type_str[t->type], t->type); + if (ok < 0) { + error_push(err, "failed to print token: %s", strerror(errno)); + return; + } while (start < t->end) { - ok = fputc(*start, stderr); - if (ok < 0) { - error_push(err, "failed to print token: %s", strerror(errno)); - return; - } + ok = fputc(*start, stderr); + if (ok < 0) { + error_push(err, "failed to print token: %s", strerror(errno)); + return; + } start += 1; } fprintf(stderr, "\"]\n"); @@ -178,9 +162,37 @@ void token_print(Error* err, Token* t) char* token_str(Token* t) { - static __thread char buf[256]; + static __thread char buf[512]; memcpy(buf, t->start, t->end - t->start); buf[t->start - t->end] = '\0'; return buf; } +bool tokenstream_advance(Error* err, TokenStream* ts) +{ + ts->cur = token_read(err, ts->m); + if (!error_empty(err)) { + error_push(err, "failed"); + return false; + } + return true; +} + +TokenStream tokenstream_attach(Error* err, Mfile* m) +{ + TokenStream ts = {.cur = NULL, .m = m}; + ts.cur = token_read(err, m); + return ts; +} + +Token* tokenstream_cur(TokenStream* ts) +{ + return ts->cur; +} + +Token* tokenstream_get(Error* err, TokenStream* ts) +{ + Token* cur = tokenstream_cur(ts); + tokenstream_advance(err, ts); + return cur; +} diff --git a/src/tokenizer.h b/src/tokenizer.h index 11f385b..129b81c 100644 --- a/src/tokenizer.h +++ b/src/tokenizer.h @@ -6,47 +6,49 @@ #include enum token_type { - TOKEN_IDENTIFIER, // [a-zA-Z][a-zA-Z0-9_]* - TOKEN_STRING, // "[^"]*" - TOKEN_INTEGER, // [0-9]+ - TOKEN_FLOATING, // [0-9]+\.[0-9]* - TOKEN_OPERATOR, // '+' | '-' | '*' | '/' + TOKEN_IDENTIFIER, // [a-zA-Z][a-zA-Z0-9_]* + TOKEN_STRING, // "[^"]*" + TOKEN_INTEGER, // [0-9]+ + TOKEN_FLOATING, // [0-9]+\.[0-9]* + TOKEN_OPERATOR, // '+' | '-' | '*' | '/' TOKEN_STATEMENT_END, // ';' - TOKEN_PAREN_OPEN, + TOKEN_PAREN_OPEN, TOKEN_PAREN_CLOSE, + TOKEN_IF, TOKEN_EOF, TOKEN_UNKNOWN, TOKEN_TYPE_COUNT }; static const char* token_type_str[TOKEN_TYPE_COUNT] = { - [TOKEN_IDENTIFIER] = "TOKEN_IDENTIFIER", + [TOKEN_IDENTIFIER] = "TOKEN_IDENTIFIER", [TOKEN_STRING] = "TOKEN_STRING", [TOKEN_INTEGER] = "TOKEN_INTEGER", [TOKEN_FLOATING] = "TOKEN_FLOATING", [TOKEN_OPERATOR] = "TOKEN_OPERATOR", [TOKEN_PAREN_OPEN] = "TOKEN_PAREN_OPEN", [TOKEN_PAREN_CLOSE] = "TOKEN_PAREN_CLOSE", + [TOKEN_IF] = "TOKEN_IF", [TOKEN_STATEMENT_END] = "TOKEN_STATEMENT_END", - [TOKEN_EOF] = "TOKEN_EOF", + [TOKEN_EOF] = "TOKEN_EOF", [TOKEN_UNKNOWN] = "TOKEN_UNKNOWN", }; typedef struct token { char* start; char* end; - uint64_t type; - union { - int64_t i; - double f; - } parsed; + uint32_t type; } Token; -Token* token_read(Error* err, Mfile* m); +char* token_str(Token* t); void token_print(Error* err, Token* t); -int64_t token_eval_int(Error* err, Token* t); -double token_eval_float(Error* err, Token* t); - -char* token_str(Token* t); +typedef struct token_stream { + Token* cur; + Mfile* m; +} TokenStream; +TokenStream tokenstream_attach(Error* err, Mfile* m); +bool tokenstream_advance(Error* err, TokenStream* ts); +Token* tokenstream_cur(TokenStream* ts); +Token* tokenstream_get(Error* err, TokenStream* ts);