Initial commit

This commit is contained in:
2024-01-04 15:13:02 +01:00
commit ddb7c33c37
13 changed files with 909 additions and 0 deletions

3
src/Makefile Normal file
View File

@@ -0,0 +1,3 @@
# Default target: compiles and links the four C sources into the `lang` executable.
# NOTE(review): make requires the recipe line below to start with a TAB; confirm
# the leading TAB is present in the real file.
all:
gcc -o lang parser.c tokenizer.c error.c file_stream.c

95
src/error.c Normal file
View File

@@ -0,0 +1,95 @@
#include "error.h"
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Tell whether `err` currently holds no messages.
 *
 * Returns false for a NULL `err`; otherwise returns true exactly when the
 * message list pointer is NULL.
 */
bool error_empty(Error* err)
{
    return err != NULL && err->msg == NULL;
}
/*
 * Append a printf-style formatted message to `err`.
 *
 * err: accumulator to extend; a NULL `err` makes the call a no-op.
 * fmt: printf format string, followed by its arguments.
 *
 * The formatted text is stored in a MSG_SIZE-byte buffer, so longer messages
 * are truncated. If memory cannot be allocated the process is terminated
 * with exit(EXIT_FAILURE).
 */
void error_push(Error* err, const char* fmt, ...)
{
    enum { MSG_SIZE = 128 };

    if (!err)
        return;

    // first list element is a dummy element
    if (err->msg == NULL) {
        struct error_msg* head = malloc(sizeof *head);
        if (!head) {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
        /*
         * Give the dummy a valid empty string and a NULL link so that walking,
         * printing and freeing the list never touch uninitialised memory.
         */
        head->message = calloc(1, 1);
        if (!head->message) {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
        head->next = NULL;
        err->msg = head;
    }

    struct error_msg* tail = err->msg;
    while (tail->next) {
        tail = tail->next;
    }

    struct error_msg* node = malloc(sizeof *node);
    if (node == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    node->message = malloc(MSG_SIZE);
    if (!node->message) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    /* Terminate the list explicitly; malloc() does not zero the node. */
    node->next = NULL;

    va_list args;
    va_start(args, fmt);
    vsnprintf(node->message, MSG_SIZE, fmt, args);
    va_end(args);

    /* Link the node only once it is fully initialised. */
    tail->next = node;
}
/*
 * Print `msg` and every node after it to stderr, last node first.
 *
 * print_colon: when true, ": " is written after this node's text. The
 * recursion passes true for every following node, so only the node the
 * caller starts from can be printed without a trailing separator.
 */
static void error_msg_print(struct error_msg* msg, bool print_colon)
{
    if (!msg)
        return;
    error_msg_print(msg->next, true);
    fprintf(stderr, "%s", msg->message ? msg->message : "");
    if (print_colon) {
        fprintf(stderr, ": ");
    }
}

/*
 * Write the accumulated messages of `err` to stderr on a single line,
 * most recently pushed message first, separated by ": ".
 *
 * A NULL `err` prints "(empty error)". An error without messages prints an
 * empty line.
 */
void error_print(Error* err)
{
    if (!err) {
        fprintf(stderr, "(empty error)\n");
        return;
    }
    /*
     * error_push() keeps a dummy element at the head of the list. Start at
     * the element after it so the dummy's message is never read and the
     * output does not end in a dangling ": ".
     */
    if (err->msg) {
        error_msg_print(err->msg->next, false);
    }
    fprintf(stderr, "\n");
}
/*
 * Free `msg` and every node linked after it, including each node's message
 * buffer. A NULL `msg` is accepted and does nothing.
 */
static void error_msg_free(struct error_msg* msg)
{
    while (msg) {
        /* Read the link before the node is released; it must not be touched afterwards. */
        struct error_msg* following = msg->next;
        free(msg->message);
        free(msg);
        msg = following;
    }
}
/*
 * Release every message stored in `err` and reset it to the empty state.
 * A NULL `err` is ignored.
 */
void error_clear(Error* err)
{
    if (err != NULL) {
        error_msg_free(err->msg);
        err->msg = NULL;
    }
}

28
src/error.h Normal file
View File

@@ -0,0 +1,28 @@
#pragma once
#include <stddef.h>
#include <stdbool.h>
/*
 * One node in an error's singly linked list of messages.
 * error_push() appends new nodes at the tail; the first node of a list is a
 * dummy element that carries no caller-supplied text.
 */
struct error_msg {
/* Next node; list walks stop at the first NULL `next`. */
struct error_msg* next;
/* Message text written by error_push(); released by error_clear(). */
char* message;
};
/*
 * Error accumulator that fallible functions receive by pointer.
 * `msg` is NULL while no message has been pushed (see ERROR_INIT).
 */
typedef struct error {
struct error_msg* msg;
} Error;
/*
 * Returns true when `err` is non-NULL and holds no messages.
 * A NULL `err` returns false.
 */
bool error_empty(Error* err);
/*
 * Appends a printf-style formatted message to `err`; does nothing when `err`
 * is NULL. The formatted text is limited to a 128-byte buffer, and the
 * process exits if memory cannot be allocated.
 */
void error_push(Error* err, const char* fmt, ...);
/*
 * Writes the accumulated messages to stderr on one line, most recently pushed
 * first, separated by ": ". A NULL `err` prints "(empty error)".
 */
void error_print(Error* err);
/* Frees the internal data structures but leaves err in a usable state */
void error_clear(Error* err);
/* Initializer for an Error that holds no messages. */
#define ERROR_INIT {.msg = NULL}

110
src/file_stream.c Normal file
View File

@@ -0,0 +1,110 @@
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include "error.h"
#include "file_stream.h"
/*
 * Pointer that mfile_cur() hands out once a stream is at or past its end.
 * It stays NULL unless a caller assigns it; main() in parser.c points it at
 * a page protected with PROT_NONE.
 */
char* mfile_overflow_slope = NULL;
/*
 * Open `filename` read-only and map its whole contents into memory.
 *
 * err:      receives a message describing the failure, if any.
 * filename: path of the file to open.
 *
 * Returns a heap-allocated stream positioned at offset 0, to be released
 * with mfile_close(). On any failure a message is pushed to `err`, every
 * resource acquired so far is released, and NULL is returned.
 *
 * NOTE(review): mmap() is expected to reject a zero-length mapping, so an
 * empty file is reported as an mmap failure; confirm on the target platform.
 */
Mfile* mfile_open(Error* err, char* filename)
{
    /* calloc() so the read position starts at 0 and no field is left indeterminate. */
    Mfile* s = calloc(1, sizeof *s);
    if (!s) {
        error_push(err, "failed to allocate file stream struct: %s", strerror(errno));
        goto malloc_fail;
    }

    s->fd = open(filename, O_RDONLY);
    if (s->fd == -1) {
        error_push(err, "failed to open file %s: %s", filename, strerror(errno));
        goto open_fail;
    }

    struct stat sb;
    if (fstat(s->fd, &sb) == -1) {
        error_push(err, "failed to stat file %s: %s", filename, strerror(errno));
        goto stat_fail;
    }
    s->size = sb.st_size;

    s->data = mmap(NULL, (size_t)s->size, PROT_READ, MAP_PRIVATE, s->fd, 0);
    if (s->data == MAP_FAILED) {
        error_push(err, "failed to mmap file: %s", strerror(errno));
        goto mmap_fail;
    }
    return s;

mmap_fail:
stat_fail:
    close(s->fd);
open_fail:
    free(s);
malloc_fail:
    return NULL;
}
/*
 * Unmap the file, close its descriptor and free the stream.
 *
 * err: receives one message per step that fails.
 * s:   stream returned by mfile_open(); NULL is accepted and ignored.
 *
 * Every step is attempted even when an earlier one fails, so a failing
 * munmap() no longer leaks the descriptor. The stream must not be used after
 * this call.
 */
void mfile_close(Error* err, Mfile* s)
{
    if (!s)
        return;

    if (munmap(s->data, (size_t)s->size) == -1) {
        error_push(err, "failed to munmap file: %s", strerror(errno));
    }
    /* Check close()'s own result; do not reuse the munmap() status. */
    if (close(s->fd) == -1) {
        error_push(err, "failed to close file: %s", strerror(errno));
    }
    free(s);
}
/*
 * Return the byte at the current position and advance by one.
 *
 * Returns the byte as a value in 0..UCHAR_MAX (the same convention as
 * fgetc()), or EOF when the position is at or past the end of the file.
 */
int mfile_get(Mfile* s)
{
    if (s->pos >= s->size) {
        return EOF;
    }
    /* Convert through unsigned char so a 0xFF byte is not mistaken for EOF. */
    return (unsigned char)s->data[s->pos++];
}
/* Report whether the read position has reached or passed the end of the file. */
bool mfile_eof(Mfile* s)
{
    const bool bytes_remaining = s->pos < s->size;
    return !bytes_remaining;
}
/*
 * Return a pointer to the byte at the current position, or
 * mfile_overflow_slope when the position is at or past the end of the file.
 */
char* mfile_cur(Mfile* s)
{
    return (s->pos < s->size) ? &s->data[s->pos] : mfile_overflow_slope;
}
/*
 * Return the byte at the current position without advancing.
 *
 * Returns the byte as a value in 0..UCHAR_MAX, or EOF when the position is
 * at or past the end of the file.
 */
int mfile_curchar(Mfile* s)
{
    if (s->pos >= s->size) {
        return EOF;
    }
    /*
     * Convert through unsigned char: a plain char may be negative, which
     * would collide with EOF and is not a valid argument for <ctype.h>.
     */
    return (unsigned char)s->data[s->pos];
}
/*
 * Advance the position while `f` returns non-zero for the current byte.
 *
 * s: stream to advance.
 * f: predicate with the <ctype.h> signature (for example isspace); it is
 *    called with the byte converted to unsigned char.
 *
 * The scan stops at the end of the file, so it never reads past the mapping.
 */
void mfile_skip(Mfile* s, int (*f)(int))
{
    while (s->pos < s->size && f((unsigned char)s->data[s->pos])) {
        s->pos += 1;
    }
}

39
src/file_stream.h Normal file
View File

@@ -0,0 +1,39 @@
#pragma once
#include <sys/stat.h>
#include <stdlib.h>
#include "error.h"
/* Defined in file_stream.c; returned by mfile_cur() when a stream is at or past its end. */
extern char* mfile_overflow_slope;
/* State of one read-only, memory-mapped file created by mfile_open(). */
typedef struct mfile {
/* Start of the mapping returned by mmap(). */
char* data;
/* File size in bytes, taken from fstat(). */
off_t size;
/* Read offset into `data`, in bytes. */
size_t pos;
// internal
/* Descriptor returned by open(); closed by mfile_close(). */
int fd;
/* NOTE(review): appears unused by the functions in file_stream.c - confirm whether it is still needed. */
struct stat sb;
/* Expands to a member access on `sb`; usable as `stream->mfile_size`. */
#define mfile_size sb.st_size
} Mfile;
/*
 * Open `filename` read-only and memory-map it.
 * Returns NULL and pushes a message to `err` on failure.
 */
Mfile* mfile_open(Error* err, char* filename);
/*
 * Unmap the file, close its descriptor and free `s`.
 * Failures are reported through `err`.
 */
void mfile_close(Error* err, Mfile* s);
/* Get next byte and advance the position, returns EOF if end of file */
int mfile_get(Mfile* s);
/* Returns true if end of file */
bool mfile_eof(Mfile* s);
/*
 * Returns a pointer to the byte at the current position, or
 * mfile_overflow_slope when at or past the end of the file.
 */
char* mfile_cur(Mfile* s);
/* Advances the position while f returns non-zero for the current byte */
void mfile_skip(Mfile* s, int (*f)(int));
/* Get current char without advancing, returns EOF if end of file */
int mfile_curchar(Mfile* s);

189
src/parser.c Normal file
View File

@@ -0,0 +1,189 @@
#include "error.h"
#include "file_stream.h"
#include "tokenizer.h"
#include "printable.h"
#include <ctype.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
/* ======= Grammar Rules =======
statements
: statements statement
| statement
;
statement
: expr
;
expr
: INT OPERATOR INT {$$ = binary_op_int($1, $3, $2);}
| INT OPERATOR FLOAT {conv($1); $$ = binary_op_float($1, $3, $2);}
| FLOAT OPERATOR INT {conv($3); $$ = binary_op_float($1, $3, $2);}
| FLOAT OPERATOR FLOAT {$$ = binary_op_float($1, $3, $2);}
;
============================= */
/* Kinds of value the parser can hold; VALUE_TYPE_COUNT is the number of kinds. */
enum value_type {
    VALUE_INTEGER = 0,
    VALUE_OPERATOR = 1,
    VALUE_TYPE_COUNT = 2,
};

/* Printable name of each value kind, indexed by enum value_type. */
static const char* value_type_str[VALUE_TYPE_COUNT] = {
    "VALUE_INTEGER",  /* VALUE_INTEGER */
    "VALUE_OPERATOR", /* VALUE_OPERATOR */
};
/* A value produced while parsing: an integer or an operator. */
typedef struct value {
/* Presumably a human-readable label for debugging - TODO confirm. */
const char* debug_name;
/* Selects the active union member below. */
enum value_type type;
union {
/* Integer payload, used when type is VALUE_INTEGER. */
int64_t i;
/* Operator text, used when type is VALUE_OPERATOR; parse_binary_expr() reads op[0]. */
char op[3];
};
} Value;
/*
 * One entry of a singly linked symbol list.
 * NOTE(review): no code visible in this file creates or reads these entries;
 * the field meanings below are inferred from their names - confirm.
 */
typedef struct symbol_table_entry {
/* Presumably a human-readable label for debugging - TODO confirm. */
const char* debug_name;
/* Presumably a numeric identifier for the symbol - TODO confirm. */
int id;
/* Value associated with the entry. */
Value* val;
/* Next entry in the list. */
struct symbol_table_entry* next;
} Symbol_table_entry;
/* Holder for the head of a symbol_table_entry list. */
typedef struct symbol_table {
Symbol_table_entry* syms;
} Symbol_table;
/*
 * Convert an integer token into a newly allocated VALUE_INTEGER.
 *
 * err: receives a message when the token is not an integer, cannot be
 *      converted, is out of range, or memory cannot be allocated.
 * t:   token to convert; the text is read starting at t->start.
 *
 * Returns the new value, which the caller must free(), or NULL on failure.
 *
 * NOTE(review): the conversion stops at the first byte that is not part of a
 * decimal number, so it relies on such a byte following the digits in the
 * underlying buffer.
 */
static Value* parse_int(Error* err, Token* t)
{
    if (t->type != TOKEN_INTEGER) {
        /* Guard the name lookup: the type may hold a value outside the table. */
        const size_t known = sizeof token_type_str / sizeof token_type_str[0];
        const char* type_name = t->type < known ? token_type_str[t->type] : "<invalid>";
        error_push(err, "%s: unexpected token type: %s", __func__, type_name);
        return NULL;
    }

    /* strtoll() matches the 64-bit field; strtol() returns a long, which may be narrower. */
    char* end = NULL;
    errno = 0;
    const long long parsed = strtoll(t->start, &end, 10);
    if (end == t->start) {
        error_push(err, "%s: invalid integer", __func__);
        return NULL;
    }
    if (errno == ERANGE) {
        error_push(err, "%s: integer out of range", __func__);
        return NULL;
    }

    /* calloc() so debug_name is a defined NULL rather than indeterminate. */
    Value* v = calloc(1, sizeof *v);
    if (!v) {
        error_push(err, "%s: failed to allocate value: %s", __func__, strerror(errno));
        return NULL;
    }
    v->type = VALUE_INTEGER;
    v->i = parsed;
    return v;
}
/*
 * Evaluate `lval <op> rval` for two integers and print the calculation.
 *
 * err:  receives a message describing the failure, if any.
 * lval: left operand, must be VALUE_INTEGER.
 * rval: right operand, must be VALUE_INTEGER.
 * op:   operator, must be VALUE_OPERATOR; op->op[0] selects + - * or /.
 *
 * Returns a newly allocated VALUE_INTEGER that the caller must free(), or
 * NULL when the operand kinds are wrong, the operator is not supported, the
 * division is undefined, or memory cannot be allocated.
 *
 * NOTE(review): overflow of +, - and * is not checked.
 */
static Value* parse_binary_expr(Error* err, Value* lval, Value* rval, Value* op)
{
    if (lval->type != VALUE_INTEGER
        || rval->type != VALUE_INTEGER
        || op->type != VALUE_OPERATOR)
    {
        error_push(err, "%s: unexpected token types: %s %s %s",
            __func__, value_type_str[lval->type],
            value_type_str[rval->type], value_type_str[op->type]);
        return NULL;
    }

    const int64_t a = lval->i;
    const int64_t b = rval->i;
    int64_t computed = 0;

    switch (op->op[0]) {
    case '+':
        computed = a + b;
        break;
    case '*':
        computed = a * b;
        break;
    case '-':
        computed = a - b;
        break;
    case '/':
        /* Both cases are undefined behaviour for integer division in C. */
        if (b == 0) {
            error_push(err, "%s: division by zero", __func__);
            return NULL;
        }
        if (a == INT64_MIN && b == -1) {
            error_push(err, "%s: integer overflow in division", __func__);
            return NULL;
        }
        computed = a / b;
        break;
    default:
        /* Without this branch the result would be returned uninitialised. */
        error_push(err, "%s: unsupported operator: %c", __func__, op->op[0]);
        return NULL;
    }

    Value* result = calloc(1, sizeof *result);
    if (!result) {
        error_push(err, "%s: failed to allocate value: %s", __func__, strerror(errno));
        return NULL;
    }
    result->type = VALUE_INTEGER;
    result->i = computed;

    /* int64_t is not necessarily long; print through long long for a portable format. */
    printf("\nCALCULATED EXPRESSION %lld %c %lld = %lld\n",
        (long long)a, op->op[0], (long long)b, (long long)computed);
    return result;
}
/*
 * Read the next token from `m`, evaluate it if it is an integer, and print it.
 *
 * err: error accumulator; an error found here is printed and then cleared.
 * m:   stream to read from; leading whitespace is skipped first.
 *
 * Returns 0 when a token was processed, or EOF when the end of the input was
 * reached or an error occurred. The token and any value created from it are
 * freed before returning.
 */
static int parser_next(Error* err, Mfile* m)
{
    mfile_skip(m, isspace);

    Token* t = token_read(err, m);
    if (!t) {
        if (!error_empty(err)) {
            error_print(err);
            error_clear(err);
        } else {
            fprintf(stderr, "unknown error\n");
        }
        return EOF;
    }

    /* token_read() marks the end of the input by storing EOF in the type field. */
    if (t->type == (uint64_t)EOF) {
        free(t);
        return EOF;
    }
    if (!error_empty(err)) {
        error_print(err);
        error_clear(err);
        free(t);
        return EOF;
    }

    int status = 0;
    if (t->type == TOKEN_INTEGER) {
        Value* v = parse_int(err, t);
        if (v && error_empty(err)) {
            printf("\nINT: %lld\n", (long long)v->i);
        } else {
            if (!error_empty(err)) {
                error_print(err);
                error_clear(err);
            }
            status = EOF;
        }
        free(v);
    }

    if (status == 0) {
        token_print(err, t);
    }
    free(t);
    return status;
}
/* ========================================================================= */
/*
 * Entry point: tokenizes and evaluates the file named by argv[1].
 *
 * Returns EXIT_FAILURE when the argument count is wrong or the file cannot
 * be opened or closed, EXIT_SUCCESS otherwise.
 */
int main(int argc, char** argv)
{
    int status = EXIT_SUCCESS;
    if (argc != 2) {
        fprintf(stderr, "usage: %s <file>\n", argv[0]);
        return EXIT_FAILURE;
    }

    // create a protected page for debugging purposes
    // (if it cannot be created, mfile_overflow_slope keeps its NULL default)
    const long pagesize = sysconf(_SC_PAGESIZE);
    if (pagesize > 0) {
        void* protected_page = mmap(NULL, (size_t)pagesize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (protected_page != MAP_FAILED) {
            mfile_overflow_slope = protected_page;
        }
    }

    Error err = ERROR_INIT;
    Mfile* m = mfile_open(&err, argv[1]);
    if (!m) {
        /* Without a stream there is nothing to parse; stop instead of dereferencing NULL. */
        error_push(&err, "mfile_open");
        error_print(&err);
        error_clear(&err);
        return EXIT_FAILURE;
    }
    error_clear(&err);

    /* parser_next() returns EOF at the end of the input and on errors; stop on either. */
    while (!mfile_eof(m)) {
        if (parser_next(&err, m) == EOF) {
            break;
        }
    }
    error_clear(&err);

    mfile_close(&err, m);
    if (!error_empty(&err)) {
        error_push(&err, "mfile_close");
        error_print(&err);
        status = EXIT_FAILURE;
    }
    error_clear(&err);
    return status;
}

91
src/printable.h Normal file
View File

@@ -0,0 +1,91 @@
#pragma once
#ifdef GENERATE_TABLE
/*
Generates table below:
cc -DGENERATE_TABLE -xc printable.h -o generate_table \
&& ./generate_table >> printable.h
*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
/* Return 1 when `c` must be backslash-escaped inside a C string literal, else 0. */
static int escape(int c) {
    switch (c) {
    case '"':
    case '\\':
        return 1;
    default:
        return 0;
    }
}
/*
 * Table generator (built only with -DGENERATE_TABLE): writes the C source of
 * the 256-entry asdasd_printable lookup table to stdout.
 *
 * Entry 0 is "<NULL>" and entry 255 is "<EOF>". For the bytes in between, a
 * printable character is emitted as itself (backslash-escaped when
 * escape() says so) and every other byte as "<xx>" in hexadecimal. Lines
 * are wrapped once they would exceed MAX_LINE_WIDTH.
 */
int main(void) {
#define MAX_LINE_WIDTH 80
#define BUF_SZ 16
#define NULL_STR "\"<NULL>\", "
#define TAB " "
/* The printf() call stays outside assert() so output still happens under NDEBUG. */
#define printf_assert(...) \
    do { \
        int printed_ = printf(__VA_ARGS__); \
        assert(printed_ > 0); \
        (void)printed_; \
    } while (0)
    int written = 0;
    char buf[BUF_SZ];
    const char* fmt;
    printf_assert(
        "\n// generated by main() in printable.h\n"
        "\nstatic const char * asdasd_printable[256] = {"
        "\n" TAB NULL_STR
    );
    int line_len = sizeof(TAB) + sizeof(NULL_STR) - 1;
    for (int c = 1; c < 255; c++) {
        if (isprint(c) && escape(c)) {
            fmt = "\"\\%c\", ";
        } else if (isprint(c)) {
            fmt = "\"%c\", ";
        } else {
            fmt = "\"<%02x>\", ";
        }
        written = snprintf(buf, BUF_SZ, fmt, c);
        assert( written > 0 && written < BUF_SZ );
        line_len += written;
        if (line_len > MAX_LINE_WIDTH) {
            printf_assert("\n" TAB);
            line_len = sizeof(TAB) + written - 1;
        }
        /* buf is data, not a format: the entry for '%' contains a '%' character. */
        printf_assert("%s", buf);
    }
    printf_assert("\"<EOF>\"\n\n};");
    return EXIT_SUCCESS;
}
#endif
/*
 * PRINTABLE(ch) yields a C string describing the byte `ch`: the character
 * itself when it is printable ASCII, otherwise a placeholder such as "<0a>".
 * The argument is converted to unsigned char before the lookup, so a value
 * of -1 selects the last entry, "<EOF>".
 * The result is a `const char *` and must be formatted with %s, not %c.
 */
#define PRINTABLE(ch) asdasd_printable[(unsigned char)(ch)]
/*
 * Lookup table behind PRINTABLE(): one string per byte value; index 0 is
 * "<NULL>" and index 255 is "<EOF>".
 */
// generated by main() in printable.h
static const char * asdasd_printable[256] = {
"<NULL>", "<01>", "<02>", "<03>", "<04>", "<05>", "<06>", "<07>", "<08>",
"<09>", "<0a>", "<0b>", "<0c>", "<0d>", "<0e>", "<0f>", "<10>", "<11>",
"<12>", "<13>", "<14>", "<15>", "<16>", "<17>", "<18>", "<19>", "<1a>",
"<1b>", "<1c>", "<1d>", "<1e>", "<1f>", " ", "!", "\"", "#", "$", "%", "&",
"'", "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", "2", "3", "4", "5",
"6", "7", "8", "9", ":", ";", "<", "=", ">", "?", "@", "A", "B", "C", "D",
"E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S",
"T", "U", "V", "W", "X", "Y", "Z", "[", "\\", "]", "^", "_", "`", "a", "b",
"c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q",
"r", "s", "t", "u", "v", "w", "x", "y", "z", "{", "|", "}", "~", "<7f>",
"<80>", "<81>", "<82>", "<83>", "<84>", "<85>", "<86>", "<87>", "<88>",
"<89>", "<8a>", "<8b>", "<8c>", "<8d>", "<8e>", "<8f>", "<90>", "<91>",
"<92>", "<93>", "<94>", "<95>", "<96>", "<97>", "<98>", "<99>", "<9a>",
"<9b>", "<9c>", "<9d>", "<9e>", "<9f>", "<a0>", "<a1>", "<a2>", "<a3>",
"<a4>", "<a5>", "<a6>", "<a7>", "<a8>", "<a9>", "<aa>", "<ab>", "<ac>",
"<ad>", "<ae>", "<af>", "<b0>", "<b1>", "<b2>", "<b3>", "<b4>", "<b5>",
"<b6>", "<b7>", "<b8>", "<b9>", "<ba>", "<bb>", "<bc>", "<bd>", "<be>",
"<bf>", "<c0>", "<c1>", "<c2>", "<c3>", "<c4>", "<c5>", "<c6>", "<c7>",
"<c8>", "<c9>", "<ca>", "<cb>", "<cc>", "<cd>", "<ce>", "<cf>", "<d0>",
"<d1>", "<d2>", "<d3>", "<d4>", "<d5>", "<d6>", "<d7>", "<d8>", "<d9>",
"<da>", "<db>", "<dc>", "<dd>", "<de>", "<df>", "<e0>", "<e1>", "<e2>",
"<e3>", "<e4>", "<e5>", "<e6>", "<e7>", "<e8>", "<e9>", "<ea>", "<eb>",
"<ec>", "<ed>", "<ee>", "<ef>", "<f0>", "<f1>", "<f2>", "<f3>", "<f4>",
"<f5>", "<f6>", "<f7>", "<f8>", "<f9>", "<fa>", "<fb>", "<fc>", "<fd>",
"<fe>", "<EOF>"
};

3
src/test/test.txt Normal file
View File

@@ -0,0 +1,3 @@
The "quick" brown fox jumps at 8.42 m/h over the lazy dog
10 + 20 = 30

63
src/test/test_error.c Normal file
View File

@@ -0,0 +1,63 @@
#include "error.h"
#include <stddef.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <stdio.h>
/*
 * Test helper that requests an allocation far too large to succeed.
 *
 * Returns -1 and pushes a message to `err` when malloc() fails, which is the
 * expected outcome. Returns 0 if the allocation unexpectedly succeeds; the
 * block is released in that case so the helper never leaks.
 */
int bad_alloc(struct error* err)
{
    void* bad = malloc(9223372036854775807UL);
    if (bad) {
        free(bad);
        return 0;
    }
    error_push(err, "bad_alloc failed: malloc: %s", strerror(errno));
    return -1;
}
/*
 * Test helper: calls bad_alloc() and, when that left a message in `err`,
 * adds its own message on top.
 *
 * Returns -1 when `err` is not empty after bad_alloc(), 0 otherwise.
 */
int failer(struct error* err)
{
    bad_alloc(err);

    if (error_empty(err)) {
        return 0;
    }

    error_push(err, "failer failed");
    return -1;
}
/* Test helper that never reports an error: leaves `err` untouched and returns 1. */
int winner(struct error* err)
{
    (void)err;
    return 1;
}
/*
 * Test driver for the error module.
 *
 * Checks that failer() leaves a message in the error and that winner() does
 * not. Progress is written to stderr; returns EXIT_FAILURE if either check
 * fails.
 */
int main(void)
{
    int status = EXIT_SUCCESS;
    struct error err = ERROR_INIT;

    fprintf(stderr, "Running function that results in error\n");
    failer(&err);
    if (!error_empty(&err)) {
        fprintf(stderr, "OK\n");
    } else {
        fprintf(stderr, "function `failer` did not fail\n");
        status = EXIT_FAILURE;
    }
    error_clear(&err);

    fprintf(stderr, "Running function that should not return error\n");
    winner(&err);
    if (!error_empty(&err)) {
        error_print(&err);
        /* Terminate the line like every other message in this test. */
        fprintf(stderr, "function `winner` failed when it's not supposed to\n");
        status = EXIT_FAILURE;
    } else {
        fprintf(stderr, "OK\n");
    }
    error_clear(&err);

    return status;
}

View File

@@ -0,0 +1,46 @@
#include "file_stream.h"
#include <string.h>
#include <stdio.h>
/*
 * Test driver for the memory-mapped file stream.
 *
 * Opens test.txt, reads its first bytes with mfile_get() and compares them
 * with the expected text, then closes the stream. Progress is written to
 * stderr; returns EXIT_FAILURE if any step fails.
 *
 * NOTE(review): the expected text is the start of src/test/test.txt; confirm
 * the test is run from the directory that contains that file.
 */
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;
    int status = EXIT_SUCCESS;
    Error err = ERROR_INIT;

    fprintf(stderr, "attempting to open test.txt\n");
    Mfile* m = mfile_open(&err, "test.txt");
    if (!m) {
        /* The remaining steps need a stream; stop instead of dereferencing NULL. */
        error_push(&err, "mfile_open test failed");
        error_print(&err);
        error_clear(&err);
        return EXIT_FAILURE;
    }
    fprintf(stderr, "OK\n");
    error_clear(&err);

    fprintf(stderr, "attempting to read from test.txt with mfile_get(m)\n");
    static const char expected[] = "The \"quick\" ";
    char str[sizeof expected - 1];
    size_t n = 0;
    while (!mfile_eof(m) && n < sizeof str) {
        str[n++] = (char)mfile_get(m);
    }
    /* Require the full length so a short or empty read cannot pass vacuously. */
    if (n != sizeof str || memcmp(expected, str, n) != 0) {
        error_push(&err, "mfile_get test failed");
        error_print(&err);
        status = EXIT_FAILURE;
    } else {
        fprintf(stderr, "OK\n");
    }
    /* Start the next step with an empty error so it reports only its own failure. */
    error_clear(&err);

    fprintf(stderr, "attempting to close\n");
    mfile_close(&err, m);
    if (!error_empty(&err)) {
        error_push(&err, "mfile_close test failed");
        error_print(&err);
        status = EXIT_FAILURE;
    } else {
        fprintf(stderr, "OK\n");
    }
    error_clear(&err);

    return status;
}

27
src/test/test_tokenizer.c Normal file
View File

@@ -0,0 +1,27 @@
#include <stdlib.h>
#include "error.h"
#include "file_stream.h"
/*
 * Test driver that opens and closes test.txt through the file stream API.
 * Returns EXIT_FAILURE if either step reports an error.
 */
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;
    int status = EXIT_SUCCESS;
    Error err = ERROR_INIT;

    Mfile* m = mfile_open(&err, "test.txt");
    if (!m) {
        /* There is no stream to close; stop instead of passing NULL on. */
        error_push(&err, "mfile_open");
        error_print(&err);
        error_clear(&err);
        return EXIT_FAILURE;
    }
    error_clear(&err);

    mfile_close(&err, m);
    if (!error_empty(&err)) {
        error_push(&err, "mfile_close");
        error_print(&err);
        status = EXIT_FAILURE;
    }
    error_clear(&err);

    return status;
}

170
src/tokenizer.c Normal file
View File

@@ -0,0 +1,170 @@
#include "error.h"
#include "file_stream.h"
#include "tokenizer.h"
#include "printable.h"
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Larger / smaller of two values. Each argument may be evaluated twice, so
 * do not pass expressions with side effects.
 * NOTE(review): neither macro is referenced by the code visible in this file.
 */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
/*
 * Lookup table indexed by byte value: true for every character that can be
 * part of an operator token, false for all others.
 */
static const bool is_operator[256] = {
    ['+'] = true,
    ['-'] = true,
    ['*'] = true,
    ['/'] = true,
    ['='] = true,
    ['%'] = true,
    ['&'] = true,
    ['|'] = true,
    ['<'] = true,
    ['>'] = true,
    ['!'] = true,
    ['^'] = true,
    ['('] = true,
    [')'] = true,
    ['~'] = true,
};
/*
 * Read a run of operator characters starting at the current position.
 *
 * err: unused; present for a uniform reader signature.
 * m:   stream positioned at the first operator character.
 * t:   token to fill in; type becomes TOKEN_OPERATOR and [start, end)
 *      covers the run.
 *
 * The position is left on the first byte that is not an operator character,
 * or at the end of the file when the run extends that far.
 */
static void token_read_operator(Error* err, Mfile* m, Token* t)
{
    t->type = TOKEN_OPERATOR;
    t->start = mfile_cur(m);
    /*
     * Look at the current byte before consuming it. Consuming first and then
     * stepping back one byte would un-read the last operator whenever the
     * run ends at the end of the file.
     */
    while (!mfile_eof(m) && is_operator[(unsigned char)m->data[m->pos]]) {
        m->pos += 1;
    }
    /* One past the last consumed byte; stays inside the mapping even at end of file. */
    t->end = m->data + m->pos;
}
/*
 * Read an integer or floating point literal starting at the current position.
 *
 * err: unused; present for a uniform reader signature.
 * m:   stream positioned at the first character of the literal.
 * t:   token to fill in. An optional leading '-' and a run of digits are
 *      consumed; if a '.' follows, it and a further run of digits are
 *      consumed and the type becomes TOKEN_FLOATING, otherwise the type is
 *      TOKEN_INTEGER. [start, end) covers the literal.
 */
static void token_read_number(Error* err, Mfile* m, Token* t)
{
    t->start = mfile_cur(m);

    if (mfile_curchar(m) == '-') {
        m->pos += 1;
    }
    mfile_skip(m, isdigit);

    if (mfile_curchar(m) == '.') {
        t->type = TOKEN_FLOATING;
        m->pos += 1;
        mfile_skip(m, isdigit);
    } else {
        t->type = TOKEN_INTEGER;
    }

    /*
     * One past the last consumed byte. mfile_cur() is not used here because
     * at the end of the file it returns the overflow sentinel, which is not
     * related to t->start.
     */
    t->end = m->data + m->pos;
}
/*
 * Convert the text of token `t` to a 64-bit integer.
 *
 * err: receives a message when the conversion does not end exactly at
 *      t->end or the value is out of range.
 * t:   token whose text [t->start, t->end) is converted as base 10.
 *
 * Returns the converted value, or 0 after pushing an error.
 */
int64_t token_eval_int(Error* err, Token* t)
{
    enum { MAX_SHOWN = 64 };

    char* endptr = NULL;
    errno = 0;
    const long long n = strtoll(t->start, &endptr, 10);

    if (endptr == t->end && errno != ERANGE) {
        return n;
    }

    /*
     * The token text lives in the mapped file and is not NUL-terminated, so
     * it is printed with an explicit, capped length instead of a bare %s.
     */
    int shown = 0;
    if (t->end > t->start) {
        const ptrdiff_t len = t->end - t->start;
        shown = len < MAX_SHOWN ? (int)len : MAX_SHOWN;
    }
    if (endptr != t->end) {
        error_push(err, "%s: invalid integer: %.*s", __func__, shown, t->start);
    } else {
        error_push(err, "%s: integer out of range: %.*s", __func__, shown, t->start);
    }
    return 0;
}
/*
 * Convert the text of token `t` to a double.
 *
 * err: receives a message when the conversion does not end exactly at
 *      t->end or the result overflows to +/-HUGE_VAL.
 * t:   token whose text [t->start, t->end) is converted.
 *
 * Returns the converted value. Returns 0 when the text is not a valid
 * number; on overflow the error is pushed and +/-HUGE_VAL is still returned.
 */
double token_eval_float(Error* err, Token* t)
{
    enum { MAX_SHOWN = 64 };

    /*
     * The token text lives in the mapped file and is not NUL-terminated, so
     * error messages print it with an explicit, capped length.
     */
    int shown = 0;
    if (t->end > t->start) {
        const ptrdiff_t len = t->end - t->start;
        shown = len < MAX_SHOWN ? (int)len : MAX_SHOWN;
    }

    char* endptr = NULL;
    const double n = strtod(t->start, &endptr);
    if (endptr != t->end) {
        error_push(err, "%s: invalid floating point: %.*s", __func__, shown, t->start);
        return 0;
    }
    if (n == HUGE_VAL || n == -HUGE_VAL) {
        error_push(err, "%s: invalid floating point: %.*s", __func__, shown, t->start);
    }
    return n;
}
/*
 * Read a double-quoted string literal starting at the current position.
 *
 * err: receives a message when the file ends before the closing quote.
 * m:   stream positioned at the opening quote.
 * t:   token to fill in; type becomes TOKEN_STRING and, on success,
 *      [start, end) covers the literal including both quotes. On failure
 *      t->end is left unchanged.
 *
 * A backslash escapes the character that follows it, so \" does not end the
 * string and \\ is a literal backslash.
 */
static void token_read_string(Error* err, Mfile* m, Token* t)
{
    t->type = TOKEN_STRING;
    t->start = mfile_cur(m);
    m->pos += 1; /* opening quote */

    bool escaped = false;
    while (!mfile_eof(m)) {
        const int c = mfile_curchar(m);
        if (c == '"' && !escaped) {
            break;
        }
        /* A backslash only starts an escape when it is not itself escaped. */
        escaped = !escaped && c == '\\';
        m->pos += 1;
    }

    if (mfile_eof(m)) {
        /* PRINTABLE() yields a string, hence %s; PRINTABLE(EOF) selects the table's last entry. */
        error_push(err, "%s: expected '\"', got %s", __func__, PRINTABLE(EOF));
        return;
    }

    m->pos += 1; /* closing quote */
    t->end = m->data + m->pos;
}
/* Predicate for mfile_skip(): true for characters allowed after the first one in an identifier. */
static int is_identifier_char(int c)
{
    return isalnum(c) || c == '_';
}

/*
 * Read an identifier starting at the current position.
 *
 * err: receives a message when the first character is not a letter.
 * m:   stream positioned at the first character of the identifier.
 * t:   token to fill in; type becomes TOKEN_IDENTIFIER and, on success,
 *      [start, end) covers the identifier. On failure t->end is left
 *      unchanged.
 *
 * Accepts a letter followed by letters, digits and underscores, matching the
 * pattern documented for TOKEN_IDENTIFIER in tokenizer.h.
 */
static void token_read_identifier(Error* err, Mfile* m, Token* t)
{
    t->type = TOKEN_IDENTIFIER;
    t->start = mfile_cur(m);

    /* <ctype.h> functions need an unsigned char value; a plain char may be negative. */
    const unsigned char first = (unsigned char)*(t->start);
    if (!isalpha(first)) {
        /* PRINTABLE() yields a string, hence %s. */
        error_push(err, "%s, expected identifier, got %s", __func__, PRINTABLE(first));
        return;
    }

    mfile_skip(m, is_identifier_char);
    t->end = m->data + m->pos;
}
/* Placeholder: the body is empty, so calling this currently has no effect. */
static void token_eval_identifier(Token* t)
{
}
/*
 * Read the token that starts at the current position of `m`.
 *
 * err: receives a message when allocation fails, a reader reports a
 *      problem, or the current character cannot start any token.
 * m:   stream to read from; leading whitespace is not skipped here.
 *
 * Returns a zero-initialised, heap-allocated token that the caller must
 * free(), or NULL when allocation fails. At the end of the input the token's
 * type is set to EOF. An unexpected character is reported through `err`,
 * consumed so that repeated calls always make progress, and returned as a
 * TOKEN_UNKNOWN token covering that one byte.
 *
 * NOTE(review): the end of input is signalled with EOF stored in the type
 * field, not with TOKEN_EOF, because parser.c compares the type with EOF.
 */
Token* token_read(Error* err, Mfile* m)
{
    Token* t = calloc(1, sizeof *t);
    if (!t) {
        error_push(err, "failed to allocate token: %s", strerror(errno));
        return NULL;
    }

    /* Handle the end of input first: EOF must never be used as a table index. */
    if (m->pos >= m->size) {
        t->type = EOF;
        return t;
    }

    const unsigned char c = (unsigned char)m->data[m->pos];
    /* A '-' starts a number only when a digit follows; otherwise it is an operator. */
    const bool negative_number = c == '-'
        && m->pos + 1 < m->size
        && isdigit((unsigned char)m->data[m->pos + 1]);

    if (isalpha(c)) {
        token_read_identifier(err, m, t);
    } else if (c == '"') {
        token_read_string(err, m, t);
    } else if (isdigit(c) || negative_number) {
        token_read_number(err, m, t);
    } else if (is_operator[c]) {
        token_read_operator(err, m, t);
    } else {
        t->type = TOKEN_UNKNOWN;
        t->start = m->data + m->pos;
        m->pos += 1;
        t->end = m->data + m->pos;
        /* PRINTABLE() yields a string, hence %s. */
        error_push(err, "unexpected character: %s", PRINTABLE(c));
    }
    return t;
}
/*
 * Write a description of token `t` to stderr: its text between quotes,
 * followed by the name of its type.
 *
 * err: receives a message when writing to stderr fails.
 * t:   token to print; NULL is ignored. A token without start/end pointers
 *      is printed with empty text, and a type outside token_type_str is
 *      printed as "<invalid>".
 */
void token_print(Error* err, Token* t)
{
    if (!t)
        return;

    /* Bounds-check the type: it may hold EOF or another value outside the table. */
    const size_t known = sizeof token_type_str / sizeof token_type_str[0];
    const char* type_name = t->type < known ? token_type_str[t->type] : "<invalid>";

    if (fprintf(stderr, "{\n\t\"") < 0) {
        error_push(err, "failed to print token: %s", strerror(errno));
        return;
    }

    if (t->start && t->end) {
        for (const char* p = t->start; p < t->end; p++) {
            if (fputc((unsigned char)*p, stderr) == EOF) {
                error_push(err, "failed to print token: %s", strerror(errno));
                return;
            }
        }
    }

    if (fprintf(stderr, "\"\n\ttype: %s\n}\n", type_name) < 0) {
        error_push(err, "failed to print token: %s", strerror(errno));
        return;
    }
}

45
src/tokenizer.h Normal file
View File

@@ -0,0 +1,45 @@
#pragma once
#include "file_stream.h"
#include <stdbool.h>
#include <stdint.h>
/*
 * Kinds of token. The values are consecutive from 0 so they can index
 * token_type_str; TOKEN_TYPE_COUNT is the number of kinds.
 */
enum token_type {
    TOKEN_IDENTIFIER, // [a-zA-Z][a-zA-Z0-9_]*
    TOKEN_STRING,     // "[^"]*"
    TOKEN_INTEGER,    // [0-9]+
    TOKEN_FLOATING,   // [0-9]+\.[0-9]*
    TOKEN_OPERATOR,   // + - * /
    TOKEN_EOF,
    TOKEN_UNKNOWN,
    TOKEN_TYPE_COUNT
};

/* Printable name of each token kind, indexed by enum token_type. */
static const char* token_type_str[TOKEN_TYPE_COUNT] = {
    [TOKEN_IDENTIFIER] = "TOKEN_IDENTIFIER",
    [TOKEN_STRING]     = "TOKEN_STRING",
    [TOKEN_INTEGER]    = "TOKEN_INTEGER",
    [TOKEN_FLOATING]   = "TOKEN_FLOATING",
    [TOKEN_OPERATOR]   = "TOKEN_OPERATOR",
    [TOKEN_EOF]        = "TOKEN_EOF",
    [TOKEN_UNKNOWN]    = "TOKEN_UNKNOWN",
};
/* A token: a span of the input buffer plus its kind. */
typedef struct token {
/* First byte of the token's text inside the input buffer. */
char* start;
/* End of the token's text; token_print() prints the bytes in [start, end). */
char* end;
/*
 * Kind of token, normally an enum token_type value.
 * NOTE: token_read() stores EOF here (converted to uint64_t) at the end of
 * the input, which is not a valid index into token_type_str.
 */
uint64_t type;
/* NOTE(review): not written or read by the code visible in tokenizer.c - confirm intended use. */
union {
int64_t i;
double f;
} parsed;
} Token;
/*
 * Reads the token at the current position of `m`. The token is allocated
 * with calloc() and nothing in tokenizer.c frees it, so the caller is
 * presumably responsible for free()ing it. Returns NULL if allocation fails;
 * other problems are reported through `err`.
 */
Token* token_read(Error* err, Mfile* m);
/* Writes the token's text and type name to stderr; write failures are pushed to `err`. */
void token_print(Error* err, Token* t);
/*
 * Converts the token's text to an integer (base 10). Pushes an error and
 * returns 0 when the conversion does not end at the token's end.
 */
int64_t token_eval_int(Error* err, Token* t);
/*
 * Converts the token's text to a double. Pushes an error when the conversion
 * does not end at the token's end (returning 0) or the result is +/-HUGE_VAL.
 */
double token_eval_float(Error* err, Token* t);