#include #include #include #include #include #include #include "os.h" #include "s8slice.h" typedef struct Parser { S8Slice file; int64_t cursor; } Parser; typedef struct Token { enum { TOKEN_UNDEFINED, TOKEN_INT, TOKEN_IDENTIFIER, TOKEN_STRING, TOKEN_SYNTAX_ERROR, TOKEN_EOF, } kind; union { S8Slice identifier; S8Slice string; int integer; }; } Token; Parser parser_attach(S8Slice file) { return (Parser){.file = file, .cursor = 0}; } static int parser_peek(Parser* p) { const int ch = p->cursor >= p->file.len ? EOF : (int)p->file.data[p->cursor]; return ch; } static int parser_getch(Parser* p) { const int ch = p->cursor >= p->file.len ? EOF : (int)p->file.data[p->cursor]; p->cursor += 1; if (ch == '\n' && parser_peek(p) == '\r') { p->cursor += 1; } return ch; } static void parser_ungetch(Parser* p) { if (p->cursor > 0) { p->cursor -= 1; } } static void parser_discard(Parser* p, int (*f) (int)) { int ch; while (ch = parser_getch(p), f(ch)) /* noop */; if (ch != EOF) parser_ungetch(p); } static Token read_integer(Parser* p) { /* FIXME: add support for 0x prefixes */ /* the first char should be guaranteed to be isdigit */ assert(isdigit(parser_peek(p))); int ch = EOF; int n = 0; while (ch = parser_getch(p), isdigit(ch)) { n *= 10; n += ch - '0'; } parser_ungetch(p); Token t = {.kind = TOKEN_INT, .integer = n}; return t; } static Token read_identifier(Parser* p) { int ch = EOF; /* the first char should be guaranteed to be isalpha */ assert(isalpha(parser_peek(p))); int64_t begin = p->cursor; while (ch = parser_getch(p), isalnum(ch)) /* NOOP */; parser_ungetch(p); int64_t end = p->cursor; Token t = { .kind = TOKEN_IDENTIFIER, .identifier = s8slice(&p->file, begin, end) }; return t; } static Token read_string(Parser* p) { int ch = EOF; /* the first char should be guaranteed to be '"' */ assert(parser_peek(p) == '"'); (void)parser_getch(p); /* skip quote */ int64_t begin = p->cursor; while (ch = parser_getch(p), ch != '"' && ch != '\n' && ch != EOF) /* NOOP */; if (ch != '"') { fprintf(stderr, "syntax error: expected \", found %c\n", ch); exit(EXIT_FAILURE); } int64_t end = p->cursor - 1; /* subtract one to ignore end quote */ Token t = { .kind = TOKEN_STRING, .identifier = s8slice(&p->file, begin, end) }; return t; } static Token read_token(Parser* p) { int ch = parser_peek(p); Token t; if (ch == EOF) { t.kind = TOKEN_EOF; } else if (isalpha(ch)) { t = read_identifier(p); } else if (isdigit(ch)) { t = read_integer(p); } else if (ch == '"') { t = read_string(p); } return t; } int main(int argc, char** argv) { if (argc != 2) { fprintf(stderr, "Usage: %s \n", argv[0] ? argv[0] : "program"); exit(EXIT_FAILURE); } S8Slice path = s8slice_from_cstr(argv[1]); const S8Slice f = os_open_file(path, OS_READ); if (f.len == -1) { fprintf(stderr, "could not open file %s: %s\n", path.data, strerror(errno)); exit(EXIT_FAILURE); } Parser p = parser_attach(f); Token t = {0}; while (true) { parser_discard(&p, isspace); Token t = read_token(&p); if (t.kind == TOKEN_IDENTIFIER) { S8Slice s = t.identifier; printf("{.len = %lld, .data = %.*s}\n", s.len, (int)s.len, s.data); } else if (t.kind == TOKEN_INT) { printf("%d\n", t.integer); } else if (t.kind == TOKEN_STRING) { S8Slice s = t.identifier; printf("{.len = %lld, .data = %.*s}\n", s.len, (int)s.len, s.data); } else if (t.kind == TOKEN_EOF) { break; } } return EXIT_SUCCESS; }