#include #include #include #include #include #include #include "os.h" #include "s8slice.h" #if 0 #define log_trace(...) fprintf(stderr, __VA_ARGS__) #else #define log_trace(...) #endif typedef struct Parser { S8Slice file; int64_t cursor; } Parser; typedef struct Token { enum { TOKEN_UNDEFINED, TOKEN_INT, TOKEN_ID, TOKEN_STRING, TOKEN_SYNTAX_ERROR, TOKEN_EOS, TOKEN_EOF, } kind; union { S8Slice identifier; S8Slice string; int integer; char undefined; }; } Token; const char* token_kind_str[] = { [TOKEN_UNDEFINED] = "TOKEN_UNDEFINED", [TOKEN_INT] = "TOKEN_INT", [TOKEN_ID] = "TOKEN_ID", [TOKEN_STRING] = "TOKEN_STRING", [TOKEN_SYNTAX_ERROR] = "TOKEN_SYNTAX_ERROR", [TOKEN_EOS] = "TOKEN_EOS", [TOKEN_EOF] = "TOKEN_EOF", }; Parser parser_attach(S8Slice file) { return (Parser){.file = file, .cursor = 0}; } static int parser_peek(Parser* p) { const int ch = p->cursor >= p->file.len ? EOF : (int)p->file.data[p->cursor]; log_trace("peeking:\t%c\n", isprint(ch) ? ch : '?'); return ch; } static int parser_getch(Parser* p) { const int ch = p->cursor >= p->file.len ? EOF : (int)p->file.data[p->cursor]; p->cursor += 1; log_trace(stderr, "getch:\t%c\n", isprint(ch) ? ch : '?'); if (ch == '\n' && p->file.data[p->cursor] == '\r') { p->cursor += 1; } return ch; } static void parser_skip_char(Parser* p) { const int ch = parser_getch(p); log_trace(stderr, "skipping:\t%c\n", isprint(ch) ? ch : '?'); } static void parser_ungetch(Parser* p) { log_trace(stderr, "ungetch:\n"); if (p->cursor > 0) { p->cursor -= 1; } if (p->file.data[p->cursor] == '\r') { p->cursor -= 1; } } static inline void parser_discard(Parser* p, int (*f) (int)) { int ch; while (ch = parser_getch(p), f(ch)) /* noop */; if (ch != EOF) parser_ungetch(p); } static Token read_integer(Parser* p) { /* FIXME: add support for 0x prefixes */ /* the first char should be guaranteed to be isdigit */ assert(isdigit(parser_peek(p))); int ch = EOF; int n = 0; while (ch = parser_getch(p), isdigit(ch)) { n *= 10; n += ch - '0'; } parser_ungetch(p); Token t = {.kind = TOKEN_INT, .integer = n}; return t; } int is_identifier_tail(int ch) { return isalnum(ch) || ch == '_'; } static Token read_identifier(Parser* p) { int ch = EOF; /* the should be checked by the caller */ assert(isalpha(parser_peek(p))); int64_t begin = p->cursor; while (ch = parser_getch(p), is_identifier_tail(ch)) /* NOOP */; parser_ungetch(p); int64_t end = p->cursor; Token t = { .kind = TOKEN_ID, .identifier = s8slice(&p->file, begin, end) }; return t; } static Token read_string(Parser* p) { int ch = EOF; /* the first char should be guaranteed to be '"' */ assert(parser_peek(p) == '"'); (void)parser_getch(p); /* skip quote */ int64_t begin = p->cursor; while (ch = parser_getch(p), ch != '"' && ch != '\n' && ch != EOF) /* NOOP */; if (ch != '"') { fprintf(stderr, "syntax error: expected \", found %c\n", ch); exit(EXIT_FAILURE); } int64_t end = p->cursor - 1; /* subtract one to ignore end quote */ Token t = { .kind = TOKEN_STRING, .identifier = s8slice(&p->file, begin, end) }; return t; } static Token read_token(Parser* p) { int ch = parser_peek(p); Token t; if (ch == EOF) { t.kind = TOKEN_EOF; } else if (ch == '\n') { t.kind = TOKEN_EOS; parser_skip_char(p); parser_discard(p, isspace); } else if (isalpha(ch)) { t = read_identifier(p); } else if (isdigit(ch)) { t = read_integer(p); } else if (ch == '"') { t = read_string(p); } else { t.kind = TOKEN_UNDEFINED; t.undefined = ch; parser_skip_char(p); } return t; } int isspace_except_newline(int ch) { return isspace(ch) && (ch != '\n'); } int main(int argc, char** argv) { if (argc != 2) { fprintf(stderr, "Usage: %s \n", argv[0] ? argv[0] : "program"); exit(EXIT_FAILURE); } S8Slice path = s8slice_from_cstr(argv[1]); const S8Slice f = os_open_file(path, OS_READ); if (f.len == -1) { fprintf(stderr, "could not open file %s: %s\n", path.data, strerror(errno)); exit(EXIT_FAILURE); } Parser p = parser_attach(f); Token t = {0}; while (true) { parser_discard(&p, isspace_except_newline); Token t = read_token(&p); printf("%s\t", token_kind_str[t.kind]); if (t.kind == TOKEN_ID) { S8Slice s = t.identifier; printf("<%.*s>\n", (int)s.len, s.data); } else if (t.kind == TOKEN_INT) { printf("%d\n", t.integer); } else if (t.kind == TOKEN_STRING) { S8Slice s = t.identifier; printf("\"%.*s\"\n", (int)s.len, s.data); } else if (t.kind == TOKEN_EOF) { break; } else if (t.kind == TOKEN_UNDEFINED) { printf("'%c'\n", t.undefined); } else { printf("\n"); } } return EXIT_SUCCESS; }