Files
tokenizer/dare.c
2025-11-06 07:20:31 +01:00

193 lines
3.6 KiB
C

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "os.h"
#include "s8slice.h"
/*
 * Reading state over an in-memory input.
 * A Parser is created with parser_attach() and passed by pointer to the
 * parser_* and read_* helpers, which advance the cursor as they consume input.
 */
typedef struct Parser {
/* the input being scanned; bytes are read from file.data, bounded by file.len */
S8Slice file;
/*
 * index into file.data of the next byte to read; parser_getch() increments
 * it even at end of input, so it may be greater than file.len
 */
int64_t cursor;
} Parser;
/*
 * One lexical token: a kind tag plus the payload that belongs to that kind.
 */
typedef struct Token {
/* discriminator telling which union member (if any) carries the payload */
enum {
/* first enumerator, value 0: the kind of a zero-initialized Token */
TOKEN_UNDEFINED,
/* decimal integer literal; payload in .integer */
TOKEN_INT,
/* identifier; payload in .identifier */
TOKEN_IDENTIFIER,
/* double-quoted string literal; payload is the text between the quotes */
TOKEN_STRING,
/* presumably marks malformed input -- TODO confirm intended use */
TOKEN_SYNTAX_ERROR,
/* end of input reached; no payload */
TOKEN_EOF,
} kind;
/* anonymous union: the two slice members share storage and have the same type */
union {
/* slice obtained from the parser's file via s8slice() */
S8Slice identifier;
/* slice payload for string tokens (aliases .identifier) */
S8Slice string;
/* value of an integer literal */
int integer;
};
} Token;
/*
 * Create a parser positioned at the first byte of `file`.
 *
 * The slice is stored by value in the returned Parser; nothing is read yet.
 */
Parser parser_attach(S8Slice file)
{
    Parser fresh;

    fresh.file = file;
    fresh.cursor = 0;
    return fresh;
}
/*
 * Return the byte at the cursor without consuming it.
 *
 * Returns EOF when the cursor is at or beyond file.len. Otherwise the byte
 * is converted through unsigned char, so the result is always in the range
 * 0..UCHAR_MAX: a byte such as 0xFF can never be mistaken for EOF, and the
 * value is always a valid argument for the <ctype.h> classification
 * functions (which are undefined for other negative values).
 */
static int parser_peek(Parser* p)
{
    if (p->cursor >= p->file.len) {
        return EOF;
    }
    return (int)(unsigned char)p->file.data[p->cursor];
}
/*
 * Consume and return the byte at the cursor, or EOF at end of input.
 *
 * The byte is converted through unsigned char so the result is either EOF
 * or in 0..UCHAR_MAX; without that, a high byte stored in a signed char
 * would come back negative and could collide with EOF or be passed to a
 * <ctype.h> function as an invalid argument.
 *
 * The cursor is advanced even when EOF is returned, so that a following
 * parser_ungetch() puts it back where it was before this call.
 *
 * A '\n' immediately followed by '\r' is consumed as one character: the
 * '\r' is skipped and '\n' is returned. In that case the cursor moves by
 * two, and a single parser_ungetch() only undoes one of those steps.
 * NOTE(review): this collapses LF-CR, not the common CR-LF ("\r\n")
 * sequence -- confirm that this is the intended order.
 */
static int parser_getch(Parser* p)
{
    int ch = EOF;

    if (p->cursor < p->file.len) {
        ch = (int)(unsigned char)p->file.data[p->cursor];
    }
    p->cursor += 1;
    if (ch == '\n' && parser_peek(p) == '\r') {
        p->cursor += 1;
    }
    return ch;
}
/*
 * Step the cursor back by one position.
 *
 * Does nothing when the cursor is already at (or below) zero, so the
 * cursor never becomes negative through this function.
 */
static void parser_ungetch(Parser* p)
{
    if (p->cursor <= 0) {
        return;
    }
    --p->cursor;
}
/*
 * Skip input for as long as the predicate `f` accepts the next character.
 *
 * `f` has the shape of the <ctype.h> classifiers (e.g. isspace). Characters
 * are consumed with parser_getch() until one is rejected; that rejected
 * character is pushed back with parser_ungetch() unless it was EOF.
 */
static void parser_discard(Parser* p, int (*f) (int))
{
    int last = parser_getch(p);

    while (f(last)) {
        last = parser_getch(p);
    }
    if (last == EOF) {
        /* nothing to give back: the input is exhausted */
        return;
    }
    parser_ungetch(p);
}
/*
 * Read an unsigned decimal integer literal starting at the cursor.
 *
 * Precondition: the character at the cursor is a decimal digit.
 * Returns a TOKEN_INT whose .integer holds the value. On return the cursor
 * is on the first character that is not a digit (or at end of input).
 *
 * A literal that does not fit in an int is reported on stderr and the
 * program exits with EXIT_FAILURE, instead of overflowing a signed int
 * (which is undefined behavior).
 */
static Token read_integer(Parser* p)
{
    /* FIXME: add support for 0x prefixes */
    /* the first char should be guaranteed to be isdigit */
    assert(isdigit(parser_peek(p)));
    int n = 0;
    /* peek before consuming so the terminating character is never eaten */
    while (isdigit(parser_peek(p))) {
        const int digit = parser_getch(p) - '0';
        /* n * 10 + digit would exceed INT_MAX */
        if (n > (INT_MAX - digit) / 10) {
            fprintf(stderr, "syntax error: integer literal too large\n");
            exit(EXIT_FAILURE);
        }
        n = n * 10 + digit;
    }
    Token t = {.kind = TOKEN_INT, .integer = n};
    return t;
}
/*
 * Read an identifier starting at the cursor: one letter followed by any
 * number of letters and digits (as classified by isalnum).
 *
 * Precondition: the character at the cursor satisfies isalpha.
 * Returns a TOKEN_IDENTIFIER whose .identifier is
 * s8slice(&p->file, begin, end), where begin is the cursor on entry and end
 * is the position of the first character that is not alphanumeric.
 */
static Token read_identifier(Parser* p)
{
    /* the first char should be guaranteed to be isalpha */
    assert(isalpha(parser_peek(p)));
    const int64_t begin = p->cursor;
    /*
     * Peek before consuming. Reading the terminator with parser_getch() and
     * then stepping back one position is not exact when the terminator is a
     * '\n' followed by '\r' (parser_getch() consumes both), which would put
     * the '\n' inside the identifier.
     */
    while (isalnum(parser_peek(p))) {
        (void)parser_getch(p);
    }
    const int64_t end = p->cursor;
    Token t = {
        .kind = TOKEN_IDENTIFIER,
        .identifier = s8slice(&p->file, begin, end)
    };
    return t;
}
/*
 * Read a double-quoted string literal starting at the cursor.
 *
 * Precondition: the character at the cursor is '"'.
 * Returns a TOKEN_STRING whose .string is s8slice(&p->file, begin, end),
 * where begin is the position just after the opening quote and end is the
 * position of the closing quote. No escape sequences are interpreted.
 *
 * A literal that reaches a newline or the end of input before its closing
 * quote is reported on stderr and the program exits with EXIT_FAILURE.
 */
static Token read_string(Parser* p)
{
    /* the first char should be guaranteed to be '"' */
    assert(parser_peek(p) == '"');
    (void)parser_getch(p); /* skip quote */
    const int64_t begin = p->cursor;
    int ch;
    do {
        ch = parser_getch(p);
    } while (ch != '"' && ch != '\n' && ch != EOF);
    if (ch != '"') {
        /*
         * ch can only be '\n' or EOF here; name it rather than printing it
         * with %c, which would emit a raw newline or a meaningless byte.
         */
        fprintf(stderr, "syntax error: expected \", found %s\n",
                ch == EOF ? "end of file" : "end of line");
        exit(EXIT_FAILURE);
    }
    const int64_t end = p->cursor - 1; /* subtract one to ignore end quote */
    Token t = {
        .kind = TOKEN_STRING,
        .string = s8slice(&p->file, begin, end)
    };
    return t;
}
/*
 * Read the next token starting at the cursor.
 *
 * The first character decides the token type:
 *   end of input -> TOKEN_EOF
 *   letter       -> read_identifier()
 *   digit        -> read_integer()
 *   '"'          -> read_string()
 * Any other character is consumed and reported as TOKEN_SYNTAX_ERROR, so
 * the returned Token is always fully initialized and a caller looping on
 * read_token() always makes progress.
 *
 * Whitespace is not skipped here; the caller is expected to do that first.
 */
static Token read_token(Parser* p)
{
    const int ch = parser_peek(p);

    if (ch == EOF) {
        return (Token){.kind = TOKEN_EOF};
    }
    if (isalpha(ch)) {
        return read_identifier(p);
    }
    if (isdigit(ch)) {
        return read_integer(p);
    }
    if (ch == '"') {
        return read_string(p);
    }
    /* unknown character: skip it so the next call does not see it again */
    (void)parser_getch(p);
    return (Token){.kind = TOKEN_SYNTAX_ERROR};
}
int main(int argc, char** argv)
{
if (argc != 2) {
fprintf(stderr, "Usage: %s <file>\n", argv[0] ? argv[0] : "program");
exit(EXIT_FAILURE);
}
S8Slice path = s8slice_from_cstr(argv[1]);
const S8Slice f = os_open_file(path, OS_READ);
if (f.len == -1) {
fprintf(stderr, "could not open file %s: %s\n", path.data, strerror(errno));
exit(EXIT_FAILURE);
}
Parser p = parser_attach(f);
Token t = {0};
while (true) {
parser_discard(&p, isspace);
Token t = read_token(&p);
if (t.kind == TOKEN_IDENTIFIER) {
S8Slice s = t.identifier;
printf("{.len = %lld, .data = %.*s}\n", s.len, (int)s.len, s.data);
} else if (t.kind == TOKEN_INT) {
printf("%d\n", t.integer);
} else if (t.kind == TOKEN_STRING) {
S8Slice s = t.identifier;
printf("{.len = %lld, .data = %.*s}\n", s.len, (int)s.len, s.data);
} else if (t.kind == TOKEN_EOF) {
break;
}
}
return EXIT_SUCCESS;
}