From 4ce1c348382c13b1e0ea0e499fe3d98e9e01e1a4 Mon Sep 17 00:00:00 2001
From: olemorud
Date: Mon, 24 Apr 2023 17:13:47 +0200
Subject: [PATCH] Initial commit

---
 .clang-format      |   3 +
 .gitignore         |   3 +
 Makefile           |  31 ++++
 compile_flags.txt  |   4 +
 include/json_obj.h |  27 +++
 include/parse.h    |  35 ++++
 include/util.h     |  15 ++
 sample.json        |  22 +++
 src/json_obj.c     | 114 ++++++++++++
 src/main.c         |  32 ++++
 src/parse.c        | 419 +++++++++++++++++++++++++++++++++++++++++++++
 src/util.c         |  64 +++++++
 12 files changed, 769 insertions(+)
 create mode 100644 .clang-format
 create mode 100644 .gitignore
 create mode 100644 Makefile
 create mode 100644 compile_flags.txt
 create mode 100644 include/json_obj.h
 create mode 100644 include/parse.h
 create mode 100644 include/util.h
 create mode 100644 sample.json
 create mode 100644 src/json_obj.c
 create mode 100644 src/main.c
 create mode 100644 src/parse.c
 create mode 100644 src/util.c

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..8039b57
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,3 @@
+
+BasedOnStyle: WebKit
+PointerAlignment: Left
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f35b32a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.o
+.obj/
+bin/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..f93dd6e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,31 @@
+
+CC=gcc
+CFLAGS=-ggdb -O0
+CFLAGS+=-Wextra -Wall -Wpedantic
+CFLAGS+=-fsanitize=address -fsanitize=undefined
+CFLAGS+=-fanalyzer
+CFLAGS+=-rdynamic
+CFLAGS+=-Iinclude
+
+LDFLAGS=
+LDLIBS=
+
+_OBJS=main.o parse.o json_obj.o util.o
+OBJS=$(patsubst %,.obj/%,$(_OBJS))
+
+all: bin/parse
+
+bin/parse: $(OBJS) | bin
+	$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDLIBS) -o $@
+
+.obj/main.o: src/main.c | .obj
+	$(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) -c $< -o $@
+
+.obj/%.o: src/%.c include/%.h | .obj
+	$(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) -c $< -o $@
+
+bin:
+	mkdir -p $@
+
+.obj:
+	mkdir -p $@
diff --git a/compile_flags.txt b/compile_flags.txt
new file mode 100644
index 0000000..189ff3d
--- /dev/null
+++ b/compile_flags.txt
@@ -0,0 +1,4 @@
+-Iinclude
+-Wall
+-Werror
+-Wpedantic
diff --git a/include/json_obj.h b/include/json_obj.h
new file mode 100644
index 0000000..613dfee
--- /dev/null
+++ b/include/json_obj.h
@@ -0,0 +1,27 @@
+
+#ifndef JSON_OBJ_H
+#define JSON_OBJ_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+/* number of buckets in every obj_t hash table */
+#define OBJ_SIZE 1024
+
+struct json_value;
+
+/* one key/value pair; entries sharing a bucket form a linked list */
+typedef struct obj_entry {
+    char const* key;
+    struct json_value* val;
+    struct obj_entry* next;
+} * __p_obj_entry;
+
+/* fixed-size hash table mapping string keys to json values */
+typedef __p_obj_entry obj_t[OBJ_SIZE];
+
+void* obj_at(obj_t m, const char* key);
+bool obj_insert(obj_t m, const char* key, struct json_value* value);
+void obj_delete(obj_t m);
+
+#endif
diff --git a/include/parse.h b/include/parse.h
new file mode 100644
index 0000000..7617b24
--- /dev/null
+++ b/include/parse.h
@@ -0,0 +1,35 @@
+
+#ifndef PARSE_H
+#define PARSE_H
+
+#include "json_obj.h"
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+enum json_type { object,
+    array,
+    string,
+    number,
+    boolean,
+    null };
+
+/* tagged union; `type` selects the valid member */
+struct json_value {
+    enum json_type type;
+    union {
+        obj_t* object;
+        struct json_value** array; /* NULL-terminated */
+        char* string;
+        bool boolean;
+        int64_t number;
+    };
+};
+
+/* parse the next json value from `fp`; exits the program on malformed input */
+struct json_value parse_json_value(FILE* fp);
+
+/* print `val` to stdout, indenting nested object members by `indent` spaces */
+void print_json(struct json_value val, int indent);
+
+#endif
diff --git a/include/util.h b/include/util.h
new file mode 100644
index 0000000..310b9a3
--- /dev/null
+++ b/include/util.h
@@ -0,0 +1,15 @@
+
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <stddef.h>
+
+/* allocation wrappers that exit the program instead of returning NULL */
+void* malloc_or_die(size_t size);
+void* realloc_or_die(void* ptr, size_t size);
+void* calloc_or_die(size_t nmemb, size_t size);
+
+/* print the current call stack to stdout */
+void print_trace(void);
+
+#endif
diff --git a/sample.json b/sample.json
new file mode 100644
index 0000000..eacfbf5
--- /dev/null
+++ b/sample.json
@@ -0,0 +1,22 @@
+{
+    "glossary": {
+        "title": "example glossary",
+        "GlossDiv": {
+            "title": "S",
+            "GlossList": {
+                "GlossEntry": {
+                    "ID": "SGML",
+                    "SortAs": "SGML",
+                    "GlossTerm": "Standard Generalized Markup Language",
+                    "Acronym": "SGML",
+                    "Abbrev": "ISO 8879:1986",
+                    "GlossDef": {
+                        "para": "A meta-markup language, used to create markup languages such as DocBook.",
+                        "GlossSeeAlso": ["GML", "XML"]
+                    },
+                    "GlossSee": "markup"
+                }
+            }
+        }
+    }
+}
diff --git a/src/json_obj.c b/src/json_obj.c
new file mode 100644
index 0000000..b2a30a6
--- /dev/null
+++ b/src/json_obj.c
@@ -0,0 +1,114 @@
+
+#include <err.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "json_obj.h"
+
+/* djb2 string hash
+   credits: Daniel J. Bernstein
+
+   returns the bucket index of `str`, i.e. the hash modulo OBJ_SIZE */
+size_t obj_hash(char const* str)
+{
+    size_t hash = 5381;
+    unsigned int c;
+
+    while ((c = *str++) != '\0')
+        hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
+
+    return hash % OBJ_SIZE;
+}
+
+/* Value at index `key`
+
+   m   - obj to retrieve from
+   key - index to read
+
+   returns value of index if exists, or NULL
+   if key is NULL or not in obj
+*/
+void* obj_at(obj_t m, const char* key)
+{
+    if (key == NULL)
+        return NULL;
+
+    struct obj_entry* hit = m[obj_hash(key)];
+
+    /* traverse linked list to end or until key is found */
+    while (hit != NULL && strcmp(hit->key, key) != 0)
+        hit = hit->next;
+
+    return hit ? hit->val : NULL;
+}
+
+/* Insert `value` at index `key`
+
+   m     - obj to insert to
+   key   - key to insert at (copied; the caller keeps ownership)
+   value - value to insert (the pointer is stored, not copied)
+
+   returns true if successful
+   returns false if key already exists
+   exits the program if key or value is NULL or if allocation fails */
+bool obj_insert(obj_t m, const char* key, struct json_value* value)
+{
+    if (value == NULL)
+        errx(EINVAL, "value cannot be NULL");
+
+    if (key == NULL)
+        errx(EINVAL, "key cannot be NULL");
+
+    /* hash only after the NULL check: obj_hash dereferences key */
+    size_t i = obj_hash(key);
+
+    /* fail if key already exists; compare whole keys, not just a prefix */
+    for (struct obj_entry* cur = m[i]; cur != NULL; cur = cur->next) {
+        if (cur->key == NULL)
+            errx(EXIT_FAILURE, "entry without key");
+
+        if (strcmp(cur->key, key) == 0)
+            return false;
+    }
+
+    /* populate new entry */
+    struct obj_entry* entry = malloc(sizeof *entry);
+    char* key_copy = strdup(key);
+
+    if (entry == NULL || key_copy == NULL)
+        err(EXIT_FAILURE, "failed to allocate obj entry");
+
+    entry->key = key_copy;
+    entry->val = value;
+    entry->next = m[i];
+
+    /* insert newest entry as head */
+    m[i] = entry;
+
+    return true;
+}
+
+/* Free memory allocated for obj
+
+   frees every entry, its key and its value struct; memory owned by the
+   value itself (strings, arrays, nested objects) is not released here */
+void obj_delete(obj_t m)
+{
+    for (size_t i = 0; i < OBJ_SIZE; i++) {
+        struct obj_entry* e = m[i];
+
+        while (e != NULL) {
+            struct obj_entry* next = e->next;
+
+            free((char*)e->key);
+            free(e->val);
+            free(e);
+            e = next;
+        }
+
+        /* leave no dangling bucket heads behind */
+        m[i] = NULL;
+    }
+}
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..16efdf4
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,32 @@
+
+#include "json_obj.h"
+#include "parse.h"
+#include "util.h"
+
+#include <err.h> // err
+#include <stdlib.h> // atexit
+#include <unistd.h> // _exit
+
+/* Parse a json file and pretty-print it to stdout
+
+   argv[1] - file to parse, defaults to sample.json */
+int main(int argc, char* argv[])
+{
+    const char* path = argc > 1 ? argv[1] : "sample.json";
+
+    atexit(print_trace);
+
+    FILE* fp = fopen(path, "r");
+
+    /* the parser reads from fp unconditionally, so bail out early */
+    if (fp == NULL)
+        err(EXIT_FAILURE, "failed to open %s", path);
+
+    struct json_value x = parse_json_value(fp);
+
+    fclose(fp);
+
+    print_json(x, 1);
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/parse.c b/src/parse.c
new file mode 100644
index 0000000..6fe73d1
--- /dev/null
+++ b/src/parse.c
@@ -0,0 +1,419 @@
+
+#include "parse.h"
+
+#include <ctype.h> // isspace, isdigit
+#include <err.h> // errx
+#include <inttypes.h> // PRId64
+#include <stdio.h>
+#include <stdlib.h> // EXIT_FAILURE
+#include <string.h> // strncmp
+
+#include "json_obj.h"
+#include "util.h"
+
+#define EARLY_EOF 202
+#define MALLOC_DIE 201
+#define UNEXPECTED_CHAR 200
+
+char* read_string(FILE* fp);
+obj_t* read_object(FILE* fp);
+void discard_whitespace(FILE* fp);
+bool read_boolean(FILE* fp);
+void read_null(FILE* fp);
+int64_t read_number(FILE* fp);
+struct json_value** read_array(FILE* fp);
+
+void print_object(obj_t obj, int cur_indent, int indent_amount);
+void print_json_value(struct json_value val, int cur_indent, int indent_amount);
+void print_array(struct json_value** arr, int cur_indent, int indent_amount);
+
+/* Read a string up to its closing quote; the opening quote must
+   already have been consumed
+
+   Escape sequences are stored verbatim (backslash included), so an
+   escaped quote does not end the string
+
+   returns a heap-allocated, NUL-terminated string */
+char* read_string(FILE* fp)
+{
+    int c;
+    size_t i = 0, result_size = 16 * sizeof(char);
+    char* result = malloc_or_die(result_size);
+
+    while (true) {
+        /* keep room for two bytes: an escape pair, or a char and NUL */
+        if (i + 1 >= result_size) {
+            result_size *= 2;
+            result = realloc_or_die(result, result_size);
+        }
+
+        switch (c = fgetc(fp)) {
+        default:
+            result[i++] = c;
+            break;
+
+        case '"':
+            result[i++] = '\0';
+            return realloc_or_die(result, i);
+
+        case '\\':
+            result[i++] = c;
+
+            if ((c = fgetc(fp)) == EOF)
+                errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+
+            result[i++] = c;
+            break;
+
+        case EOF:
+            errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+        }
+    }
+}
+
+/* Skip whitespace, leaving the first other character unread
+
+   EOF is not an error here; the caller sees it on its next read */
+void discard_whitespace(FILE* fp)
+{
+    int c;
+
+    /* isspace(EOF) is false, so the loop also stops at end of input */
+    while (isspace(c = fgetc(fp)))
+        ;
+
+    if (c != EOF)
+        ungetc(c, fp);
+}
+
+/* Read an object up to its closing brace; the opening brace must
+   already have been consumed
+
+   returns a heap-allocated obj_t */
+obj_t* read_object(FILE* fp)
+{
+    obj_t* result = calloc_or_die(1, sizeof(obj_t));
+    int c;
+
+    while (true) {
+        discard_whitespace(fp);
+
+        if ((c = fgetc(fp)) == EOF)
+            errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+
+        if (c == '}')
+            return result;
+
+        if (c != '"')
+            errx(UNEXPECTED_CHAR, "(%s) expected '\"' or '}' at index %ld",
+                __func__, ftell(fp));
+
+        char* key = read_string(fp);
+
+        discard_whitespace(fp);
+
+        if ((c = fgetc(fp)) == EOF)
+            errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+
+        if (c != ':')
+            errx(UNEXPECTED_CHAR, "(%s) expected separator (':') at index %ld",
+                __func__, ftell(fp));
+
+        /* every pair needs its own value: the table stores the pointer */
+        struct json_value* val = malloc_or_die(sizeof *val);
+        *val = parse_json_value(fp);
+
+        if (!obj_insert(*result, key, val))
+            errx(EXIT_FAILURE, "failed to insert pair (%s, %p)", key, (void*)val);
+
+        /* obj_insert stores its own copy of the key */
+        free(key);
+
+        discard_whitespace(fp);
+
+        if ((c = fgetc(fp)) == EOF)
+            errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+
+        if (c == ',')
+            continue;
+        else if (c == '}')
+            return result;
+        else
+            errx(UNEXPECTED_CHAR, "(%s) expected ',' or '}' at index %ld", __func__,
+                ftell(fp));
+    }
+}
+
+/* Read an array up to its closing bracket; the opening bracket must
+   already have been consumed. Commas are skipped leniently.
+
+   returns a heap-allocated, NULL-terminated array of heap-allocated values */
+struct json_value** read_array(FILE* fp)
+{
+    int c;
+    size_t i = 0, capacity = 16; /* counted in elements, not bytes */
+    struct json_value** output = malloc_or_die(capacity * sizeof *output);
+
+    while (true) {
+        discard_whitespace(fp);
+
+        c = fgetc(fp);
+
+        if (c == EOF)
+            errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+
+        if (c == ']')
+            break;
+
+        if (c == ',')
+            continue;
+
+        ungetc(c, fp);
+
+        /* keep one slot free for the NULL terminator */
+        if (i + 1 >= capacity) {
+            capacity *= 2;
+            output = realloc_or_die(output, capacity * sizeof *output);
+        }
+
+        output[i] = malloc_or_die(sizeof *output[i]);
+        *output[i] = parse_json_value(fp);
+        i++;
+    }
+
+    output[i] = NULL;
+
+    /* shrink to fit, terminator included */
+    return realloc_or_die(output, (i + 1) * sizeof *output);
+}
+
+/* Consume the literal `null`; exits the program on anything else */
+void read_null(FILE* fp)
+{
+    static const char ok[] = { 'n', 'u', 'l', 'l' };
+    char buf[sizeof(ok)];
+
+    size_t n_read = fread(buf, sizeof(char), sizeof(ok), fp);
+
+    if (n_read != sizeof(ok))
+        errx(EXIT_FAILURE, "(%s) read failure at index %ld", __func__, ftell(fp));
+
+    if (strncmp(buf, ok, sizeof(ok)) != 0)
+        errx(UNEXPECTED_CHAR, "(%s) unexpected symbol at index %ld", __func__,
+            ftell(fp));
+}
+
+/* Consume the literal `true` or `false` and return its value; exits
+   the program on anything else */
+bool read_boolean(FILE* fp)
+{
+    static const char t[] = { 't', 'r', 'u', 'e' };
+    static const char f[] = { 'f', 'a', 'l', 's', 'e' };
+
+    char buf[sizeof(f)] = { 0 };
+
+    /* the first character decides how long the literal is; always
+       reading sizeof(f) bytes would swallow the byte after `true` */
+    int first = fgetc(fp);
+
+    if (first == EOF)
+        errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+
+    bool value = (first == 't');
+    const char* expected = value ? t : f;
+    size_t len = value ? sizeof(t) : sizeof(f);
+
+    buf[0] = (char)first;
+    size_t n_read = 1 + fread(buf + 1, sizeof(char), len - 1, fp);
+
+    if (n_read != len)
+        errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+
+    if (strncmp(buf, expected, len) != 0)
+        errx(UNEXPECTED_CHAR, "(%s) unexpected symbol at index %ld", __func__,
+            ftell(fp) - (long)n_read);
+
+    return value;
+}
+
+/* Read an optionally negative decimal integer, leaving the first
+   character after it unread
+
+   exits the program if no digit is found or if the value does not
+   fit an int64_t */
+int64_t read_number(FILE* fp)
+{
+    int c = fgetc(fp);
+    bool negative = (c == '-');
+    int64_t sum = 0;
+
+    if (negative)
+        c = fgetc(fp);
+
+    if (!isdigit(c))
+        errx(UNEXPECTED_CHAR, "(%s) expected digit at index %ld", __func__,
+            ftell(fp));
+
+    /* accumulate towards the sign of the result so that INT64_MIN is
+       reachable, and check the range before every step */
+    for (; isdigit(c); c = fgetc(fp)) {
+        int digit = c - '0';
+
+        if (negative ? sum < (INT64_MIN + digit) / 10
+                     : sum > (INT64_MAX - digit) / 10)
+            errx(EXIT_FAILURE, "(%s) number out of range", __func__);
+
+        sum = negative ? sum * 10 - digit : sum * 10 + digit;
+    }
+
+    /* end of input is a valid end of a number */
+    if (c != EOF)
+        ungetc(c, fp);
+
+    return sum;
+}
+
+/* Parse the next json value from `fp`, skipping leading whitespace
+
+   Numbers are limited to integers that fit an int64_t.
+   Exits the program on malformed input or early end of file. */
+struct json_value parse_json_value(FILE* fp)
+{
+    discard_whitespace(fp);
+    int c = fgetc(fp);
+    struct json_value result = { 0 };
+
+    switch (c) {
+    case EOF:
+        errx(EARLY_EOF, "(%s) unexpected EOF", __func__);
+    case '{':
+        result.type = object;
+        result.object = read_object(fp);
+        break;
+    case '"':
+        result.type = string;
+        result.string = read_string(fp);
+        break;
+    case '[':
+        result.type = array;
+        result.array = read_array(fp);
+        break;
+    case 't':
+    case 'f':
+        ungetc(c, fp);
+        result.type = boolean;
+        result.boolean = read_boolean(fp);
+        break;
+    case 'n':
+        ungetc(c, fp);
+        read_null(fp);
+        result.type = null;
+        result.number = 0L;
+        break;
+    default:
+        if (isdigit(c) || c == '-') {
+            /* read_number needs the first character as well */
+            ungetc(c, fp);
+            result.type = number;
+            result.number = read_number(fp);
+        } else {
+            errx(UNEXPECTED_CHAR, "(%s) unexpected symbol %c at index %ld", __func__,
+                c, ftell(fp) - 1);
+        }
+    }
+
+    return result;
+}
+
+/* Print `n` spaces; prints nothing for n <= 0 */
+void add_indent(int n)
+{
+    for (int i = 0; i < n; i++) {
+        putchar(' ');
+    }
+}
+
+/* Print `obj` as a json object
+
+   cur_indent    - indentation of the line holding the opening brace
+   indent_amount - extra indentation for the members */
+void print_object(obj_t obj, int cur_indent, int indent_amount)
+{
+    bool first = true;
+
+    putchar('{');
+
+    for (size_t i = 0; i < OBJ_SIZE; i++) {
+        for (struct obj_entry* e = obj[i]; e != NULL; e = e->next) {
+            /* separate members up front so no comma trails the last one */
+            if (!first)
+                putchar(',');
+            first = false;
+
+            putchar('\n');
+            add_indent(cur_indent + indent_amount);
+            printf("\"%s\": ", e->key);
+            print_json_value(*(e->val), cur_indent + indent_amount, indent_amount);
+        }
+    }
+
+    if (!first) {
+        putchar('\n');
+        add_indent(cur_indent);
+    }
+
+    putchar('}');
+}
+
+/* Print the NULL-terminated array `arr` as a json array on one line */
+void print_array(struct json_value** arr, int cur_indent, int indent_amount)
+{
+    putchar('[');
+
+    for (size_t i = 0; arr[i] != NULL; i++) {
+        if (i > 0)
+            printf(", ");
+
+        print_json_value(*arr[i], cur_indent, indent_amount);
+    }
+
+    putchar(']');
+}
+
+/* Print `val` according to its type; objects and arrays recurse */
+void print_json_value(struct json_value val, int cur_indent,
+    int indent_amount)
+{
+    switch (val.type) {
+    case string:
+        printf("\"%s\"", val.string);
+        break;
+    case number:
+        printf("%" PRId64, val.number);
+        break;
+    case boolean:
+        printf("%s", val.boolean ? "true" : "false");
+        break;
+    case null:
+        printf("null");
+        break;
+    case object:
+        print_object(*val.object, cur_indent, indent_amount);
+        break;
+    case array:
+        print_array(val.array, cur_indent, indent_amount);
+        break;
+    default:
+        /* unknown type: print nothing */
+        break;
+    }
+}
+
+/* Print `val` to stdout followed by a newline
+
+   indent - number of spaces added per nesting level */
+void print_json(struct json_value val, int indent)
+{
+    print_json_value(val, 0, indent);
+    putchar('\n');
+}
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..0dab62e
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,64 @@
+
+#include "util.h"
+
+#include <err.h>
+#include <errno.h>
+#include <execinfo.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* malloc that exits the program on failure; a NULL result for a
+   zero-sized request is passed through since it is not an error */
+void* malloc_or_die(size_t size)
+{
+    void* result = malloc(size);
+
+    if (result == NULL && size != 0)
+        err(errno, "malloc_or_die failed");
+
+    return result;
+}
+
+/* realloc that exits the program on failure; see malloc_or_die
+   for zero-sized requests */
+void* realloc_or_die(void* ptr, size_t size)
+{
+    void* result = realloc(ptr, size);
+
+    if (result == NULL && size != 0)
+        err(errno, "realloc_or_die failed");
+
+    return result;
+}
+
+/* calloc that exits the program on failure; see malloc_or_die
+   for zero-sized requests */
+void* calloc_or_die(size_t nmemb, size_t size)
+{
+    void* result = calloc(nmemb, size);
+
+    if (result == NULL && nmemb != 0 && size != 0)
+        err(errno, "calloc_or_die failed");
+
+    return result;
+}
+
+// from the glibc man pages
+// https://www.gnu.org/software/libc/manual/html_node/Backtraces.html
+void print_trace(void)
+{
+    void* array[500];
+    char** strings;
+    int size, i;
+
+    size = backtrace(array, 500);
+    strings = backtrace_symbols(array, size);
+
+    if (strings != NULL) {
+        printf("Obtained %d stack frames.\n", size);
+        for (i = 0; i < size; i++)
+            printf("%s\n", strings[i]);
+    }
+
+    free(strings);
+}