From b3821cf8bf6b99a8d44e136981fc56f1ba4664d0 Mon Sep 17 00:00:00 2001 From: olemorud Date: Tue, 16 May 2023 02:44:49 +0200 Subject: [PATCH] Feature: Use arena-allocation for memory handling Arena allocation make deallocation of nested structed MUCH faster, and improves the spatial locality of allocations. It makes no sense to deallocate only parts of a JSON structure so arenas are a good fit here. --- .gitmodules | 3 ++ Makefile | 25 ++++++--- arena-allocator | 1 + compile_flags.txt | 1 + include/json_value.h | 9 ++-- include/parse.h | 3 +- include/util.h | 2 +- src/json_value.c | 37 +++++++------- src/main.c | 8 +-- src/parse.c | 118 +++++++++++++++++++++++-------------------- src/util.c | 14 ++--- 11 files changed, 124 insertions(+), 97 deletions(-) create mode 100644 .gitmodules create mode 160000 arena-allocator diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..eb771dc --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "arena-allocator"] + path = arena-allocator + url = https://github.com/olemorud/arena-allocator.git diff --git a/Makefile b/Makefile index 0baf282..2a1861d 100644 --- a/Makefile +++ b/Makefile @@ -10,15 +10,18 @@ COMPILER ?= gcc CC := gcc # -fsanitize={address,undefined} -CFLAGS.gcc.debug := -Og -ggdb -fanalyzer -DBACKTRACE -rdynamic -fsanitize=address -fno-omit-frame-pointer +CFLAGS.gcc.debug := -Og -ggdb -DBACKTRACE -rdynamic -fanalyzer -fsanitize=address -fno-omit-frame-pointer CFLAGS.gcc.release := -O3 -g -march=native -DNDEBUG -CFLAGS.gcc := ${CFLAGS.gcc.${BUILD}} -Iinclude -Wall -Wextra -Wpedantic -Werror -Wno-strict-aliasing -fstack-protector-all -std=gnu11 +CFLAGS.gcc := ${CFLAGS.gcc.${BUILD}} -fstack-protector-all -std=gnu11 #-Wpedantic CFLAGS.clang.debug=-O0 -g3 -DBACKTRACE -rdynamic CFLAGS.clang.release=-O3 -g -march=native -DNDEBUG -CFLAGS.clang=-Iinclude -Wall -Wextra -Wpedantic -Werror -Wno-strict-aliasing -fstack-protector-all ${CFLAGS.clang.${BUILD}} +CFLAGS.clang=-Iinclude -Wno-strict-aliasing -fstack-protector-all ${CFLAGS.clang.${BUILD}} -CFLAGS := ${CFLAGS.${COMPILER}} +CFLAGS := ${CFLAGS.${COMPILER}} \ + -Iinclude -Iarena-allocator/include \ + -std=c2x \ + -Wall -Wextra -Wpedantic -Werror LD_PRELOAD:= @@ -27,18 +30,24 @@ LD_PRELOAD:= BUILD_DIR := bin/${BUILD} OBJ_DIR := .obj/${BUILD} -_OBJS := main.o parse.o json_value.o util.o +_OBJS := main.o parse.o json_value.o util.o libarena.a OBJS := $(patsubst %,$(OBJ_DIR)/%,$(_OBJS)) - all : $(BUILD_DIR)/parse -$(BUILD_DIR)/parse : $(OBJS) | $(BUILD_DIR) Makefile +$(BUILD_DIR)/parse : $(OBJS) $(OBJ_DIR)/libarena.a | $(BUILD_DIR) Makefile $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDLIBS) -o $@ -$(OBJ_DIR)/main.o : src/main.c | $(OBJ_DIR) Makefile +$(OBJ_DIR)/main.o : src/main.c arena-allocator | $(OBJ_DIR) Makefile $(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) -c $< -o $@ +$(OBJ_DIR)/libarena.a : arena-allocator .gitmodules $(wildcard .git/modules/arena-allocator/HEAD) | $(OBJ_DIR) Makefile + (cd arena-allocator && make static) + cp --force arena-allocator/lib/libarena.a $(OBJ_DIR)/libarena.a + +arena-allocator : .gitmodules + git submodule update $@ + $(OBJ_DIR)/%.o : src/%.c include/%.h | $(OBJ_DIR) Makefile $(CC) $(CFLAGS) $(LDFLAGS) $(LDLIBS) -c $< -o $@ diff --git a/arena-allocator b/arena-allocator new file mode 160000 index 0000000..f8e6e2e --- /dev/null +++ b/arena-allocator @@ -0,0 +1 @@ +Subproject commit f8e6e2e6d244bd1d6e4287a4b3dba89c04f61607 diff --git a/compile_flags.txt b/compile_flags.txt index 189ff3d..6e85c84 100644 --- a/compile_flags.txt +++ b/compile_flags.txt @@ -1,4 +1,5 @@ -Iinclude +-Iarena-allocator/include -Wall -Werror -Wpedantic diff --git a/include/json_value.h b/include/json_value.h index 880d2e8..d5d862c 100644 --- a/include/json_value.h +++ b/include/json_value.h @@ -2,6 +2,7 @@ #ifndef _JSON_VALUE_H #define _JSON_VALUE_H +#include "arena.h" #include "config.h" #include // bool @@ -9,7 +10,7 @@ typedef struct obj_entry { char const* key; struct json_value* val; struct obj_entry* next; -} * __p_obj_entry; +}* __p_obj_entry; typedef __p_obj_entry obj_t[OBJ_SIZE]; @@ -32,11 +33,11 @@ struct json_value { }; void* obj_at(obj_t m, char* const key); -bool obj_insert(obj_t m, char* const key, struct json_value* value); -void obj_delete(obj_t* m); +bool obj_insert(obj_t m, char* const key, struct json_value* value, arena_t* arena); +void obj_delete(obj_t* m, arena_t* arena); void print_json(struct json_value val, int indent); -void json_value_delete(struct json_value val); +void json_value_delete(struct json_value val, arena_t* arena); #endif diff --git a/include/parse.h b/include/parse.h index 68bcfba..d298c59 100644 --- a/include/parse.h +++ b/include/parse.h @@ -4,8 +4,9 @@ #include "json_value.h" +#include "arena.h" #include // FILE* -struct json_value parse_json_value(FILE* fp); +struct json_value parse_json_value(FILE* fp, arena_t* arena); #endif diff --git a/include/util.h b/include/util.h index 829f645..a4a85fa 100644 --- a/include/util.h +++ b/include/util.h @@ -10,6 +10,6 @@ __attribute__((__noreturn__)) void err_ctx(int exit_code, FILE* fp, const char* void* malloc_or_die(size_t size); void* realloc_or_die(void* ptr, size_t size); void* calloc_or_die(size_t nmemb, size_t size); -void print_trace(); +void print_trace(void); #endif diff --git a/src/json_value.c b/src/json_value.c index d091a49..f63bdfe 100644 --- a/src/json_value.c +++ b/src/json_value.c @@ -23,13 +23,11 @@ #include #include +#include "arena.h" #include "json_value.h" #include "util.h" -bool obj_insert(obj_t m, char* const key, struct json_value* value); size_t obj_hash(char const* str); -void obj_delete(obj_t* m); -void* obj_at(obj_t m, char* const key); void print_array(struct json_value** arr, int cur_indent, int indent_amount); void print_json_value(struct json_value val, int cur_indent, int indent_amount); @@ -95,7 +93,7 @@ void* obj_at(obj_t m, char* const key) returns true if successful returns false if key already exists */ -bool obj_insert(obj_t m, char* const key, struct json_value* value) +bool obj_insert(obj_t m, char* const key, struct json_value* value, arena_t* arena) { size_t i = obj_hash(key); struct obj_entry* cur = m[i]; @@ -119,7 +117,7 @@ bool obj_insert(obj_t m, char* const key, struct json_value* value) return false; /* populate new entry */ - cur = malloc_or_die(sizeof(struct obj_entry)); + cur = arena_alloc(arena, sizeof *cur); cur->key = key; cur->val = value; cur->next = m[i]; @@ -133,24 +131,24 @@ bool obj_insert(obj_t m, char* const key, struct json_value* value) /* Free memory allocated for json_value val */ -void json_value_delete(struct json_value val) +void json_value_delete(struct json_value val, arena_t* arena) { switch (val.type) { case array: for (size_t i = 0; val.array[i] != NULL; i++) { - json_value_delete(*(val.array[i])); - free(val.array[i]); + json_value_delete(*(val.array[i]), arena); + arena_free(arena, val.array[i]); } - free(val.array); + arena_free(arena, val.array); break; case object: - obj_delete(val.object); + obj_delete(val.object, arena); break; case string: - free(val.string); + arena_free(arena, val.string); break; default: @@ -159,26 +157,27 @@ void json_value_delete(struct json_value val) } /* - Free memory allocated for obj + Recursively deletes object and its children - Recursively deletes children objects + Included for completeness. To efficiently delete an object, reset + its associated arena instead. */ -void obj_delete(obj_t* m) +void obj_delete(obj_t* m, arena_t* arena) { for (size_t i = 0; i < OBJ_SIZE; i++) { struct obj_entry *e = (*m)[i], *tmp; while (e != NULL) { - json_value_delete(*(e->val)); - free((char*)e->key); - free(e->val); + json_value_delete(*(e->val), arena); + arena_free(arena, (char*)e->key); + arena_free(arena, e->val); tmp = e; e = e->next; - free(tmp); + arena_free(arena, tmp); } } - free(m); + arena_free(arena, m); } void add_indent(int n) diff --git a/src/main.c b/src/main.c index a1c6453..ee9a0c5 100644 --- a/src/main.c +++ b/src/main.c @@ -26,6 +26,8 @@ int main(int argc, char* argv[]) { + arena_t arena_default = arena_new(); + if (argc != 2) { errx(EXIT_FAILURE, "Usage: %s ", argv[0]); } @@ -34,13 +36,13 @@ int main(int argc, char* argv[]) FILE* fp = fopen(argv[1], "r"); - struct json_value x = parse_json_value(fp); + struct json_value x = parse_json_value(fp, &arena_default); print_json(x, 1); - json_value_delete(x); - fclose(fp); + arena_delete(&arena_default); + return EXIT_SUCCESS; } diff --git a/src/parse.c b/src/parse.c index b5d50e1..d582209 100644 --- a/src/parse.c +++ b/src/parse.c @@ -20,24 +20,37 @@ #include "parse.h" +#include "arena.h" #include "config.h" #include "json_value.h" #include "util.h" #include // isspace, isdigit #include // err, errx +#include // NAN #include #include // exit, EXIT_SUCCESS, EXIT_FAILURE #include // strcmp -char* read_string(FILE* fp); -obj_t* read_object(FILE* fp); -void discard_whitespace(FILE* fp); bool read_boolean(FILE* fp); -void read_null(FILE* fp); +char* read_string(FILE* fp, arena_t* arena); double read_number(FILE* fp); -struct json_value** read_array(FILE* fp); +obj_t* read_object(FILE* fp, arena_t* arena); +struct json_value** read_array(FILE* fp, arena_t* arena); void discard_whitespace(FILE* fp); +void read_null(FILE* fp); + +/* +ghetto way to hunt down bugs +#ifdef __GNUC__ + #define _fgetc(fp) \ + ({int retval; retval = fgetc(fp); fputc(retval, stderr); retval;}) +#else + #define _fgetc(fp) \ + fgetc(fp) +#endif +*/ +#define _fgetc(fp) fgetc(fp) /* Consumes the next whitespace character in a file stream @@ -47,7 +60,7 @@ void discard_whitespace(FILE* fp) { int c; - while (isspace(c = fgetc(fp))) { + while (isspace(c = _fgetc(fp))) { if (c == EOF) err_ctx(EARLY_EOF, fp, "(%s) unexpected EOF", __func__); } @@ -64,10 +77,10 @@ void discard_whitespace(FILE* fp) These structures can be nested. */ -struct json_value parse_json_value(FILE* fp) +struct json_value parse_json_value(FILE* fp, arena_t* arena) { discard_whitespace(fp); - int c = fgetc(fp); + int c = _fgetc(fp); struct json_value result = { 0 }; @@ -77,17 +90,17 @@ struct json_value parse_json_value(FILE* fp) case '{': result.type = object; - result.object = read_object(fp); + result.object = read_object(fp, arena); break; case '"': result.type = string; - result.string = read_string(fp); + result.string = read_string(fp, arena); break; case '[': result.type = array; - result.array = read_array(fp); + result.array = read_array(fp, arena); break; case 't': @@ -127,21 +140,26 @@ struct json_value parse_json_value(FILE* fp) Consumes a JSON string from a file stream and returns a corresponding char* */ -char* read_string(FILE* fp) +char* read_string(FILE* fp, arena_t* arena) { int c; size_t i = 0, result_size = 16; - char* result = malloc_or_die(result_size); + // char* result = malloc_or_die(result_size); + char* result = arena_alloc(arena, result_size); bool escaped = false; while (true) { if (i + 1 >= result_size) { result_size *= 2; - result = realloc_or_die(result, result_size); + result = arena_realloc_tail(arena, result_size); + if (result == NULL) + err_ctx(EXIT_FAILURE, fp, "could not allocate memory for string" + "%s", + __func__); } - c = fgetc(fp); + c = _fgetc(fp); if (escaped) { escaped = false; @@ -155,10 +173,9 @@ char* read_string(FILE* fp) case '"': result[i++] = '\0'; - return realloc_or_die(result, i); + return arena_realloc_tail(arena, i); case EOF: - free(result); err_ctx(EARLY_EOF, fp, "(%s) unexpected EOF", __func__); default: @@ -177,26 +194,25 @@ char* read_string(FILE* fp) Consumes a JSON object from a file stream and returns a corresponding obj_t */ -obj_t* read_object(FILE* fp) +obj_t* read_object(FILE* fp, arena_t* arena) { - obj_t* result = calloc_or_die(1, sizeof(obj_t)); + // obj_t* result = calloc_or_die(1, sizeof(obj_t)); + obj_t* result = arena_calloc(arena, 1, sizeof *result); char* key; while (true) { /* read key */ discard_whitespace(fp); - switch (fgetc(fp)) { + switch (_fgetc(fp)) { case EOF: - free(result); err_ctx(EARLY_EOF, fp, "(%s) unexpected EOF", __func__); default: - free(result); err_ctx(UNEXPECTED_CHAR, fp, "(%s) expected \"", __func__); case '"': - key = read_string(fp); + key = read_string(fp, arena); break; case '}': @@ -206,39 +222,35 @@ obj_t* read_object(FILE* fp) /* check for ':' separator */ discard_whitespace(fp); - switch (fgetc(fp)) { + switch (_fgetc(fp)) { case ':': break; case EOF: - free(result); err_ctx(EARLY_EOF, fp, "(%s) unexpected EOF", __func__); default: - free(result); err_ctx(UNEXPECTED_CHAR, fp, "(%s) expected ':'", __func__); } /* read value */ discard_whitespace(fp); - struct json_value* val = calloc_or_die(1, sizeof(struct json_value)); - *val = parse_json_value(fp); + // struct json_value* val = calloc_or_die(1, sizeof(struct json_value)); + struct json_value* val = arena_calloc(arena, 1, sizeof *val); + *val = parse_json_value(fp, arena); /* insert key-value pair to obj */ - if (!obj_insert(*result, key, val)) { - free(result); - free(val); - errx(EXIT_FAILURE, "failed to insert pair (%s, %p)", key, (void*)val); + if (!obj_insert(*result, key, val, arena)) { + fprintf(stderr, "failed to insert pair (%s, %p)\n", key, (void*)val); + exit(EXIT_FAILURE); } /* read separator or end of object */ discard_whitespace(fp); - switch (fgetc(fp)) { + switch (_fgetc(fp)) { case EOF: - free(val); - free(result); err_ctx(EARLY_EOF, fp, "(%s) unexpected EOF", __func__); case ',': @@ -248,8 +260,6 @@ obj_t* read_object(FILE* fp) return result; default: - free(val); - free(result); err_ctx(UNEXPECTED_CHAR, fp, "(%s) expected ',' or '}'", __func__); } } @@ -264,42 +274,43 @@ obj_t* read_object(FILE* fp) Consumes a JSON array from a file stream and returns a NULL separated array of json_value pointers */ -struct json_value** read_array(FILE* fp) +struct json_value** read_array(FILE* fp, arena_t* arena) { int c; size_t i = 0, output_size = 16; - struct json_value** output = malloc_or_die(output_size * sizeof(struct json_value*)); + struct json_value** output = malloc_or_die(output_size * sizeof *output); + // struct json_value** output = arena_alloc(arena, output_size * sizeof *output); while (true) { - if (i + 1 >= output_size) { + if (i >= output_size) { output_size *= 2; - output = realloc_or_die(output, output_size * sizeof(struct json_value*)); + // output = arena_realloc_tail(arena, output_size * sizeof *output); + output = realloc_or_die(output, output_size * sizeof *output); } discard_whitespace(fp); - c = fgetc(fp); + c = _fgetc(fp); switch (c) { case EOF: - free(output); - - for (size_t j = 0; j < i; j++) - free(output[j]); - err_ctx(EARLY_EOF, fp, "(%s) unexpected EOF", __func__); break; case ']': output[i++] = NULL; - return realloc_or_die(output, i * sizeof(struct json_value*)); + struct json_value** x = arena_alloc(arena, i * sizeof *output); + memcpy(x, output, i * sizeof *output); + free(output); + return x; case ',': continue; default: ungetc(c, fp); - output[i] = malloc_or_die(sizeof(struct json_value)); - *output[i] = parse_json_value(fp); + // output[i] = malloc_or_die(sizeof(struct json_value)); + output[i] = arena_alloc(arena, sizeof *(output[i])); + *output[i] = parse_json_value(fp, arena); i++; break; } @@ -365,15 +376,12 @@ bool read_boolean(FILE* fp) */ double read_number(FILE* fp) { - static const unsigned long neg_nan = 0xFFFFFFFFFFFFFFFFULL; - double n = *(double*)&neg_nan; + double n = NAN; int n_read = fscanf(fp, "%lf", &n); - /* try to read as long instead */ - if (n_read == 0) { + if (n_read == 0) err_ctx(UNEXPECTED_CHAR, fp, "(%s) number expected, found %lf", __func__, n); - } if (n_read == EOF) err_ctx(EARLY_EOF, fp, "(%s) unexpected EOF", __func__); diff --git a/src/util.c b/src/util.c index 1b6f402..c773d1e 100644 --- a/src/util.c +++ b/src/util.c @@ -42,13 +42,12 @@ void* realloc_or_die(void* ptr, size_t size) { void* tmp = realloc(ptr, size); - if (ptr == NULL) { + if (tmp == NULL) { if (errno != 0) { - free(ptr); err(errno, "realloc_or_die failed"); } - fprintf(stderr, "\nrealloc_or_die returned NULL but errno is 0, ptr: %p size: %zu\n", ptr, size); + fprintf(stderr, "\nrealloc_or_die returned NULL but errno is 0, ptr: %p size: %zu\n", tmp, size); exit(EXIT_FAILURE); } @@ -67,9 +66,11 @@ void* calloc_or_die(size_t nmemb, size_t size) // from the glibc man pages // https://www.gnu.org/software/libc/manual/html_node/Backtraces.html -void print_trace() +void print_trace(void) { -#ifdef BACKTRACE +#ifndef BACKTRACE + return; +#else void* array[500]; char** strings; int size, i; @@ -79,8 +80,9 @@ void print_trace() if (strings != NULL) { printf("\n\n=== Obtained %d stack frames. === \n", size); - for (i = 0; i < size; i++) + for (i = 0; i < size; i++) { printf("%s\n", strings[i]); + } } free(strings);