First version correctly parsing sample.json

This commit is contained in:
olemorud
2023-04-26 11:35:21 +02:00
parent 4ce1c34838
commit 89dc8149c0
3 changed files with 142 additions and 76 deletions

View File

@@ -1,8 +1,8 @@
CC=gcc CC=gcc
CFLAGS=-ggdb -O0 CFLAGS=-ggdb -Og
CFLAGS+=-Wextra -Wall -Wpedantic CFLAGS+=-Wextra -Wall -Wpedantic
CFLAGS+=-fsanitize=address -fsanitize=undefined #CFLAGS+=-fsanitize=address -fsanitize=undefined
CFLAGS+=-fanalyzer CFLAGS+=-fanalyzer
CFLAGS+=-rdynamic CFLAGS+=-rdynamic
CFLAGS+=-Iinclude CFLAGS+=-Iinclude

View File

@@ -6,6 +6,7 @@
#include <string.h> #include <string.h>
#include "json_obj.h" #include "json_obj.h"
#include "util.h"
/* djb2 string hash /* djb2 string hash
credits: Daniel J. Bernstein */ credits: Daniel J. Bernstein */
@@ -75,7 +76,7 @@ bool obj_insert(obj_t m, char* const key, struct json_value* value)
return false; return false;
/* populate new entry */ /* populate new entry */
cur = malloc(sizeof(struct obj_entry)); cur = malloc_or_die(sizeof(struct obj_entry));
cur->key = strdup(key); cur->key = strdup(key);
cur->val = value; cur->val = value;
cur->next = m[i]; cur->next = m[i];

View File

@@ -19,13 +19,24 @@ obj_t* read_object(FILE* fp);
void discard_whitespace(FILE* fp); void discard_whitespace(FILE* fp);
bool read_boolean(FILE* fp); bool read_boolean(FILE* fp);
void read_null(FILE* fp); void read_null(FILE* fp);
int64_t read_number(FILE* fp); double read_number(FILE* fp);
struct json_value** read_array(FILE* fp); struct json_value** read_array(FILE* fp);
void print_object(obj_t obj, int cur_indent, int indent_amount); void print_object(obj_t obj, int cur_indent, int indent_amount);
void print_json_value(struct json_value val, int cur_indent, int indent_amount); void print_json_value(struct json_value val, int cur_indent, int indent_amount);
void print_array(struct json_value** arr, int cur_indent, int indent_amount); void print_array(struct json_value** arr, int cur_indent, int indent_amount);
/*
   Skips whitespace on the stream `fp`, leaving it positioned at the first
   non-whitespace character.

   Defined as a macro rather than a function to make debugging smoother.

   End of input is not an error here: isspace(EOF) is false, so the loop just
   stops and nothing is pushed back. Callers detect EOF on their next fgetc().

   `fp` is evaluated exactly once. The expansion is a single statement with no
   trailing ';', so `discard_whitespace(fp);` is safe in an unbraced if/else.
*/
#define discard_whitespace(fp)                  \
    do {                                        \
        FILE* ws_fp_ = (fp);                    \
        int ws_ch_;                             \
        do {                                    \
            ws_ch_ = fgetc(ws_fp_);             \
        } while (isspace(ws_ch_));              \
        if (ws_ch_ != EOF)                      \
            ungetc(ws_ch_, ws_fp_);             \
    } while (0)
char* read_string(FILE* fp) char* read_string(FILE* fp)
{ {
int c; int c;
@@ -56,70 +67,90 @@ char* read_string(FILE* fp)
} }
} }
void discard_whitespace(FILE* fp) /*
{ A JSON object is an unordered set of name/value pairs.
int c;
while (isspace(c = fgetc(fp))) An object begins with "{" and ends with "}".
if (c == EOF) Each name is followed by ":" and the name/value pairs are separated by ",".
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
ungetc(c, fp);
}
Consumes a JSON object from a file stream and returns a corresponding obj_t
*/
obj_t* read_object(FILE* fp) obj_t* read_object(FILE* fp)
{ {
obj_t* result = calloc_or_die(1, sizeof(obj_t)); obj_t* result = calloc_or_die(1, sizeof(obj_t));
char* key; char* key;
struct json_value* val = calloc_or_die(1, sizeof(struct json_value));
int c;
while (true) { while (true) {
/* read key */
discard_whitespace(fp); discard_whitespace(fp);
if ((c = fgetc(fp)) == EOF) switch (fgetc(fp)) {
case EOF:
err(EARLY_EOF, "(%s) unexpected EOF", __func__); err(EARLY_EOF, "(%s) unexpected EOF", __func__);
if (c == '"') default:
errx(UNEXPECTED_CHAR, "(%s) expected \" at index %zu", __func__, ftell(fp));
case '"':
key = read_string(fp); key = read_string(fp);
else if (c == '}') break;
return result;
case '}':
return result;
}
/* check for ':' separator */
discard_whitespace(fp); discard_whitespace(fp);
if ((c = fgetc(fp)) == EOF) switch (fgetc(fp)) {
case ':':
break;
case EOF:
err(EARLY_EOF, "(%s) unexpected EOF", __func__); err(EARLY_EOF, "(%s) unexpected EOF", __func__);
if (c != ':') default:
errx(UNEXPECTED_CHAR, "(%s) expected separator (':') at index %zu", errx(UNEXPECTED_CHAR, "(%s) expected ':' at index %zu", __func__, ftell(fp));
__func__, ftell(fp)); }
/* read value */
discard_whitespace(fp); discard_whitespace(fp);
struct json_value* val = calloc_or_die(1, sizeof(struct json_value));
*val = parse_json_value(fp); *val = parse_json_value(fp);
bool ok = obj_insert(*result, key, val); /* insert key-value pair to obj */
if (!obj_insert(*result, key, val))
if (!ok)
err(EXIT_FAILURE, "failed to insert pair (%s, %p)", key, (void*)val); err(EXIT_FAILURE, "failed to insert pair (%s, %p)", key, (void*)val);
/* read separator or end of object */
discard_whitespace(fp); discard_whitespace(fp);
if ((c = fgetc(fp)) == EOF) switch (fgetc(fp)) {
case EOF:
err(EARLY_EOF, "(%s) unexpected EOF", __func__); err(EARLY_EOF, "(%s) unexpected EOF", __func__);
if (c == ',') case ',':
continue; continue;
else if (c == '}')
case '}':
return result; return result;
else
errx(UNEXPECTED_CHAR, "(%s) expected ',' or '}' at index %zu", __func__, default:
ftell(fp)); errx(UNEXPECTED_CHAR, "(%s) expected ',' or '}' at index %zu", __func__, ftell(fp));
}
} }
return NULL; return NULL;
} }
/*
A JSON array is an ordered collection of values.
It begins with "[" and ends with "]". Values are separated by ","
Consumes a JSON array from a file stream and returns
a NULL-terminated array of json_value pointers
*/
struct json_value** read_array(FILE* fp) struct json_value** read_array(FILE* fp)
{ {
int c; int c;
@@ -127,26 +158,32 @@ struct json_value** read_array(FILE* fp)
struct json_value** output = malloc_or_die(output_size); struct json_value** output = malloc_or_die(output_size);
while (true) { while (true) {
c = fgetc(fp);
if (c == EOF)
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
if (c == ']')
break;
if (c == ',')
continue;
ungetc(c, fp);
if (i > output_size) { if (i > output_size) {
output_size *= 2; output_size *= 2;
output = realloc_or_die(output, output_size); output = realloc_or_die(output, output_size);
} }
c = fgetc(fp);
switch (c) {
case EOF:
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
case ']':
output[i] = NULL;
return realloc_or_die(output, i * sizeof(struct json_value*));
case ',':
continue;
default:
ungetc(c, fp);
output[i] = malloc_or_die(sizeof(struct json_value)); output[i] = malloc_or_die(sizeof(struct json_value));
*output[i] = parse_json_value(fp); *output[i] = parse_json_value(fp);
break;
}
i++; i++;
} }
@@ -155,6 +192,12 @@ struct json_value** read_array(FILE* fp)
return realloc_or_die(output, i * sizeof(void*)); return realloc_or_die(output, i * sizeof(void*));
} }
/*
Consumes and discards a literal "null" from a file stream
If the next characters do not match "null"
it terminates the program with a non-zero code
*/
void read_null(FILE* fp) void read_null(FILE* fp)
{ {
static const char ok[] = { 'n', 'u', 'l', 'l' }; static const char ok[] = { 'n', 'u', 'l', 'l' };
@@ -170,6 +213,13 @@ void read_null(FILE* fp)
ftell(fp)); ftell(fp));
} }
/*
JSON boolean values are either "true" or "false".
Consumes a JSON boolean from a file stream and returns true or false.
Terminates with a non-zero code if the next characters do not match these.
*/
bool read_boolean(FILE* fp) bool read_boolean(FILE* fp)
{ {
static const char t[] = { 't', 'r', 'u', 'e' }; static const char t[] = { 't', 'r', 'u', 'e' };
@@ -193,67 +243,72 @@ bool read_boolean(FILE* fp)
} }
// TODO: fix int overflow // TODO: fix int overflow
int64_t read_number(FILE* fp) /*
A JSON number is very much like a C or Java number,
except that the octal and hexadecimal formats are not used.
Consumes a JSON number from a file stream and returns the number
*/
double read_number(FILE* fp)
{ {
int c; double n;
int64_t sum = 0; fscanf(fp, "%lf", &n);
do { return n;
c = fgetc(fp);
if (c == EOF)
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
sum *= 10;
sum += c - '0';
} while (isdigit(c));
ungetc(c, fp);
return sum;
} }
/*
A JSON value can be a JSON string in double quotes, or a JSON number,
or true or false or null, or a JSON object or a JSON array.
These structures can be nested.
*/
struct json_value parse_json_value(FILE* fp) struct json_value parse_json_value(FILE* fp)
{ {
discard_whitespace(fp); discard_whitespace(fp);
int c = fgetc(fp); int c = fgetc(fp);
struct json_value result = { 0 }; struct json_value result = { 0 };
switch (c) { switch (c) {
case EOF: case EOF:
err(EARLY_EOF, "(%s) unexpected EOF", __func__); err(EARLY_EOF, "(%s) unexpected EOF", __func__);
case '{': case '{':
result.type = object; result.type = object;
result.object = read_object(fp); result.object = read_object(fp);
break; break;
case '"': case '"':
result.type = string; result.type = string;
result.string = read_string(fp); result.string = read_string(fp);
break; break;
case '[': case '[':
result.type = array; result.type = array;
result.array = read_array(fp); result.array = read_array(fp);
break; break;
case 't': case 't':
case 'f': case 'f':
ungetc(c, fp); ungetc(c, fp);
result.type = boolean; result.type = boolean;
result.boolean = read_boolean(fp); result.boolean = read_boolean(fp);
break; break;
case 'n': case 'n':
ungetc(c, fp); ungetc(c, fp);
read_null(fp); read_null(fp);
result.type = null; result.type = null;
result.number = 0L; result.number = 0L;
break; break;
default: default:
if (isdigit(c)) { if (isdigit(c)) {
result.type = number; result.type = number;
result.number = read_number(fp); result.number = read_number(fp);
} else { } else {
errx(UNEXPECTED_CHAR, "(%s) unexpected symbol %c at index %zu", __func__, errx(UNEXPECTED_CHAR, "(%s) unexpected symbol %c at index %zu", __func__, c, ftell(fp) - 1);
c, ftell(fp) - 1);
} }
} }
@@ -262,47 +317,58 @@ struct json_value parse_json_value(FILE* fp)
/*
   Writes n space characters to stdout.
   Writes nothing when n is zero or negative.
*/
void add_indent(int n)
{
    for (int remaining = n; remaining > 0; remaining--)
        putchar(' ');
}
/*
   Pretty-prints a JSON object to stdout.

   obj            table of OBJ_SIZE buckets, each a linked list of obj_entry
   cur_indent     number of spaces written before each "key": value line
   indent_amount  added to cur_indent for values nested inside a member

   Walks every bucket and every entry chained in it, printing each member as
   "key": value on its own line. Member values are printed by
   print_json_value(). The closing brace is indented by
   cur_indent - indent_amount * 2 spaces.

   NOTE(review): this listing interleaves two revisions of the function
   (comma written after every member and then undone with "\b", versus comma
   written before every member except the first) - confirm which statements
   belong to the current revision before changing the code.
*/
void print_object(obj_t obj, int cur_indent, int indent_amount) void print_object(obj_t obj, int cur_indent, int indent_amount)
{ {
printf("{"); putchar('{');
bool first = true;
for (size_t i = 0; i < OBJ_SIZE; i++) { for (size_t i = 0; i < OBJ_SIZE; i++) {
struct obj_entry* e = obj[i]; struct obj_entry* e = obj[i];
if (e == NULL)
continue;
while (e != NULL) { while (e != NULL) {
if (!first)
putchar(',');
first = false;
putchar('\n'); putchar('\n');
add_indent(cur_indent); add_indent(cur_indent);
printf("\"%s\": ", e->key); printf("\"%s\": ", e->key);
print_json_value(*(e->val), cur_indent + indent_amount, indent_amount); print_json_value(*(e->val), cur_indent + indent_amount, indent_amount);
putchar(',');
e = e->next; e = e->next;
} }
} }
printf("\b"); // undo last comma
putchar('\n');
putchar('\n'); putchar('\n');
add_indent(cur_indent - indent_amount * 2); add_indent(cur_indent - indent_amount * 2);
printf("}"); putchar('}');
} }
void print_array(struct json_value** arr, int cur_indent, int indent_amount) void print_array(struct json_value** arr, int cur_indent, int indent_amount)
{ {
putchar('['); putchar('[');
for (size_t i = 0; arr[i] != NULL; i++) { size_t i;
for (i = 0; arr[i + 1] != NULL; i++) {
putchar('\n');
add_indent(cur_indent);
print_json_value(*arr[i], cur_indent + indent_amount, indent_amount); print_json_value(*arr[i], cur_indent + indent_amount, indent_amount);
putchar(','); putchar(',');
} }
putchar('\n');
add_indent(cur_indent);
print_json_value(*arr[i], cur_indent + indent_amount, indent_amount);
putchar('\n');
add_indent(cur_indent - indent_amount * 2);
putchar(']'); putchar(']');
} }
@@ -336,5 +402,4 @@ void print_json_value(struct json_value val, int cur_indent,
/*
   Entry point for printing a parsed JSON value.

   Hands `val` to print_json_value() with a current indentation of 0 and
   `indent` as the per-level indentation amount.
   Does not write a trailing newline itself.
*/
void print_json(struct json_value val, int indent)
{
    const int top_level_indent = 0;

    print_json_value(val, top_level_indent, indent);
}