First version correctly parsing sample.json
This commit is contained in:
4
Makefile
4
Makefile
@@ -1,8 +1,8 @@
|
|||||||
|
|
||||||
CC=gcc
|
CC=gcc
|
||||||
CFLAGS=-ggdb -O0
|
CFLAGS=-ggdb -Og
|
||||||
CFLAGS+=-Wextra -Wall -Wpedantic
|
CFLAGS+=-Wextra -Wall -Wpedantic
|
||||||
CFLAGS+=-fsanitize=address -fsanitize=undefined
|
#CFLAGS+=-fsanitize=address -fsanitize=undefined
|
||||||
CFLAGS+=-fanalyzer
|
CFLAGS+=-fanalyzer
|
||||||
CFLAGS+=-rdynamic
|
CFLAGS+=-rdynamic
|
||||||
CFLAGS+=-Iinclude
|
CFLAGS+=-Iinclude
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "json_obj.h"
|
#include "json_obj.h"
|
||||||
|
#include "util.h"
|
||||||
|
|
||||||
/* djb2 string hash
|
/* djb2 string hash
|
||||||
credits: Daniel J. Bernstein */
|
credits: Daniel J. Bernstein */
|
||||||
@@ -75,7 +76,7 @@ bool obj_insert(obj_t m, char* const key, struct json_value* value)
|
|||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* populate new entry */
|
/* populate new entry */
|
||||||
cur = malloc(sizeof(struct obj_entry));
|
cur = malloc_or_die(sizeof(struct obj_entry));
|
||||||
cur->key = strdup(key);
|
cur->key = strdup(key);
|
||||||
cur->val = value;
|
cur->val = value;
|
||||||
cur->next = m[i];
|
cur->next = m[i];
|
||||||
|
|||||||
207
src/parse.c
207
src/parse.c
@@ -19,13 +19,24 @@ obj_t* read_object(FILE* fp);
|
|||||||
void discard_whitespace(FILE* fp);
|
void discard_whitespace(FILE* fp);
|
||||||
bool read_boolean(FILE* fp);
|
bool read_boolean(FILE* fp);
|
||||||
void read_null(FILE* fp);
|
void read_null(FILE* fp);
|
||||||
int64_t read_number(FILE* fp);
|
double read_number(FILE* fp);
|
||||||
struct json_value** read_array(FILE* fp);
|
struct json_value** read_array(FILE* fp);
|
||||||
|
|
||||||
void print_object(obj_t obj, int cur_indent, int indent_amount);
|
void print_object(obj_t obj, int cur_indent, int indent_amount);
|
||||||
void print_json_value(struct json_value val, int cur_indent, int indent_amount);
|
void print_json_value(struct json_value val, int cur_indent, int indent_amount);
|
||||||
void print_array(struct json_value** arr, int cur_indent, int indent_amount);
|
void print_array(struct json_value** arr, int cur_indent, int indent_amount);
|
||||||
|
|
||||||
|
// define as a macro to make debugging smoother
|
||||||
|
#define discard_whitespace(fp) \
|
||||||
|
do { \
|
||||||
|
int c; \
|
||||||
|
while (isspace(c = fgetc(fp))) { \
|
||||||
|
if (c == EOF) \
|
||||||
|
err(EARLY_EOF, "(%s) unexpected EOF", __func__); \
|
||||||
|
} \
|
||||||
|
ungetc(c, fp); \
|
||||||
|
} while (0);
|
||||||
|
|
||||||
char* read_string(FILE* fp)
|
char* read_string(FILE* fp)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
@@ -56,70 +67,90 @@ char* read_string(FILE* fp)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void discard_whitespace(FILE* fp)
|
/*
|
||||||
{
|
A JSON object is an unordered set of name/value pairs.
|
||||||
int c;
|
|
||||||
|
|
||||||
while (isspace(c = fgetc(fp)))
|
An object begins with "{" and ends with "}".
|
||||||
if (c == EOF)
|
Each name is followed by ":" and the name/value pairs are separated by ",".
|
||||||
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
|
||||||
|
|
||||||
ungetc(c, fp);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
Consumes a JSON object from a file stream and returns a corresponding obj_t
|
||||||
|
*/
|
||||||
obj_t* read_object(FILE* fp)
|
obj_t* read_object(FILE* fp)
|
||||||
{
|
{
|
||||||
obj_t* result = calloc_or_die(1, sizeof(obj_t));
|
obj_t* result = calloc_or_die(1, sizeof(obj_t));
|
||||||
char* key;
|
char* key;
|
||||||
struct json_value* val = calloc_or_die(1, sizeof(struct json_value));
|
|
||||||
int c;
|
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
/* read key */
|
||||||
discard_whitespace(fp);
|
discard_whitespace(fp);
|
||||||
|
|
||||||
if ((c = fgetc(fp)) == EOF)
|
switch (fgetc(fp)) {
|
||||||
|
case EOF:
|
||||||
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
||||||
|
|
||||||
if (c == '"')
|
default:
|
||||||
|
errx(UNEXPECTED_CHAR, "(%s) expected \" at index %zu", __func__, ftell(fp));
|
||||||
|
|
||||||
|
case '"':
|
||||||
key = read_string(fp);
|
key = read_string(fp);
|
||||||
else if (c == '}')
|
break;
|
||||||
return result;
|
|
||||||
|
|
||||||
|
case '}':
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check for ':' separator */
|
||||||
discard_whitespace(fp);
|
discard_whitespace(fp);
|
||||||
|
|
||||||
if ((c = fgetc(fp)) == EOF)
|
switch (fgetc(fp)) {
|
||||||
|
case ':':
|
||||||
|
break;
|
||||||
|
|
||||||
|
case EOF:
|
||||||
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
||||||
|
|
||||||
if (c != ':')
|
default:
|
||||||
errx(UNEXPECTED_CHAR, "(%s) expected separator (':') at index %zu",
|
errx(UNEXPECTED_CHAR, "(%s) expected ':' at index %zu", __func__, ftell(fp));
|
||||||
__func__, ftell(fp));
|
}
|
||||||
|
|
||||||
|
/* read value */
|
||||||
discard_whitespace(fp);
|
discard_whitespace(fp);
|
||||||
|
|
||||||
|
struct json_value* val = calloc_or_die(1, sizeof(struct json_value));
|
||||||
*val = parse_json_value(fp);
|
*val = parse_json_value(fp);
|
||||||
|
|
||||||
bool ok = obj_insert(*result, key, val);
|
/* insert key-value pair to obj */
|
||||||
|
if (!obj_insert(*result, key, val))
|
||||||
if (!ok)
|
|
||||||
err(EXIT_FAILURE, "failed to insert pair (%s, %p)", key, (void*)val);
|
err(EXIT_FAILURE, "failed to insert pair (%s, %p)", key, (void*)val);
|
||||||
|
|
||||||
|
/* read separator or end of object */
|
||||||
discard_whitespace(fp);
|
discard_whitespace(fp);
|
||||||
|
|
||||||
if ((c = fgetc(fp)) == EOF)
|
switch (fgetc(fp)) {
|
||||||
|
case EOF:
|
||||||
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
||||||
|
|
||||||
if (c == ',')
|
case ',':
|
||||||
continue;
|
continue;
|
||||||
else if (c == '}')
|
|
||||||
|
case '}':
|
||||||
return result;
|
return result;
|
||||||
else
|
|
||||||
errx(UNEXPECTED_CHAR, "(%s) expected ',' or '}' at index %zu", __func__,
|
default:
|
||||||
ftell(fp));
|
errx(UNEXPECTED_CHAR, "(%s) expected ',' or '}' at index %zu", __func__, ftell(fp));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
A JSON array is an ordered collection of values.
|
||||||
|
It begins with "[" and ends with "]". Values are separated by ","
|
||||||
|
|
||||||
|
Consumes a JSON array from a file stream and returns
|
||||||
|
a NULL-terminated array of json_value pointers
|
||||||
|
*/
|
||||||
struct json_value** read_array(FILE* fp)
|
struct json_value** read_array(FILE* fp)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
@@ -127,26 +158,32 @@ struct json_value** read_array(FILE* fp)
|
|||||||
struct json_value** output = malloc_or_die(output_size);
|
struct json_value** output = malloc_or_die(output_size);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
c = fgetc(fp);
|
|
||||||
|
|
||||||
if (c == EOF)
|
|
||||||
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
|
||||||
|
|
||||||
if (c == ']')
|
|
||||||
break;
|
|
||||||
|
|
||||||
if (c == ',')
|
|
||||||
continue;
|
|
||||||
|
|
||||||
ungetc(c, fp);
|
|
||||||
|
|
||||||
if (i > output_size) {
|
if (i > output_size) {
|
||||||
output_size *= 2;
|
output_size *= 2;
|
||||||
output = realloc_or_die(output, output_size);
|
output = realloc_or_die(output, output_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
c = fgetc(fp);
|
||||||
|
|
||||||
|
switch (c) {
|
||||||
|
case EOF:
|
||||||
|
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
||||||
|
|
||||||
|
case ']':
|
||||||
|
output[i] = NULL;
|
||||||
|
return realloc_or_die(output, i * sizeof(struct json_value*));
|
||||||
|
|
||||||
|
case ',':
|
||||||
|
continue;
|
||||||
|
|
||||||
|
default:
|
||||||
|
ungetc(c, fp);
|
||||||
output[i] = malloc_or_die(sizeof(struct json_value));
|
output[i] = malloc_or_die(sizeof(struct json_value));
|
||||||
*output[i] = parse_json_value(fp);
|
*output[i] = parse_json_value(fp);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -155,6 +192,12 @@ struct json_value** read_array(FILE* fp)
|
|||||||
return realloc_or_die(output, i * sizeof(void*));
|
return realloc_or_die(output, i * sizeof(void*));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Consumes and discards a literal "null" from a file stream
|
||||||
|
|
||||||
|
If the next characters do not match "null"
|
||||||
|
it terminates the program with a non-zero code
|
||||||
|
*/
|
||||||
void read_null(FILE* fp)
|
void read_null(FILE* fp)
|
||||||
{
|
{
|
||||||
static const char ok[] = { 'n', 'u', 'l', 'l' };
|
static const char ok[] = { 'n', 'u', 'l', 'l' };
|
||||||
@@ -170,6 +213,13 @@ void read_null(FILE* fp)
|
|||||||
ftell(fp));
|
ftell(fp));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
JSON boolean values are either "true" or "false".
|
||||||
|
|
||||||
|
Consumes a JSON boolean from a file stream and returns true or false.
|
||||||
|
|
||||||
|
Terminates with a non-zero code if the next characters do not match these.
|
||||||
|
*/
|
||||||
bool read_boolean(FILE* fp)
|
bool read_boolean(FILE* fp)
|
||||||
{
|
{
|
||||||
static const char t[] = { 't', 'r', 'u', 'e' };
|
static const char t[] = { 't', 'r', 'u', 'e' };
|
||||||
@@ -193,67 +243,72 @@ bool read_boolean(FILE* fp)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: fix int overflow
|
// TODO: fix int overflow
|
||||||
int64_t read_number(FILE* fp)
|
/*
|
||||||
|
A JSON number is very much like a C or Java number,
|
||||||
|
except that the octal and hexadecimal formats are not used.
|
||||||
|
|
||||||
|
Consumes a JSON number from a file stream and returns the number
|
||||||
|
*/
|
||||||
|
double read_number(FILE* fp)
|
||||||
{
|
{
|
||||||
int c;
|
double n;
|
||||||
|
|
||||||
int64_t sum = 0;
|
fscanf(fp, "%lf", &n);
|
||||||
|
|
||||||
do {
|
return n;
|
||||||
c = fgetc(fp);
|
|
||||||
|
|
||||||
if (c == EOF)
|
|
||||||
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
|
||||||
|
|
||||||
sum *= 10;
|
|
||||||
sum += c - '0';
|
|
||||||
} while (isdigit(c));
|
|
||||||
|
|
||||||
ungetc(c, fp);
|
|
||||||
|
|
||||||
return sum;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
A JSON value can be a JSON string in double quotes, or a JSON number,
|
||||||
|
or true or false or null, or a JSON object or a JSON array.
|
||||||
|
These structures can be nested.
|
||||||
|
*/
|
||||||
struct json_value parse_json_value(FILE* fp)
|
struct json_value parse_json_value(FILE* fp)
|
||||||
{
|
{
|
||||||
discard_whitespace(fp);
|
discard_whitespace(fp);
|
||||||
int c = fgetc(fp);
|
int c = fgetc(fp);
|
||||||
|
|
||||||
struct json_value result = { 0 };
|
struct json_value result = { 0 };
|
||||||
|
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case EOF:
|
case EOF:
|
||||||
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
err(EARLY_EOF, "(%s) unexpected EOF", __func__);
|
||||||
|
|
||||||
case '{':
|
case '{':
|
||||||
result.type = object;
|
result.type = object;
|
||||||
result.object = read_object(fp);
|
result.object = read_object(fp);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '"':
|
case '"':
|
||||||
result.type = string;
|
result.type = string;
|
||||||
result.string = read_string(fp);
|
result.string = read_string(fp);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '[':
|
case '[':
|
||||||
result.type = array;
|
result.type = array;
|
||||||
result.array = read_array(fp);
|
result.array = read_array(fp);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 't':
|
case 't':
|
||||||
case 'f':
|
case 'f':
|
||||||
ungetc(c, fp);
|
ungetc(c, fp);
|
||||||
result.type = boolean;
|
result.type = boolean;
|
||||||
result.boolean = read_boolean(fp);
|
result.boolean = read_boolean(fp);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 'n':
|
case 'n':
|
||||||
ungetc(c, fp);
|
ungetc(c, fp);
|
||||||
read_null(fp);
|
read_null(fp);
|
||||||
result.type = null;
|
result.type = null;
|
||||||
result.number = 0L;
|
result.number = 0L;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (isdigit(c)) {
|
if (isdigit(c)) {
|
||||||
result.type = number;
|
result.type = number;
|
||||||
result.number = read_number(fp);
|
result.number = read_number(fp);
|
||||||
} else {
|
} else {
|
||||||
errx(UNEXPECTED_CHAR, "(%s) unexpected symbol %c at index %zu", __func__,
|
errx(UNEXPECTED_CHAR, "(%s) unexpected symbol %c at index %zu", __func__, c, ftell(fp) - 1);
|
||||||
c, ftell(fp) - 1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -262,47 +317,58 @@ struct json_value parse_json_value(FILE* fp)
|
|||||||
|
|
||||||
void add_indent(int n)
|
void add_indent(int n)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < n; i++) {
|
for (int i = 0; i < n; i++)
|
||||||
putchar(' ');
|
putchar(' ');
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_object(obj_t obj, int cur_indent, int indent_amount)
|
void print_object(obj_t obj, int cur_indent, int indent_amount)
|
||||||
{
|
{
|
||||||
printf("{");
|
putchar('{');
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
|
||||||
for (size_t i = 0; i < OBJ_SIZE; i++) {
|
for (size_t i = 0; i < OBJ_SIZE; i++) {
|
||||||
struct obj_entry* e = obj[i];
|
struct obj_entry* e = obj[i];
|
||||||
|
|
||||||
if (e == NULL)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
while (e != NULL) {
|
while (e != NULL) {
|
||||||
|
if (!first)
|
||||||
|
putchar(',');
|
||||||
|
|
||||||
|
first = false;
|
||||||
|
|
||||||
putchar('\n');
|
putchar('\n');
|
||||||
add_indent(cur_indent);
|
add_indent(cur_indent);
|
||||||
printf("\"%s\": ", e->key);
|
printf("\"%s\": ", e->key);
|
||||||
print_json_value(*(e->val), cur_indent + indent_amount, indent_amount);
|
print_json_value(*(e->val), cur_indent + indent_amount, indent_amount);
|
||||||
putchar(',');
|
|
||||||
|
|
||||||
e = e->next;
|
e = e->next;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
printf("\b"); // undo last comma
|
|
||||||
putchar('\n');
|
|
||||||
|
|
||||||
|
putchar('\n');
|
||||||
add_indent(cur_indent - indent_amount * 2);
|
add_indent(cur_indent - indent_amount * 2);
|
||||||
printf("}");
|
putchar('}');
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_array(struct json_value** arr, int cur_indent, int indent_amount)
|
void print_array(struct json_value** arr, int cur_indent, int indent_amount)
|
||||||
{
|
{
|
||||||
putchar('[');
|
putchar('[');
|
||||||
|
|
||||||
for (size_t i = 0; arr[i] != NULL; i++) {
|
size_t i;
|
||||||
|
|
||||||
|
for (i = 0; arr[i + 1] != NULL; i++) {
|
||||||
|
putchar('\n');
|
||||||
|
add_indent(cur_indent);
|
||||||
print_json_value(*arr[i], cur_indent + indent_amount, indent_amount);
|
print_json_value(*arr[i], cur_indent + indent_amount, indent_amount);
|
||||||
putchar(',');
|
putchar(',');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
putchar('\n');
|
||||||
|
add_indent(cur_indent);
|
||||||
|
print_json_value(*arr[i], cur_indent + indent_amount, indent_amount);
|
||||||
|
|
||||||
|
putchar('\n');
|
||||||
|
add_indent(cur_indent - indent_amount * 2);
|
||||||
putchar(']');
|
putchar(']');
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -336,5 +402,4 @@ void print_json_value(struct json_value val, int cur_indent,
|
|||||||
void print_json(struct json_value val, int indent)
|
void print_json(struct json_value val, int indent)
|
||||||
{
|
{
|
||||||
print_json_value(val, 0, indent);
|
print_json_value(val, 0, indent);
|
||||||
putchar('\n');
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user