From d39e71dd70aa9383156f7736374c8b565f0fa493 Mon Sep 17 00:00:00 2001 From: Lassi Kortela Date: Wed, 29 Sep 2021 14:15:07 +0300 Subject: [PATCH 1/8] Add -Wextra -pedantic -ansi --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 848cdc2..1927a53 100644 --- a/Makefile +++ b/Makefile @@ -18,7 +18,7 @@ test_object_dirs = $(sort $(foreach fn,$(test_objects),$(dir $(fn)))) CC = clang LD = clang -CFLAGS += -Wall -g -MMD -std=c99 -I. +CFLAGS += -Wall -Wextra -pedantic -ansi -g -MMD -std=c99 -I. TEST_CFLAGS := $(CFLAGS) -O0 #LDFLAGS += ifneq ($(DEBUG),) From eb66fbebbf970f62c44c3c299406e5434d535e6f Mon Sep 17 00:00:00 2001 From: Lassi Kortela Date: Wed, 29 Sep 2021 14:15:56 +0300 Subject: [PATCH 2/8] Fix bug in _hex_str_to_ul() --- jsont.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jsont.c b/jsont.c index 5863c7a..bfa8f31 100644 --- a/jsont.c +++ b/jsont.c @@ -74,7 +74,7 @@ unsigned long _hex_str_to_ul(const uint8_t* bytes, size_t len) { for (size_t i = 0; i != len; ++i) { uint8_t b = bytes[i]; int digit = (b > '0'-1 && b < 'f'+1) ? 
kHexValueTable[b-'0'] : -1; - if (b == -1 || // bad digit + if (digit == -1 || // bad digit (value > cutoff) || // overflow ((value == cutoff) && (digit > cutoff_digit)) ) { return ULONG_MAX; From cb19fdb8e021b44d7de298dbd07917b89eb17e54 Mon Sep 17 00:00:00 2001 From: Lassi Kortela Date: Wed, 29 Sep 2021 14:16:18 +0300 Subject: [PATCH 3/8] Fix signedness warning in jsont_data_equals() --- jsont.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/jsont.c b/jsont.c index bfa8f31..8ac1a2a 100644 --- a/jsont.c +++ b/jsont.c @@ -172,9 +172,13 @@ bool jsont_data_equals(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) { (memcmp((const void*)ctx->value_buf.data, (const void*)bytes, length) == 0); } else { - return (ctx->input_buf_value_end - ctx->input_buf_value_start == length) && - (memcmp((const void*)ctx->input_buf_value_start, - (const void*)bytes, length) == 0); + size_t value_length = + ctx->input_buf_value_end - ctx->input_buf_value_start; + if (value_length != length) { + return 0; + } + return (memcmp((const void*)ctx->input_buf_value_start, + (const void*)bytes, length) == 0); } } From 66279d637cb7a781966078b3f7c9a1ff6be7db75 Mon Sep 17 00:00:00 2001 From: Lassi Kortela Date: Wed, 29 Sep 2021 14:17:03 +0300 Subject: [PATCH 4/8] Fix warnings about unused argc, argv --- example1.c | 2 +- example2.c | 2 +- test/test_tokenizer.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/example1.c b/example1.c index c36559c..6526b9d 100644 --- a/example1.c +++ b/example1.c @@ -8,7 +8,7 @@ static const char* _tok_name(jsont_tok_t tok); -int main(int argc, const char** argv) { +int main(void) { // Create a new reusable tokenizer jsont_ctx_t* S = jsont_create(0); diff --git a/example2.c b/example2.c index 5077f85..894fd9b 100644 --- a/example2.c +++ b/example2.c @@ -137,7 +137,7 @@ if (jsont_next(S) != JSONT_OBJECT_START) { return rsp; } -int main(int argc, const char** argv) { +int main(void) { // Create a new reusable 
tokenizer jsont_ctx_t* S = jsont_create(0); diff --git a/test/test_tokenizer.c b/test/test_tokenizer.c index f994c85..6f29dd1 100644 --- a/test/test_tokenizer.c +++ b/test/test_tokenizer.c @@ -10,7 +10,7 @@ strlen(fieldName)) == true); \ } while(0) -int main(int argc, const char** argv) { +int main(void) { // Create a new reusable tokenizer jsont_ctx_t* S = jsont_create(0); From a351bf8e309a035207747cf451c5372cbc8b93c6 Mon Sep 17 00:00:00 2001 From: Lassi Kortela Date: Wed, 29 Sep 2021 14:17:27 +0300 Subject: [PATCH 5/8] Clean up whitespace --- example1.c | 2 +- example2.c | 6 +++--- jsont.c | 1 - test/test_tokenizer.c | 2 +- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/example1.c b/example1.c index 6526b9d..97a8c13 100644 --- a/example1.c +++ b/example1.c @@ -37,7 +37,7 @@ int main(void) { } else if (tok == JSONT_NUMBER_FLOAT) { printf(" %f", jsont_float_value(S)); } - + printf("\n"); } diff --git a/example2.c b/example2.c index 894fd9b..e25bd35 100644 --- a/example2.c +++ b/example2.c @@ -58,7 +58,7 @@ typedef struct my_response { bool my_user_build(jsont_ctx_t* S, my_user_t* obj) { jsont_tok_t tok = jsont_current(S); if (tok != JSONT_OBJECT_START) return false; - + // for each field while ((tok = jsont_next(S)) == JSONT_FIELD_NAME) { const uint8_t* fieldname = 0; @@ -67,7 +67,7 @@ bool my_user_build(jsont_ctx_t* S, my_user_t* obj) { if (memcmp("id", fieldname, len) == 0) { MY_NEXT_EXPECT(S, JSONT_STRING); obj->id = jsont_strcpy_value(S); - + } else if (memcmp("name", fieldname, len) == 0) { MY_NEXT_EXPECT(S, JSONT_STRING); obj->name = jsont_strcpy_value(S); @@ -119,7 +119,7 @@ bool my_response_build(jsont_ctx_t* S, my_response_t* obj) { return false; } } - + return true; } diff --git a/jsont.c b/jsont.c index 8ac1a2a..d52bbff 100644 --- a/jsont.c +++ b/jsont.c @@ -570,4 +570,3 @@ jsont_tok_t jsont_next(jsont_ctx_t* ctx) { } } // while (1) } - diff --git a/test/test_tokenizer.c b/test/test_tokenizer.c index 6f29dd1..8e67d0f 100644 --- 
a/test/test_tokenizer.c +++ b/test/test_tokenizer.c @@ -81,7 +81,7 @@ int main(void) { // Expect the string '\u2192' (RIGHTWARDS ARROW, UTF8: E2,86,92) assert(jsont_next(S) == JSONT_STRING); assert(jsont_str_equals(S, "\xe2\x86\x92") == true); - + // Expect a field name 'n' jsont_next(S); JSONT_ASSERT_FIELD_NAME("n"); From e59f29ff7b15aa518674157207295ca91b5065f3 Mon Sep 17 00:00:00 2001 From: Edward Riede Date: Tue, 26 Oct 2021 11:57:04 -0700 Subject: [PATCH 6/8] Add ability to provide custom malloc functions --- jsont.c | 46 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/jsont.c b/jsont.c index d52bbff..39370e5 100644 --- a/jsont.c +++ b/jsont.c @@ -45,6 +45,9 @@ typedef uint8_t jsont_tok_t; typedef struct jsont_ctx { void* user_data; + void * (*lalloc)(void * user_data, size_t size); + void * (*lrealloc)(void * user_data, void * p, size_t size); + void (*lfree)(void * user_data, void * p); const uint8_t* input_buf; const uint8_t* input_buf_ptr; size_t input_len; @@ -86,18 +89,47 @@ unsigned long _hex_str_to_ul(const uint8_t* bytes, size_t len) { return value; } -jsont_ctx_t* jsont_create(void* user_data) { - jsont_ctx_t* ctx = (jsont_ctx_t*)calloc(1, sizeof(jsont_ctx_t)); +static void * lalloc(void * ign, size_t size) { + (void)ign; // unused + return malloc(size); +} + +static void * lrealloc(void * ign, void *p, size_t size) { + (void)ign; // unused + return realloc(p, size); +} + +static void lfree(void * ign, void *p) { + (void)ign; // unused + free(p); +} + + +jsont_ctx_t* jsont_create_alloc( + void* user_data, + void * (*lalloc)(void * user_data, size_t size), + void * (*lrealloc)(void * user_data, void * p, size_t size), + void (*lfree)(void * user_data, void * p)) { + jsont_ctx_t* ctx = (jsont_ctx_t*)lalloc(user_data, sizeof(jsont_ctx_t)); + memset(ctx, 0, sizeof(jsont_ctx_t)); ctx->user_data = user_data; + ctx->lalloc = lalloc; + ctx->lrealloc = lrealloc; + ctx->lfree = lfree; 
ctx->st_stack_size = _STRUCT_TYPE_STACK_SIZE; return ctx; } +jsont_ctx_t* jsont_create(void* user_data) { + return jsont_create_alloc(user_data, lalloc, lrealloc, lfree); +} + + void jsont_destroy(jsont_ctx_t* ctx) { if (ctx->value_buf.data != 0) { - free(ctx->value_buf.data); + ctx->lfree(ctx->user_data, ctx->value_buf.data); } - free(ctx); + ctx->lfree(ctx->user_data, ctx); } void jsont_reset(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) { @@ -188,7 +220,7 @@ char* jsont_strcpy_value(jsont_ctx_t* ctx) { } else { const uint8_t* bytes = 0; size_t len = jsont_data_value(ctx, &bytes); - char* buf = (char*)malloc(len+1); + char* buf = (char*)ctx->lalloc(ctx->user_data, len+1); if (memcpy((void*)buf, (const void*)bytes, len) != buf) { return 0; } @@ -355,14 +387,14 @@ static void _value_buf_append(jsont_ctx_t* ctx, const uint8_t* data, size_t len) if (ctx->value_buf.size < _VALUE_BUF_MIN_SIZE) { ctx->value_buf.size = _VALUE_BUF_MIN_SIZE; } - ctx->value_buf.data = (uint8_t*)malloc(ctx->value_buf.size); + ctx->value_buf.data = (uint8_t*)ctx->lalloc(ctx->user_data, ctx->value_buf.size); if (len != 0) { memcpy(ctx->value_buf.data, data, len); } } else { if (ctx->value_buf.length + len > ctx->value_buf.size) { size_t new_size = ctx->value_buf.size + (len * 2); - ctx->value_buf.data = realloc(ctx->value_buf.data, new_size); + ctx->value_buf.data = ctx->lrealloc(ctx->user_data, ctx->value_buf.data, new_size); assert(ctx->value_buf.data != 0); ctx->value_buf.size = new_size; } From ecb6e7bbea4c2258a9bd88ce84c823666c4bd03f Mon Sep 17 00:00:00 2001 From: Edward Riede Date: Tue, 26 Oct 2021 11:59:17 -0700 Subject: [PATCH 7/8] Add exponent parsing --- jsont.c | 23 +++++++++++++++++++++++ test/test_tokenizer.c | 19 ++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/jsont.c b/jsont.c index 39370e5..034a040 100644 --- a/jsont.c +++ b/jsont.c @@ -21,6 +21,8 @@ DEF_EM(UNEXPECTED_OBJECT_END, DEF_EM(UNEXPECTED_ARRAY_END, "Unexpected end of array 
while not in an array"); DEF_EM(UNEXPECTED_COMMA, "Unexpected \",\""); DEF_EM(UNEXPECTED_COLON, "Unexpected \":\""); +DEF_EM(EXTRA_DOT_IN_FLOAT, "Extra \".\" found while parsing float"); +DEF_EM(BAD_EXPONENT, "Bad format while parsing exponent"); DEF_EM(UNEXPECTED, "Unexpected input"); DEF_EM(UNEXPECTED_UNICODE_SEQ, "Malformed unicode encoded sequence in string"); #undef DEF_EM @@ -579,10 +581,31 @@ jsont_tok_t jsont_next(jsont_ctx_t* ctx) { ctx->input_buf_value_start = ctx->input_buf_ptr-1; //uint8_t prev_b = 0; bool is_float = false; + bool is_exp = false; while (1) { b = _next_byte(ctx); if (b == '.') { + if (is_float || is_exp) { + ctx->error_info = JSONT_ERRINFO_EXTRA_DOT_IN_FLOAT; + return _set_tok(ctx, JSONT_ERR); + } + is_float = true; + } else if ( b == 'E' || b == 'e') { + if (is_exp) { + ctx->error_info = JSONT_ERRINFO_BAD_EXPONENT; + return _set_tok(ctx, JSONT_ERR); + } + is_exp = true; is_float = true; + // check for +- on exponent + b = _next_byte(ctx); + if (b == '+' || b == '-') { + b = _next_byte(ctx); + } + if (!isdigit((int)b)) { + ctx->error_info = JSONT_ERRINFO_BAD_EXPONENT; + return _set_tok(ctx, JSONT_ERR); + } } else if (!isdigit((int)b)) { _rewind_one_byte(ctx); ctx->input_buf_value_end = ctx->input_buf_ptr; diff --git a/test/test_tokenizer.c b/test/test_tokenizer.c index 8e67d0f..fc17658 100644 --- a/test/test_tokenizer.c +++ b/test/test_tokenizer.c @@ -37,7 +37,10 @@ int main(void) { "\"a\\rb\"," "\"a\\tb\"," "\"\"," - "\" \"" + "\" \"," + "123.4e-2," + "345.6E2," + "789.12e+10" "]" "}"; @@ -169,6 +172,20 @@ int main(void) { assert(jsont_str_equals(S, " ") == true); assert(jsont_str_equals(S, "") == false); + // exponent formatting works + //"123.4e-2" + assert(jsont_next(S) == JSONT_NUMBER_FLOAT); + assert(fabs(jsont_float_value(S) - 1.234) < 0.001); + + //"345.6E2" + assert(jsont_next(S) == JSONT_NUMBER_FLOAT); + assert(fabs(jsont_float_value(S) - 34560.0) < 0.001); + + + //"789.12E+10" + assert(jsont_next(S) == JSONT_NUMBER_FLOAT); + 
assert(fabs(jsont_float_value(S) - 7891200000000.0) < 0.1); + // ] } assert(jsont_next(S) == JSONT_ARRAY_END); assert(jsont_next(S) == JSONT_OBJECT_END); From 1f3566904ccdc33eee5fb9b7d331108984ea6e7e Mon Sep 17 00:00:00 2001 From: Edward Riede Date: Tue, 2 Nov 2021 22:37:57 -0700 Subject: [PATCH 8/8] Update to "multitool" allocator. * Use a multitool allocation function as per suggestion in PR. * This way, only one function needs to be written/stored in the context --- jsont.c | 42 ++++++++++++++++++------------------------ jsont.h | 11 +++++++++++ 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/jsont.c b/jsont.c index 034a040..ea825c6 100644 --- a/jsont.c +++ b/jsont.c @@ -47,9 +47,7 @@ typedef uint8_t jsont_tok_t; typedef struct jsont_ctx { void* user_data; - void * (*lalloc)(void * user_data, size_t size); void * (*lrealloc)(void * user_data, void * p, size_t size); - void (*lfree)(void * user_data, void * p); const uint8_t* input_buf; const uint8_t* input_buf_ptr; size_t input_len; @@ -91,47 +89,43 @@ unsigned long _hex_str_to_ul(const uint8_t* bytes, size_t len) { return value; } -static void * lalloc(void * ign, size_t size) { - (void)ign; // unused - return malloc(size); -} - +/** + * a "multitool" allocator that encapsulates allocate, reallocate + * and free functionality. + * + * if p == NULL, then allocate "size" bytes. + * if p != NULL, and size != 0, then reallocate to "size" bytes. 
+ * otherwise, free(p) + */ static void * lrealloc(void * ign, void *p, size_t size) { (void)ign; // unused - return realloc(p, size); -} - -static void lfree(void * ign, void *p) { - (void)ign; // unused - free(p); + if (size) return realloc(p, size); + free(p); + return NULL; } jsont_ctx_t* jsont_create_alloc( void* user_data, - void * (*lalloc)(void * user_data, size_t size), - void * (*lrealloc)(void * user_data, void * p, size_t size), - void (*lfree)(void * user_data, void * p)) { - jsont_ctx_t* ctx = (jsont_ctx_t*)lalloc(user_data, sizeof(jsont_ctx_t)); + void * (*lrealloc)(void * user_data, void * p, size_t size)) { + jsont_ctx_t* ctx = (jsont_ctx_t*)lrealloc(user_data, NULL, sizeof(jsont_ctx_t)); memset(ctx, 0, sizeof(jsont_ctx_t)); ctx->user_data = user_data; - ctx->lalloc = lalloc; ctx->lrealloc = lrealloc; - ctx->lfree = lfree; ctx->st_stack_size = _STRUCT_TYPE_STACK_SIZE; return ctx; } jsont_ctx_t* jsont_create(void* user_data) { - return jsont_create_alloc(user_data, lalloc, lrealloc, lfree); + return jsont_create_alloc(user_data, lrealloc); } void jsont_destroy(jsont_ctx_t* ctx) { if (ctx->value_buf.data != 0) { - ctx->lfree(ctx->user_data, ctx->value_buf.data); + ctx->lrealloc(ctx->user_data, ctx->value_buf.data, 0); } - ctx->lfree(ctx->user_data, ctx); + ctx->lrealloc(ctx->user_data, ctx, 0); } void jsont_reset(jsont_ctx_t* ctx, const uint8_t* bytes, size_t length) { @@ -222,7 +216,7 @@ char* jsont_strcpy_value(jsont_ctx_t* ctx) { } else { const uint8_t* bytes = 0; size_t len = jsont_data_value(ctx, &bytes); - char* buf = (char*)ctx->lalloc(ctx->user_data, len+1); + char* buf = (char*)ctx->lrealloc(ctx->user_data, NULL, len+1); if (memcpy((void*)buf, (const void*)bytes, len) != buf) { return 0; } @@ -389,7 +383,7 @@ static void _value_buf_append(jsont_ctx_t* ctx, const uint8_t* data, size_t len) if (ctx->value_buf.size < _VALUE_BUF_MIN_SIZE) { ctx->value_buf.size = _VALUE_BUF_MIN_SIZE; } - ctx->value_buf.data = (uint8_t*)ctx->lalloc(ctx->user_data, 
ctx->value_buf.size); + ctx->value_buf.data = (uint8_t*)ctx->lrealloc(ctx->user_data, NULL, ctx->value_buf.size); if (len != 0) { memcpy(ctx->value_buf.data, data, len); } diff --git a/jsont.h b/jsont.h index 22cd043..64ecd09 100644 --- a/jsont.h +++ b/jsont.h @@ -51,6 +51,17 @@ extern "C" { // accessible through `jsont_user_data`. jsont_ctx_t* jsont_create(void* user_data); +// Create a new JSON tokenizer context with a +// custom allocation function. +// +// the custom allocation function serves as malloc, realloc and free. +// * if size == 0, then the operation is a free, +// * if p == NULL, then the operation is a malloc. +// * otherwise, the operation is a realloc +jsont_ctx_t* jsont_create_alloc( + void* user_data, + void * (*lrealloc)(void * user_data, void * p, size_t size)); + // Destroy a JSON tokenizer context. This will free any internal data, except // from the input buffer. void jsont_destroy(jsont_ctx_t* ctx);