Skip to content

Commit 1835ea3

Browse files
Various review updates
- Add specific errors for JSON blob decode - Update changelog and API docs - Switch to char* for blobs for consistency - Update Python changelog Fixup lint
1 parent 89b06c4 commit 1835ea3

File tree

7 files changed

+77
-38
lines changed

7 files changed

+77
-38
lines changed

c/CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
In development
66

7+
- Add ``tsk_json_struct_metadata_get_blob`` function
8+
(:user:`benjeffery`, :pr:`3306`)
9+
710
--------------------
811
[1.3.1] - 2026-03-06
912
--------------------

c/tests/test_core.c

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -86,14 +86,14 @@ test_generate_uuid(void)
8686
static void
8787
set_u64_le(uint8_t *dest, uint64_t value)
8888
{
89-
dest[0] = (uint8_t)(value & 0xFF);
90-
dest[1] = (uint8_t)((value >> 8) & 0xFF);
91-
dest[2] = (uint8_t)((value >> 16) & 0xFF);
92-
dest[3] = (uint8_t)((value >> 24) & 0xFF);
93-
dest[4] = (uint8_t)((value >> 32) & 0xFF);
94-
dest[5] = (uint8_t)((value >> 40) & 0xFF);
95-
dest[6] = (uint8_t)((value >> 48) & 0xFF);
96-
dest[7] = (uint8_t)((value >> 56) & 0xFF);
89+
dest[0] = (uint8_t) (value & 0xFF);
90+
dest[1] = (uint8_t) ((value >> 8) & 0xFF);
91+
dest[2] = (uint8_t) ((value >> 16) & 0xFF);
92+
dest[3] = (uint8_t) ((value >> 24) & 0xFF);
93+
dest[4] = (uint8_t) ((value >> 32) & 0xFF);
94+
dest[5] = (uint8_t) ((value >> 40) & 0xFF);
95+
dest[6] = (uint8_t) ((value >> 48) & 0xFF);
96+
dest[7] = (uint8_t) ((value >> 56) & 0xFF);
9797
}
9898

9999
static void
@@ -103,7 +103,7 @@ test_json_struct_metadata_get_blob(void)
103103
char metadata[128];
104104
const char *json;
105105
tsk_size_t json_buffer_length;
106-
const uint8_t *blob;
106+
const char *blob;
107107
tsk_size_t blob_length;
108108
uint8_t *bytes;
109109
tsk_size_t metadata_length;
@@ -181,25 +181,25 @@ test_json_struct_metadata_get_blob(void)
181181
metadata_length = header_length - 1;
182182
ret = tsk_json_struct_metadata_get_blob(
183183
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
184-
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
184+
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED);
185185

186186
metadata_length = (tsk_size_t) total_length;
187187
bytes[0] = 'X';
188188
ret = tsk_json_struct_metadata_get_blob(
189189
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
190-
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
190+
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC);
191191
bytes[0] = 'J';
192192

193193
bytes[4] = 2;
194194
ret = tsk_json_struct_metadata_get_blob(
195195
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
196-
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_VERSION_TOO_NEW);
196+
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION);
197197
bytes[4] = 1;
198198

199-
metadata_length = (tsk_size_t)(total_length - 1);
199+
metadata_length = (tsk_size_t) (total_length - 1);
200200
ret = tsk_json_struct_metadata_get_blob(
201201
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
202-
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
202+
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED);
203203

204204
ret = tsk_json_struct_metadata_get_blob(
205205
NULL, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
@@ -229,13 +229,13 @@ test_json_struct_metadata_get_blob(void)
229229
set_u64_le(bytes + 13, 0);
230230
ret = tsk_json_struct_metadata_get_blob(
231231
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
232-
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
232+
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH);
233233

234234
set_u64_le(bytes + 5, 8);
235-
set_u64_le(bytes + 13, UINT64_MAX - (uint64_t)(header_length + 8) + 1);
235+
set_u64_le(bytes + 13, UINT64_MAX - (uint64_t) (header_length + 8) + 1);
236236
ret = tsk_json_struct_metadata_get_blob(
237237
metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
238-
CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
238+
CU_ASSERT_EQUAL(ret, TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH);
239239
}
240240

241241
static void

c/tskit/core.c

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,10 @@
3232
#include <kastore.h>
3333
#include <tskit/core.h>
3434

35-
#define UUID_NUM_BYTES 16
35+
#define UUID_NUM_BYTES 16
3636
#define TSK_JSON_BINARY_HEADER_SIZE 21
3737

38-
static const uint8_t TSK_JSON_BINARY_MAGIC[4] = { 'J', 'B', 'L', 'B' };
38+
static const uint8_t _tsk_json_binary_magic[4] = { 'J', 'B', 'L', 'B' };
3939

4040
#if defined(_WIN32)
4141

@@ -143,7 +143,7 @@ tsk_generate_uuid(char *dest, int TSK_UNUSED(flags))
143143

144144
int
145145
tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
146-
const char **json, tsk_size_t *json_length, const uint8_t **blob,
146+
const char **json, tsk_size_t *json_length, const char **blob,
147147
tsk_size_t *blob_length)
148148
{
149149
int ret;
@@ -153,7 +153,7 @@ tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_leng
153153
uint64_t header_and_json_length;
154154
uint64_t total_length;
155155
const uint8_t *bytes;
156-
const uint8_t *blob_start;
156+
const char *blob_start;
157157
const char *json_start;
158158

159159
if (metadata == NULL || json == NULL || json_length == NULL || blob == NULL
@@ -163,36 +163,36 @@ tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_leng
163163
}
164164
bytes = (const uint8_t *) metadata;
165165
if (metadata_length < TSK_JSON_BINARY_HEADER_SIZE) {
166-
ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
166+
ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED);
167167
goto out;
168168
}
169-
if (memcmp(bytes, TSK_JSON_BINARY_MAGIC, sizeof(TSK_JSON_BINARY_MAGIC)) != 0) {
170-
ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
169+
if (memcmp(bytes, _tsk_json_binary_magic, sizeof(_tsk_json_binary_magic)) != 0) {
170+
ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC);
171171
goto out;
172172
}
173173
version = bytes[4];
174174
if (version != 1) {
175-
ret = tsk_trace_error(TSK_ERR_FILE_VERSION_TOO_NEW);
175+
ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION);
176176
goto out;
177177
}
178178
json_length_u64 = tsk_load_u64_le(bytes + 5);
179179
binary_length_u64 = tsk_load_u64_le(bytes + 13);
180180
if (json_length_u64 > UINT64_MAX - (uint64_t) TSK_JSON_BINARY_HEADER_SIZE) {
181-
ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
181+
ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH);
182182
goto out;
183183
}
184184
header_and_json_length = (uint64_t) TSK_JSON_BINARY_HEADER_SIZE + json_length_u64;
185185
if (binary_length_u64 > UINT64_MAX - header_and_json_length) {
186-
ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
186+
ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH);
187187
goto out;
188188
}
189189
total_length = header_and_json_length + binary_length_u64;
190190
if ((uint64_t) metadata_length < total_length) {
191-
ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
191+
ret = tsk_trace_error(TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED);
192192
goto out;
193193
}
194194
json_start = (const char *) bytes + TSK_JSON_BINARY_HEADER_SIZE;
195-
blob_start = bytes + TSK_JSON_BINARY_HEADER_SIZE + json_length_u64;
195+
blob_start = (const char *) bytes + TSK_JSON_BINARY_HEADER_SIZE + json_length_u64;
196196
*json = json_start;
197197
*json_length = (tsk_size_t) json_length_u64;
198198
*blob = blob_start;
@@ -268,6 +268,22 @@ tsk_strerror_internal(int err)
268268
ret = "An incompatible type for a column was found in the file. "
269269
"(TSK_ERR_BAD_COLUMN_TYPE)";
270270
break;
271+
case TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC:
272+
ret = "JSON binary struct metadata does not begin with the expected "
273+
"magic bytes. (TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC)";
274+
break;
275+
case TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED:
276+
ret = "JSON binary struct metadata is shorter than the expected size. "
277+
"(TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED)";
278+
break;
279+
case TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH:
280+
ret = "A length field in the JSON binary struct metadata header is invalid. "
281+
"(TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH)";
282+
break;
283+
case TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION:
284+
ret = "JSON binary struct metadata uses an unsupported version number. "
285+
"(TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION)";
286+
break;
271287

272288
/* Out of bounds errors */
273289
case TSK_ERR_BAD_OFFSET:

c/tskit/core.h

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,26 @@ not found in the file.
309309
An unsupported type was provided for a column in the file.
310310
*/
311311
#define TSK_ERR_BAD_COLUMN_TYPE -105
312+
313+
/**
314+
The JSON binary struct metadata does not begin with the expected magic bytes.
315+
*/
316+
#define TSK_ERR_JSON_STRUCT_METADATA_BAD_MAGIC -106
317+
318+
/**
319+
The JSON binary struct metadata is shorter than the expected size.
320+
*/
321+
#define TSK_ERR_JSON_STRUCT_METADATA_TRUNCATED -107
322+
323+
/**
324+
A length field in the JSON binary struct metadata header is invalid.
325+
*/
326+
#define TSK_ERR_JSON_STRUCT_METADATA_INVALID_LENGTH -108
327+
328+
/**
329+
The JSON binary struct metadata uses an unsupported version number.
330+
*/
331+
#define TSK_ERR_JSON_STRUCT_METADATA_BAD_VERSION -109
312332
/** @} */
313333

314334
/**
@@ -1116,7 +1136,7 @@ int tsk_generate_uuid(char *dest, int flags);
11161136
@brief Extract the binary payload from ``json+struct`` encoded metadata.
11171137
11181138
@rst
1119-
Metadata produced by :py:class:`tskit.metadata.JSONStructCodec` consists of a fixed-size
1139+
Metadata produced by the JSONStructCodec consists of a fixed-size
11201140
header followed by canonical JSON bytes and an optional binary payload. This helper
11211141
validates the framing, returning pointers to the embedded JSON and binary sections
11221142
without copying.
@@ -1131,10 +1151,10 @@ the original metadata buffer is alive.
11311151
@param[out] json_length On success, set to the JSON length in bytes.
11321152
@param[out] blob On success, set to the start of the binary payload.
11331153
@param[out] blob_length On success, set to the payload length in bytes.
1134-
@return 0 on success, or a :ref:`TSK_ERR <c_api_errors>` code on failure.
1154+
@return Return 0 on success or a negative value on failure.
11351155
*/
11361156
int tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
1137-
const char **json, tsk_size_t *json_length, const uint8_t **blob,
1157+
const char **json, tsk_size_t *json_length, const char **blob,
11381158
tsk_size_t *blob_length);
11391159

11401160
/* TODO most of these can probably be macros so they compile out as no-ops.

docs/c-api.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,7 @@ Miscellaneous functions
620620

621621
.. doxygenfunction:: tsk_is_unknown_time
622622

623+
.. doxygenfunction:: tsk_json_struct_metadata_get_blob
623624

624625
*************************
625626
Function Specific Options

python/CHANGELOG.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
In development
66

7+
- Add ``json+struct`` metadata codec that allows storing binary data using a struct
8+
schema alongside JSON metadata. (:user:`benjeffery`, :pr:`3306`)
9+
710
--------------------
811
[1.0.2] - 2026-03-06
912
--------------------
@@ -107,8 +110,6 @@ Maintenance release.
107110
also around 10% faster.
108111
(:user:`benjeffery`, :pr:`3313`, :pr:`3317`, :issue:`1896`)
109112

110-
- Add ``json+struct`` metadata codec that allows storing binary data using a struct
111-
schema alongside JSON metadata. (:user:`benjeffery`, :pr:`3306`)
112113

113114
**Bugfixes**
114115

python/tskit/metadata.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ class JSONStructCodec(AbstractMetadataCodec):
208208
_HDR = struct.Struct("<4sBQQ") # magic, version, json_len, blob_len
209209

210210
@classmethod
211-
def is_schema_trivial(self, schema: Mapping) -> bool:
211+
def is_schema_trivial(cls, schema: Mapping) -> bool:
212212
return False
213213

214214
def __init__(self, schema: Mapping[str, Any]) -> None:
@@ -265,9 +265,7 @@ def __init__(self, schema: Mapping[str, Any]) -> None:
265265
self.struct_codec = StructCodec(self.struct_schema)
266266
self._struct_keys = set(struct_props.keys())
267267
self._validate_json = TSKITMetadataSchemaValidator(self.json_schema).validate
268-
self._validate_struct = TSKITMetadataSchemaValidator(
269-
self.struct_schema
270-
).validate
268+
self._validate_struct = TSKITMetadataSchemaValidator(self.struct_schema).validate
271269

272270
def validate_row(self, row: Any) -> None:
273271
if not isinstance(row, dict):

0 commit comments

Comments
 (0)