Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,62 @@

import static org.assertj.core.api.Assertions.assertThatThrownBy;

/// Adversarial test for the encoded-array-tree recursion in
/// Adversarial tests for the encoded-array-tree recursion in
/// [FlatSegmentDecoder]'s `convertArrayNode`.
///
/// The decoder walks the array node tree recursively (validity, patches, run-ends, dictionary
/// codes/values, …). Without the [FlatSegmentDecoder#MAX_ARRAY_TREE_DEPTH] cap a crafted segment
/// with thousands of nested children produces a [StackOverflowError] — an `Error` that escapes the
/// "malformed input must surface as [VortexException]" contract (ADR 0003). This pins that contract.
/// "malformed input must surface as [VortexException]" contract (ADR 0003).
///
/// The two cases bracket the exact `depth > MAX_ARRAY_TREE_DEPTH` boundary: a tree whose deepest
/// node sits at exactly the limit must clear the guard (and then fail later for an unrelated reason
/// — no decoder), while one node deeper must be rejected by the depth guard itself.
class ArrayNodeDepthBombSecurityTest {

private static final DType DTYPE = DType.I32;

private final FlatSegmentDecoder sut = new FlatSegmentDecoder(ReadRegistry.empty());

@Test
void deeplyNestedArrayTree_throwsVortexException() {
void arrayTreeAtDepthLimit_clearsGuard() {
try (Arena arena = Arena.ofConfined()) {
// Given — deepest node at exactly MAX_ARRAY_TREE_DEPTH: convertArrayNode is entered with
// depth == limit there, and `limit > limit` is false. The walk completes and only then
// fails because the empty registry has no decoder. Kills `depth >` relaxed to `>=`,
// which would wrongly reject this legal max-depth tree with the depth message.
byte[] fb = deeplyNestedArrayFlatBuffer(FlatSegmentDecoder.MAX_ARRAY_TREE_DEPTH);
MemorySegment seg = wrapAsSegment(fb, arena);

// When / Then
assertThatThrownBy(() -> sut.decode(seg, List.of("vortex.flat"), DTYPE, 1, arena))
.isInstanceOf(VortexException.class)
.hasMessageContaining("no decoder");
}
}

@Test
void arrayTreeOneOverDepthLimit_throwsVortexException() {
try (Arena arena = Arena.ofConfined()) {
// Given — a flat segment whose FbsArray root nests 65536 levels of single-child nodes.
// Real encodings nest only a handful of levels; 65536 reliably blows the JVM stack on
// the recursive convertArrayNode walk if the depth cap is removed.
byte[] fb = deeplyNestedArrayFlatBuffer(65536);
MemorySegment seg = arena.allocate(fb.length + 4L);
MemorySegment.copy(MemorySegment.ofArray(fb), 0, seg, 0, fb.length);
seg.set(PTypeIO.LE_INT, fb.length, fb.length);
// Given — one level deeper: the deepest node reaches limit + 1, tripping the guard
// before any StackOverflowError can escape.
byte[] fb = deeplyNestedArrayFlatBuffer(FlatSegmentDecoder.MAX_ARRAY_TREE_DEPTH + 1);
MemorySegment seg = wrapAsSegment(fb, arena);

// When / Then — must surface as VortexException, not StackOverflowError
assertThatThrownBy(() -> sut.decode(seg, List.of("vortex.flat"), DTYPE, 1, arena))
.isInstanceOf(VortexException.class);
.isInstanceOf(VortexException.class)
.hasMessageContaining("depth");
}
}

private static MemorySegment wrapAsSegment(byte[] fb, Arena arena) {
MemorySegment seg = arena.allocate(fb.length + 4L);
MemorySegment.copy(MemorySegment.ofArray(fb), 0, seg, 0, fb.length);
seg.set(PTypeIO.LE_INT, fb.length, fb.length);
return seg;
}

/// Builds a minimal `FbsArray` whose root node has `depth` levels of single-child nesting,
/// each level a buffer-less node referencing encoding index 0.
private static byte[] deeplyNestedArrayFlatBuffer(int depth) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@

import io.github.dfa1.vortex.core.error.VortexException;
import io.github.dfa1.vortex.core.io.VortexFormat;
import org.junit.jupiter.api.Test;
import io.github.dfa1.vortex.reader.MalformedFiles.NestKind;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;

import static io.github.dfa1.vortex.reader.MalformedFiles.buildDeeplyNestedListDtype;
import static io.github.dfa1.vortex.reader.MalformedFiles.buildDeeplyNestedDtype;
import static io.github.dfa1.vortex.reader.MalformedFiles.buildFlatLayout;
import static io.github.dfa1.vortex.reader.MalformedFiles.buildFooter;
import static io.github.dfa1.vortex.reader.MalformedFiles.buildPostscript;
Expand All @@ -29,25 +31,28 @@ class DTypeDepthBombSecurityTest {

private static final ReadRegistry REGISTRY = ReadRegistry.empty();

@Test
void deeplyNestedDtype_throwsVortexException(@TempDir Path tmp) throws Exception {
// Given — a file whose DType nests 65536 levels of List. Real schemas nest a handful of
// levels; 65536 reliably blows the JVM stack on the recursive convertDType walk.
Path file = buildDeeplyNestedDtypeFile(tmp, 65536);
@ParameterizedTest(name = "nested via {0}")
@EnumSource(NestKind.class)
void deeplyNestedDtype_throwsVortexException(NestKind kind, @TempDir Path tmp) throws Exception {
// Given — a file whose DType nests 65536 levels of `kind`. Real schemas nest a handful of
// levels; 65536 reliably blows the JVM stack on the recursive convertDType walk. Each kind
// drives a different recursion arm (List/Struct/FixedSizeList/Extension), so all four must
// increment the depth counter for the MAX_DTYPE_DEPTH guard to bound them.
Path file = buildDeeplyNestedDtypeFile(tmp, 65536, kind);

// When / Then — must surface as VortexException, not StackOverflowError
assertThatThrownBy(() -> VortexReader.open(file, REGISTRY))
.isInstanceOf(VortexException.class);
}

private static Path buildDeeplyNestedDtypeFile(Path dir, int depth) throws Exception {
private static Path buildDeeplyNestedDtypeFile(Path dir, int depth, NestKind kind) throws Exception {
byte[] body = new byte[8]; // unused placeholder
ByteBuffer footerBuf = buildFooter(
new String[]{"vortex.primitive"},
new String[]{"vortex.flat"},
new long[]{0L},
new long[]{(long) body.length});
ByteBuffer dtypeBuf = buildDeeplyNestedListDtype(depth);
ByteBuffer dtypeBuf = buildDeeplyNestedDtype(depth, kind);
ByteBuffer layoutBuf = buildFlatLayout(0, 1L, 0);

long footerOff = body.length;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@
import io.github.dfa1.vortex.core.fbs.FbsFooter;
import io.github.dfa1.vortex.core.fbs.FbsLayout;
import io.github.dfa1.vortex.core.fbs.FbsLayoutSpec;
import io.github.dfa1.vortex.core.fbs.FbsExtension;
import io.github.dfa1.vortex.core.fbs.FbsFixedSizeList;
import io.github.dfa1.vortex.core.fbs.FbsList;
import io.github.dfa1.vortex.core.fbs.FbsStruct_;
import io.github.dfa1.vortex.core.fbs.FbsPostscript;
import io.github.dfa1.vortex.core.fbs.FbsPostscriptSegment;
import io.github.dfa1.vortex.core.fbs.FbsPrimitive;
Expand Down Expand Up @@ -35,18 +38,50 @@ static ByteBuffer buildI64Dtype() {
return slice(fbb);
}

/// Builds a DType blob nesting `depth` levels of `List` around an I64 primitive leaf.
/// The recursive DType variant used to wrap each level of a depth-bomb dtype. Each constant
/// exercises a different recursion arm of `PostscriptParser.convertDType`.
enum NestKind {
/// Nest via `List` element type.
LIST,
/// Nest via single-field `Struct` field type.
STRUCT,
/// Nest via `FixedSizeList` element type.
FIXED_SIZE_LIST,
/// Nest via `Extension` storage type.
EXTENSION
}

/// Builds a DType blob nesting `depth` levels of `kind` around an I64 primitive leaf.
///
/// @param depth number of nested `List` wrappers around the leaf
/// @param depth number of nested wrappers around the leaf
/// @param kind the recursive DType variant to wrap each level with
/// @return the finished DType FlatBuffer
static ByteBuffer buildDeeplyNestedListDtype(int depth) {
var fbb = new FbsBuilder(depth * 32);
static ByteBuffer buildDeeplyNestedDtype(int depth, NestKind kind) {
var fbb = new FbsBuilder(depth * 64);
// Leaf first; FlatBuffer requires children be finished before parents.
int prim = FbsPrimitive.createFbsPrimitive(fbb, io.github.dfa1.vortex.core.fbs.FbsPType.I64, false);
int current = io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsPrimitive, prim);
for (int i = 0; i < depth; i++) {
int list = FbsList.createFbsList(fbb, current, false);
current = io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsList, list);
current = switch (kind) {
case LIST -> {
int list = FbsList.createFbsList(fbb, current, false);
yield io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsList, list);
}
case STRUCT -> {
int names = FbsStruct_.createNamesVector(fbb, new int[]{fbb.createString("f")});
int dtypes = FbsStruct_.createDtypesVector(fbb, new int[]{current});
int struct = FbsStruct_.createFbsStruct_(fbb, names, dtypes, false);
yield io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsStruct_, struct);
}
case FIXED_SIZE_LIST -> {
int fsl = FbsFixedSizeList.createFbsFixedSizeList(fbb, current, 1L, false);
yield io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsFixedSizeList, fsl);
}
case EXTENSION -> {
int ext = FbsExtension.createFbsExtension(fbb, fbb.createString("x"), current, 0);
yield io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsExtension, ext);
}
};
}
io.github.dfa1.vortex.core.fbs.FbsDType.finishFbsDTypeBuffer(fbb, current);
return slice(fbb);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import io.github.dfa1.vortex.core.fbs.FbsFooter;
import io.github.dfa1.vortex.core.fbs.FbsLayout;
import io.github.dfa1.vortex.core.fbs.FbsLayoutSpec;
import io.github.dfa1.vortex.core.fbs.FbsList;
import io.github.dfa1.vortex.core.fbs.FbsPrimitive;
import io.github.dfa1.vortex.core.fbs.FbsType;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
Expand Down Expand Up @@ -158,6 +160,51 @@ void parseBlobs_layoutMetadata_oneOverLimit_throws() {
.hasMessageContaining("metadata size");
}

// ── FbsDType depth bound: depth > MAX_DTYPE_DEPTH (64) ──────────────────────────

@Test
void parseBlobs_dtypeDepth_atLimit_parses() {
// Given — a single-child List chain whose deepest node sits at exactly MAX_DTYPE_DEPTH.
// convertDType is entered with depth == 64 there, and `64 > 64` is false. Kills the
// `depth >` relaxed to `depth >=` mutant, which would reject the legal max-depth dtype.
MemorySegment dtype = nestedListDtype(PostscriptParser.MAX_DTYPE_DEPTH);

// When
DType result = parseDtype(dtype);

// Then
assertThat(result).isInstanceOf(DType.List.class);
}

@Test
void parseBlobs_dtypeDepth_oneOverLimit_throws() {
// Given — one level deeper: the deepest node reaches depth 65, tripping `65 > 64`
MemorySegment dtype = nestedListDtype(PostscriptParser.MAX_DTYPE_DEPTH + 1);

// When / Then
assertThatThrownBy(() -> parseDtype(dtype))
.isInstanceOf(VortexException.class)
.hasMessageContaining("depth");
}

// ── parseBlobs dtype-presence guard: dtypeBuf != null && byteSize() > 0 ──────────

@Test
void parseBlobs_emptyDtypeBlob_yieldsNullDtype() {
// Given — a present but zero-length dtype blob. The guard treats an empty blob as "no dtype"
// (`byteSize() > 0`); relaxing `> 0` to `>= 0` would instead feed the empty buffer to
// getRootAsFbsDType and throw. Pins that an empty blob parses to a null dtype.
MemorySegment footer = footerWithLayoutSpecs("vortex.flat");
MemorySegment layout = flatLayout(0);
MemorySegment emptyDtype = MemorySegment.ofArray(new byte[0]);

// When
DType result = PostscriptParser.parseBlobs(footer, layout, emptyDtype).dtype();

// Then
assertThat(result).isNull();
}

// ── helpers ──────────────────────────────────────────────────────────────────

/// Parses a dtype blob through the full parseBlobs path, paired with a minimal valid
Expand Down Expand Up @@ -214,6 +261,20 @@ private static MemorySegment nestedLayout(int depth) {
return slice(fbb);
}

private static MemorySegment nestedListDtype(int depth) {
var fbb = new FbsBuilder(depth * 32 + 64);
// Leaf first; FlatBuffer requires children be finished before parents.
int prim = FbsPrimitive.createFbsPrimitive(fbb, io.github.dfa1.vortex.core.fbs.FbsPType.I64, false);
int current = io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsPrimitive, prim);
// Wrap `depth` times: the innermost leaf ends up at recursion depth == `depth`.
for (int i = 0; i < depth; i++) {
int list = FbsList.createFbsList(fbb, current, false);
current = io.github.dfa1.vortex.core.fbs.FbsDType.createFbsDType(fbb, FbsType.FbsList, list);
}
io.github.dfa1.vortex.core.fbs.FbsDType.finishFbsDTypeBuffer(fbb, current);
return slice(fbb);
}

private static MemorySegment decimalDtype(int precision, byte scale) {
var fbb = new FbsBuilder(64);
int dec = FbsDecimal.createFbsDecimal(fbb, precision, scale, false);
Expand Down
Loading