From 43795bb4f0b677af2965aa6c3514c7bf596f6298 Mon Sep 17 00:00:00 2001 From: Entlein Date: Wed, 29 Apr 2026 10:41:27 +0200 Subject: [PATCH 1/3] feat(dynamicpathdetector): add CompareExecArgs matcher for wildcards in exec arg vectors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-defined ApplicationProfile entries can now express argument-vector patterns containing two wildcard tokens, exposed via the existing constants: DynamicIdentifier ("⋯") — matches exactly one argument position. WildcardIdentifier ("*") — matches zero or more consecutive args. Anything else is a literal-equality match. The match is anchored at both ends — every runtime arg must be consumed by the profile vector, either by a literal, a single-position ⋯, or a *-absorbing run. Implementation is recursive backtracking. Real exec arg vectors are short (typically ≤ a dozen entries) with at most a handful of wildcards, so the worst case stays well below a regex compile and we avoid the ReDoS surface that comes with regex. 40 unit subtests cover empty/literal/dynamic/wildcard/mixed/realistic patterns; matcher is consumer-ready for node-agent's CEL exec rule. --- .../dynamicpathdetector/compare_exec_args.go | 45 +++++ .../tests/compare_exec_args_test.go | 167 ++++++++++++++++++ 2 files changed, 212 insertions(+) create mode 100644 pkg/registry/file/dynamicpathdetector/compare_exec_args.go create mode 100644 pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go diff --git a/pkg/registry/file/dynamicpathdetector/compare_exec_args.go b/pkg/registry/file/dynamicpathdetector/compare_exec_args.go new file mode 100644 index 000000000..3bff8a79d --- /dev/null +++ b/pkg/registry/file/dynamicpathdetector/compare_exec_args.go @@ -0,0 +1,45 @@ +package dynamicpathdetector + +// CompareExecArgs reports whether a runtime exec argument vector matches a +// profile argument vector. The profile vector may contain two wildcard +// tokens: +// +// DynamicIdentifier ("⋯") — matches exactly one argument position. +// WildcardIdentifier ("*") — matches zero or more consecutive arguments. +// +// Anything else is a literal-equality match. The match is anchored at both +// ends: every runtime argument must be consumed by the profile vector, +// either by a literal, a DynamicIdentifier, or absorbed into a +// WildcardIdentifier run. +// +// Implementation is recursive backtracking. Argument vectors in real +// profiles are short (typically ≤ a dozen entries) and contain at most a +// handful of wildcards, so the worst case stays well below the cost of a +// regex compile. +func CompareExecArgs(profileArgs, runtimeArgs []string) bool { + if len(profileArgs) == 0 { + return len(runtimeArgs) == 0 + } + + head := profileArgs[0] + + if head == WildcardIdentifier { + // Try absorbing 0..len(runtimeArgs) of the runtime into this *, + // then match the remaining profile against the remaining runtime. + for k := 0; k <= len(runtimeArgs); k++ { + if CompareExecArgs(profileArgs[1:], runtimeArgs[k:]) { + return true + } + } + return false + } + + if len(runtimeArgs) == 0 { + return false + } + + if head == DynamicIdentifier || head == runtimeArgs[0] { + return CompareExecArgs(profileArgs[1:], runtimeArgs[1:]) + } + return false +} diff --git a/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go b/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go new file mode 100644 index 000000000..9add938b0 --- /dev/null +++ b/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go @@ -0,0 +1,167 @@ +package dynamicpathdetectortests + +import ( + "testing" + + "github.com/kubescape/storage/pkg/registry/file/dynamicpathdetector" +) + +// CompareExecArgs matches a runtime argument vector against a profile +// argument vector that may contain two wildcard tokens: +// +// "⋯" (DynamicIdentifier) — matches exactly ONE argument position. +// "*" (WildcardIdentifier) — matches ZERO OR MORE consecutive args. +// +// Anything else is a literal string match. The match must be exact across +// the full vectors — extra runtime args after the profile is exhausted (and +// no trailing wildcard absorbs them) is a non-match. + +func TestCompareExecArgs_LiteralMatch(t *testing.T) { + cases := []struct { + name string + profile []string + runtime []string + want bool + }{ + {"both empty", nil, nil, true}, + {"empty profile, non-empty runtime", nil, []string{"a"}, false}, + {"non-empty profile, empty runtime", []string{"a"}, nil, false}, + {"single literal match", []string{"--help"}, []string{"--help"}, true}, + {"single literal mismatch", []string{"--help"}, []string{"--version"}, false}, + {"profile longer than runtime", []string{"a", "b"}, []string{"a"}, false}, + {"runtime longer than profile (no wildcard)", []string{"a"}, []string{"a", "b"}, false}, + {"multi-literal match", []string{"-l", "-a", "/tmp"}, []string{"-l", "-a", "/tmp"}, true}, + {"multi-literal mismatch in middle", []string{"-l", "-a", "/tmp"}, []string{"-l", "-z", "/tmp"}, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := dynamicpathdetector.CompareExecArgs(tc.profile, tc.runtime); got != tc.want { + t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", tc.profile, tc.runtime, got, tc.want) + } + }) + } +} + +func TestCompareExecArgs_DynamicIdentifier(t *testing.T) { + cases := []struct { + name string + profile []string + runtime []string + want bool + }{ + {"⋯ matches one arg", []string{"⋯"}, []string{"anything"}, true}, + {"⋯ does NOT match zero args", []string{"⋯"}, nil, false}, + {"⋯ does NOT match two args", []string{"⋯"}, []string{"a", "b"}, false}, + {"⋯ in middle, full vector matches", []string{"--user", "⋯", "--port", "8080"}, []string{"--user", "alice", "--port", "8080"}, true}, + {"⋯ in middle, surrounding literal mismatch", []string{"--user", "⋯", "--port", "8080"}, []string{"--user", "alice", "--port", "9090"}, false}, + {"adjacent ⋯⋯ matches exactly two args", []string{"⋯", "⋯"}, []string{"a", "b"}, true}, + {"adjacent ⋯⋯ rejects one arg", []string{"⋯", "⋯"}, []string{"a"}, false}, + {"adjacent ⋯⋯ rejects three args", []string{"⋯", "⋯"}, []string{"a", "b", "c"}, false}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := dynamicpathdetector.CompareExecArgs(tc.profile, tc.runtime); got != tc.want { + t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", tc.profile, tc.runtime, got, tc.want) + } + }) + } +} + +func TestCompareExecArgs_WildcardIdentifier(t *testing.T) { + cases := []struct { + name string + profile []string + runtime []string + want bool + }{ + {"* matches empty runtime", []string{"*"}, nil, true}, + {"* matches one arg", []string{"*"}, []string{"a"}, true}, + {"* matches many args", []string{"*"}, []string{"a", "b", "c", "d"}, true}, + {"trailing * with prefix match", []string{"-c", "*"}, []string{"-c", "echo hi"}, true}, + {"trailing * absorbs nothing when runtime exact-prefix length", []string{"-c", "*"}, []string{"-c"}, true}, + {"trailing * mismatch in literal prefix", []string{"-c", "*"}, []string{"-x", "echo hi"}, false}, + {"middle * matches and re-anchors on literal", []string{"sh", "*", "exit"}, []string{"sh", "-c", "echo hi", "exit"}, true}, + {"middle * with literal that does not appear", []string{"sh", "*", "exit"}, []string{"sh", "-c", "echo hi"}, false}, + {"middle * matches when zero args between anchors", []string{"sh", "*", "exit"}, []string{"sh", "exit"}, true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := dynamicpathdetector.CompareExecArgs(tc.profile, tc.runtime); got != tc.want { + t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", tc.profile, tc.runtime, got, tc.want) + } + }) + } +} + +func TestCompareExecArgs_MixedTokens(t *testing.T) { + cases := []struct { + name string + profile []string + runtime []string + want bool + }{ + {"⋯ then * — needs at least one arg before the *", + []string{"⋯", "*"}, []string{"a"}, true}, + {"⋯ then * — empty runtime fails (⋯ needs one)", + []string{"⋯", "*"}, nil, false}, + {"⋯ then * — many args ok", + []string{"⋯", "*"}, []string{"a", "b", "c"}, true}, + {"* then ⋯ — needs at least one arg for ⋯", + []string{"*", "⋯"}, []string{"x"}, true}, + {"* then ⋯ — empty runtime fails", + []string{"*", "⋯"}, nil, false}, + {"literal, ⋯, * — typical user pattern", + []string{"--user", "⋯", "*"}, []string{"--user", "alice", "--verbose", "--out", "/tmp"}, true}, + {"literal, ⋯, * — runtime too short for ⋯", + []string{"--user", "⋯", "*"}, []string{"--user"}, false}, + {"only ⋯, runtime empty — fails (⋯ requires exactly one)", + []string{"⋯"}, []string{}, false}, + {"only *, runtime empty — passes", + []string{"*"}, []string{}, true}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := dynamicpathdetector.CompareExecArgs(tc.profile, tc.runtime); got != tc.want { + t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", tc.profile, tc.runtime, got, tc.want) + } + }) + } +} + +func TestCompareExecArgs_RealisticPatterns(t *testing.T) { + cases := []struct { + name string + profile []string + runtime []string + want bool + }{ + {"curl with any URL", []string{"-s", "⋯"}, []string{"-s", "https://example.com"}, true}, + {"sh -c with any command", + []string{"-c", "*"}, + []string{"-c", "while true; do sleep 1; done"}, + true, + }, + {"echo with any number of words", + []string{"hello", "*"}, + []string{"hello", "world", "from", "test"}, + true, + }, + {"ls -l in arbitrary directory", + []string{"-l", "⋯"}, + []string{"-l", "/var/log"}, + true, + }, + {"ls without args fails wildcard arg pattern", + []string{"-l", "⋯"}, + []string{"-l"}, + false, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if got := dynamicpathdetector.CompareExecArgs(tc.profile, tc.runtime); got != tc.want { + t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", tc.profile, tc.runtime, got, tc.want) + } + }) + } +} From b0d68d3d15841a667696be4747a9e3af89fdbdde Mon Sep 17 00:00:00 2001 From: Entlein Date: Mon, 4 May 2026 11:38:01 +0200 Subject: [PATCH 2/3] fix(matcher): empty profile Args = wildcard match (no argv constraint) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit (node-agent PR #38). Path-only Execs entries in user-defined ApplicationProfiles (the common case) carry a nil/empty Args slice. The strict empty-vs-empty semantics caused R0040 to fire on every legit exec of those paths because was_executed_with_args returned false even when the profile had explicitly allowed the path. Split CompareExecArgs into a thin outer wrapper (empty profile → match) and a strict inner matcher (matchExecArgs, retains the original empty- empty anchor). This preserves the recursive backtracking's anchoring contract — profile fully consumed in the middle of recursion still fails when runtime has args left — while letting the public API treat empty input as 'no constraint'. Test pin: empty profile + multi-arg runtime returns true; both-empty still true (degenerate); non-empty profile + empty runtime stays false. Existing 30+ subtests for literal/dynamic/wildcard/mixed all pass. --- .../dynamicpathdetector/compare_exec_args.go | 27 +++++++++++++++++-- .../tests/compare_exec_args_test.go | 9 ++++++- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/pkg/registry/file/dynamicpathdetector/compare_exec_args.go b/pkg/registry/file/dynamicpathdetector/compare_exec_args.go index 3bff8a79d..38c20abc4 100644 --- a/pkg/registry/file/dynamicpathdetector/compare_exec_args.go +++ b/pkg/registry/file/dynamicpathdetector/compare_exec_args.go @@ -12,11 +12,34 @@ package dynamicpathdetector // either by a literal, a DynamicIdentifier, or absorbed into a // WildcardIdentifier run. // +// Empty profileArgs is treated as "no argv constraint" — i.e. matches any +// runtime arg vector. This keeps path-only Execs entries (the common case +// in user-defined ApplicationProfiles, which omit the Args field) from +// silently triggering R0040 just because the rule started consulting +// was_executed_with_args. A user that wants to assert "this exec must have +// no args" can write Args: []string{} in their profile and the empty +// runtime vector still matches by virtue of the wildcard semantics. +// // Implementation is recursive backtracking. Argument vectors in real // profiles are short (typically ≤ a dozen entries) and contain at most a // handful of wildcards, so the worst case stays well below the cost of a // regex compile. func CompareExecArgs(profileArgs, runtimeArgs []string) bool { + // Outer-level empty profile = "no argv constraint" — wildcard match. + // The inner matcher keeps strict empty-empty semantics so anchoring + // during recursion (`profile fully consumed but runtime has more`) + // remains a mismatch. + if len(profileArgs) == 0 { + return true + } + return matchExecArgs(profileArgs, runtimeArgs) +} + +// matchExecArgs is the strict recursive matcher. Both sides must consume +// fully for a match; this is what gives the function its anchored shape. +// Callers that want "no argv constraint" semantics on empty profile go +// through CompareExecArgs, which short-circuits before this is reached. +func matchExecArgs(profileArgs, runtimeArgs []string) bool { if len(profileArgs) == 0 { return len(runtimeArgs) == 0 } @@ -27,7 +50,7 @@ func CompareExecArgs(profileArgs, runtimeArgs []string) bool { // Try absorbing 0..len(runtimeArgs) of the runtime into this *, // then match the remaining profile against the remaining runtime. for k := 0; k <= len(runtimeArgs); k++ { - if CompareExecArgs(profileArgs[1:], runtimeArgs[k:]) { + if matchExecArgs(profileArgs[1:], runtimeArgs[k:]) { return true } } @@ -39,7 +62,7 @@ func CompareExecArgs(profileArgs, runtimeArgs []string) bool { } if head == DynamicIdentifier || head == runtimeArgs[0] { - return CompareExecArgs(profileArgs[1:], runtimeArgs[1:]) + return matchExecArgs(profileArgs[1:], runtimeArgs[1:]) } return false } diff --git a/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go b/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go index 9add938b0..774224f5b 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go @@ -23,8 +23,15 @@ func TestCompareExecArgs_LiteralMatch(t *testing.T) { runtime []string want bool }{ + // Empty profileArgs = "no argv constraint" — matches any runtime. + // Pinned this way so path-only Execs entries in user-defined + // ApplicationProfiles don't silently trigger R0040 when the rule + // consults was_executed_with_args. See storage/node-agent issue + // where Test_28 (and others using path-only entries) failed because + // the strict empty-empty match was firing R0040 on every legit exec. {"both empty", nil, nil, true}, - {"empty profile, non-empty runtime", nil, []string{"a"}, false}, + {"empty profile, non-empty runtime", nil, []string{"a"}, true}, + {"empty profile, multi-arg runtime", nil, []string{"a", "b", "c"}, true}, {"non-empty profile, empty runtime", []string{"a"}, nil, false}, {"single literal match", []string{"--help"}, []string{"--help"}, true}, {"single literal mismatch", []string{"--help"}, []string{"--version"}, false}, From 9f087406fb39bb9e829c3c2dee0e364eb669c298 Mon Sep 17 00:00:00 2001 From: Entlein Date: Wed, 6 May 2026 19:58:02 +0200 Subject: [PATCH 3/3] fix(matcher): memoise CompareExecArgs to bound worst-case to O(P*R) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeRabbit Major finding on storage PR #27 (compare_exec_args.go:56). Naïve recursive backtracking on '[*, *, *, …, literal]' patterns re-explores every prefix split before the trailing-literal mismatch surfaces, degrading to exponential time on adversarial inputs. This is the canonical ReDoS-style hazard for wildcard matchers. Switch to index-based recursion with memoisation on (profileIndex, runtimeIndex) state pairs. Both indices only shrink during recursion so the reachable state space is bounded by (len(profile)+1) * (len(runtime)+1), giving O(P*R) total work. Behaviour unchanged: all 30+ existing subtests across literal / DynamicIdentifier / WildcardIdentifier / mixed / realistic groups still pass. Outer empty-profile-as-wildcard short-circuit retained. Add TestCompareExecArgs_ReDoSResistance: 20 leading wildcards + a non-matching trailing literal vs a 50-element runtime. The naïve matcher would take seconds (~2^20 path splits); the memoised matcher completes in microseconds. Test asserts <100ms — generous budget that catches any regression to the unmemoised form. --- .../dynamicpathdetector/compare_exec_args.go | 83 ++++++++++++------- .../tests/compare_exec_args_test.go | 43 ++++++++++ 2 files changed, 98 insertions(+), 28 deletions(-) diff --git a/pkg/registry/file/dynamicpathdetector/compare_exec_args.go b/pkg/registry/file/dynamicpathdetector/compare_exec_args.go index 38c20abc4..31550a467 100644 --- a/pkg/registry/file/dynamicpathdetector/compare_exec_args.go +++ b/pkg/registry/file/dynamicpathdetector/compare_exec_args.go @@ -20,10 +20,15 @@ package dynamicpathdetector // no args" can write Args: []string{} in their profile and the empty // runtime vector still matches by virtue of the wildcard semantics. // -// Implementation is recursive backtracking. Argument vectors in real -// profiles are short (typically ≤ a dozen entries) and contain at most a -// handful of wildcards, so the worst case stays well below the cost of a -// regex compile. +// Implementation is index-based recursive backtracking with memoisation +// on (profileIndex, runtimeIndex) state pairs. The naive backtracking +// form would degrade to exponential time on adversarial inputs like +// `[*, *, *, …, x]` against a long literal vector — every prefix `*` +// has multiple split choices and the suffix mismatch only surfaces +// at the very end, so each path gets re-explored. Memoisation bounds +// the work at O(len(profile) * len(runtime)) — i.e. quadratic in the +// vector lengths, the standard wildcard-match complexity. CodeRabbit +// flagged this as a Major on PR #27. func CompareExecArgs(profileArgs, runtimeArgs []string) bool { // Outer-level empty profile = "no argv constraint" — wildcard match. // The inner matcher keeps strict empty-empty semantics so anchoring @@ -32,37 +37,59 @@ func CompareExecArgs(profileArgs, runtimeArgs []string) bool { if len(profileArgs) == 0 { return true } - return matchExecArgs(profileArgs, runtimeArgs) -} -// matchExecArgs is the strict recursive matcher. Both sides must consume -// fully for a match; this is what gives the function its anchored shape. -// Callers that want "no argv constraint" semantics on empty profile go -// through CompareExecArgs, which short-circuits before this is reached. -func matchExecArgs(profileArgs, runtimeArgs []string) bool { - if len(profileArgs) == 0 { - return len(runtimeArgs) == 0 - } + // State key for memoisation: (pi, ri) is the suffix-matching position + // in profile and runtime vectors respectively. Because both sides only + // shrink (we never re-enter a prefix), there are at most + // (len(profile)+1) * (len(runtime)+1) reachable states. + type state struct{ pi, ri int } + memo := make(map[state]bool, (len(profileArgs)+1)*(len(runtimeArgs)+1)) + seen := make(map[state]bool, (len(profileArgs)+1)*(len(runtimeArgs)+1)) + + var match func(pi, ri int) bool + match = func(pi, ri int) bool { + s := state{pi: pi, ri: ri} + if seen[s] { + return memo[s] + } + seen[s] = true + + // Profile fully consumed → runtime must also be fully consumed + // (anchored match). + if pi == len(profileArgs) { + memo[s] = ri == len(runtimeArgs) + return memo[s] + } - head := profileArgs[0] + head := profileArgs[pi] - if head == WildcardIdentifier { - // Try absorbing 0..len(runtimeArgs) of the runtime into this *, - // then match the remaining profile against the remaining runtime. - for k := 0; k <= len(runtimeArgs); k++ { - if matchExecArgs(profileArgs[1:], runtimeArgs[k:]) { - return true + if head == WildcardIdentifier { + // Try absorbing 0..(remaining runtime) into this *, + // then match the rest. First successful split wins. + for k := ri; k <= len(runtimeArgs); k++ { + if match(pi+1, k) { + memo[s] = true + return true + } } + memo[s] = false + return false + } + + // Non-wildcard head needs a runtime argument to consume. + if ri == len(runtimeArgs) { + memo[s] = false + return false } - return false - } - if len(runtimeArgs) == 0 { + if head == DynamicIdentifier || head == runtimeArgs[ri] { + memo[s] = match(pi+1, ri+1) + return memo[s] + } + + memo[s] = false return false } - if head == DynamicIdentifier || head == runtimeArgs[0] { - return matchExecArgs(profileArgs[1:], runtimeArgs[1:]) - } - return false + return match(0, 0) } diff --git a/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go b/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go index 774224f5b..82ab0da4a 100644 --- a/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go +++ b/pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go @@ -2,6 +2,7 @@ package dynamicpathdetectortests import ( "testing" + "time" "github.com/kubescape/storage/pkg/registry/file/dynamicpathdetector" ) @@ -172,3 +173,45 @@ func TestCompareExecArgs_RealisticPatterns(t *testing.T) { }) } } + +// TestCompareExecArgs_ReDoSResistance pins that the matcher handles +// adversarial wildcard-heavy inputs in bounded time. The classic +// catastrophic-backtracking case is `[*, *, *, …, "literal"]` vs a +// long literal-runtime vector that mismatches the trailing literal +// — every prefix * has multiple split choices and the suffix +// mismatch only surfaces at the very end, so each path gets +// re-explored. With memoisation this is O(P*R); without it, naïve +// recursion would be exponential. +// +// CodeRabbit flagged the unmemoised version on PR #27 (Major). +func TestCompareExecArgs_ReDoSResistance(t *testing.T) { + // 20 leading wildcards + a literal that won't match. Without + // memoisation, the naïve matcher tries roughly 2^20 path splits + // before failing — observable as a many-second test. The + // memoised version completes in microseconds. + profile := make([]string, 0, 21) + for i := 0; i < 20; i++ { + profile = append(profile, dynamicpathdetector.WildcardIdentifier) + } + profile = append(profile, "needle-that-does-not-exist") + + runtime := make([]string, 0, 50) + for i := 0; i < 50; i++ { + runtime = append(runtime, "a") + } + + start := time.Now() + got := dynamicpathdetector.CompareExecArgs(profile, runtime) + elapsed := time.Since(start) + + if got { + t.Errorf("expected mismatch for trailing-literal that isn't in runtime") + } + // Memoised matcher: 21 * 51 = ~1100 states, each O(R) work for + // the wildcard split → total bound ~50K ops. Generous budget of + // 100ms catches any regression to the unmemoised form (which + // would be measured in seconds, not milliseconds, on this input). + if elapsed > 100*time.Millisecond { + t.Errorf("matcher took %v on adversarial input — memoisation regression?", elapsed) + } +}