Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 95 additions & 0 deletions pkg/registry/file/dynamicpathdetector/compare_exec_args.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
package dynamicpathdetector

// CompareExecArgs reports whether a runtime exec argument vector matches a
// profile argument vector. The profile vector may contain two wildcard
// tokens:
//
//	DynamicIdentifier ("⋯")  — matches exactly one argument position.
//	WildcardIdentifier ("*") — matches zero or more consecutive arguments.
//
// Anything else is a literal-equality match. The match is anchored at both
// ends: every runtime argument must be consumed by the profile vector,
// either by a literal, a DynamicIdentifier, or absorbed into a
// WildcardIdentifier run. A profile token equal to one of the two
// identifiers is always treated as a wildcard, never as a literal.
//
// An empty profileArgs (nil or zero-length — the two are indistinguishable
// here) is treated as "no argv constraint" and matches any runtime vector.
// This keeps path-only Execs entries (the common case in user-defined
// ApplicationProfiles, which omit the Args field) from silently triggering
// R0040 just because the rule started consulting was_executed_with_args.
// As a consequence an empty profile vector cannot express "this exec must
// have no arguments": Args: []string{} also matches every runtime vector.
//
// The implementation is the iterative two-pointer wildcard match: walk both
// vectors in lockstep, remember the position just past the most recent
// WildcardIdentifier, and on a mismatch let that wildcard absorb one more
// runtime argument before retrying the tail. Only the most recent wildcard
// ever needs revisiting, because an earlier wildcard that stopped at its
// first workable position leaves the largest possible remainder for
// everything after it. This bounds the work at
// O(len(profileArgs) * len(runtimeArgs)) even on adversarial inputs such as
// `[*, *, *, …, x]` against a long vector that lacks x, and it needs no
// per-call allocations or recursion.
func CompareExecArgs(profileArgs, runtimeArgs []string) bool {
	// Empty profile = "no argv constraint" — matches everything.
	if len(profileArgs) == 0 {
		return true
	}

	pi, ri := 0, 0
	// resumePi is the profile index just past the most recent wildcard
	// (-1 until one has been seen); resumeRi is the first runtime index
	// that wildcard has not yet absorbed.
	resumePi, resumeRi := -1, 0

	for ri < len(runtimeArgs) {
		switch {
		case pi < len(profileArgs) && profileArgs[pi] == WildcardIdentifier:
			// Start by letting the wildcard absorb nothing and record
			// where to resume if the tail fails to match.
			pi++
			resumePi, resumeRi = pi, ri

		case pi < len(profileArgs) &&
			(profileArgs[pi] == DynamicIdentifier || profileArgs[pi] == runtimeArgs[ri]):
			// Single-position match: consume one token on each side.
			pi++
			ri++

		case resumePi >= 0:
			// Mismatch (or profile exhausted with runtime left over)
			// after a wildcard: widen that wildcard by one argument
			// and retry the profile tail from there.
			resumeRi++
			pi, ri = resumePi, resumeRi

		default:
			// Mismatch with no wildcard to fall back on.
			return false
		}
	}

	// Runtime is fully consumed; whatever remains of the profile may only
	// be wildcards, each of which can match zero arguments.
	for pi < len(profileArgs) && profileArgs[pi] == WildcardIdentifier {
		pi++
	}
	return pi == len(profileArgs)
}
217 changes: 217 additions & 0 deletions pkg/registry/file/dynamicpathdetector/tests/compare_exec_args_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
package dynamicpathdetectortests

import (
"testing"
"time"

"github.com/kubescape/storage/pkg/registry/file/dynamicpathdetector"
)

// CompareExecArgs matches a runtime argument vector against a profile
// argument vector that may contain two wildcard tokens:
//
// "⋯" (DynamicIdentifier) — matches exactly ONE argument position.
// "*" (WildcardIdentifier) — matches ZERO OR MORE consecutive args.
//
// Anything else is a literal string match. The match must be exact across
// the full vectors — extra runtime args left over after the profile is
// exhausted (with no trailing wildcard to absorb them) are a non-match.
// The one exception is an empty profile vector, which matches any runtime
// vector.

// TestCompareExecArgs_LiteralMatch exercises profiles built purely from
// literal tokens, together with the empty-profile special case.
func TestCompareExecArgs_LiteralMatch(t *testing.T) {
	type scenario struct {
		name    string
		profile []string
		runtime []string
		want    bool
	}

	table := []scenario{
		// An empty profile means "no argv constraint" and therefore
		// matches every runtime vector. This is pinned so that path-only
		// Execs entries in user-defined ApplicationProfiles don't
		// silently trigger R0040 when the rule consults
		// was_executed_with_args. See the storage/node-agent issue where
		// Test_28 (and others using path-only entries) failed because
		// the strict empty-empty match was firing R0040 on every legit
		// exec.
		{name: "both empty", profile: nil, runtime: nil, want: true},
		{name: "empty profile, non-empty runtime", profile: nil, runtime: []string{"a"}, want: true},
		{name: "empty profile, multi-arg runtime", profile: nil, runtime: []string{"a", "b", "c"}, want: true},
		{name: "non-empty profile, empty runtime", profile: []string{"a"}, runtime: nil, want: false},
		{name: "single literal match", profile: []string{"--help"}, runtime: []string{"--help"}, want: true},
		{name: "single literal mismatch", profile: []string{"--help"}, runtime: []string{"--version"}, want: false},
		{name: "profile longer than runtime", profile: []string{"a", "b"}, runtime: []string{"a"}, want: false},
		{name: "runtime longer than profile (no wildcard)", profile: []string{"a"}, runtime: []string{"a", "b"}, want: false},
		{name: "multi-literal match", profile: []string{"-l", "-a", "/tmp"}, runtime: []string{"-l", "-a", "/tmp"}, want: true},
		{name: "multi-literal mismatch in middle", profile: []string{"-l", "-a", "/tmp"}, runtime: []string{"-l", "-z", "/tmp"}, want: false},
	}

	for _, sc := range table {
		t.Run(sc.name, func(t *testing.T) {
			got := dynamicpathdetector.CompareExecArgs(sc.profile, sc.runtime)
			if got == sc.want {
				return
			}
			t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", sc.profile, sc.runtime, got, sc.want)
		})
	}
}

// TestCompareExecArgs_DynamicIdentifier checks that the "⋯" token stands
// in for exactly one runtime argument — never zero, never more than one.
func TestCompareExecArgs_DynamicIdentifier(t *testing.T) {
	type scenario struct {
		name             string
		profile, runtime []string
		want             bool
	}

	table := []scenario{
		{name: "⋯ matches one arg", profile: []string{"⋯"}, runtime: []string{"anything"}, want: true},
		{name: "⋯ does NOT match zero args", profile: []string{"⋯"}, runtime: nil, want: false},
		{name: "⋯ does NOT match two args", profile: []string{"⋯"}, runtime: []string{"a", "b"}, want: false},
		{
			name:    "⋯ in middle, full vector matches",
			profile: []string{"--user", "⋯", "--port", "8080"},
			runtime: []string{"--user", "alice", "--port", "8080"},
			want:    true,
		},
		{
			name:    "⋯ in middle, surrounding literal mismatch",
			profile: []string{"--user", "⋯", "--port", "8080"},
			runtime: []string{"--user", "alice", "--port", "9090"},
			want:    false,
		},
		{name: "adjacent ⋯⋯ matches exactly two args", profile: []string{"⋯", "⋯"}, runtime: []string{"a", "b"}, want: true},
		{name: "adjacent ⋯⋯ rejects one arg", profile: []string{"⋯", "⋯"}, runtime: []string{"a"}, want: false},
		{name: "adjacent ⋯⋯ rejects three args", profile: []string{"⋯", "⋯"}, runtime: []string{"a", "b", "c"}, want: false},
	}

	for _, sc := range table {
		t.Run(sc.name, func(t *testing.T) {
			matched := dynamicpathdetector.CompareExecArgs(sc.profile, sc.runtime)
			if matched != sc.want {
				t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", sc.profile, sc.runtime, matched, sc.want)
			}
		})
	}
}

// TestCompareExecArgs_WildcardIdentifier checks that the "*" token absorbs
// zero or more runtime arguments, whether it sits at the end of the profile
// or between two literal anchors.
func TestCompareExecArgs_WildcardIdentifier(t *testing.T) {
	type scenario struct {
		name             string
		profile, runtime []string
		want             bool
	}

	table := []scenario{
		// Lone wildcard.
		{name: "* matches empty runtime", profile: []string{"*"}, runtime: nil, want: true},
		{name: "* matches one arg", profile: []string{"*"}, runtime: []string{"a"}, want: true},
		{name: "* matches many args", profile: []string{"*"}, runtime: []string{"a", "b", "c", "d"}, want: true},

		// Wildcard after a literal prefix.
		{name: "trailing * with prefix match", profile: []string{"-c", "*"}, runtime: []string{"-c", "echo hi"}, want: true},
		{name: "trailing * absorbs nothing when runtime exact-prefix length", profile: []string{"-c", "*"}, runtime: []string{"-c"}, want: true},
		{name: "trailing * mismatch in literal prefix", profile: []string{"-c", "*"}, runtime: []string{"-x", "echo hi"}, want: false},

		// Wildcard between two literal anchors.
		{name: "middle * matches and re-anchors on literal", profile: []string{"sh", "*", "exit"}, runtime: []string{"sh", "-c", "echo hi", "exit"}, want: true},
		{name: "middle * with literal that does not appear", profile: []string{"sh", "*", "exit"}, runtime: []string{"sh", "-c", "echo hi"}, want: false},
		{name: "middle * matches when zero args between anchors", profile: []string{"sh", "*", "exit"}, runtime: []string{"sh", "exit"}, want: true},
	}

	for i := range table {
		sc := table[i]
		t.Run(sc.name, func(t *testing.T) {
			got := dynamicpathdetector.CompareExecArgs(sc.profile, sc.runtime)
			if got == sc.want {
				return
			}
			t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", sc.profile, sc.runtime, got, sc.want)
		})
	}
}

// TestCompareExecArgs_MixedTokens covers profiles that combine "⋯" and "*"
// (optionally with literals), pinning the minimum argument count each
// combination demands.
func TestCompareExecArgs_MixedTokens(t *testing.T) {
	type scenario struct {
		name             string
		profile, runtime []string
		want             bool
	}

	table := []scenario{
		{
			name:    "⋯ then * — needs at least one arg before the *",
			profile: []string{"⋯", "*"},
			runtime: []string{"a"},
			want:    true,
		},
		{
			name:    "⋯ then * — empty runtime fails (⋯ needs one)",
			profile: []string{"⋯", "*"},
			runtime: nil,
			want:    false,
		},
		{
			name:    "⋯ then * — many args ok",
			profile: []string{"⋯", "*"},
			runtime: []string{"a", "b", "c"},
			want:    true,
		},
		{
			name:    "* then ⋯ — needs at least one arg for ⋯",
			profile: []string{"*", "⋯"},
			runtime: []string{"x"},
			want:    true,
		},
		{
			name:    "* then ⋯ — empty runtime fails",
			profile: []string{"*", "⋯"},
			runtime: nil,
			want:    false,
		},
		{
			name:    "literal, ⋯, * — typical user pattern",
			profile: []string{"--user", "⋯", "*"},
			runtime: []string{"--user", "alice", "--verbose", "--out", "/tmp"},
			want:    true,
		},
		{
			name:    "literal, ⋯, * — runtime too short for ⋯",
			profile: []string{"--user", "⋯", "*"},
			runtime: []string{"--user"},
			want:    false,
		},
		// The next two use a non-nil empty runtime slice rather than nil.
		{
			name:    "only ⋯, runtime empty — fails (⋯ requires exactly one)",
			profile: []string{"⋯"},
			runtime: []string{},
			want:    false,
		},
		{
			name:    "only *, runtime empty — passes",
			profile: []string{"*"},
			runtime: []string{},
			want:    true,
		},
	}

	for _, sc := range table {
		t.Run(sc.name, func(t *testing.T) {
			matched := dynamicpathdetector.CompareExecArgs(sc.profile, sc.runtime)
			if matched != sc.want {
				t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", sc.profile, sc.runtime, matched, sc.want)
			}
		})
	}
}

// TestCompareExecArgs_RealisticPatterns runs the matcher over argument
// vectors shaped like everyday command lines (curl, sh -c, echo, ls).
func TestCompareExecArgs_RealisticPatterns(t *testing.T) {
	type scenario struct {
		name             string
		profile, runtime []string
		want             bool
	}

	table := []scenario{
		{
			name:    "curl with any URL",
			profile: []string{"-s", "⋯"},
			runtime: []string{"-s", "https://example.com"},
			want:    true,
		},
		{
			name:    "sh -c with any command",
			profile: []string{"-c", "*"},
			runtime: []string{"-c", "while true; do sleep 1; done"},
			want:    true,
		},
		{
			name:    "echo with any number of words",
			profile: []string{"hello", "*"},
			runtime: []string{"hello", "world", "from", "test"},
			want:    true,
		},
		{
			name:    "ls -l in arbitrary directory",
			profile: []string{"-l", "⋯"},
			runtime: []string{"-l", "/var/log"},
			want:    true,
		},
		{
			name:    "ls without args fails wildcard arg pattern",
			profile: []string{"-l", "⋯"},
			runtime: []string{"-l"},
			want:    false,
		},
	}

	for i := range table {
		sc := table[i]
		t.Run(sc.name, func(t *testing.T) {
			got := dynamicpathdetector.CompareExecArgs(sc.profile, sc.runtime)
			if got == sc.want {
				return
			}
			t.Errorf("CompareExecArgs(%v, %v) = %v, want %v", sc.profile, sc.runtime, got, sc.want)
		})
	}
}

// TestCompareExecArgs_ReDoSResistance pins that the matcher handles
// adversarial wildcard-heavy inputs in bounded time. The classic
// catastrophic-backtracking case is `[*, *, *, …, "literal"]` vs a
// long literal-runtime vector that mismatches the trailing literal
// — every prefix * has multiple split choices and the suffix
// mismatch only surfaces at the very end, so each path gets
// re-explored. A polynomial-time matcher handles this in O(P*R);
// naïve backtracking explores a combinatorial number of ways to
// distribute the runtime arguments across the wildcards.
//
// CodeRabbit flagged the unmemoised version on PR #27 (Major).
//
// The matcher runs in its own goroutine behind a hard timeout. A check
// of the elapsed time alone can only fire once the call returns, so a
// regression to exponential backtracking would otherwise stall the test
// binary until the global `go test` timeout instead of failing here.
func TestCompareExecArgs_ReDoSResistance(t *testing.T) {
	const (
		wildcardCount = 20
		runtimeLen    = 50

		// Soft budget: a polynomial-time matcher has on the order of a
		// thousand (profile, runtime) positions to consider for this
		// input, so 100ms is generous.
		budget = 100 * time.Millisecond
		// Hard limit: how long to wait before declaring the matcher
		// stuck and failing without waiting for it to return.
		hardLimit = 5 * time.Second
	)

	// Leading wildcards followed by a literal that never appears in the
	// runtime vector.
	profile := make([]string, 0, wildcardCount+1)
	for i := 0; i < wildcardCount; i++ {
		profile = append(profile, dynamicpathdetector.WildcardIdentifier)
	}
	profile = append(profile, "needle-that-does-not-exist")

	runtime := make([]string, 0, runtimeLen)
	for i := 0; i < runtimeLen; i++ {
		runtime = append(runtime, "a")
	}

	// Buffered so the goroutine can always deliver its result and exit,
	// even if the test has already given up waiting.
	result := make(chan bool, 1)
	start := time.Now()
	go func() {
		result <- dynamicpathdetector.CompareExecArgs(profile, runtime)
	}()

	select {
	case got := <-result:
		elapsed := time.Since(start)
		if got {
			t.Errorf("expected mismatch for trailing-literal that isn't in runtime")
		}
		if elapsed > budget {
			t.Errorf("matcher took %v on adversarial input — memoisation regression?", elapsed)
		}
	case <-time.After(hardLimit):
		t.Fatalf("matcher still running after %v on adversarial input — memoisation regression?", hardLimit)
	}
}
Loading