diff --git a/docs/problems/security-threat-model.md b/docs/problems/security-threat-model.md index 30ff7c5be..bb52fb739 100644 --- a/docs/problems/security-threat-model.md +++ b/docs/problems/security-threat-model.md @@ -421,6 +421,42 @@ This is distinct from hallucination (generating incorrect facts). Self-report un - Review agents that assess code quality or security properties produce natural-language assessments that are, structurally, self-reports. If a review agent claims "no security issues found," the harness can validate the format of the output but cannot independently verify the claim's truth. This is where property-based testing, coverage metrics, and static analysis serve as independent verification — they check the code directly rather than trusting the reviewer's summary. +## Cross-cutting concern: audit log integrity + +The security architecture depends on auditability (principle 4 below): every agent action is logged, attributable, and reviewable. But the audit log itself is a security-critical artifact. If an attacker (or a compromised agent) can modify the log after the fact, auditability is an illusion. This concern compounds with [agent self-report unreliability](#cross-cutting-concern-agent-self-report-unreliability): if an agent can both misrepresent its own actions and tamper with the log that would catch the misrepresentation, the combination defeats both controls. + +### The problem + +The current audit trail (`findings.jsonl` and related trace files) is append-only by convention, not by enforcement. A process with write access to the log directory can: + +- Delete entries that record suspicious activity +- Modify findings to downgrade severity (e.g., change `critical` to `low`) +- Insert fabricated entries to create a false trail +- Truncate the log to remove evidence of a compromise window + +In the sandbox model ([ADR 0017](../ADRs/0017-credential-isolation-for-sandboxed-agents.md)), the agent has write access to the workspace, which includes the audit log. The sandbox is the security boundary for credentials, but the audit log sits inside that boundary. A compromised agent that cannot exfiltrate credentials can still cover its tracks by tampering with the log. + +This matters most for forensics. When investigating a security incident, the first question is "what did the agent actually do?" If the log cannot be trusted, the investigation starts from zero. + +### Defense considerations + +- **Cryptographic hash chaining.** Each log entry includes a hash of the previous entry, creating a tamper-evident chain. Modifying or deleting any entry breaks the chain from that point forward. Verification is O(n) and requires no external infrastructure. This does not prevent tampering, but it makes tampering detectable. **Limitation:** hash chaining does not detect tail truncation (removing the last N entries). The remaining chain is internally consistent; it just ends earlier. Detecting tail truncation requires an external record of the expected chain length or latest hash. +- **Write-once external sink.** Stream audit events to an append-only external store (object storage with retention policies, a log aggregation service, or a separate write-only endpoint) that the sandbox cannot modify. This prevents tampering entirely but adds infrastructure complexity and a network dependency. +- **Post-run verification.** After each agent run, a harness-level step (outside the sandbox) verifies the audit log's integrity before the run is considered complete. If verification fails, the run is flagged for investigation regardless of whether the agent's output otherwise looks clean. +- **Signed entries.** Each audit entry is signed with a key the agent does not control (e.g., the harness signs entries before writing, or the trace system uses a key injected at sandbox creation and revoked at sandbox teardown). This is stronger than hash chaining but more complex to implement. + +### Relationship to commit signing + +Issue [#1685](https://github.com/fullsend-ai/fullsend/issues/1685) explores using gitsign for agent-generated commits. Audit log integrity is a prerequisite concern: if the log of what the agent did during the run cannot be trusted, signing the resulting commit provides provenance for the output but not accountability for the process. Both are needed, and hash-chained audit logs are a simpler first step that does not require external signing infrastructure. + +### Open questions + +- Should hash chaining use a seed derived from the run's trace ID, or a global chain that spans runs? Per-run chains are simpler but cannot detect deletion of entire runs. +- Is hash chaining sufficient, or does the threat model require external write-once storage? +- Should the harness verify log integrity synchronously (blocking the run) or asynchronously (flagging for later review)? +- How do we handle legitimate log rotation without breaking the chain? +- What is the right granularity for hashing: individual findings, batches, or the entire log? + ## Cross-cutting security principles 1. **Defense in depth** — no single control should be the only thing preventing an attack diff --git a/internal/cli/run.go b/internal/cli/run.go index 321430f26..daebc0cf6 100644 --- a/internal/cli/run.go +++ b/internal/cli/run.go @@ -978,6 +978,19 @@ func runAgent(ctx context.Context, agentName, fullsendDir, outputBase, targetRep printer.StepDone("Security findings extracted") } } + + findingsJSONL := filepath.Join(runDir, "security", "findings.jsonl") + if _, statErr := os.Stat(findingsJSONL); statErr == nil { + cv, verifyErr := security.VerifyChain(findingsJSONL) + if verifyErr != nil { + printer.StepWarn("Audit log verification error: " + verifyErr.Error()) + } else if !cv.Valid { + printer.StepFail(fmt.Sprintf("Audit log integrity check FAILED: %s", cv.BrokenMsg)) + return fmt.Errorf("audit log integrity check failed: %s", cv.BrokenMsg) + } else if cv.Entries > 0 { + printer.StepDone(fmt.Sprintf("Audit log integrity verified (%d entries)", cv.Entries)) + } + } } // 10. Print results. diff --git a/internal/security/trace.go b/internal/security/trace.go index 9fb37b7ce..fb13e6852 100644 --- a/internal/security/trace.go +++ b/internal/security/trace.go @@ -1,7 +1,9 @@ package security import ( + "bufio" "crypto/rand" + "crypto/sha256" "encoding/json" "fmt" "os" @@ -29,17 +31,75 @@ func IsValidTraceID(id string) bool { return reTraceID.MatchString(id) } +// seedHash is the well-known genesis hash for the first entry in a chain. +const seedHash = "0000000000000000000000000000000000000000000000000000000000000000" + // TracedFinding is a Finding enriched with trace and phase metadata for the -// JSONL audit log. +// JSONL audit log. PrevHash and Hash form a SHA-256 chain: each entry's Hash +// covers PrevHash and the rest of the entry, making tampering detectable. type TracedFinding struct { TraceID string `json:"trace_id"` Timestamp string `json:"timestamp"` Phase string `json:"phase"` // "host_input", "sandbox_context", "hook_pretool", "hook_posttool", "host_output" + PrevHash string `json:"prev_hash"` + Hash string `json:"hash"` Finding } +// computeHash returns the hex-encoded SHA-256 of prevHash concatenated with +// the JSON-encoded finding payload (all fields except prev_hash and hash). +func computeHash(prevHash string, tf TracedFinding) string { + payload := struct { + TraceID string `json:"trace_id"` + Timestamp string `json:"timestamp"` + Phase string `json:"phase"` + Finding + }{ + TraceID: tf.TraceID, + Timestamp: tf.Timestamp, + Phase: tf.Phase, + Finding: tf.Finding, + } + data, _ := json.Marshal(payload) + sum := sha256.Sum256(append([]byte(prevHash), data...)) + return fmt.Sprintf("%x", sum) +} + +// lastHash reads the final line of the JSONL file and extracts the hash field. +// Returns seedHash if the file does not exist or is empty. +func lastHash(path string) string { + f, err := os.Open(path) + if err != nil { + return seedHash + } + defer f.Close() + + var last string + scanner := bufio.NewScanner(f) + for scanner.Scan() { + last = scanner.Text() + } + if last == "" { + return seedHash + } + + var entry struct { + Hash string `json:"hash"` + } + if err := json.Unmarshal([]byte(last), &entry); err != nil || entry.Hash == "" { + return seedHash + } + return entry.Hash +} + // AppendFinding writes a traced finding as a JSON line to the given file path. +// It computes a SHA-256 hash chain: each entry's hash covers the previous +// entry's hash and the current entry's payload, making the log tamper-evident. func AppendFinding(path string, tf TracedFinding) error { + prev := lastHash(path) + tf.PrevHash = prev + tf.Hash = computeHash(prev, tf) + f, err := os.OpenFile(path, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o600) if err != nil { return fmt.Errorf("opening findings file: %w", err) @@ -55,3 +115,77 @@ func AppendFinding(path string, tf TracedFinding) error { } return nil } + +// ChainVerification holds the result of verifying a findings JSONL file. +type ChainVerification struct { + Valid bool + Entries int + BrokenAt int // 0-indexed; -1 if valid + BrokenMsg string // empty if valid +} + +// VerifyChain reads a findings JSONL file and verifies the hash chain +// integrity. Returns a ChainVerification indicating whether the chain is +// intact. Entries without hash fields (from older versions) are skipped. +func VerifyChain(path string) (ChainVerification, error) { + f, err := os.Open(path) + if err != nil { + return ChainVerification{}, fmt.Errorf("opening findings file: %w", err) + } + defer f.Close() + + scanner := bufio.NewScanner(f) + prev := seedHash + idx := 0 + + for scanner.Scan() { + line := scanner.Text() + if line == "" { + continue + } + + var tf TracedFinding + if err := json.Unmarshal([]byte(line), &tf); err != nil { + return ChainVerification{ + Valid: false, + Entries: idx, + BrokenAt: idx, + BrokenMsg: fmt.Sprintf("entry %d: invalid JSON: %v", idx, err), + }, nil + } + + // Skip entries from before hash chaining was added. + if tf.Hash == "" && tf.PrevHash == "" { + idx++ + continue + } + + if tf.PrevHash != prev { + return ChainVerification{ + Valid: false, + Entries: idx, + BrokenAt: idx, + BrokenMsg: fmt.Sprintf("entry %d: prev_hash mismatch: expected %s, got %s", idx, prev, tf.PrevHash), + }, nil + } + + expected := computeHash(prev, tf) + if tf.Hash != expected { + return ChainVerification{ + Valid: false, + Entries: idx, + BrokenAt: idx, + BrokenMsg: fmt.Sprintf("entry %d: hash mismatch: expected %s, got %s", idx, expected, tf.Hash), + }, nil + } + + prev = tf.Hash + idx++ + } + + if err := scanner.Err(); err != nil { + return ChainVerification{}, fmt.Errorf("reading findings file: %w", err) + } + + return ChainVerification{Valid: true, Entries: idx, BrokenAt: -1}, nil +} diff --git a/internal/security/trace_test.go b/internal/security/trace_test.go new file mode 100644 index 000000000..089cca9fa --- /dev/null +++ b/internal/security/trace_test.go @@ -0,0 +1,221 @@ +package security + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" +) + +func TestGenerateTraceID(t *testing.T) { + id := GenerateTraceID() + if !IsValidTraceID(id) { + t.Errorf("generated trace ID %q is not valid", id) + } +} + +func TestAppendFindingHashChain(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "findings.jsonl") + + tf1 := TracedFinding{ + TraceID: "00000000-0000-4000-8000-000000000001", + Timestamp: "2026-06-08T00:00:00Z", + Phase: "host_input", + Finding: Finding{ + Scanner: "test", + Name: "test-finding-1", + Severity: "high", + Detail: "first entry", + Position: -1, + }, + } + + if err := AppendFinding(path, tf1); err != nil { + t.Fatalf("AppendFinding 1: %v", err) + } + + tf2 := TracedFinding{ + TraceID: "00000000-0000-4000-8000-000000000001", + Timestamp: "2026-06-08T00:00:01Z", + Phase: "hook_pretool", + Finding: Finding{ + Scanner: "test", + Name: "test-finding-2", + Severity: "critical", + Detail: "second entry", + Position: 42, + }, + } + + if err := AppendFinding(path, tf2); err != nil { + t.Fatalf("AppendFinding 2: %v", err) + } + + // Verify the chain is intact. + result, err := VerifyChain(path) + if err != nil { + t.Fatalf("VerifyChain: %v", err) + } + if !result.Valid { + t.Errorf("chain should be valid, got broken at %d: %s", result.BrokenAt, result.BrokenMsg) + } + if result.Entries != 2 { + t.Errorf("expected 2 entries, got %d", result.Entries) + } + + // Read back and verify first entry has seedHash as prev_hash. + data, _ := os.ReadFile(path) + lines := splitLines(data) + var first TracedFinding + if err := json.Unmarshal([]byte(lines[0]), &first); err != nil { + t.Fatalf("unmarshal first: %v", err) + } + if first.PrevHash != seedHash { + t.Errorf("first entry prev_hash should be seed, got %s", first.PrevHash) + } + if first.Hash == "" { + t.Error("first entry hash should not be empty") + } + + // Verify second entry's prev_hash matches first entry's hash. + var second TracedFinding + if err := json.Unmarshal([]byte(lines[1]), &second); err != nil { + t.Fatalf("unmarshal second: %v", err) + } + if second.PrevHash != first.Hash { + t.Errorf("second prev_hash %s != first hash %s", second.PrevHash, first.Hash) + } +} + +func TestVerifyChainDetectsTampering(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "findings.jsonl") + + for i := 0; i < 3; i++ { + tf := TracedFinding{ + TraceID: "00000000-0000-4000-8000-000000000001", + Timestamp: "2026-06-08T00:00:00Z", + Phase: "host_input", + Finding: Finding{ + Scanner: "test", + Name: "finding", + Severity: "medium", + Detail: "entry", + Position: -1, + }, + } + if err := AppendFinding(path, tf); err != nil { + t.Fatalf("AppendFinding %d: %v", i, err) + } + } + + // Tamper: modify the second line's detail field. + data, _ := os.ReadFile(path) + lines := splitLines(data) + var tampered TracedFinding + json.Unmarshal([]byte(lines[1]), &tampered) + tampered.Detail = "TAMPERED" + newLine, _ := json.Marshal(tampered) + lines[1] = string(newLine) + + // Write tampered file. + var out []byte + for _, l := range lines { + out = append(out, []byte(l+"\n")...) + } + os.WriteFile(path, out, 0o600) + + result, err := VerifyChain(path) + if err != nil { + t.Fatalf("VerifyChain: %v", err) + } + if result.Valid { + t.Error("chain should be invalid after tampering") + } + if result.BrokenAt != 1 { + t.Errorf("expected break at entry 1, got %d", result.BrokenAt) + } +} + +func TestVerifyChainDetectsDeletion(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "findings.jsonl") + + for i := 0; i < 3; i++ { + tf := TracedFinding{ + TraceID: "00000000-0000-4000-8000-000000000001", + Timestamp: "2026-06-08T00:00:00Z", + Phase: "host_input", + Finding: Finding{ + Scanner: "test", + Name: "finding", + Severity: "medium", + Detail: "entry", + Position: -1, + }, + } + if err := AppendFinding(path, tf); err != nil { + t.Fatalf("AppendFinding %d: %v", i, err) + } + } + + // Delete the second entry (keep first and third). + data, _ := os.ReadFile(path) + lines := splitLines(data) + deleted := lines[0] + "\n" + lines[2] + "\n" + os.WriteFile(path, []byte(deleted), 0o600) + + result, err := VerifyChain(path) + if err != nil { + t.Fatalf("VerifyChain: %v", err) + } + if result.Valid { + t.Error("chain should be invalid after deletion") + } + if result.BrokenAt != 1 { + t.Errorf("expected break at entry 1, got %d", result.BrokenAt) + } +} + +func TestVerifyChainEmptyFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "findings.jsonl") + os.WriteFile(path, []byte(""), 0o600) + + result, err := VerifyChain(path) + if err != nil { + t.Fatalf("VerifyChain: %v", err) + } + if !result.Valid { + t.Error("empty file should be valid") + } + if result.Entries != 0 { + t.Errorf("expected 0 entries, got %d", result.Entries) + } +} + +func splitLines(data []byte) []string { + var lines []string + for _, line := range split(data) { + if line != "" { + lines = append(lines, line) + } + } + return lines +} + +func split(data []byte) []string { + var result []string + start := 0 + for i, b := range data { + if b == '\n' { + result = append(result, string(data[start:i])) + start = i + 1 + } + } + if start < len(data) { + result = append(result, string(data[start:])) + } + return result +}