-
Notifications
You must be signed in to change notification settings - Fork 56
docs(security): add audit log integrity to threat model #2010
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
6b7cff5
d314317
cfe2e01
5e3d64b
8abd8e2
965cb17
627e603
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,7 +1,9 @@ | ||
| package security | ||
|
|
||
| import ( | ||
| "bufio" | ||
| "crypto/rand" | ||
| "crypto/sha256" | ||
| "encoding/json" | ||
| "fmt" | ||
| "os" | ||
|
|
@@ -29,17 +31,75 @@ func IsValidTraceID(id string) bool { | |
| return reTraceID.MatchString(id) | ||
| } | ||
|
|
||
| // seedHash is the well-known genesis hash for the first entry in a chain. | ||
| const seedHash = "0000000000000000000000000000000000000000000000000000000000000000" | ||
|
|
||
| // TracedFinding is a Finding enriched with trace and phase metadata for the | ||
| // JSONL audit log. | ||
| // JSONL audit log. PrevHash and Hash form a SHA-256 chain: each entry's Hash | ||
| // covers PrevHash and the rest of the entry, making tampering detectable. | ||
| type TracedFinding struct { | ||
| TraceID string `json:"trace_id"` | ||
| Timestamp string `json:"timestamp"` | ||
| Phase string `json:"phase"` // "host_input", "sandbox_context", "hook_pretool", "hook_posttool", "host_output" | ||
| PrevHash string `json:"prev_hash"` | ||
| Hash string `json:"hash"` | ||
| Finding | ||
| } | ||
|
|
||
| // computeHash returns the hex-encoded SHA-256 of prevHash concatenated with | ||
| // the JSON-encoded finding payload (all fields except prev_hash and hash). | ||
| func computeHash(prevHash string, tf TracedFinding) string { | ||
| payload := struct { | ||
| TraceID string `json:"trace_id"` | ||
| Timestamp string `json:"timestamp"` | ||
| Phase string `json:"phase"` | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [low] error-handling-gap computeHash discards the json.Marshal error with data, _ := json.Marshal(payload). Deviates from the package established error-handling pattern.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The payload struct contains only string fields. json.Marshal cannot fail on a struct of strings (no channels, functions, or unsupported types). Accepting the _ here avoids an error path that can never trigger. |
||
| Finding | ||
| }{ | ||
| TraceID: tf.TraceID, | ||
| Timestamp: tf.Timestamp, | ||
| Phase: tf.Phase, | ||
| Finding: tf.Finding, | ||
| } | ||
| data, _ := json.Marshal(payload) | ||
| sum := sha256.Sum256(append([]byte(prevHash), data...)) | ||
| return fmt.Sprintf("%x", sum) | ||
| } | ||
|
|
||
| // lastHash reads the final line of the JSONL file and extracts the hash field. | ||
| // Returns seedHash if the file does not exist or is empty. | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [low] edge-case lastHash uses bufio.Scanner with the default 64KB buffer. Lines exceeding this limit cause scanner.Scan() to return false, and lastHash silently returns seedHash — forking the chain. scanner.Err() is not checked after the scan loop.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. valid point, i think in practice, a single JSONL finding entry is unlikely to exceed 64KB (typical entries are under 1KB), but adding scanner.Err() check and a larger buffer would make this more robust. Happy to add if the maintainers want it |
||
| func lastHash(path string) string { | ||
| f, err := os.Open(path) | ||
| if err != nil { | ||
| return seedHash | ||
| } | ||
| defer f.Close() | ||
|
|
||
| var last string | ||
| scanner := bufio.NewScanner(f) | ||
| for scanner.Scan() { | ||
| last = scanner.Text() | ||
| } | ||
| if last == "" { | ||
| return seedHash | ||
| } | ||
|
|
||
| var entry struct { | ||
| Hash string `json:"hash"` | ||
| } | ||
| if err := json.Unmarshal([]byte(last), &entry); err != nil || entry.Hash == "" { | ||
| return seedHash | ||
| } | ||
| return entry.Hash | ||
| } | ||
|
|
||
| // AppendFinding writes a traced finding as a JSON line to the given file path. | ||
| // It computes a SHA-256 hash chain: each entry's hash covers the previous | ||
| // entry's hash and the current entry's payload, making the log tamper-evident. | ||
| func AppendFinding(path string, tf TracedFinding) error { | ||
| prev := lastHash(path) | ||
| tf.PrevHash = prev | ||
| tf.Hash = computeHash(prev, tf) | ||
|
|
||
| f, err := os.OpenFile(path, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o600) | ||
| if err != nil { | ||
| return fmt.Errorf("opening findings file: %w", err) | ||
|
|
@@ -55,3 +115,77 @@ func AppendFinding(path string, tf TracedFinding) error { | |
| } | ||
| return nil | ||
| } | ||
|
|
||
| // ChainVerification holds the result of verifying a findings JSONL file. | ||
| type ChainVerification struct { | ||
| Valid bool | ||
| Entries int | ||
| BrokenAt int // 0-indexed; -1 if valid | ||
| BrokenMsg string // empty if valid | ||
| } | ||
|
|
||
| // VerifyChain reads a findings JSONL file and verifies the hash chain | ||
| // integrity. Returns a ChainVerification indicating whether the chain is | ||
| // intact. Entries without hash fields (from older versions) are skipped. | ||
| func VerifyChain(path string) (ChainVerification, error) { | ||
| f, err := os.Open(path) | ||
| if err != nil { | ||
| return ChainVerification{}, fmt.Errorf("opening findings file: %w", err) | ||
| } | ||
| defer f.Close() | ||
|
|
||
| scanner := bufio.NewScanner(f) | ||
| prev := seedHash | ||
| idx := 0 | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [low] edge-case VerifyChain skips legacy entries (both Hash and PrevHash empty) without updating prev. Legacy entries can be deleted, inserted, or modified without detection — a known trade-off worth documenting in code comments.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Intentional. Legacy entries are skipped for backward compatibility so that existing logs from before hash chaining was added don't break verification. The trade-off is documented in the threat model's open questions section (per-run vs. global chains). Once all entries are hashed, this path becomes dead code |
||
| for scanner.Scan() { | ||
| line := scanner.Text() | ||
| if line == "" { | ||
| continue | ||
| } | ||
|
|
||
| var tf TracedFinding | ||
| if err := json.Unmarshal([]byte(line), &tf); err != nil { | ||
| return ChainVerification{ | ||
| Valid: false, | ||
| Entries: idx, | ||
| BrokenAt: idx, | ||
| BrokenMsg: fmt.Sprintf("entry %d: invalid JSON: %v", idx, err), | ||
| }, nil | ||
| } | ||
|
|
||
| // Skip entries from before hash chaining was added. | ||
| if tf.Hash == "" && tf.PrevHash == "" { | ||
| idx++ | ||
| continue | ||
| } | ||
|
|
||
| if tf.PrevHash != prev { | ||
| return ChainVerification{ | ||
| Valid: false, | ||
| Entries: idx, | ||
| BrokenAt: idx, | ||
| BrokenMsg: fmt.Sprintf("entry %d: prev_hash mismatch: expected %s, got %s", idx, prev, tf.PrevHash), | ||
| }, nil | ||
| } | ||
|
|
||
| expected := computeHash(prev, tf) | ||
| if tf.Hash != expected { | ||
| return ChainVerification{ | ||
| Valid: false, | ||
| Entries: idx, | ||
| BrokenAt: idx, | ||
| BrokenMsg: fmt.Sprintf("entry %d: hash mismatch: expected %s, got %s", idx, expected, tf.Hash), | ||
| }, nil | ||
| } | ||
|
|
||
| prev = tf.Hash | ||
| idx++ | ||
| } | ||
|
|
||
| if err := scanner.Err(); err != nil { | ||
| return ChainVerification{}, fmt.Errorf("reading findings file: %w", err) | ||
| } | ||
|
|
||
| return ChainVerification{Valid: true, Entries: idx, BrokenAt: -1}, nil | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[medium] logic-error
VerifyChain failure calls printer.StepFail but does not return an error or set a non-zero exit code. Every other StepFail call in run.go is followed by return fmt.Errorf(...) to actually halt the run. The threat model document added in this PR states post-run verification should flag the run for investigation if verification fails. The current implementation contradicts this — a tampered log produces a print-only message that does not affect the run outcome.
Suggested fix: Return an error when cv.Valid is false, consistent with the pattern used by all other StepFail calls in this function. If soft-fail is intentional for the initial rollout, add a code comment and a TODO referencing the threat model open question about synchronous vs. asynchronous verification.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in 8abd8e2. StepFail is now followed by return fmt.Errorf(...), consistent with the rest of run.go. A tampered log halts the run