diff --git a/README.md b/README.md index 3362448..35c3491 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,9 @@ single zero-config command. - **GitHub-wide dorking** — `--dork` searches *all* of GitHub (Code Search) for the target's leaks using a parameterized dork corpus, scanning each matched file (`SOURCE=dork`). Add `--domain acme.com`, supply `--dork-file`, or cap with `--dork-max`. Requires `--tokens`. +- **Live-key intelligence** — `--analyze` enumerates what a *verified* GitHub token can actually + do (identity + scopes, e.g. `user:bob; scopes: repo, workflow, admin:org`), printed in a + "Key intelligence" section. Turns "it's live" into blast radius. - **Keyless by default** — `revenant --org acme` runs with no signup, no API keys, no subscription (waybackurls/gau ergonomics). A GitHub PAT is optional and only adds speed. - **Tiered discovery** — the repo **activity log** (`force_push`/`branch_deletion` @@ -103,6 +106,7 @@ SCORE TYPE STATUS AUTHORS FIRST_COMMIT | `--domain` | Extra dork search term (e.g. a company domain) | | `--dork-file` | Custom dork templates (`{term}`/`{domain}`); overrides built-in corpus | | `--dork-max` | Cap on total dork hits scanned (default 200) | +| `--analyze` | Enumerate capabilities of verified keys (GitHub tokens) | | `-o, --output` | Write JSON findings to a file | > **Tokens:** the activity-log tier (the precise, immediate one) needs a token with repo diff --git a/cmd/revenant/main.go b/cmd/revenant/main.go index 053b565..6e2059d 100644 --- a/cmd/revenant/main.go +++ b/cmd/revenant/main.go @@ -39,6 +39,7 @@ func main() { noHistory, noDeleted, verifiedOnly bool members, noGists bool dork bool + analyze bool domain, dorkFile string dorkMax int ) @@ -66,6 +67,7 @@ func main() { domain: domain, dorkFile: dorkFile, dorkMax: dorkMax, + analyze: analyze, }) }, } @@ -86,6 +88,7 @@ func main() { root.Flags().StringVar(&domain, "domain", "", "extra dork search term, e.g. 
a company domain") root.Flags().StringVar(&dorkFile, "dork-file", "", "custom dork templates ({term}/{domain}); overrides built-in corpus") root.Flags().IntVar(&dorkMax, "dork-max", 200, "cap on total dork hits scanned") + root.Flags().BoolVar(&analyze, "analyze", false, "enumerate capabilities of verified keys (GitHub tokens)") if err := root.Execute(); err != nil { fmt.Fprintln(os.Stderr, "error:", err) os.Exit(1) diff --git a/cmd/revenant/run.go b/cmd/revenant/run.go index 027e113..75fe0de 100644 --- a/cmd/revenant/run.go +++ b/cmd/revenant/run.go @@ -12,6 +12,7 @@ import ( "github.com/smakarim/revenant/internal/enumerate" "github.com/smakarim/revenant/internal/fetch" "github.com/smakarim/revenant/internal/githubclient" + "github.com/smakarim/revenant/internal/keyintel" "github.com/smakarim/revenant/internal/model" "github.com/smakarim/revenant/internal/report" "github.com/smakarim/revenant/internal/scan" @@ -35,14 +36,15 @@ type runConfig struct { domain string dorkFile string dorkMax int + analyze bool } // scanner is the local alias for the per-repo secret source. type scanner = scan.RepoScanner // runPipeline runs repo scanners over every repo and the gist scanner over every -// gist, appends any dork candidates, then correlates all candidates. -func runPipeline(ctx context.Context, repos []model.RepoRef, scanners []scanner, gists []model.GistRef, gistScanner *scan.GistScanner, dorkCands []model.Candidate) ([]model.Finding, error) { +// gist, appends dork candidates, optionally enriches all candidates, then correlates. 
+func runPipeline(ctx context.Context, repos []model.RepoRef, scanners []scanner, gists []model.GistRef, gistScanner *scan.GistScanner, dorkCands []model.Candidate, enrich func(context.Context, []model.Candidate) []model.Candidate) ([]model.Finding, error) { var all []model.Candidate for _, repo := range repos { for _, sc := range scanners { @@ -63,6 +65,9 @@ func runPipeline(ctx context.Context, repos []model.RepoRef, scanners []scanner, } } all = append(all, dorkCands...) + if enrich != nil { + all = enrich(ctx, all) + } return correlate.Correlate(ctx, all, correlate.DefaultWeights()), nil } @@ -191,7 +196,15 @@ func run(ctx context.Context, cfg runConfig) error { dorkCands = dorkCandidates(ctx, cfg, client, fsScanner) } - findings, err := runPipeline(ctx, repos, scanners, gists, gistScanner, dorkCands) + var enrich func(context.Context, []model.Candidate) []model.Candidate + if cfg.analyze { + analyzers := map[string]keyintel.Analyzer{"Github": &keyintel.GitHubAnalyzer{}} + enrich = func(ctx context.Context, c []model.Candidate) []model.Candidate { + return keyintel.Enrich(ctx, c, analyzers) + } + } + + findings, err := runPipeline(ctx, repos, scanners, gists, gistScanner, dorkCands, enrich) if err != nil { return err } @@ -208,7 +221,10 @@ func run(ctx context.Context, cfg runConfig) error { return err } } - return report.WriteTable(os.Stdout, findings, cfg.minScore) + if err := report.WriteTable(os.Stdout, findings, cfg.minScore); err != nil { + return err + } + return report.WriteIntel(os.Stdout, findings) } // buildScanners assembles the enabled repo scanners from the config. 
It may return diff --git a/cmd/revenant/run_test.go b/cmd/revenant/run_test.go index 1aa5209..a841804 100644 --- a/cmd/revenant/run_test.go +++ b/cmd/revenant/run_test.go @@ -23,7 +23,7 @@ func TestRunPipelineMergesScanners(t *testing.T) { s2 := fakeScanner{cands: []model.Candidate{{SecretType: "AWS", Raw: "AKIA1", Verified: true, Occurrence: model.Occurrence{Repo: repo, SHA: "d", Source: "deleted"}}}} - findings, err := runPipeline(context.Background(), []model.RepoRef{repo}, []scanner{s1, s2}, nil, nil, nil) + findings, err := runPipeline(context.Background(), []model.RepoRef{repo}, []scanner{s1, s2}, nil, nil, nil, nil) if err != nil { t.Fatal(err) } @@ -35,6 +35,25 @@ func TestRunPipelineMergesScanners(t *testing.T) { } } +func TestRunPipelineAppliesEnrich(t *testing.T) { + repo := model.RepoRef{Owner: "a", Name: "b"} + s := fakeScanner{cands: []model.Candidate{{SecretType: "Github", Raw: "t", Verified: true, + Occurrence: model.Occurrence{Repo: repo, SHA: "s"}}}} + enrich := func(_ context.Context, c []model.Candidate) []model.Candidate { + for i := range c { + c[i].Capabilities = "tagged" + } + return c + } + findings, err := runPipeline(context.Background(), []model.RepoRef{repo}, []scanner{s}, nil, nil, nil, enrich) + if err != nil { + t.Fatal(err) + } + if len(findings) != 1 || findings[0].Capabilities != "tagged" { + t.Fatalf("enrich not applied: %+v", findings) + } +} + func TestBuildScannersSelection(t *testing.T) { client := githubclient.New(githubclient.Config{}) fs := detect.ExecScanner{} diff --git a/internal/correlate/correlate.go b/internal/correlate/correlate.go index 3f850ed..e845125 100644 --- a/internal/correlate/correlate.go +++ b/internal/correlate/correlate.go @@ -25,7 +25,7 @@ func DefaultWeights() Weights { PerOccurrence: 2, TypeSeverity: map[string]float64{ "AWS": 50, - "GitHub": 50, + "Github": 50, // TruffleHog's detector name is "Github", not "GitHub" "Stripe": 20, }, } @@ -44,6 +44,9 @@ func Correlate(_ context.Context, cands 
[]model.Candidate, w Weights) []model.Fi authors[c.Raw] = map[string]bool{} } f.Verified = f.Verified || c.Verified + if f.Capabilities == "" && c.Capabilities != "" { + f.Capabilities = c.Capabilities + } f.Occurrences = append(f.Occurrences, c.Occurrence) if c.Occurrence.AuthorEmail != "" { authors[c.Raw][strings.ToLower(c.Occurrence.AuthorEmail)] = true diff --git a/internal/correlate/correlate_test.go b/internal/correlate/correlate_test.go index a9b66ad..b88ef67 100644 --- a/internal/correlate/correlate_test.go +++ b/internal/correlate/correlate_test.go @@ -8,6 +8,18 @@ import ( "github.com/smakarim/revenant/internal/model" ) +func TestCorrelateAppliesGithubSeverity(t *testing.T) { + repo := model.RepoRef{Owner: "a", Name: "b"} + cands := []model.Candidate{ + {SecretType: "Github", Raw: "g", Verified: false, + Occurrence: model.Occurrence{Repo: repo, SHA: "s"}}, + } + got := Correlate(context.Background(), cands, DefaultWeights()) + if len(got) != 1 || got[0].Score < 50 { + t.Fatalf("github type-severity bump not applied: %+v", got) + } +} + func TestCorrelateGroupsAndScores(t *testing.T) { repo := model.RepoRef{Owner: "acme", Name: "web"} t1 := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) @@ -58,3 +70,15 @@ func TestCorrelateMergesAcrossSources(t *testing.T) { t.Fatal("verified should be OR across sources") } } + +func TestCorrelateCarriesCapabilities(t *testing.T) { + repo := model.RepoRef{Owner: "a", Name: "b"} + cands := []model.Candidate{ + {SecretType: "Github", Raw: "t", Verified: true, Capabilities: "user:bob; scopes: repo", + Occurrence: model.Occurrence{Repo: repo, SHA: "s"}}, + } + got := Correlate(context.Background(), cands, DefaultWeights()) + if len(got) != 1 || got[0].Capabilities != "user:bob; scopes: repo" { + t.Fatalf("capabilities not carried: %+v", got) + } +} diff --git a/internal/keyintel/keyintel.go b/internal/keyintel/keyintel.go new file mode 100644 index 0000000..8adcbe1 --- /dev/null +++ b/internal/keyintel/keyintel.go @@ -0,0 +1,92 
// Analyzer turns a raw secret into a safe capability summary (never the secret).
type Analyzer interface {
	Analyze(ctx context.Context, raw string) (string, error)
}

// GitHubAnalyzer probes /user with the found token and reports its scopes + identity.
type GitHubAnalyzer struct {
	HTTP    *http.Client // optional; when nil a shared client with a 15s timeout is used
	BaseURL string       // defaults to https://api.github.com
}

// defaultHTTPClient is the client used when GitHubAnalyzer.HTTP is nil. It is
// shared across calls so connections can be reused instead of building a new
// client (and transport state) for every token.
var defaultHTTPClient = &http.Client{Timeout: 15 * time.Second}

// Analyze sends GET {BaseURL}/user authenticated with raw and summarizes what
// the token is: the account login from the response body and the scopes from
// the X-OAuth-Scopes response header.
//
// The summary has the form "user:<login>; scopes: <list>". When the scopes
// header is absent or empty the list is rendered as "(none / fine-grained)".
// The raw token is never included in the summary or in errors built here.
//
// An error is returned when raw is empty, the request cannot be built or sent,
// the response status is not 200, or the response body is not valid JSON. The
// returned string is empty whenever the error is non-nil.
func (g *GitHubAnalyzer) Analyze(ctx context.Context, raw string) (string, error) {
	// Without a token there is nothing to authenticate; skip the round trip.
	if raw == "" {
		return "", fmt.Errorf("github analyze: empty token")
	}
	base := g.BaseURL
	if base == "" {
		base = "https://api.github.com"
	}
	client := g.HTTP
	if client == nil {
		client = defaultHTTPClient
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, base+"/user", nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Authorization", "token "+raw)
	req.Header.Set("Accept", "application/vnd.github+json")
	resp, err := client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("github analyze: status %d", resp.StatusCode)
	}
	var body struct {
		Login string `json:"login"`
	}
	// A 200 whose body is not JSON did not come from the API we expected
	// (e.g. an intercepting proxy); reporting it as a live identity would be
	// misleading, so surface the decode failure instead of ignoring it.
	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
		return "", fmt.Errorf("github analyze: decoding /user response: %w", err)
	}
	summary := "user:" + body.Login
	if scopes := resp.Header.Get("X-OAuth-Scopes"); scopes != "" {
		summary += "; scopes: " + scopes
	} else {
		summary += "; scopes: (none / fine-grained)"
	}
	return summary, nil
}

// Enrich fills Capabilities for verified candidates whose SecretType has an
// analyzer, deduping by raw so each unique secret is analyzed at most once.
+func Enrich(ctx context.Context, cands []model.Candidate, analyzers map[string]Analyzer) []model.Candidate { + cache := map[string]string{} + out := make([]model.Candidate, len(cands)) + for i, c := range cands { + out[i] = c + if !c.Verified { + continue + } + a, ok := analyzers[c.SecretType] + if !ok { + continue + } + caps, cached := cache[c.Raw] + if !cached { + res, err := a.Analyze(ctx, c.Raw) + if err != nil { + cache[c.Raw] = "" + continue + } + caps = res + cache[c.Raw] = caps + } + if caps != "" { + out[i].Capabilities = caps + } + } + return out +} diff --git a/internal/keyintel/keyintel_test.go b/internal/keyintel/keyintel_test.go new file mode 100644 index 0000000..fab5f51 --- /dev/null +++ b/internal/keyintel/keyintel_test.go @@ -0,0 +1,81 @@ +package keyintel + +import ( + "context" + "io" + "net/http" + "net/http/httptest" + "testing" + + "github.com/smakarim/revenant/internal/model" +) + +func TestGitHubAnalyzerParsesScopes(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Header.Get("Authorization") != "token ghp_x" { + w.WriteHeader(http.StatusUnauthorized) + return + } + w.Header().Set("X-OAuth-Scopes", "repo, workflow") + io.WriteString(w, `{"login":"bob"}`) + })) + defer srv.Close() + a := &GitHubAnalyzer{BaseURL: srv.URL} + got, err := a.Analyze(context.Background(), "ghp_x") + if err != nil { + t.Fatal(err) + } + if got != "user:bob; scopes: repo, workflow" { + t.Fatalf("got %q", got) + } +} + +func TestGitHubAnalyzerEmptyScopes(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + io.WriteString(w, `{"login":"fg"}`) + })) + defer srv.Close() + a := &GitHubAnalyzer{BaseURL: srv.URL} + got, _ := a.Analyze(context.Background(), "x") + if got != "user:fg; scopes: (none / fine-grained)" { + t.Fatalf("got %q", got) + } +} + +func TestGitHubAnalyzerNon200Errors(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w 
http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + })) + defer srv.Close() + a := &GitHubAnalyzer{BaseURL: srv.URL} + if _, err := a.Analyze(context.Background(), "bad"); err == nil { + t.Fatal("expected error on non-200") + } +} + +type fakeAnalyzer struct{ calls int } + +func (f *fakeAnalyzer) Analyze(_ context.Context, raw string) (string, error) { + f.calls++ + return "caps:" + raw, nil +} + +func TestEnrichVerifiedTargetsOnlyAndDedups(t *testing.T) { + fa := &fakeAnalyzer{} + cands := []model.Candidate{ + {SecretType: "Github", Raw: "t1", Verified: true}, + {SecretType: "Github", Raw: "t1", Verified: true}, + {SecretType: "Github", Raw: "t2", Verified: false}, + {SecretType: "AWS", Raw: "a1", Verified: true}, + } + out := Enrich(context.Background(), cands, map[string]Analyzer{"Github": fa}) + if fa.calls != 1 { + t.Fatalf("analyze calls = %d, want 1 (dedup)", fa.calls) + } + if out[0].Capabilities != "caps:t1" || out[1].Capabilities != "caps:t1" { + t.Fatalf("verified github not enriched: %+v", out[:2]) + } + if out[2].Capabilities != "" || out[3].Capabilities != "" { + t.Fatalf("non-target enriched: %+v", out[2:]) + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 40eddff..7052455 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -40,12 +40,13 @@ type HiddenCommit struct { // Candidate is a secret detected in a fetched commit, prior to verification. type Candidate struct { - SecretType string - Verified bool - Raw string // the raw secret value (used as the content-addressed dedup key) - Repo RepoRef - SHA string - Occurrence Occurrence + SecretType string + Verified bool + Capabilities string // safe summary of what a verified key can do; never the raw secret + Raw string // the raw secret value (used as the content-addressed dedup key) + Repo RepoRef + SHA string + Occurrence Occurrence } // Occurrence records where a secret was seen. 
@@ -63,6 +64,7 @@ type Occurrence struct { type Finding struct { SecretType string Verified bool + Capabilities string Score float64 Occurrences []Occurrence FirstSeen time.Time diff --git a/internal/model/model_test.go b/internal/model/model_test.go index e551628..a45fed5 100644 --- a/internal/model/model_test.go +++ b/internal/model/model_test.go @@ -35,3 +35,11 @@ func TestGistRef(t *testing.T) { t.Fatalf("got %+v", g) } } + +func TestCapabilitiesFields(t *testing.T) { + c := Candidate{Capabilities: "x"} + f := Finding{Capabilities: "y"} + if c.Capabilities != "x" || f.Capabilities != "y" { + t.Fatalf("missing fields: %q %q", c.Capabilities, f.Capabilities) + } +} diff --git a/internal/report/report.go b/internal/report/report.go index 23a34e7..5959ce5 100644 --- a/internal/report/report.go +++ b/internal/report/report.go @@ -41,6 +41,33 @@ func WriteTable(w io.Writer, findings []model.Finding, minScore float64) error { return tw.Flush() } +// WriteIntel prints a "Key intelligence" section listing findings whose verified +// key was analyzed for capabilities. Emits nothing when none have capabilities. +func WriteIntel(w io.Writer, findings []model.Finding) error { + any := false + for _, f := range findings { + if f.Capabilities != "" { + any = true + break + } + } + if !any { + return nil + } + if _, err := fmt.Fprintln(w, "\nKey intelligence:"); err != nil { + return err + } + for _, f := range findings { + if f.Capabilities == "" { + continue + } + if _, err := fmt.Fprintf(w, " [%s] %s\n", f.SecretType, f.Capabilities); err != nil { + return err + } + } + return nil +} + // sourcesOf returns the distinct occurrence sources, sorted and comma-joined // (e.g. "deleted", "history", or "deleted,history"). 
func sourcesOf(f model.Finding) string { diff --git a/internal/report/report_test.go b/internal/report/report_test.go index 96d8869..b086ecf 100644 --- a/internal/report/report_test.go +++ b/internal/report/report_test.go @@ -59,3 +59,26 @@ func TestTableMinScoreFilters(t *testing.T) { t.Fatal("min-score should have filtered the finding out") } } + +func TestWriteIntelPrintsWhenPresent(t *testing.T) { + var buf bytes.Buffer + if err := WriteIntel(&buf, []model.Finding{{SecretType: "Github", Capabilities: "user:bob; scopes: repo"}}); err != nil { + t.Fatal(err) + } + out := buf.String() + for _, want := range []string{"Key intelligence", "Github", "user:bob"} { + if !strings.Contains(out, want) { + t.Fatalf("missing %q in:\n%s", want, out) + } + } +} + +func TestWriteIntelEmptyWhenNoCapabilities(t *testing.T) { + var buf bytes.Buffer + if err := WriteIntel(&buf, []model.Finding{{SecretType: "AWS"}}); err != nil { + t.Fatal(err) + } + if buf.String() != "" { + t.Fatalf("expected no output, got %q", buf.String()) + } +}