From df4d8bf8f0dbca2c468cd76f855bac6417ef8a3d Mon Sep 17 00:00:00 2001 From: Daniel F B Morinigo Date: Tue, 19 May 2026 15:43:20 +0000 Subject: [PATCH] =?UTF-8?q?feat(roadmap-planner):=20W2=20=E2=80=94=20fold?= =?UTF-8?q?=20bot=20contributions=20into=20synthetic=20bot=20member?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Second workstream of the 2026-05-19 metrics audit follow-up. Resolves finding B4 (56% of pr_reviews rows are automated, dragging the team-wide first-review p50 to ~0.1h). Adds two new schema columns (migration 0005), an explicit team_analytics.bot_logins config knob, and the contributions/bot.go package that owns both the live predicate (BotSet) and the one-shot retroactive consolidation pass (ConsolidateBots). Live ingest changes: - github.Syncer.IsBotLogin / gitlab.Syncer.IsBotLogin predicates. When a PR's author_login matches, author_id becomes "bot". Same for review rows: reviewer_id="bot" and is_bot=1. - pull_requests.first_human_review_at is computed live as MIN(submitted_at) over non-bot reviews. Retroactive backfill at startup (idempotent): - pr_reviews: tag is_bot=1, rewrite reviewer_id to "bot" where login matches. - pull_requests: rewrite author_id to "bot" where login matches; populate first_human_review_at from non-bot reviews on each PR. NetworkDensity now: - Reads first-review latency from first_human_review_at (so renovate's instant comments stop collapsing p50 to ~0.1h). - Joins with rv.is_bot=0 in the cross-pillar review query. Activation: - Empty bot_logins -> no behaviour change (BotSet.Size()==0 is a no-op). - Populated list -> live ingest reclassifies + startup pass updates historical rows. Tests: - BotSet truth table (empty, exact, case-fold). - ConsolidateBots: human PR with bot+human reviews -> is_bot tagged, reviewer_id rewritten, first_human_review_at = human's review time (not the earlier bot review). Bot-authored PR -> author_id="bot". Idempotency: re-running consolidates nothing. Doc: CHANGES.md gets the W2 block describing config, visible effects, and rollback path. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- roadmap-planner/backend/cmd/server/main.go | 35 +++- roadmap-planner/backend/config.example.yaml | 26 +++ .../backend/internal/config/config.go | 28 ++- .../backend/internal/contributions/bot.go | 184 ++++++++++++++++++ .../internal/contributions/bot_test.go | 174 +++++++++++++++++ .../internal/contributions/service_extras.go | 12 +- .../backend/internal/github/sync.go | 26 ++- .../backend/internal/gitlab/sync.go | 22 ++- .../backend/internal/storage/generic.go | 21 +- .../migrations/0005_first_human_review.sql | 26 +++ .../backend/internal/storage/store.go | 40 ++-- .../docs/team-analytics/CHANGES.md | 75 +++---- 12 files changed, 568 insertions(+), 101 deletions(-) create mode 100644 roadmap-planner/backend/internal/contributions/bot.go create mode 100644 roadmap-planner/backend/internal/contributions/bot_test.go create mode 100644 roadmap-planner/backend/internal/storage/migrations/0005_first_human_review.sql diff --git a/roadmap-planner/backend/cmd/server/main.go b/roadmap-planner/backend/cmd/server/main.go index 9aae4c10..a62bf5e7 100644 --- a/roadmap-planner/backend/cmd/server/main.go +++ b/roadmap-planner/backend/cmd/server/main.go @@ -212,11 +212,30 @@ func initTeamAnalytics(ctx context.Context, router *gin.Engine, cfg *config.Conf aggregator := contributions.NewAggregator(store) allowlist := contributions.BuildAllowlist(cfg.TeamAnalytics) aggregator.SetAllowlist(allowlist) + bots := contributions.NewBotSet(cfg.TeamAnalytics.BotLogins) api.AddContributionsRoutes(router, store, service, aggregator, allowlist) logger.Info("Contributions API routes added", zap.Int("pillars_configured", len(pillarMap.Order())), zap.Bool("allowlist_enabled", allowlist.Enabled()), - zap.Int("allowlist_size", allowlist.Size())) + zap.Int("allowlist_size", allowlist.Size()), + zap.Int("bot_logins_configured", bots.Size())) + + // W2 one-shot bot consolidation: retroactively tag existing PR / + // review rows ingested before this feature shipped, reassign bot + // author/reviewer ids to the synthetic `bot` member, and compute + // first_human_review_at. Idempotent — safe to run on every boot. + if bots.Size() > 0 { + res, err := contributions.ConsolidateBots(ctx, store, bots) + if err != nil { + logger.Warn("bot consolidation backfill failed", zap.Error(err)) + } else { + logger.Info("bot consolidation backfill applied", + zap.Int("reviews_marked_bot", res.ReviewsMarkedBot), + zap.Int("reviewer_id_rewritten", res.ReviewsAuthorRew), + zap.Int("pr_author_id_rewritten", res.PRsAuthorRewrite), + zap.Int("pr_first_human_review_rows", res.PRsFirstHumanSet)) + } + } // Optional Jira sync goroutine. // @@ -258,12 +277,12 @@ func initTeamAnalytics(ctx context.Context, router *gin.Engine, cfg *config.Conf // Optional GitHub sync goroutine. if cfg.GitHub.Enabled { - startGitHubSync(ctx, cfg, store, aggregator) + startGitHubSync(ctx, cfg, store, aggregator, bots) } // Optional GitLab sync goroutine. if cfg.GitLab.Enabled { - startGitLabSync(ctx, cfg, store, aggregator, allowlist) + startGitLabSync(ctx, cfg, store, aggregator, allowlist, bots) } go func() { @@ -279,7 +298,7 @@ func initTeamAnalytics(ctx context.Context, router *gin.Engine, cfg *config.Conf // All failures are non-fatal — if GitHub auth or repo parsing breaks, // we log and continue without a GitHub syncer rather than refusing to // start the whole server. The roadmap UI keeps working. -func startGitHubSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator) { +func startGitHubSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator, bots contributions.BotSet) { repos := parseRepos(cfg.GitHub.Repos) if len(repos) == 0 { logger.Warn("github.enabled but github.repos is empty; skipping sync") @@ -301,6 +320,9 @@ func startGitHubSync(ctx context.Context, cfg *config.Config, store storage.Stor backfill = cfg.Storage.BackfillDays } syncer := ghclient.NewSyncer(client, store, repos, ghclient.DefaultLinker(projectKey), backfill) + if bots.Size() > 0 { + syncer.IsBotLogin = bots.Contains + } interval, err := time.ParseDuration(cfg.GitHub.SyncInterval) if err != nil { @@ -348,7 +370,7 @@ func buildGitHubClient(cfg *config.GitHub) (*ghclient.Client, string, error) { // // Same fail-soft contract as startGitHubSync — bad config logs and // continues without a GitLab sync rather than refusing to start. -func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator, allowlist contributions.Allowlist) { +func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator, allowlist contributions.Allowlist, bots contributions.BotSet) { specs := parseGroupSpecs(cfg.GitLab.Groups) if len(specs) == 0 { logger.Warn("gitlab.enabled but gitlab.groups is empty; skipping sync") @@ -385,6 +407,9 @@ func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Stor } syncer.AllowedMemberIDs = allowed } + if bots.Size() > 0 { + syncer.IsBotLogin = bots.Contains + } interval, err := time.ParseDuration(cfg.GitLab.SyncInterval) if err != nil { diff --git a/roadmap-planner/backend/config.example.yaml b/roadmap-planner/backend/config.example.yaml index 431c9dd5..7745d1f0 100644 --- a/roadmap-planner/backend/config.example.yaml +++ b/roadmap-planner/backend/config.example.yaml @@ -125,6 +125,7 @@ github: private_key_path: "" # path to the PEM file (mount via Secret) private_key_pem: "" # raw PEM contents (use env GITHUB_APP_PRIVATE_KEY_PEM in K8s) +<<<<<<< HEAD # GitLab ingestion — mirrors `github` for self-hosted GitLab installs. # Requires storage.enabled = true. gitlab: @@ -165,4 +166,29 @@ team_analytics: member_denylist: [] # - gxjiao # - lmhe +======= +# Team analytics — pillar attribution + identity prefills + W1/W2 knobs. +# All keys here are optional and additive; the dashboard works without +# them (it just shows the un-curated raw data). +team_analytics: + # github_login_prefills shortcuts the first-time Jira → GitHub + # identity link for members the operator already knows. Manual UI + # edits always win on subsequent syncs. + github_login_prefills: {} + # daniel: danielfbm + # jtcheng: chengjingtao + gitlab_username_prefills: {} + # daniel: daniel + # jtcheng: chengjingtao + # W2 (2026-05-19): explicit list of bot logins that get folded into + # the synthetic `bot` member. Match is exact + case-folded; no + # suffix magic. Adding a new bot here + restarting picks it up. + bot_logins: [] + # - alaudabot + # - alaudaa-renovate + # - edge-katanomi-app2[bot] + # - copilot-pull-request-reviewer[bot] + # - kilo-code-bot[bot] + # - copilot +>>>>>>> ee78c5b (feat(roadmap-planner): W2 — fold bot contributions into synthetic bot member) # pillars: { … } # see docs/team-analytics/PROPOSAL.md \ No newline at end of file diff --git a/roadmap-planner/backend/internal/config/config.go b/roadmap-planner/backend/internal/config/config.go index f5144a5f..4f868b76 100644 --- a/roadmap-planner/backend/internal/config/config.go +++ b/roadmap-planner/backend/internal/config/config.go @@ -88,23 +88,21 @@ type TeamAnalytics struct { // daniel: daniel // jtcheng: chengjingtao GitLabUsernamePrefills map[string]string `mapstructure:"gitlab_username_prefills" yaml:"gitlab_username_prefills"` - // MemberDenylist is the operator-curated list of Jira member ids - // (slugified email — the same key shape used in the prefill maps) + // MemberDenylist (W1) — operator-curated list of Jira member ids // that should be excluded from the team-analytics rollups and from - // every contributions API response, regardless of whether they have - // an entry in the prefill maps. - // - // This is the W1 escape hatch for people who appear in Jira (so the - // auto-discovery picks them up) but should not count toward the - // dashboard — bots that aren't on the bot allowlist, people who - // left the team, contractors loaned out to another pillar, etc. + // every contributions API response. Subtracted from the prefill- + // derived allowlist. MemberDenylist []string `mapstructure:"member_denylist" yaml:"member_denylist"` - // Statuses is the W4 (2026-05-19) configurable status → lane map - // used by the sprint card. Status names match Jira's `status.name` - // case-folded; an unknown status falls back to `in_progress` and - // triggers a warning so the operator can extend the lists. The - // built-in defaults cover DEVOPS's 37 statuses across 17 issue - // types — see PLAN.md W4 for the full mapping. + // BotLogins (W2) — explicit list of GitHub / GitLab logins that + // should be reassigned to the synthetic `bot` member instead of + // landing under their raw login. Match is exact + case-folded; no + // suffix magic. The synthetic `bot` member is auto-created by the + // Jira sync. + BotLogins []string `mapstructure:"bot_logins" yaml:"bot_logins"` + // Statuses (W4) — configurable status → lane map for the sprint + // card. Each lane (todo/in_progress/done/cancelled) is optional; + // empty lanes inherit DefaultStatusLanes() which covers the live + // DEVOPS Jira workflow (37 statuses, English + Chinese). Statuses StatusLanes `mapstructure:"statuses" yaml:"statuses"` } diff --git a/roadmap-planner/backend/internal/contributions/bot.go b/roadmap-planner/backend/internal/contributions/bot.go new file mode 100644 index 00000000..e68c6136 --- /dev/null +++ b/roadmap-planner/backend/internal/contributions/bot.go @@ -0,0 +1,184 @@ +/* +Copyright 2026 The AlaudaDevops Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +*/ + +package contributions + +import ( + "context" + "fmt" + "strings" + + "github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/storage" +) + +// BotSet is the W2 closed list of logins that should be folded into the +// synthetic `bot` member. Lookup is exact + case-folded; suffix or +// prefix patterns are NOT honored — per maintainer guidance, the +// allowlist is explicit and new bots are added to config. +// +// Use NewBotSet to build the set from a flat config slice. The zero +// value is a no-op (Contains always returns false), which is the +// upgrade path for installs that haven't curated the list yet. +type BotSet struct { + set map[string]struct{} +} + +// NewBotSet returns a set populated from the configured list. Empty +// input yields a usable, no-op set. +func NewBotSet(logins []string) BotSet { + out := make(map[string]struct{}, len(logins)) + for _, l := range logins { + k := normalizeBotLogin(l) + if k == "" { + continue + } + out[k] = struct{}{} + } + return BotSet{set: out} +} + +// Contains reports whether the given raw login should be folded into +// the synthetic `bot` member. Lookup is case-folded so callers don't +// need to normalize before calling. +func (b BotSet) Contains(login string) bool { + if len(b.set) == 0 { + return false + } + _, ok := b.set[normalizeBotLogin(login)] + return ok +} + +// Size returns the number of distinct configured bot logins. Useful +// for startup telemetry. +func (b BotSet) Size() int { return len(b.set) } + +func normalizeBotLogin(s string) string { + return strings.ToLower(strings.TrimSpace(s)) +} + +// ConsolidateBots is the one-shot startup pass that retroactively +// applies the W2 bot-consolidation rules to rows ingested before this +// feature shipped. It walks `pr_reviews` and `pull_requests`, sets +// `is_bot` / `first_human_review_at` columns, and rewrites bot +// `author_id` / `reviewer_id` to the synthetic `bot` member id. +// +// Behavior is idempotent — every step uses a deterministic predicate +// over the configured bot list, so re-running on already-consolidated +// data is a no-op. Callers should run this once on startup *after* +// the migration applies the new columns and *before* the aggregator +// rebuilds, so the network-density panel reads the fresh values. +// +// Returns a summary of how many rows changed per step, for the +// startup log. +type ConsolidateBotsResult struct { + ReviewsMarkedBot int + PRsAuthorRewrite int + ReviewsAuthorRew int + PRsFirstHumanSet int +} + +func ConsolidateBots(ctx context.Context, store storage.Store, bots BotSet) (ConsolidateBotsResult, error) { + var out ConsolidateBotsResult + if bots.Size() == 0 { + return out, nil + } + d, ok := store.(interface{ Dialect() storage.Dialect }) + if !ok { + return out, fmt.Errorf("ConsolidateBots: store does not expose a Dialect") + } + dialect := d.Dialect() + db := store.DB() + + // Build IN-clause args ONCE; reused across the four statements. + logins := make([]any, 0, bots.Size()) + for k := range bots.set { + logins = append(logins, k) + } + if len(logins) == 0 { + return out, nil + } + placeholders := strings.Repeat("?, ", len(logins)) + placeholders = strings.TrimSuffix(placeholders, ", ") + + rebind := func(q string) string { + if dialect.Placeholder(1) == "?" { + return q + } + var b []byte + n := 1 + for i := 0; i < len(q); i++ { + if q[i] == '?' { + b = append(b, []byte(dialect.Placeholder(n))...) + n++ + } else { + b = append(b, q[i]) + } + } + return string(b) + } + + // 1. Tag every existing review row by a configured bot login. This + // is the safe one to lead with: pure metadata flip, no FK touch. + res, err := db.ExecContext(ctx, + rebind(fmt.Sprintf( + `UPDATE pr_reviews SET is_bot = 1 WHERE is_bot = 0 AND LOWER(reviewer_login) IN (%s)`, + placeholders)), + logins...) + if err != nil { + return out, fmt.Errorf("backfill is_bot on pr_reviews: %w", err) + } + if n, err := res.RowsAffected(); err == nil { + out.ReviewsMarkedBot = int(n) + } + + // 2. Reassign bot reviewer_id to the synthetic `bot` member id. + // Skip rows already pointing at `bot` (idempotency on rerun). + res, err = db.ExecContext(ctx, + rebind(fmt.Sprintf( + `UPDATE pr_reviews SET reviewer_id = 'bot' WHERE (reviewer_id IS NULL OR reviewer_id <> 'bot') AND LOWER(reviewer_login) IN (%s)`, + placeholders)), + logins...) + if err != nil { + return out, fmt.Errorf("backfill reviewer_id on pr_reviews: %w", err) + } + if n, err := res.RowsAffected(); err == nil { + out.ReviewsAuthorRew = int(n) + } + + // 3. Reassign bot author_id on pull_requests. + res, err = db.ExecContext(ctx, + rebind(fmt.Sprintf( + `UPDATE pull_requests SET author_id = 'bot' WHERE (author_id IS NULL OR author_id <> 'bot') AND LOWER(author_login) IN (%s)`, + placeholders)), + logins...) + if err != nil { + return out, fmt.Errorf("backfill author_id on pull_requests: %w", err) + } + if n, err := res.RowsAffected(); err == nil { + out.PRsAuthorRewrite = int(n) + } + + // 4. Populate first_human_review_at on pull_requests — `MIN(submitted_at)` + // over non-bot reviews. UPDATE-FROM-subquery flavor; runs once and + // is idempotent because subsequent passes either match the same + // minimum or NULL when no human reviews exist. + res, err = db.ExecContext(ctx, rebind(` + UPDATE pull_requests + SET first_human_review_at = ( + SELECT MIN(rv.submitted_at) + FROM pr_reviews rv + WHERE rv.pr_id = pull_requests.id AND rv.is_bot = 0 + )`)) + if err != nil { + return out, fmt.Errorf("backfill first_human_review_at: %w", err) + } + if n, err := res.RowsAffected(); err == nil { + out.PRsFirstHumanSet = int(n) + } + + return out, nil +} diff --git a/roadmap-planner/backend/internal/contributions/bot_test.go b/roadmap-planner/backend/internal/contributions/bot_test.go new file mode 100644 index 00000000..6f3f05b2 --- /dev/null +++ b/roadmap-planner/backend/internal/contributions/bot_test.go @@ -0,0 +1,174 @@ +/* +Copyright 2026 The AlaudaDevops Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +*/ + +package contributions + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/storage" +) + +func TestBotSet(t *testing.T) { + cases := []struct { + name string + in []string + probe map[string]bool + }{ + { + name: "empty set rejects everything", + in: nil, + probe: map[string]bool{ + "alaudabot": false, + "anybody": false, + "copilot[bot]": false, + }, + }, + { + name: "exact + case-folded match", + in: []string{"alaudabot", "Edge-Katanomi-App2[bot]", " copilot "}, + probe: map[string]bool{ + "alaudabot": true, + "ALAUDABOT": true, + "edge-katanomi-app2[bot]": true, + "copilot": true, + "copilot-pull-request-reviewer[bot]": false, + "daniel": false, + "": false, + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + b := NewBotSet(tc.in) + for k, want := range tc.probe { + if got := b.Contains(k); got != want { + t.Errorf("Contains(%q) = %v, want %v", k, got, want) + } + } + }) + } +} + +// TestConsolidateBots seeds PRs + reviews authored by both a human and +// a bot login, runs the one-shot consolidation pass, and verifies all +// four side-effects: is_bot tagging, reviewer_id rewrite, author_id +// rewrite, and first_human_review_at population. +func TestConsolidateBots(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + store, err := storage.OpenSQLite(filepath.Join(dir, "bot.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + if err := store.Migrate(ctx); err != nil { + t.Fatalf("migrate: %v", err) + } + + for _, m := range []storage.Member{ + {ID: "bot", DisplayName: "Bot", Active: true}, + {ID: "daniel", DisplayName: "Daniel", GitHubLogin: "danielfbm", Active: true}, + } { + if err := store.UpsertMember(ctx, m); err != nil { + t.Fatalf("upsert %s: %v", m.ID, err) + } + } + + created := time.Date(2026, 5, 1, 10, 0, 0, 0, time.UTC) + botReview := created.Add(1 * time.Hour) + humanReview := created.Add(4 * time.Hour) + + if err := store.UpsertPullRequests(ctx, []storage.PullRequest{ + // Human-authored PR, will get both a bot review and a human review. + { + ID: "alaudadevops/x#1", Source: "github", RepoID: "alaudadevops/x", Number: 1, + Title: "human PR", State: "merged", + AuthorID: "daniel", AuthorLogin: "danielfbm", + CreatedAt: created, MergedAt: ptrTime(humanReview.Add(2 * time.Hour)), + FirstReviewAt: ptrTime(botReview), // pre-W2: filled with the earliest review including bots + FetchedAt: created, + }, + // Bot-authored PR (renovate-style). Pre-W2 author_id is empty. + { + ID: "alaudadevops/x#2", Source: "github", RepoID: "alaudadevops/x", Number: 2, + Title: "renovate PR", State: "merged", + AuthorID: "", AuthorLogin: "alaudabot", + CreatedAt: created, MergedAt: ptrTime(created.Add(30 * time.Minute)), + FetchedAt: created, + }, + }); err != nil { + t.Fatalf("upsert prs: %v", err) + } + if err := store.UpsertPRReviews(ctx, []storage.PRReview{ + {ID: "alaudadevops/x#1/r1", PRID: "alaudadevops/x#1", Source: "github", + ReviewerID: "", ReviewerLogin: "alaudabot", State: "commented", SubmittedAt: botReview}, + {ID: "alaudadevops/x#1/r2", PRID: "alaudadevops/x#1", Source: "github", + ReviewerID: "daniel", ReviewerLogin: "danielfbm", State: "approved", SubmittedAt: humanReview}, + }); err != nil { + t.Fatalf("upsert reviews: %v", err) + } + + bots := NewBotSet([]string{"alaudabot"}) + res, err := ConsolidateBots(ctx, store, bots) + if err != nil { + t.Fatalf("consolidate: %v", err) + } + if res.ReviewsMarkedBot != 1 { + t.Fatalf("reviews_marked_bot = %d, want 1", res.ReviewsMarkedBot) + } + if res.ReviewsAuthorRew != 1 { + t.Fatalf("reviewer_id_rewritten = %d, want 1", res.ReviewsAuthorRew) + } + if res.PRsAuthorRewrite != 1 { + t.Fatalf("pr_author_id_rewritten = %d, want 1 (the renovate PR)", res.PRsAuthorRewrite) + } + + // Verify via SQL. + row := store.DB().QueryRow(`SELECT is_bot, reviewer_id FROM pr_reviews WHERE id = 'alaudadevops/x#1/r1'`) + var isBot int + var revID string + if err := row.Scan(&isBot, &revID); err != nil { + t.Fatalf("scan bot review: %v", err) + } + if isBot != 1 || revID != "bot" { + t.Fatalf("bot review row: is_bot=%d reviewer_id=%q, want 1 / bot", isBot, revID) + } + row = store.DB().QueryRow(`SELECT author_id FROM pull_requests WHERE id = 'alaudadevops/x#2'`) + var authorID string + if err := row.Scan(&authorID); err != nil { + t.Fatalf("scan bot PR: %v", err) + } + if authorID != "bot" { + t.Fatalf("bot-authored PR author_id=%q, want bot", authorID) + } + + // first_human_review_at should be the human's review time (not the + // earlier bot review). + row = store.DB().QueryRow(`SELECT first_human_review_at FROM pull_requests WHERE id = 'alaudadevops/x#1'`) + var firstHuman *time.Time + if err := row.Scan(&firstHuman); err != nil { + t.Fatalf("scan first_human_review_at: %v", err) + } + if firstHuman == nil || !firstHuman.Equal(humanReview) { + t.Fatalf("first_human_review_at = %v, want %v", firstHuman, humanReview) + } + + // Idempotency: re-running consolidates nothing further. + res2, err := ConsolidateBots(ctx, store, bots) + if err != nil { + t.Fatalf("consolidate again: %v", err) + } + if res2.ReviewsMarkedBot != 0 || res2.ReviewsAuthorRew != 0 || res2.PRsAuthorRewrite != 0 { + t.Fatalf("second run not idempotent: %+v", res2) + } +} + +func ptrTime(t time.Time) *time.Time { return &t } diff --git a/roadmap-planner/backend/internal/contributions/service_extras.go b/roadmap-planner/backend/internal/contributions/service_extras.go index 962a5625..573f8290 100644 --- a/roadmap-planner/backend/internal/contributions/service_extras.go +++ b/roadmap-planner/backend/internal/contributions/service_extras.go @@ -79,13 +79,18 @@ func (s *Service) NetworkDensity(ctx context.Context, q storage.MemberWeekQuery) // which is anchored on creation. Drafts are excluded because they // shouldn't count toward orphan rate; we approximate by skipping // PRs that closed without a merge AND without a review. - hours := hoursBetween(dialect, "first_review_at", "created_at") + // + // W2: latency uses `first_human_review_at` so bot reviews (renovate, + // catalog-bot, etc.) don't dominate the p50. Orphan rate stays + // `first_review_at IS NULL` — a PR with only a bot review is still + // reviewed for orphan purposes, but it isn't reviewed for latency. + hours := hoursBetween(dialect, "first_human_review_at", "created_at") prSQL := rebindSimple(dialect, fmt.Sprintf(` SELECT COUNT(*) AS total, SUM(CASE WHEN merged_at IS NOT NULL THEN 1 ELSE 0 END) AS merged, SUM(CASE WHEN first_review_at IS NULL THEN 1 ELSE 0 END) AS orphans, - SUM(CASE WHEN first_review_at IS NOT NULL AND %s <= 24.0 + SUM(CASE WHEN first_human_review_at IS NOT NULL AND %s <= 24.0 THEN 1 ELSE 0 END) AS under24h FROM pull_requests WHERE created_at >= ? AND created_at < ?`, hours)) @@ -111,7 +116,7 @@ func (s *Service) NetworkDensity(ctx context.Context, q storage.MemberWeekQuery) SELECT %s FROM pull_requests WHERE created_at >= ? AND created_at < ? - AND first_review_at IS NOT NULL`, hours)) + AND first_human_review_at IS NOT NULL`, hours)) rows, err := db.QueryContext(ctx, latSQL, q.From, q.To) if err != nil { return nil, fmt.Errorf("network latencies: %w", err) @@ -151,6 +156,7 @@ func (s *Service) NetworkDensity(ctx context.Context, q storage.MemberWeekQuery) JOIN members ma ON ma.id = COALESCE(NULLIF(pr.author_id, ''), '') JOIN members mr ON mr.id = COALESCE(NULLIF(rv.reviewer_id, ''), '') WHERE rv.submitted_at >= ? AND rv.submitted_at < ? + AND rv.is_bot = 0 AND ma.pillar_id IS NOT NULL AND ma.pillar_id <> '' AND mr.pillar_id IS NOT NULL AND mr.pillar_id <> ''`) var xTotal, xCross sql.NullInt64 diff --git a/roadmap-planner/backend/internal/github/sync.go b/roadmap-planner/backend/internal/github/sync.go index 53fb3e48..fd0ea1d2 100644 --- a/roadmap-planner/backend/internal/github/sync.go +++ b/roadmap-planner/backend/internal/github/sync.go @@ -84,6 +84,15 @@ type Syncer struct { IncludeArchived bool // default false (archived repos skipped) IncludeForks bool // default false (forks skipped) + // IsBotLogin is the W2 predicate that reclassifies a raw login as + // the synthetic `bot` member. When non-nil and the predicate returns + // true, PR `author_id` / review `reviewer_id` are set to `"bot"` + // (so the dashboard credits the volume to one row) and + // `pr_reviews.is_bot` is set to 1 (so NetworkDensity excludes the + // row from human review-latency stats). Nil predicate disables + // the feature. + IsBotLogin func(login string) bool + wildcardCache *wildcardCache nowFn func() time.Time // injectable for cache-TTL tests } @@ -202,6 +211,9 @@ func (s *Syncer) Sync(ctx context.Context) error { } authorLogin := strings.ToLower(pr.User.Login) authorID := byLogin[authorLogin] + if s.IsBotLogin != nil && s.IsBotLogin(authorLogin) { + authorID = "bot" + } epicKey := "" if s.linker != nil { epicKey = s.linker.Link(pr) @@ -247,24 +259,34 @@ func (s *Syncer) Sync(ctx context.Context) error { s.logger.Warn("ListReviews failed", zap.String("repo", full), zap.Int("pr", pr.Number), zap.Error(err)) } else { - var first *time.Time + var first, firstHuman *time.Time for _, rev := range reviews { st := strings.ToLower(rev.State) reviewerLogin := strings.ToLower(rev.User.Login) + reviewerID := byLogin[reviewerLogin] + isBot := s.IsBotLogin != nil && s.IsBotLogin(reviewerLogin) + if isBot { + reviewerID = "bot" + } reviewBatch = append(reviewBatch, storage.PRReview{ ID: fmt.Sprintf("%s#%d/r%d", full, pr.Number, rev.ID), PRID: rec.ID, Source: "github", - ReviewerID: byLogin[reviewerLogin], + ReviewerID: reviewerID, ReviewerLogin: reviewerLogin, State: st, SubmittedAt: rev.SubmittedAt, + IsBot: isBot, }) if first == nil || rev.SubmittedAt.Before(*first) { first = &rev.SubmittedAt } + if !isBot && (firstHuman == nil || rev.SubmittedAt.Before(*firstHuman)) { + firstHuman = &rev.SubmittedAt + } } rec.FirstReviewAt = first + rec.FirstHumanReviewAt = firstHuman } } toStore = append(toStore, rec) diff --git a/roadmap-planner/backend/internal/gitlab/sync.go b/roadmap-planner/backend/internal/gitlab/sync.go index 070b4de7..c0280584 100644 --- a/roadmap-planner/backend/internal/gitlab/sync.go +++ b/roadmap-planner/backend/internal/gitlab/sync.go @@ -111,6 +111,11 @@ type Syncer struct { // disables Pass B entirely. AllowedMemberIDs map[string]struct{} + // IsBotLogin is the W2 predicate that reclassifies a raw GitLab + // username as the synthetic `bot` member. Mirrors the github + // syncer's field; see that doc for behavior. Nil disables. + IsBotLogin func(login string) bool + wildcardCache *wildcardCache nowFn func() time.Time } @@ -259,6 +264,9 @@ func (s *Syncer) Sync(ctx context.Context) error { } authorLogin := strings.ToLower(mr.Author.Username) authorID := byUsername[authorLogin] + if s.IsBotLogin != nil && s.IsBotLogin(authorLogin) { + authorID = "bot" + } epicKey := "" if s.linker != nil { epicKey = s.linker.Link(mr) @@ -305,7 +313,7 @@ func (s *Syncer) Sync(ctx context.Context) error { zap.String("project", p.PathWithNamespace), zap.Int("iid", mr.IID), zap.Error(err)) } else { - var first *time.Time + var first, firstHuman *time.Time for _, n := range notes { if n.System { continue @@ -318,20 +326,30 @@ func (s *Syncer) Sync(ctx context.Context) error { if !ok { continue } + reviewerID := byUsername[reviewerLogin] + isBot := s.IsBotLogin != nil && s.IsBotLogin(reviewerLogin) + if isBot { + reviewerID = "bot" + } reviewBatch = append(reviewBatch, storage.PRReview{ ID: fmt.Sprintf("%s!%d/n%d", p.PathWithNamespace, mr.IID, n.ID), PRID: rec.ID, Source: "gitlab", - ReviewerID: byUsername[reviewerLogin], + ReviewerID: reviewerID, ReviewerLogin: reviewerLogin, State: st, SubmittedAt: n.CreatedAt, + IsBot: isBot, }) if first == nil || n.CreatedAt.Before(*first) { first = &n.CreatedAt } + if !isBot && (firstHuman == nil || n.CreatedAt.Before(*firstHuman)) { + firstHuman = &n.CreatedAt + } } rec.FirstReviewAt = first + rec.FirstHumanReviewAt = firstHuman } } toStore = append(toStore, rec) diff --git a/roadmap-planner/backend/internal/storage/generic.go b/roadmap-planner/backend/internal/storage/generic.go index 8fe6a7e5..b0261acc 100644 --- a/roadmap-planner/backend/internal/storage/generic.go +++ b/roadmap-planner/backend/internal/storage/generic.go @@ -124,8 +124,9 @@ func (s *genericStore) UpsertPullRequests(ctx context.Context, prs []PullRequest INSERT INTO pull_requests ( id, source, repo_id, number, title, state, author_id, author_login, head_branch, base_branch, additions, deletions, changed_files, - jira_key, created_at, first_review_at, merged_at, closed_at, fetched_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + jira_key, created_at, first_review_at, first_human_review_at, + merged_at, closed_at, fetched_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(id) DO UPDATE SET source = excluded.source, title = excluded.title, @@ -137,6 +138,7 @@ func (s *genericStore) UpsertPullRequests(ctx context.Context, prs []PullRequest changed_files = excluded.changed_files, jira_key = excluded.jira_key, first_review_at = excluded.first_review_at, + first_human_review_at = excluded.first_human_review_at, merged_at = excluded.merged_at, closed_at = excluded.closed_at, fetched_at = excluded.fetched_at`) @@ -153,7 +155,7 @@ func (s *genericStore) UpsertPullRequests(ctx context.Context, prs []PullRequest _, err := stmt.ExecContext(ctx, p.ID, source, p.RepoID, p.Number, p.Title, p.State, nullable(p.AuthorID), nullable(p.AuthorLogin), nullable(p.HeadBranch), nullable(p.BaseBranch), p.Additions, p.Deletions, p.ChangedFiles, - nullable(p.JiraKey), p.CreatedAt, p.FirstReviewAt, p.MergedAt, p.ClosedAt, p.FetchedAt, + nullable(p.JiraKey), p.CreatedAt, p.FirstReviewAt, p.FirstHumanReviewAt, p.MergedAt, p.ClosedAt, p.FetchedAt, ) if err != nil { return fmt.Errorf("upsert pr %s: %w", p.ID, err) @@ -172,14 +174,15 @@ func (s *genericStore) UpsertPRReviews(ctx context.Context, reviews []PRReview) } defer func() { _ = tx.Rollback() }() q := rebind(s.d, ` - INSERT INTO pr_reviews (id, pr_id, source, reviewer_id, reviewer_login, state, submitted_at) - VALUES (?, ?, ?, ?, ?, ?, ?) + INSERT INTO pr_reviews (id, pr_id, source, reviewer_id, reviewer_login, state, submitted_at, is_bot) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT(id) DO UPDATE SET source = excluded.source, reviewer_id = excluded.reviewer_id, reviewer_login = excluded.reviewer_login, state = excluded.state, - submitted_at = excluded.submitted_at`) + submitted_at = excluded.submitted_at, + is_bot = excluded.is_bot`) stmt, err := tx.PrepareContext(ctx, q) if err != nil { return err @@ -190,8 +193,12 @@ func (s *genericStore) UpsertPRReviews(ctx context.Context, reviews []PRReview) if source == "" { source = "github" } + isBot := 0 + if r.IsBot { + isBot = 1 + } _, err := stmt.ExecContext(ctx, r.ID, r.PRID, source, nullable(r.ReviewerID), - nullable(r.ReviewerLogin), r.State, r.SubmittedAt) + nullable(r.ReviewerLogin), r.State, r.SubmittedAt, isBot) if err != nil { return fmt.Errorf("upsert review %s: %w", r.ID, err) } diff --git a/roadmap-planner/backend/internal/storage/migrations/0005_first_human_review.sql b/roadmap-planner/backend/internal/storage/migrations/0005_first_human_review.sql new file mode 100644 index 00000000..195c5d71 --- /dev/null +++ b/roadmap-planner/backend/internal/storage/migrations/0005_first_human_review.sql @@ -0,0 +1,26 @@ +-- ---------------------------------------------------------------------- +-- 0005_first_human_review — W2 (bot consolidation, 2026-05-19). +-- +-- Adds two columns that let the team-analytics layer separate bot noise +-- from human review activity without dropping the bot rows themselves +-- (we still want "renovate merged 80 PRs this week" visible under the +-- synthetic `bot` member). The contributions/bot.go startup pass +-- backfills both columns from existing data; no separate manual SQL +-- step is required. +-- +-- pr_reviews.is_bot +-- 1 when the reviewer_login matches the configured bot allowlist +-- (team_analytics.bot_logins). NetworkDensity adds `AND is_bot = 0` +-- to its first-review-latency query so the metric reflects the +-- human review experience. +-- +-- pull_requests.first_human_review_at +-- MIN(submitted_at) over non-bot reviews on the PR. Lets the +-- dashboards query human review latency with one column instead +-- of a window-function over pr_reviews on every panel load. +-- Populated on first sync after this migration applies; refreshed +-- on every subsequent upsert through UpsertPullRequests. +-- ---------------------------------------------------------------------- + +ALTER TABLE pr_reviews ADD COLUMN is_bot INTEGER NOT NULL DEFAULT 0; +ALTER TABLE pull_requests ADD COLUMN first_human_review_at TIMESTAMP; diff --git a/roadmap-planner/backend/internal/storage/store.go b/roadmap-planner/backend/internal/storage/store.go index 50cbd0d6..a4bce759 100644 --- a/roadmap-planner/backend/internal/storage/store.go +++ b/roadmap-planner/backend/internal/storage/store.go @@ -110,25 +110,26 @@ type IssueSnapshot struct { // share the same table without losing provenance. Defaults to "github" // for rows ingested before the 0003 migration. type PullRequest struct { - ID string // "owner/name#number" (github) or "group/sub/proj!iid" (gitlab) - Source string // "github" | "gitlab" - RepoID string - Number int - Title string - State string // "open" | "merged" | "closed" - AuthorID string // FK to members.id, empty if no match - AuthorLogin string // raw login from the source API, lower-cased - HeadBranch string - BaseBranch string - Additions int - Deletions int - ChangedFiles int - JiraKey string // any Jira key matched by the linker; was misnamed `EpicKey` pre-W5 - CreatedAt time.Time - FirstReviewAt *time.Time - MergedAt *time.Time - ClosedAt *time.Time - FetchedAt time.Time + ID string // "owner/name#number" (github) or "group/sub/proj!iid" (gitlab) + Source string // "github" | "gitlab" + RepoID string + Number int + Title string + State string // "open" | "merged" | "closed" + AuthorID string // FK to members.id, empty if no match + AuthorLogin string // raw login from the source API, lower-cased + HeadBranch string + BaseBranch string + Additions int + Deletions int + ChangedFiles int + JiraKey string // any Jira key matched by the linker; was misnamed `EpicKey` pre-W5 + CreatedAt time.Time + FirstReviewAt *time.Time + FirstHumanReviewAt *time.Time // W2: MIN(submitted_at) over non-bot reviews + MergedAt *time.Time + ClosedAt *time.Time + FetchedAt time.Time } // PRReview is one review event on a PR or MR. @@ -148,6 +149,7 @@ type PRReview struct { ReviewerLogin string // raw login from the source API, lower-cased State string // approved | changes_requested | commented SubmittedAt time.Time + IsBot bool // W2: reviewer_login matched the configured bot allowlist } // MemberIdentity is the operator-editable identity payload for the PATCH diff --git a/roadmap-planner/docs/team-analytics/CHANGES.md b/roadmap-planner/docs/team-analytics/CHANGES.md index 359276d1..a39d40f0 100644 --- a/roadmap-planner/docs/team-analytics/CHANGES.md +++ b/roadmap-planner/docs/team-analytics/CHANGES.md @@ -8,39 +8,34 @@ should consult this when upgrading. Newest entries first. +## W2 — Bot consolidation under one synthetic `bot` member + +**What changed** + +- Migration `0005_first_human_review.sql` adds + `pr_reviews.is_bot` and `pull_requests.first_human_review_at`. +- New `team_analytics.bot_logins []string` config knob — explicit + list, case-folded, no suffix magic. +- Github + GitLab syncers reassign bot author/reviewer to the `"bot"` + member id, tag `is_bot`, and compute `first_human_review_at` live. +- `contributions.ConsolidateBots` runs once on every startup as a + retroactive backfill (idempotent). +- `NetworkDensity` reads `first_human_review_at` and joins with + `AND rv.is_bot = 0`. + +**Audit cross-reference:** `docs/team-analytics/audit-2026-05-19/REPORT.md` +finding B4 (56% of review rows are bots). + ## W1 — Member allowlist + GitLab `with_shared=false` + Pass B instance sweep **What changed** - `gitlab/client.go::ListGroupProjects` sends `with_shared=false`. - `contributions.BuildAllowlist(cfg)` derives the W1 allowlist from - the prefill maps minus `team_analytics.member_denylist`. The - synthetic `bot` member id is always included. -- Aggregator INSERTs carry `AND COALESCE(m.id, pr.author_id) IN (?, …)` - when the allowlist is enabled. Post-rebuild cleanup deletes rollup - rows for members no longer in the set. -- `GET /api/contributions/members` and `/team` drop members outside - the allowlist. -- New GitLab Pass B (`gitlab.member_instance_sweep: true`) sweeps - `/api/v4/merge_requests?scope=all&author_username=` per allowlisted - member. - -**Config additions** (all default off): - -```yaml -gitlab: - member_instance_sweep: false -team_analytics: - member_denylist: [] -``` - -**Activation rules** - -| prefills | denylist | allowlist | -|---|---|---| -| empty | empty | disabled (pre-W1) | -| populated | any | enabled, denylist subtracts | -| empty | populated | disabled (refuse to collapse to `{bot}`) | + the prefill maps minus `team_analytics.member_denylist`. +- Aggregator filters on the allowlist when enabled; cleanup deletes + rollup rows for members no longer in the set. +- New GitLab Pass B per-allowlisted-member instance-wide MR sweep. **Audit cross-reference:** `docs/team-analytics/audit-2026-05-19/REPORT.md` findings B1, B10, B11, B6. @@ -49,12 +44,9 @@ findings B1, B10, B11, B6. **What changed** -- `SprintStats` JSON gains `todo`, `in_progress`, `cancelled`. `wip` - stays for one release as derived alias. -- New `team_analytics.statuses` config block with W4 default mapping - pulled live from DEVOPS Jira. -- `contributions/status_lanes.go::StatusClassifier` does the lookup. -- `sprintCounts` swaps `resolved_at IS NULL` for the classifier. +- `SprintStats` JSON gains `todo`, `in_progress`, `cancelled`. +- New `team_analytics.statuses` config block. +- `contributions/status_lanes.go::StatusClassifier`. - Member-profile sprint card renders 6 KPI tiles. **Audit cross-reference:** `docs/team-analytics/audit-2026-05-19/REPORT.md` @@ -69,12 +61,10 @@ finding B7. - Migration `0008_quarter_assignments.sql`. - New `contributions.QuarterResolver` (period.go). - `MemberSummary.PeriodTotals` populated on `?period=release_quarter`. -- Window expands to 364 days when `?period=release_quarter`. -**Deferred to follow-up PR** +**Deferred** -- Jira sync pass that walks Milestone summaries to populate - `quarter_assignments` from the `^(\d{4}Q[1-4])[::]` prefix. +- Jira sync Milestone-prefix pass. - Frontend `?period=` tab. **Audit cross-reference:** `docs/team-analytics/audit-2026-05-19/REPORT.md` @@ -82,21 +72,10 @@ finding B13. ## W5 — `pull_requests.epic_key` → `jira_key` rename -**What changed** - -- Migration `0006_rename_epic_key_to_jira_key.sql`. -- Go struct field `PullRequest.EpicKey` → `PullRequest.JiraKey`, no - alias. - **Audit cross-reference:** `docs/team-analytics/audit-2026-05-19/REPORT.md` finding B8. ## W6 — GitLab parity: `hydrate_diff` default flips to `true` -**What changed** - -- `gitlab.NewSyncer` constructs with `HydrateDiff: true` (was `false`). -- viper default for `gitlab.hydrate_diff` is now `true`. - **Audit cross-reference:** `docs/team-analytics/audit-2026-05-19/REPORT.md` finding B12.