diff --git a/roadmap-planner/backend/cmd/server/main.go b/roadmap-planner/backend/cmd/server/main.go index 511e71f..9aae4c1 100644 --- a/roadmap-planner/backend/cmd/server/main.go +++ b/roadmap-planner/backend/cmd/server/main.go @@ -210,9 +210,13 @@ func initTeamAnalytics(ctx context.Context, router *gin.Engine, cfg *config.Conf service.SetPillarMap(pillarMap) service.SetStatusClassifier(contributions.NewStatusClassifier(cfg.TeamAnalytics.Statuses)) aggregator := contributions.NewAggregator(store) - api.AddContributionsRoutes(router, store, service, aggregator) + allowlist := contributions.BuildAllowlist(cfg.TeamAnalytics) + aggregator.SetAllowlist(allowlist) + api.AddContributionsRoutes(router, store, service, aggregator, allowlist) logger.Info("Contributions API routes added", - zap.Int("pillars_configured", len(pillarMap.Order()))) + zap.Int("pillars_configured", len(pillarMap.Order())), + zap.Bool("allowlist_enabled", allowlist.Enabled()), + zap.Int("allowlist_size", allowlist.Size())) // Optional Jira sync goroutine. // @@ -259,7 +263,7 @@ func initTeamAnalytics(ctx context.Context, router *gin.Engine, cfg *config.Conf // Optional GitLab sync goroutine. if cfg.GitLab.Enabled { - startGitLabSync(ctx, cfg, store, aggregator) + startGitLabSync(ctx, cfg, store, aggregator, allowlist) } go func() { @@ -344,7 +348,7 @@ func buildGitHubClient(cfg *config.GitHub) (*ghclient.Client, string, error) { // // Same fail-soft contract as startGitHubSync — bad config logs and // continues without a GitLab sync rather than refusing to start. -func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator) { +func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator, allowlist contributions.Allowlist) { specs := parseGroupSpecs(cfg.GitLab.Groups) if len(specs) == 0 { logger.Warn("gitlab.enabled but gitlab.groups is empty; skipping sync") @@ -372,6 +376,15 @@ func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Stor syncer := glclient.NewSyncer(client, store, specs, glclient.DefaultLinker(projectKey), backfill) syncer.HydrateDiff = cfg.GitLab.HydrateDiff syncer.IncludeArchived = cfg.GitLab.IncludeArchived + syncer.MemberInstanceSweep = cfg.GitLab.MemberInstanceSweep + if allowlist.Enabled() { + ids := allowlist.IDs() + allowed := make(map[string]struct{}, len(ids)) + for _, id := range ids { + allowed[id] = struct{}{} + } + syncer.AllowedMemberIDs = allowed + } interval, err := time.ParseDuration(cfg.GitLab.SyncInterval) if err != nil { @@ -381,7 +394,9 @@ func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Stor logger.Info("GitLab sync started", zap.Duration("interval", interval), zap.Int("specs", len(specs)), - zap.Int("backfill_days", backfill)) + zap.Int("backfill_days", backfill), + zap.Bool("pass_b_sweep", syncer.MemberInstanceSweep && len(syncer.AllowedMemberIDs) > 0), + zap.Int("allowlisted_members", len(syncer.AllowedMemberIDs))) } func parseGroupSpecs(specs []string) []glclient.GroupSpec { diff --git a/roadmap-planner/backend/config.example.yaml b/roadmap-planner/backend/config.example.yaml index 28756b9..431c9dd 100644 --- a/roadmap-planner/backend/config.example.yaml +++ b/roadmap-planner/backend/config.example.yaml @@ -123,4 +123,46 @@ github: installation_id: 0 # numeric, from the org's "installed apps" page # Provide ONE of: private_key_path: "" # path to the PEM file (mount via Secret) - private_key_pem: "" # raw PEM contents (use env GITHUB_APP_PRIVATE_KEY_PEM in K8s) \ No newline at end of file + private_key_pem: "" # raw PEM contents (use env GITHUB_APP_PRIVATE_KEY_PEM in K8s) + +# GitLab ingestion — mirrors `github` for self-hosted GitLab installs. +# Requires storage.enabled = true. +gitlab: + enabled: false + base_url: "" # e.g. "https://gitlab-ce.example.com" + token: "" # personal access token, read_api scope (or GITLAB_TOKEN env) + sync_interval: "30m" + project_key: "" # for the default Linker; defaults to jira.project + backfill_days: 0 # 0 = inherit storage.backfill_days + groups: + - "devops/**" # "**" = whole subtree; "group/*" = direct children only + hydrate_diff: false # fetch additions/deletions/changed_files per merged MR + include_archived: false # include archived projects in the sweep + # W1 Pass B (2026-05-19). When true, every cycle also sweeps + # `/merge_requests?scope=all&author_username=` for every member in + # the derived allowlist (`team_analytics.{github,gitlab}_username_prefills` + # minus `team_analytics.member_denylist`). Captures MRs your team + # filed in projects outside `groups`. No-op when no prefills are + # configured — adopt gradually. + member_instance_sweep: false + +# Team analytics — pillar attribution + identity prefills + W1 filters. +# All keys here are optional and additive; the dashboard works without +# them (it just shows the un-curated raw data). +team_analytics: + # Identity prefills — one-shot github_login / gitlab_username + # assignment to Jira members on the first sync. Manual UI edits always + # win on subsequent syncs. + github_login_prefills: + # daniel: danielfbm + # jtcheng: chengjingtao + gitlab_username_prefills: + # daniel: daniel + # jtcheng: chengjingtao + # W1 allowlist denylist. The effective allowlist is the union of the + # two prefill maps above minus the entries here. Use this to suppress + # ex-team-members or contractors who shouldn't count. + member_denylist: [] + # - gxjiao + # - lmhe + # pillars: { … } # see docs/team-analytics/PROPOSAL.md \ No newline at end of file diff --git a/roadmap-planner/backend/internal/api/handlers/contributions.go b/roadmap-planner/backend/internal/api/handlers/contributions.go index a7286f8..d3e0ed4 100644 --- a/roadmap-planner/backend/internal/api/handlers/contributions.go +++ b/roadmap-planner/backend/internal/api/handlers/contributions.go @@ -26,20 +26,33 @@ import ( // contributions.Service, and returns JSON. All logic lives one layer // down. This makes it easy to keep the API stable while we evolve the // rollup pipeline (B2/B3). +// +// allowlist is the W1 "who counts" filter; when disabled (Enabled() +// returns false) every endpoint reverts to the pre-W1 behaviour and +// shows whoever is in the underlying tables. type ContributionsHandler struct { store storage.Store service *contributions.Service aggregator *contributions.Aggregator + allowlist contributions.Allowlist } func NewContributionsHandler(store storage.Store, service *contributions.Service, aggregator *contributions.Aggregator) *ContributionsHandler { return &ContributionsHandler{store: store, service: service, aggregator: aggregator} } +// SetAllowlist installs the W1 allowlist filter. Safe to call at +// startup; not safe to swap at runtime (no synchronisation on reads). +func (h *ContributionsHandler) SetAllowlist(al contributions.Allowlist) { + h.allowlist = al +} + // ListMembers — GET /api/contributions/members?include_inactive=1 // // Inactive Jira users (deactivated accounts) are hidden by default — the -// team-overview UI only shows people who can still receive work. +// team-overview UI only shows people who can still receive work. When +// the W1 allowlist is configured, members outside it are filtered out +// regardless of their Active flag. func (h *ContributionsHandler) ListMembers(c *gin.Context) { members, err := h.store.ListMembers(c.Request.Context()) if err != nil { @@ -49,6 +62,7 @@ func (h *ContributionsHandler) ListMembers(c *gin.Context) { if !truthy(c.Query("include_inactive")) { members = filterActive(members) } + members = filterAllowlist(members, h.allowlist) c.JSON(http.StatusOK, gin.H{"members": members}) } @@ -96,6 +110,19 @@ func (h *ContributionsHandler) TeamOverview(c *gin.Context) { out = filtered } } + // W1 allowlist: when enabled, drop summaries for members outside + // the configured set. The aggregator already filters them out of + // the rollup table, but a manual rebuild that uses a different + // allowlist may have left stragglers — defence in depth. + if h.allowlist.Enabled() { + filtered := out[:0] + for _, ms := range out { + if h.allowlist.Contains(ms.MemberID) { + filtered = append(filtered, ms) + } + } + out = filtered + } c.JSON(http.StatusOK, gin.H{"members": out, "from": q.From, "to": q.To}) } @@ -403,3 +430,18 @@ func filterActive(in []storage.Member) []storage.Member { } return out } + +// filterAllowlist returns only members whose ID is in the allowlist. +// No-op when the allowlist is disabled. +func filterAllowlist(in []storage.Member, al contributions.Allowlist) []storage.Member { + if !al.Enabled() { + return in + } + out := in[:0] + for _, m := range in { + if al.Contains(m.ID) { + out = append(out, m) + } + } + return out +} diff --git a/roadmap-planner/backend/internal/api/routes.go b/roadmap-planner/backend/internal/api/routes.go index fcfb62b..39ed091 100644 --- a/roadmap-planner/backend/internal/api/routes.go +++ b/roadmap-planner/backend/internal/api/routes.go @@ -147,11 +147,12 @@ func NewRouter(cfg *config.Config) *gin.Engine { // GET /api/contributions/network — review-network density panel // GET /api/contributions/pillars — pillar throughput stack // GET /api/contributions/status — last-sync timestamps -func AddContributionsRoutes(router *gin.Engine, store storage.Store, service *contributions.Service, aggregator *contributions.Aggregator) { +func AddContributionsRoutes(router *gin.Engine, store storage.Store, service *contributions.Service, aggregator *contributions.Aggregator, allowlist contributions.Allowlist) { if store == nil || service == nil { return } h := handlers.NewContributionsHandler(store, service, aggregator) + h.SetAllowlist(allowlist) api := router.Group("/api") g := api.Group("/contributions") g.Use(middleware.AuthMiddleware()) diff --git a/roadmap-planner/backend/internal/config/config.go b/roadmap-planner/backend/internal/config/config.go index 1dcf3f5..f5144a5 100644 --- a/roadmap-planner/backend/internal/config/config.go +++ b/roadmap-planner/backend/internal/config/config.go @@ -88,6 +88,17 @@ type TeamAnalytics struct { // daniel: daniel // jtcheng: chengjingtao GitLabUsernamePrefills map[string]string `mapstructure:"gitlab_username_prefills" yaml:"gitlab_username_prefills"` + // MemberDenylist is the operator-curated list of Jira member ids + // (slugified email — the same key shape used in the prefill maps) + // that should be excluded from the team-analytics rollups and from + // every contributions API response, regardless of whether they have + // an entry in the prefill maps. + // + // This is the W1 escape hatch for people who appear in Jira (so the + // auto-discovery picks them up) but should not count toward the + // dashboard — bots that aren't on the bot allowlist, people who + // left the team, contractors loaned out to another pillar, etc. + MemberDenylist []string `mapstructure:"member_denylist" yaml:"member_denylist"` // Statuses is the W4 (2026-05-19) configurable status → lane map // used by the sprint card. Status names match Jira's `status.name` // case-folded; an unknown status falls back to `in_progress` and @@ -207,6 +218,12 @@ type GitLab struct { BackfillDays int `mapstructure:"backfill_days"` HydrateDiff bool `mapstructure:"hydrate_diff"` IncludeArchived bool `mapstructure:"include_archived"` + // MemberInstanceSweep turns on the W1 Pass B: after the group-scoped + // fetch finishes, sweep `/merge_requests?scope=all&author_username=` + // for every allowlisted member whose GitLab username we know. Off + // by default so existing installs upgrade with no behaviour change; + // flip on once `team_analytics.gitlab_username_prefills` is curated. + MemberInstanceSweep bool `mapstructure:"member_instance_sweep"` } // Logger represents logger configuration settings @@ -418,6 +435,7 @@ func Load() (*Config, error) { viper.SetDefault("gitlab.backfill_days", 0) viper.SetDefault("gitlab.hydrate_diff", true) viper.SetDefault("gitlab.include_archived", false) + viper.SetDefault("gitlab.member_instance_sweep", false) // GitHub defaults viper.SetDefault("github.enabled", false) @@ -490,6 +508,18 @@ func Load() (*Config, error) { TeamAnalytics TeamAnalytics `yaml:"team_analytics"` } if uerr := yaml.Unmarshal(raw, &preserve); uerr == nil && len(preserve.TeamAnalytics.Pillars) > 0 { + // Preserve the YAML-side pillar names (case-sensitive), + // but merge back the viper-loaded denylist + prefills + // so they aren't lost when the YAML file omits them. + if len(preserve.TeamAnalytics.MemberDenylist) == 0 { + preserve.TeamAnalytics.MemberDenylist = config.TeamAnalytics.MemberDenylist + } + if len(preserve.TeamAnalytics.GitHubLoginPrefills) == 0 { + preserve.TeamAnalytics.GitHubLoginPrefills = config.TeamAnalytics.GitHubLoginPrefills + } + if len(preserve.TeamAnalytics.GitLabUsernamePrefills) == 0 { + preserve.TeamAnalytics.GitLabUsernamePrefills = config.TeamAnalytics.GitLabUsernamePrefills + } config.TeamAnalytics = preserve.TeamAnalytics } } diff --git a/roadmap-planner/backend/internal/contributions/aggregator.go b/roadmap-planner/backend/internal/contributions/aggregator.go index d305ba7..48d6bbe 100644 --- a/roadmap-planner/backend/internal/contributions/aggregator.go +++ b/roadmap-planner/backend/internal/contributions/aggregator.go @@ -10,6 +10,7 @@ package contributions import ( "context" "fmt" + "strings" "time" "github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/logger" @@ -28,9 +29,14 @@ import ( // The intended cadence is: every successful Collect() in the Jira side // triggers a Rebuild for the affected window. Manual rebuild is also // exposed via the API for operators. +// +// SetAllowlist (W1) gates which members get rolled up; when disabled, +// Rebuild reverts to the pre-W1 "everyone with a Jira id" behaviour so +// the upgrade is opt-in. type Aggregator struct { - store storage.Store - logger *zap.Logger + store storage.Store + logger *zap.Logger + allowlist Allowlist } func NewAggregator(store storage.Store) *Aggregator { @@ -40,6 +46,11 @@ func NewAggregator(store storage.Store) *Aggregator { } } +// SetAllowlist installs the W1 "who counts" filter. Pass a zero-value +// Allowlist (Enabled() == false) to disable the filter and preserve the +// pre-W1 rollup shape — that's the safe rollback knob. +func (a *Aggregator) SetAllowlist(al Allowlist) { a.allowlist = al } + // Rebuild deletes existing rollup rows in [from, to) and recomputes them // from raw tables. Cheap because the rollup table is small; we use it as // our cache-coherency strategy rather than incremental updates. @@ -88,6 +99,15 @@ func (a *Aggregator) Rebuild(ctx context.Context, from, to time.Time) error { return fmt.Errorf("clear window: %w", err) } + // W1 allowlist: when enabled, the four INSERTs below all carry an + // `AND IN (?, …)` clause. The fragment is composed + // per-INSERT (the member expression differs: COALESCE(m.id, + // pr.author_id) for the PR passes, COALESCE(m.id, rv.reviewer_id) + // for the review pass, assignee_id for the Jira pass). The bound + // arguments are the same for every INSERT and get appended to each + // param slice. + allowArgs := a.allowArgs() + // PRs/MRs merged, by author × week. Pillar/component empty for now — // requires the repos table to be populated, which B2 fixes. // @@ -119,15 +139,17 @@ func (a *Aggregator) Rebuild(ctx context.Context, from, to time.Time) error { AND m.gitlab_username IS NOT NULL AND m.gitlab_username <> '' AND LOWER(m.gitlab_username) = pr.author_login) - WHERE COALESCE(m.id, pr.author_id) IS NOT NULL + WHERE COALESCE(m.id, pr.author_id) IS NOT NULL%s AND pr.merged_at IS NOT NULL AND pr.merged_at >= ? AND pr.merged_at < ? GROUP BY 1, 2 ON CONFLICT(member_id, week_start, pillar_id, component) DO UPDATE SET - prs_merged = excluded.prs_merged`, dialect.WeekStart("merged_at"))) + prs_merged = excluded.prs_merged`, dialect.WeekStart("merged_at"), a.allowFragment("COALESCE(m.id, pr.author_id)"))) - if _, err := db.ExecContext(ctx, prSQL, from, to); err != nil { + prArgs := append([]any{}, allowArgs...) + prArgs = append(prArgs, from, to) + if _, err := db.ExecContext(ctx, prSQL, prArgs...); err != nil { return fmt.Errorf("aggregate PRs: %w", err) } @@ -152,14 +174,16 @@ func (a *Aggregator) Rebuild(ctx context.Context, from, to time.Time) error { AND m.gitlab_username IS NOT NULL AND m.gitlab_username <> '' AND LOWER(m.gitlab_username) = pr.author_login) - WHERE COALESCE(m.id, pr.author_id) IS NOT NULL + WHERE COALESCE(m.id, pr.author_id) IS NOT NULL%s AND pr.created_at >= ? AND pr.created_at < ? GROUP BY 1, 2 ON CONFLICT(member_id, week_start, pillar_id, component) DO UPDATE SET - prs_opened = excluded.prs_opened`, dialect.WeekStart("pr.created_at"))) + prs_opened = excluded.prs_opened`, dialect.WeekStart("pr.created_at"), a.allowFragment("COALESCE(m.id, pr.author_id)"))) - if _, err := db.ExecContext(ctx, openedSQL, from, to); err != nil { + openedArgs := append([]any{}, allowArgs...) + openedArgs = append(openedArgs, from, to) + if _, err := db.ExecContext(ctx, openedSQL, openedArgs...); err != nil { return fmt.Errorf("aggregate opened PRs: %w", err) } @@ -185,14 +209,16 @@ func (a *Aggregator) Rebuild(ctx context.Context, from, to time.Time) error { AND m.gitlab_username IS NOT NULL AND m.gitlab_username <> '' AND LOWER(m.gitlab_username) = rv.reviewer_login) - WHERE COALESCE(m.id, rv.reviewer_id) IS NOT NULL + WHERE COALESCE(m.id, rv.reviewer_id) IS NOT NULL%s AND rv.submitted_at >= ? AND rv.submitted_at < ? GROUP BY 1, 2 ON CONFLICT(member_id, week_start, pillar_id, component) DO UPDATE SET - prs_reviewed = excluded.prs_reviewed`, dialect.WeekStart("submitted_at"))) + prs_reviewed = excluded.prs_reviewed`, dialect.WeekStart("submitted_at"), a.allowFragment("COALESCE(m.id, rv.reviewer_id)"))) - if _, err := db.ExecContext(ctx, reviewSQL, from, to); err != nil { + rvArgs := append([]any{}, allowArgs...) + rvArgs = append(rvArgs, from, to) + if _, err := db.ExecContext(ctx, reviewSQL, rvArgs...); err != nil { return fmt.Errorf("aggregate reviews: %w", err) } @@ -233,13 +259,15 @@ func (a *Aggregator) Rebuild(ctx context.Context, from, to time.Time) error { AND s.resolved_at < ? ) ranked WHERE rn = 1 - AND assignee_id IS NOT NULL + AND assignee_id IS NOT NULL%s GROUP BY 1, 2 ON CONFLICT(member_id, week_start, pillar_id, component) DO UPDATE SET jira_issues_done = excluded.jira_issues_done, - jira_points_done = excluded.jira_points_done`, dialect.WeekStart("resolved_at"))) + jira_points_done = excluded.jira_points_done`, dialect.WeekStart("resolved_at"), a.allowFragment("assignee_id"))) - if _, err := db.ExecContext(ctx, jiraSQL, from, to); err != nil { + jiraArgs := []any{from, to} + jiraArgs = append(jiraArgs, allowArgs...) + if _, err := db.ExecContext(ctx, jiraSQL, jiraArgs...); err != nil { return fmt.Errorf("aggregate jira: %w", err) } @@ -247,9 +275,74 @@ func (a *Aggregator) Rebuild(ctx context.Context, from, to time.Time) error { // pull_requests.first_review_at - pull_requests.created_at, p50 by // (reviewer_id, week). p50 across reviews per author per week. + // W1 cleanup: every rebuild leaves the four INSERTs above blocked + // on the allowlist, but historical rollup rows for now-denied + // members linger outside the rebuild window. A simple sweep keeps + // the table consistent with the configured set on every rebuild. + // No-op when the allowlist is disabled (rebinds away to a no-arg + // query that matches nothing). + if a.allowlist.Enabled() { + ids := a.allowlist.IDs() + placeholders := make([]string, len(ids)) + args := make([]any, len(ids)) + for i, id := range ids { + placeholders[i] = "?" + args[i] = id + } + cleanup := rebind(fmt.Sprintf( + `DELETE FROM member_week_metrics WHERE member_id NOT IN (%s)`, + strings.Join(placeholders, ", "), + )) + res, err := db.ExecContext(ctx, cleanup, args...) + if err != nil { + return fmt.Errorf("cleanup non-allowlisted rollups: %w", err) + } + var removed int64 + if res != nil { + removed, _ = res.RowsAffected() + } + a.logger.Info("allowlist cleanup", + zap.Int("allowlist_size", len(ids)), + zap.Int64("rows_removed", removed)) + } + return nil } +// allowArgs returns the parameter slice for the IN clause emitted by +// allowFragment. It's separated so callers can append it onto each +// INSERT's existing param slice (the order of bound args has to match +// the order of '?' placeholders in the final, rebound SQL). +func (a *Aggregator) allowArgs() []any { + if !a.allowlist.Enabled() { + return nil + } + ids := a.allowlist.IDs() + out := make([]any, len(ids)) + for i, id := range ids { + out[i] = id + } + return out +} + +// allowFragment returns the SQL fragment that filters by the W1 +// allowlist on the given member-id expression. The fragment is the +// empty string when the allowlist is disabled, so the surrounding +// query keeps its pre-W1 shape. When enabled, the fragment starts +// with " AND " so it can be appended straight after an existing +// `WHERE x IS NOT NULL` predicate. +// +// Bound args for the '?' placeholders come from allowArgs and are +// appended to the INSERT's existing param slice in source order. +func (a *Aggregator) allowFragment(memberExpr string) string { + if !a.allowlist.Enabled() { + return "" + } + placeholders := strings.Repeat("?, ", a.allowlist.Size()) + placeholders = strings.TrimSuffix(placeholders, ", ") + return fmt.Sprintf("\n\t\t AND %s IN (%s)", memberExpr, placeholders) +} + // RebuildRecent is a convenience wrapper for the auto-trigger after a // successful sync: it rebuilds the rollup over a recent-history window // large enough to cover both the backfill default and any out-of-band diff --git a/roadmap-planner/backend/internal/contributions/aggregator_test.go b/roadmap-planner/backend/internal/contributions/aggregator_test.go index 2b3cdea..8b04f0d 100644 --- a/roadmap-planner/backend/internal/contributions/aggregator_test.go +++ b/roadmap-planner/backend/internal/contributions/aggregator_test.go @@ -13,6 +13,7 @@ import ( "testing" "time" + "github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/config" "github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/storage" ) @@ -112,3 +113,101 @@ func TestAggregatorJiraRollup(t *testing.T) { t.Fatalf("week_start=%s want %s", r.WeekStart, weekStart) } } + +// TestAggregatorAllowlist seeds three GitHub PRs by three different +// authors, configures an allowlist that names only one of them, and +// verifies the rollup keeps just that author. The other two authors +// have GitHub-login prefills (so they're real Jira members) but one +// is in the denylist and the other isn't in either prefill map — both +// have to disappear from `member_week_metrics`. +// +// Then it flips the allowlist off and re-runs Rebuild to confirm the +// no-filter sentinel preserves the pre-W1 shape (all three return). +func TestAggregatorAllowlist(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + store, err := storage.OpenSQLite(filepath.Join(dir, "agg-allow.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + if err := store.Migrate(ctx); err != nil { + t.Fatalf("migrate: %v", err) + } + + // Three Jira members, each linked to a distinct GitHub login. + for _, m := range []storage.Member{ + {ID: "daniel", DisplayName: "Daniel", GitHubLogin: "danielfbm", Active: true}, + {ID: "zhwang", DisplayName: "ZH Wang", GitHubLogin: "zhwang", Active: true}, + {ID: "stranger", DisplayName: "External Author", GitHubLogin: "stranger", Active: true}, + } { + if err := store.UpsertMember(ctx, m); err != nil { + t.Fatalf("upsert %s: %v", m.ID, err) + } + } + + // Three merged PRs in the same week, one per author. + now := time.Now().UTC().Truncate(time.Second) + week := MondayOf(now).Add(36 * time.Hour) + for i, login := range []string{"danielfbm", "zhwang", "stranger"} { + merged := week.Add(time.Duration(i) * time.Hour) + if err := store.UpsertPullRequests(ctx, []storage.PullRequest{{ + ID: "alaudadevops/repo#" + login, + Source: "github", + RepoID: "alaudadevops/repo", + Number: i + 1, + Title: "PR by " + login, + State: "merged", + AuthorLogin: login, + CreatedAt: merged.Add(-24 * time.Hour), + MergedAt: &merged, + FetchedAt: now, + }}); err != nil { + t.Fatalf("upsert PR %s: %v", login, err) + } + } + + agg := NewAggregator(store) + // Allowlist via the prefill maps: daniel and zhwang are configured, + // stranger is not — and zhwang is explicitly denied. After the + // filter the only survivor is daniel (+ the bot synthetic, which + // has no PRs in this fixture so it contributes nothing). + agg.SetAllowlist(BuildAllowlist(config.TeamAnalytics{ + GitHubLoginPrefills: map[string]string{ + "daniel": "danielfbm", + "zhwang": "zhwang", + }, + MemberDenylist: []string{"zhwang"}, + })) + rebuildFrom := MondayOf(week).Add(-7 * 24 * time.Hour) + rebuildTo := MondayOf(week).Add(14 * 24 * time.Hour) + if err := agg.Rebuild(ctx, rebuildFrom, rebuildTo); err != nil { + t.Fatalf("rebuild filtered: %v", err) + } + rows, err := store.MemberWeekMetrics(ctx, storage.MemberWeekQuery{From: rebuildFrom, To: rebuildTo}) + if err != nil { + t.Fatalf("read filtered: %v", err) + } + if len(rows) != 1 || rows[0].MemberID != "daniel" { + t.Fatalf("filtered rollup = %+v, want exactly daniel", rows) + } + + // Flip filter off and confirm all three rows come back. + agg.SetAllowlist(Allowlist{}) + if err := agg.Rebuild(ctx, rebuildFrom, rebuildTo); err != nil { + t.Fatalf("rebuild unfiltered: %v", err) + } + rows, err = store.MemberWeekMetrics(ctx, storage.MemberWeekQuery{From: rebuildFrom, To: rebuildTo}) + if err != nil { + t.Fatalf("read unfiltered: %v", err) + } + got := map[string]bool{} + for _, r := range rows { + got[r.MemberID] = true + } + for _, id := range []string{"daniel", "zhwang", "stranger"} { + if !got[id] { + t.Fatalf("unfiltered rollup missing %s; got %+v", id, got) + } + } +} diff --git a/roadmap-planner/backend/internal/contributions/allowlist.go b/roadmap-planner/backend/internal/contributions/allowlist.go new file mode 100644 index 0000000..b71f067 --- /dev/null +++ b/roadmap-planner/backend/internal/contributions/allowlist.go @@ -0,0 +1,135 @@ +/* +Copyright 2026 The AlaudaDevops Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +*/ + +package contributions + +import ( + "sort" + "strings" + + "github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/config" +) + +// BotMemberID is the synthetic member every recognised bot login is +// reassigned to. The Jira sync auto-creates this row with empty logins; +// we keep it permanently in the allowlist so that W2's "merge bot +// contributions into one fictional Bot user" surfaces a single dashboard +// row instead of being filtered away. +const BotMemberID = "bot" + +// Allowlist is the W1 "who counts" set — the operator-curated subset of +// Jira member ids whose contributions land in the rollup table and in +// every contributions-API response. An empty Allowlist is the sentinel +// for "no filter configured"; callers must respect Enabled() before +// applying it as a WHERE clause, otherwise an empty set would silently +// drop the whole dashboard. +// +// The set is derived rather than configured: the operator declares who +// the team is via the `github_login_prefills` and `gitlab_username_prefills` +// maps, and W1 reuses that declaration. The escape hatch is the +// `member_denylist` field — entries there override the prefills (a +// person can be configured as a contributor and still excluded). +// +// Allowlist values are Jira member ids (slugified email), lowercased, +// trimmed — the same key shape used in the prefill maps. The membership +// check is also case-folded so callers don't need to normalise IDs +// before lookup. +type Allowlist struct { + set map[string]struct{} +} + +// BuildAllowlist computes the effective allowlist from the team-analytics +// config block. The set is the union of: +// - the keys of GitHubLoginPrefills +// - the keys of GitLabUsernamePrefills +// - the synthetic BotMemberID (W2 keeps bot rollups visible) +// +// minus everything in MemberDenylist. +// +// If both prefill maps are empty AND the denylist is empty, the returned +// Allowlist is empty — meaning "no filter configured, preserve today's +// behaviour" (the aggregator and handlers fall back to the pre-W1 path +// when the allowlist is empty). This gives operators a clean upgrade: +// adopting W1 is opt-in via the existing prefill curation. +func BuildAllowlist(cfg config.TeamAnalytics) Allowlist { + prefilled := len(cfg.GitHubLoginPrefills) > 0 || len(cfg.GitLabUsernamePrefills) > 0 + if !prefilled && len(cfg.MemberDenylist) == 0 { + return Allowlist{} + } + deny := make(map[string]struct{}, len(cfg.MemberDenylist)) + for _, id := range cfg.MemberDenylist { + k := normalize(id) + if k == "" { + continue + } + deny[k] = struct{}{} + } + out := make(map[string]struct{}, len(cfg.GitHubLoginPrefills)+len(cfg.GitLabUsernamePrefills)+1) + add := func(id string) { + k := normalize(id) + if k == "" { + return + } + if _, denied := deny[k]; denied { + return + } + out[k] = struct{}{} + } + for id := range cfg.GitHubLoginPrefills { + add(id) + } + for id := range cfg.GitLabUsernamePrefills { + add(id) + } + if prefilled { + // Only inject `bot` when we're already filtering — otherwise an + // installation with only a denylist would have its allowlist + // shrink to `{bot}` and silently drop every human contributor. + add(BotMemberID) + } + return Allowlist{set: out} +} + +// Enabled reports whether the allowlist should be applied as a filter. +// An empty set means "no filter configured" — callers should preserve +// the pre-W1 behaviour rather than reject everything. +func (a Allowlist) Enabled() bool { return len(a.set) > 0 } + +// Size returns the number of member ids in the set. +func (a Allowlist) Size() int { return len(a.set) } + +// Contains reports whether the given member id is in the allowlist. +// Empty allowlist returns true for any id (matches the "no filter +// configured" semantics — see Enabled). +func (a Allowlist) Contains(memberID string) bool { + if !a.Enabled() { + return true + } + _, ok := a.set[normalize(memberID)] + return ok +} + +// IDs returns the allowlisted member ids in deterministic order. +// Returns nil when the allowlist is empty so callers can branch on it. +func (a Allowlist) IDs() []string { + if !a.Enabled() { + return nil + } + out := make([]string, 0, len(a.set)) + for id := range a.set { + out = append(out, id) + } + sort.Strings(out) + return out +} + +// normalize matches the resolution rules used by the Jira sync when it +// derives member ids — lowercase + trim — so a denylist entry of +// "ZhWang " correctly filters out the `zhwang` member. +func normalize(id string) string { + return strings.ToLower(strings.TrimSpace(id)) +} diff --git a/roadmap-planner/backend/internal/contributions/allowlist_test.go b/roadmap-planner/backend/internal/contributions/allowlist_test.go new file mode 100644 index 0000000..ad30f05 --- /dev/null +++ b/roadmap-planner/backend/internal/contributions/allowlist_test.go @@ -0,0 +1,107 @@ +/* +Copyright 2026 The AlaudaDevops Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +*/ + +package contributions + +import ( + "reflect" + "testing" + + "github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/config" +) + +func TestBuildAllowlist(t *testing.T) { + cases := []struct { + name string + cfg config.TeamAnalytics + want []string // nil means Allowlist.Enabled() should be false + }{ + { + name: "empty config = disabled", + cfg: config.TeamAnalytics{}, + want: nil, + }, + { + name: "prefills only — bot is auto-injected", + cfg: config.TeamAnalytics{ + GitHubLoginPrefills: map[string]string{ + "daniel": "danielfbm", + "jtcheng": "chengjingtao", + }, + GitLabUsernamePrefills: map[string]string{ + "daniel": "daniel", + "qliu": "qingliu", + }, + }, + want: []string{"bot", "daniel", "jtcheng", "qliu"}, + }, + { + name: "denylist removes prefilled entries", + cfg: config.TeamAnalytics{ + GitHubLoginPrefills: map[string]string{ + "daniel": "danielfbm", + "zhwang": "zonghang", + }, + MemberDenylist: []string{"zhwang"}, + }, + want: []string{"bot", "daniel"}, + }, + { + name: "case is folded and whitespace trimmed", + cfg: config.TeamAnalytics{ + GitHubLoginPrefills: map[string]string{ + "Daniel": "danielfbm", + "ZhWang ": "zonghang", + }, + MemberDenylist: []string{" ZHWANG"}, + }, + want: []string{"bot", "daniel"}, + }, + { + name: "denylist alone keeps allowlist disabled", + cfg: config.TeamAnalytics{ + MemberDenylist: []string{"zhwang"}, + }, + // Denylist with no prefills configured: we can't derive a + // human-meaningful allowlist, so we leave the filter off + // rather than collapse to {bot}. + want: nil, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := BuildAllowlist(tc.cfg) + if tc.want == nil { + if got.Enabled() { + t.Fatalf("expected disabled allowlist, got %v", got.IDs()) + } + if got.Size() != 0 { + t.Fatalf("expected size 0, got %d", got.Size()) + } + // Disabled allowlist must accept any id (preserve old behaviour). + if !got.Contains("anyone") { + t.Fatalf("disabled allowlist must Contains(anyone)") + } + return + } + if !got.Enabled() { + t.Fatalf("expected enabled allowlist for %v", tc.want) + } + if !reflect.DeepEqual(got.IDs(), tc.want) { + t.Fatalf("IDs() = %v, want %v", got.IDs(), tc.want) + } + for _, id := range tc.want { + if !got.Contains(id) { + t.Fatalf("Contains(%q) = false, want true", id) + } + } + if got.Contains("definitely-not-in-the-set") { + t.Fatalf("Contains(unknown) = true, want false") + } + }) + } +} diff --git a/roadmap-planner/backend/internal/gitlab/client.go b/roadmap-planner/backend/internal/gitlab/client.go index 94e32f0..a31ba2b 100644 --- a/roadmap-planner/backend/internal/gitlab/client.go +++ b/roadmap-planner/backend/internal/gitlab/client.go @@ -69,6 +69,7 @@ type Project struct { // MergeRequest mirrors what we persist into the shared pull_requests table. type MergeRequest struct { IID int `json:"iid"` + ProjectID int64 `json:"project_id"` Title string `json:"title"` State string `json:"state"` // "opened" | "closed" | "merged" | "locked" WebURL string `json:"web_url"` @@ -84,6 +85,12 @@ type MergeRequest struct { ID int64 `json:"id"` Username string `json:"username"` } `json:"author"` + // References.Full is the GitLab-canonical "//!" + // pointer. We rely on it in the instance-wide MR sweep to recover + // the project path-with-namespace without a per-MR show call. + References struct { + Full string `json:"full"` + } `json:"references"` // Diff stats are only populated when we explicitly request them on // the show endpoint; the list endpoint omits them. We hydrate a // follow-up call only for merged MRs (the few that drive the @@ -95,6 +102,35 @@ type MergeRequest struct { } `json:"-"` } +// ProjectPath returns the path-with-namespace ("group/sub/proj") for an +// MR returned by the instance-wide endpoint. It first tries the +// References.Full pointer ("group/sub/proj!iid"), then falls back to +// parsing the web_url. Returns the empty string if neither path works. +func (m MergeRequest) ProjectPath() string { + if full := m.References.Full; full != "" { + if idx := strings.Index(full, "!"); idx > 0 { + return full[:idx] + } + } + // web_url is of the form + // "https://gitlab.example.com////-/merge_requests/". + // Strip the scheme+host, then trim "/-/merge_requests/...". + if u := m.WebURL; u != "" { + // Skip past "scheme://host/". + const sep = "://" + if i := strings.Index(u, sep); i >= 0 { + rest := u[i+len(sep):] + if j := strings.Index(rest, "/"); j >= 0 { + rest = rest[j+1:] + if k := strings.Index(rest, "/-/merge_requests/"); k > 0 { + return rest[:k] + } + } + } + } + return "" +} + // Note is one MR comment. We only persist non-system non-author notes; // the classifier in sync.go derives review state from Body. type Note struct { @@ -130,6 +166,14 @@ type ListGroupProjectsOptions struct { // // The group identifier is URL-path-escaped — `devops/edge` works as the // path lookup. For very large groups bump PerPage to 100 (default). +// +// `with_shared=false` is enforced server-side: GitLab's default is +// `true`, which makes the endpoint return every shared project the +// group has access to. That was the root cause of the W1/B1 audit +// finding — prod's gitlab.groups=["devops/**"] pulled in +// `container-platform/*`, `ops/*` etc. and credited their authors as +// devops members. We never want that fan-out for ingest; per-member +// instance-wide coverage now lives in ListInstanceMergeRequests. func (c *Client) ListGroupProjects(ctx context.Context, group string, opts ListGroupProjectsOptions) ([]Project, error) { if opts.PerPage == 0 { opts.PerPage = 100 @@ -140,6 +184,7 @@ func (c *Client) ListGroupProjects(ctx context.Context, group string, opts ListG q.Set("per_page", strconv.Itoa(opts.PerPage)) q.Set("page", strconv.Itoa(page)) q.Set("include_subgroups", strconv.FormatBool(opts.IncludeSubgroups)) + q.Set("with_shared", "false") if !opts.IncludeArchived { q.Set("archived", "false") } @@ -218,6 +263,75 @@ func (c *Client) ListMergeRequests(ctx context.Context, projectID int64, opts Li return out, nil } +// ListInstanceMergeRequestsOptions controls the instance-wide MR sweep +// (the per-member Pass B in W1). The endpoint backing this is +// `GET /api/v4/merge_requests?scope=all&author_username=`. +// +// `with_shared` doesn't apply at this endpoint — GitLab always scopes +// the result by what the auth token can see. For our bot user with +// `read_api` over the alauda devops members, that's the full instance +// up to project visibility limits. +type ListInstanceMergeRequestsOptions struct { + AuthorUsername string + State string // "opened" | "closed" | "merged" | "all" (default "all") + UpdatedAfter time.Time + OrderBy string // "updated_at" (default) + Sort string // "asc" | "desc" (default "desc") + PerPage int + MaxPages int // 0 = unlimited +} + +// ListInstanceMergeRequests returns every MR the token can see that +// matches the supplied filter. We page until either MaxPages or a +// short page is hit. Pagination semantics mirror ListMergeRequests; the +// endpoint accepts the same `updated_after`/`order_by`/`sort`/`state` +// query knobs server-side. +// +// Author scoping is required — the unscoped endpoint returns the +// caller's MRs only, which isn't what Pass B needs. Pass the configured +// member's gitlab username here. +func (c *Client) ListInstanceMergeRequests(ctx context.Context, opts ListInstanceMergeRequestsOptions) ([]MergeRequest, error) { + if opts.AuthorUsername == "" { + return nil, fmt.Errorf("ListInstanceMergeRequests: AuthorUsername required") + } + if opts.State == "" { + opts.State = "all" + } + if opts.OrderBy == "" { + opts.OrderBy = "updated_at" + } + if opts.Sort == "" { + opts.Sort = "desc" + } + if opts.PerPage == 0 { + opts.PerPage = 100 + } + var out []MergeRequest + for page := 1; opts.MaxPages == 0 || page <= opts.MaxPages; page++ { + q := url.Values{} + q.Set("scope", "all") + q.Set("author_username", opts.AuthorUsername) + q.Set("state", opts.State) + q.Set("order_by", opts.OrderBy) + q.Set("sort", opts.Sort) + q.Set("per_page", strconv.Itoa(opts.PerPage)) + q.Set("page", strconv.Itoa(page)) + if !opts.UpdatedAfter.IsZero() { + q.Set("updated_after", opts.UpdatedAfter.UTC().Format(time.RFC3339)) + } + path := "/api/v4/merge_requests?" + q.Encode() + var batch []MergeRequest + if err := c.do(ctx, "GET", path, nil, &batch); err != nil { + return nil, fmt.Errorf("list instance MRs author=%s page %d: %w", opts.AuthorUsername, page, err) + } + out = append(out, batch...) + if len(batch) < opts.PerPage { + break + } + } + return out, nil +} + // GetMergeRequest fetches one MR with diff stats — the list endpoint // omits additions/deletions/changes_count, so we hydrate per-MR for // the merged ones we care about. diff --git a/roadmap-planner/backend/internal/gitlab/sync.go b/roadmap-planner/backend/internal/gitlab/sync.go index 6c41e89..070b4de 100644 --- a/roadmap-planner/backend/internal/gitlab/sync.go +++ b/roadmap-planner/backend/internal/gitlab/sync.go @@ -95,6 +95,22 @@ type Syncer struct { // `gitlab.hydrate_diff: false`. HydrateDiff bool + // MemberInstanceSweep enables Pass B (W1): after the project-scoped + // fetch finishes, sweep `/merge_requests?scope=all&author_username=` + // for each Jira member whose GitLab username we know. Captures MRs + // our members file in projects that aren't on the configured + // gitlab.groups list. Off by default; main.go flips it on when + // gitlab.member_instance_sweep is true. No-op when AllowedMemberIDs + // is empty. + MemberInstanceSweep bool + + // AllowedMemberIDs is the W1 set that Pass B walks. Each id must be + // a `members.id` (slugified email). Pass B looks up the matching + // `members.gitlab_username` and calls + // `/merge_requests?scope=all&author_username=`. Nil/empty + // disables Pass B entirely. + AllowedMemberIDs map[string]struct{} + wildcardCache *wildcardCache nowFn func() time.Time } @@ -163,16 +179,21 @@ func classifyNote(body string) (string, bool) { // The shape mirrors github.Syncer.Sync — see those comments for first- // run vs. incremental behaviour. func (s *Syncer) Sync(ctx context.Context) error { - if len(s.specs) == 0 { + passBActive := s.MemberInstanceSweep && len(s.AllowedMemberIDs) > 0 + if len(s.specs) == 0 && !passBActive { return nil } now := s.nowFn() - resolved, resolveErr := s.resolveProjects(ctx, now) - if resolveErr != nil { - s.logger.Warn("group spec resolution failed (partial)", zap.Error(resolveErr)) + var resolved []Project + var resolveErr error + if len(s.specs) > 0 { + resolved, resolveErr = s.resolveProjects(ctx, now) + if resolveErr != nil { + s.logger.Warn("group spec resolution failed (partial)", zap.Error(resolveErr)) + } } - if len(resolved) == 0 { + if len(resolved) == 0 && !passBActive { if resolveErr != nil { return resolveErr } @@ -207,6 +228,12 @@ func (s *Syncer) Sync(ctx context.Context) error { } } + // Track every storage id we've already written this cycle so Pass B + // can skip MRs Pass A already covered. Cheaper than asking the + // store after each upsert; uses the same key shape as the storage + // PK ("!"). + seenIDs := make(map[string]struct{}, 256) + for _, p := range resolved { mrs, err := s.client.ListMergeRequests(ctx, p.ID, ListMergeRequestsOptions{ State: "all", UpdatedAfter: since, MaxPages: 10, @@ -316,10 +343,25 @@ func (s *Syncer) Sync(ctx context.Context) error { if err := s.store.UpsertPRReviews(ctx, reviewBatch); err != nil { return fmt.Errorf("upsert MR reviews for %s: %w", p.PathWithNamespace, err) } + for _, r := range toStore { + seenIDs[r.ID] = struct{}{} + } totalMRs += len(toStore) totalReviews += len(reviewBatch) } + // Pass B (W1): per-allowlisted-member instance-wide MR sweep. + // Captures MRs our team filed in projects outside the configured + // `gitlab.groups` list. De-dupes against Pass A via seenIDs. + if s.MemberInstanceSweep && len(s.AllowedMemberIDs) > 0 { + passBMRs, passBReviews, passBErr := s.sweepInstanceByMember(ctx, members, since, runStart, seenIDs) + if passBErr != nil && firstErr == nil { + firstErr = passBErr + } + totalMRs += passBMRs + totalReviews += passBReviews + } + run := storage.CollectionRun{ ID: runID, CapturedAt: runStart, @@ -341,6 +383,177 @@ func (s *Syncer) Sync(ctx context.Context) error { return firstErr } +// sweepInstanceByMember runs Pass B (W1): for each allowlisted member +// whose `members.gitlab_username` is populated, sweep +// `/merge_requests?scope=all&author_username=` over the entire +// instance and upsert every MR not already covered by Pass A. +// +// `seenIDs` is mutated as we accept new rows so back-to-back members +// referencing the same MR (rare but possible — cross-author MRs are +// surfaced as the author's row only) don't double-upsert. +// +// Errors from any single member are logged and skipped: we don't want +// one bad ticker tick to abort the whole sweep. The first error is +// returned for the caller to bubble up into the collection_run. +func (s *Syncer) sweepInstanceByMember( + ctx context.Context, + members []storage.Member, + since time.Time, + runStart time.Time, + seenIDs map[string]struct{}, +) (int, int, error) { + byID := make(map[string]storage.Member, len(members)) + byUsername := make(map[string]string, len(members)) + for _, m := range members { + byID[m.ID] = m + if m.GitLabUsername != "" { + byUsername[strings.ToLower(m.GitLabUsername)] = m.ID + } + } + + totalMRs, totalReviews := 0, 0 + var firstErr error + for memberID := range s.AllowedMemberIDs { + m, ok := byID[memberID] + if !ok { + // Allowlisted member doesn't exist in the directory yet — + // either the Jira sync hasn't created the row yet, or the + // operator typo'd the denylist/prefill key. Either way Pass + // B has nothing to fetch for them; skip silently. + continue + } + if m.GitLabUsername == "" { + continue + } + mrs, err := s.client.ListInstanceMergeRequests(ctx, ListInstanceMergeRequestsOptions{ + AuthorUsername: m.GitLabUsername, + State: "all", + UpdatedAfter: since, + MaxPages: 20, + }) + if err != nil { + s.logger.Warn("Pass B: ListInstanceMergeRequests failed", + zap.String("member", memberID), + zap.String("gitlab_username", m.GitLabUsername), + zap.Error(err)) + if firstErr == nil { + firstErr = err + } + continue + } + s.logger.Debug("Pass B: fetched member MRs", + zap.String("member", memberID), zap.Int("count", len(mrs))) + + toStore := make([]storage.PullRequest, 0, len(mrs)) + reviewBatch := make([]storage.PRReview, 0) + for _, mr := range mrs { + projectPath := mr.ProjectPath() + if projectPath == "" { + s.logger.Warn("Pass B: skipping MR with empty project path", + zap.String("member", memberID), zap.Int("iid", mr.IID), zap.String("web_url", mr.WebURL)) + continue + } + id := fmt.Sprintf("%s!%d", projectPath, mr.IID) + if _, dup := seenIDs[id]; dup { + continue + } + + state := "open" + if mr.MergedAt != nil { + state = "merged" + } else if mr.State == "closed" || mr.State == "locked" { + state = "closed" + } + authorLogin := strings.ToLower(mr.Author.Username) + epicKey := "" + if s.linker != nil { + epicKey = s.linker.Link(mr) + } + rec := storage.PullRequest{ + ID: id, + Source: "gitlab", + RepoID: projectPath, + Number: mr.IID, + Title: mr.Title, + State: state, + AuthorID: byUsername[authorLogin], + AuthorLogin: authorLogin, + HeadBranch: mr.SourceBranch, + BaseBranch: mr.TargetBranch, + JiraKey: epicKey, + CreatedAt: mr.CreatedAt, + MergedAt: mr.MergedAt, + ClosedAt: mr.ClosedAt, + FetchedAt: runStart, + } + if s.HydrateDiff && mr.MergedAt != nil && mr.ProjectID > 0 { + if full, err := s.client.GetMergeRequest(ctx, mr.ProjectID, mr.IID); err == nil { + rec.Additions = full.Diff.Additions + rec.Deletions = full.Diff.Deletions + rec.ChangedFiles = full.Diff.ChangedFiles + } + } + + skipNotes := mr.Draft || mr.WorkInProg + if !skipNotes && mr.MergedAt == nil && mr.ClosedAt != nil && + time.Since(*mr.ClosedAt) > 7*24*time.Hour { + skipNotes = true + } + if !skipNotes && mr.ProjectID > 0 { + notes, err := s.client.ListMRNotes(ctx, mr.ProjectID, mr.IID) + if err != nil { + s.logger.Warn("Pass B: ListMRNotes failed", + zap.String("project", projectPath), + zap.Int("iid", mr.IID), zap.Error(err)) + } else { + var first *time.Time + for _, n := range notes { + if n.System { + continue + } + reviewerLogin := strings.ToLower(n.Author.Username) + if reviewerLogin == authorLogin { + continue + } + st, ok := classifyNote(n.Body) + if !ok { + continue + } + reviewBatch = append(reviewBatch, storage.PRReview{ + ID: fmt.Sprintf("%s!%d/n%d", projectPath, mr.IID, n.ID), + PRID: rec.ID, + Source: "gitlab", + ReviewerID: byUsername[reviewerLogin], + ReviewerLogin: reviewerLogin, + State: st, + SubmittedAt: n.CreatedAt, + }) + if first == nil || n.CreatedAt.Before(*first) { + first = &n.CreatedAt + } + } + rec.FirstReviewAt = first + } + } + toStore = append(toStore, rec) + seenIDs[id] = struct{}{} + } + + if len(toStore) == 0 { + continue + } + if err := s.store.UpsertPullRequests(ctx, toStore); err != nil { + return totalMRs, totalReviews, fmt.Errorf("Pass B upsert MRs for member %s: %w", memberID, err) + } + if err := s.store.UpsertPRReviews(ctx, reviewBatch); err != nil { + return totalMRs, totalReviews, fmt.Errorf("Pass B upsert MR reviews for member %s: %w", memberID, err) + } + totalMRs += len(toStore) + totalReviews += len(reviewBatch) + } + return totalMRs, totalReviews, firstErr +} + // resolveProjects expands every spec, dropping archived projects when // IncludeArchived is false. Duplicates across overlapping specs are // dropped here so we don't waste API budget syncing the same project diff --git a/roadmap-planner/backend/internal/gitlab/sync_test.go b/roadmap-planner/backend/internal/gitlab/sync_test.go index 0a98c4b..76a86bb 100644 --- a/roadmap-planner/backend/internal/gitlab/sync_test.go +++ b/roadmap-planner/backend/internal/gitlab/sync_test.go @@ -7,7 +7,20 @@ you may not use this file except in compliance with the License. package gitlab -import "testing" +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "net/url" + "path/filepath" + "sort" + "strings" + "testing" + "time" + + "github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/storage" +) func TestParseGroupSpec(t *testing.T) { cases := []struct { @@ -110,3 +123,211 @@ func TestSyncerHydrateDiffDefault(t *testing.T) { t.Fatalf("NewSyncer.HydrateDiff = false, want true (W6 default)") } } + +func TestMergeRequestProjectPath(t *testing.T) { + cases := []struct { + name string + mr MergeRequest + want string + }{ + { + name: "references.full takes priority", + mr: MergeRequest{ + References: struct { + Full string `json:"full"` + }{Full: "group/sub/proj!42"}, + WebURL: "https://gitlab.example/x/y/-/merge_requests/42", + }, + want: "group/sub/proj", + }, + { + name: "falls back to web_url", + mr: MergeRequest{ + WebURL: "https://gitlab.example.com/group/sub/proj/-/merge_requests/7", + }, + want: "group/sub/proj", + }, + { + name: "empty when nothing parses", + mr: MergeRequest{WebURL: "not a url"}, + want: "", + }, + } + for _, tc := range cases { + if got := tc.mr.ProjectPath(); got != tc.want { + t.Errorf("%s: ProjectPath() = %q, want %q", tc.name, got, tc.want) + } + } +} + +// TestSyncPassBMemberSweep verifies Pass B (W1): with no group specs +// resolved, only allowlisted members produce MRs, and a re-run with the +// same data leaves the row count unchanged (dedup against existing). +// +// The fake GitLab serves: +// - `/api/v4/merge_requests?author_username=daniel` → 2 MRs in +// `container-platform/edge` and `alaudadevops/toolbox`. +// - `/api/v4/merge_requests?author_username=stranger` → 1 MR (must +// NOT be ingested — stranger is not allowlisted). +// - Empty notes for every MR so we don't fight with note ordering. +func TestSyncPassBMemberSweep(t *testing.T) { + ctx := context.Background() + dir := t.TempDir() + store, err := storage.OpenSQLite(filepath.Join(dir, "gl.db")) + if err != nil { + t.Fatalf("open: %v", err) + } + t.Cleanup(func() { _ = store.Close() }) + if err := store.Migrate(ctx); err != nil { + t.Fatalf("migrate: %v", err) + } + if err := store.UpsertMember(ctx, storage.Member{ + ID: "daniel", DisplayName: "Daniel", GitLabUsername: "daniel", Active: true, + }); err != nil { + t.Fatalf("upsert daniel: %v", err) + } + if err := store.UpsertMember(ctx, storage.Member{ + ID: "stranger", DisplayName: "Stranger", GitLabUsername: "stranger", Active: true, + }); err != nil { + t.Fatalf("upsert stranger: %v", err) + } + + now := time.Now().UTC().Truncate(time.Second) + merged := now.Add(-24 * time.Hour) + mkMR := func(iid int, fullRef string, author string) MergeRequest { + webHost := "https://gitlab.example" + projPath := fullRef[:strings.Index(fullRef, "!")] + out := MergeRequest{ + IID: iid, ProjectID: int64(iid * 1000), Title: "fix", State: "merged", + WebURL: webHost + "/" + projPath + "/-/merge_requests/" + itoa(iid), + CreatedAt: merged.Add(-time.Hour), MergedAt: &merged, + } + out.Author.Username = author + out.References.Full = fullRef + return out + } + + mux := http.NewServeMux() + mux.HandleFunc("/api/v4/merge_requests", func(w http.ResponseWriter, r *http.Request) { + u, _ := url.Parse(r.RequestURI) + q := u.Query() + var out []MergeRequest + switch q.Get("author_username") { + case "daniel": + out = []MergeRequest{ + mkMR(1, "container-platform/edge!1", "daniel"), + mkMR(2, "alaudadevops/toolbox!2", "daniel"), + } + case "stranger": + out = []MergeRequest{mkMR(3, "ops/some-thing!3", "stranger")} + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(out) + }) + mux.HandleFunc("/api/v4/projects/", func(w http.ResponseWriter, r *http.Request) { + // Per-project notes (always empty for this test). Path is + // /api/v4/projects//merge_requests//notes. + if strings.HasSuffix(r.URL.Path, "/notes") { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte("[]")) + return + } + w.WriteHeader(http.StatusNotFound) + }) + srv := httptest.NewServer(mux) + t.Cleanup(srv.Close) + + client := New(srv.URL, "fake-token", nil) + syncer := NewSyncer(client, store, nil, DefaultLinker("DEVOPS"), 30) + syncer.MemberInstanceSweep = true + syncer.AllowedMemberIDs = map[string]struct{}{"daniel": {}} + + if err := syncer.Sync(ctx); err != nil { + t.Fatalf("first sync: %v", err) + } + + gotIDs := listPRIDs(t, store) + sort.Strings(gotIDs) + wantIDs := []string{"alaudadevops/toolbox!2", "container-platform/edge!1"} + if !equalSlices(gotIDs, wantIDs) { + t.Fatalf("after first sync got %v, want %v", gotIDs, wantIDs) + } + + // Re-run: same data must dedupe via the `id` PK; no growth. + if err := syncer.Sync(ctx); err != nil { + t.Fatalf("second sync: %v", err) + } + got2 := listPRIDs(t, store) + if len(got2) != len(gotIDs) { + t.Fatalf("re-sync grew PR count from %d to %d; expected dedup", len(gotIDs), len(got2)) + } + + // And with the sweep disabled, no MRs land at all (Pass B is the + // only ingest path in this fixture — no group specs are configured). + syncer2 := NewSyncer(client, store, nil, DefaultLinker("DEVOPS"), 30) + syncer2.MemberInstanceSweep = false + syncer2.AllowedMemberIDs = map[string]struct{}{"daniel": {}} + // Clear the store first. + if _, err := store.DB().ExecContext(ctx, `DELETE FROM pull_requests`); err != nil { + t.Fatalf("clear PRs: %v", err) + } + if err := syncer2.Sync(ctx); err != nil { + t.Fatalf("sweep-off sync: %v", err) + } + if got := listPRIDs(t, store); len(got) != 0 { + t.Fatalf("sweep disabled but got PRs: %v", got) + } +} + +// listPRIDs returns every pull_requests.id sorted ascending — the +// storage interface doesn't expose a list method, so we read straight +// from the DB handle for the test. +func listPRIDs(t *testing.T, store storage.Store) []string { + t.Helper() + rows, err := store.DB().Query(`SELECT id FROM pull_requests ORDER BY id`) + if err != nil { + t.Fatalf("query PR ids: %v", err) + } + defer rows.Close() + out := []string{} + for rows.Next() { + var id string + if err := rows.Scan(&id); err != nil { + t.Fatalf("scan: %v", err) + } + out = append(out, id) + } + return out +} + +func itoa(n int) string { + // Tiny helper to avoid pulling strconv into the test fixtures. + if n == 0 { + return "0" + } + neg := n < 0 + if neg { + n = -n + } + digits := []byte{} + for n > 0 { + digits = append([]byte{'0' + byte(n%10)}, digits...) + n /= 10 + } + if neg { + digits = append([]byte{'-'}, digits...) + } + return string(digits) +} + +func equalSlices(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} diff --git a/roadmap-planner/docs/team-analytics/CHANGES.md b/roadmap-planner/docs/team-analytics/CHANGES.md index da6bb62..359276d 100644 --- a/roadmap-planner/docs/team-analytics/CHANGES.md +++ b/roadmap-planner/docs/team-analytics/CHANGES.md @@ -8,17 +8,52 @@ should consult this when upgrading. Newest entries first. +## W1 — Member allowlist + GitLab `with_shared=false` + Pass B instance sweep + +**What changed** + +- `gitlab/client.go::ListGroupProjects` sends `with_shared=false`. +- `contributions.BuildAllowlist(cfg)` derives the W1 allowlist from + the prefill maps minus `team_analytics.member_denylist`. The + synthetic `bot` member id is always included. +- Aggregator INSERTs carry `AND COALESCE(m.id, pr.author_id) IN (?, …)` + when the allowlist is enabled. Post-rebuild cleanup deletes rollup + rows for members no longer in the set. +- `GET /api/contributions/members` and `/team` drop members outside + the allowlist. +- New GitLab Pass B (`gitlab.member_instance_sweep: true`) sweeps + `/api/v4/merge_requests?scope=all&author_username=` per allowlisted + member. + +**Config additions** (all default off): + +```yaml +gitlab: + member_instance_sweep: false +team_analytics: + member_denylist: [] +``` + +**Activation rules** + +| prefills | denylist | allowlist | +|---|---|---| +| empty | empty | disabled (pre-W1) | +| populated | any | enabled, denylist subtracts | +| empty | populated | disabled (refuse to collapse to `{bot}`) | + +**Audit cross-reference:** `docs/team-analytics/audit-2026-05-19/REPORT.md` +findings B1, B10, B11, B6. + ## W4 — Sprint card: 4 lanes (todo / in_progress / done / cancelled) **What changed** - `SprintStats` JSON gains `todo`, `in_progress`, `cancelled`. `wip` - stays for one release as derived alias `todo + in_progress`. -- New `team_analytics.statuses` config block — four optional lists. - Empty lane inherits the W4 default (37 statuses pulled live from - DEVOPS Jira, English + Chinese mix). -- `contributions/status_lanes.go::StatusClassifier` does the lookup - (case-folded; unknown → `in_progress`, reported `known=false`). + stays for one release as derived alias. +- New `team_analytics.statuses` config block with W4 default mapping + pulled live from DEVOPS Jira. +- `contributions/status_lanes.go::StatusClassifier` does the lookup. - `sprintCounts` swaps `resolved_at IS NULL` for the classifier. - Member-profile sprint card renders 6 KPI tiles. @@ -30,25 +65,17 @@ finding B7. **What changed (delivered)** - `storage.backfill_days` default flips from 180 → 365. -- `aggregator.RebuildRecent`'s default rebuild window flips from - 187 → 400 days (cap 730). -- New migration `0008_quarter_assignments.sql` adds a small - `(issue_key, quarter_label, source)` table. Empty on install. +- `aggregator.RebuildRecent`'s default rebuild window 187 → 400 days. +- Migration `0008_quarter_assignments.sql`. - New `contributions.QuarterResolver` (period.go). -- `MemberSummary.PeriodTotals []PeriodBucket` populated on - `?period=release_quarter`. -- `parseQuery` default window expands to 364 days when - `?period=release_quarter` requests so the chart shows a full year - without explicit `from`/`to`. +- `MemberSummary.PeriodTotals` populated on `?period=release_quarter`. +- Window expands to 364 days when `?period=release_quarter`. -**Deferred to a follow-up PR** +**Deferred to follow-up PR** -- Jira sync pass that walks every Milestone, parses - `^(\d{4}Q[1-4])[::]` (full-width Chinese colon!), follows the - Blocks inward link to the Epic, writes `(epic_key, quarter_label)` - into `quarter_assignments`. -- Story / Bug → Epic Link walk for parent-Epic milestone-quarter. -- Frontend `?period=` tab on the Team Dashboard. +- Jira sync pass that walks Milestone summaries to populate + `quarter_assignments` from the `^(\d{4}Q[1-4])[::]` prefix. +- Frontend `?period=` tab. **Audit cross-reference:** `docs/team-analytics/audit-2026-05-19/REPORT.md` finding B13. @@ -57,7 +84,7 @@ finding B13. **What changed** -- Migration `0006_rename_epic_key_to_jira_key.sql` renames the column. +- Migration `0006_rename_epic_key_to_jira_key.sql`. - Go struct field `PullRequest.EpicKey` → `PullRequest.JiraKey`, no alias.