Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 30 additions & 5 deletions roadmap-planner/backend/cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,30 @@ func initTeamAnalytics(ctx context.Context, router *gin.Engine, cfg *config.Conf
aggregator := contributions.NewAggregator(store)
allowlist := contributions.BuildAllowlist(cfg.TeamAnalytics)
aggregator.SetAllowlist(allowlist)
bots := contributions.NewBotSet(cfg.TeamAnalytics.BotLogins)
api.AddContributionsRoutes(router, store, service, aggregator, allowlist)
logger.Info("Contributions API routes added",
zap.Int("pillars_configured", len(pillarMap.Order())),
zap.Bool("allowlist_enabled", allowlist.Enabled()),
zap.Int("allowlist_size", allowlist.Size()))
zap.Int("allowlist_size", allowlist.Size()),
zap.Int("bot_logins_configured", bots.Size()))

// W2 one-shot bot consolidation: retroactively tag existing PR /
// review rows ingested before this feature shipped, reassign bot
// author/reviewer ids to the synthetic `bot` member, and compute
// first_human_review_at. Idempotent — safe to run on every boot.
if bots.Size() > 0 {
res, err := contributions.ConsolidateBots(ctx, store, bots)
if err != nil {
logger.Warn("bot consolidation backfill failed", zap.Error(err))

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Warning (style/error-handling): ConsolidateBots errors are logged as warnings rather than returning a fatal error. If backfill fails, the server continues with potentially inconsistent state. Consider whether this should be fatal for production deployments, or add a startup health check to verify the backfill succeeded.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Warning (style/error-handling): ConsolidateBots errors are logged as warnings rather than returned, meaning startup continues even if the backfill fails. This may hide data inconsistency issues in production. Consider adding a startup health check or returning the error.

} else {
logger.Info("bot consolidation backfill applied",
zap.Int("reviews_marked_bot", res.ReviewsMarkedBot),
zap.Int("reviewer_id_rewritten", res.ReviewsAuthorRew),
zap.Int("pr_author_id_rewritten", res.PRsAuthorRewrite),
zap.Int("pr_first_human_review_rows", res.PRsFirstHumanSet))
}
}

// Optional Jira sync goroutine.
//
Expand Down Expand Up @@ -258,12 +277,12 @@ func initTeamAnalytics(ctx context.Context, router *gin.Engine, cfg *config.Conf

// Optional GitHub sync goroutine.
if cfg.GitHub.Enabled {
startGitHubSync(ctx, cfg, store, aggregator)
startGitHubSync(ctx, cfg, store, aggregator, bots)
}

// Optional GitLab sync goroutine.
if cfg.GitLab.Enabled {
startGitLabSync(ctx, cfg, store, aggregator, allowlist)
startGitLabSync(ctx, cfg, store, aggregator, allowlist, bots)
}

go func() {
Expand All @@ -279,7 +298,7 @@ func initTeamAnalytics(ctx context.Context, router *gin.Engine, cfg *config.Conf
// All failures are non-fatal — if GitHub auth or repo parsing breaks,
// we log and continue without a GitHub syncer rather than refusing to
// start the whole server. The roadmap UI keeps working.
func startGitHubSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator) {
func startGitHubSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator, bots contributions.BotSet) {
repos := parseRepos(cfg.GitHub.Repos)
if len(repos) == 0 {
logger.Warn("github.enabled but github.repos is empty; skipping sync")
Expand All @@ -301,6 +320,9 @@ func startGitHubSync(ctx context.Context, cfg *config.Config, store storage.Stor
backfill = cfg.Storage.BackfillDays
}
syncer := ghclient.NewSyncer(client, store, repos, ghclient.DefaultLinker(projectKey), backfill)
if bots.Size() > 0 {
syncer.IsBotLogin = bots.Contains
}

interval, err := time.ParseDuration(cfg.GitHub.SyncInterval)
if err != nil {
Expand Down Expand Up @@ -348,7 +370,7 @@ func buildGitHubClient(cfg *config.GitHub) (*ghclient.Client, string, error) {
//
// Same fail-soft contract as startGitHubSync — bad config logs and
// continues without a GitLab sync rather than refusing to start.
func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator, allowlist contributions.Allowlist) {
func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Store, aggregator *contributions.Aggregator, allowlist contributions.Allowlist, bots contributions.BotSet) {
specs := parseGroupSpecs(cfg.GitLab.Groups)
if len(specs) == 0 {
logger.Warn("gitlab.enabled but gitlab.groups is empty; skipping sync")
Expand Down Expand Up @@ -385,6 +407,9 @@ func startGitLabSync(ctx context.Context, cfg *config.Config, store storage.Stor
}
syncer.AllowedMemberIDs = allowed
}
if bots.Size() > 0 {
syncer.IsBotLogin = bots.Contains
}

interval, err := time.ParseDuration(cfg.GitLab.SyncInterval)
if err != nil {
Expand Down
26 changes: 26 additions & 0 deletions roadmap-planner/backend/config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ github:
private_key_path: "" # path to the PEM file (mount via Secret)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CRITICAL (bug/merge-conflict): Unresolved merge conflict markers (<<<<<<< HEAD, =======, >>>>>>> ee78c5b) present in this file. This will cause YAML parsing failures when the config is loaded. The conflict must be resolved before this PR can be deployed.

private_key_pem: "" # raw PEM contents (use env GITHUB_APP_PRIVATE_KEY_PEM in K8s)

<<<<<<< HEAD
# GitLab ingestion — mirrors `github` for self-hosted GitLab installs.
# Requires storage.enabled = true.
gitlab:
Expand Down Expand Up @@ -165,4 +166,29 @@ team_analytics:
member_denylist: []
# - gxjiao
# - lmhe
=======
# Team analytics — pillar attribution + identity prefills + W1/W2 knobs.
# All keys here are optional and additive; the dashboard works without
# them (it just shows the un-curated raw data).
team_analytics:
# github_login_prefills shortcuts the first-time Jira → GitHub
# identity link for members the operator already knows. Manual UI
# edits always win on subsequent syncs.
github_login_prefills: {}
# daniel: danielfbm
# jtcheng: chengjingtao
gitlab_username_prefills: {}
# daniel: daniel
# jtcheng: chengjingtao
# W2 (2026-05-19): explicit list of bot logins that get folded into
# the synthetic `bot` member. Match is exact + case-folded; no
# suffix magic. Adding a new bot here + restarting picks it up.
bot_logins: []
# - alaudabot
# - alaudaa-renovate
# - edge-katanomi-app2[bot]
# - copilot-pull-request-reviewer[bot]
# - kilo-code-bot[bot]
# - copilot
>>>>>>> ee78c5b (feat(roadmap-planner): W2 — fold bot contributions into synthetic bot member)
# pillars: { … } # see docs/team-analytics/PROPOSAL.md
28 changes: 13 additions & 15 deletions roadmap-planner/backend/internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,23 +88,21 @@ type TeamAnalytics struct {
// daniel: daniel
// jtcheng: chengjingtao
GitLabUsernamePrefills map[string]string `mapstructure:"gitlab_username_prefills" yaml:"gitlab_username_prefills"`
// MemberDenylist is the operator-curated list of Jira member ids
// (slugified email — the same key shape used in the prefill maps)
// MemberDenylist (W1) — operator-curated list of Jira member ids
// that should be excluded from the team-analytics rollups and from
// every contributions API response, regardless of whether they have
// an entry in the prefill maps.
//
// This is the W1 escape hatch for people who appear in Jira (so the
// auto-discovery picks them up) but should not count toward the
// dashboard — bots that aren't on the bot allowlist, people who
// left the team, contractors loaned out to another pillar, etc.
// every contributions API response. Subtracted from the prefill-
// derived allowlist.
MemberDenylist []string `mapstructure:"member_denylist" yaml:"member_denylist"`
// Statuses is the W4 (2026-05-19) configurable status → lane map
// used by the sprint card. Status names match Jira's `status.name`
// case-folded; an unknown status falls back to `in_progress` and
// triggers a warning so the operator can extend the lists. The
// built-in defaults cover DEVOPS's 37 statuses across 17 issue
// types — see PLAN.md W4 for the full mapping.
// BotLogins (W2) — explicit list of GitHub / GitLab logins that
// should be reassigned to the synthetic `bot` member instead of
// landing under their raw login. Match is exact + case-folded; no
// suffix magic. The synthetic `bot` member is auto-created by the
// Jira sync.
BotLogins []string `mapstructure:"bot_logins" yaml:"bot_logins"`
// Statuses (W4) — configurable status → lane map for the sprint
// card. Each lane (todo/in_progress/done/cancelled) is optional;
// empty lanes inherit DefaultStatusLanes() which covers the live
// DEVOPS Jira workflow (37 statuses, English + Chinese).
Statuses StatusLanes `mapstructure:"statuses" yaml:"statuses"`
}

Expand Down
184 changes: 184 additions & 0 deletions roadmap-planner/backend/internal/contributions/bot.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
/*
Copyright 2026 The AlaudaDevops Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
*/

package contributions

import (
"context"
"fmt"
"strings"

"github.com/AlaudaDevops/toolbox/roadmap-planner/backend/internal/storage"
)

// BotSet is the W2 closed list of logins that should be folded into the
// synthetic `bot` member. Lookup is exact + case-folded; suffix or
// prefix patterns are NOT honored — per maintainer guidance, the
// allowlist is explicit and new bots are added to config.
//
// Use NewBotSet to build the set from a flat config slice. The zero
// value is a no-op (Contains always returns false), which is the
// upgrade path for installs that haven't curated the list yet.
type BotSet struct {
set map[string]struct{}
}

// NewBotSet returns a set populated from the configured list. Empty
// input yields a usable, no-op set.
func NewBotSet(logins []string) BotSet {
out := make(map[string]struct{}, len(logins))
for _, l := range logins {
k := normalizeBotLogin(l)
if k == "" {
continue
}
out[k] = struct{}{}
}
return BotSet{set: out}
}

// Contains reports whether the given raw login should be folded into
// the synthetic `bot` member. Lookup is case-folded so callers don't
// need to normalize before calling.
func (b BotSet) Contains(login string) bool {
if len(b.set) == 0 {
return false
}
_, ok := b.set[normalizeBotLogin(login)]
return ok
}

// Size returns the number of distinct configured bot logins. Useful
// for startup telemetry.
func (b BotSet) Size() int { return len(b.set) }

func normalizeBotLogin(s string) string {
return strings.ToLower(strings.TrimSpace(s))
}

// ConsolidateBots is the one-shot startup pass that retroactively
// applies the W2 bot-consolidation rules to rows ingested before this
// feature shipped. It walks `pr_reviews` and `pull_requests`, sets
// `is_bot` / `first_human_review_at` columns, and rewrites bot
// `author_id` / `reviewer_id` to the synthetic `bot` member id.
//
// Behavior is idempotent — every step uses a deterministic predicate
// over the configured bot list, so re-running on already-consolidated
// data is a no-op. Callers should run this once on startup *after*
// the migration applies the new columns and *before* the aggregator
// rebuilds, so the network-density panel reads the fresh values.
//
// Returns a summary of how many rows changed per step, for the
// startup log.
type ConsolidateBotsResult struct {
ReviewsMarkedBot int
PRsAuthorRewrite int
ReviewsAuthorRew int
PRsFirstHumanSet int
}

func ConsolidateBots(ctx context.Context, store storage.Store, bots BotSet) (ConsolidateBotsResult, error) {
var out ConsolidateBotsResult
if bots.Size() == 0 {
return out, nil
}
d, ok := store.(interface{ Dialect() storage.Dialect })
if !ok {
return out, fmt.Errorf("ConsolidateBots: store does not expose a Dialect")
}
dialect := d.Dialect()
db := store.DB()

// Build IN-clause args ONCE; reused across the four statements.
logins := make([]any, 0, bots.Size())
for k := range bots.set {
logins = append(logins, k)
}
if len(logins) == 0 {
return out, nil
}
placeholders := strings.Repeat("?, ", len(logins))
placeholders = strings.TrimSuffix(placeholders, ", ")

rebind := func(q string) string {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion (docs/readability): The rebind function uses dialect.Placeholder(n) assuming 1-based indexing. Consider adding a brief comment explaining this assumption to improve maintainability.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggestion (style/code-structure): The rebind function is complex and handles dialect-specific placeholder translation. Consider extracting this into a dedicated helper or adding more documentation about which dialects require this transformation.

if dialect.Placeholder(1) == "?" {
return q
}
var b []byte
n := 1
for i := 0; i < len(q); i++ {
if q[i] == '?' {
b = append(b, []byte(dialect.Placeholder(n))...)
n++
} else {
b = append(b, q[i])
}
}
return string(b)
}

// 1. Tag every existing review row by a configured bot login. This
// is the safe one to lead with: pure metadata flip, no FK touch.
res, err := db.ExecContext(ctx,
rebind(fmt.Sprintf(
`UPDATE pr_reviews SET is_bot = 1 WHERE is_bot = 0 AND LOWER(reviewer_login) IN (%s)`,
placeholders)),
logins...)
if err != nil {
return out, fmt.Errorf("backfill is_bot on pr_reviews: %w", err)
}
if n, err := res.RowsAffected(); err == nil {
out.ReviewsMarkedBot = int(n)
}

// 2. Reassign bot reviewer_id to the synthetic `bot` member id.
// Skip rows already pointing at `bot` (idempotency on rerun).
res, err = db.ExecContext(ctx,
rebind(fmt.Sprintf(
`UPDATE pr_reviews SET reviewer_id = 'bot' WHERE (reviewer_id IS NULL OR reviewer_id <> 'bot') AND LOWER(reviewer_login) IN (%s)`,
placeholders)),
logins...)
if err != nil {
return out, fmt.Errorf("backfill reviewer_id on pr_reviews: %w", err)
}
if n, err := res.RowsAffected(); err == nil {
out.ReviewsAuthorRew = int(n)
}

// 3. Reassign bot author_id on pull_requests.
res, err = db.ExecContext(ctx,
rebind(fmt.Sprintf(
`UPDATE pull_requests SET author_id = 'bot' WHERE (author_id IS NULL OR author_id <> 'bot') AND LOWER(author_login) IN (%s)`,
placeholders)),
logins...)
if err != nil {
return out, fmt.Errorf("backfill author_id on pull_requests: %w", err)
}
if n, err := res.RowsAffected(); err == nil {
out.PRsAuthorRewrite = int(n)
}

// 4. Populate first_human_review_at on pull_requests — `MIN(submitted_at)`
// over non-bot reviews. UPDATE-FROM-subquery flavor; runs once and
// is idempotent because subsequent passes either match the same
// minimum or NULL when no human reviews exist.
res, err = db.ExecContext(ctx, rebind(`
UPDATE pull_requests
SET first_human_review_at = (
SELECT MIN(rv.submitted_at)
FROM pr_reviews rv
WHERE rv.pr_id = pull_requests.id AND rv.is_bot = 0
)`))
if err != nil {
return out, fmt.Errorf("backfill first_human_review_at: %w", err)
}
if n, err := res.RowsAffected(); err == nil {
out.PRsFirstHumanSet = int(n)
}

return out, nil
}
Loading
Loading