From 2ba4576f90f93c103017822e6ac9c1fe078b05a8 Mon Sep 17 00:00:00 2001 From: Matija Stepanic Date: Thu, 25 Jun 2026 16:47:59 +0200 Subject: [PATCH 1/2] Add repository exclusion from contribution counts Adds the ability to exclude specific repositories (owner/name) from the commit/contribution totals used to rank users. The GraphQL query now fetches commitContributionsByRepository, and commits made to excluded repositories are subtracted from each user's total/public/private/commit counts. This keeps rankings representative by ignoring dataset/archive repositories whose automated commit volume would otherwise dominate a user's contribution count. - github: fetch per-repo commit breakdown and apply exclusions (pure, tested helpers) - presets: global ExcludedRepos list, folded into PresetChecksum so changing it regenerates every region page - main: add repeatable --exclude-repo flag (adds to the global list) - output: publish excluded_repos in the YAML so the site can show them transparently - tests: cover exclusion math, per-repo parsing and edge cases (none existed before) Co-Authored-By: Claude Opus 4.8 (1M context) --- github/github.go | 106 +++++++++++++++++++++++++++++++++++++++--- github/github_test.go | 95 +++++++++++++++++++++++++++++++++++++ main.go | 7 ++- output/output.go | 7 +++ presets.go | 14 ++++++ top/top.go | 3 +- 6 files changed, 224 insertions(+), 8 deletions(-) create mode 100644 github/github_test.go diff --git a/github/github.go b/github/github.go index 8ea1c36d69..fa7c7a1c59 100644 --- a/github/github.go +++ b/github/github.go @@ -65,6 +65,14 @@ func (client HTTPGithubClient) SearchUsers(query UserSearchQuery) (GithubSearchR users := []User{} userLogins := map[string]bool{} + // Repositories whose commit contributions should be ignored when ranking + // users (e.g. dataset/archive repos that would otherwise inflate counts). + // Matching is case-insensitive on the "owner/name" form. + excludeRepos := map[string]bool{} + for _, repo := range query.ExcludeRepos { + excludeRepos[strings.ToLower(repo)] = true + } + totalCount := 0 minFollowerCount := -1 maxPerQuery := 1000 @@ -111,7 +119,16 @@ Pages: }, totalCommitContributions, totalPullRequestContributions, - restrictedContributionsCount + restrictedContributionsCount, + commitContributionsByRepository(maxRepositories: 100) { + repository { + nameWithOwner, + isPrivate + }, + contributions { + totalCount + } + } } } }, @@ -205,6 +222,14 @@ Pages: commitsCount := int(contributionsCollection["totalCommitContributions"].(float64)) pullRequestsCount := int(contributionsCollection["totalPullRequestContributions"].(float64)) + repoContributions := parseRepoCommitContributions(contributionsCollection) + excludedPublic, excludedPrivate := repoExclusions(repoContributions, excludeRepos) + excludedTotal := excludedPublic + excludedPrivate + + contributionCount = clampZero(contributionCount - excludedTotal) + privateContributionCount = clampZero(privateContributionCount - excludedPrivate) + commitsCount = clampZero(commitsCount - excludedTotal) + user := User{ Login: login, AvatarURL: avatarURL, @@ -213,7 +238,7 @@ Pages: Organizations: organizations, FollowerCount: followerCount, ContributionCount: contributionCount, - PublicContributionCount: (contributionCount - privateContributionCount), + PublicContributionCount: clampZero(contributionCount - privateContributionCount), PrivateContributionCount: privateContributionCount, CommitsCount: commitsCount, PullRequestsCount: pullRequestsCount} @@ -235,6 +260,74 @@ Pages: TotalUserCount: totalUsersCount}, nil } +// RepoCommitContribution holds the number of commit contributions a user made +// to a single repository within the queried time window. +type RepoCommitContribution struct { + NameWithOwner string + IsPrivate bool + Commits int +} + +// parseRepoCommitContributions extracts the per-repository commit breakdown from +// a parsed contributionsCollection node. Missing/malformed entries are skipped. +func parseRepoCommitContributions(contributionsCollection map[string]interface{}) []RepoCommitContribution { + result := []RepoCommitContribution{} + rawRepos, ok := contributionsCollection["commitContributionsByRepository"].([]interface{}) + if !ok { + return result + } + for _, raw := range rawRepos { + node, ok := raw.(map[string]interface{}) + if !ok { + continue + } + repo, ok := node["repository"].(map[string]interface{}) + if !ok { + continue + } + nameWithOwner, _ := repo["nameWithOwner"].(string) + isPrivate, _ := repo["isPrivate"].(bool) + commits := 0 + if contribs, ok := node["contributions"].(map[string]interface{}); ok { + if total, ok := contribs["totalCount"].(float64); ok { + commits = int(total) + } + } + result = append(result, RepoCommitContribution{ + NameWithOwner: nameWithOwner, + IsPrivate: isPrivate, + Commits: commits, + }) + } + return result +} + +// repoExclusions sums the commit contributions belonging to excluded repositories, +// split by visibility so they can be subtracted from the right totals. The exclude +// set keys are expected to be lower-cased "owner/name" strings. +func repoExclusions(repos []RepoCommitContribution, exclude map[string]bool) (excludedPublic int, excludedPrivate int) { + if len(exclude) == 0 { + return 0, 0 + } + for _, repo := range repos { + if exclude[strings.ToLower(repo.NameWithOwner)] { + if repo.IsPrivate { + excludedPrivate += repo.Commits + } else { + excludedPublic += repo.Commits + } + } + } + return excludedPublic, excludedPrivate +} + +func clampZero(n int) int { + if n < 0 { + return 0 + } + return n +} + func strPropOrEmpty(obj map[string]interface{}, prop string) string { switch t := obj[prop].(type) { case string: @@ -290,10 +383,11 @@ type User struct { } type UserSearchQuery struct { - Q string - Sort string - Order string - MaxUsers int + Q string + Sort string + Order string + MaxUsers int + ExcludeRepos []string } type GithubSearchResults struct { diff --git a/github/github_test.go b/github/github_test.go new file mode 100644 index 0000000000..fa71eae262 --- /dev/null +++ b/github/github_test.go @@ -0,0 +1,95 @@ +package github + +import "testing" + +func buildExcludeSet(repos ...string) map[string]bool { + set := map[string]bool{} + for _, r := range repos { + set[r] = true + } + return set +} + +func TestRepoExclusionsEmptySet(t *testing.T) { + repos := []RepoCommitContribution{ + {NameWithOwner: "owner/repo", IsPrivate: false, Commits: 100}, + } + pub, priv := repoExclusions(repos, map[string]bool{}) + if pub != 0 || priv != 0 { + t.Fatalf("expected no exclusions, got public=%d private=%d", pub, priv) + } +} + +func TestRepoExclusionsPublicAndPrivate(t *testing.T) { + repos := []RepoCommitContribution{ + {NameWithOwner: "domovinatv/dataset.domovina.tv", IsPrivate: false, Commits: 27604}, + {NameWithOwner: "owner/private-archive", IsPrivate: true, Commits: 500}, + {NameWithOwner: "owner/real-project", IsPrivate: false, Commits: 120}, + } + exclude := buildExcludeSet("domovinatv/dataset.domovina.tv", "owner/private-archive") + + pub, priv := repoExclusions(repos, exclude) + if pub != 27604 { + t.Errorf("expected excludedPublic=27604, got %d", pub) + } + if priv != 500 { + t.Errorf("expected excludedPrivate=500, got %d", priv) + } +} + +func TestRepoExclusionsCaseInsensitive(t *testing.T) { + repos := []RepoCommitContribution{ + {NameWithOwner: "DomovinaTV/Dataset.Domovina.TV", IsPrivate: false, Commits: 42}, + } + // The exclude set is lower-cased by the caller; repoExclusions lower-cases the + // repository name before matching. + exclude := buildExcludeSet("domovinatv/dataset.domovina.tv") + + pub, priv := repoExclusions(repos, exclude) + if pub != 42 || priv != 0 { + t.Errorf("expected case-insensitive match (public=42), got public=%d private=%d", pub, priv) + } +} + +func TestParseRepoCommitContributions(t *testing.T) { + collection := map[string]interface{}{ + "commitContributionsByRepository": []interface{}{ + map[string]interface{}{ + "repository": map[string]interface{}{ + "nameWithOwner": "owner/repo", + "isPrivate": true, + }, + "contributions": map[string]interface{}{ + "totalCount": float64(13), + }, + }, + // malformed entry should be skipped, not panic + map[string]interface{}{"unexpected": "shape"}, + }, + } + + parsed := parseRepoCommitContributions(collection) + if len(parsed) != 1 { + t.Fatalf("expected 1 parsed repo, got %d", len(parsed)) + } + got := parsed[0] + if got.NameWithOwner != "owner/repo" || !got.IsPrivate || got.Commits != 13 { + t.Errorf("unexpected parsed contribution: %+v", got) + } +} + +func TestParseRepoCommitContributionsMissingKey(t *testing.T) { + parsed := parseRepoCommitContributions(map[string]interface{}{}) + if len(parsed) != 0 { + t.Fatalf("expected no contributions for missing key, got %d", len(parsed)) + } +} + +func TestClampZero(t *testing.T) { + if got := clampZero(-5); got != 0 { + t.Errorf("expected clampZero(-5)=0, got %d", got) + } + if got := clampZero(7); got != 7 { + t.Errorf("expected clampZero(7)=7, got %d", got) + } +} diff --git a/main.go b/main.go index c156355668..1a959edf31 100644 --- a/main.go +++ b/main.go @@ -24,6 +24,7 @@ func (i *arrayFlags) Set(value string) error { var locations arrayFlags var excludeLocations arrayFlags +var excludeRepos arrayFlags var presetTitle string var presetChecksum string @@ -37,8 +38,12 @@ func main() { listPresets := flag.Bool("list-presets", false, "List all available presets as CSV and exit immediately") flag.Var(&locations, "location", "Location to query") + flag.Var(&excludeRepos, "exclude-repo", "Repository (owner/name) whose commits are excluded from counts; repeatable") flag.Parse() + // The global ExcludedRepos list always applies; --exclude-repo adds to it. + excludeRepos = append(excludeRepos, ExcludedRepos...) + if *listPresets { fmt.Println("preset,title,definition_checksum") for name, _ := range PRESETS { @@ -67,7 +72,7 @@ func main() { log.Fatal("Unrecognized output format: ", *outputOpt) } - opts := top.Options{Token: *token, Locations: locations, ExcludeLocations: excludeLocations, Amount: *amount, ConsiderNum: *considerNum, PresetTitle: presetTitle, PresetChecksum: presetChecksum} + opts := top.Options{Token: *token, Locations: locations, ExcludeLocations: excludeLocations, ExcludeRepos: excludeRepos, Amount: *amount, ConsiderNum: *considerNum, PresetTitle: presetTitle, PresetChecksum: presetChecksum} data, err := top.GithubTop(opts) if err != nil { diff --git a/output/output.go b/output/output.go index e990019dd8..69c441b3bd 100644 --- a/output/output.go +++ b/output/output.go @@ -134,6 +134,13 @@ func YamlOutput(results github.GithubSearchResults, writer io.Writer, options to fmt.Fprintf(writer, "definition_checksum: %+v\n", options.PresetChecksum) } + if len(options.ExcludeRepos) > 0 { + fmt.Fprintln(writer, "excluded_repos:") + for _, repo := range options.ExcludeRepos { + fmt.Fprintf(writer, " - %+v\n", strconv.QuoteToASCII(repo)) + } + } + return nil } diff --git a/presets.go b/presets.go index 41a23084e9..803dcc6287 100644 --- a/presets.go +++ b/presets.go @@ -13,6 +13,17 @@ type QueryPreset struct { exclude []string } +// ExcludedRepos lists repositories whose commit contributions are ignored when +// ranking users, applied across every preset. This keeps the rankings +// representative by excluding dataset/archive repositories whose automated commit +// volume would otherwise dominate a user's contribution count. +// +// Entries are "owner/name" and matched case-insensitively. This list is published +// on every region page so the exclusions are transparent. +var ExcludedRepos = []string{ + "domovinatv/dataset.domovina.tv", +} + var PRESETS = map[string]QueryPreset{ "panama": QueryPreset{ title: "Panama", @@ -638,5 +649,8 @@ func PresetTitle(name string) string { func PresetChecksum(name string) string { hash := sha256.New() io.WriteString(hash, fmt.Sprintf("%+v", Preset(name))) + // Fold in the global repo-exclusion list so that changing it invalidates every + // preset's checksum, triggering a regeneration of all region pages. + io.WriteString(hash, fmt.Sprintf("excluded_repos:%+v", ExcludedRepos)) return fmt.Sprintf("%x", hash.Sum(nil)) } diff --git a/top/top.go b/top/top.go index 153a0a9e89..2febedfd17 100644 --- a/top/top.go +++ b/top/top.go @@ -24,7 +24,7 @@ func GithubTop(options Options) (github.GithubSearchResults, error) { } var client = github.NewGithubClient(net.TokenAuth(token)) - users, err := client.SearchUsers(github.UserSearchQuery{Q: query, Sort: "followers", Order: "desc", MaxUsers: options.ConsiderNum}) + users, err := client.SearchUsers(github.UserSearchQuery{Q: query, Sort: "followers", Order: "desc", MaxUsers: options.ConsiderNum, ExcludeRepos: options.ExcludeRepos}) if err != nil { return github.GithubSearchResults{}, err } @@ -35,6 +35,7 @@ type Options struct { Token string Locations []string ExcludeLocations []string + ExcludeRepos []string Amount int ConsiderNum int PresetTitle string From 8cdbe7e1e8c8c50e4930a6b0c55424730d49c843 Mon Sep 17 00:00:00 2001 From: Matija Stepanic Date: Thu, 25 Jun 2026 17:18:09 +0200 Subject: [PATCH 2/2] Expose per-user excluded contribution count Stores the commit contributions removed by excluded repositories on each User (ExcludedContributionCount) and emits it as 'excluded:' per user in the YAML output, so the site can show how much each user count was reduced and why they rank where they do. Co-Authored-By: Claude Opus 4.8 (1M context) --- github/github.go | 46 ++++++++++++++++++++++++---------------------- output/output.go | 2 ++ 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/github/github.go b/github/github.go index fa7c7a1c59..759e59f4eb 100644 --- a/github/github.go +++ b/github/github.go @@ -231,17 +231,18 @@ Pages: commitsCount = clampZero(commitsCount - excludedTotal) user := User{ - Login: login, - AvatarURL: avatarURL, - Name: name, - Company: company, - Organizations: organizations, - FollowerCount: followerCount, - ContributionCount: contributionCount, - PublicContributionCount: clampZero(contributionCount - privateContributionCount), - PrivateContributionCount: privateContributionCount, - CommitsCount: commitsCount, - PullRequestsCount: pullRequestsCount} + Login: login, + AvatarURL: avatarURL, + Name: name, + Company: company, + Organizations: organizations, + FollowerCount: followerCount, + ContributionCount: contributionCount, + PublicContributionCount: clampZero(contributionCount - privateContributionCount), + PrivateContributionCount: privateContributionCount, + CommitsCount: commitsCount, + PullRequestsCount: pullRequestsCount, + ExcludedContributionCount: excludedTotal} if !userLogins[login] { userLogins[login] = true @@ -369,17 +370,18 @@ func NewGithubClient(wrappers ...net.Wrapper) HTTPGithubClient { } type User struct { - Login string - AvatarURL string - Name string - Company string - Organizations []string - FollowerCount int - ContributionCount int - PublicContributionCount int - PrivateContributionCount int - CommitsCount int - PullRequestsCount int + Login string + AvatarURL string + Name string + Company string + Organizations []string + FollowerCount int + ContributionCount int + PublicContributionCount int + PrivateContributionCount int + CommitsCount int + PullRequestsCount int + ExcludedContributionCount int } type UserSearchQuery struct { diff --git a/output/output.go b/output/output.go index 69c441b3bd..90fb5bc098 100644 --- a/output/output.go +++ b/output/output.go @@ -78,6 +78,7 @@ func YamlOutput(results github.GithubSearchResults, writer io.Writer, options to login: %+v avatarUrl: %+v contributions: %+v + excluded: %+v company: %+v organizations: %+v `, @@ -86,6 +87,7 @@ func YamlOutput(results github.GithubSearchResults, writer io.Writer, options to strconv.QuoteToASCII(u.Login), u.AvatarURL, contributionCount, + u.ExcludedContributionCount, strconv.QuoteToASCII(u.Company), strconv.QuoteToASCII(strings.Join(u.Organizations, ","))) }