From 61402f83c5a799aebf14537f09475ae9494b2feb Mon Sep 17 00:00:00 2001
From: saagpatel
Date: Fri, 19 Jun 2026 22:40:28 -0700
Subject: [PATCH 01/13] feat(api-only): clone-free portfolio scoring from the
GitHub API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add an API-only scoring path so an arbitrary public GitHub user can be scored without cloning any repository — the engine behind the hosted 'paste your username' report.
- github_client: get_repo_tree (Git Trees API, recursive) + get_file_content (Contents API, base64); fail-soft with logging on unexpected statuses and a tree-truncation signal.
- api_checkout: materialize a sparse on-disk skeleton from the tree (dirs + presence files + curated file content), path-traversal + null-byte guarded; drop-in replacement for cloner.clone_workspace.
- api_only: score_repos_api_only / audit_user_api_only run the existing, unmodified 13-analyzer engine against the skeleton. Interactive 'fast' mode skips slow async stats endpoints (~10x faster on live scans).
OSS CLI and analyzers unchanged. New unit tests cover the client methods, materializer, and orchestrator; live-verified on a public user.
---
src/api_checkout.py | 175 ++++++++++++++++++++++++
src/api_only.py | 158 ++++++++++++++++++++++
src/github_client.py | 236 ++++++++++++++++++++++++++------
tests/test_api_checkout.py | 194 +++++++++++++++++++++++++++
tests/test_api_only.py | 220 ++++++++++++++++++++++++++++++
tests/test_github_client.py | 260 +++++++++++++++++++++++++++++++++---
6 files changed, 1186 insertions(+), 57 deletions(-)
create mode 100644 src/api_checkout.py
create mode 100644 src/api_only.py
create mode 100644 tests/test_api_checkout.py
create mode 100644 tests/test_api_only.py
diff --git a/src/api_checkout.py b/src/api_checkout.py
new file mode 100644
index 0000000..32546bb
--- /dev/null
+++ b/src/api_checkout.py
@@ -0,0 +1,175 @@
+"""Materialize a sparse, API-sourced repo skeleton for clone-free scoring.
+
+The audit engine's analyzers read a repo from the local filesystem. To score an
+arbitrary public GitHub user *without* cloning every repo (the hosted, multi-tenant
+path), this module reconstructs a sparse on-disk skeleton from the GitHub API:
+
+* one Git Trees API call yields every path → directories are created and files are
+ ``touch``-ed so presence-based analyzers (structure, testing, CI, docs, build)
+ see the real shape of the repo;
+* a bounded set of high-signal files (README, dependency manifests) are fetched via
+ the Contents API and written with real content, so content-based analyzers
+ (README quality, dependency counts, test-framework detection) still work.
+
+The existing analyzers run against this skeleton unmodified. ``materialize_api_workspace``
+mirrors ``cloner.clone_workspace`` exactly (context manager yielding ``{name: Path}``),
+so it is a drop-in replacement for the clone step.
+
+Materialization is sequential on purpose: it keeps API access well under GitHub's
+secondary rate limits (concurrent-request and points-per-minute caps) that a
+parallel burst across many repos would trip.
+"""
+
+from __future__ import annotations
+
+import logging
+import tempfile
+from contextlib import contextmanager
+from pathlib import Path
+from typing import TYPE_CHECKING, Callable, Generator
+
+from src.models import RepoMetadata
+
+if TYPE_CHECKING:
+ from src.github_client import GitHubClient
+
+logger = logging.getLogger(__name__)
+
+# Default cap on how many file entries from a repo tree are materialized into
+# one skeleton (used as the ``max_files`` default below).
+DEFAULT_MAX_FILES = 5000
+# Default per-repo budget for curated files that are fetched through the
+# Contents API and written with real content (the ``max_content_files`` default).
+DEFAULT_MAX_CONTENT_FILES = 20
+
+# Files whose *content* (not just presence) carries real scoring signal. Matched
+# case-insensitively by basename; anything starting with ``readme`` also qualifies.
+# Entries are stored lowercase because ``_is_content_file`` lowercases the
+# basename before the membership test.
+CONTENT_FILE_NAMES = {
+ "package.json",
+ "pyproject.toml",
+ "requirements.txt",
+ "setup.py",
+ "setup.cfg",
+ "pipfile",
+ "cargo.toml",
+ "go.mod",
+ "pom.xml",
+ "build.gradle",
+ "gemfile",
+ "composer.json",
+}
+
+
+def _is_content_file(path: str) -> bool:
+    """Return True when ``path`` names a file worth fetching with real content.
+
+    Only the final ``/``-separated component is considered, compared
+    case-insensitively: it qualifies when it begins with ``readme`` or is
+    listed in ``CONTENT_FILE_NAMES``.
+    """
+    basename = path.rpartition("/")[2].lower()
+    if basename.startswith("readme"):
+        return True
+    return basename in CONTENT_FILE_NAMES
+
+
+def _safe_target(dest: Path, rel: str) -> Path | None:
+    """Map a remote tree path onto a location strictly inside ``dest``.
+
+    Tree paths are supplied by arbitrary remote repositories, so entries such
+    as ``../../etc/passwd`` or ``/abs/evil`` must never land outside ``dest``.
+
+    Returns the resolved target path, or ``None`` when ``rel`` is blank, is
+    ``.`` / ``..``, contains a NUL byte, resolves to ``dest`` itself, or
+    resolves anywhere that is not a descendant of ``dest``.
+    """
+    cleaned = rel.strip()
+    # NUL is rejected up front, before any filesystem resolution is attempted.
+    if cleaned in ("", ".", "..") or "\x00" in cleaned:
+        return None
+
+    root = dest.resolve()
+    resolved = (dest / cleaned).resolve()
+    # Accept strict descendants only: never the root itself, never an escape.
+    if resolved != root and root in resolved.parents:
+        return resolved
+    return None
+
+
+def materialize_api_checkout(
+    metadata: RepoMetadata,
+    client: "GitHubClient",
+    dest: Path,
+    *,
+    max_files: int = DEFAULT_MAX_FILES,
+    max_content_files: int = DEFAULT_MAX_CONTENT_FILES,
+) -> Path:
+    """Build a sparse skeleton of one repo under ``dest`` from the GitHub API.
+
+    Directories from the repo tree are created, every file (up to
+    ``max_files``) is written as an empty placeholder, and curated files (see
+    ``_is_content_file``) are written with content fetched through
+    ``client.get_file_content``.
+
+    Args:
+        metadata: Repo whose ``full_name`` (``owner/repo``) and
+            ``default_branch`` select the tree to read.
+        client: Object providing ``get_repo_tree`` and ``get_file_content``.
+        dest: Directory to populate; created if missing.
+        max_files: Maximum number of tree file entries to materialize.
+        max_content_files: Maximum number of Contents API fetches to attempt.
+
+    Returns:
+        ``dest``. If the repo tree is unavailable (empty repo, missing ref, or
+        an API error), ``dest`` is created empty so downstream analyzers score
+        it as a near-empty repo rather than crashing.
+
+    A path that cannot be created locally (for example a case-insensitive
+    filesystem collision or an over-long name) is skipped instead of aborting
+    the whole skeleton. Errors raised by ``client`` are not swallowed here.
+    """
+    dest = Path(dest)
+    dest.mkdir(parents=True, exist_ok=True)
+
+    owner, _, repo = metadata.full_name.partition("/")
+    if not owner or not repo:
+        logger.warning(
+            "Cannot materialize %r: full_name is not 'owner/repo'",
+            metadata.full_name,
+        )
+        return dest
+
+    tree = client.get_repo_tree(owner, repo, metadata.default_branch)
+    if not tree.get("available"):
+        return dest
+    if tree.get("truncated"):
+        logger.warning(
+            "Tree truncated for %s — skeleton is incomplete", metadata.full_name
+        )
+
+    skipped = 0
+
+    for rel in tree.get("dirs", []):
+        target = _safe_target(dest, rel)
+        if target is None:
+            continue
+        try:
+            target.mkdir(parents=True, exist_ok=True)
+        except OSError as exc:
+            skipped += 1
+            logger.debug(
+                "Skipping directory %r in %s: %s", rel, metadata.full_name, exc
+            )
+
+    content_budget = max_content_files
+    for rel in tree.get("files", [])[:max_files]:
+        target = _safe_target(dest, rel)
+        if target is None:
+            continue
+
+        # Filesystem failures are handled per entry. The try blocks stay narrow
+        # on purpose: requests' exceptions derive from OSError, so wrapping the
+        # API call below would silently swallow network errors.
+        try:
+            target.parent.mkdir(parents=True, exist_ok=True)
+        except OSError as exc:
+            skipped += 1
+            logger.debug("Skipping file %r in %s: %s", rel, metadata.full_name, exc)
+            continue
+
+        text = ""
+        if content_budget > 0 and _is_content_file(rel):
+            # Charge the budget per attempt, not per success, so a run of
+            # failing fetches (rate limit, oversized or binary files) cannot
+            # turn into an unbounded number of Contents API calls.
+            content_budget -= 1
+            fetched = client.get_file_content(
+                owner, repo, rel, ref=metadata.default_branch
+            )
+            if fetched is not None:
+                text = fetched
+
+        try:
+            target.write_text(text, encoding="utf-8")
+        except OSError as exc:
+            skipped += 1
+            logger.debug("Skipping file %r in %s: %s", rel, metadata.full_name, exc)
+
+    if skipped:
+        logger.warning(
+            "Skipped %d unwritable path(s) while materializing %s",
+            skipped,
+            metadata.full_name,
+        )
+
+    return dest
+
+
+@contextmanager
+def materialize_api_workspace(
+    repos: list[RepoMetadata],
+    client: "GitHubClient",
+    *,
+    on_progress: Callable[[int, int, str], None] | None = None,
+    on_error: Callable[[str, str], None] | None = None,
+    max_files: int = DEFAULT_MAX_FILES,
+    max_content_files: int = DEFAULT_MAX_CONTENT_FILES,
+) -> Generator[dict[str, Path], None, None]:
+    """Yield ``{repo name: skeleton path}`` for repos built in a temp directory.
+
+    Intended as a drop-in replacement for ``cloner.clone_workspace``. Each repo
+    is materialized in turn under a fresh ``audit-api-`` temporary directory,
+    which is removed when the context exits.
+
+    ``on_progress(index, total, name)`` is called before each repo (1-based
+    index). A repo whose materialization raises is logged, reported through
+    ``on_error(name, message)`` and left out of the mapping, so one bad repo
+    never aborts a portfolio scan.
+    """
+    with tempfile.TemporaryDirectory(prefix="audit-api-") as scratch:
+        base_dir = Path(scratch)
+        repo_count = len(repos)
+        skeletons: dict[str, Path] = {}
+
+        for position, metadata in enumerate(repos, start=1):
+            if on_progress:
+                on_progress(position, repo_count, metadata.name)
+            try:
+                skeletons[metadata.name] = materialize_api_checkout(
+                    metadata,
+                    client,
+                    base_dir / metadata.name,
+                    max_files=max_files,
+                    max_content_files=max_content_files,
+                )
+            except Exception as exc:  # noqa: BLE001 — one bad repo must not abort the scan
+                logger.warning("API checkout failed for %s: %s", metadata.name, exc)
+                if on_error:
+                    on_error(metadata.name, str(exc))
+
+        yield skeletons
diff --git a/src/api_only.py b/src/api_only.py
new file mode 100644
index 0000000..46fdbac
--- /dev/null
+++ b/src/api_only.py
@@ -0,0 +1,158 @@
+"""Clone-free portfolio scoring from the GitHub API alone.
+
+Lists a user's repos, materializes a sparse API-sourced skeleton for each
+(``api_checkout``), runs the *existing, unmodified* analyzer engine against the
+skeleton, and scores with ``scorer.score_repo`` — producing a portfolio report
+without cloning any repository.
+
+This is the engine behind the hosted "paste your GitHub username" report. The
+result is honestly labelled API-only: structure / testing / CI / docs / README /
+dependency presence are recovered from the API, but deep code-quality,
+secret-scanning, and dependency-age signals require the full local scan (the OSS
+CLI). Security scoring runs offline by default because GitHub Advanced Security
+endpoints are not readable on other users' repositories.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, cast
+
+from src.analyzers import run_all_analyzers
+from src.api_checkout import materialize_api_workspace
+from src.models import RepoAudit, RepoMetadata
+from src.scorer import score_repo
+
+if TYPE_CHECKING:
+ from src.github_client import GitHubClient
+
+logger = logging.getLogger(__name__)
+
+# Value of the ``mode`` field that ``ApiOnlyReport`` carries by default.
+API_ONLY_MODE = "api_only"
+# Human-readable caveat that ``ApiOnlyReport`` carries by default, stating
+# which signals an API-only scan cannot provide.
+API_ONLY_FIDELITY_NOTE = (
+ "API-only scan: scored from GitHub API metadata and repository structure "
+ "without cloning. Deep code-quality, secret-scanning, and dependency-age "
+ "signals require the full local scan (OSS CLI)."
+)
+
+
+class _InteractiveClient:
+    """Wrap a GitHubClient to skip GitHub's async-computed ``stats/*`` endpoints.
+
+    ``stats/contributors``, ``stats/commit_activity`` and ``stats/participation``
+    return ``202 Accepted`` while GitHub computes them, and the client retries
+    with multi-second backoff — fine for a batch CLI run, far too slow for an
+    interactive hosted report (it dominated a 5-repo live scan at ~100s). The
+    analyzers already treat these as "unavailable" (empty list / dict), so scores
+    degrade gracefully rather than break. Every other attribute is looked up on
+    the wrapped client unchanged.
+    """
+
+    def __init__(self, inner: GitHubClient) -> None:
+        self._inner = inner
+
+    def get_contributor_stats(self, *args, **kwargs) -> list:
+        """Return an empty list without calling the API."""
+        return []
+
+    def get_commit_activity(self, *args, **kwargs) -> list:
+        """Return an empty list without calling the API."""
+        return []
+
+    def get_participation_stats(self, *args, **kwargs) -> dict:
+        """Return an empty dict without calling the API."""
+        return {}
+
+    def __getattr__(self, name: str):
+        # __getattr__ only runs when normal lookup fails. If ``_inner`` itself
+        # is missing (an instance built via __new__, e.g. by copy or pickle,
+        # before its state is restored), delegating would re-enter this method
+        # forever and surface as RecursionError rather than AttributeError.
+        if name == "_inner":
+            raise AttributeError(name)
+        return getattr(self._inner, name)
+
+
+def _portfolio_lang_freq(repos: list[RepoMetadata]) -> dict[str, float]:
+    """Return each primary language's share of the repos that declare one.
+
+    Repos with a falsy ``language`` are ignored. The result maps language to
+    a fraction (for novelty discounting), in first-seen order, and is empty
+    when no repo declares a language.
+    """
+    languages = [repo.language for repo in repos if repo.language]
+    if not languages:
+        return {}
+
+    tally: dict[str, int] = {}
+    for language in languages:
+        tally[language] = tally.get(language, 0) + 1
+
+    denominator = len(languages)
+    return {language: count / denominator for language, count in tally.items()}
+
+
+def score_repos_api_only(
+    repos: list[RepoMetadata],
+    client: GitHubClient,
+    *,
+    portfolio_lang_freq: dict[str, float] | None = None,
+    security_offline: bool = True,
+    fast: bool = True,
+) -> list[RepoAudit]:
+    """Score repos from API-built skeletons, returning the audits that succeeded.
+
+    When ``portfolio_lang_freq`` is omitted it is derived from ``repos``. With
+    ``fast`` (the default) the client is wrapped in ``_InteractiveClient`` so
+    the slow async ``stats/*`` lookups are skipped; pass ``fast=False`` to use
+    ``client`` as-is for a thorough scan. A repo that has no skeleton, or whose
+    analysis or scoring raises, is left out (the latter with a warning) so one
+    bad repo never aborts the portfolio scan.
+    """
+    lang_freq = (
+        _portfolio_lang_freq(repos)
+        if portfolio_lang_freq is None
+        else portfolio_lang_freq
+    )
+
+    if fast:
+        active_client = cast("GitHubClient", _InteractiveClient(client))
+    else:
+        active_client = client
+
+    scored: list[RepoAudit] = []
+    with materialize_api_workspace(repos, active_client) as skeletons:
+        for metadata in repos:
+            skeleton = skeletons.get(metadata.name)
+            if skeleton is None:
+                # Materialization failed for this repo; nothing to score.
+                continue
+            try:
+                analysis = run_all_analyzers(skeleton, metadata, active_client)
+                scored.append(
+                    score_repo(
+                        metadata,
+                        analysis,
+                        repo_path=skeleton,
+                        portfolio_lang_freq=lang_freq,
+                        github_client=active_client,
+                        security_offline=security_offline,
+                    )
+                )
+            except Exception as exc:  # noqa: BLE001 — one bad repo must not abort the scan
+                logger.warning(
+                    "API-only scoring failed for %s: %s", metadata.name, exc
+                )
+    return scored
+
+
+@dataclass
+class ApiOnlyReport:
+    """Result of a clone-free portfolio scan, convertible to a plain dict."""
+
+    # Username the report was produced for.
+    username: str
+    # One audit per repo that was scored.
+    audits: list[RepoAudit]
+    # Scan-mode label and accompanying caveat; both default to the API-only values.
+    mode: str = API_ONLY_MODE
+    fidelity_note: str = API_ONLY_FIDELITY_NOTE
+
+    def to_dict(self) -> dict:
+        """Return the report as a dict, with each audit expanded via ``to_dict``."""
+        serialized = [audit.to_dict() for audit in self.audits]
+        return {
+            "username": self.username,
+            "mode": self.mode,
+            "fidelity_note": self.fidelity_note,
+            "repo_count": len(serialized),
+            "repos": serialized,
+        }
+
+
+def audit_user_api_only(
+    username: str,
+    client: GitHubClient,
+    *,
+    max_repos: int | None = None,
+    fast: bool = True,
+) -> ApiOnlyReport:
+    """Score a user's repos clone-free and wrap the audits in a report.
+
+    The repo list comes from ``client.list_repos(username)`` and is cut to the
+    first ``max_repos`` entries when a limit is given. ``fast`` is forwarded to
+    ``score_repos_api_only``.
+    """
+    listing = client.list_repos(username)
+    selected = listing if max_repos is None else listing[:max_repos]
+
+    # API-only fidelity tradeoff: scored from the repo-list payload's primary
+    # `language` without spending a get_languages() call per repo, so
+    # `metadata.languages` (byte breakdown) stays empty — language-fraction
+    # signals are intentionally absent, not silently wrong.
+    portfolio = [RepoMetadata.from_api_response(item) for item in selected]
+
+    return ApiOnlyReport(
+        username=username,
+        audits=score_repos_api_only(portfolio, client, fast=fast),
+    )
diff --git a/src/github_client.py b/src/github_client.py
index 7790dbb..8467906 100644
--- a/src/github_client.py
+++ b/src/github_client.py
@@ -1,5 +1,6 @@
from __future__ import annotations
+import base64
import logging
import re
import sys
@@ -23,6 +24,11 @@
}
REST_API_VERSION = "2026-03-10"
EXPECTED_SECURITY_ENDPOINT_UNAVAILABLE_STATUSES = {403, 404}
+# Statuses that mean "no readable tree" (missing/private repo, empty repo, gone,
+# legal hold) — silent fail-soft. Anything else (auth, rate-limit, 5xx) is logged.
+EXPECTED_TREE_UNAVAILABLE_STATUSES = {404, 409, 410, 451}
+# Contents API: 404 = missing file (expected). Other statuses are logged.
+EXPECTED_CONTENT_UNAVAILABLE_STATUSES = {404}
class GitHubClientError(Exception):
@@ -104,7 +110,9 @@ def _request_method(
json_body: dict | list | None = None,
) -> requests.Response:
"""Make a non-GET request with the same rate-limit handling."""
- response = self.session.request(method, url, params=params, json=json_body, timeout=30)
+ response = self.session.request(
+ method, url, params=params, json=json_body, timeout=30
+ )
self._check_rate_limit(response)
response.raise_for_status()
return response
@@ -172,7 +180,9 @@ def _fetch_json(self, url: str, params: dict | None = None) -> object:
return data
- def _fetch_json_with_202_retry(self, url: str, params: dict | None = None) -> object:
+ def _fetch_json_with_202_retry(
+ self, url: str, params: dict | None = None
+ ) -> object:
"""Fetch JSON with 202 retry, checking cache first."""
if self.cache:
cached = self.cache.get(url, params)
@@ -216,9 +226,13 @@ def get_community_profile(self, owner: str, repo: str) -> dict:
Single API call returns presence of all health files.
"""
try:
- return self._fetch_json(f"{API_BASE}/repos/{owner}/{repo}/community/profile")
+ return self._fetch_json(
+ f"{API_BASE}/repos/{owner}/{repo}/community/profile"
+ )
except requests.HTTPError as exc:
- logger.warning("Failed to fetch community profile for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Failed to fetch community profile for %s/%s: %s", owner, repo, exc
+ )
return {}
def get_participation_stats(self, owner: str, repo: str) -> dict:
@@ -231,7 +245,9 @@ def get_participation_stats(self, owner: str, repo: str) -> dict:
f"{API_BASE}/repos/{owner}/{repo}/stats/participation"
)
except requests.HTTPError as exc:
- logger.warning("Failed to fetch participation for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Failed to fetch participation for %s/%s: %s", owner, repo, exc
+ )
return {}
def get_authenticated_user(self) -> str | None:
@@ -260,7 +276,9 @@ def _repo_list_cache_scope(self, username: str) -> str:
def list_repos(self, username: str) -> list[dict]:
"""Fetch all repos for a user. Uses /user/repos for the authenticated user."""
# Check cache for the complete repo list
- cache_key = f"{API_BASE}/list_repos/{username}/{self._repo_list_cache_scope(username)}"
+ cache_key = (
+ f"{API_BASE}/list_repos/{username}/{self._repo_list_cache_scope(username)}"
+ )
if self.cache:
cached = self.cache.get(cache_key)
if cached is not None:
@@ -294,7 +312,9 @@ def get_languages(self, owner: str, repo: str) -> dict[str, int]:
logger.warning("Failed to fetch languages for %s/%s: %s", owner, repo, exc)
return {}
- def get_releases(self, owner: str, repo: str, per_page: int = 10) -> tuple[list[dict], bool]:
+ def get_releases(
+ self, owner: str, repo: str, per_page: int = 10
+ ) -> tuple[list[dict], bool]:
"""Fetch releases for a repo.
Returns a (releases, available) tuple.
@@ -311,7 +331,9 @@ def get_releases(self, owner: str, repo: str, per_page: int = 10) -> tuple[list[
except requests.HTTPError as exc:
status = self._http_error_status(exc)
if status == 404:
- logger.debug("Releases endpoint unavailable for %s/%s (404)", owner, repo)
+ logger.debug(
+ "Releases endpoint unavailable for %s/%s (404)", owner, repo
+ )
return [], False
logger.warning("Failed to fetch releases for %s/%s: %s", owner, repo, exc)
return [], True
@@ -377,7 +399,9 @@ def get_repo_security_and_analysis(self, owner: str, repo: str) -> dict:
}
except requests.HTTPError as exc:
status = self._http_error_status(exc)
- logger.warning("Failed to fetch repo security metadata for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Failed to fetch repo security metadata for %s/%s: %s", owner, repo, exc
+ )
return {
"available": False,
"http_status": status,
@@ -425,9 +449,7 @@ def get_code_scanning_alert_count(self, owner: str, repo: str) -> dict:
for alert in alerts:
rule = alert.get("rule", {}) if isinstance(alert, dict) else {}
raw = (
- rule.get("security_severity_level")
- or rule.get("severity")
- or ""
+ rule.get("security_severity_level") or rule.get("severity") or ""
).lower()
rule_id = str(rule.get("id") or "")
if raw == "critical":
@@ -472,7 +494,9 @@ def get_code_scanning_alert_count(self, owner: str, repo: str) -> dict:
def get_sbom_exportability(self, owner: str, repo: str) -> dict:
"""Check whether the SBOM export endpoint is available for a repo."""
try:
- data = self._fetch_json(f"{API_BASE}/repos/{owner}/{repo}/dependency-graph/sbom")
+ data = self._fetch_json(
+ f"{API_BASE}/repos/{owner}/{repo}/dependency-graph/sbom"
+ )
payload = data if isinstance(data, dict) else {}
packages = payload.get("sbom", {}).get("packages", [])
return {
@@ -482,7 +506,9 @@ def get_sbom_exportability(self, owner: str, repo: str) -> dict:
}
except requests.HTTPError as exc:
status = self._http_error_status(exc)
- logger.warning("Failed to fetch SBOM exportability for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Failed to fetch SBOM exportability for %s/%s: %s", owner, repo, exc
+ )
return {
"available": False,
"http_status": status,
@@ -531,7 +557,9 @@ def get_dependency_sbom(self, owner: str, repo: str) -> dict:
logger.warning("Failed to fetch SBOM for %s/%s: %s", owner, repo, exc)
return {"available": False, "http_status": status, "reason": str(exc)}
except requests.RequestException as exc:
- logger.warning("Network error fetching SBOM for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Network error fetching SBOM for %s/%s: %s", owner, repo, exc
+ )
return {"available": False, "http_status": None, "reason": str(exc)}
payload = data if isinstance(data, dict) else {}
@@ -662,6 +690,90 @@ def get_file_sha(
return None
raise
+    def get_repo_tree(
+        self,
+        owner: str,
+        repo: str,
+        ref: str,
+    ) -> dict:
+        """List every file and directory path in a repo via the Git Trees API.
+
+        A single recursive call returns the whole tree, powering clone-free
+        structure / testing / CI / docs presence signals for API-only scoring.
+
+        Args:
+            owner: Repository owner.
+            repo: Repository name.
+            ref: Ref (or tree SHA) whose tree is listed. Percent-encoded before
+                it is placed in the URL; ``/`` is kept as-is.
+
+        Returns:
+            A dict with ``available`` (bool), ``files`` and ``dirs`` (lists of
+            paths from ``blob`` and ``tree`` entries respectively; entries of
+            any other type are ignored) and ``truncated`` (bool).
+
+        Fails soft (``available=False``) on an HTTP error. Statuses outside
+        ``EXPECTED_TREE_UNAVAILABLE_STATUSES`` are logged so a bad token never
+        silently scores every repo as empty.
+        """
+        from urllib.parse import quote
+
+        # Ref names may legally contain characters such as '#' or '?', which
+        # would otherwise be parsed as a URL fragment or query string.
+        quoted_ref = quote(ref, safe="/")
+        url = f"{API_BASE}/repos/{owner}/{repo}/git/trees/{quoted_ref}"
+        try:
+            data = self._fetch_json(url, params={"recursive": "1"})
+        except requests.HTTPError as exc:
+            status = self._http_error_status(exc)
+            if status not in EXPECTED_TREE_UNAVAILABLE_STATUSES:
+                logger.warning(
+                    "Failed to fetch tree for %s/%s (HTTP %s)", owner, repo, status
+                )
+            return {"available": False, "files": [], "dirs": [], "truncated": False}
+
+        payload = data if isinstance(data, dict) else {}
+        entries = payload.get("tree")
+        if not isinstance(entries, list):
+            entries = []
+
+        files: list[str] = []
+        dirs: list[str] = []
+        for entry in entries:
+            # Tolerate malformed entries instead of raising mid-listing.
+            if not isinstance(entry, dict):
+                continue
+            path = entry.get("path")
+            if not isinstance(path, str) or not path:
+                continue
+            kind = entry.get("type")
+            if kind == "blob":
+                files.append(path)
+            elif kind == "tree":
+                dirs.append(path)
+
+        return {
+            "available": True,
+            "files": files,
+            "dirs": dirs,
+            "truncated": bool(payload.get("truncated", False)),
+        }
+
+    def get_file_content(
+        self,
+        owner: str,
+        repo: str,
+        path: str,
+        *,
+        ref: str | None = None,
+        max_bytes: int = 1_000_000,
+    ) -> str | None:
+        """Fetch and base64-decode a single file's text via the Contents API.
+
+        Args:
+            owner: Repository owner.
+            repo: Repository name.
+            path: Repo-relative file path. Percent-encoded before it is placed
+                in the URL; ``/`` is kept as-is.
+            ref: Optional ref, sent as the ``ref`` query parameter.
+            max_bytes: Largest file size that is returned.
+
+        Returns:
+            The decoded text, or ``None`` when the request fails with an HTTP
+            error, the response is not a single file, the file is larger than
+            ``max_bytes`` (by reported or decoded size), the payload is not
+            base64, or the bytes are not valid UTF-8. HTTP statuses outside
+            ``EXPECTED_CONTENT_UNAVAILABLE_STATUSES`` are logged.
+        """
+        from urllib.parse import quote
+
+        # Paths come from arbitrary repositories; without encoding, a name
+        # containing '#' or '?' would be parsed as a fragment or query string.
+        quoted_path = quote(path, safe="/")
+        url = f"{API_BASE}/repos/{owner}/{repo}/contents/{quoted_path}"
+        params: dict = {}
+        if ref is not None:
+            params["ref"] = ref
+        try:
+            data = self._fetch_json(url, params=params or None)
+        except requests.HTTPError as exc:
+            status = self._http_error_status(exc)
+            if status not in EXPECTED_CONTENT_UNAVAILABLE_STATUSES:
+                logger.warning(
+                    "Failed to fetch %s from %s/%s (HTTP %s)", path, owner, repo, status
+                )
+            return None
+
+        if not isinstance(data, dict) or data.get("type") != "file":
+            return None
+
+        # A missing, null or non-numeric size is treated as unknown rather
+        # than raising; the decoded length is checked again below.
+        try:
+            reported_size = int(data.get("size") or 0)
+        except (TypeError, ValueError):
+            reported_size = 0
+        if reported_size > max_bytes:
+            return None
+        if data.get("encoding") != "base64":
+            return None
+
+        try:
+            # binascii.Error is a ValueError subclass; TypeError covers a
+            # non-string ``content`` field.
+            decoded = base64.b64decode(data.get("content") or "")
+        except (ValueError, TypeError):
+            return None
+        if len(decoded) > max_bytes:
+            return None
+        try:
+            return decoded.decode("utf-8")
+        except UnicodeDecodeError:
+            return None
+
def update_repo_file(
self,
owner: str,
@@ -690,14 +802,18 @@ def update_repo_file(
}
except requests.HTTPError as exc:
status = self._http_error_status(exc)
- logger.warning("Failed to update file %s in %s/%s: %s", path, owner, repo, exc)
+ logger.warning(
+ "Failed to update file %s in %s/%s: %s", path, owner, repo, exc
+ )
return {
"ok": False,
"http_status": status,
"sha": "",
}
- def list_repo_issues(self, owner: str, repo: str, state: str = "open") -> list[dict]:
+ def list_repo_issues(
+ self, owner: str, repo: str, state: str = "open"
+ ) -> list[dict]:
"""List repository issues for managed issue reconciliation."""
try:
return self._paginate(
@@ -733,7 +849,9 @@ def create_issue(self, owner: str, repo: str, payload: dict) -> dict:
"node_id": None,
}
- def update_issue(self, owner: str, repo: str, issue_number: int, payload: dict) -> dict:
+ def update_issue(
+ self, owner: str, repo: str, issue_number: int, payload: dict
+ ) -> dict:
"""Update an existing managed issue."""
try:
response = self._request_method(
@@ -752,7 +870,11 @@ def update_issue(self, owner: str, repo: str, issue_number: int, payload: dict)
except requests.HTTPError as exc:
status = self._http_error_status(exc)
logger.warning(
- "Failed to update issue %s for %s/%s: %s", issue_number, owner, repo, exc
+ "Failed to update issue %s for %s/%s: %s",
+ issue_number,
+ owner,
+ repo,
+ exc,
)
return {
"ok": False,
@@ -764,7 +886,9 @@ def update_issue(self, owner: str, repo: str, issue_number: int, payload: dict)
def get_repo_custom_property_values(self, owner: str, repo: str) -> dict:
"""Get current repository custom property values when available."""
try:
- data = self._fetch_json(f"{API_BASE}/repos/{owner}/{repo}/properties/values")
+ data = self._fetch_json(
+ f"{API_BASE}/repos/{owner}/{repo}/properties/values"
+ )
values = {}
if isinstance(data, list):
for item in data:
@@ -775,7 +899,9 @@ def get_repo_custom_property_values(self, owner: str, repo: str) -> dict:
}
except requests.HTTPError as exc:
status = self._http_error_status(exc)
- logger.warning("Failed to fetch custom properties for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Failed to fetch custom properties for %s/%s: %s", owner, repo, exc
+ )
return {
"available": False,
"http_status": status,
@@ -792,7 +918,9 @@ def list_org_custom_properties(self, owner: str) -> dict:
}
except requests.HTTPError as exc:
status = self._http_error_status(exc)
- logger.warning("Failed to list custom property schema for %s: %s", owner, exc)
+ logger.warning(
+ "Failed to list custom property schema for %s: %s", owner, exc
+ )
return {
"available": False,
"http_status": status,
@@ -818,7 +946,9 @@ def update_repo_custom_property_values(
for item in schema.get("properties", [])
if item.get("property_name")
}
- to_update = {name: value for name, value in properties.items() if name in allowed}
+ to_update = {
+ name: value for name, value in properties.items() if name in allowed
+ }
before = self.get_repo_custom_property_values(owner, repo)
if not to_update:
return {
@@ -831,7 +961,8 @@ def update_repo_custom_property_values(
payload = {
"properties": [
- {"property_name": name, "value": value} for name, value in to_update.items()
+ {"property_name": name, "value": value}
+ for name, value in to_update.items()
]
}
try:
@@ -851,7 +982,9 @@ def update_repo_custom_property_values(
}
except requests.HTTPError as exc:
status = self._http_error_status(exc)
- logger.warning("Failed to update custom properties for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Failed to update custom properties for %s/%s: %s", owner, repo, exc
+ )
return {
"ok": False,
"status": "failed",
@@ -920,10 +1053,15 @@ def get_project_v2(self, owner: str, project_number: int) -> dict:
}
"""
try:
- data = self._graphql_query(query, {"login": owner, "number": int(project_number)})
+ data = self._graphql_query(
+ query, {"login": owner, "number": int(project_number)}
+ )
except (requests.HTTPError, GitHubClientError) as exc:
logger.warning(
- "Failed to resolve GitHub Project %s #%s: %s", owner, project_number, exc
+ "Failed to resolve GitHub Project %s #%s: %s",
+ owner,
+ project_number,
+ exc,
)
return {
"available": False,
@@ -932,9 +1070,9 @@ def get_project_v2(self, owner: str, project_number: int) -> dict:
"fields": {},
}
- project = (data.get("user") or {}).get("projectV2") or (data.get("organization") or {}).get(
- "projectV2"
- )
+ project = (data.get("user") or {}).get("projectV2") or (
+ data.get("organization") or {}
+ ).get("projectV2")
if not isinstance(project, dict):
return {
"available": False,
@@ -1006,7 +1144,9 @@ def find_project_v2_item_by_issue(
after: str | None = None
try:
while True:
- data = self._graphql_query(query, {"projectId": project_id, "after": after})
+ data = self._graphql_query(
+ query, {"projectId": project_id, "after": after}
+ )
items = ((data.get("node") or {}).get("items") or {}).get("nodes") or []
for item in items:
content = (item or {}).get("content") or {}
@@ -1021,13 +1161,17 @@ def find_project_v2_item_by_issue(
"issue_url": content.get("url", ""),
},
}
- page_info = ((data.get("node") or {}).get("items") or {}).get("pageInfo") or {}
+ page_info = ((data.get("node") or {}).get("items") or {}).get(
+ "pageInfo"
+ ) or {}
if not page_info.get("hasNextPage"):
break
after = page_info.get("endCursor")
except (requests.HTTPError, GitHubClientError) as exc:
logger.warning(
- "Failed to inspect GitHub Project item for issue %s: %s", issue_node_id, exc
+ "Failed to inspect GitHub Project item for issue %s: %s",
+ issue_node_id,
+ exc,
)
return {"available": False, "item": None}
return {"available": True, "item": None}
@@ -1063,7 +1207,9 @@ def find_project_v2_item_by_id(self, project_id: str, item_id: str) -> dict:
after: str | None = None
try:
while True:
- data = self._graphql_query(query, {"projectId": project_id, "after": after})
+ data = self._graphql_query(
+ query, {"projectId": project_id, "after": after}
+ )
items = ((data.get("node") or {}).get("items") or {}).get("nodes") or []
for item in items:
if (item or {}).get("id") == item_id:
@@ -1078,7 +1224,9 @@ def find_project_v2_item_by_id(self, project_id: str, item_id: str) -> dict:
"issue_url": content.get("url", ""),
},
}
- page_info = ((data.get("node") or {}).get("items") or {}).get("pageInfo") or {}
+ page_info = ((data.get("node") or {}).get("items") or {}).get(
+ "pageInfo"
+ ) or {}
if not page_info.get("hasNextPage"):
break
after = page_info.get("endCursor")
@@ -1110,7 +1258,10 @@ def add_issue_to_project_v2(self, project_id: str, issue_node_id: str) -> dict:
}
except (requests.HTTPError, GitHubClientError) as exc:
logger.warning(
- "Failed to add issue %s to project %s: %s", issue_node_id, project_id, exc
+ "Failed to add issue %s to project %s: %s",
+ issue_node_id,
+ project_id,
+ exc,
)
return {"ok": False, "status": "failed", "item_id": ""}
@@ -1175,7 +1326,10 @@ def update_project_v2_item_field(
return {"ok": True, "status": "updated"}
except (requests.HTTPError, GitHubClientError) as exc:
logger.warning(
- "Failed to update project field %s on item %s: %s", field_id, item_id, exc
+ "Failed to update project field %s on item %s: %s",
+ field_id,
+ item_id,
+ exc,
)
return {"ok": False, "status": "failed"}
@@ -1215,7 +1369,9 @@ def get_commit_activity(self, owner: str, repo: str) -> list[dict]:
)
return data if isinstance(data, list) else []
except requests.HTTPError as exc:
- logger.warning("Failed to fetch commit activity for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Failed to fetch commit activity for %s/%s: %s", owner, repo, exc
+ )
return []
def get_contributor_stats(self, owner: str, repo: str) -> list[dict]:
@@ -1229,7 +1385,9 @@ def get_contributor_stats(self, owner: str, repo: str) -> list[dict]:
)
return data if isinstance(data, list) else []
except requests.HTTPError as exc:
- logger.warning("Failed to fetch contributor stats for %s/%s: %s", owner, repo, exc)
+ logger.warning(
+ "Failed to fetch contributor stats for %s/%s: %s", owner, repo, exc
+ )
return []
def get_repo_metadata(
diff --git a/tests/test_api_checkout.py b/tests/test_api_checkout.py
new file mode 100644
index 0000000..63e52f7
--- /dev/null
+++ b/tests/test_api_checkout.py
@@ -0,0 +1,194 @@
+from __future__ import annotations
+
+from datetime import datetime, timezone
+from pathlib import Path
+
+from src.api_checkout import materialize_api_checkout, materialize_api_workspace
+from src.models import RepoMetadata
+
+
+def _meta(name: str = "demo", full_name: str = "octocat/demo") -> RepoMetadata:
+ dt = datetime(2020, 1, 1, tzinfo=timezone.utc)
+ return RepoMetadata(
+ name=name,
+ full_name=full_name,
+ description="d",
+ language="Python",
+ languages={"Python": 100},
+ private=False,
+ fork=False,
+ archived=False,
+ created_at=dt,
+ updated_at=dt,
+ pushed_at=dt,
+ default_branch="main",
+ stars=1,
+ forks=0,
+ open_issues=0,
+ size_kb=10,
+ html_url="https://example/x",
+ clone_url="https://example/x.git",
+ topics=[],
+ )
+
+
+class _FakeClient:
+ """Duck-typed stand-in for GitHubClient — no HTTP."""
+
+ def __init__(self, tree: dict, contents: dict[str, str] | None = None) -> None:
+ self._tree = tree
+ self._contents = contents or {}
+ self.content_requests: list[str] = []
+
+ def get_repo_tree(self, owner: str, repo: str, ref: str) -> dict:
+ return self._tree
+
+ def get_file_content(
+ self,
+ owner: str,
+ repo: str,
+ path: str,
+ *,
+ ref: str | None = None,
+ max_bytes: int = 1_000_000,
+ ) -> str | None:
+ self.content_requests.append(path)
+ return self._contents.get(path)
+
+
+def test_materialize_creates_skeleton_dirs_and_files(tmp_path):
+ tree = {
+ "available": True,
+ "truncated": False,
+ "files": ["README.md", "src/main.py", "tests/test_main.py"],
+ "dirs": ["src", "tests"],
+ }
+ client = _FakeClient(tree, contents={"README.md": "# Demo\nHello\n"})
+ dest = tmp_path / "demo"
+
+ result = materialize_api_checkout(_meta(), client, dest)
+
+ assert result == dest
+ assert (dest / "src").is_dir()
+ assert (dest / "tests").is_dir()
+ assert (dest / "src" / "main.py").is_file()
+ assert (dest / "tests" / "test_main.py").is_file()
+
+
+def test_curated_content_files_are_written_with_real_content(tmp_path):
+ tree = {
+ "available": True,
+ "truncated": False,
+ "files": ["README.md", "pyproject.toml", "src/main.py"],
+ "dirs": ["src"],
+ }
+ client = _FakeClient(
+ tree,
+ contents={
+ "README.md": "# Title\n\nLong readme body.\n",
+ "pyproject.toml": "[project]\nname='demo'\n",
+ },
+ )
+ dest = tmp_path / "demo"
+
+ materialize_api_checkout(_meta(), client, dest)
+
+ assert (dest / "README.md").read_text() == "# Title\n\nLong readme body.\n"
+ assert "name='demo'" in (dest / "pyproject.toml").read_text()
+ # Source files are presence-only (empty) — never content-fetched.
+ assert (dest / "src" / "main.py").read_text() == ""
+ assert "src/main.py" not in client.content_requests
+
+
+def test_unavailable_tree_yields_empty_dir(tmp_path):
+ client = _FakeClient(
+ {"available": False, "files": [], "dirs": [], "truncated": False}
+ )
+ dest = tmp_path / "empty"
+
+ result = materialize_api_checkout(_meta(), client, dest)
+
+ assert result == dest
+ assert dest.is_dir()
+ assert list(dest.iterdir()) == []
+
+
+def test_path_traversal_entries_are_rejected(tmp_path):
+ tree = {
+ "available": True,
+ "truncated": False,
+ "files": ["../escape.txt", "/abs/evil.txt", "ok.py"],
+ "dirs": ["../evildir"],
+ }
+ client = _FakeClient(tree)
+ dest = tmp_path / "demo"
+
+ materialize_api_checkout(_meta(), client, dest)
+
+ # Nothing escaped the destination directory.
+ assert not (tmp_path / "escape.txt").exists()
+ assert not Path("/abs/evil.txt").exists()
+ assert not (tmp_path / "evildir").exists()
+ # The safe file still materialized.
+ assert (dest / "ok.py").is_file()
+
+
+def test_max_files_cap_is_respected(tmp_path):
+ files = [f"f{i}.py" for i in range(50)]
+ tree = {"available": True, "truncated": False, "files": files, "dirs": []}
+ client = _FakeClient(tree)
+ dest = tmp_path / "demo"
+
+ materialize_api_checkout(_meta(), client, dest, max_files=10)
+
+ created = list(dest.rglob("*.py"))
+ assert len(created) == 10
+
+
+def test_content_fetch_budget_is_bounded(tmp_path):
+ # Many README-like content files, but only a bounded number get fetched.
+ files = [f"pkg{i}/README.md" for i in range(30)]
+ dirs = [f"pkg{i}" for i in range(30)]
+ tree = {"available": True, "truncated": False, "files": files, "dirs": dirs}
+ contents = {f: "# readme\n" for f in files}
+ client = _FakeClient(tree, contents=contents)
+ dest = tmp_path / "demo"
+
+ materialize_api_checkout(_meta(), client, dest, max_content_files=5)
+
+ assert len(client.content_requests) == 5
+
+
+def test_workspace_yields_paths_and_cleans_up():
+ tree = {"available": True, "truncated": False, "files": ["README.md"], "dirs": []}
+ client = _FakeClient(tree, contents={"README.md": "# hi\n"})
+ repos = [_meta(name="a", full_name="o/a"), _meta(name="b", full_name="o/b")]
+
+ captured: dict[str, Path] = {}
+ with materialize_api_workspace(repos, client) as workspace:
+ assert set(workspace.keys()) == {"a", "b"}
+ for name, path in workspace.items():
+ assert path.is_dir()
+ captured[name] = path
+ assert (workspace["a"] / "README.md").read_text() == "# hi\n"
+
+ # Temp dirs are removed when the context exits.
+ for path in captured.values():
+ assert not path.exists()
+
+
+def test_null_byte_paths_are_rejected(tmp_path):
+ tree = {
+ "available": True,
+ "truncated": False,
+ "files": ["ok.py", "evil\x00.py"],
+ "dirs": [],
+ }
+ client = _FakeClient(tree)
+ dest = tmp_path / "demo"
+
+ materialize_api_checkout(_meta(), client, dest)
+
+ assert (dest / "ok.py").is_file()
+ # The null-byte entry is rejected at the guard, not written.
+ assert len(list(dest.rglob("*.py"))) == 1
diff --git a/tests/test_api_only.py b/tests/test_api_only.py
new file mode 100644
index 0000000..355f54b
--- /dev/null
+++ b/tests/test_api_only.py
@@ -0,0 +1,220 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+
+from src.api_only import (
+ API_ONLY_MODE,
+ ApiOnlyReport,
+ audit_user_api_only,
+ score_repos_api_only,
+)
+from src.models import RepoMetadata
+
+
+def _meta(
+ name: str = "demo", full_name: str = "octocat/demo", language: str = "Python"
+) -> RepoMetadata:
+ dt = datetime(2024, 6, 1, tzinfo=timezone.utc)
+ return RepoMetadata(
+ name=name,
+ full_name=full_name,
+ description="A demo project",
+ language=language,
+ languages={},
+ private=False,
+ fork=False,
+ archived=False,
+ created_at=dt,
+ updated_at=dt,
+ pushed_at=dt,
+ default_branch="main",
+ stars=12,
+ forks=2,
+ open_issues=1,
+ size_kb=200,
+ html_url="https://github.com/octocat/demo",
+ clone_url="https://github.com/octocat/demo.git",
+ topics=["cli"],
+ )
+
+
+def _rich_tree() -> dict:
+ return {
+ "available": True,
+ "truncated": False,
+ "files": [
+ "README.md",
+ "pyproject.toml",
+ "src/app.py",
+ "tests/test_app.py",
+ ".github/workflows/ci.yml",
+ ],
+ "dirs": ["src", "tests", ".github", ".github/workflows"],
+ }
+
+
+class _FakeClient:
+ """Minimal duck-typed client: tree + content + repo list. No HTTP.
+
+ Analyzers that reach for API-only endpoints (activity, community, security)
+ fail soft inside ``run_all_analyzers`` — exactly the API-only fidelity floor.
+ """
+
+ def __init__(
+ self,
+ tree: dict,
+ contents: dict[str, str] | None = None,
+ repos: list[dict] | None = None,
+ ) -> None:
+ self._tree = tree
+ self._contents = contents or {}
+ self._repos = repos or []
+
+ def get_repo_tree(self, owner: str, repo: str, ref: str) -> dict:
+ return self._tree
+
+ def get_file_content(
+ self, owner, repo, path, *, ref=None, max_bytes=1_000_000
+ ) -> str | None:
+ return self._contents.get(path)
+
+ def list_repos(self, username: str) -> list[dict]:
+ return self._repos
+
+
+def test_score_repos_api_only_runs_real_engine_without_clone():
+ contents = {
+ "README.md": (
+ "# App\n\nA real project that does a real thing.\n\n"
+ "## Usage\n\nRun it.\n\n## Install\n\npip install app\n"
+ ),
+ "pyproject.toml": "[project]\nname = 'app'\n\n[tool.pytest.ini_options]\n",
+ }
+ client = _FakeClient(_rich_tree(), contents)
+
+ audits = score_repos_api_only([_meta()], client)
+
+ assert len(audits) == 1
+ audit = audits[0]
+ assert audit.metadata.name == "demo"
+ assert 0.0 <= audit.overall_score <= 1.0
+
+ dims = {r.dimension: r.score for r in audit.analyzer_results}
+ # Presence signals recovered from the API tree alone — no clone:
+ assert dims["testing"] > 0 # tests/ dir + test file present
+ assert dims["readme"] > 0 # README present, with content
+ assert dims["cicd"] > 0 # .github/workflows/ci.yml present
+ assert dims["structure"] > 0
+
+
+def test_bare_repo_is_detected_as_having_no_tests():
+ tree = {"available": True, "truncated": False, "files": ["README.md"], "dirs": []}
+ client = _FakeClient(tree, {"README.md": "# bare\n"})
+
+ audits = score_repos_api_only([_meta(name="bare", full_name="o/bare")], client)
+
+ audit = audits[0]
+ dims = {r.dimension: r.score for r in audit.analyzer_results}
+ assert dims["testing"] == 0.0
+ assert "no-tests" in audit.flags
+
+
+def test_audit_user_api_only_lists_then_scores():
+ repo_dict = {
+ "name": "demo",
+ "full_name": "octocat/demo",
+ "description": "A demo project",
+ "language": "Python",
+ "private": False,
+ "fork": False,
+ "archived": False,
+ "created_at": "2024-01-01T00:00:00Z",
+ "updated_at": "2024-06-01T00:00:00Z",
+ "pushed_at": "2024-06-01T00:00:00Z",
+ "default_branch": "main",
+ "stargazers_count": 12,
+ "forks_count": 2,
+ "open_issues_count": 1,
+ "size": 200,
+ "html_url": "https://github.com/octocat/demo",
+ "clone_url": "https://github.com/octocat/demo.git",
+ "topics": ["cli"],
+ }
+ client = _FakeClient(
+ _rich_tree(), {"README.md": "# Demo\n\nbody\n"}, repos=[repo_dict]
+ )
+
+ report = audit_user_api_only("octocat", client)
+
+ assert isinstance(report, ApiOnlyReport)
+ assert report.username == "octocat"
+ assert report.mode == API_ONLY_MODE
+ assert len(report.audits) == 1
+
+ payload = report.to_dict()
+ assert payload["mode"] == "api_only"
+ assert payload["repo_count"] == 1
+ assert payload["fidelity_note"] # honest API-only caveat is present
+ assert payload["repos"][0]["metadata"]["name"] == "demo"
+
+
+def test_report_to_dict_is_json_serializable():
+ client = _FakeClient(_rich_tree(), {"README.md": "# x\n\nbody\n"})
+ audits = score_repos_api_only([_meta()], client)
+ report = ApiOnlyReport(username="octocat", audits=audits)
+
+ # Must serialize cleanly for the hosted (Next.js) consumer.
+ encoded = json.dumps(report.to_dict())
+ assert '"mode": "api_only"' in encoded
+
+
+class _RecordingClient(_FakeClient):
+ """Records calls to the slow async-stats endpoints."""
+
+ def __init__(self, *args, **kwargs) -> None:
+ super().__init__(*args, **kwargs)
+ self.stats_calls: list[str] = []
+
+ def get_contributor_stats(self, owner, repo):
+ self.stats_calls.append("contributor")
+ return []
+
+ def get_commit_activity(self, owner, repo):
+ self.stats_calls.append("commit_activity")
+ return []
+
+ def get_participation_stats(self, owner, repo):
+ self.stats_calls.append("participation")
+ return {}
+
+ # Fast (non-202) endpoints the analyzers also touch — provided so they
+ # delegate cleanly rather than fail-soft.
+ def get_releases(self, owner, repo, per_page=10):
+ return ([], True)
+
+ def get_recent_commits(self, owner, repo, per_page=10):
+ return []
+
+ def get_pull_requests(self, owner, repo, state="all", per_page=30):
+ return []
+
+ def get_community_profile(self, owner, repo):
+ return {"available": False}
+
+
+def test_fast_mode_skips_async_stats_endpoints():
+ client = _RecordingClient(_rich_tree(), {"README.md": "# x\n"})
+
+ score_repos_api_only([_meta()], client, fast=True)
+
+ assert client.stats_calls == []
+
+
+def test_thorough_mode_uses_async_stats_endpoints():
+ client = _RecordingClient(_rich_tree(), {"README.md": "# x\n"})
+
+ score_repos_api_only([_meta()], client, fast=False)
+
+ assert "contributor" in client.stats_calls
+ assert "commit_activity" in client.stats_calls
diff --git a/tests/test_github_client.py b/tests/test_github_client.py
index 5f5d15b..48e4413 100644
--- a/tests/test_github_client.py
+++ b/tests/test_github_client.py
@@ -1,5 +1,7 @@
from __future__ import annotations
+import base64
+
import requests
from src.github_client import REST_API_VERSION, GitHubClient
@@ -41,10 +43,16 @@ def test_repo_list_cache_key_includes_owner_private_scope(self, monkeypatch):
repos = client.list_repos("octocat")
assert repos == [{"name": "private-repo", "private": True}]
- assert any("/list_repos/octocat/owner-private" in call[0] for call in cache.get_calls)
- assert any("/list_repos/octocat/owner-private" in call[0] for call in cache.put_calls)
+ assert any(
+ "/list_repos/octocat/owner-private" in call[0] for call in cache.get_calls
+ )
+ assert any(
+ "/list_repos/octocat/owner-private" in call[0] for call in cache.put_calls
+ )
- def test_public_and_private_repo_list_cache_entries_do_not_collide(self, monkeypatch):
+ def test_public_and_private_repo_list_cache_entries_do_not_collide(
+ self, monkeypatch
+ ):
cache = _MemoryCache()
owner_client = GitHubClient(token="secret", cache=cache)
@@ -66,8 +74,13 @@ def test_public_and_private_repo_list_cache_entries_do_not_collide(self, monkeyp
assert owner_result == [{"name": "private-repo", "private": True}]
assert anon_result == [{"name": "public-repo", "private": False}]
- assert any("/list_repos/octocat/owner-private" in call[0] for call in cache.put_calls)
- assert any("/list_repos/octocat/public-anonymous" in call[0] for call in cache.put_calls)
+ assert any(
+ "/list_repos/octocat/owner-private" in call[0] for call in cache.put_calls
+ )
+ assert any(
+ "/list_repos/octocat/public-anonymous" in call[0]
+ for call in cache.put_calls
+ )
def test_security_endpoints_return_counts_when_available(self, monkeypatch):
client = GitHubClient()
@@ -79,7 +92,10 @@ def _fake_fetch(url, params=None):
return [
{"number": 1, "rule": {"security_severity_level": "critical"}},
{"number": 2, "rule": {"id": "CodeReviewID", "severity": "error"}},
- {"number": 5, "rule": {"id": "ConcreteRuleID", "severity": "error"}},
+ {
+ "number": 5,
+ "rule": {"id": "ConcreteRuleID", "severity": "error"},
+ },
{"number": 3, "rule": {"severity": "medium"}},
{"number": 4, "rule": {"severity": "note"}},
]
@@ -111,13 +127,17 @@ def test_security_endpoints_fail_soft_on_http_error(self, monkeypatch):
response.status_code = 404
error = requests.HTTPError(response=response)
- monkeypatch.setattr(client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error))
+ monkeypatch.setattr(
+ client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)
+ )
assert client.get_secret_scanning_alert_count("o", "r")["available"] is False
assert client.get_code_scanning_alert_count("o", "r")["http_status"] == 404
assert client.get_sbom_exportability("o", "r")["available"] is False
- def test_security_alert_endpoint_403_and_404_are_not_warnings(self, monkeypatch, caplog):
+ def test_security_alert_endpoint_403_and_404_are_not_warnings(
+ self, monkeypatch, caplog
+ ):
client = GitHubClient()
def _raise(status_code):
@@ -128,7 +148,9 @@ def _raise(status_code):
monkeypatch.setattr(client, "_fetch_json", lambda *a, **k: _raise(403))
with caplog.at_level("WARNING"):
- assert client.get_secret_scanning_alert_count("o", "r")["http_status"] == 403
+ assert (
+ client.get_secret_scanning_alert_count("o", "r")["http_status"] == 403
+ )
assert "secret scanning alerts" not in caplog.text
@@ -147,7 +169,9 @@ def test_security_alert_endpoint_unexpected_http_errors_still_warn(
response = requests.Response()
response.status_code = 500
error = requests.HTTPError(response=response)
- monkeypatch.setattr(client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error))
+ monkeypatch.setattr(
+ client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)
+ )
with caplog.at_level("WARNING"):
assert client.get_code_scanning_alert_count("o", "r")["http_status"] == 500
@@ -156,16 +180,35 @@ def test_security_alert_endpoint_unexpected_http_errors_still_warn(
def test_get_repo_topics_reads_names_payload(self, monkeypatch):
client = GitHubClient()
- monkeypatch.setattr(client, "_fetch_json", lambda *a, **k: {"names": ["python", "ghra-showcase"]})
+ monkeypatch.setattr(
+ client,
+ "_fetch_json",
+ lambda *a, **k: {"names": ["python", "ghra-showcase"]},
+ )
topics = client.get_repo_topics("o", "r")
assert topics["available"] is True
assert topics["topics"] == ["python", "ghra-showcase"]
- def test_update_repo_custom_property_values_skips_missing_definitions(self, monkeypatch):
+ def test_update_repo_custom_property_values_skips_missing_definitions(
+ self, monkeypatch
+ ):
client = GitHubClient()
- monkeypatch.setattr(client, "list_org_custom_properties", lambda owner: {"available": True, "properties": []})
- monkeypatch.setattr(client, "get_repo_custom_property_values", lambda owner, repo: {"available": True, "values": {"portfolio_call": "old"}})
- result = client.update_repo_custom_property_values("o", "r", {"portfolio_call": "new"})
+ monkeypatch.setattr(
+ client,
+ "list_org_custom_properties",
+ lambda owner: {"available": True, "properties": []},
+ )
+ monkeypatch.setattr(
+ client,
+ "get_repo_custom_property_values",
+ lambda owner, repo: {
+ "available": True,
+ "values": {"portfolio_call": "old"},
+ },
+ )
+ result = client.update_repo_custom_property_values(
+ "o", "r", {"portfolio_call": "new"}
+ )
assert result["status"] == "skipped"
assert result["before"] == {"portfolio_call": "old"}
@@ -218,7 +261,11 @@ def test_find_project_v2_item_by_issue_reads_linked_issue(self, monkeypatch):
{
"id": "PVTI_1",
"isArchived": False,
- "content": {"id": "ISSUE_1", "number": 12, "url": "https://github.com/o/r/issues/12"},
+ "content": {
+ "id": "ISSUE_1",
+ "number": 12,
+ "url": "https://github.com/o/r/issues/12",
+ },
}
],
"pageInfo": {"hasNextPage": False, "endCursor": None},
@@ -232,12 +279,18 @@ def test_find_project_v2_item_by_issue_reads_linked_issue(self, monkeypatch):
assert result["available"] is True
assert result["item"]["id"] == "PVTI_1"
- def test_add_and_archive_project_v2_item_return_normalized_payloads(self, monkeypatch):
+ def test_add_and_archive_project_v2_item_return_normalized_payloads(
+ self, monkeypatch
+ ):
client = GitHubClient()
responses = iter(
[
{"addProjectV2ItemById": {"item": {"id": "PVTI_1"}}},
- {"archiveProjectV2Item": {"item": {"id": "PVTI_1", "isArchived": True}}},
+ {
+ "archiveProjectV2Item": {
+ "item": {"id": "PVTI_1", "isArchived": True}
+ }
+ },
]
)
monkeypatch.setattr(client, "_graphql_query", lambda *_a, **_k: next(responses))
@@ -248,3 +301,174 @@ def test_add_and_archive_project_v2_item_return_normalized_payloads(self, monkey
assert created["ok"] is True
assert created["item_id"] == "PVTI_1"
assert archived["status"] == "archived"
+
+
+class TestGitHubClientTreeAndContents:
+ """Git Trees API + Contents API support for clone-free (API-only) scoring."""
+
+ def test_get_repo_tree_returns_files_and_dirs(self, monkeypatch):
+ client = GitHubClient()
+ payload = {
+ "tree": [
+ {"path": "README.md", "type": "blob"},
+ {"path": "src", "type": "tree"},
+ {"path": "src/main.py", "type": "blob"},
+ {"path": "tests", "type": "tree"},
+ {"path": "tests/test_main.py", "type": "blob"},
+ ],
+ "truncated": False,
+ }
+ monkeypatch.setattr(client, "_fetch_json", lambda url, params=None: payload)
+
+ tree = client.get_repo_tree("o", "r", "main")
+
+ assert tree["available"] is True
+ assert tree["truncated"] is False
+ assert "README.md" in tree["files"]
+ assert "src/main.py" in tree["files"]
+ assert "tests/test_main.py" in tree["files"]
+ assert "src" in tree["dirs"]
+ assert "tests" in tree["dirs"]
+ # tree entries are NOT files
+ assert "src" not in tree["files"]
+
+ def test_get_repo_tree_requests_recursive(self, monkeypatch):
+ client = GitHubClient()
+ seen: dict = {}
+
+ def _fake(url, params=None):
+ seen["url"] = url
+ seen["params"] = params
+ return {"tree": [], "truncated": False}
+
+ monkeypatch.setattr(client, "_fetch_json", _fake)
+
+ client.get_repo_tree("o", "r", "main")
+
+ assert "/repos/o/r/git/trees/main" in seen["url"]
+ assert seen["params"] == {"recursive": "1"}
+
+ def test_get_repo_tree_fails_soft_on_http_error(self, monkeypatch):
+ client = GitHubClient()
+ response = requests.Response()
+ response.status_code = 404
+ error = requests.HTTPError(response=response)
+ monkeypatch.setattr(
+ client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)
+ )
+
+ tree = client.get_repo_tree("o", "r", "main")
+
+ assert tree["available"] is False
+ assert tree["files"] == []
+ assert tree["dirs"] == []
+
+ def test_get_repo_tree_flags_truncation(self, monkeypatch):
+ client = GitHubClient()
+ monkeypatch.setattr(
+ client,
+ "_fetch_json",
+ lambda url, params=None: {
+ "tree": [{"path": "a.py", "type": "blob"}],
+ "truncated": True,
+ },
+ )
+
+ tree = client.get_repo_tree("o", "r", "main")
+
+ assert tree["truncated"] is True
+ assert tree["available"] is True
+
+ def test_get_file_content_decodes_base64(self, monkeypatch):
+ client = GitHubClient()
+ raw = b"# Title\nbody text\n"
+ encoded = base64.b64encode(raw).decode("ascii")
+ monkeypatch.setattr(
+ client,
+ "_fetch_json",
+ lambda url, params=None: {
+ "type": "file",
+ "encoding": "base64",
+ "content": encoded,
+ "size": len(raw),
+ },
+ )
+
+ content = client.get_file_content("o", "r", "README.md")
+
+ assert content == "# Title\nbody text\n"
+
+ def test_get_file_content_returns_none_on_404(self, monkeypatch):
+ client = GitHubClient()
+ response = requests.Response()
+ response.status_code = 404
+ error = requests.HTTPError(response=response)
+ monkeypatch.setattr(
+ client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)
+ )
+
+ assert client.get_file_content("o", "r", "missing.txt") is None
+
+ def test_get_file_content_skips_oversize_files(self, monkeypatch):
+ client = GitHubClient()
+ # Contents API returns content="" for files >1MB; we must not treat that
+ # as a real (empty) file. Skip anything over the byte cap.
+ monkeypatch.setattr(
+ client,
+ "_fetch_json",
+ lambda url, params=None: {
+ "type": "file",
+ "encoding": "base64",
+ "content": "",
+ "size": 5_000_000,
+ },
+ )
+
+ assert client.get_file_content("o", "r", "huge.bin", max_bytes=1_000_000) is None
+
+ def test_get_repo_tree_logs_unexpected_status_but_fails_soft(
+ self, monkeypatch, caplog
+ ):
+ client = GitHubClient()
+ response = requests.Response()
+ response.status_code = 500
+ error = requests.HTTPError(response=response)
+ monkeypatch.setattr(
+ client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)
+ )
+
+ with caplog.at_level("WARNING"):
+ tree = client.get_repo_tree("o", "r", "main")
+
+ assert tree["available"] is False
+ assert "Failed to fetch tree" in caplog.text
+
+ def test_get_repo_tree_empty_repo_409_is_silent(self, monkeypatch, caplog):
+ client = GitHubClient()
+ response = requests.Response()
+ response.status_code = 409
+ error = requests.HTTPError(response=response)
+ monkeypatch.setattr(
+ client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)
+ )
+
+ with caplog.at_level("WARNING"):
+ tree = client.get_repo_tree("o", "r", "main")
+
+ assert tree["available"] is False
+ assert "Failed to fetch tree" not in caplog.text
+
+ def test_get_file_content_logs_unexpected_status(self, monkeypatch, caplog):
+ client = GitHubClient()
+ response = requests.Response()
+ response.status_code = 500
+ error = requests.HTTPError(response=response)
+ monkeypatch.setattr(
+ client, "_fetch_json", lambda *a, **k: (_ for _ in ()).throw(error)
+ )
+
+ with caplog.at_level("WARNING"):
+ result = client.get_file_content("o", "r", "README.md")
+
+ assert result is None
+ assert "Failed to fetch README.md" in caplog.text
From d1d702b898331b2b976f25cdc5aa34badfe01ef1 Mon Sep 17 00:00:00 2001
From: saagpatel
Date: Sat, 20 Jun 2026 02:54:43 -0700
Subject: [PATCH 02/13] feat(serve): hosted clone-free report endpoint over
api_only
Add GET /api/report/{username} wrapping audit_user_api_only(fast=True),
returning ApiOnlyReport.to_dict() JSON. Plain-def route offloads the
blocking, network-bound scan to FastAPI's threadpool; GitHubClient is
injected via a dependency for test/deploy override and server-side token.
- Validates the username via the existing validate_username gate (422).
- Maps GitHub errors: 404 not-found, 429 rate-limit (429 or 403 w/ zero
quota), 403 forbidden, 502 for other HTTP/network/client errors.
- Clamps repos scored to MAX_REPOS_CAP to bound public cost.
- 14 endpoint tests; wired into the serve app factory.
---
src/serve/api.py | 103 +++++++++++++++++++++++++++
src/serve/app.py | 6 +-
tests/test_serve_api.py | 152 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 260 insertions(+), 1 deletion(-)
create mode 100644 src/serve/api.py
create mode 100644 tests/test_serve_api.py
diff --git a/src/serve/api.py b/src/serve/api.py
new file mode 100644
index 0000000..57102b5
--- /dev/null
+++ b/src/serve/api.py
@@ -0,0 +1,103 @@
+"""Hosted clone-free report endpoint — the HTTP surface over ``audit_user_api_only``.
+
+Exposes ``GET /api/report/{username}`` returning :meth:`ApiOnlyReport.to_dict`
+JSON. This is the free "paste your GitHub username" report's backend: it lists a
+user's repos and scores them from the GitHub API alone (no cloning), via the
+existing engine in :mod:`src.api_only`.
+
+The route is defined as a plain ``def`` so FastAPI runs the blocking,
+network-bound scan in a worker thread rather than on the event loop. The
+:class:`~src.github_client.GitHubClient` is supplied through a FastAPI
+dependency so tests can override it and a deployment can inject a shared
+server-side token.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import requests
+from fastapi import APIRouter, Depends, HTTPException, Path, Query
+
+from src.api_only import audit_user_api_only
+from src.github_client import GitHubClient, GitHubClientError
+from src.serve.runner import validate_username
+
+router = APIRouter(prefix="/api", tags=["report"])
+
+# Bound the interactive scan: a public free endpoint must not score an account
+# with hundreds of repos in one request. A larger max_repos is clamped to this cap.
+MAX_REPOS_CAP = 30
+
+# Env var for the shared server-side GitHub App / PAT token. It may be unset:
+# tests override the dependency, and local runs fall back to unauthenticated
+# requests, which still work for public data, just at a lower rate limit.
+TOKEN_ENV_VAR = "GHRA_GITHUB_TOKEN"
+
+
+def get_github_client() -> GitHubClient:
+ """Provide a GitHubClient for the request (overridable in tests/deploys).
+
+ A fresh client (and ``requests.Session``) is built per request on purpose:
+ the route runs in FastAPI's threadpool, so a shared Session would be touched
+ by concurrent worker threads. Connection-pool reuse and a shared server-side
+ client land in Phase 2 step 3 alongside the per-IP throttle that bounds load.
+ """
+ return GitHubClient(token=os.environ.get(TOKEN_ENV_VAR))
+
+
+def _is_rate_limited(status: int | None, response: requests.Response | None) -> bool:
+ """True when a GitHub error is rate-limiting (429, or 403 with quota at 0)."""
+ if status == 429:
+ return True
+ if status == 403 and response is not None:
+ return response.headers.get("X-RateLimit-Remaining") == "0"
+ return False
+
+
+def _http_exception(exc: requests.HTTPError, username: str) -> HTTPException:
+ """Map a GitHub HTTP error onto the endpoint's client-facing status."""
+ response = getattr(exc, "response", None)
+ status = getattr(response, "status_code", None)
+ if status == 404:
+ return HTTPException(
+ status_code=404, detail=f"GitHub user '{username}' not found"
+ )
+ if _is_rate_limited(status, response):
+ return HTTPException(
+ status_code=429, detail="GitHub rate limit reached; try again later"
+ )
+ if status == 403:
+ return HTTPException(
+ status_code=403, detail="GitHub denied access to this resource"
+ )
+ return HTTPException(status_code=502, detail="Upstream GitHub error")
+
+
+@router.get("/report/{username}")
+def report(
+ username: str = Path(..., description="GitHub username or org name"),
+ max_repos: int | None = Query(
+ None, ge=1, description="Cap repos scored (clamped to the server limit)"
+ ),
+ client: GitHubClient = Depends(get_github_client),
+) -> dict[str, Any]:
+ """Score a user's portfolio clone-free and return the report as JSON."""
+ try:
+ safe_username = validate_username(username)
+ except ValueError as exc:
+ raise HTTPException(status_code=422, detail=str(exc)) from exc
+
+ capped = MAX_REPOS_CAP if max_repos is None else min(max_repos, MAX_REPOS_CAP)
+
+ try:
+ result = audit_user_api_only(safe_username, client, max_repos=capped, fast=True)
+ except requests.HTTPError as exc:
+ raise _http_exception(exc, safe_username) from exc
+ except (requests.RequestException, GitHubClientError) as exc:
+ # Network failures (DNS, timeout, connection reset) and non-HTTP client
+ # errors surface as a clean 502 rather than an unstructured 500.
+ raise HTTPException(status_code=502, detail="Upstream GitHub error") from exc
+
+ return result.to_dict()
diff --git a/src/serve/app.py b/src/serve/app.py
index 7b026cf..b0a0461 100644
--- a/src/serve/app.py
+++ b/src/serve/app.py
@@ -10,6 +10,7 @@ def create_app(output_dir: Path | None = None) -> "FastAPI": # noqa: F821
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
+ from src.serve.api import router as api_router
from src.serve.routes import router
app = FastAPI(
@@ -24,11 +25,14 @@ def create_app(output_dir: Path | None = None) -> "FastAPI": # noqa: F821
static_dir = Path(__file__).parent / "static"
app.mount("/static", StaticFiles(directory=str(static_dir)), name="static")
+ app.include_router(api_router)
app.include_router(router)
return app
-def run_serve(port: int = 8080, host: str = "127.0.0.1", output_dir: Path | None = None) -> None:
+def run_serve(
+ port: int = 8080, host: str = "127.0.0.1", output_dir: Path | None = None
+) -> None:
"""Launch uvicorn with the audit serve app."""
import uvicorn
diff --git a/tests/test_serve_api.py b/tests/test_serve_api.py
new file mode 100644
index 0000000..23c45c8
--- /dev/null
+++ b/tests/test_serve_api.py
@@ -0,0 +1,152 @@
+"""Tests for src/serve/api.py — hosted clone-free report JSON endpoint (Phase 2 S1)."""
+
+from __future__ import annotations
+
+from unittest.mock import patch
+
+import pytest
+
+# ---------------------------------------------------------------------------
+# Dependency guard — skip entire module if [serve] extra not installed
+# ---------------------------------------------------------------------------
+pytest.importorskip("fastapi", reason="[serve] extra not installed")
+
+import requests # noqa: E402
+from fastapi.testclient import TestClient # noqa: E402
+
+from src.api_only import ApiOnlyReport # noqa: E402
+from src.serve.api import get_github_client # noqa: E402
+from src.serve.app import create_app # noqa: E402
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+@pytest.fixture()
+def client(tmp_path, monkeypatch) -> TestClient:
+    """TestClient with the GitHub client dependency stubbed to a sentinel.
+
+    Tests patch ``audit_user_api_only``; the stub just avoids a real client.
+    ``GHRA_CORS_ORIGINS`` is cleared so ambient env cannot alter CORS origins.
+    """
+    monkeypatch.delenv("GHRA_CORS_ORIGINS", raising=False)
+    app = create_app(output_dir=tmp_path)
+    app.dependency_overrides[get_github_client] = lambda: object()
+    return TestClient(app)
+
+
+def _http_error(
+    status: int, headers: dict[str, str] | None = None
+) -> requests.HTTPError:
+    """Build an HTTPError wrapping a bare Response with *status* and *headers*."""
+    fake = requests.Response()
+    fake.status_code = status
+    fake.headers.update(headers or {})
+    return requests.HTTPError(f"{status} error", response=fake)
+
+
+# ---------------------------------------------------------------------------
+# Happy path
+# ---------------------------------------------------------------------------
+def test_report_returns_serialized_report(client: TestClient) -> None:
+    """A valid username yields HTTP 200 and the serialized report body."""
+    target = "src.serve.api.audit_user_api_only"
+    stub = ApiOnlyReport(username="octocat", audits=[])
+    with patch(target, return_value=stub) as audit_mock:
+        response = client.get("/api/report/octocat")
+    assert response.status_code == 200
+    payload = response.json()
+    # The hosted endpoint always asks for the interactive fast mode.
+    assert audit_mock.call_args.kwargs["fast"] is True
+    assert payload["username"] == "octocat"
+    assert payload["mode"] == "api_only"
+    assert (payload["repo_count"], payload["repos"]) == (0, [])
+    assert "fidelity_note" in payload
+
+
+def test_report_passes_validated_username(client: TestClient) -> None:
+    """The validated username reaches the audit as its first positional arg."""
+    stub = ApiOnlyReport(username="octocat", audits=[])
+    with patch("src.serve.api.audit_user_api_only", return_value=stub) as audit:
+        response = client.get("/api/report/octocat")
+    assert response.status_code == 200
+    positional_args = audit.call_args.args
+    assert positional_args[0] == "octocat"
+
+
+# ---------------------------------------------------------------------------
+# Validation
+# ---------------------------------------------------------------------------
+@pytest.mark.parametrize("bad", ["bad--name", "has space", "-leading", "a" * 40])
+def test_invalid_username_returns_422(client: TestClient, bad: str) -> None:
+    with patch("src.serve.api.audit_user_api_only") as audit:
+        status = client.get(f"/api/report/{bad}").status_code
+    assert status == 422
+    audit.assert_not_called()  # rejected names must never reach the audit
+
+
+# ---------------------------------------------------------------------------
+# Error mapping
+# ---------------------------------------------------------------------------
+def test_unknown_user_returns_404(client: TestClient) -> None:
+    missing = _http_error(404)  # an upstream 404 is passed through as 404
+    with patch("src.serve.api.audit_user_api_only", side_effect=missing):
+        assert client.get("/api/report/ghost").status_code == 404
+
+
+def test_rate_limited_403_with_zero_quota_returns_429(client: TestClient) -> None:
+    # A 403 carrying an exhausted-quota header should surface as 429.
+    exhausted = _http_error(403, headers={"X-RateLimit-Remaining": "0"})
+    with patch("src.serve.api.audit_user_api_only", side_effect=exhausted):
+        assert client.get("/api/report/octocat").status_code == 429
+
+
+def test_rate_limited_429_returns_429(client: TestClient) -> None:
+    throttled = _http_error(429)  # an upstream 429 is passed through as 429
+    with patch("src.serve.api.audit_user_api_only", side_effect=throttled):
+        assert client.get("/api/report/octocat").status_code == 429
+
+
+def test_forbidden_403_without_quota_header_returns_403(client: TestClient) -> None:
+    # Without a rate-limit header a 403 is a plain denial: keep 403, not 429.
+    denied = _http_error(403)
+    with patch("src.serve.api.audit_user_api_only", side_effect=denied):
+        assert client.get("/api/report/octocat").status_code == 403
+
+
+def test_upstream_error_returns_502(client: TestClient) -> None:
+    server_fault = _http_error(500)  # an upstream 500 is reported as 502
+    with patch("src.serve.api.audit_user_api_only", side_effect=server_fault):
+        assert client.get("/api/report/octocat").status_code == 502
+
+
+def test_network_error_returns_502(client: TestClient) -> None:
+    # Transport failures carry no HTTP status; the endpoint answers 502.
+    dropped = requests.ConnectionError("connection reset")
+    with patch("src.serve.api.audit_user_api_only", side_effect=dropped):
+        assert client.get("/api/report/octocat").status_code == 502
+
+
+def test_github_client_error_returns_502(client: TestClient) -> None:
+    """A GitHubClientError raised by the audit maps to a 502 response."""
+    from src.github_client import GitHubClientError
+
+    failure = GitHubClientError("graphql failed")
+    target = "src.serve.api.audit_user_api_only"
+    with patch(target, side_effect=failure):
+        response = client.get("/api/report/octocat")
+    assert response.status_code == 502
+
+
+# ---------------------------------------------------------------------------
+# Cost bound
+# ---------------------------------------------------------------------------
+def test_max_repos_clamped_to_cap(client: TestClient) -> None:
+    """A ``max_repos`` of 9999 reaches the audit as ``MAX_REPOS_CAP``."""
+    from src.serve.api import MAX_REPOS_CAP
+    stub = ApiOnlyReport(username="octocat", audits=[])
+    with patch("src.serve.api.audit_user_api_only", return_value=stub) as mock_audit:
+        resp = client.get("/api/report/octocat?max_repos=9999")
+
+    assert resp.status_code == 200
+    assert mock_audit.call_args.kwargs["max_repos"] == MAX_REPOS_CAP
From 13d46fef830fa9823a673b7a33d35dc3b6890888 Mon Sep 17 00:00:00 2001
From: saagpatel
Date: Sat, 20 Jun 2026 02:59:52 -0700
Subject: [PATCH 03/13] feat(serve): CORS for the browser frontend on
/api/report
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add CORSMiddleware to the app factory with origins resolved from
GHRA_CORS_ORIGINS (defaults to the local Next.js dev server). GET-only,
no credentials — the report endpoint is public and unauthenticated.
---
src/serve/api.py | 13 +++++++++++++
src/serve/app.py | 11 +++++++++++
tests/test_serve_api.py | 21 +++++++++++++++++++++
3 files changed, 45 insertions(+)
diff --git a/src/serve/api.py b/src/serve/api.py
index 57102b5..ee14045 100644
--- a/src/serve/api.py
+++ b/src/serve/api.py
@@ -35,6 +35,19 @@
# requests still work, just at a lower rate limit).
TOKEN_ENV_VAR = "GHRA_GITHUB_TOKEN"
+# Comma-separated allowed CORS origins for the browser frontend. Defaults to the
+# local Next.js dev server; set to the deployed origin (or "*") in production.
+CORS_ORIGINS_ENV_VAR = "GHRA_CORS_ORIGINS"
+DEFAULT_CORS_ORIGINS = ("http://localhost:3000", "http://127.0.0.1:3000")
+
+
+def cors_origins() -> list[str]:
+    """Return the allowed CORS origins from the environment, else dev defaults."""
+    configured = os.environ.get(CORS_ORIGINS_ENV_VAR, "").strip()
+    if configured:
+        return [part.strip() for part in configured.split(",") if part.strip()]
+    return list(DEFAULT_CORS_ORIGINS)
+
def get_github_client() -> GitHubClient:
"""Provide a GitHubClient for the request (overridable in tests/deploys).
diff --git a/src/serve/app.py b/src/serve/app.py
index b0a0461..df336f7 100644
--- a/src/serve/app.py
+++ b/src/serve/app.py
@@ -8,8 +8,10 @@
def create_app(output_dir: Path | None = None) -> "FastAPI": # noqa: F821
"""Create and configure the FastAPI application."""
from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
+ from src.serve.api import cors_origins
from src.serve.api import router as api_router
from src.serve.routes import router
@@ -19,6 +21,15 @@ def create_app(output_dir: Path | None = None) -> "FastAPI": # noqa: F821
version="1.0.0",
)
+ # CORS so the Next.js frontend can call /api/report from the browser. Only
+ # GET is exposed; no credentials (the endpoint is public + unauthenticated).
+ app.add_middleware(
+ CORSMiddleware,
+ allow_origins=cors_origins(),
+ allow_methods=["GET"],
+ allow_headers=["*"],
+ )
+
# Resolve output dir — default to ./output relative to cwd
app.state.output_dir = output_dir or (Path.cwd() / "output")
diff --git a/tests/test_serve_api.py b/tests/test_serve_api.py
index 23c45c8..f1b3a2f 100644
--- a/tests/test_serve_api.py
+++ b/tests/test_serve_api.py
@@ -150,3 +150,24 @@ def test_max_repos_clamped_to_cap(client: TestClient) -> None:
assert resp.status_code == 200
assert mock_audit.call_args.kwargs["max_repos"] == MAX_REPOS_CAP
+
+
+# ---------------------------------------------------------------------------
+# CORS (browser frontend reachability)
+# ---------------------------------------------------------------------------
+def test_cors_allows_frontend_origin(client: TestClient) -> None:
+    dev_origin = "http://localhost:3000"  # one of the default allowed origins
+    stub = ApiOnlyReport(username="octocat", audits=[])
+    with patch("src.serve.api.audit_user_api_only", return_value=stub):
+        response = client.get("/api/report/octocat", headers={"Origin": dev_origin})
+    assert response.status_code == 200
+    assert response.headers.get("access-control-allow-origin") == dev_origin
+
+
+def test_cors_origins_reads_env(monkeypatch) -> None:
+    from src.serve.api import cors_origins
+    # Set: the comma-separated value is split and trimmed. Unset: dev defaults.
+    monkeypatch.setenv("GHRA_CORS_ORIGINS", "https://a.example, https://b.example")
+    assert cors_origins() == ["https://a.example", "https://b.example"]
+    monkeypatch.delenv("GHRA_CORS_ORIGINS", raising=False)
+    assert cors_origins() == ["http://localhost:3000", "http://127.0.0.1:3000"]
From aab05ee8a1a32e5d40cd7efce8c6b3177711c410 Mon Sep 17 00:00:00 2001
From: saagpatel
Date: Sat, 20 Jun 2026 03:17:34 -0700
Subject: [PATCH 04/13] feat(web): Next.js paste-username report frontend
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Add a Next.js 15 / React 19 App Router app under web/ that powers the
free hosted report: a username form does a client-side fetch to the
FastAPI /api/report endpoint and renders the result with a top-fixes
framing — grades, repo health, flags, and the engine's ranked action
candidates as the hero of each card. Repos sort worst-health-first.
- lib/api.ts: typed fetch client with per-status messages + a boundary
shape guard; lib/url.ts: https-only href allowlist (XSS guard).
- ReportExplorer: client-side idle/loading/done/error state machine
with ARIA live regions; ReportView + RepoCard are presentational.
- Dark editorial design, color-coded grade chips, mono accents.
- Typechecks clean, production build passes, visually verified end to
end against a live GitHub user (8 cards rendered).
---
web/.gitignore | 9 +
web/README.md | 39 ++
web/app/globals.css | 384 +++++++++++++++++++
web/app/layout.tsx | 20 +
web/app/page.tsx | 24 ++
web/components/RepoCard.tsx | 97 +++++
web/components/ReportExplorer.tsx | 79 ++++
web/components/ReportView.tsx | 43 +++
web/lib/api.ts | 65 ++++
web/lib/types.ts | 53 +++
web/lib/url.ts | 19 +
web/next.config.mjs | 9 +
web/package.json | 23 ++
web/pnpm-lock.yaml | 599 ++++++++++++++++++++++++++++++
web/pnpm-workspace.yaml | 2 +
web/tsconfig.json | 21 ++
16 files changed, 1486 insertions(+)
create mode 100644 web/.gitignore
create mode 100644 web/README.md
create mode 100644 web/app/globals.css
create mode 100644 web/app/layout.tsx
create mode 100644 web/app/page.tsx
create mode 100644 web/components/RepoCard.tsx
create mode 100644 web/components/ReportExplorer.tsx
create mode 100644 web/components/ReportView.tsx
create mode 100644 web/lib/api.ts
create mode 100644 web/lib/types.ts
create mode 100644 web/lib/url.ts
create mode 100644 web/next.config.mjs
create mode 100644 web/package.json
create mode 100644 web/pnpm-lock.yaml
create mode 100644 web/pnpm-workspace.yaml
create mode 100644 web/tsconfig.json
diff --git a/web/.gitignore b/web/.gitignore
new file mode 100644
index 0000000..7c70c31
--- /dev/null
+++ b/web/.gitignore
@@ -0,0 +1,9 @@
+/node_modules
+/.next
+/out
+/build
+next-env.d.ts
+*.tsbuildinfo
+.env.local
+.vercel
+.DS_Store
diff --git a/web/README.md b/web/README.md
new file mode 100644
index 0000000..5eb8360
--- /dev/null
+++ b/web/README.md
@@ -0,0 +1,39 @@
+# Portfolio Health — web frontend
+
+Next.js (App Router) paste-a-username frontend for the clone-free portfolio
+report. The form calls the FastAPI engine's `GET /api/report/{username}`
+endpoint (from `src/serve/api.py`) and renders the result with a "top fixes"
+framing: grades, repo health, and the highest-leverage actions per repo.
+
+## Develop
+
+Run the Python API and the web app side by side.
+
+**1. Start the report API** (from the repo root):
+
+```bash
+uv run --extra serve python -m uvicorn --factory src.serve.app:create_app --port 8080
+```
+
+**2. Start the frontend** (from `web/`):
+
+```bash
+pnpm install
+pnpm dev # http://localhost:3000
+```
+
+## Configuration
+
+| Env var | Default | Purpose |
+| ---------------------- | ----------------------- | ---------------------------------------- |
+| `NEXT_PUBLIC_API_BASE` | `http://127.0.0.1:8080` | Base URL of the FastAPI report API. |
+
+The API must allow the frontend origin via CORS (`GHRA_CORS_ORIGINS` on the API
+side; defaults already include `http://localhost:3000`).
+
+## Build
+
+```bash
+pnpm typecheck
+pnpm build
+```
diff --git a/web/app/globals.css b/web/app/globals.css
new file mode 100644
index 0000000..6d63fb2
--- /dev/null
+++ b/web/app/globals.css
@@ -0,0 +1,384 @@
+:root {
+ --bg: #0e1014;
+ --panel: #171a21;
+ --panel-2: #1f232c;
+ --ink: #e8eaed;
+ --ink-dim: #9aa3af;
+ --ink-faint: #6b7280;
+ --line: #2a2f3a;
+ --accent: #7dd3fc;
+ --accent-ink: #082f49;
+
+ --grade-a: #34d399;
+ --grade-b: #6ee7b7;
+ --grade-c: #fbbf24;
+ --grade-d: #fb923c;
+ --grade-f: #f87171;
+
+ --radius: 14px;
+ --mono: ui-monospace, "SF Mono", "JetBrains Mono", Menlo, monospace;
+ --sans:
+ ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, sans-serif;
+}
+
+* {
+ box-sizing: border-box;
+}
+
+html,
+body {
+ margin: 0;
+ padding: 0;
+}
+
+body {
+ background: var(--bg);
+ color: var(--ink);
+ font-family: var(--sans);
+ line-height: 1.55;
+ -webkit-font-smoothing: antialiased;
+}
+
+a {
+ color: var(--accent);
+ text-decoration: none;
+}
+a:hover {
+ text-decoration: underline;
+}
+
+.wrap {
+ max-width: 880px;
+ margin: 0 auto;
+ padding: 0 24px;
+}
+
+/* ---- Hero / form ---- */
+.hero {
+ padding: 72px 0 40px;
+}
+.eyebrow {
+ font-family: var(--mono);
+ font-size: 12px;
+ letter-spacing: 0.14em;
+ text-transform: uppercase;
+ color: var(--accent);
+ margin: 0 0 14px;
+}
+.hero h1 {
+ font-size: clamp(30px, 5vw, 44px);
+ line-height: 1.08;
+ letter-spacing: -0.02em;
+ margin: 0 0 14px;
+ font-weight: 680;
+}
+.hero p.lede {
+ font-size: 17px;
+ color: var(--ink-dim);
+ max-width: 60ch;
+ margin: 0 0 32px;
+}
+
+.form-row {
+ display: flex;
+ gap: 10px;
+ align-items: stretch;
+ max-width: 520px;
+}
+.input-shell {
+ display: flex;
+ align-items: center;
+ flex: 1;
+ background: var(--panel);
+ border: 1px solid var(--line);
+ border-radius: var(--radius);
+ padding: 0 14px;
+ transition: border-color 0.15s ease;
+}
+.input-shell:focus-within {
+ border-color: var(--accent);
+}
+.input-shell .at {
+ color: var(--ink-faint);
+ font-family: var(--mono);
+ margin-right: 2px;
+}
+.input-shell input {
+ flex: 1;
+ background: transparent;
+ border: 0;
+ outline: 0;
+ color: var(--ink);
+ font-size: 16px;
+ font-family: var(--mono);
+ padding: 14px 0;
+}
+button.go {
+ background: var(--accent);
+ color: var(--accent-ink);
+ border: 0;
+ border-radius: var(--radius);
+ font-weight: 640;
+ font-size: 15px;
+ padding: 0 22px;
+ cursor: pointer;
+ transition: filter 0.15s ease;
+}
+button.go:hover:not(:disabled) {
+ filter: brightness(1.08);
+}
+button.go:disabled {
+ opacity: 0.55;
+ cursor: progress;
+}
+
+.error {
+ margin-top: 18px;
+ color: var(--grade-f);
+ font-size: 15px;
+}
+
+/* ---- Loading ---- */
+.loading {
+ margin-top: 28px;
+ color: var(--ink-dim);
+ font-family: var(--mono);
+ font-size: 14px;
+ display: flex;
+ align-items: center;
+ gap: 10px;
+}
+.dot {
+ width: 8px;
+ height: 8px;
+ border-radius: 50%;
+ background: var(--accent);
+ animation: pulse 1s ease-in-out infinite;
+}
+@keyframes pulse {
+ 0%,
+ 100% {
+ opacity: 0.25;
+ }
+ 50% {
+ opacity: 1;
+ }
+}
+
+/* ---- Report ---- */
+.report-head {
+ padding: 8px 0 6px;
+ border-top: 1px solid var(--line);
+ margin-top: 40px;
+}
+.report-head h2 {
+ font-size: 22px;
+ margin: 22px 0 4px;
+ letter-spacing: -0.01em;
+}
+.report-head .sub {
+ color: var(--ink-dim);
+ font-size: 14px;
+}
+.fidelity {
+ margin: 16px 0 8px;
+ font-size: 13px;
+ color: var(--ink-faint);
+ background: var(--panel);
+ border: 1px solid var(--line);
+ border-left: 3px solid var(--accent);
+ border-radius: 8px;
+ padding: 12px 14px;
+}
+
+.cards {
+ display: flex;
+ flex-direction: column;
+ gap: 16px;
+ margin: 24px 0 64px;
+}
+
+.card {
+ background: var(--panel);
+ border: 1px solid var(--line);
+ border-radius: var(--radius);
+ padding: 20px 22px;
+}
+.card-top {
+ display: flex;
+ align-items: flex-start;
+ gap: 14px;
+}
+.grade {
+ flex: none;
+ width: 46px;
+ height: 46px;
+ border-radius: 11px;
+ display: grid;
+ place-items: center;
+ font-family: var(--mono);
+ font-weight: 700;
+ font-size: 20px;
+ color: #0c0d10;
+}
+.grade.g-a {
+ background: var(--grade-a);
+}
+.grade.g-b {
+ background: var(--grade-b);
+}
+.grade.g-c {
+ background: var(--grade-c);
+}
+.grade.g-d {
+ background: var(--grade-d);
+}
+.grade.g-f {
+ background: var(--grade-f);
+}
+
+.card-title {
+ flex: 1;
+ min-width: 0;
+}
+.card-title h3 {
+ margin: 0;
+ font-size: 17px;
+ font-family: var(--mono);
+ font-weight: 600;
+ word-break: break-word;
+}
+.card-title .desc {
+ margin: 4px 0 0;
+ color: var(--ink-dim);
+ font-size: 14px;
+}
+.card-meta {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 8px 14px;
+ margin-top: 8px;
+ font-size: 12.5px;
+ color: var(--ink-faint);
+ font-family: var(--mono);
+}
+.score {
+ flex: none;
+ text-align: right;
+ font-family: var(--mono);
+}
+.score .pct {
+ font-size: 20px;
+ font-weight: 680;
+}
+.score .pct-label {
+ font-size: 11px;
+ color: var(--ink-faint);
+ text-transform: uppercase;
+ letter-spacing: 0.08em;
+}
+
+.flags {
+ display: flex;
+ flex-wrap: wrap;
+ gap: 6px;
+ margin-top: 14px;
+}
+.flag {
+ font-family: var(--mono);
+ font-size: 11.5px;
+ color: var(--ink-dim);
+ background: var(--panel-2);
+ border: 1px solid var(--line);
+ border-radius: 999px;
+ padding: 3px 9px;
+}
+
+.fixes {
+ margin-top: 16px;
+ border-top: 1px dashed var(--line);
+ padding-top: 14px;
+}
+.fixes h4 {
+ margin: 0 0 10px;
+ font-size: 12px;
+ text-transform: uppercase;
+ letter-spacing: 0.1em;
+ color: var(--accent);
+ font-family: var(--mono);
+}
+.fix {
+ display: flex;
+ gap: 12px;
+ padding: 9px 0;
+}
+.fix + .fix {
+ border-top: 1px solid var(--line);
+}
+.fix-rank {
+ flex: none;
+ font-family: var(--mono);
+ color: var(--ink-faint);
+ font-size: 13px;
+ width: 18px;
+ padding-top: 1px;
+}
+.fix-body {
+ flex: 1;
+}
+.fix-body .fix-title {
+ font-weight: 600;
+ font-size: 14.5px;
+}
+.fix-body .fix-action {
+ color: var(--ink-dim);
+ font-size: 14px;
+ margin-top: 2px;
+}
+.fix-tags {
+ display: flex;
+ gap: 10px;
+ margin-top: 6px;
+ font-family: var(--mono);
+ font-size: 11.5px;
+ color: var(--ink-faint);
+}
+.fix-tags .impact {
+ color: var(--grade-a);
+}
+.clean {
+ color: var(--ink-dim);
+ font-size: 14px;
+ margin-top: 14px;
+}
+
+/* ---- CTA / footer ---- */
+.cta {
+ border-top: 1px solid var(--line);
+ margin: 8px 0 80px;
+ padding-top: 24px;
+ display: flex;
+ flex-wrap: wrap;
+ gap: 14px 28px;
+ align-items: baseline;
+}
+.cta .label {
+ font-size: 14px;
+ color: var(--ink-dim);
+}
+.cta code {
+ font-family: var(--mono);
+ background: var(--panel);
+ border: 1px solid var(--line);
+ border-radius: 7px;
+ padding: 3px 8px;
+ font-size: 13px;
+ color: var(--ink);
+}
+
+.foot {
+ color: var(--ink-faint);
+ font-size: 13px;
+ padding: 24px 0 60px;
+ border-top: 1px solid var(--line);
+}
diff --git a/web/app/layout.tsx b/web/app/layout.tsx
new file mode 100644
index 0000000..a28c729
--- /dev/null
+++ b/web/app/layout.tsx
@@ -0,0 +1,20 @@
+import type { Metadata } from "next";
+import "./globals.css";
+
+export const metadata: Metadata = {
+ title: "Portfolio Health — clone-free GitHub report",
+ description:
+ "Paste a GitHub username and get a clone-free portfolio health report: grades, the biggest drags, and the concrete fixes that move each repo forward.",
+};
+
+export default function RootLayout({
+  children,
+}: {
+  children: React.ReactNode;
+}) { // Root layout: Next.js requires it to render <html> and <body>.
+  return (
+    <html lang="en">
+      <body>{children}</body>
+    </html>
+  );
+}
diff --git a/web/app/page.tsx b/web/app/page.tsx
new file mode 100644
index 0000000..6881837
--- /dev/null
+++ b/web/app/page.tsx
@@ -0,0 +1,24 @@
+import ReportExplorer from "@/components/ReportExplorer";
+
+export default function Home() {
+ return (
+
+
+
Portfolio health · clone-free
+
See what your GitHub portfolio is missing.
+
+ Paste a username. We read the GitHub API — no cloning — score every
+ public repo, and surface the highest-leverage fixes for each one.
+ Free, no signup.
+