diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..088bfc4 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,17 @@ +.git +.github +__pycache__ +*.pyc +*.pyo +*.pyd +.pytest_cache +.mypy_cache +.ruff_cache +tests/ +docs/ +.venv/ +venv/ +*.md +*.egg-info +dist/ +build/ diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..4025840 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,99 @@ +# Copilot Instructions — pr-generator + +## Commands + +```bash +# Install (editable, no dev extras needed) +pip install -e . +pip install pytest + +# Run full test suite +python -m pytest + +# Run a single test file +python -m pytest tests/test_scanner.py -v + +# Run a single test by name +python -m pytest tests/test_scanner.py::TestScanCycle::test_dry_run_does_not_create_prs -v + +# Run the application locally +CONFIG_PATH=./config.yaml python -m pr_generator + +# Run tests with coverage (configured in pyproject.toml) +python -m pytest --cov=pr_generator --cov-report=term-missing +``` + +There is no linter configured. There is no type-checker configured. + +--- + +## Architecture + +`pr-generator` is a long-running polling daemon. The main loop lives in `__main__.py`: + +1. Load `AppConfig` from YAML (`CONFIG_PATH`) or legacy env vars (fallback). +2. Instantiate active providers (`GitHubProvider` / `BitbucketProvider`). +3. Start the health HTTP server in a daemon thread. +4. Loop: run `scan_cycle()` → sleep `scan_frequency` seconds → repeat. +5. Graceful shutdown on `SIGTERM`/`SIGINT` via a `threading.Event`. + +**Scan cycle** (`scanner.py`) is two-phase, both phases concurrent via `ThreadPoolExecutor`: +- **Phase 1**: fetch all branch names from every active provider in parallel. +- **Phase 2**: for each `rule × provider` pair — filter branches by regex, check for existing PRs, create missing ones. + +**Config loading** (`config.py`) priority: YAML file → legacy env vars. 
YAML supports multiple named providers and multiple rules. Legacy env-var mode supports exactly one rule. + +**Provider abstraction** — `ProviderInterface` is a `runtime_checkable` Protocol in `providers/base.py`. Both `GitHubProvider` and `BitbucketProvider` satisfy it structurally (no explicit inheritance). The scanner only uses the interface. + +**All HTTP** goes through `request_with_retry` in `http_client.py`. It handles retry/backoff (delays: 0.5 s, 1 s, 2 s) and logging. Providers never call `requests` directly. + +**Releases** are automated via `semantic-release` on push to `main`. Version is in `src/pr_generator/__init__.py` and `pyproject.toml`. + +--- + +## Key Conventions + +### Logging format +All log lines follow the structured pattern: +``` +[Component] Step: step_name action=verb cycle_id=N detail=... +``` +Examples: `[GitHub] Step: get_branches action=end total=42`, `[Core] Step: scan_cycle action=start cycle_id=3`. + +### `request_with_retry` — `headers` vs `headers_factory` +Pass **`headers`** (a plain dict) when auth tokens don't expire between retries (Bitbucket Bearer token). +Pass **`headers_factory`** (a `() → dict` callable) when tokens may rotate between attempts (GitHub App installation tokens). The factory is called fresh on each retry attempt, so a token refresh is picked up automatically. + +### Provider exceptions must carry `status_code` +Both `GitHubError` and `BitbucketError` have the constructor signature: +```python +def __init__(self, message: str, status_code: int | None = None) -> None: +``` +`http_client.request_with_retry` calls `exception_cls(message, status_code)`. Any new provider exception class must match this signature. + +### Per-cycle caches +Each provider caches PR-existence and branch-existence lookups within one scan cycle. `reset_cycle_cache()` is called at the start of every cycle. Do not persist cache state across cycles. 
+ +### Rule matching uses `re.match` (start-anchored) +Patterns are matched with `rule.compiled.match(branch_name)`, not `re.search`. Patterns must match from the beginning of the branch name. + +### `AppConfig` and `ProviderConfig` are frozen dataclasses +Neither can be mutated after construction. In tests, build a new instance rather than modifying fields. + +### New provider checklist +To add a third provider (e.g. GitLab): +1. Create `src/pr_generator/providers/gitlab.py` implementing all 5 methods of `ProviderInterface`. +2. Define `GitLabError(Exception)` with `(message: str, status_code: int | None = None)`. +3. Add `"gitlab"` to the `ptype` allowlist in `config._parse_providers_from_yaml`. +4. In `_request`, pass `headers=` if tokens are static or `headers_factory=` if they refresh mid-cycle. +5. Add a `_parse_gitlab_provider` function and wire it in `__main__.py`. +6. Add tests in `tests/test_providers.py`. + +### Testing patterns +- **Scanner tests** — mock full providers with `MagicMock()` (see `_mock_provider` helper in `test_scanner.py`). +- **Provider tests** — mock `provider._request` directly, not `requests.request`. +- **Config tests** — use `tmp_path` fixture + `monkeypatch.setenv("CONFIG_PATH", path)`. +- Tests are plain classes with descriptive method names; no pytest markers are used. + +### Docker +Config is mounted at `/etc/pr-generator/config.yaml` (the default `CONFIG_PATH`). The container runs as non-root user `prgen`. `requirements.txt` drives the Docker build; `pyproject.toml` is the authoritative dependency source — keep both in sync when adding dependencies. 
diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..e9d6f3a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,62 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: "/" + schedule: + interval: monthly + open-pull-requests-limit: 10 + labels: + - enhancement + - dependency-management + assignees: + - devops-ia/devops-ia + groups: + github-actions: + patterns: + - "*" + commit-message: + prefix: chore + include: scope + rebase-strategy: auto + pull-request-branch-name: + separator: "-" + - package-ecosystem: pip + directory: "/" + schedule: + interval: monthly + open-pull-requests-limit: 10 + labels: + - enhancement + - dependency-management + assignees: + - devops-ia/devops-ia + groups: + pip: + patterns: + - "*" + commit-message: + prefix: chore + include: scope + rebase-strategy: auto + pull-request-branch-name: + separator: "-" + - package-ecosystem: docker + directory: "/" + schedule: + interval: monthly + open-pull-requests-limit: 10 + labels: + - enhancement + - dependency-management + assignees: + - devops-ia/devops-ia + groups: + docker: + patterns: + - "*" + commit-message: + prefix: chore + include: scope + rebase-strategy: auto + pull-request-branch-name: + separator: "-" \ No newline at end of file diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 0000000..18a1a1f --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,154 @@ +name: Build and Push Docker Image + +permissions: {} + +env: + DOCKERHUB_USER: devopsiaci + DOCKERHUB_REPO: pr-generator + GHCR_REGISTRY: ghcr.io + GHCR_REPO: ${{ github.repository }} + +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test: + name: Test + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: "3.13" + cache: pip + 
cache-dependency-path: requirements.txt + + - name: Install dependencies + run: pip install -r requirements.txt pytest + + - name: Run tests + run: python -m pytest tests/ -v + + release: + name: Release + needs: [test] + # Only run on direct pushes to main (not on pull requests) + if: github.event_name != 'pull_request' + runs-on: ubuntu-latest + permissions: + attestations: write + contents: write + id-token: write + issues: write + packages: write + pull-requests: write + + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Semantic Release + id: semantic + uses: cycjimmy/semantic-release-action@v6 + with: + tag_format: 'v${version}' + extra_plugins: | + @semantic-release/changelog + @semantic-release/git + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Set Docker metadata + id: meta + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/metadata-action@v6 + with: + images: | + ${{ env.DOCKERHUB_USER }}/${{ env.DOCKERHUB_REPO }} + ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_REPO }} + labels: | + org.opencontainers.image.maintainer=adrianmg231189@gmail.com + org.opencontainers.image.title=PR Generator + org.opencontainers.image.description=PR Generator to automate pull request management + org.opencontainers.image.vendor=devops-ia + tags: | + type=raw,value=${{ steps.semantic.outputs.new_release_git_tag }} + + - name: Set up QEMU + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/setup-qemu-action@v4 + + - name: Set up Docker Buildx + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/setup-buildx-action@v4 + + - name: Cache Docker layers + if: steps.semantic.outputs.new_release_published == 'true' + uses: actions/cache@v5 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + + - name: "[DOCKERHUB] Log in" + if: steps.semantic.outputs.new_release_published == 'true' + 
uses: docker/login-action@v4 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + + - name: "[GHCR] Log in" + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/login-action@v4 + with: + registry: ${{ env.GHCR_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push Docker image + id: push + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/build-push-action@v7 + with: + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max + context: . + labels: ${{ steps.meta.outputs.labels }} + platforms: linux/amd64,linux/arm64 + push: true + sbom: true + tags: ${{ steps.meta.outputs.tags }} + + - name: "[DOCKERHUB] Update registry description" + if: steps.semantic.outputs.new_release_published == 'true' + uses: peter-evans/dockerhub-description@v5 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + repository: ${{ env.DOCKERHUB_USER }}/${{ env.DOCKERHUB_REPO }} + + - name: "[GHCR] Generate artifact attestation" + if: steps.semantic.outputs.new_release_published == 'true' + uses: actions/attest-build-provenance@v4 + with: + subject-name: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_REPO }} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true + + - name: Move Docker cache + if: steps.semantic.outputs.new_release_published == 'true' + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-new /tmp/.buildx-cache diff --git a/.github/workflows/github-auto-assign.yml b/.github/workflows/github-auto-assign.yml new file mode 100644 index 0000000..05a56f2 --- /dev/null +++ b/.github/workflows/github-auto-assign.yml @@ -0,0 +1,19 @@ +name: Auto-assign Issue + +on: + issues: + types: [opened] + pull_request_target: + types: [opened, ready_for_review] + +jobs: + auto-assign: + permissions: + contents: read + issues: write 
+ pull-requests: write + uses: devops-ia/.github/.github/workflows/github-auto-assign.yml@main + with: + teams: devops-ia + secrets: + PAT_GITHUB: ${{ secrets.PAT_GITHUB }} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2b85002 --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +.Python +*.egg-info/ +dist/ +build/ + +# Testing / coverage +.coverage +coverage.json +coverage.xml +htmlcov/ +.pytest_cache/ + +# Env +.env +*.env +venv/ +.venv/ + +# IDE +.vscode/ +.idea/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..693c7be --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,66 @@ +## [1.2.3](https://github.com/devops-ia/pr-generator/compare/v1.2.2...v1.2.3) (2026-03-25) + + +### Bug Fixes + +* Fixed app and remove old values ([0eb7c8f](https://github.com/devops-ia/pr-generator/commit/0eb7c8f5a6d1e8b4a5a0da35da9ad6f61a5a6744)) + +## [1.2.2](https://github.com/devops-ia/pr-generator/compare/v1.2.1...v1.2.2) (2026-03-25) + + +### Bug Fixes + +* Fixed Dockerfile ([87fa429](https://github.com/devops-ia/pr-generator/commit/87fa429b8732c3b149e2c3bb602edbd430c900e7)) + +## [1.2.1](https://github.com/devops-ia/pr-generator/compare/v1.2.0...v1.2.1) (2026-03-25) + + +### Bug Fixes + +* Force release ([fe18685](https://github.com/devops-ia/pr-generator/commit/fe1868583176986f5119692ca365f883c7e8737e)) + +# [1.2.0](https://github.com/devops-ia/pr-generator/compare/v1.1.0...v1.2.0) (2026-03-25) + + +### Bug Fixes + +* Remove cache files ([71e9092](https://github.com/devops-ia/pr-generator/commit/71e90928a4ab0bec3a50ec471c615d0052568e3c)) +* resolve remaining gaps after critical review ([868b675](https://github.com/devops-ia/pr-generator/commit/868b6756248f90af94147c288f730133f99424eb)) + + +### Features + +* add suport for GH PAT ([b082c34](https://github.com/devops-ia/pr-generator/commit/b082c34af3b018eb616f4a9511e982719b6660ee)) +* add suport for GH PAT 
([31a2e96](https://github.com/devops-ia/pr-generator/commit/31a2e96fb67088df33e4b2b8b0724e07505ddaa0)) +* refactor ([67a0c42](https://github.com/devops-ia/pr-generator/commit/67a0c4269de3b86d6197d6acc242121dfac0848f)) +* update dependencies ([6f13505](https://github.com/devops-ia/pr-generator/commit/6f135057b36fcac1dd62575c9f33cd064d0677a7)) + +# [1.2.0](https://github.com/devops-ia/pr-generator/compare/v1.1.0...v1.2.0) (2026-03-25) + + +### Bug Fixes + +* Remove cache files ([71e9092](https://github.com/devops-ia/pr-generator/commit/71e90928a4ab0bec3a50ec471c615d0052568e3c)) +* resolve remaining gaps after critical review ([868b675](https://github.com/devops-ia/pr-generator/commit/868b6756248f90af94147c288f730133f99424eb)) + + +### Features + +* add suport for GH PAT ([b082c34](https://github.com/devops-ia/pr-generator/commit/b082c34af3b018eb616f4a9511e982719b6660ee)) +* add suport for GH PAT ([31a2e96](https://github.com/devops-ia/pr-generator/commit/31a2e96fb67088df33e4b2b8b0724e07505ddaa0)) +* refactor ([67a0c42](https://github.com/devops-ia/pr-generator/commit/67a0c4269de3b86d6197d6acc242121dfac0848f)) + +# [1.1.0](https://github.com/devops-ia/pr-generator/compare/v1.0.0...v1.1.0) (2026-03-25) + + +### Features + +* Force release ([1397554](https://github.com/devops-ia/pr-generator/commit/1397554f660c698daaf294d86d5a8f5de07f1a13)) +* Force release ([a3b7eac](https://github.com/devops-ia/pr-generator/commit/a3b7eacfb603a0107fce2604522736241c85e4db)) + +# 1.0.0 (2026-03-25) + + +### Features + +* Upload code ([9286e3e](https://github.com/devops-ia/pr-generator/commit/9286e3e65932ab2a9526beb481a370d775771f2b)) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..826282f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +# ============================================================ +# Stage 1: Install Python dependencies +# ============================================================ +FROM python:3.14-slim AS builder + +WORKDIR /build +COPY 
requirements.txt . +RUN pip install --no-cache-dir --prefix=/install -r requirements.txt + +# ============================================================ +# Stage 2: Minimal runtime image +# ============================================================ +FROM python:3.14-slim + +LABEL maintainer="adrianmg231189@gmail.com" +LABEL org.opencontainers.image.source="https://github.com/devops-ia/pr-generator" +LABEL org.opencontainers.image.description="Automated PR creation from branch patterns" + +# Non-root user +RUN groupadd -r prgen && useradd -r -g prgen -d /app -s /sbin/nologin prgen + +# Copy installed packages from builder +COPY --from=builder /install /usr/local + +# Copy application source +WORKDIR /app +COPY src/ ./src/ + +RUN chown -R prgen:prgen /app + +ENV PYTHONPATH=/app/src + +USER prgen + +HEALTHCHECK --interval=30s --timeout=3s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1 + +EXPOSE 8080 + +ENTRYPOINT ["python", "-m", "pr_generator"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b6eddf8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 DevOps Solutions + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 1d6e80c..25643a0 100644 --- a/README.md +++ b/README.md @@ -1 +1,333 @@ -# pr-generator \ No newline at end of file +# PR generator image + +[![CI](https://github.com/devops-ia/pr-generator/actions/workflows/docker-build.yml/badge.svg)](https://github.com/devops-ia/pr-generator/actions/workflows/docker-build.yml) +[![GitHub release](https://img.shields.io/github/v/release/devops-ia/pr-generator)](https://github.com/devops-ia/pr-generator/releases) +[![Docker Hub](https://img.shields.io/docker/v/devopsiaci/pr-generator?label=Docker%20Hub&logo=docker)](https://hub.docker.com/r/devopsiaci/pr-generator) +[![Docker Pulls](https://img.shields.io/docker/pulls/devopsiaci/pr-generator?logo=docker)](https://hub.docker.com/r/devopsiaci/pr-generator) +[![Python](https://img.shields.io/badge/python-3.11%2B-blue?logo=python&logoColor=white)](https://www.python.org) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +Automated Pull Request creation daemon for **GitHub** and **Bitbucket Cloud**. + +`pr-generator` runs as a long-lived service that periodically scans your repository branches, matches them against configurable regex patterns, and automatically opens Pull Requests toward the configured destination branches — skipping any PR that already exists. 
+ +--- + +## Table of Contents + +- [How it works](#how-it-works) +- [Quick start](#quick-start) +- [Configuration](#configuration) + - [YAML file](#yaml-file) +- [Providers](#providers) + - [GitHub — App authentication](#github--app-authentication) + - [GitHub — PAT authentication](#github--pat-authentication) + - [Bitbucket Cloud](#bitbucket-cloud) +- [Rules](#rules) +- [Health endpoints](#health-endpoints) +- [Docker](#docker) +- [Development](#development) + +--- + +## How it works + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Scan cycle │ +│ │ +│ 1. Fetch all branches ──▶ GitHub / Bitbucket │ +│ 2. For every rule │ +│ match branches against regex pattern │ +│ for each match │ +│ skip if open PR already exists │ +│ create PR source ──▶ destination │ +│ 3. Sleep scan_frequency seconds │ +│ 4. Repeat │ +└─────────────────────────────────────────────────────────────┘ +``` + +Key design points: + +- **Concurrent**: branches are fetched from all providers in parallel; rule×provider pairs are also processed concurrently (up to 10 workers). +- **Idempotent**: an existing open PR for the same source→destination pair is detected and skipped. +- **Dry-run mode**: log what would be created without actually calling the API. +- **Graceful shutdown**: handles `SIGTERM` / `SIGINT` and drains in-progress work. + +--- + +## Quick start + +```bash +# Install +pip install -e . + +# Point to your config file and run +CONFIG_PATH=./config.yaml pr-generator +``` + +Or with Docker: + +```bash +docker run --rm \ + -v "$(pwd)/config.yaml:/etc/pr-generator/config.yaml:ro" \ + ghcr.io/devops-ia/pr-generator:latest +``` + +--- + +## Configuration + +### YAML file + +The default config path is `/etc/pr-generator/config.yaml`. Override with the `CONFIG_PATH` environment variable. The application exits with an error at startup if the file is not found. + +```yaml +# config.yaml + +# How often (seconds) to scan for new branches. 
+scan_frequency: 300 # default: 300 + +# Logging level: DEBUG | INFO | WARNING | ERROR +log_level: INFO # default: INFO + +# Log format: "text" (human-readable) or "json" (structured, for log aggregators) +log_format: text # default: text + +# When true, PRs are logged but never actually created. +dry_run: false # default: false + +# Port for the built-in health server. +health_port: 8080 # default: 8080 + +providers: + github: + enabled: true + owner: my-org + repo: my-repo + app_id: "123456" + installation_id: "78901234" # optional — auto-resolved if omitted + private_key_path: /secrets/github-app.pem # path to PEM file + # Alternative: set GITHUB_APP_PRIVATE_KEY env var (plain PEM or base64-encoded) + timeout: 30 # HTTP timeout in seconds + + bitbucket: + enabled: true + workspace: my-workspace + repo_slug: my-repo + token_env: BITBUCKET_TOKEN # name of the env var that holds the token + close_source_branch: true # delete source branch after merge (default: true) + timeout: 30 + +rules: + - pattern: "feature/.*" # Python regex matched against branch names + destinations: + github: main + bitbucket: develop + + - pattern: "release/.*" + destinations: + github: main + + - pattern: ".*-hotfix-.*" + destinations: + bitbucket: master +``` + +#### Multiple GitHub organisations + +Use any name as the provider key and set `type: github` (or `type: bitbucket`) to identify the implementation. Rules reference providers by their name. 
+ +```yaml +providers: + github-acme: + type: github # required for non-standard key names + enabled: true + owner: acme-org + repo: backend + app_id: "111" + private_key_path: /secrets/acme-app.pem + + github-skunkworks: + type: github + enabled: true + owner: skunkworks-org + repo: platform + auth_method: pat + token_env: SKUNKWORKS_GITHUB_TOKEN + + bitbucket: # "github" / "bitbucket" keys default type automatically + enabled: true + workspace: my-workspace + repo_slug: my-repo + token_env: BITBUCKET_TOKEN + +rules: + - pattern: "feature/.*" + destinations: + github-acme: main + github-skunkworks: develop + bitbucket: develop +``` + +**Config fields reference** + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `scan_frequency` | int | `300` | Seconds between scan cycles | +| `log_level` | string | `"INFO"` | Python logging level | +| `dry_run` | bool | `false` | Simulate PR creation without API calls | +| `health_port` | int | `8080` | Port for health HTTP server | +| `providers..type` | string | *(key name)* | Provider implementation: `github` or `bitbucket`. Required when the key name is not `github` or `bitbucket` | +| `providers..enabled` | bool | `false` | Activate this provider instance. 
If no providers are enabled the application starts in **idle mode** — it logs a warning and keeps running without performing any scans | +| `providers.<name>.owner` | string | — | GitHub organisation or user *(GitHub only)* | +| `providers.<name>.repo` | string | — | Repository name *(GitHub only)* | +| `providers.<name>.app_id` | string | — | GitHub App ID *(GitHub App auth)* | +| `providers.<name>.installation_id` | string | *(auto)* | Installation ID; resolved automatically if omitted *(GitHub App auth)* | +| `providers.<name>.private_key_path` | string | — | Path to GitHub App private key PEM file *(GitHub App auth)* | +| `providers.<name>.auth_method` | string | `"app"` | `app` (GitHub App) or `pat` (Personal Access Token) *(GitHub only)* | +| `providers.<name>.token_env` | string | `"GITHUB_TOKEN"` / `"BITBUCKET_TOKEN"` | Env var name containing the token *(PAT / Bitbucket)* | +| `providers.<name>.workspace` | string | — | Bitbucket workspace slug *(Bitbucket only)* | +| `providers.<name>.repo_slug` | string | — | Bitbucket repository slug *(Bitbucket only)* | +| `providers.<name>.close_source_branch` | bool | `true` | Delete source branch after PR merges *(Bitbucket only)* | +| `providers.<name>.timeout` | float | `30` | HTTP timeout (seconds) | +| `rules[].pattern` | string | — | Python regex applied to branch names | +| `rules[].destinations` | map | — | `provider_name: destination_branch` pairs | + +--- + +## Providers + +### GitHub App + +Authentication uses a [GitHub App](https://docs.github.com/en/apps/creating-github-apps/about-creating-github-apps/about-creating-github-apps). Two modes are available: + +**GitHub App (recommended)** — the provider: +1. Signs a short-lived JWT with the App's RSA private key. +2. Exchanges it for an installation access token (cached up to ~55 minutes). +3. Uses the installation token for all API calls. +4. Caches per-cycle PR-existence and branch-existence lookups to reduce API usage. 
+ +**Personal Access Token (PAT)** — set `auth_method: pat` and point `token_env` at an env var holding the PAT. + +Required GitHub App permissions: **Contents** (read), **Pull requests** (read & write). + +### Bitbucket Cloud + +Authentication uses a project/repository **Bearer token** (HTTP access token). + +The provider fetches default reviewers at PR creation time and automatically includes them in the payload. + +Required Bitbucket permissions: **Repositories** (read), **Pull requests** (read & write). + +--- + +## Rules + +Each rule has: + +- **`pattern`** — a Python regex (`re.compile`) matched against branch names using `re.match` (anchored at the start). The destination branch is excluded from matching. +- **`destinations`** — a map of `provider_name → destination_branch`. Only providers that are both listed here **and** active in `providers` are processed. + +```yaml +rules: + - pattern: "feature/.*" + destinations: + github: main # create PRs toward "main" on GitHub + bitbucket: develop # create PRs toward "develop" on Bitbucket +``` + +Multiple rules are supported. + +--- + +## Health endpoints + +A lightweight HTTP server starts on `health_port` (default `8080`): + +| Endpoint | Behaviour | +|----------|-----------| +| `GET /livez` | `200 live` while running; `503 shutting down` during shutdown | +| `GET /healthz` | Same as `/livez` (alias) | +| `GET /readyz` | `200 ready` after the **first** scan cycle completes; `503 not ready` before that | + +Suitable for Kubernetes liveness, readiness, and startup probes: + +```yaml +livenessProbe: + httpGet: + path: /livez + port: 8080 +readinessProbe: + httpGet: + path: /readyz + port: 8080 +``` + +--- + +## Docker + +The image is built from a two-stage Dockerfile: + +- **Stage 1** – installs Python dependencies into `/install`. +- **Stage 2** – minimal `python:3.14-slim` runtime; runs as a non-root user (`prgen`). + +```bash +# Build +docker build -t pr-generator . 
+ +# Run with YAML config +docker run --rm \ + -v "$(pwd)/config.yaml:/etc/pr-generator/config.yaml:ro" \ + -v "$(pwd)/github-app.pem:/secrets/github-app.pem:ro" \ + -e BITBUCKET_TOKEN=<token> \ + -p 8080:8080 \ + pr-generator +``` + +--- + +## Development + +**Prerequisites**: Python ≥ 3.11 + +```bash +# Create and activate a virtual environment +python -m venv .venv +source .venv/bin/activate + +# Install the package in editable mode and the test runner +pip install -e . +pip install pytest + +# Run tests +pytest + +# Run with a local config +CONFIG_PATH=./config.yaml python -m pr_generator +``` + +**Project layout** + +``` +src/pr_generator/ +├── __main__.py # Entry point: startup, provider init, scan loop +├── config.py # Config loading from YAML file +├── models.py # Dataclasses: AppConfig, ProviderConfig, ScanRule, … +├── scanner.py # Concurrent scan cycle orchestrator +├── health.py # HTTP health server (/livez, /readyz, /healthz) +├── http_client.py # Shared HTTP client with retry/backoff +├── logging_config.py # Logging setup (plain text or structured JSON) +└── providers/ + ├── base.py # ProviderInterface Protocol + ├── github.py # GitHub App provider + └── bitbucket.py # Bitbucket Cloud provider + +tests/ +├── conftest.py # Shared pytest fixtures +├── test_config.py # Config loading tests +├── test_health.py # Health server tests +├── test_models.py # Model tests +└── test_scanner.py # Scan cycle tests +``` diff --git a/config.yaml.example b/config.yaml.example new file mode 100644 index 0000000..2874014 --- /dev/null +++ b/config.yaml.example @@ -0,0 +1,140 @@ +# config.yaml.example +# +# Copy this file to config.yaml (or any path) and set CONFIG_PATH to point to it. +# Remove or comment out sections for providers you are not using. 
+# +# Usage: +# CONFIG_PATH=./config.yaml pr-generator + +# ────────────────────────────────────────────────────────── +# General settings +# ────────────────────────────────────────────────────────── + +# How often (seconds) to scan for new branches. +scan_frequency: 300 # default: 300 + +# Logging level: DEBUG | INFO | WARNING | ERROR +log_level: INFO # default: INFO + +# Log format: "text" (human-readable) or "json" (for log aggregators like ELK / Loki). +log_format: text # default: text + +# When true, PRs are logged but never actually created. Useful for testing config. +dry_run: false # default: false + +# Port for the built-in health HTTP server (/livez, /readyz, /healthz). +health_port: 8080 # default: 8080 + +# ────────────────────────────────────────────────────────── +# Providers +# +# Each entry is a named provider instance. +# The key is a free-form name used in rules (e.g. "github", "github-acme"). +# Set "type" to "github" or "bitbucket" when the key name is not one of those. +# ────────────────────────────────────────────────────────── + +providers: + + # ── GitHub (GitHub App authentication — recommended) ────────────── + github: + enabled: true + + # The GitHub organisation or user that owns the repository. + owner: my-org + + # The repository name. + repo: my-repo + + # Authentication method: "app" (GitHub App, default) or "pat" (Personal Access Token). + auth_method: app # default: app + + # GitHub App credentials (required when auth_method is "app"). + app_id: "123456" + installation_id: "78901234" # optional — auto-resolved from the repo if omitted + + # Path to the GitHub App RSA private key PEM file. + private_key_path: /secrets/github-app.pem + # Alternative: export GITHUB_APP_PRIVATE_KEY="" + + # HTTP timeout in seconds. 
+ timeout: 30 # default: 30 + + # ── GitHub (PAT authentication) ─────────────────────────────────── + # github: + # enabled: true + # auth_method: pat + # owner: my-org + # repo: my-repo + # token_env: GITHUB_TOKEN # env var that holds the Personal Access Token + # timeout: 30 + + # ── Multiple GitHub organisations ───────────────────────────────── + # Use any name as the key and set type: github. + # Rules reference providers by the key name. + # + # github-acme: + # type: github + # enabled: true + # auth_method: pat + # owner: acme-org + # repo: backend + # token_env: GITHUB_TOKEN_ACME + # + # github-skunkworks: + # type: github + # enabled: true + # auth_method: pat + # owner: skunkworks-org + # repo: platform + # token_env: GITHUB_TOKEN_SKUNKWORKS + + # ── Bitbucket Cloud ─────────────────────────────────────────────── + bitbucket: + enabled: true + + # Bitbucket workspace slug. + workspace: my-workspace + + # Repository slug. + repo_slug: my-repo + + # Name of the environment variable that holds the Bearer token. + token_env: BITBUCKET_TOKEN # default: BITBUCKET_TOKEN + + # Delete source branch after the PR is merged. + close_source_branch: true # default: true + + # HTTP timeout in seconds. + timeout: 30 # default: 30 + +# ────────────────────────────────────────────────────────── +# Rules +# +# Each rule matches branches by regex and maps provider → destination branch. +# The destination branch is automatically excluded from matching. +# Multiple rules are processed in parallel. +# ────────────────────────────────────────────────────────── + +rules: + # Match any branch starting with "feature/" and open PRs toward "main" on GitHub + # and "develop" on Bitbucket. + - pattern: "feature/.*" + destinations: + github: main + bitbucket: develop + + # Match release branches and target "main" on GitHub only. + - pattern: "release/.*" + destinations: + github: main + + # Match hotfix branches on Bitbucket only. 
+ - pattern: ".*-hotfix-.*" + destinations: + bitbucket: master + + # Example: multi-org rules (uncomment if using github-acme / github-skunkworks above) + # - pattern: "feature/.*" + # destinations: + # github-acme: main + # github-skunkworks: develop diff --git a/package.json b/package.json new file mode 100644 index 0000000..102777a --- /dev/null +++ b/package.json @@ -0,0 +1,21 @@ +{ + "name": "pr-generator", + "private": true, + "release": { + "branches": ["main"], + "tagFormat": "v${version}", + "plugins": [ + "@semantic-release/commit-analyzer", + "@semantic-release/release-notes-generator", + "@semantic-release/changelog", + [ + "@semantic-release/git", + { + "assets": ["CHANGELOG.md"], + "message": "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" + } + ], + "@semantic-release/github" + ] + } +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7b03496 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,34 @@ +[build-system] +requires = ["setuptools>=70", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "pr-generator" +version = "1.1.0" +description = "Automated PR creation from branch patterns across GitHub and Bitbucket" +requires-python = ">=3.11" +dependencies = [ + "requests==2.32.5", + "PyJWT[crypto]==2.12.1", + "cryptography==46.0.5", + "pyyaml==6.0.3", +] + +[project.scripts] +pr-generator = "pr_generator.__main__:main" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["src"] + +[tool.coverage.run] +omit = ["src/pr_generator/__main__.py"] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "if __name__ == .__main__.:", +] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c4e0c2d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +requests==2.32.5 +PyJWT[crypto]==2.12.1 +cryptography==46.0.5 +pyyaml==6.0.3 diff --git a/src/pr_generator/__init__.py 
logger = logging.getLogger("pr_generator")


def main() -> None:
    """Entry point: load config, start the health server, run the scan loop.

    Startup order matters:
      1. parse CLI args (``--version`` short-circuits),
      2. bootstrap logging, load config, re-configure logging from config,
      3. instantiate enabled providers,
      4. install signal handlers and start the health server,
      5. loop scan cycles until SIGTERM/SIGINT sets the stop event.
    """
    parser = argparse.ArgumentParser(
        description="Automated PR creation daemon for GitHub and Bitbucket Cloud.",
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"pr-generator {pkg_version('pr-generator')}",
    )
    parser.parse_args()

    # Bootstrap logging with a sensible default before config is loaded
    setup_logging("INFO")

    try:
        config = load_config()
    except (ValueError, FileNotFoundError) as exc:
        logger.error("[Core] Step: startup action=error detail=%s", exc)
        sys.exit(1)

    # Re-configure logging with the level and format from config
    setup_logging(config.log_level, json_format=(config.log_format == "json"))

    # Instantiate active providers (disabled entries are skipped entirely)
    providers = {}
    for pname, pconf in config.providers.items():
        if not pconf.enabled:
            continue
        if pconf.type == "github":
            providers[pname] = GitHubProvider(pconf)
        elif pconf.type == "bitbucket":
            providers[pname] = BitbucketProvider(pconf)
        else:
            logger.warning("[Core] Unknown provider type '%s' for '%s'; skipping.", pconf.type, pname)

    if not providers:
        logger.warning("[Core] Step: startup action=warn detail=No active providers configured; running in idle mode")

    # Graceful shutdown: handlers only set the event; the loop exits on its own
    stop = Event()

    def _handler(sig, _frame):
        logger.info("[Core] Received signal %s; initiating graceful shutdown.", sig)
        stop.set()

    signal.signal(signal.SIGTERM, _handler)
    signal.signal(signal.SIGINT, _handler)

    # Health server (readiness flips after the first successful cycle)
    _server, ready_event = start_health_server(config.health_port, stop)

    logger.info("[Core] Active providers: %s", ", ".join(providers.keys()))
    logger.info("[Core] Rules configured: %d", len(config.rules))
    for rule in config.rules:
        logger.info("[Core] Rule: pattern=%s destinations=%s", rule.pattern, rule.destinations)
    if config.dry_run:
        logger.info("[Core] Dry-run mode enabled — PR creations will only be logged")

    cycle_id = 0
    while not stop.is_set():
        cycle_id += 1
        cycle_start = time.time()
        try:
            scan_cycle(config, providers, cycle_id)
        except Exception:
            # A transient provider/API failure must not kill the daemon (and the
            # health server with it): log it and retry on the next cycle.
            logger.exception("[Core] Step: cycle action=error cycle_id=%d", cycle_id)
        else:
            duration = time.time() - cycle_start
            logger.info("[Core] Step: cycle action=complete cycle_id=%d duration_sec=%.1f", cycle_id, duration)

            if not ready_event.is_set():
                # Readiness means "at least one full cycle succeeded".
                ready_event.set()
                logger.info("[Core] Ready state achieved (first cycle completed)")

        _sleep_interval(config.scan_frequency, stop)

    logger.info("[Core] Shutdown complete.")


def _sleep_interval(total: int, stop: Event) -> None:
    """Sleep ``total`` seconds in ≤1 s slices so a stop signal is honoured quickly."""
    waited = 0
    while waited < total and not stop.is_set():
        stop.wait(timeout=min(1, total - waited))
        waited += 1
logger = logging.getLogger("pr_generator.config")

_DEFAULT_CONFIG_PATH = "/etc/pr-generator/config.yaml"


def load_config() -> AppConfig:
    """Load application configuration from a YAML file.

    The config file path defaults to /etc/pr-generator/config.yaml and can be
    overridden with the CONFIG_PATH environment variable.

    Raises:
        FileNotFoundError: when the config file does not exist.
        ValueError: when the file content is invalid (no rules, bad regex, ...).
    """
    config_path = os.getenv("CONFIG_PATH", _DEFAULT_CONFIG_PATH)
    if not os.path.exists(config_path):
        raise FileNotFoundError(
            f"[Core] Config file not found at '{config_path}'. "
            "Set CONFIG_PATH to the correct path or create the file."
        )
    logger.info("[Core] Step: load_config action=start source=file path=%s", config_path)
    return _load_from_file(config_path)


# ------------------------------------------------------------------
# YAML-based loading
# ------------------------------------------------------------------

def _load_from_file(path: str) -> AppConfig:
    """Parse a YAML config file into an AppConfig; validates rules/providers."""
    with open(path) as fh:
        raw = yaml.safe_load(fh)

    # An empty file parses to None — treat it like an empty mapping.
    raw = raw or {}
    providers = _parse_providers_from_yaml(raw.get("providers") or {})
    rules = _parse_rules(raw.get("rules") or [])

    if not rules:
        raise ValueError("[Core] config.yaml has no rules defined.")
    if not providers:
        # No providers is tolerated (idle mode) but worth flagging loudly.
        logger.warning("[Core] Step: load_config action=warn detail=no enabled providers configured; running in idle mode")

    config = AppConfig(
        scan_frequency=int(raw.get("scan_frequency", 300)),
        log_level=str(raw.get("log_level", "INFO")),
        log_format=str(raw.get("log_format", "text")).lower(),
        dry_run=bool(raw.get("dry_run", False)),
        health_port=int(raw.get("health_port", 8080)),
        providers=providers,
        rules=rules,
    )
    logger.info(
        "[Core] Step: load_config action=end source=file providers=%s rules=%d",
        list(providers.keys()), len(rules),
    )
    return config


def _parse_providers_from_yaml(raw: dict) -> dict[str, ProviderConfig]:
    """Parse the providers section of the YAML config.

    Each key is a provider *name* (e.g. ``github``, ``github-acme``, ``bitbucket``).
    The optional ``type`` field selects the provider implementation; it defaults to
    the key name for the two built-in values ``"github"`` and ``"bitbucket"`` to keep
    backward compatibility with existing configs.

    Example — multiple GitHub orgs::

        providers:
          github-acme:
            type: github
            enabled: true
            owner: acme-org
            repo: backend
          github-skunkworks:
            type: github
            enabled: true
            owner: skunkworks-org
            repo: platform

    Disabled or malformed (non-mapping) entries are silently skipped; an
    unknown ``type`` is a hard configuration error.
    """
    providers: dict[str, ProviderConfig] = {}

    for pname, pcfg in raw.items():
        if not isinstance(pcfg, dict):
            continue
        if not pcfg.get("enabled", False):
            continue

        # Resolve type: explicit field wins; fall back to key name for known types.
        ptype = str(pcfg.get("type", "")).lower() or (
            pname if pname in {"github", "bitbucket"} else ""
        )
        if ptype not in {"github", "bitbucket"}:
            raise ValueError(
                f"[Core] Provider '{pname}' has unknown or missing type '{ptype}'. "
                "Set 'type: github' or 'type: bitbucket'."
            )

        if ptype == "github":
            providers[pname] = _parse_github_provider(pname, pcfg)
        else:
            providers[pname] = _parse_bitbucket_provider(pname, pcfg)

    return providers


def _parse_github_provider(name: str, gh: dict) -> ProviderConfig:
    """Build a ProviderConfig for a GitHub provider entry.

    Supports two auth methods: ``pat`` (token taken from the env var named by
    ``token_env``) and ``app`` (GitHub App credentials; the default).
    """
    auth_method = str(gh.get("auth_method", "app")).lower()
    owner = str(gh.get("owner", "")).strip()
    repo = str(gh.get("repo", "")).strip()
    if not owner or not repo:
        raise ValueError(
            f"[Core] Provider '{name}': 'owner' and 'repo' are required fields. "
            f"Check providers.{name} in your config.yaml."
        )
    if auth_method == "pat":
        token_env = str(gh.get("token_env", "GITHUB_TOKEN"))
        token = os.getenv(token_env, "")
        if not token:
            raise ValueError(
                f"[Core] Provider '{name}': env var '{token_env}' is empty or not set. "
                f"Set {token_env} with a valid GitHub PAT."
            )
        return ProviderConfig(
            name=name,
            type="github",
            enabled=True,
            owner=owner,
            repo=repo,
            auth_method="pat",
            token=token,
            timeout=float(gh.get("timeout", 30)),
        )
    # GitHub App auth (default): app_id is mandatory, installation_id may be
    # auto-resolved later by the provider itself.
    app_id = str(gh.get("app_id", "")).strip()
    if not app_id:
        raise ValueError(
            f"[Core] Provider '{name}': 'app_id' is required for GitHub App auth. "
            f"Check providers.{name} in your config.yaml."
        )
    private_key = _load_private_key(gh)
    if not private_key:
        raise ValueError(
            f"[Core] Provider '{name}': no private key found. "
            f"Set 'private_key_path' in config or the GITHUB_APP_PRIVATE_KEY env var."
        )
    return ProviderConfig(
        name=name,
        type="github",
        enabled=True,
        owner=owner,
        repo=repo,
        app_id=app_id,
        installation_id=str(gh.get("installation_id", "")),
        private_key=private_key,
        auth_method="app",
        timeout=float(gh.get("timeout", 30)),
    )


def _parse_bitbucket_provider(name: str, bb: dict) -> ProviderConfig:
    """Build a ProviderConfig for a Bitbucket provider entry.

    The Bearer token is never stored in the YAML file itself — only the name
    of the env var that holds it (``token_env``).
    """
    workspace = str(bb.get("workspace", "")).strip()
    repo_slug = str(bb.get("repo_slug", "")).strip()
    if not workspace or not repo_slug:
        raise ValueError(
            f"[Core] Provider '{name}': 'workspace' and 'repo_slug' are required fields. "
            f"Check providers.{name} in your config.yaml."
        )
    token_env = str(bb.get("token_env", "BITBUCKET_TOKEN"))
    token = os.getenv(token_env, "")
    if not token:
        raise ValueError(
            f"[Core] Provider '{name}': env var '{token_env}' is empty or not set. "
            f"Set {token_env} with a valid Bitbucket access token."
        )
    return ProviderConfig(
        name=name,
        type="bitbucket",
        enabled=True,
        workspace=workspace,
        repo_slug=repo_slug,
        token=token,
        timeout=float(bb.get("timeout", 30)),
        close_source_branch=bool(bb.get("close_source_branch", True)),
    )


def _load_private_key(gh_cfg: dict) -> str:
    """Load the GitHub App private key PEM from file path or env var.

    Resolution order:
      1. ``private_key_path`` in the provider config (when the file exists),
      2. the GITHUB_APP_PRIVATE_KEY env var, either raw PEM or base64-encoded PEM.

    Returns an empty string when neither source yields a key; the caller turns
    that into a configuration error.
    """
    key_path = str(gh_cfg.get("private_key_path", ""))
    if key_path and os.path.exists(key_path):
        with open(key_path) as fh:
            return fh.read()

    # Fallback: env var; values without a PEM header are assumed base64-encoded.
    raw = os.getenv("GITHUB_APP_PRIVATE_KEY", "")
    if raw and "-----BEGIN" not in raw:
        try:
            raw = base64.b64decode(raw).decode()
        except (ValueError, UnicodeDecodeError) as exc:
            # Surface a clear config error instead of a cryptic binascii traceback.
            raise ValueError(
                "[Core] GITHUB_APP_PRIVATE_KEY is neither a PEM key nor valid base64."
            ) from exc
    return raw


def _parse_rules(raw_rules: list) -> list[ScanRule]:
    """Parse the rules list; skips malformed entries, rejects invalid regexes."""
    rules: list[ScanRule] = []
    for item in raw_rules:
        if not isinstance(item, dict):
            logger.warning("[Core] Step: load_config action=warn detail=rule entry is not a mapping; skipping")
            continue
        pattern = str(item.get("pattern", ""))
        if not pattern:
            logger.warning("[Core] Step: load_config action=warn detail=rule with empty pattern; skipping")
            continue
        try:
            compiled = re.compile(pattern)
        except re.error as exc:
            # A bad regex is a hard error: silently skipping it would silently
            # disable PR creation for that rule.
            raise ValueError(f"[Core] Invalid regex pattern '{pattern}': {exc}") from exc
        destinations = {str(k): str(v) for k, v in (item.get("destinations") or {}).items()}
        if not destinations:
            logger.warning("[Core] Step: load_config action=warn detail=rule pattern=%s has no destinations; skipping", pattern)
            continue
        rules.append(ScanRule(pattern=pattern, compiled=compiled, destinations=destinations))
    return rules
class _HealthHandler(BaseHTTPRequestHandler):
    """Lightweight HTTP handler for Kubernetes health probes.

    Endpoints:
        /livez, /healthz → 200 while running; 503 when shutting down.
        /readyz → 200 after the first full scan cycle; 503 before that.

    The two Events are class attributes injected by ``start_health_server``
    (see the ``type(...)`` subclass there), so every request instance of the
    bound subclass shares the same stop/ready state.
    """

    # Injected by the server factory below
    stop_event: Event
    ready_event: Event

    def _write(self, code: int, body: str) -> None:
        # BaseHTTPRequestHandler protocol: status line first, then headers,
        # then end_headers() before any body bytes — order is mandatory.
        self.send_response(code)
        self.send_header("Content-Type", "text/plain")
        # Probes must always see fresh state, never a cached response.
        self.send_header("Cache-Control", "no-store")
        self.end_headers()
        self.wfile.write(body.encode())

    def do_GET(self) -> None:  # noqa: N802
        # Liveness: healthy unless a shutdown has been requested.
        if self.path in ("/livez", "/healthz"):
            if self.stop_event.is_set():
                self._write(503, "shutting down")
            else:
                self._write(200, "live")
        # Readiness: requires a completed first cycle AND no shutdown in progress.
        elif self.path == "/readyz":
            if self.ready_event.is_set() and not self.stop_event.is_set():
                self._write(200, "ready")
            else:
                self._write(503, "not ready")
        else:
            self._write(404, "not found")

    def log_message(self, fmt: str, *args) -> None:  # noqa: ANN002
        # Suppress default access logs; health probes are very frequent
        pass


def start_health_server(port: int, stop_event: Event) -> tuple[ThreadingHTTPServer, Event]:
    """Start the health HTTP server in a daemon thread.

    Args:
        port: TCP port to bind on all interfaces.
        stop_event: shared shutdown flag read by the probe handler.

    Returns:
        (server, ready_event) — set ready_event after the first successful cycle.
    """
    ready_event = Event()

    # Inject shared state into the handler class via a closure-built subclass
    # (handler instances are created per-request by the server, so per-instance
    # injection is not possible).
    handler_cls = type(
        "_BoundHealthHandler",
        (_HealthHandler,),
        {"stop_event": stop_event, "ready_event": ready_event},
    )

    server = ThreadingHTTPServer(("0.0.0.0", port), handler_cls)
    # Daemon thread: the server must never block process exit on shutdown.
    thread = threading.Thread(target=server.serve_forever, name="health-server", daemon=True)
    thread.start()
    logger.info("[Core] Step: health_server action=start port=%d", port)
    return server, ready_event
+ """ + if headers is None and headers_factory is None: + raise ValueError("Provide either headers or headers_factory") + + attempts = (0,) + _BACKOFF_DELAYS + last_error: Exception | None = None + + for delay in attempts: + if delay: + time.sleep(delay) + + hdrs = headers if headers_factory is None else headers_factory() + try: + logger.debug( + "[%s] [HTTP] %s %s params=%s", + client_name, method, url, + request_kwargs.get("params"), + ) + start = time.time() + response = requests.request(method, url, headers=hdrs, timeout=timeout, **request_kwargs) + duration_ms = int((time.time() - start) * 1000) + logger.debug("[%s] [HTTP] %s %s -> %s (%dms)", client_name, method, url, response.status_code, duration_ms) + except requests.RequestException as exc: + logger.exception("[%s] [HTTP] %s %s failed: %s", client_name, method, url, exc) + err = exception_cls(f"Request failure: {exc}", None) + last_error = err + if should_retry(None, exc): + continue + raise err + + if response.status_code >= 400: + logger.error("[%s] [HTTP] %s %s error %s: %s", client_name, method, url, response.status_code, response.text) + err = exception_cls(f"{client_name} API error {response.status_code}: {response.text}", response.status_code) + last_error = err + if should_retry(response.status_code, None): + continue + raise err + + return response + + if last_error is None: + raise RuntimeError(f"[{client_name}] request_with_retry exhausted retries with no recorded error") # pragma: no cover + raise last_error diff --git a/src/pr_generator/logging_config.py b/src/pr_generator/logging_config.py new file mode 100644 index 0000000..082e316 --- /dev/null +++ b/src/pr_generator/logging_config.py @@ -0,0 +1,43 @@ +"""Logging setup.""" + +from __future__ import annotations + +import json +import logging + + +class _StructuredFormatter(logging.Formatter): + """JSON formatter for structured log aggregators (ELK, Loki, etc.).""" + + def format(self, record: logging.LogRecord) -> str: + payload: dict = { + 
"timestamp": self.formatTime(record), + "level": record.levelname, + "logger": record.name, + "message": record.getMessage(), + } + if record.exc_info: + payload["exception"] = self.formatException(record.exc_info) + if record.stack_info: + payload["stack_info"] = self.formatStack(record.stack_info) + return json.dumps(payload) + + +def setup_logging(level: str, json_format: bool = False) -> None: + """Configure the root logger. + + Args: + level: log level string, e.g. "INFO", "DEBUG". + json_format: emit structured JSON lines when True. + """ + root = logging.getLogger() + root.setLevel(getattr(logging, level.upper(), logging.INFO)) + handler = logging.StreamHandler() + if json_format: + handler.setFormatter(_StructuredFormatter()) + else: + handler.setFormatter(logging.Formatter( + "%(asctime)s %(levelname)-8s %(name)s %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S", + )) + root.handlers = [handler] diff --git a/src/pr_generator/models.py b/src/pr_generator/models.py new file mode 100644 index 0000000..5a06622 --- /dev/null +++ b/src/pr_generator/models.py @@ -0,0 +1,75 @@ +"""Data models shared across the application.""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class ProviderConfig: + """Immutable configuration for a single Git provider instance.""" + + name: str + enabled: bool + type: str = "" # "github" | "bitbucket" — provider class to use + timeout: float = 30.0 + # GitHub — common + owner: str = "" + repo: str = "" + auth_method: str = "app" # "app" (GitHub App) | "pat" (Personal Access Token) + # GitHub App auth + app_id: str = "" + installation_id: str = "" + private_key: str = "" # PEM content (loaded at startup) + # Bitbucket / GitHub PAT + workspace: str = "" + repo_slug: str = "" + token: str = "" # Bearer/PAT token + # Bitbucket behaviour + close_source_branch: bool = True + + +@dataclass +class ScanRule: + """A scanning rule: one regex pattern and its destination branch 
@dataclass
class ScanRule:
    """A scanning rule: one regex pattern and its destination branch per provider."""

    # Original regex source text (kept for logging/config echo).
    pattern: str
    # Pre-compiled form of ``pattern``; compiled once at config-load time.
    compiled: re.Pattern
    # Provider name → destination branch for that provider.
    destinations: dict[str, str] = field(default_factory=dict)
    # e.g. {"github": "develop", "bitbucket": "nonpro"}


@dataclass(frozen=True)
class AppConfig:
    """Full application configuration (immutable after load)."""

    scan_frequency: int  # seconds between scan cycles
    log_level: str
    log_format: str  # "text" | "json"
    dry_run: bool  # when True, PR creation is logged but not executed
    health_port: int
    providers: dict[str, ProviderConfig]  # provider name (e.g. "github", "github-acme") → ProviderConfig
    rules: list[ScanRule]


@dataclass
class RuleResult:
    """Outcome of processing one ScanRule for one provider in a cycle."""

    rule_pattern: str
    provider: str
    destination: str
    processed: int = 0         # branches that matched the rule's pattern
    created: int = 0           # PRs actually created
    skipped_existing: int = 0  # matches where an open PR already existed
    simulated: int = 0         # would-be creations under dry_run
    errors: int = 0            # failures while checking/creating


@dataclass
class CycleResult:
    """Aggregated outcome of a full scan cycle."""

    cycle_id: int
    rule_results: list[RuleResult] = field(default_factory=list)
# pragma: no cover + + def create_pull_request(self, source: str, destination: str) -> None: + """Create a PR from source to destination. + + Raises a provider-specific exception on API failure. + """ + ... # pragma: no cover + + def reset_cycle_cache(self) -> None: + """Clear any per-cycle caches. No-op if the provider has no cache.""" + ... # pragma: no cover diff --git a/src/pr_generator/providers/bitbucket.py b/src/pr_generator/providers/bitbucket.py new file mode 100644 index 0000000..9c867c6 --- /dev/null +++ b/src/pr_generator/providers/bitbucket.py @@ -0,0 +1,164 @@ +"""Bitbucket Cloud provider implementation.""" + +from __future__ import annotations + +import logging +from typing import Any + +from pr_generator.http_client import request_with_retry +from pr_generator.models import ProviderConfig + + +class BitbucketError(Exception): + """Raised when a Bitbucket API call fails.""" + + def __init__(self, message: str, status_code: int | None = None) -> None: + super().__init__(message) + self.status_code = status_code + + +class BitbucketProvider: + """Bitbucket Cloud provider. + + Receives all configuration via constructor — no module-level env-var reads. 
+ """ + + def __init__(self, config: ProviderConfig) -> None: + self._name = config.name + self._workspace = config.workspace + self._repo_slug = config.repo_slug + self._token = config.token + self._timeout = config.timeout + self._close_source_branch = config.close_source_branch + self._api_url = ( + f"https://api.bitbucket.org/2.0/repositories" + f"/{self._workspace}/{self._repo_slug}" + ) + self._logger = logging.getLogger("pr_generator.providers.bitbucket") + + # Per-cycle cache (reset via reset_cycle_cache) + self._pr_cache: dict[tuple[str, str], bool] = {} + + # ------------------------------------------------------------------ + # ProviderInterface + # ------------------------------------------------------------------ + + @property + def name(self) -> str: + return self._name + + def get_branches(self) -> list[str]: + """Fetch all branch names (handles pagination).""" + self._logger.info("[%s] Step: get_branches action=start", self._name) + if not (self._token and self._workspace and self._repo_slug): + self._logger.error("[%s] Step: get_branches action=error detail=missing configuration", self._name) + return [] + + url = f"{self._api_url}/refs/branches" + names: list[str] = [] + page = 1 + + while True: + self._logger.debug("[%s] Step: get_branches action=fetch page=%d", self._name, page) + resp = self._request("GET", url, params={"pagelen": 100, "page": page}) + data = resp.json() + page_values: list[dict[str, Any]] = data.get("values", []) + names.extend(b["name"] for b in page_values if b.get("name")) + self._logger.debug( + "[%s] Step: get_branches action=fetch page=%d count=%d total=%d", + self._name, page, len(page_values), len(names), + ) + if "next" in data: + page += 1 + else: + break + + self._logger.info("[%s] Step: get_branches action=end total=%d", self._name, len(names)) + return names + + def check_existing_pr(self, source: str, destination: str) -> bool: + """Return True if an open PR from source to destination already exists.""" + 
self._logger.info( + "[%s] Step: check_existing_pr action=start source=%s dest=%s", + self._name, source, destination, + ) + key = (source, destination) + if key in self._pr_cache: + self._logger.debug("[%s] Step: check_existing_pr action=cache_hit source=%s dest=%s", self._name, source, destination) + return self._pr_cache[key] + + resp = self._request( + "GET", + f"{self._api_url}/pullrequests", + params={ + "state": "OPEN", + "q": f'source.branch.name="{source}" AND destination.branch.name="{destination}"', + "pagelen": 1, + }, + ) + exists = len(resp.json().get("values", [])) > 0 + self._pr_cache[key] = exists + self._logger.info( + "[%s] Step: check_existing_pr action=end source=%s dest=%s exists=%s", + self._name, source, destination, str(exists).lower(), + ) + return exists + + def create_pull_request(self, source: str, destination: str) -> None: + """Create a PR from source to destination including default reviewers.""" + reviewers = self._get_default_reviewers() + self._logger.info( + "[%s] Step: create_pull_request action=start source=%s dest=%s reviewers=%d", + self._name, source, destination, len(reviewers), + ) + payload = { + "title": f"Merge {source} into {destination}", + "source": {"branch": {"name": source}}, + "destination": {"branch": {"name": destination}}, + "reviewers": reviewers, + "close_source_branch": self._close_source_branch, + } + resp = self._request("POST", f"{self._api_url}/pullrequests", json=payload) + self._pr_cache[(source, destination)] = True + self._logger.info( + "[%s] Step: create_pull_request action=end source=%s dest=%s status=created", + self._name, source, destination, + ) + + def reset_cycle_cache(self) -> None: + """Clear per-cycle PR-existence cache.""" + self._pr_cache.clear() + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _get_default_reviewers(self) -> list[dict[str, Any]]: + 
self._logger.info("[%s] Step: get_default_reviewers action=start", self._name) + if not (self._token and self._workspace and self._repo_slug): + self._logger.warning("[%s] Step: get_default_reviewers action=skip detail=missing config", self._name) + return [] + url = f"{self._api_url}/default-reviewers" + resp = self._request("GET", url) + reviewers = [{"uuid": r.get("uuid")} for r in resp.json().get("values", [])] + self._logger.info("[%s] Step: get_default_reviewers action=end count=%d", self._name, len(reviewers)) + return reviewers + + def _request(self, method: str, url: str, **kwargs): + return request_with_retry( + logger=self._logger, + client_name=self._name, + method=method, + url=url, + timeout=self._timeout, + headers={"Authorization": f"Bearer {self._token}", "Content-Type": "application/json"}, + exception_cls=BitbucketError, + should_retry=self._should_retry, + **kwargs, + ) + + def _should_retry(self, status_code: int | None, exc: Exception | None) -> bool: + if exc is not None: + self._logger.warning("[%s] Retry due to request failure: %s", self._name, exc) + return True + return bool(status_code and (500 <= status_code < 600 or status_code in (408, 429))) diff --git a/src/pr_generator/providers/github.py b/src/pr_generator/providers/github.py new file mode 100644 index 0000000..21bd2e8 --- /dev/null +++ b/src/pr_generator/providers/github.py @@ -0,0 +1,270 @@ +"""GitHub App provider implementation.""" + +from __future__ import annotations + +import logging +import time +from datetime import datetime + +import jwt + +from pr_generator.http_client import request_with_retry +from pr_generator.models import ProviderConfig + +_API_BASE = "https://api.github.com" + + +class GitHubError(Exception): + """Raised when a GitHub API call fails.""" + + def __init__(self, message: str, status_code: int | None = None) -> None: + super().__init__(message) + self.status_code = status_code + + +class GitHubProvider: + """GitHub App provider. 
    def __init__(self, config: ProviderConfig) -> None:
        # Identity and target repository
        self._name = config.name
        self._owner = config.owner
        self._repo = config.repo
        self._auth_method = config.auth_method  # "app" | "pat"
        self._pat = config.token  # used when auth_method == "pat"
        # GitHub App credentials (used when auth_method == "app")
        self._app_id = config.app_id
        self._installation_id = config.installation_id
        self._private_key = config.private_key
        self._timeout = config.timeout
        self._repo_root = f"{_API_BASE}/repos/{self._owner}/{self._repo}"
        self._logger = logging.getLogger("pr_generator.providers.github")

        # Token caches — instance-lifetime, refreshed automatically before expiry
        self._jwt_cache: str | None = None
        self._jwt_exp: float = 0.0
        self._install_token: str | None = None
        self._install_token_exp: float = 0.0

        # Per-cycle caches (reset via reset_cycle_cache)
        self._pr_cache: dict[tuple[str, str], bool] = {}
        self._branch_cache: dict[str, bool] = {}

    # ------------------------------------------------------------------
    # ProviderInterface
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Provider instance name as configured (used in rule destinations)."""
        return self._name
params={"per_page": 100, "page": page}) + data = r.json() + if not data: + break + out.extend(b["name"] for b in data) + if len(data) < 100: + break + page += 1 + + # Populate branch cache from the full list to avoid redundant API calls later + for branch_name in out: + self._branch_cache[branch_name] = True + + self._logger.info("[%s] Step: get_branches action=end total=%d", self._name, len(out)) + return out + + def check_existing_pr(self, source: str, destination: str) -> bool: + """Return True if an open PR from source to destination already exists.""" + self._logger.info( + "[%s] Step: check_existing_pr action=start source=%s dest=%s", + self._name, source, destination, + ) + key = (source, destination) + if key in self._pr_cache: + self._logger.debug("[%s] Step: check_existing_pr action=cache_hit source=%s dest=%s", self._name, source, destination) + return self._pr_cache[key] + + r = self._request( + "GET", + f"{self._repo_root}/pulls", + params={ + "state": "open", + "base": destination, + "head": f"{self._owner}:{source}", + "per_page": 1, + }, + ) + exists = len(r.json()) > 0 + self._pr_cache[key] = exists + self._logger.info( + "[%s] Step: check_existing_pr action=end source=%s dest=%s exists=%s", + self._name, source, destination, str(exists).lower(), + ) + return exists + + def create_pull_request(self, source: str, destination: str) -> None: + """Create a PR from source to destination if source branch exists.""" + self._logger.info( + "[%s] Step: create_pull_request action=start source=%s dest=%s", + self._name, source, destination, + ) + if not self._branch_exists(source): + self._logger.warning( + "[%s] Step: create_pull_request action=skip source=%s detail=branch not found", + self._name, source, + ) + return + + payload = { + "title": f"Merge {source} into {destination}", + "head": source, + "base": destination, + "body": "Automated PR generated by pr-generator.", + "draft": False, + } + resp = self._request("POST", f"{self._repo_root}/pulls", 
json=payload) + self._pr_cache[(source, destination)] = True + self._logger.info( + "[%s] Step: create_pull_request action=end source=%s dest=%s" + " status=created number=%s", + self._name, source, destination, resp.json().get("number"), + ) + + def reset_cycle_cache(self) -> None: + """Clear per-cycle branch-existence and PR-existence caches.""" + self._pr_cache.clear() + self._branch_cache.clear() + + @staticmethod + def _now() -> float: + return time.time() + + def _new_jwt(self) -> str: + self._logger.debug("[GitHub] Step: get_jwt action=generate") + if not (self._app_id and self._private_key): + raise RuntimeError("[GitHub] Missing GITHUB_APP_ID or GITHUB_APP_PRIVATE_KEY.") + now = int(self._now()) + payload = {"iat": now - 60, "exp": now + (9 * 60), "iss": self._app_id} + return jwt.encode(payload, self._private_key, algorithm="RS256") + + def _get_jwt(self) -> str: + if self._jwt_cache and self._now() < self._jwt_exp - 30: + return self._jwt_cache + self._jwt_cache = self._new_jwt() + self._jwt_exp = self._now() + (9 * 60) + return self._jwt_cache + + def _resolve_installation_id(self) -> str: + self._logger.info("[GitHub] Step: resolve_installation_id action=start") + if self._installation_id: + self._logger.info("[GitHub] Step: resolve_installation_id action=end detail=provided") + return self._installation_id + r = self._request( + "GET", + f"{_API_BASE}/repos/{self._owner}/{self._repo}/installation", + installation=False, + ) + inst_id = str(r.json().get("id", "")) + if not inst_id: + raise RuntimeError("[GitHub] Could not resolve installation id.") + # Cache so subsequent token refreshes don't make an extra API call + self._installation_id = inst_id + self._logger.info("[GitHub] Step: resolve_installation_id action=end id=%s", inst_id) + return inst_id + + def _get_installation_token(self) -> str: + if self._install_token and self._now() < self._install_token_exp - 30: + return self._install_token + self._logger.info("[GitHub] Step: 
get_installation_token action=start") + inst_id = self._resolve_installation_id() + r = self._request( + "POST", + f"{_API_BASE}/app/installations/{inst_id}/access_tokens", + installation=False, + ) + data = r.json() + self._install_token = data.get("token") + expires_at = data.get("expires_at", "") + try: + self._install_token_exp = datetime.fromisoformat( + expires_at.replace("Z", "+00:00") + ).timestamp() + except Exception as exc: + self._logger.warning( + "[%s] Step: get_installation_token action=warn detail=failed to parse expiry (%s); using 55min default", + self._name, exc, + ) + self._install_token_exp = self._now() + (55 * 60) + self._logger.info("[GitHub] Step: get_installation_token action=end") + return self._install_token + + def _headers(self, installation: bool = True) -> dict: + if self._auth_method == "pat": + return { + "Authorization": f"token {self._pat}", + "Accept": "application/vnd.github+json", + } + if installation: + return { + "Authorization": f"Bearer {self._get_installation_token()}", + "Accept": "application/vnd.github+json", + } + return { + "Authorization": f"Bearer {self._get_jwt()}", + "Accept": "application/vnd.github+json", + } + + def _request(self, method: str, url: str, installation: bool = True, **kwargs): + return request_with_retry( + logger=self._logger, + client_name="GitHub", + method=method, + url=url, + timeout=self._timeout, + headers_factory=lambda: self._headers(installation), + exception_cls=GitHubError, + should_retry=self._should_retry, + **kwargs, + ) + + def _should_retry(self, status_code: int | None, exc: Exception | None) -> bool: + if exc is not None: + self._logger.warning("[%s] Retry due to request failure: %s", self._name, exc) + return True + return bool(status_code and (500 <= status_code < 600 or status_code in (408, 429))) + + def _branch_exists(self, branch: str) -> bool: + if branch in self._branch_cache: + return self._branch_cache[branch] + self._logger.info("[%s] Step: branch_exists 
action=start branch=%s", self._name, branch) + try: + self._request("GET", f"{self._repo_root}/branches/{branch}") + self._branch_cache[branch] = True + self._logger.info("[%s] Step: branch_exists action=end branch=%s exists=true", self._name, branch) + return True + except GitHubError as exc: + if exc.status_code == 404: + self._branch_cache[branch] = False + self._logger.info("[%s] Step: branch_exists action=end branch=%s exists=false", self._name, branch) + return False + raise diff --git a/src/pr_generator/scanner.py b/src/pr_generator/scanner.py new file mode 100644 index 0000000..d6af7df --- /dev/null +++ b/src/pr_generator/scanner.py @@ -0,0 +1,154 @@ +"""Scan cycle orchestrator with concurrent rule processing.""" + +from __future__ import annotations + +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed + +from pr_generator.models import AppConfig, CycleResult, RuleResult, ScanRule +from pr_generator.providers.base import ProviderInterface + +logger = logging.getLogger("pr_generator.scanner") + +_MAX_RULE_WORKERS = 10 + + +def scan_cycle( + config: AppConfig, + providers: dict[str, ProviderInterface], + cycle_id: int, +) -> CycleResult: + """Execute one full scan cycle. + + Phase 1: Fetch branches from every active provider concurrently. + Phase 2: Process every rule×provider pair concurrently. 
+ """ + logger.info( + "[Core] Step: scan_cycle action=start cycle_id=%d rules=%d providers=%s", + cycle_id, len(config.rules), list(providers.keys()), + ) + + # Reset per-cycle caches on all providers + for prov in providers.values(): + prov.reset_cycle_cache() + + # Phase 1 — fetch branches in parallel (one task per provider) + branches_by_provider: dict[str, list[str]] = {} + with ThreadPoolExecutor(max_workers=max(1, len(providers))) as pool: + futures = { + pool.submit(prov.get_branches): prov_name + for prov_name, prov in providers.items() + } + for future in as_completed(futures): + prov_name = futures[future] + try: + branches_by_provider[prov_name] = future.result() + except Exception as exc: + logger.error( + "[%s] Step: get_branches action=error cycle_id=%d detail=%s", + prov_name.capitalize(), cycle_id, exc, + ) + branches_by_provider[prov_name] = [] + + # Phase 2 — process rules × providers in parallel + result = CycleResult(cycle_id=cycle_id) + task_futures = [] + + with ThreadPoolExecutor(max_workers=_MAX_RULE_WORKERS) as pool: + for rule in config.rules: + for prov_name, dest_branch in rule.destinations.items(): + if prov_name not in providers: + logger.debug( + "[Core] Step: process_rule action=skip rule=%s detail=provider %s not active", + rule.pattern, prov_name, + ) + continue + task_futures.append(pool.submit( + _process_rule, + provider=providers[prov_name], + branches=branches_by_provider.get(prov_name, []), + rule=rule, + dest_branch=dest_branch, + dry_run=config.dry_run, + cycle_id=cycle_id, + )) + + for future in as_completed(task_futures): + try: + result.rule_results.append(future.result()) + except Exception as exc: + logger.error("[Core] Step: process_rule action=error cycle_id=%d detail=%s", cycle_id, exc) + + # Aggregate and log cycle summary + total = sum(r.processed for r in result.rule_results) + created = sum(r.created for r in result.rule_results) + skipped = sum(r.skipped_existing for r in result.rule_results) + simulated = 
sum(r.simulated for r in result.rule_results) + errors = sum(r.errors for r in result.rule_results) + logger.info( + "[Core] Step: scan_cycle action=end cycle_id=%d processed=%d" + " created=%d skipped_existing=%d dry_run=%d errors=%d", + cycle_id, total, created, skipped, simulated, errors, + ) + return result + + +# ------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------ + +def _process_rule( + provider: ProviderInterface, + branches: list[str], + rule: ScanRule, + dest_branch: str, + dry_run: bool, + cycle_id: int, +) -> RuleResult: + """Filter branches by rule and create PRs where needed.""" + pname = provider.name.capitalize() + result = RuleResult( + rule_pattern=rule.pattern, + provider=provider.name, + destination=dest_branch, + ) + logger.info( + "[%s] Step: process_rule action=start cycle_id=%d pattern=%s dest=%s", + pname, cycle_id, rule.pattern, dest_branch, + ) + + matched = [ + b for b in branches + if b != dest_branch and rule.compiled.match(b) + ] + + for branch in matched: + result.processed += 1 + try: + if provider.check_existing_pr(branch, dest_branch): + result.skipped_existing += 1 + continue + if dry_run: + logger.info( + "[%s] Step: create_pull_request action=dry_run cycle_id=%d source=%s dest=%s", + pname, cycle_id, branch, dest_branch, + ) + result.simulated += 1 + continue + provider.create_pull_request(branch, dest_branch) + result.created += 1 + except Exception as exc: + logger.error( + "[%s] Step: create_pull_request action=error cycle_id=%d source=%s dest=%s detail=%s", + pname, cycle_id, branch, dest_branch, exc, + ) + result.errors += 1 + + logger.info( + "[%s] Step: process_rule action=end cycle_id=%d pattern=%s dest=%s" + " processed=%d created=%d dry_run=%d skipped=%d errors=%d", + pname, cycle_id, rule.pattern, dest_branch, + result.processed, result.created, result.simulated, + result.skipped_existing, result.errors, + ) + return result 
diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..32f0ec4 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,49 @@ +"""Shared fixtures for the test suite.""" + +import pytest + + +@pytest.fixture +def github_app_config(): + from pr_generator.models import ProviderConfig + return ProviderConfig( + name="github", + type="github", + enabled=True, + owner="test-owner", + repo="test-repo", + auth_method="app", + app_id="12345", + installation_id="67890", + private_key="fake-pem", + timeout=5.0, + ) + + +@pytest.fixture +def github_pat_config(): + from pr_generator.models import ProviderConfig + return ProviderConfig( + name="github", + type="github", + enabled=True, + owner="test-owner", + repo="test-repo", + auth_method="pat", + token="ghp_testtoken123", + timeout=5.0, + ) + + +@pytest.fixture +def bitbucket_provider_config(): + from pr_generator.models import ProviderConfig + return ProviderConfig( + name="bitbucket", + type="bitbucket", + enabled=True, + workspace="test-workspace", + repo_slug="test-repo", + token="test-token", + timeout=5.0, + ) diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..15b2cf4 --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,716 @@ +"""Tests for config loading.""" + +import os +import re +import textwrap +import pytest + + +def _write_config(tmp_path, content: str) -> str: + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(content)) + return str(path) + + +_FAKE_PEM = "-----BEGIN RSA PRIVATE KEY-----\nZmFrZQ==\n-----END RSA PRIVATE KEY-----" + + +class TestLoadFromFile: + def test_single_rule_both_providers(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "bb-token") + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = _write_config(tmp_path, """ + scan_frequency: 60 + log_level: DEBUG + dry_run: true + health_port: 
9090 + providers: + github: + enabled: true + owner: my-org + repo: my-repo + app_id: "111" + installation_id: "222" + private_key_path: /nonexistent + timeout: 10 + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + timeout: 15 + rules: + - pattern: "feature/.*" + destinations: + github: main + bitbucket: develop + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + + assert cfg.scan_frequency == 60 + assert cfg.log_level == "DEBUG" + assert cfg.dry_run is True + assert cfg.health_port == 9090 + assert "github" in cfg.providers + assert "bitbucket" in cfg.providers + assert cfg.providers["bitbucket"].token == "bb-token" + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "feature/.*" + assert cfg.rules[0].destinations == {"github": "main", "bitbucket": "develop"} + assert cfg.rules[0].compiled.match("feature/my-branch") + + def test_multiple_rules(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: ".*-nonpro-.*" + destinations: + bitbucket: nonpro + - pattern: ".*-pro-.*" + destinations: + bitbucket: master + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) == 2 + assert cfg.rules[0].destinations == {"bitbucket": "nonpro"} + assert cfg.rules[1].destinations == {"bitbucket": "master"} + + def test_missing_rules_raises(self, tmp_path, monkeypatch): + monkeypatch.setenv("BB_TOKEN_TEST", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: w + repo_slug: r + token_env: BB_TOKEN_TEST + rules: [] + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="no rules"): 
+ load_config() + + def test_invalid_regex_raises(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: "(" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="Invalid regex"): + load_config() + + def test_missing_github_private_key_raises(self, tmp_path, monkeypatch): + """GitHub App provider with no private key should fail at load time.""" + monkeypatch.delenv("GITHUB_APP_PRIVATE_KEY", raising=False) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "1" + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="no private key"): + load_config() + + def test_missing_bitbucket_token_raises(self, tmp_path, monkeypatch): + """Bitbucket provider with empty token env var should fail at load time.""" + monkeypatch.delenv("BB_MISSING_TOKEN", raising=False) + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BB_MISSING_TOKEN + rules: + - pattern: ".*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="BB_MISSING_TOKEN"): + load_config() + + +class TestGitHubPATConfig: + def test_pat_auth_method_from_yaml(self, tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(""" + providers: + github: + enabled: true + auth_method: pat + owner: my-org + repo: my-repo + token_env: GITHUB_TOKEN + timeout: 10 + rules: + - 
pattern: "feature/.*" + destinations: + github: main + """)) + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + cfg = load_config() + gh = cfg.providers["github"] + assert gh.auth_method == "pat" + assert gh.token == "ghp_testtoken" + assert gh.app_id == "" + assert gh.private_key == "" + + def test_pat_custom_token_env(self, tmp_path, monkeypatch): + monkeypatch.setenv("MY_GH_TOKEN", "ghp_custom") + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(""" + providers: + github: + enabled: true + auth_method: pat + owner: org + repo: repo + token_env: MY_GH_TOKEN + rules: + - pattern: ".*" + destinations: + github: main + """)) + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github"].token == "ghp_custom" + + def test_app_auth_method_default(self, tmp_path, monkeypatch): + """auth_method defaults to 'app' when not specified.""" + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(""" + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "111" + installation_id: "222" + rules: + - pattern: ".*" + destinations: + github: main + """)) + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github"].auth_method == "app" + + def test_log_format_json_from_yaml(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(""" + log_format: json + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: ".*" + destinations: + bitbucket: main + """)) + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + assert load_config().log_format == "json" + + +class 
TestMultiOrgGitHub: + """Tests for multiple GitHub provider instances (different orgs/repos).""" + + def test_two_github_providers_different_orgs(self, tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN_ACME", "ghp_acme") + monkeypatch.setenv("GITHUB_TOKEN_SKW", "ghp_skw") + path = _write_config(tmp_path, """ + providers: + github-acme: + type: github + enabled: true + auth_method: pat + owner: acme-org + repo: backend + token_env: GITHUB_TOKEN_ACME + github-skunkworks: + type: github + enabled: true + auth_method: pat + owner: skunkworks-org + repo: platform + token_env: GITHUB_TOKEN_SKW + rules: + - pattern: "feature/.*" + destinations: + github-acme: main + github-skunkworks: develop + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + + assert set(cfg.providers.keys()) == {"github-acme", "github-skunkworks"} + acme = cfg.providers["github-acme"] + assert acme.type == "github" + assert acme.owner == "acme-org" + assert acme.repo == "backend" + assert acme.token == "ghp_acme" + skw = cfg.providers["github-skunkworks"] + assert skw.type == "github" + assert skw.owner == "skunkworks-org" + assert skw.token == "ghp_skw" + assert cfg.rules[0].destinations == { + "github-acme": "main", + "github-skunkworks": "develop", + } + + def test_named_provider_defaults_type_from_key(self, tmp_path, monkeypatch): + """Key 'github' without explicit type should still work (backward compat).""" + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "1" + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github"].type == "github" + + def test_named_provider_unknown_type_raises(self, tmp_path, monkeypatch): + """A named provider with an unrecognised type 
should raise ValueError.""" + path = _write_config(tmp_path, """ + providers: + my-provider: + type: gitlab + enabled: true + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + my-provider: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="unknown or missing type"): + load_config() + + def test_named_provider_missing_type_raises(self, tmp_path, monkeypatch): + """A non-standard provider key without 'type' should raise ValueError.""" + path = _write_config(tmp_path, """ + providers: + my-github-instance: + enabled: true + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + my-github-instance: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="unknown or missing type"): + load_config() + + def test_mixed_github_and_bitbucket_named_providers(self, tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN_ORG", "ghp_org") + monkeypatch.setenv("BB_TOKEN", "bb_tok") + path = _write_config(tmp_path, """ + providers: + github-myorg: + type: github + enabled: true + auth_method: pat + owner: my-org + repo: app + token_env: GITHUB_TOKEN_ORG + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BB_TOKEN + rules: + - pattern: "feature/.*" + destinations: + github-myorg: main + bitbucket: develop + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github-myorg"].type == "github" + assert cfg.providers["bitbucket"].type == "bitbucket" + + +class TestConfigValidationEdgeCases: + """Cover validation branches not exercised by the main test classes.""" + + def test_no_enabled_providers_loads_idle(self, tmp_path, monkeypatch): + """All providers disabled → app loads successfully in idle mode (no ValueError).""" + path = _write_config(tmp_path, """ + providers: + 
github: + enabled: false + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + config = load_config() + assert config.providers == {} + + def test_non_dict_provider_entry_skipped(self, tmp_path, monkeypatch): + """A provider entry that isn't a dict is silently skipped.""" + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bad_entry: "not-a-dict" + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: ".*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert "bitbucket" in cfg.providers + assert "bad_entry" not in cfg.providers + + def test_disabled_provider_not_loaded(self, tmp_path, monkeypatch): + """A provider with enabled: false is excluded from the result.""" + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + github: + enabled: false + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert "github" not in cfg.providers + + def test_github_missing_owner_raises(self, tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + repo: repo + app_id: "1" + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="'owner' and 'repo' are required"): + load_config() + + def test_github_missing_app_id_raises(self, 
tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="'app_id' is required"): + load_config() + + def test_github_pat_missing_token_raises(self, tmp_path, monkeypatch): + monkeypatch.delenv("MISSING_GH_PAT", raising=False) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + auth_method: pat + owner: org + repo: repo + token_env: MISSING_GH_PAT + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="MISSING_GH_PAT"): + load_config() + + def test_bitbucket_missing_workspace_raises(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: ".*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="'workspace' and 'repo_slug' are required"): + load_config() + + def test_private_key_loaded_from_file(self, tmp_path, monkeypatch): + """private_key_path pointing to an existing file loads the key from disk.""" + key_file = tmp_path / "app.pem" + key_file.write_text(_FAKE_PEM) + monkeypatch.delenv("GITHUB_APP_PRIVATE_KEY", raising=False) + path = _write_config(tmp_path, f""" + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "1" + private_key_path: {key_file} + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import 
load_config + cfg = load_config() + assert cfg.providers["github"].private_key == _FAKE_PEM + + def test_private_key_base64_decoded_from_env(self, tmp_path, monkeypatch): + """GITHUB_APP_PRIVATE_KEY as base64 is decoded automatically.""" + import base64 + encoded = base64.b64encode(_FAKE_PEM.encode()).decode() + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", encoded) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "1" + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github"].private_key == _FAKE_PEM + + def test_rule_with_empty_pattern_skipped(self, tmp_path, monkeypatch): + """A rule with no pattern is silently skipped.""" + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: "" + destinations: + bitbucket: main + - pattern: "feature/.*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "feature/.*" + + def test_rule_with_no_destinations_skipped(self, tmp_path, monkeypatch): + """A rule with empty destinations is silently skipped.""" + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: "feature/.*" + destinations: {} + - pattern: "release/.*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "release/.*" + + +class 
TestNullYamlValues: + """Regression tests for null/empty YAML values that previously caused AttributeError.""" + + def _base_config(self, tmp_path, monkeypatch, content: str) -> str: + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, content) + monkeypatch.setenv("CONFIG_PATH", path) + return path + + def test_empty_yaml_file_raises(self, tmp_path, monkeypatch): + """An empty YAML file must raise ValueError, not AttributeError.""" + path = tmp_path / "config.yaml" + path.write_text("") + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + with pytest.raises(ValueError): + load_config() + + def test_null_providers_section_loads_idle(self, tmp_path, monkeypatch): + """providers: with no value (null) loads successfully in idle mode.""" + self._base_config(tmp_path, monkeypatch, """ + providers: + rules: + - pattern: "feature/.*" + destinations: + bitbucket: main + """) + from pr_generator.config import load_config + config = load_config() + assert config.providers == {} + + def test_null_rules_section_raises(self, tmp_path, monkeypatch): + """rules: with no value (null) must raise ValueError, not AttributeError.""" + self._base_config(tmp_path, monkeypatch, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + """) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="no rules"): + load_config() + + def test_null_destinations_in_rule_skipped(self, tmp_path, monkeypatch): + """destinations: with no value (null) must be treated as empty and skipped.""" + self._base_config(tmp_path, monkeypatch, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: "feature/.*" + destinations: + - pattern: "release/.*" + destinations: + bitbucket: main + """) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) 
== 1 + assert cfg.rules[0].pattern == "release/.*" + + def test_null_rule_item_skipped(self, tmp_path, monkeypatch): + """A null entry in the rules list must be skipped, not crash.""" + self._base_config(tmp_path, monkeypatch, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - + - pattern: "release/.*" + destinations: + bitbucket: main + """) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "release/.*" + + def test_config_file_not_found_raises(self, monkeypatch): + """Missing config file must raise FileNotFoundError.""" + monkeypatch.setenv("CONFIG_PATH", "/nonexistent/path/config.yaml") + from pr_generator.config import load_config + with pytest.raises(FileNotFoundError): + load_config() + diff --git a/tests/test_health.py b/tests/test_health.py new file mode 100644 index 0000000..35fccd1 --- /dev/null +++ b/tests/test_health.py @@ -0,0 +1,58 @@ +"""Tests for the health server.""" + +import time +import urllib.request +from threading import Event + +import pytest + +from pr_generator.health import start_health_server + +_PORT = 18081 + + +@pytest.fixture(scope="module") +def health_server(): + stop = Event() + server, ready = start_health_server(_PORT, stop) + time.sleep(0.1) + yield stop, ready + stop.set() + server.shutdown() + + +def _get(path: str) -> int: + try: + resp = urllib.request.urlopen(f"http://127.0.0.1:{_PORT}{path}", timeout=2) + return resp.status + except urllib.error.HTTPError as exc: + return exc.code + + +class TestHealthServer: + def test_livez_returns_200(self, health_server): + stop, _ready = health_server + assert _get("/livez") == 200 + + def test_healthz_alias(self, health_server): + stop, _ready = health_server + assert _get("/healthz") == 200 + + def test_readyz_returns_503_before_ready(self, health_server): + stop, ready = health_server + ready.clear() + assert _get("/readyz") == 503 + 
+ def test_readyz_returns_200_after_ready(self, health_server): + stop, ready = health_server + ready.set() + assert _get("/readyz") == 200 + + def test_livez_returns_503_when_stopping(self, health_server): + stop, _ready = health_server + stop.set() + assert _get("/livez") == 503 + stop.clear() # reset for other tests + + def test_unknown_path_returns_404(self, health_server): + assert _get("/unknown") == 404 diff --git a/tests/test_http_client.py b/tests/test_http_client.py new file mode 100644 index 0000000..0ac7115 --- /dev/null +++ b/tests/test_http_client.py @@ -0,0 +1,217 @@ +"""Tests for the shared HTTP client with retry/backoff logic.""" + +from __future__ import annotations + +import logging +from unittest.mock import MagicMock, call, patch + +import pytest +import requests + +from pr_generator.http_client import request_with_retry + + +class _TestError(Exception): + """Stub provider exception used in tests.""" + + def __init__(self, message: str, status_code: int | None = None) -> None: + super().__init__(message) + self.status_code = status_code + + +_logger = logging.getLogger("test_http_client") + + +def _make_response(status_code: int, json_data=None, text: str = ""): + resp = MagicMock() + resp.status_code = status_code + resp.text = text + resp.json.return_value = json_data or {} + return resp + + +class TestRequestWithRetrySuccess: + def test_returns_response_on_200(self): + with patch("requests.request", return_value=_make_response(200, {"ok": True})) as mock_req: + resp = request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com/api", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers={"Authorization": "Bearer tok"}, + ) + assert resp.json() == {"ok": True} + mock_req.assert_called_once() + + def test_uses_headers_factory_per_attempt(self): + call_count = 0 + + def factory(): + nonlocal call_count + call_count += 1 + return {"X-Attempt": str(call_count)} + + with 
patch("requests.request", return_value=_make_response(200)): + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers_factory=factory, + ) + assert call_count == 1 + + def test_raises_if_neither_headers_nor_factory(self): + with pytest.raises(ValueError, match="Provide either headers"): + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + ) + + +class TestRequestWithRetryHttpErrors: + def test_raises_provider_exception_on_4xx(self): + with patch("requests.request", return_value=_make_response(404, text="not found")): + with pytest.raises(_TestError) as exc_info: + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers={}, + ) + assert exc_info.value.status_code == 404 + assert "404" in str(exc_info.value) + + def test_exception_carries_status_code(self): + with patch("requests.request", return_value=_make_response(422, text="unprocessable")): + with pytest.raises(_TestError) as exc_info: + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers={}, + ) + assert exc_info.value.status_code == 422 + + def test_retries_on_500_then_succeeds(self): + responses = [_make_response(500), _make_response(200, {"ok": True})] + with patch("requests.request", side_effect=responses): + with patch("time.sleep"): # skip actual backoff delays + resp = request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: s is not None and s >= 500, + 
headers={}, + ) + assert resp.json() == {"ok": True} + + def test_raises_after_exhausting_all_retries(self): + """All 4 attempts return 503 → should raise with the last error.""" + with patch("requests.request", return_value=_make_response(503)): + with patch("time.sleep"): + with pytest.raises(_TestError) as exc_info: + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: s is not None and s >= 500, + headers={}, + ) + assert exc_info.value.status_code == 503 + + def test_no_retry_on_4xx(self): + """4xx errors should NOT be retried — only one HTTP call made.""" + with patch("requests.request", return_value=_make_response(400)) as mock_req: + with pytest.raises(_TestError): + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: s is not None and s >= 500, + headers={}, + ) + assert mock_req.call_count == 1 + + +class TestRequestWithRetryNetworkErrors: + def test_raises_on_network_exception(self): + with patch("requests.request", side_effect=requests.ConnectionError("refused")): + with pytest.raises(_TestError) as exc_info: + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers={}, + ) + assert exc_info.value.status_code is None + assert "Request failure" in str(exc_info.value) + + def test_retries_on_network_exception_then_succeeds(self): + responses = [ + requests.ConnectionError("refused"), + _make_response(200, {"ok": True}), + ] + with patch("requests.request", side_effect=responses): + with patch("time.sleep"): + resp = request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: 
e is not None, + headers={}, + ) + assert resp.json() == {"ok": True} + + def test_backoff_delays_are_applied(self): + """All 4 attempts fail → sleep called 3 times with backoff delays.""" + with patch("requests.request", side_effect=requests.ConnectionError("x")): + with patch("time.sleep") as mock_sleep: + with pytest.raises(_TestError): + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: True, + headers={}, + ) + assert mock_sleep.call_count == 3 + assert mock_sleep.call_args_list == [call(0.5), call(1), call(2)] diff --git a/tests/test_logging_config.py b/tests/test_logging_config.py new file mode 100644 index 0000000..6241e68 --- /dev/null +++ b/tests/test_logging_config.py @@ -0,0 +1,75 @@ +"""Tests for logging setup.""" + +import json +import logging + +from pr_generator.logging_config import setup_logging + + +class TestSetupLogging: + def test_text_format_sets_level(self): + setup_logging("DEBUG") + assert logging.getLogger().level == logging.DEBUG + + def test_info_level(self): + setup_logging("INFO") + assert logging.getLogger().level == logging.INFO + + def test_invalid_level_falls_back_to_info(self): + setup_logging("NOTAREAL") + assert logging.getLogger().level == logging.INFO + + def test_text_format_is_plain_formatter(self): + setup_logging("INFO", json_format=False) + root = logging.getLogger() + assert len(root.handlers) == 1 + assert not isinstance(root.handlers[0].formatter, logging.Formatter.__class__) + + def test_json_format_emits_valid_json(self): + setup_logging("INFO", json_format=True) + root = logging.getLogger() + formatter = root.handlers[0].formatter + record = logging.LogRecord( + name="test", level=logging.INFO, pathname="", lineno=0, + msg="hello %s", args=("world",), exc_info=None, + ) + output = formatter.format(record) + parsed = json.loads(output) + assert parsed["message"] == "hello world" + assert 
parsed["level"] == "INFO" + assert "timestamp" in parsed + + def test_json_format_includes_exception(self): + setup_logging("INFO", json_format=True) + formatter = logging.getLogger().handlers[0].formatter + try: + raise ValueError("boom") + except ValueError: + import sys + exc_info = sys.exc_info() + record = logging.LogRecord( + name="test", level=logging.ERROR, pathname="", lineno=0, + msg="err", args=(), exc_info=exc_info, + ) + output = json.loads(formatter.format(record)) + assert "exception" in output + assert "ValueError" in output["exception"] + + def test_replaces_existing_handlers(self): + root = logging.getLogger() + root.addHandler(logging.NullHandler()) + initial_count = len(root.handlers) + setup_logging("INFO") + assert len(root.handlers) == 1 + + def test_json_format_includes_stack_info(self): + setup_logging("INFO", json_format=True) + formatter = logging.getLogger().handlers[0].formatter + record = logging.LogRecord( + name="test", level=logging.WARNING, pathname="", lineno=0, + msg="with stack", args=(), exc_info=None, + ) + record.stack_info = "Stack (most recent call last):\n File 'x.py', line 1" + output = json.loads(formatter.format(record)) + assert "stack_info" in output + assert "most recent" in output["stack_info"] diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..44f0be5 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,50 @@ +"""Tests for models and data structures.""" + +import re +import pytest +from pr_generator.models import ( + AppConfig, CycleResult, ProviderConfig, RuleResult, ScanRule, +) + + +def test_provider_config_is_immutable(): + cfg = ProviderConfig(name="github", enabled=True) + with pytest.raises(Exception): + cfg.name = "bitbucket" # type: ignore[misc] + + +def test_app_config_is_immutable(): + cfg = AppConfig( + scan_frequency=300, + log_level="INFO", + log_format="text", + dry_run=False, + health_port=8080, + providers={}, + rules=[], + ) + with pytest.raises(Exception): 
+ cfg.dry_run = True # type: ignore[misc] + + +def test_scan_rule_destinations_default_empty(): + rule = ScanRule(pattern=".*", compiled=re.compile(".*")) + assert rule.destinations == {} + + +def test_rule_result_defaults(): + r = RuleResult(rule_pattern="x", provider="github", destination="main") + assert r.processed == 0 + assert r.created == 0 + assert r.skipped_existing == 0 + assert r.simulated == 0 + assert r.errors == 0 + + +def test_cycle_result_aggregation(): + r1 = RuleResult("p1", "github", "main", processed=3, created=1, skipped_existing=2) + r2 = RuleResult("p2", "bitbucket", "nonpro", processed=5, created=3, errors=1) + cycle = CycleResult(cycle_id=1, rule_results=[r1, r2]) + assert sum(r.processed for r in cycle.rule_results) == 8 + assert sum(r.created for r in cycle.rule_results) == 4 + assert sum(r.errors for r in cycle.rule_results) == 1 diff --git a/tests/test_providers.py b/tests/test_providers.py new file mode 100644 index 0000000..294526e --- /dev/null +++ b/tests/test_providers.py @@ -0,0 +1,411 @@ +"""Unit tests for GitHub and Bitbucket provider implementations.""" + +from __future__ import annotations + +import time +from unittest.mock import MagicMock, patch + +import pytest + +from pr_generator.providers.bitbucket import BitbucketError, BitbucketProvider +from pr_generator.providers.github import GitHubError, GitHubProvider + + +# ────────────────────────────────────────────────────────── +# Helpers +# ────────────────────────────────────────────────────────── + +def _mock_response(status_code: int = 200, json_data=None, text: str = ""): + resp = MagicMock() + resp.status_code = status_code + resp.text = text + resp.json.return_value = json_data if json_data is not None else {} + return resp + + +# ────────────────────────────────────────────────────────── +# GitHub App auth — token caching and JWT logic +# ────────────────────────────────────────────────────────── + +class TestGitHubAppAuth: + """Tests for GitHub App JWT and 
installation token caching.""" + + @pytest.fixture + def provider(self, github_app_config): + return GitHubProvider(github_app_config) + + def test_get_jwt_raises_without_credentials(self, provider): + provider._app_id = "" + with pytest.raises(RuntimeError, match="Missing GITHUB_APP_ID"): + provider._new_jwt() + + def test_get_jwt_cached_within_window(self, provider): + provider._jwt_cache = "cached-jwt" + provider._jwt_exp = time.time() + 300 # well within expiry + + with patch.object(provider, "_new_jwt") as mock_new_jwt: + result = provider._get_jwt() + + mock_new_jwt.assert_not_called() + assert result == "cached-jwt" + + def test_get_jwt_refreshed_when_expired(self, provider): + provider._jwt_cache = "old-jwt" + provider._jwt_exp = time.time() - 1 # already expired + + with patch.object(provider, "_new_jwt", return_value="new-jwt"): + result = provider._get_jwt() + + assert result == "new-jwt" + + def test_get_installation_token_cached(self, provider): + provider._install_token = "cached-token" + provider._install_token_exp = time.time() + 300 + + with patch.object(provider, "_request") as mock_req: + result = provider._get_installation_token() + + mock_req.assert_not_called() + assert result == "cached-token" + + def test_get_installation_token_fetched_when_missing(self, provider): + install_resp = _mock_response(201, { + "token": "ghs_fresh_token", + "expires_at": "2099-01-01T00:00:00Z", + }) + with patch.object(provider, "_request", return_value=install_resp): + result = provider._get_installation_token() + + assert result == "ghs_fresh_token" + assert provider._install_token == "ghs_fresh_token" + + def test_get_installation_token_uses_55min_default_on_bad_expiry(self, provider): + install_resp = _mock_response(201, {"token": "ghs_tok", "expires_at": "not-a-date"}) + before = time.time() + with patch.object(provider, "_request", return_value=install_resp): + provider._get_installation_token() + after = time.time() + + # 55 min default: expiry should be 
~3300 seconds from now + assert 3290 < provider._install_token_exp - before < 3310 + (after - before) + + def test_resolve_installation_id_uses_config_value(self, provider): + """When installation_id is provided in config, no API call is made.""" + assert provider._installation_id == "67890" + with patch.object(provider, "_request") as mock_req: + result = provider._resolve_installation_id() + mock_req.assert_not_called() + assert result == "67890" + + def test_resolve_installation_id_fetches_and_caches_when_missing(self, provider): + """When installation_id is absent, it is fetched from the API and cached.""" + provider._installation_id = "" + api_resp = _mock_response(200, {"id": 99999}) + + with patch.object(provider, "_request", return_value=api_resp) as mock_req: + result1 = provider._resolve_installation_id() + # Second call should use cached value — no extra API call + result2 = provider._resolve_installation_id() + + assert result1 == "99999" + assert result2 == "99999" + assert provider._installation_id == "99999" # cached on instance + assert mock_req.call_count == 1 # only one API call total + + def test_resolve_installation_id_raises_when_api_returns_no_id(self, provider): + provider._installation_id = "" + with patch.object(provider, "_request", return_value=_mock_response(200, {})): + with pytest.raises(RuntimeError, match="Could not resolve installation id"): + provider._resolve_installation_id() + + def test_headers_use_installation_token_for_app_auth(self, provider): + with patch.object(provider, "_get_installation_token", return_value="ghs_tok"): + hdrs = provider._headers(installation=True) + assert hdrs["Authorization"] == "Bearer ghs_tok" + + def test_headers_use_jwt_for_non_installation_calls(self, provider): + with patch.object(provider, "_get_jwt", return_value="jwt.token.here"): + hdrs = provider._headers(installation=False) + assert hdrs["Authorization"] == "Bearer jwt.token.here" + + def 
test_new_jwt_generates_token_with_valid_credentials(self, provider): + with patch("pr_generator.providers.github.jwt.encode", return_value="signed.jwt") as mock_enc: + result = provider._new_jwt() + assert result == "signed.jwt" + call_payload = mock_enc.call_args[0][0] + assert call_payload["iss"] == "12345" + assert "iat" in call_payload and "exp" in call_payload + + def test_get_branches_returns_empty_when_app_config_incomplete(self): + from pr_generator.models import ProviderConfig + cfg = ProviderConfig( + name="github", type="github", enabled=True, + auth_method="app", owner="org", repo="repo", + app_id="", private_key="", # missing credentials + ) + prov = GitHubProvider(cfg) + assert prov.get_branches() == [] + + +# ────────────────────────────────────────────────────────── +# GitHub PAT provider (simpler — no token caching) +# ────────────────────────────────────────────────────────── + +class TestGitHubProviderPAT: + """Tests for GitHub provider using PAT authentication.""" + + @pytest.fixture + def provider(self, github_pat_config): + return GitHubProvider(github_pat_config) + + def test_name_matches_config(self, provider): + assert provider.name == "github" + + def test_get_branches_single_page(self, provider): + page_data = [{"name": "main"}, {"name": "feature/x"}] + with patch.object(provider, "_request", return_value=_mock_response(200, page_data)): + branches = provider.get_branches() + assert branches == ["main", "feature/x"] + + def test_get_branches_empty(self, provider): + with patch.object(provider, "_request", return_value=_mock_response(200, [])): + branches = provider.get_branches() + assert branches == [] + + def test_get_branches_multi_page(self, provider): + """Two pages: first returns 100 items (triggers next page), second returns 2.""" + page1 = [{"name": f"branch-{i}"} for i in range(100)] + page2 = [{"name": "extra-1"}, {"name": "extra-2"}] + responses = iter([_mock_response(200, page1), _mock_response(200, page2)]) + with 
patch.object(provider, "_request", side_effect=lambda *a, **kw: next(responses)): + branches = provider.get_branches() + assert len(branches) == 102 + + def test_check_existing_pr_found(self, provider): + pr_list = [{"number": 1, "title": "Merge feature/x into main"}] + with patch.object(provider, "_request", return_value=_mock_response(200, pr_list)): + assert provider.check_existing_pr("feature/x", "main") is True + + def test_check_existing_pr_not_found(self, provider): + with patch.object(provider, "_request", return_value=_mock_response(200, [])): + assert provider.check_existing_pr("feature/x", "main") is False + + def test_check_existing_pr_uses_cache(self, provider): + """Second call with same args should not make an HTTP request.""" + with patch.object(provider, "_request", return_value=_mock_response(200, [])) as mock_req: + provider.check_existing_pr("feature/x", "main") + provider.check_existing_pr("feature/x", "main") + assert mock_req.call_count == 1 + + def test_reset_cycle_cache_clears_pr_cache(self, provider): + with patch.object(provider, "_request", return_value=_mock_response(200, [])) as mock_req: + provider.check_existing_pr("feature/x", "main") + provider.reset_cycle_cache() + provider.check_existing_pr("feature/x", "main") + assert mock_req.call_count == 2 + + def test_create_pull_request_success(self, provider): + pr_resp = {"number": 42, "title": "Merge feature/x into main"} + with patch.object(provider, "_branch_exists", return_value=True), \ + patch.object(provider, "_request", return_value=_mock_response(201, pr_resp)): + provider.create_pull_request("feature/x", "main") + assert provider._pr_cache[("feature/x", "main")] is True + + def test_create_pull_request_skips_missing_branch(self, provider): + with patch.object(provider, "_branch_exists", return_value=False), \ + patch.object(provider, "_request") as mock_req: + provider.create_pull_request("feature/gone", "main") + mock_req.assert_not_called() + + def 
test_branch_exists_returns_true(self, provider): + with patch.object(provider, "_request", return_value=_mock_response(200, {"name": "feature/x"})): + assert provider._branch_exists("feature/x") is True + + def test_branch_exists_returns_false_on_404(self, provider): + err = GitHubError("GitHub API error 404: not found", status_code=404) + with patch.object(provider, "_request", side_effect=err): + assert provider._branch_exists("feature/gone") is False + + def test_branch_exists_reraises_non_404(self, provider): + err = GitHubError("GitHub API error 500: server error", status_code=500) + with patch.object(provider, "_request", side_effect=err): + with pytest.raises(GitHubError): + provider._branch_exists("feature/x") + + def test_headers_use_pat(self, provider): + hdrs = provider._headers() + assert hdrs["Authorization"] == "token ghp_testtoken123" + + def test_get_branches_returns_empty_when_config_incomplete(self, github_pat_config): + from pr_generator.models import ProviderConfig + cfg = ProviderConfig( + name="github", type="github", enabled=True, + auth_method="pat", owner="", repo="", token="", + ) + prov = GitHubProvider(cfg) + assert prov.get_branches() == [] + + def test_should_retry_true_on_exception(self, provider): + assert provider._should_retry(None, RuntimeError("conn error")) is True + + def test_should_retry_true_on_5xx(self, provider): + assert provider._should_retry(503, None) is True + + def test_should_retry_true_on_429(self, provider): + assert provider._should_retry(429, None) is True + + def test_should_retry_false_on_4xx(self, provider): + assert provider._should_retry(404, None) is False + + def test_branch_exists_uses_cache(self, provider): + provider._branch_cache["feature/cached"] = True + with patch.object(provider, "_request") as mock_req: + result = provider._branch_exists("feature/cached") + mock_req.assert_not_called() + assert result is True + + def test_request_delegates_to_retry_client(self, provider): + """_request must call 
request_with_retry (exercises the method body).""" + with patch("pr_generator.providers.github.request_with_retry", return_value=_mock_response(200)) as mock_retry: + provider._request("GET", "https://api.github.com/repos/org/repo/branches") + mock_retry.assert_called_once() + call_kw = mock_retry.call_args.kwargs + assert call_kw["method"] == "GET" + assert call_kw["exception_cls"] is GitHubError + + +# ────────────────────────────────────────────────────────── +# Bitbucket provider +# ────────────────────────────────────────────────────────── + +class TestBitbucketProvider: + """Tests for Bitbucket Cloud provider.""" + + @pytest.fixture + def provider(self, bitbucket_provider_config): + return BitbucketProvider(bitbucket_provider_config) + + def test_name_matches_config(self, provider): + assert provider.name == "bitbucket" + + def test_get_branches_single_page(self, provider): + data = {"values": [{"name": "main"}, {"name": "feature/y"}]} + with patch.object(provider, "_request", return_value=_mock_response(200, data)): + branches = provider.get_branches() + assert branches == ["main", "feature/y"] + + def test_get_branches_multi_page(self, provider): + """Uses 'next' key to determine pagination.""" + page1 = {"values": [{"name": "a"}, {"name": "b"}], "next": "http://page2"} + page2 = {"values": [{"name": "c"}]} + responses = iter([_mock_response(200, page1), _mock_response(200, page2)]) + with patch.object(provider, "_request", side_effect=lambda *a, **kw: next(responses)): + branches = provider.get_branches() + assert branches == ["a", "b", "c"] + + def test_get_branches_missing_token_returns_empty(self, bitbucket_provider_config): + from pr_generator.models import ProviderConfig + cfg = ProviderConfig( + name="bitbucket", type="bitbucket", enabled=True, + workspace="ws", repo_slug="rs", token="", + ) + prov = BitbucketProvider(cfg) + assert prov.get_branches() == [] + + def test_check_existing_pr_found(self, provider): + data = {"values": [{"id": 1}]} + with 
patch.object(provider, "_request", return_value=_mock_response(200, data)): + assert provider.check_existing_pr("feature/y", "main") is True + + def test_check_existing_pr_not_found(self, provider): + data = {"values": []} + with patch.object(provider, "_request", return_value=_mock_response(200, data)): + assert provider.check_existing_pr("feature/y", "main") is False + + def test_check_existing_pr_uses_query_filter(self, provider): + """Verify the q param is sent (efficient single-request lookup).""" + data = {"values": []} + with patch.object(provider, "_request", return_value=_mock_response(200, data)) as mock_req: + provider.check_existing_pr("feature/y", "main") + call_kwargs = mock_req.call_args + params = call_kwargs.kwargs.get("params", {}) + assert "q" in params + assert 'source.branch.name="feature/y"' in params["q"] + assert 'destination.branch.name="main"' in params["q"] + assert params.get("pagelen") == 1 + + def test_check_existing_pr_uses_cache(self, provider): + data = {"values": []} + with patch.object(provider, "_request", return_value=_mock_response(200, data)) as mock_req: + provider.check_existing_pr("feature/y", "main") + provider.check_existing_pr("feature/y", "main") + assert mock_req.call_count == 1 + + def test_reset_cycle_cache_clears_pr_cache(self, provider): + data = {"values": []} + with patch.object(provider, "_request", return_value=_mock_response(200, data)) as mock_req: + provider.check_existing_pr("feature/y", "main") + provider.reset_cycle_cache() + provider.check_existing_pr("feature/y", "main") + assert mock_req.call_count == 2 + + def test_create_pull_request_success(self, provider): + reviewers_data = {"values": [{"uuid": "{abc-123}"}]} + pr_data = {"id": 10, "title": "Merge feature/y into main"} + responses = iter([ + _mock_response(200, reviewers_data), + _mock_response(201, pr_data), + ]) + with patch.object(provider, "_request", side_effect=lambda *a, **kw: next(responses)): + provider.create_pull_request("feature/y", 
"main") + assert provider._pr_cache[("feature/y", "main")] is True + + def test_create_pull_request_includes_close_source_branch(self, provider): + """close_source_branch from config must appear in the POST payload.""" + reviewers_data = {"values": []} + pr_data = {"id": 11} + responses = iter([_mock_response(200, reviewers_data), _mock_response(201, pr_data)]) + with patch.object(provider, "_request", side_effect=lambda *a, **kw: next(responses)) as mock_req: + provider.create_pull_request("feature/y", "main") + pr_call = list(mock_req.call_args_list)[-1] + payload = pr_call.kwargs.get("json", {}) + assert "close_source_branch" in payload + assert payload["close_source_branch"] is True + + def test_bitbucket_error_carries_status_code(self): + """BitbucketError.__init__ must store the status_code attribute.""" + err = BitbucketError("boom", status_code=503) + assert str(err) == "boom" + assert err.status_code == 503 + + def test_bitbucket_error_defaults_status_code_to_none(self): + err = BitbucketError("network error") + assert err.status_code is None + + def test_should_retry_returns_true_on_exception(self, provider): + assert provider._should_retry(None, exc=ValueError("timeout")) is True + + def test_should_retry_returns_true_on_5xx(self, provider): + assert provider._should_retry(503, exc=None) is True + + def test_should_retry_returns_false_on_4xx(self, provider): + assert provider._should_retry(404, exc=None) is False + + def test_get_default_reviewers_returns_empty_on_missing_config(self): + from pr_generator.models import ProviderConfig + cfg = ProviderConfig( + name="bitbucket", type="bitbucket", enabled=True, + token="", workspace="", repo_slug="", + ) + prov = BitbucketProvider(cfg) + result = prov._get_default_reviewers() + assert result == [] + + def test_request_delegates_to_retry_client(self, provider): + """_request must call request_with_retry (exercises the method body).""" + with patch("pr_generator.providers.bitbucket.request_with_retry", 
return_value=_mock_response(200)) as mock_retry: + provider._request("GET", "https://api.bitbucket.org/2.0/repos/ws/r") + mock_retry.assert_called_once() + call_kw = mock_retry.call_args.kwargs + assert call_kw["method"] == "GET" + assert call_kw["exception_cls"] is BitbucketError diff --git a/tests/test_scanner.py b/tests/test_scanner.py new file mode 100644 index 0000000..1a2ebcd --- /dev/null +++ b/tests/test_scanner.py @@ -0,0 +1,182 @@ +"""Tests for the scan cycle orchestrator.""" + +import re +from unittest.mock import MagicMock, patch + +import pytest + +from pr_generator.models import AppConfig, CycleResult, ProviderConfig, ScanRule +from pr_generator.scanner import scan_cycle + + +def _make_config(rules, providers, dry_run=False): + return AppConfig( + scan_frequency=60, + log_level="DEBUG", + log_format="text", + dry_run=dry_run, + health_port=8080, + providers=providers, + rules=rules, + ) + + +def _mock_provider(name: str, branches: list[str], existing_prs: set | None = None): + prov = MagicMock() + prov.name = name + prov.get_branches.return_value = branches + prov.check_existing_pr.side_effect = lambda src, dst: (src, dst) in (existing_prs or set()) + prov.create_pull_request.return_value = None + prov.reset_cycle_cache.return_value = None + return prov + + +class TestScanCycle: + def test_creates_prs_for_matched_branches(self): + rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["feature/a", "feature/b", "hotfix/c", "main"]) + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + assert prov.create_pull_request.call_count == 2 + prov.create_pull_request.assert_any_call("feature/a", "main") + prov.create_pull_request.assert_any_call("feature/b", "main") + assert result.rule_results[0].created == 2 + assert result.rule_results[0].processed == 2 + + def test_skips_existing_prs(self): + 
rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["feature/a"], existing_prs={("feature/a", "main")}) + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + prov.create_pull_request.assert_not_called() + assert result.rule_results[0].skipped_existing == 1 + + def test_dry_run_does_not_create_prs(self): + rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["feature/a"]) + config = _make_config([rule], {"github": MagicMock()}, dry_run=True) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + prov.create_pull_request.assert_not_called() + assert result.rule_results[0].simulated == 1 + + def test_destination_branch_excluded_from_matches(self): + rule = ScanRule( + pattern=".*", + compiled=re.compile(".*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["main", "feature/a"]) + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + # "main" must be excluded; only "feature/a" should be processed + assert result.rule_results[0].processed == 1 + + def test_multiple_rules_processed(self): + rule1 = ScanRule( + pattern="nonpro/.*", + compiled=re.compile("nonpro/.*"), + destinations={"github": "develop"}, + ) + rule2 = ScanRule( + pattern="pro/.*", + compiled=re.compile("pro/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["nonpro/svc1", "pro/svc2", "unrelated"]) + config = _make_config([rule1, rule2], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + assert len(result.rule_results) == 2 + total_created = sum(r.created for r in result.rule_results) + assert total_created == 2 + + def test_provider_error_does_not_abort_other_rules(self): 
+ rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["feature/a"]) + prov.create_pull_request.side_effect = RuntimeError("API down") + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + assert result.rule_results[0].errors == 1 + + def test_unknown_provider_in_rule_is_skipped(self): + rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"unknown_provider": "main"}, + ) + prov = _mock_provider("github", ["feature/a"]) + config = _make_config([rule], {"github": MagicMock()}) + + # Should not raise; the rule simply has no matching active provider + result = scan_cycle(config, {"github": prov}, cycle_id=1) + assert result.rule_results == [] + + def test_reset_cycle_cache_called_on_all_providers(self): + rule = ScanRule(".*", re.compile(".*"), destinations={"github": "main"}) + prov = _mock_provider("github", []) + config = _make_config([rule], {"github": MagicMock()}) + + scan_cycle(config, {"github": prov}, cycle_id=1) + + prov.reset_cycle_cache.assert_called_once() + + def test_get_branches_error_returns_empty_branch_list(self): + """If get_branches raises, the provider gets an empty branch list (no crash).""" + rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", []) + prov.get_branches.side_effect = RuntimeError("API down") + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + # No branches → no PRs created, but no exception raised either + assert result.rule_results[0].processed == 0 + + def test_process_rule_unexpected_exception_logged(self): + """An exception raised outside _process_rule's inner try is caught by the futures loop.""" + rule = ScanRule( + pattern="feature/.*", + 
compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + # Make compiled.match raise — this happens outside the inner try in _process_rule + rule.compiled = MagicMock() + rule.compiled.match.side_effect = ValueError("regex engine failure") + prov = _mock_provider("github", ["feature/a"]) + config = _make_config([rule], {"github": MagicMock()}) + + # scan_cycle should catch the exception and not propagate it + result = scan_cycle(config, {"github": prov}, cycle_id=1) + assert result is not None + assert result.rule_results == []