From 1955d663a008bbff73f70a1e298a8c64739d507e Mon Sep 17 00:00:00 2001 From: Adrian Martin Garcia Date: Wed, 25 Mar 2026 15:49:49 +0100 Subject: [PATCH 1/2] feat: upload code --- .dockerignore | 17 + .github/copilot-instructions.md | 99 ++++ .github/dependabot.yml | 62 ++ .github/workflows/docker-build.yml | 155 +++++ .github/workflows/github-auto-assign.yml | 19 + .gitignore | 26 + CHANGELOG.md | 66 +++ Dockerfile | 40 ++ LICENSE | 21 + README.md | 334 ++++++++++- config.yaml.example | 140 +++++ package.json | 21 + pyproject.toml | 34 ++ requirements.txt | 4 + src/pr_generator/__init__.py | 3 + src/pr_generator/__main__.py | 109 ++++ src/pr_generator/config.py | 234 ++++++++ src/pr_generator/health.py | 70 +++ src/pr_generator/http_client.py | 86 +++ src/pr_generator/logging_config.py | 43 ++ src/pr_generator/models.py | 75 +++ src/pr_generator/providers/__init__.py | 0 src/pr_generator/providers/base.py | 37 ++ src/pr_generator/providers/bitbucket.py | 164 ++++++ src/pr_generator/providers/github.py | 270 +++++++++ src/pr_generator/scanner.py | 154 +++++ tests/__init__.py | 0 tests/conftest.py | 49 ++ tests/test_config.py | 716 +++++++++++++++++++++++ tests/test_health.py | 58 ++ tests/test_http_client.py | 217 +++++++ tests/test_logging_config.py | 75 +++ tests/test_models.py | 50 ++ tests/test_providers.py | 411 +++++++++++++ tests/test_scanner.py | 182 ++++++ 35 files changed, 4040 insertions(+), 1 deletion(-) create mode 100644 .dockerignore create mode 100644 .github/copilot-instructions.md create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/docker-build.yml create mode 100644 .github/workflows/github-auto-assign.yml create mode 100644 .gitignore create mode 100644 CHANGELOG.md create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 config.yaml.example create mode 100644 package.json create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 src/pr_generator/__init__.py create mode 
100644 src/pr_generator/__main__.py create mode 100644 src/pr_generator/config.py create mode 100644 src/pr_generator/health.py create mode 100644 src/pr_generator/http_client.py create mode 100644 src/pr_generator/logging_config.py create mode 100644 src/pr_generator/models.py create mode 100644 src/pr_generator/providers/__init__.py create mode 100644 src/pr_generator/providers/base.py create mode 100644 src/pr_generator/providers/bitbucket.py create mode 100644 src/pr_generator/providers/github.py create mode 100644 src/pr_generator/scanner.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/test_config.py create mode 100644 tests/test_health.py create mode 100644 tests/test_http_client.py create mode 100644 tests/test_logging_config.py create mode 100644 tests/test_models.py create mode 100644 tests/test_providers.py create mode 100644 tests/test_scanner.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..088bfc4 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,17 @@ +.git +.github +__pycache__ +*.pyc +*.pyo +*.pyd +.pytest_cache +.mypy_cache +.ruff_cache +tests/ +docs/ +.venv/ +venv/ +*.md +*.egg-info +dist/ +build/ diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..4025840 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,99 @@ +# Copilot Instructions — pr-generator + +## Commands + +```bash +# Install (editable, no dev extras needed) +pip install -e . 
+pip install pytest + +# Run full test suite +python -m pytest + +# Run a single test file +python -m pytest tests/test_scanner.py -v + +# Run a single test by name +python -m pytest tests/test_scanner.py::TestScanCycle::test_dry_run_does_not_create_prs -v + +# Run the application locally +CONFIG_PATH=./config.yaml python -m pr_generator + +# Run tests with coverage (configured in pyproject.toml) +python -m pytest --cov=pr_generator --cov-report=term-missing +``` + +There is no linter configured. There is no type-checker configured. + +--- + +## Architecture + +`pr-generator` is a long-running polling daemon. The main loop lives in `__main__.py`: + +1. Load `AppConfig` from YAML (`CONFIG_PATH`) or legacy env vars (fallback). +2. Instantiate active providers (`GitHubProvider` / `BitbucketProvider`). +3. Start the health HTTP server in a daemon thread. +4. Loop: run `scan_cycle()` → sleep `scan_frequency` seconds → repeat. +5. Graceful shutdown on `SIGTERM`/`SIGINT` via a `threading.Event`. + +**Scan cycle** (`scanner.py`) is two-phase, both phases concurrent via `ThreadPoolExecutor`: +- **Phase 1**: fetch all branch names from every active provider in parallel. +- **Phase 2**: for each `rule × provider` pair — filter branches by regex, check for existing PRs, create missing ones. + +**Config loading** (`config.py`) priority: YAML file → legacy env vars. YAML supports multiple named providers and multiple rules. Legacy env-var mode supports exactly one rule. + +**Provider abstraction** — `ProviderInterface` is a `runtime_checkable` Protocol in `providers/base.py`. Both `GitHubProvider` and `BitbucketProvider` satisfy it structurally (no explicit inheritance). The scanner only uses the interface. + +**All HTTP** goes through `request_with_retry` in `http_client.py`. It handles retry/backoff (delays: 0.5 s, 1 s, 2 s) and logging. Providers never call `requests` directly. + +**Releases** are automated via `semantic-release` on push to `main`. 
Version is in `src/pr_generator/__init__.py` and `pyproject.toml`. + +--- + +## Key Conventions + +### Logging format +All log lines follow the structured pattern: +``` +[Component] Step: step_name action=verb cycle_id=N detail=... +``` +Examples: `[GitHub] Step: get_branches action=end total=42`, `[Core] Step: scan_cycle action=start cycle_id=3`. + +### `request_with_retry` — `headers` vs `headers_factory` +Pass **`headers`** (a plain dict) when auth tokens don't expire between retries (Bitbucket Bearer token). +Pass **`headers_factory`** (a `() → dict` callable) when tokens may rotate between attempts (GitHub App installation tokens). The factory is called fresh on each retry attempt, so a token refresh is picked up automatically. + +### Provider exceptions must carry `status_code` +Both `GitHubError` and `BitbucketError` have the constructor signature: +```python +def __init__(self, message: str, status_code: int | None = None) -> None: +``` +`http_client.request_with_retry` calls `exception_cls(message, status_code)`. Any new provider exception class must match this signature. + +### Per-cycle caches +Each provider caches PR-existence and branch-existence lookups within one scan cycle. `reset_cycle_cache()` is called at the start of every cycle. Do not persist cache state across cycles. + +### Rule matching uses `re.match` (start-anchored) +Patterns are matched with `rule.compiled.match(branch_name)`, not `re.search`. Patterns must match from the beginning of the branch name. + +### `AppConfig` and `ProviderConfig` are frozen dataclasses +Neither can be mutated after construction. In tests, build a new instance rather than modifying fields. + +### New provider checklist +To add a third provider (e.g. GitLab): +1. Create `src/pr_generator/providers/gitlab.py` implementing all 5 methods of `ProviderInterface`. +2. Define `GitLabError(Exception)` with `(message: str, status_code: int | None = None)`. +3. 
Add `"gitlab"` to the `ptype` allowlist in `config._parse_providers_from_yaml`. +4. In `_request`, pass `headers=` if tokens are static or `headers_factory=` if they refresh mid-cycle. +5. Add a `_parse_gitlab_provider` function and wire it in `__main__.py`. +6. Add tests in `tests/test_providers.py`. + +### Testing patterns +- **Scanner tests** — mock full providers with `MagicMock()` (see `_mock_provider` helper in `test_scanner.py`). +- **Provider tests** — mock `provider._request` directly, not `requests.request`. +- **Config tests** — use `tmp_path` fixture + `monkeypatch.setenv("CONFIG_PATH", path)`. +- Tests are plain classes with descriptive method names; no pytest markers are used. + +### Docker +Config is mounted at `/etc/pr-generator/config.yaml` (the default `CONFIG_PATH`). The container runs as non-root user `prgen`. `requirements.txt` drives the Docker build; `pyproject.toml` is the authoritative dependency source — keep both in sync when adding dependencies. diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..e9d6f3a --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,62 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: "/" + schedule: + interval: monthly + open-pull-requests-limit: 10 + labels: + - enhancement + - dependency-management + assignees: + - devops-ia/devops-ia + groups: + github-actions: + patterns: + - "*" + commit-message: + prefix: chore + include: scope + rebase-strategy: auto + pull-request-branch-name: + separator: "-" + - package-ecosystem: pip + directory: "/" + schedule: + interval: monthly + open-pull-requests-limit: 10 + labels: + - enhancement + - dependency-management + assignees: + - devops-ia/devops-ia + groups: + pip: + patterns: + - "*" + commit-message: + prefix: chore + include: scope + rebase-strategy: auto + pull-request-branch-name: + separator: "-" + - package-ecosystem: docker + directory: "/" + schedule: + interval: monthly + 
open-pull-requests-limit: 10 + labels: + - enhancement + - dependency-management + assignees: + - devops-ia/devops-ia + groups: + docker: + patterns: + - "*" + commit-message: + prefix: chore + include: scope + rebase-strategy: auto + pull-request-branch-name: + separator: "-" \ No newline at end of file diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml new file mode 100644 index 0000000..b9bcaef --- /dev/null +++ b/.github/workflows/docker-build.yml @@ -0,0 +1,155 @@ +name: Build and Push Docker Image + +permissions: {} + +env: + DOCKERHUB_USER: devopsiaci + DOCKERHUB_REPO: pr-generator + GHCR_REGISTRY: ghcr.io + GHCR_REPO: ${{ github.repository }} + +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test: + name: Test + runs-on: ubuntu-latest + permissions: + contents: read + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-python@v6 + with: + python-version: "3.13" + cache: pip + cache-dependency-path: requirements.txt + + - name: Install dependencies + run: pip install -r requirements.txt pytest + + - name: Run tests + run: python -m pytest tests/ -v + + release: + name: Release + needs: [test] + # Only run on direct pushes to main (not on pull requests) + if: github.event_name != 'pull_request' + runs-on: ubuntu-latest + permissions: + attestations: write + contents: write + id-token: write + issues: write + packages: write + pull-requests: write + + steps: + - name: Checkout + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Semantic Release + id: semantic + uses: cycjimmy/semantic-release-action@v6 + with: + tag_format: 'v${version}' + extra_plugins: | + @semantic-release/changelog + @semantic-release/git + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Set Docker metadata + id: meta + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/metadata-action@v6 + with: + images: | + ${{ env.DOCKERHUB_USER }}/${{ 
env.DOCKERHUB_REPO }} + ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_REPO }} + labels: | + org.opencontainers.image.maintainer=adrianmg231189@gmail.com + org.opencontainers.image.title=PR Generator + org.opencontainers.image.description=PR Generator to automate pull request management + org.opencontainers.image.vendor=devops-ia + tags: | + type=raw,value=${{ steps.semantic.outputs.new_release_git_tag }} + type=raw,value=latest + + - name: Set up QEMU + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/setup-qemu-action@v4 + + - name: Set up Docker Buildx + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/setup-buildx-action@v4 + + - name: Cache Docker layers + if: steps.semantic.outputs.new_release_published == 'true' + uses: actions/cache@v5 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-buildx- + + - name: "[DOCKERHUB] Log in" + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/login-action@v4 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + + - name: "[GHCR] Log in" + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/login-action@v4 + with: + registry: ${{ env.GHCR_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and push Docker image + id: push + if: steps.semantic.outputs.new_release_published == 'true' + uses: docker/build-push-action@v7 + with: + cache-from: type=local,src=/tmp/.buildx-cache + cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max + context: . 
+ labels: ${{ steps.meta.outputs.labels }} + platforms: linux/amd64,linux/arm64 + push: true + sbom: true + tags: ${{ steps.meta.outputs.tags }} + + - name: "[DOCKERHUB] Update registry description" + if: steps.semantic.outputs.new_release_published == 'true' + uses: peter-evans/dockerhub-description@v5 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_PASSWORD }} + repository: ${{ env.DOCKERHUB_USER }}/${{ env.DOCKERHUB_REPO }} + + - name: "[GHCR] Generate artifact attestation" + if: steps.semantic.outputs.new_release_published == 'true' + uses: actions/attest-build-provenance@v4 + with: + subject-name: ${{ env.GHCR_REGISTRY }}/${{ env.GHCR_REPO }} + subject-digest: ${{ steps.push.outputs.digest }} + push-to-registry: true + + - name: Move Docker cache + if: steps.semantic.outputs.new_release_published == 'true' + run: | + rm -rf /tmp/.buildx-cache + mv /tmp/.buildx-cache-new /tmp/.buildx-cache diff --git a/.github/workflows/github-auto-assign.yml b/.github/workflows/github-auto-assign.yml new file mode 100644 index 0000000..05a56f2 --- /dev/null +++ b/.github/workflows/github-auto-assign.yml @@ -0,0 +1,19 @@ +name: Auto-assign Issue + +on: + issues: + types: [opened] + pull_request_target: + types: [opened, ready_for_review] + +jobs: + auto-assign: + permissions: + contents: read + issues: write + pull-requests: write + uses: devops-ia/.github/.github/workflows/github-auto-assign.yml@main + with: + teams: devops-ia + secrets: + PAT_GITHUB: ${{ secrets.PAT_GITHUB }} \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2b85002 --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +*.pyd +.Python +*.egg-info/ +dist/ +build/ + +# Testing / coverage +.coverage +coverage.json +coverage.xml +htmlcov/ +.pytest_cache/ + +# Env +.env +*.env +venv/ +.venv/ + +# IDE +.vscode/ +.idea/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 
0000000..693c7be --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,66 @@ +## [1.2.3](https://github.com/devops-ia/pr-generator/compare/v1.2.2...v1.2.3) (2026-03-25) + + +### Bug Fixes + +* Fixed app and remove old values ([0eb7c8f](https://github.com/devops-ia/pr-generator/commit/0eb7c8f5a6d1e8b4a5a0da35da9ad6f61a5a6744)) + +## [1.2.2](https://github.com/devops-ia/pr-generator/compare/v1.2.1...v1.2.2) (2026-03-25) + + +### Bug Fixes + +* Fixed Dockerfile ([87fa429](https://github.com/devops-ia/pr-generator/commit/87fa429b8732c3b149e2c3bb602edbd430c900e7)) + +## [1.2.1](https://github.com/devops-ia/pr-generator/compare/v1.2.0...v1.2.1) (2026-03-25) + + +### Bug Fixes + +* Force release ([fe18685](https://github.com/devops-ia/pr-generator/commit/fe1868583176986f5119692ca365f883c7e8737e)) + +# [1.2.0](https://github.com/devops-ia/pr-generator/compare/v1.1.0...v1.2.0) (2026-03-25) + + +### Bug Fixes + +* Remove cache files ([71e9092](https://github.com/devops-ia/pr-generator/commit/71e90928a4ab0bec3a50ec471c615d0052568e3c)) +* resolve remaining gaps after critical review ([868b675](https://github.com/devops-ia/pr-generator/commit/868b6756248f90af94147c288f730133f99424eb)) + + +### Features + +* add suport for GH PAT ([b082c34](https://github.com/devops-ia/pr-generator/commit/b082c34af3b018eb616f4a9511e982719b6660ee)) +* add suport for GH PAT ([31a2e96](https://github.com/devops-ia/pr-generator/commit/31a2e96fb67088df33e4b2b8b0724e07505ddaa0)) +* refactor ([67a0c42](https://github.com/devops-ia/pr-generator/commit/67a0c4269de3b86d6197d6acc242121dfac0848f)) +* update dependencies ([6f13505](https://github.com/devops-ia/pr-generator/commit/6f135057b36fcac1dd62575c9f33cd064d0677a7)) + +# [1.2.0](https://github.com/devops-ia/pr-generator/compare/v1.1.0...v1.2.0) (2026-03-25) + + +### Bug Fixes + +* Remove cache files ([71e9092](https://github.com/devops-ia/pr-generator/commit/71e90928a4ab0bec3a50ec471c615d0052568e3c)) +* resolve remaining gaps after critical review 
([868b675](https://github.com/devops-ia/pr-generator/commit/868b6756248f90af94147c288f730133f99424eb)) + + +### Features + +* add suport for GH PAT ([b082c34](https://github.com/devops-ia/pr-generator/commit/b082c34af3b018eb616f4a9511e982719b6660ee)) +* add suport for GH PAT ([31a2e96](https://github.com/devops-ia/pr-generator/commit/31a2e96fb67088df33e4b2b8b0724e07505ddaa0)) +* refactor ([67a0c42](https://github.com/devops-ia/pr-generator/commit/67a0c4269de3b86d6197d6acc242121dfac0848f)) + +# [1.1.0](https://github.com/devops-ia/pr-generator/compare/v1.0.0...v1.1.0) (2026-03-25) + + +### Features + +* Force release ([1397554](https://github.com/devops-ia/pr-generator/commit/1397554f660c698daaf294d86d5a8f5de07f1a13)) +* Force release ([a3b7eac](https://github.com/devops-ia/pr-generator/commit/a3b7eacfb603a0107fce2604522736241c85e4db)) + +# 1.0.0 (2026-03-25) + + +### Features + +* Upload code ([9286e3e](https://github.com/devops-ia/pr-generator/commit/9286e3e65932ab2a9526beb481a370d775771f2b)) diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..826282f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,40 @@ +# ============================================================ +# Stage 1: Install Python dependencies +# ============================================================ +FROM python:3.14-slim AS builder + +WORKDIR /build +COPY requirements.txt . 
+RUN pip install --no-cache-dir --prefix=/install -r requirements.txt + +# ============================================================ +# Stage 2: Minimal runtime image +# ============================================================ +FROM python:3.14-slim + +LABEL maintainer="adrianmg231189@gmail.com" +LABEL org.opencontainers.image.source="https://github.com/devops-ia/pr-generator" +LABEL org.opencontainers.image.description="Automated PR creation from branch patterns" + +# Non-root user +RUN groupadd -r prgen && useradd -r -g prgen -d /app -s /sbin/nologin prgen + +# Copy installed packages from builder +COPY --from=builder /install /usr/local + +# Copy application source +WORKDIR /app +COPY src/ ./src/ + +RUN chown -R prgen:prgen /app + +ENV PYTHONPATH=/app/src + +USER prgen + +HEALTHCHECK --interval=30s --timeout=3s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8080/healthz')" || exit 1 + +EXPOSE 8080 + +ENTRYPOINT ["python", "-m", "pr_generator"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b6eddf8 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 DevOps Solutions + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 1d6e80c..5ca1d39 100644 --- a/README.md +++ b/README.md @@ -1 +1,333 @@ -# pr-generator \ No newline at end of file +# PR generator image + +[![CI](https://github.com/devops-ia/pr-generator/actions/workflows/docker-build.yml/badge.svg)](https://github.com/devops-ia/pr-generator/actions/workflows/docker-build.yml) +[![GitHub release](https://img.shields.io/github/v/release/devops-ia/pr-generator)](https://github.com/devops-ia/pr-generator/releases) +[![Docker Hub](https://img.shields.io/docker/v/devopsiaci/pr-generator?label=Docker%20Hub&logo=docker)](https://hub.docker.com/r/devopsiaci/pr-generator) +[![Docker Pulls](https://img.shields.io/docker/pulls/devopsiaci/pr-generator?logo=docker)](https://hub.docker.com/r/devopsiaci/pr-generator) +[![Python](https://img.shields.io/badge/python-3.11%2B-blue?logo=python&logoColor=white)](https://www.python.org) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) + +Automated Pull Request creation daemon for **GitHub** and **Bitbucket Cloud**. + +`pr-generator` runs as a long-lived service that periodically scans your repository branches, matches them against configurable regex patterns, and automatically opens Pull Requests toward the configured destination branches — skipping any PR that already exists. 
+ +--- + +## Table of Contents + +- [How it works](#how-it-works) +- [Quick start](#quick-start) +- [Configuration](#configuration) + - [YAML file](#yaml-file) +- [Providers](#providers) + - [GitHub — App authentication](#github--app-authentication) + - [GitHub — PAT authentication](#github--pat-authentication) + - [Bitbucket Cloud](#bitbucket-cloud) +- [Rules](#rules) +- [Health endpoints](#health-endpoints) +- [Docker](#docker) +- [Development](#development) + +--- + +## How it works + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Scan cycle │ +│ │ +│ 1. Fetch all branches ──▶ GitHub / Bitbucket │ +│ 2. For every rule │ +│ match branches against regex pattern │ +│ for each match │ +│ skip if open PR already exists │ +│ create PR source ──▶ destination │ +│ 3. Sleep scan_frequency seconds │ +│ 4. Repeat │ +└─────────────────────────────────────────────────────────────┘ +``` + +Key design points: + +- **Concurrent**: branches are fetched from all providers in parallel; rule×provider pairs are also processed concurrently (up to 10 workers). +- **Idempotent**: an existing open PR for the same source→destination pair is detected and skipped. +- **Dry-run mode**: log what would be created without actually calling the API. +- **Graceful shutdown**: handles `SIGTERM` / `SIGINT` and drains in-progress work. + +--- + +## Quick start + +```bash +# Install +pip install -e . + +# Point to your config file and run +CONFIG_PATH=./config.yaml pr-generator +``` + +Or with Docker: + +```bash +docker run --rm \ + -v "$(pwd)/config.yaml:/etc/pr-generator/config.yaml:ro" \ + ghcr.io/devops-ia/pr-generator:latest +``` + +--- + +## Configuration + +### YAML file + +The default config path is `/etc/pr-generator/config.yaml`. Override with the `CONFIG_PATH` environment variable. The application exits with an error at startup if the file is not found. + +```yaml +# config.yaml + +# How often (seconds) to scan for new branches. 
+scan_frequency: 300 # default: 300 + +# Logging level: DEBUG | INFO | WARNING | ERROR +log_level: INFO # default: INFO + +# Log format: "text" (human-readable) or "json" (structured, for log aggregators) +log_format: text # default: text + +# When true, PRs are logged but never actually created. +dry_run: false # default: false + +# Port for the built-in health server. +health_port: 8080 # default: 8080 + +providers: + github: + enabled: true + owner: my-org + repo: my-repo + app_id: "123456" + installation_id: "78901234" # optional — auto-resolved if omitted + private_key_path: /secrets/github-app.pem # path to PEM file + # Alternative: set GITHUB_APP_PRIVATE_KEY env var (plain PEM or base64-encoded) + timeout: 30 # HTTP timeout in seconds + + bitbucket: + enabled: true + workspace: my-workspace + repo_slug: my-repo + token_env: BITBUCKET_TOKEN # name of the env var that holds the token + close_source_branch: true # delete source branch after merge (default: true) + timeout: 30 + +rules: + - pattern: "feature/.*" # Python regex matched against branch names + destinations: + github: main + bitbucket: develop + + - pattern: "release/.*" + destinations: + github: main + + - pattern: ".*-hotfix-.*" + destinations: + bitbucket: master +``` + +#### Multiple GitHub organisations + +Use any name as the provider key and set `type: github` (or `type: bitbucket`) to identify the implementation. Rules reference providers by their name. 
+ +```yaml +providers: +  github-acme: +    type: github                  # required for non-standard key names +    enabled: true +    owner: acme-org +    repo: backend +    app_id: "111" +    private_key_path: /secrets/acme-app.pem + +  github-skunkworks: +    type: github +    enabled: true +    owner: skunkworks-org +    repo: platform +    auth_method: pat +    token_env: SKUNKWORKS_GITHUB_TOKEN + +  bitbucket:                      # "github" / "bitbucket" keys default type automatically +    enabled: true +    workspace: my-workspace +    repo_slug: my-repo +    token_env: BITBUCKET_TOKEN + +rules: +  - pattern: "feature/.*" +    destinations: +      github-acme: main +      github-skunkworks: develop +      bitbucket: develop +``` + +**Config fields reference** + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `scan_frequency` | int | `300` | Seconds between scan cycles | +| `log_level` | string | `"INFO"` | Python logging level | +| `dry_run` | bool | `false` | Simulate PR creation without API calls | +| `health_port` | int | `8080` | Port for health HTTP server | +| `providers.<name>.type` | string | *(key name)* | Provider implementation: `github` or `bitbucket`.
Required when the key name is not `github` or `bitbucket` | +| `providers.<name>.enabled` | bool | — | Activate this provider instance | +| `providers.<name>.owner` | string | — | GitHub organisation or user *(GitHub only)* | +| `providers.<name>.repo` | string | — | Repository name *(GitHub only)* | +| `providers.<name>.app_id` | string | — | GitHub App ID *(GitHub App auth)* | +| `providers.<name>.installation_id` | string | *(auto)* | Installation ID; resolved automatically if omitted *(GitHub App auth)* | +| `providers.<name>.private_key_path` | string | — | Path to GitHub App private key PEM file *(GitHub App auth)* | +| `providers.<name>.auth_method` | string | `"app"` | `app` (GitHub App) or `pat` (Personal Access Token) *(GitHub only)* | +| `providers.<name>.token_env` | string | `"GITHUB_TOKEN"` / `"BITBUCKET_TOKEN"` | Env var name containing the token *(PAT / Bitbucket)* | +| `providers.<name>.workspace` | string | — | Bitbucket workspace slug *(Bitbucket only)* | +| `providers.<name>.repo_slug` | string | — | Bitbucket repository slug *(Bitbucket only)* | +| `providers.<name>.close_source_branch` | bool | `true` | Delete source branch after PR merges *(Bitbucket only)* | +| `providers.<name>.timeout` | float | `30` | HTTP timeout (seconds) | +| `rules[].pattern` | string | — | Python regex applied to branch names | +| `rules[].destinations` | map | — | `provider_name: destination_branch` pairs | + +--- + +## Providers + +### GitHub App + +Authentication uses a [GitHub App](https://docs.github.com/en/apps/creating-github-apps/about-creating-github-apps/about-creating-github-apps). Two modes are available: + +**GitHub App (recommended)** — the provider: +1. Signs a short-lived JWT with the App's RSA private key. +2. Exchanges it for an installation access token (cached up to ~55 minutes). +3. Uses the installation token for all API calls. +4. Caches per-cycle PR-existence and branch-existence lookups to reduce API usage.
+ +**Personal Access Token (PAT)** — set `auth_method: pat` and point `token_env` at an env var holding the PAT. + +Required GitHub App permissions: **Contents** (read), **Pull requests** (read & write). + +### Bitbucket Cloud + +Authentication uses a project/repository **Bearer token** (HTTP access token). + +The provider fetches default reviewers at PR creation time and automatically includes them in the payload. + +Required Bitbucket permissions: **Repositories** (read), **Pull requests** (read & write). + +--- + +## Rules + +Each rule has: + +- **`pattern`** — a Python regex (`re.compile`) matched against branch names using `re.match` (anchored at the start). The destination branch is excluded from matching. +- **`destinations`** — a map of `provider_name → destination_branch`. Only providers that are both listed here **and** active in `providers` are processed. + +```yaml +rules: + - pattern: "feature/.*" + destinations: + github: main # create PRs toward "main" on GitHub + bitbucket: develop # create PRs toward "develop" on Bitbucket +``` + +Multiple rules are supported. + +--- + +## Health endpoints + +A lightweight HTTP server starts on `health_port` (default `8080`): + +| Endpoint | Behaviour | +|----------|-----------| +| `GET /livez` | `200 live` while running; `503 shutting down` during shutdown | +| `GET /healthz` | Same as `/livez` (alias) | +| `GET /readyz` | `200 ready` after the **first** scan cycle completes; `503 not ready` before that | + +Suitable for Kubernetes liveness, readiness, and startup probes: + +```yaml +livenessProbe: + httpGet: + path: /livez + port: 8080 +readinessProbe: + httpGet: + path: /readyz + port: 8080 +``` + +--- + +## Docker + +The image is built from a two-stage Dockerfile: + +- **Stage 1** – installs Python dependencies into `/install`. +- **Stage 2** – minimal `python:3.14-slim` runtime; runs as a non-root user (`prgen`). + +```bash +# Build +docker build -t pr-generator . 
+ +# Run with YAML config +docker run --rm \ + -v "$(pwd)/config.yaml:/etc/pr-generator/config.yaml:ro" \ + -v "$(pwd)/github-app.pem:/secrets/github-app.pem:ro" \ + -e BITBUCKET_TOKEN= \ + -p 8080:8080 \ + pr-generator +``` + +--- + +## Development + +**Prerequisites**: Python ≥ 3.11 + +```bash +# Create and activate a virtual environment +python -m venv .venv +source .venv/bin/activate + +# Install the package in editable mode with dev extras +pip install -e . +pip install pytest + +# Run tests +pytest + +# Run with a local config +CONFIG_PATH=./config.yaml python -m pr_generator +``` + +**Project layout** + +``` +src/pr_generator/ +├── __main__.py # Entry point: startup, provider init, scan loop +├── config.py # Config loading from YAML file +├── models.py # Dataclasses: AppConfig, ProviderConfig, ScanRule, … +├── scanner.py # Concurrent scan cycle orchestrator +├── health.py # HTTP health server (/livez, /readyz, /healthz) +├── http_client.py # Shared HTTP client with retry/backoff +├── logging_config.py # Logging setup (plain text or structured JSON) +└── providers/ + ├── base.py # ProviderInterface Protocol + ├── github.py # GitHub App provider + └── bitbucket.py # Bitbucket Cloud provider + +tests/ +├── conftest.py # Shared pytest fixtures +├── test_config.py # Config loading tests +├── test_health.py # Health server tests +├── test_models.py # Model tests +└── test_scanner.py # Scan cycle tests +``` diff --git a/config.yaml.example b/config.yaml.example new file mode 100644 index 0000000..2874014 --- /dev/null +++ b/config.yaml.example @@ -0,0 +1,140 @@ +# config.yaml.example +# +# Copy this file to config.yaml (or any path) and set CONFIG_PATH to point to it. +# Remove or comment out sections for providers you are not using. 
+# +# Usage: +# CONFIG_PATH=./config.yaml pr-generator + +# ────────────────────────────────────────────────────────── +# General settings +# ────────────────────────────────────────────────────────── + +# How often (seconds) to scan for new branches. +scan_frequency: 300 # default: 300 + +# Logging level: DEBUG | INFO | WARNING | ERROR +log_level: INFO # default: INFO + +# Log format: "text" (human-readable) or "json" (for log aggregators like ELK / Loki). +log_format: text # default: text + +# When true, PRs are logged but never actually created. Useful for testing config. +dry_run: false # default: false + +# Port for the built-in health HTTP server (/livez, /readyz, /healthz). +health_port: 8080 # default: 8080 + +# ────────────────────────────────────────────────────────── +# Providers +# +# Each entry is a named provider instance. +# The key is a free-form name used in rules (e.g. "github", "github-acme"). +# Set "type" to "github" or "bitbucket" when the key name is not one of those. +# ────────────────────────────────────────────────────────── + +providers: + + # ── GitHub (GitHub App authentication — recommended) ────────────── + github: + enabled: true + + # The GitHub organisation or user that owns the repository. + owner: my-org + + # The repository name. + repo: my-repo + + # Authentication method: "app" (GitHub App, default) or "pat" (Personal Access Token). + auth_method: app # default: app + + # GitHub App credentials (required when auth_method is "app"). + app_id: "123456" + installation_id: "78901234" # optional — auto-resolved from the repo if omitted + + # Path to the GitHub App RSA private key PEM file. + private_key_path: /secrets/github-app.pem + # Alternative: export GITHUB_APP_PRIVATE_KEY="" + + # HTTP timeout in seconds. 
+ timeout: 30 # default: 30 + + # ── GitHub (PAT authentication) ─────────────────────────────────── + # github: + # enabled: true + # auth_method: pat + # owner: my-org + # repo: my-repo + # token_env: GITHUB_TOKEN # env var that holds the Personal Access Token + # timeout: 30 + + # ── Multiple GitHub organisations ───────────────────────────────── + # Use any name as the key and set type: github. + # Rules reference providers by the key name. + # + # github-acme: + # type: github + # enabled: true + # auth_method: pat + # owner: acme-org + # repo: backend + # token_env: GITHUB_TOKEN_ACME + # + # github-skunkworks: + # type: github + # enabled: true + # auth_method: pat + # owner: skunkworks-org + # repo: platform + # token_env: GITHUB_TOKEN_SKUNKWORKS + + # ── Bitbucket Cloud ─────────────────────────────────────────────── + bitbucket: + enabled: true + + # Bitbucket workspace slug. + workspace: my-workspace + + # Repository slug. + repo_slug: my-repo + + # Name of the environment variable that holds the Bearer token. + token_env: BITBUCKET_TOKEN # default: BITBUCKET_TOKEN + + # Delete source branch after the PR is merged. + close_source_branch: true # default: true + + # HTTP timeout in seconds. + timeout: 30 # default: 30 + +# ────────────────────────────────────────────────────────── +# Rules +# +# Each rule matches branches by regex and maps provider → destination branch. +# The destination branch is automatically excluded from matching. +# Multiple rules are processed in parallel. +# ────────────────────────────────────────────────────────── + +rules: + # Match any branch starting with "feature/" and open PRs toward "main" on GitHub + # and "develop" on Bitbucket. + - pattern: "feature/.*" + destinations: + github: main + bitbucket: develop + + # Match release branches and target "main" on GitHub only. + - pattern: "release/.*" + destinations: + github: main + + # Match hotfix branches on Bitbucket only. 
+ - pattern: ".*-hotfix-.*" + destinations: + bitbucket: master + + # Example: multi-org rules (uncomment if using github-acme / github-skunkworks above) + # - pattern: "feature/.*" + # destinations: + # github-acme: main + # github-skunkworks: develop diff --git a/package.json b/package.json new file mode 100644 index 0000000..102777a --- /dev/null +++ b/package.json @@ -0,0 +1,21 @@ +{ + "name": "pr-generator", + "private": true, + "release": { + "branches": ["main"], + "tagFormat": "v${version}", + "plugins": [ + "@semantic-release/commit-analyzer", + "@semantic-release/release-notes-generator", + "@semantic-release/changelog", + [ + "@semantic-release/git", + { + "assets": ["CHANGELOG.md"], + "message": "chore(release): ${nextRelease.version} [skip ci]\n\n${nextRelease.notes}" + } + ], + "@semantic-release/github" + ] + } +} diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7b03496 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,34 @@ +[build-system] +requires = ["setuptools>=70", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "pr-generator" +version = "1.1.0" +description = "Automated PR creation from branch patterns across GitHub and Bitbucket" +requires-python = ">=3.11" +dependencies = [ + "requests==2.32.5", + "PyJWT[crypto]==2.12.1", + "cryptography==46.0.5", + "pyyaml==6.0.3", +] + +[project.scripts] +pr-generator = "pr_generator.__main__:main" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["src"] + +[tool.coverage.run] +omit = ["src/pr_generator/__main__.py"] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "if __name__ == .__main__.:", +] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c4e0c2d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +requests==2.32.5 +PyJWT[crypto]==2.12.1 +cryptography==46.0.5 +pyyaml==6.0.3 diff --git a/src/pr_generator/__init__.py 
b/src/pr_generator/__init__.py new file mode 100644 index 0000000..768c5ab --- /dev/null +++ b/src/pr_generator/__init__.py @@ -0,0 +1,3 @@ +"""pr_generator — automated PR creation from branch patterns.""" + +__version__ = "1.1.0" diff --git a/src/pr_generator/__main__.py b/src/pr_generator/__main__.py new file mode 100644 index 0000000..20849c7 --- /dev/null +++ b/src/pr_generator/__main__.py @@ -0,0 +1,109 @@ +"""Application entry point.""" + +from __future__ import annotations + +import argparse +import logging +import signal +import sys +import time +from importlib.metadata import version as pkg_version +from threading import Event + +from pr_generator.config import load_config +from pr_generator.health import start_health_server +from pr_generator.logging_config import setup_logging +from pr_generator.providers.bitbucket import BitbucketProvider +from pr_generator.providers.github import GitHubProvider +from pr_generator.scanner import scan_cycle + +logger = logging.getLogger("pr_generator") + + +def main() -> None: + """Entry point: load config, start health server, run scan loop.""" + parser = argparse.ArgumentParser( + description="Automated PR creation daemon for GitHub and Bitbucket Cloud.", + ) + parser.add_argument( + "--version", + action="version", + version=f"pr-generator {pkg_version('pr-generator')}", + ) + parser.parse_args() + + # Bootstrap logging with a sensible default before config is loaded + setup_logging("INFO") + + try: + config = load_config() + except (ValueError, FileNotFoundError) as exc: + logger.error("[Core] Step: startup action=error detail=%s", exc) + sys.exit(1) + + # Re-configure logging with the level and format from config + setup_logging(config.log_level, json_format=(config.log_format == "json")) + + # Instantiate active providers + providers = {} + for pname, pconf in config.providers.items(): + if not pconf.enabled: + continue + if pconf.type == "github": + providers[pname] = GitHubProvider(pconf) + elif pconf.type == 
"bitbucket": + providers[pname] = BitbucketProvider(pconf) + else: + logger.warning("[Core] Unknown provider type '%s' for '%s'; skipping.", pconf.type, pname) + + if not providers: + logger.error("[Core] Step: startup action=error detail=No active providers configured.") + sys.exit(1) + + # Graceful shutdown + stop = Event() + + def _handler(sig, _frame): + logger.info("[Core] Received signal %s; initiating graceful shutdown.", sig) + stop.set() + + signal.signal(signal.SIGTERM, _handler) + signal.signal(signal.SIGINT, _handler) + + # Health server (readiness flips after first cycle) + _server, ready_event = start_health_server(config.health_port, stop) + + logger.info("[Core] Active providers: %s", ", ".join(providers.keys())) + logger.info("[Core] Rules configured: %d", len(config.rules)) + for rule in config.rules: + logger.info("[Core] Rule: pattern=%s destinations=%s", rule.pattern, rule.destinations) + if config.dry_run: + logger.info("[Core] Dry-run mode enabled — PR creations will only be logged") + + cycle_id = 0 + while not stop.is_set(): + cycle_id += 1 + cycle_start = time.time() + scan_cycle(config, providers, cycle_id) + duration = time.time() - cycle_start + logger.info("[Core] Step: cycle action=complete cycle_id=%d duration_sec=%.1f", cycle_id, duration) + + if not ready_event.is_set(): + ready_event.set() + logger.info("[Core] Ready state achieved (first cycle completed)") + + _sleep_interval(config.scan_frequency, stop) + + logger.info("[Core] Shutdown complete.") + + +def _sleep_interval(total: int, stop: Event) -> None: + """Sleep in ≤1 s slices to react quickly to stop signals.""" + waited = 0 + while waited < total and not stop.is_set(): + stop.wait(timeout=min(1, total - waited)) + waited += 1 + + +if __name__ == "__main__": + main() diff --git a/src/pr_generator/config.py b/src/pr_generator/config.py new file mode 100644 index 0000000..a7f0379 --- /dev/null +++ b/src/pr_generator/config.py @@ -0,0 +1,234 @@ +"""Configuration loading from 
YAML file.""" + +from __future__ import annotations + +import base64 +import logging +import os +import re + +import yaml + +from pr_generator.models import AppConfig, ProviderConfig, ScanRule + +logger = logging.getLogger("pr_generator.config") + +_DEFAULT_CONFIG_PATH = "/etc/pr-generator/config.yaml" + + +def load_config() -> AppConfig: + """Load application configuration from a YAML file. + + The config file path defaults to /etc/pr-generator/config.yaml and can be + overridden with the CONFIG_PATH environment variable. + """ + config_path = os.getenv("CONFIG_PATH", _DEFAULT_CONFIG_PATH) + if not os.path.exists(config_path): + raise FileNotFoundError( + f"[Core] Config file not found at '{config_path}'. " + "Set CONFIG_PATH to the correct path or create the file." + ) + logger.info("[Core] Step: load_config action=start source=file path=%s", config_path) + return _load_from_file(config_path) + + +# ------------------------------------------------------------------ +# YAML-based loading +# ------------------------------------------------------------------ + +def _load_from_file(path: str) -> AppConfig: + with open(path) as fh: + raw = yaml.safe_load(fh) + + raw = raw or {} + providers = _parse_providers_from_yaml(raw.get("providers") or {}) + rules = _parse_rules(raw.get("rules") or []) + + if not rules: + raise ValueError("[Core] config.yaml has no rules defined.") + if not providers: + raise ValueError("[Core] config.yaml has no enabled providers.") + + config = AppConfig( + scan_frequency=int(raw.get("scan_frequency", 300)), + log_level=str(raw.get("log_level", "INFO")), + log_format=str(raw.get("log_format", "text")).lower(), + dry_run=bool(raw.get("dry_run", False)), + health_port=int(raw.get("health_port", 8080)), + providers=providers, + rules=rules, + ) + logger.info( + "[Core] Step: load_config action=end source=file providers=%s rules=%d", + list(providers.keys()), len(rules), + ) + return config + + +def _parse_providers_from_yaml(raw: dict) -> 
dict[str, ProviderConfig]: + """Parse the providers section of the YAML config. + + Each key is a provider *name* (e.g. ``github``, ``github-acme``, ``bitbucket``). + The optional ``type`` field selects the provider implementation; it defaults to + the key name for the two built-in values ``"github"`` and ``"bitbucket"`` to keep + backward compatibility with existing configs. + + Example — multiple GitHub orgs:: + + providers: + github-acme: + type: github + enabled: true + owner: acme-org + repo: backend + ... + github-skunkworks: + type: github + enabled: true + owner: skunkworks-org + repo: platform + ... + """ + providers: dict[str, ProviderConfig] = {} + + for pname, pcfg in raw.items(): + if not isinstance(pcfg, dict): + continue + if not pcfg.get("enabled", False): + continue + + # Resolve type: explicit field wins; fall back to key name for known types. + ptype = str(pcfg.get("type", "")).lower() or ( + pname if pname in {"github", "bitbucket"} else "" + ) + if ptype not in {"github", "bitbucket"}: + raise ValueError( + f"[Core] Provider '{pname}' has unknown or missing type '{ptype}'. " + "Set 'type: github' or 'type: bitbucket'." + ) + + if ptype == "github": + providers[pname] = _parse_github_provider(pname, pcfg) + else: + providers[pname] = _parse_bitbucket_provider(pname, pcfg) + + return providers + + +def _parse_github_provider(name: str, gh: dict) -> ProviderConfig: + """Build a ProviderConfig for a GitHub provider entry.""" + auth_method = str(gh.get("auth_method", "app")).lower() + owner = str(gh.get("owner", "")).strip() + repo = str(gh.get("repo", "")).strip() + if not owner or not repo: + raise ValueError( + f"[Core] Provider '{name}': 'owner' and 'repo' are required fields. " + f"Check providers.{name} in your config.yaml." 
+ ) + if auth_method == "pat": + token_env = str(gh.get("token_env", "GITHUB_TOKEN")) + token = os.getenv(token_env, "") + if not token: + raise ValueError( + f"[Core] Provider '{name}': env var '{token_env}' is empty or not set. " + f"Set {token_env} with a valid GitHub PAT." + ) + return ProviderConfig( + name=name, + type="github", + enabled=True, + owner=owner, + repo=repo, + auth_method="pat", + token=token, + timeout=float(gh.get("timeout", 30)), + ) + app_id = str(gh.get("app_id", "")).strip() + if not app_id: + raise ValueError( + f"[Core] Provider '{name}': 'app_id' is required for GitHub App auth. " + f"Check providers.{name} in your config.yaml." + ) + private_key = _load_private_key(gh) + if not private_key: + raise ValueError( + f"[Core] Provider '{name}': no private key found. " + f"Set 'private_key_path' in config or the GITHUB_APP_PRIVATE_KEY env var." + ) + return ProviderConfig( + name=name, + type="github", + enabled=True, + owner=owner, + repo=repo, + app_id=app_id, + installation_id=str(gh.get("installation_id", "")), + private_key=private_key, + auth_method="app", + timeout=float(gh.get("timeout", 30)), + ) + + +def _parse_bitbucket_provider(name: str, bb: dict) -> ProviderConfig: + """Build a ProviderConfig for a Bitbucket provider entry.""" + workspace = str(bb.get("workspace", "")).strip() + repo_slug = str(bb.get("repo_slug", "")).strip() + if not workspace or not repo_slug: + raise ValueError( + f"[Core] Provider '{name}': 'workspace' and 'repo_slug' are required fields. " + f"Check providers.{name} in your config.yaml." + ) + token_env = str(bb.get("token_env", "BITBUCKET_TOKEN")) + token = os.getenv(token_env, "") + if not token: + raise ValueError( + f"[Core] Provider '{name}': env var '{token_env}' is empty or not set. " + f"Set {token_env} with a valid Bitbucket access token." 
+ ) + return ProviderConfig( + name=name, + type="bitbucket", + enabled=True, + workspace=workspace, + repo_slug=repo_slug, + token=token, + timeout=float(bb.get("timeout", 30)), + close_source_branch=bool(bb.get("close_source_branch", True)), + ) + + +def _load_private_key(gh_cfg: dict) -> str: + """Load GitHub App private key from file path or env var.""" + key_path = str(gh_cfg.get("private_key_path", "")) + if key_path and os.path.exists(key_path): + with open(key_path) as fh: + return fh.read() + + # Fallback: try env var (supports base64-encoded PEM) + raw = os.getenv("GITHUB_APP_PRIVATE_KEY", "") + if raw and "-----BEGIN" not in raw: + raw = base64.b64decode(raw).decode() + return raw + + +def _parse_rules(raw_rules: list) -> list[ScanRule]: + rules: list[ScanRule] = [] + for item in raw_rules: + if not isinstance(item, dict): + logger.warning("[Core] Step: load_config action=warn detail=rule entry is not a mapping; skipping") + continue + pattern = str(item.get("pattern", "")) + if not pattern: + logger.warning("[Core] Step: load_config action=warn detail=rule with empty pattern; skipping") + continue + try: + compiled = re.compile(pattern) + except re.error as exc: + raise ValueError(f"[Core] Invalid regex pattern '{pattern}': {exc}") from exc + destinations = {str(k): str(v) for k, v in (item.get("destinations") or {}).items()} + if not destinations: + logger.warning("[Core] Step: load_config action=warn detail=rule pattern=%s has no destinations; skipping", pattern) + continue + rules.append(ScanRule(pattern=pattern, compiled=compiled, destinations=destinations)) + return rules + diff --git a/src/pr_generator/health.py b/src/pr_generator/health.py new file mode 100644 index 0000000..2345cf4 --- /dev/null +++ b/src/pr_generator/health.py @@ -0,0 +1,70 @@ +"""Health HTTP server exposing /livez, /readyz and /healthz endpoints.""" + +from __future__ import annotations + +import logging +import threading +from http.server import BaseHTTPRequestHandler, 
ThreadingHTTPServer +from threading import Event + +logger = logging.getLogger("pr_generator.health") + + +class _HealthHandler(BaseHTTPRequestHandler): + """Lightweight HTTP handler for Kubernetes health probes. + + Endpoints: + /livez, /healthz → 200 while running; 503 when shutting down. + /readyz → 200 after the first full scan cycle; 503 before that. + """ + + # Injected by the server factory below + stop_event: Event + ready_event: Event + + def _write(self, code: int, body: str) -> None: + self.send_response(code) + self.send_header("Content-Type", "text/plain") + self.send_header("Cache-Control", "no-store") + self.end_headers() + self.wfile.write(body.encode()) + + def do_GET(self) -> None: # noqa: N802 + if self.path in ("/livez", "/healthz"): + if self.stop_event.is_set(): + self._write(503, "shutting down") + else: + self._write(200, "live") + elif self.path == "/readyz": + if self.ready_event.is_set() and not self.stop_event.is_set(): + self._write(200, "ready") + else: + self._write(503, "not ready") + else: + self._write(404, "not found") + + def log_message(self, fmt: str, *args) -> None: # noqa: ANN002 + # Suppress default access logs; health probes are very frequent + pass + + +def start_health_server(port: int, stop_event: Event) -> tuple[ThreadingHTTPServer, Event]: + """Start the health HTTP server in a daemon thread. + + Returns: + (server, ready_event) — set ready_event after the first successful cycle. 
+ """ + ready_event = Event() + + # Inject shared state into the handler class via a closure-built subclass + handler_cls = type( + "_BoundHealthHandler", + (_HealthHandler,), + {"stop_event": stop_event, "ready_event": ready_event}, + ) + + server = ThreadingHTTPServer(("0.0.0.0", port), handler_cls) + thread = threading.Thread(target=server.serve_forever, name="health-server", daemon=True) + thread.start() + logger.info("[Core] Step: health_server action=start port=%d", port) + return server, ready_event diff --git a/src/pr_generator/http_client.py b/src/pr_generator/http_client.py new file mode 100644 index 0000000..dffd23a --- /dev/null +++ b/src/pr_generator/http_client.py @@ -0,0 +1,86 @@ +"""Shared HTTP client with retry/backoff logic for all providers.""" + +from __future__ import annotations + +import logging +import time +from typing import Callable + +import requests + +_BACKOFF_DELAYS = (0.5, 1, 2) + +ShouldRetry = Callable[[int | None, Exception | None], bool] +HeadersFactory = Callable[[], dict] + + +def request_with_retry( + *, + logger: logging.Logger, + client_name: str, + method: str, + url: str, + timeout: float, + exception_cls, + should_retry: ShouldRetry, + headers: dict | None = None, + headers_factory: HeadersFactory | None = None, + **request_kwargs, +): + """Execute an HTTP request with shared logging and retry logic. + + Args: + logger: module logger. + client_name: human-readable label, e.g. "GitHub". + method: HTTP verb. + url: request URL. + timeout: seconds passed to requests. + exception_cls: provider-specific exception raised on failure. + Must accept ``(message: str, status_code: int | None)`` positional args. + should_retry: predicate receiving (status_code, exception). + headers: static headers (mutually exclusive with headers_factory). + headers_factory: callable returning fresh headers per attempt. + **request_kwargs: forwarded to ``requests.request``. 
+ """ + if headers is None and headers_factory is None: + raise ValueError("Provide either headers or headers_factory") + + attempts = (0,) + _BACKOFF_DELAYS + last_error: Exception | None = None + + for delay in attempts: + if delay: + time.sleep(delay) + + hdrs = headers if headers_factory is None else headers_factory() + try: + logger.debug( + "[%s] [HTTP] %s %s params=%s", + client_name, method, url, + request_kwargs.get("params"), + ) + start = time.time() + response = requests.request(method, url, headers=hdrs, timeout=timeout, **request_kwargs) + duration_ms = int((time.time() - start) * 1000) + logger.debug("[%s] [HTTP] %s %s -> %s (%dms)", client_name, method, url, response.status_code, duration_ms) + except requests.RequestException as exc: + logger.exception("[%s] [HTTP] %s %s failed: %s", client_name, method, url, exc) + err = exception_cls(f"Request failure: {exc}", None) + last_error = err + if should_retry(None, exc): + continue + raise err + + if response.status_code >= 400: + logger.error("[%s] [HTTP] %s %s error %s: %s", client_name, method, url, response.status_code, response.text) + err = exception_cls(f"{client_name} API error {response.status_code}: {response.text}", response.status_code) + last_error = err + if should_retry(response.status_code, None): + continue + raise err + + return response + + if last_error is None: + raise RuntimeError(f"[{client_name}] request_with_retry exhausted retries with no recorded error") # pragma: no cover + raise last_error diff --git a/src/pr_generator/logging_config.py b/src/pr_generator/logging_config.py new file mode 100644 index 0000000..082e316 --- /dev/null +++ b/src/pr_generator/logging_config.py @@ -0,0 +1,43 @@ +"""Logging setup.""" + +from __future__ import annotations + +import json +import logging + + +class _StructuredFormatter(logging.Formatter): + """JSON formatter for structured log aggregators (ELK, Loki, etc.).""" + + def format(self, record: logging.LogRecord) -> str: + payload: dict = { + 
"timestamp": self.formatTime(record), + "level": record.levelname, + "logger": record.name, + "message": record.getMessage(), + } + if record.exc_info: + payload["exception"] = self.formatException(record.exc_info) + if record.stack_info: + payload["stack_info"] = self.formatStack(record.stack_info) + return json.dumps(payload) + + +def setup_logging(level: str, json_format: bool = False) -> None: + """Configure the root logger. + + Args: + level: log level string, e.g. "INFO", "DEBUG". + json_format: emit structured JSON lines when True. + """ + root = logging.getLogger() + root.setLevel(getattr(logging, level.upper(), logging.INFO)) + handler = logging.StreamHandler() + if json_format: + handler.setFormatter(_StructuredFormatter()) + else: + handler.setFormatter(logging.Formatter( + "%(asctime)s %(levelname)-8s %(name)s %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S", + )) + root.handlers = [handler] diff --git a/src/pr_generator/models.py b/src/pr_generator/models.py new file mode 100644 index 0000000..5a06622 --- /dev/null +++ b/src/pr_generator/models.py @@ -0,0 +1,75 @@ +"""Data models shared across the application.""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class ProviderConfig: + """Immutable configuration for a single Git provider instance.""" + + name: str + enabled: bool + type: str = "" # "github" | "bitbucket" — provider class to use + timeout: float = 30.0 + # GitHub — common + owner: str = "" + repo: str = "" + auth_method: str = "app" # "app" (GitHub App) | "pat" (Personal Access Token) + # GitHub App auth + app_id: str = "" + installation_id: str = "" + private_key: str = "" # PEM content (loaded at startup) + # Bitbucket / GitHub PAT + workspace: str = "" + repo_slug: str = "" + token: str = "" # Bearer/PAT token + # Bitbucket behaviour + close_source_branch: bool = True + + +@dataclass +class ScanRule: + """A scanning rule: one regex pattern and its destination branch 
per provider.""" + + pattern: str + compiled: re.Pattern + destinations: dict[str, str] = field(default_factory=dict) + # e.g. {"github": "develop", "bitbucket": "nonpro"} + + +@dataclass(frozen=True) +class AppConfig: + """Full application configuration.""" + + scan_frequency: int + log_level: str + log_format: str # "text" | "json" + dry_run: bool + health_port: int + providers: dict[str, ProviderConfig] # "github" | "bitbucket" → ProviderConfig + rules: list[ScanRule] + + +@dataclass +class RuleResult: + """Outcome of processing one ScanRule for one provider in a cycle.""" + + rule_pattern: str + provider: str + destination: str + processed: int = 0 + created: int = 0 + skipped_existing: int = 0 + simulated: int = 0 + errors: int = 0 + + +@dataclass +class CycleResult: + """Aggregated outcome of a full scan cycle.""" + + cycle_id: int + rule_results: list[RuleResult] = field(default_factory=list) diff --git a/src/pr_generator/providers/__init__.py b/src/pr_generator/providers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/pr_generator/providers/base.py b/src/pr_generator/providers/base.py new file mode 100644 index 0000000..3546f22 --- /dev/null +++ b/src/pr_generator/providers/base.py @@ -0,0 +1,37 @@ +"""Provider interface contract.""" + +from __future__ import annotations + +from typing import Protocol, runtime_checkable + + +@runtime_checkable +class ProviderInterface(Protocol): + """Contract that every Git provider must fulfil.""" + + @property + def name(self) -> str: + """Lowercase provider identifier: 'github' or 'bitbucket'.""" + ... # pragma: no cover + + def get_branches(self) -> list[str]: + """Return all branch names in the repository (handles pagination). + + Raises a provider-specific exception on API failure. + """ + ... # pragma: no cover + + def check_existing_pr(self, source: str, destination: str) -> bool: + """Return True if an open PR from source to destination already exists.""" + ... 
# pragma: no cover + + def create_pull_request(self, source: str, destination: str) -> None: + """Create a PR from source to destination. + + Raises a provider-specific exception on API failure. + """ + ... # pragma: no cover + + def reset_cycle_cache(self) -> None: + """Clear any per-cycle caches. No-op if the provider has no cache.""" + ... # pragma: no cover diff --git a/src/pr_generator/providers/bitbucket.py b/src/pr_generator/providers/bitbucket.py new file mode 100644 index 0000000..9c867c6 --- /dev/null +++ b/src/pr_generator/providers/bitbucket.py @@ -0,0 +1,164 @@ +"""Bitbucket Cloud provider implementation.""" + +from __future__ import annotations + +import logging +from typing import Any + +from pr_generator.http_client import request_with_retry +from pr_generator.models import ProviderConfig + + +class BitbucketError(Exception): + """Raised when a Bitbucket API call fails.""" + + def __init__(self, message: str, status_code: int | None = None) -> None: + super().__init__(message) + self.status_code = status_code + + +class BitbucketProvider: + """Bitbucket Cloud provider. + + Receives all configuration via constructor — no module-level env-var reads. 
+ """ + + def __init__(self, config: ProviderConfig) -> None: + self._name = config.name + self._workspace = config.workspace + self._repo_slug = config.repo_slug + self._token = config.token + self._timeout = config.timeout + self._close_source_branch = config.close_source_branch + self._api_url = ( + f"https://api.bitbucket.org/2.0/repositories" + f"/{self._workspace}/{self._repo_slug}" + ) + self._logger = logging.getLogger("pr_generator.providers.bitbucket") + + # Per-cycle cache (reset via reset_cycle_cache) + self._pr_cache: dict[tuple[str, str], bool] = {} + + # ------------------------------------------------------------------ + # ProviderInterface + # ------------------------------------------------------------------ + + @property + def name(self) -> str: + return self._name + + def get_branches(self) -> list[str]: + """Fetch all branch names (handles pagination).""" + self._logger.info("[%s] Step: get_branches action=start", self._name) + if not (self._token and self._workspace and self._repo_slug): + self._logger.error("[%s] Step: get_branches action=error detail=missing configuration", self._name) + return [] + + url = f"{self._api_url}/refs/branches" + names: list[str] = [] + page = 1 + + while True: + self._logger.debug("[%s] Step: get_branches action=fetch page=%d", self._name, page) + resp = self._request("GET", url, params={"pagelen": 100, "page": page}) + data = resp.json() + page_values: list[dict[str, Any]] = data.get("values", []) + names.extend(b["name"] for b in page_values if b.get("name")) + self._logger.debug( + "[%s] Step: get_branches action=fetch page=%d count=%d total=%d", + self._name, page, len(page_values), len(names), + ) + if "next" in data: + page += 1 + else: + break + + self._logger.info("[%s] Step: get_branches action=end total=%d", self._name, len(names)) + return names + + def check_existing_pr(self, source: str, destination: str) -> bool: + """Return True if an open PR from source to destination already exists.""" + 
self._logger.info( + "[%s] Step: check_existing_pr action=start source=%s dest=%s", + self._name, source, destination, + ) + key = (source, destination) + if key in self._pr_cache: + self._logger.debug("[%s] Step: check_existing_pr action=cache_hit source=%s dest=%s", self._name, source, destination) + return self._pr_cache[key] + + resp = self._request( + "GET", + f"{self._api_url}/pullrequests", + params={ + "state": "OPEN", + "q": f'source.branch.name="{source}" AND destination.branch.name="{destination}"', + "pagelen": 1, + }, + ) + exists = len(resp.json().get("values", [])) > 0 + self._pr_cache[key] = exists + self._logger.info( + "[%s] Step: check_existing_pr action=end source=%s dest=%s exists=%s", + self._name, source, destination, str(exists).lower(), + ) + return exists + + def create_pull_request(self, source: str, destination: str) -> None: + """Create a PR from source to destination including default reviewers.""" + reviewers = self._get_default_reviewers() + self._logger.info( + "[%s] Step: create_pull_request action=start source=%s dest=%s reviewers=%d", + self._name, source, destination, len(reviewers), + ) + payload = { + "title": f"Merge {source} into {destination}", + "source": {"branch": {"name": source}}, + "destination": {"branch": {"name": destination}}, + "reviewers": reviewers, + "close_source_branch": self._close_source_branch, + } + resp = self._request("POST", f"{self._api_url}/pullrequests", json=payload) + self._pr_cache[(source, destination)] = True + self._logger.info( + "[%s] Step: create_pull_request action=end source=%s dest=%s status=created", + self._name, source, destination, + ) + + def reset_cycle_cache(self) -> None: + """Clear per-cycle PR-existence cache.""" + self._pr_cache.clear() + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _get_default_reviewers(self) -> list[dict[str, Any]]: + 
self._logger.info("[%s] Step: get_default_reviewers action=start", self._name) + if not (self._token and self._workspace and self._repo_slug): + self._logger.warning("[%s] Step: get_default_reviewers action=skip detail=missing config", self._name) + return [] + url = f"{self._api_url}/default-reviewers" + resp = self._request("GET", url) + reviewers = [{"uuid": r.get("uuid")} for r in resp.json().get("values", [])] + self._logger.info("[%s] Step: get_default_reviewers action=end count=%d", self._name, len(reviewers)) + return reviewers + + def _request(self, method: str, url: str, **kwargs): + return request_with_retry( + logger=self._logger, + client_name=self._name, + method=method, + url=url, + timeout=self._timeout, + headers={"Authorization": f"Bearer {self._token}", "Content-Type": "application/json"}, + exception_cls=BitbucketError, + should_retry=self._should_retry, + **kwargs, + ) + + def _should_retry(self, status_code: int | None, exc: Exception | None) -> bool: + if exc is not None: + self._logger.warning("[%s] Retry due to request failure: %s", self._name, exc) + return True + return bool(status_code and (500 <= status_code < 600 or status_code in (408, 429))) diff --git a/src/pr_generator/providers/github.py b/src/pr_generator/providers/github.py new file mode 100644 index 0000000..21bd2e8 --- /dev/null +++ b/src/pr_generator/providers/github.py @@ -0,0 +1,270 @@ +"""GitHub App provider implementation.""" + +from __future__ import annotations + +import logging +import time +from datetime import datetime + +import jwt + +from pr_generator.http_client import request_with_retry +from pr_generator.models import ProviderConfig + +_API_BASE = "https://api.github.com" + + +class GitHubError(Exception): + """Raised when a GitHub API call fails.""" + + def __init__(self, message: str, status_code: int | None = None) -> None: + super().__init__(message) + self.status_code = status_code + + +class GitHubProvider: + """GitHub App provider. 
+ + Receives all configuration via constructor — no module-level env-var reads. + JWT and installation tokens are cached within the instance and refreshed + automatically before expiry. + """ + + def __init__(self, config: ProviderConfig) -> None: + self._name = config.name + self._owner = config.owner + self._repo = config.repo + self._auth_method = config.auth_method # "app" | "pat" + self._pat = config.token # used when auth_method == "pat" + self._app_id = config.app_id + self._installation_id = config.installation_id + self._private_key = config.private_key + self._timeout = config.timeout + self._repo_root = f"{_API_BASE}/repos/{self._owner}/{self._repo}" + self._logger = logging.getLogger("pr_generator.providers.github") + + # Token caches + self._jwt_cache: str | None = None + self._jwt_exp: float = 0.0 + self._install_token: str | None = None + self._install_token_exp: float = 0.0 + + # Per-cycle caches (reset via reset_cycle_cache) + self._pr_cache: dict[tuple[str, str], bool] = {} + self._branch_cache: dict[str, bool] = {} + + # ------------------------------------------------------------------ + # ProviderInterface + # ------------------------------------------------------------------ + + @property + def name(self) -> str: + return self._name + + def get_branches(self) -> list[str]: + """List all branch names in the repository (handles pagination).""" + self._logger.info("[%s] Step: get_branches action=start", self._name) + if self._auth_method == "pat": + ready = all([self._owner, self._repo, self._pat]) + else: + ready = all([self._owner, self._repo, self._private_key, self._app_id]) + if not ready: + self._logger.error("[%s] Step: get_branches action=error detail=incomplete config" + " auth_method=%s", self._name, self._auth_method) + return [] + + out: list[str] = [] + page = 1 + while True: + self._logger.debug("[%s] Step: get_branches action=fetch page=%d", self._name, page) + r = self._request("GET", f"{self._repo_root}/branches", 
params={"per_page": 100, "page": page}) + data = r.json() + if not data: + break + out.extend(b["name"] for b in data) + if len(data) < 100: + break + page += 1 + + # Populate branch cache from the full list to avoid redundant API calls later + for branch_name in out: + self._branch_cache[branch_name] = True + + self._logger.info("[%s] Step: get_branches action=end total=%d", self._name, len(out)) + return out + + def check_existing_pr(self, source: str, destination: str) -> bool: + """Return True if an open PR from source to destination already exists.""" + self._logger.info( + "[%s] Step: check_existing_pr action=start source=%s dest=%s", + self._name, source, destination, + ) + key = (source, destination) + if key in self._pr_cache: + self._logger.debug("[%s] Step: check_existing_pr action=cache_hit source=%s dest=%s", self._name, source, destination) + return self._pr_cache[key] + + r = self._request( + "GET", + f"{self._repo_root}/pulls", + params={ + "state": "open", + "base": destination, + "head": f"{self._owner}:{source}", + "per_page": 1, + }, + ) + exists = len(r.json()) > 0 + self._pr_cache[key] = exists + self._logger.info( + "[%s] Step: check_existing_pr action=end source=%s dest=%s exists=%s", + self._name, source, destination, str(exists).lower(), + ) + return exists + + def create_pull_request(self, source: str, destination: str) -> None: + """Create a PR from source to destination if source branch exists.""" + self._logger.info( + "[%s] Step: create_pull_request action=start source=%s dest=%s", + self._name, source, destination, + ) + if not self._branch_exists(source): + self._logger.warning( + "[%s] Step: create_pull_request action=skip source=%s detail=branch not found", + self._name, source, + ) + return + + payload = { + "title": f"Merge {source} into {destination}", + "head": source, + "base": destination, + "body": "Automated PR generated by pr-generator.", + "draft": False, + } + resp = self._request("POST", f"{self._repo_root}/pulls", 
json=payload) + self._pr_cache[(source, destination)] = True + self._logger.info( + "[%s] Step: create_pull_request action=end source=%s dest=%s" + " status=created number=%s", + self._name, source, destination, resp.json().get("number"), + ) + + def reset_cycle_cache(self) -> None: + """Clear per-cycle branch-existence and PR-existence caches.""" + self._pr_cache.clear() + self._branch_cache.clear() + + @staticmethod + def _now() -> float: + return time.time() + + def _new_jwt(self) -> str: + self._logger.debug("[GitHub] Step: get_jwt action=generate") + if not (self._app_id and self._private_key): + raise RuntimeError("[GitHub] Missing GITHUB_APP_ID or GITHUB_APP_PRIVATE_KEY.") + now = int(self._now()) + payload = {"iat": now - 60, "exp": now + (9 * 60), "iss": self._app_id} + return jwt.encode(payload, self._private_key, algorithm="RS256") + + def _get_jwt(self) -> str: + if self._jwt_cache and self._now() < self._jwt_exp - 30: + return self._jwt_cache + self._jwt_cache = self._new_jwt() + self._jwt_exp = self._now() + (9 * 60) + return self._jwt_cache + + def _resolve_installation_id(self) -> str: + self._logger.info("[GitHub] Step: resolve_installation_id action=start") + if self._installation_id: + self._logger.info("[GitHub] Step: resolve_installation_id action=end detail=provided") + return self._installation_id + r = self._request( + "GET", + f"{_API_BASE}/repos/{self._owner}/{self._repo}/installation", + installation=False, + ) + inst_id = str(r.json().get("id", "")) + if not inst_id: + raise RuntimeError("[GitHub] Could not resolve installation id.") + # Cache so subsequent token refreshes don't make an extra API call + self._installation_id = inst_id + self._logger.info("[GitHub] Step: resolve_installation_id action=end id=%s", inst_id) + return inst_id + + def _get_installation_token(self) -> str: + if self._install_token and self._now() < self._install_token_exp - 30: + return self._install_token + self._logger.info("[GitHub] Step: 
get_installation_token action=start") + inst_id = self._resolve_installation_id() + r = self._request( + "POST", + f"{_API_BASE}/app/installations/{inst_id}/access_tokens", + installation=False, + ) + data = r.json() + self._install_token = data.get("token") + expires_at = data.get("expires_at", "") + try: + self._install_token_exp = datetime.fromisoformat( + expires_at.replace("Z", "+00:00") + ).timestamp() + except Exception as exc: + self._logger.warning( + "[%s] Step: get_installation_token action=warn detail=failed to parse expiry (%s); using 55min default", + self._name, exc, + ) + self._install_token_exp = self._now() + (55 * 60) + self._logger.info("[GitHub] Step: get_installation_token action=end") + return self._install_token + + def _headers(self, installation: bool = True) -> dict: + if self._auth_method == "pat": + return { + "Authorization": f"token {self._pat}", + "Accept": "application/vnd.github+json", + } + if installation: + return { + "Authorization": f"Bearer {self._get_installation_token()}", + "Accept": "application/vnd.github+json", + } + return { + "Authorization": f"Bearer {self._get_jwt()}", + "Accept": "application/vnd.github+json", + } + + def _request(self, method: str, url: str, installation: bool = True, **kwargs): + return request_with_retry( + logger=self._logger, + client_name="GitHub", + method=method, + url=url, + timeout=self._timeout, + headers_factory=lambda: self._headers(installation), + exception_cls=GitHubError, + should_retry=self._should_retry, + **kwargs, + ) + + def _should_retry(self, status_code: int | None, exc: Exception | None) -> bool: + if exc is not None: + self._logger.warning("[%s] Retry due to request failure: %s", self._name, exc) + return True + return bool(status_code and (500 <= status_code < 600 or status_code in (408, 429))) + + def _branch_exists(self, branch: str) -> bool: + if branch in self._branch_cache: + return self._branch_cache[branch] + self._logger.info("[%s] Step: branch_exists 
action=start branch=%s", self._name, branch) + try: + self._request("GET", f"{self._repo_root}/branches/{branch}") + self._branch_cache[branch] = True + self._logger.info("[%s] Step: branch_exists action=end branch=%s exists=true", self._name, branch) + return True + except GitHubError as exc: + if exc.status_code == 404: + self._branch_cache[branch] = False + self._logger.info("[%s] Step: branch_exists action=end branch=%s exists=false", self._name, branch) + return False + raise diff --git a/src/pr_generator/scanner.py b/src/pr_generator/scanner.py new file mode 100644 index 0000000..d6af7df --- /dev/null +++ b/src/pr_generator/scanner.py @@ -0,0 +1,154 @@ +"""Scan cycle orchestrator with concurrent rule processing.""" + +from __future__ import annotations + +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed + +from pr_generator.models import AppConfig, CycleResult, RuleResult, ScanRule +from pr_generator.providers.base import ProviderInterface + +logger = logging.getLogger("pr_generator.scanner") + +_MAX_RULE_WORKERS = 10 + + +def scan_cycle( + config: AppConfig, + providers: dict[str, ProviderInterface], + cycle_id: int, +) -> CycleResult: + """Execute one full scan cycle. + + Phase 1: Fetch branches from every active provider concurrently. + Phase 2: Process every rule×provider pair concurrently. 
+ """ + logger.info( + "[Core] Step: scan_cycle action=start cycle_id=%d rules=%d providers=%s", + cycle_id, len(config.rules), list(providers.keys()), + ) + + # Reset per-cycle caches on all providers + for prov in providers.values(): + prov.reset_cycle_cache() + + # Phase 1 — fetch branches in parallel (one task per provider) + branches_by_provider: dict[str, list[str]] = {} + with ThreadPoolExecutor(max_workers=max(1, len(providers))) as pool: + futures = { + pool.submit(prov.get_branches): prov_name + for prov_name, prov in providers.items() + } + for future in as_completed(futures): + prov_name = futures[future] + try: + branches_by_provider[prov_name] = future.result() + except Exception as exc: + logger.error( + "[%s] Step: get_branches action=error cycle_id=%d detail=%s", + prov_name.capitalize(), cycle_id, exc, + ) + branches_by_provider[prov_name] = [] + + # Phase 2 — process rules × providers in parallel + result = CycleResult(cycle_id=cycle_id) + task_futures = [] + + with ThreadPoolExecutor(max_workers=_MAX_RULE_WORKERS) as pool: + for rule in config.rules: + for prov_name, dest_branch in rule.destinations.items(): + if prov_name not in providers: + logger.debug( + "[Core] Step: process_rule action=skip rule=%s detail=provider %s not active", + rule.pattern, prov_name, + ) + continue + task_futures.append(pool.submit( + _process_rule, + provider=providers[prov_name], + branches=branches_by_provider.get(prov_name, []), + rule=rule, + dest_branch=dest_branch, + dry_run=config.dry_run, + cycle_id=cycle_id, + )) + + for future in as_completed(task_futures): + try: + result.rule_results.append(future.result()) + except Exception as exc: + logger.error("[Core] Step: process_rule action=error cycle_id=%d detail=%s", cycle_id, exc) + + # Aggregate and log cycle summary + total = sum(r.processed for r in result.rule_results) + created = sum(r.created for r in result.rule_results) + skipped = sum(r.skipped_existing for r in result.rule_results) + simulated = 
sum(r.simulated for r in result.rule_results) + errors = sum(r.errors for r in result.rule_results) + logger.info( + "[Core] Step: scan_cycle action=end cycle_id=%d processed=%d" + " created=%d skipped_existing=%d dry_run=%d errors=%d", + cycle_id, total, created, skipped, simulated, errors, + ) + return result + + +# ------------------------------------------------------------------ +# Helpers +# ------------------------------------------------------------------ + +def _process_rule( + provider: ProviderInterface, + branches: list[str], + rule: ScanRule, + dest_branch: str, + dry_run: bool, + cycle_id: int, +) -> RuleResult: + """Filter branches by rule and create PRs where needed.""" + pname = provider.name.capitalize() + result = RuleResult( + rule_pattern=rule.pattern, + provider=provider.name, + destination=dest_branch, + ) + logger.info( + "[%s] Step: process_rule action=start cycle_id=%d pattern=%s dest=%s", + pname, cycle_id, rule.pattern, dest_branch, + ) + + matched = [ + b for b in branches + if b != dest_branch and rule.compiled.match(b) + ] + + for branch in matched: + result.processed += 1 + try: + if provider.check_existing_pr(branch, dest_branch): + result.skipped_existing += 1 + continue + if dry_run: + logger.info( + "[%s] Step: create_pull_request action=dry_run cycle_id=%d source=%s dest=%s", + pname, cycle_id, branch, dest_branch, + ) + result.simulated += 1 + continue + provider.create_pull_request(branch, dest_branch) + result.created += 1 + except Exception as exc: + logger.error( + "[%s] Step: create_pull_request action=error cycle_id=%d source=%s dest=%s detail=%s", + pname, cycle_id, branch, dest_branch, exc, + ) + result.errors += 1 + + logger.info( + "[%s] Step: process_rule action=end cycle_id=%d pattern=%s dest=%s" + " processed=%d created=%d dry_run=%d skipped=%d errors=%d", + pname, cycle_id, rule.pattern, dest_branch, + result.processed, result.created, result.simulated, + result.skipped_existing, result.errors, + ) + return result 
diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..32f0ec4 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,49 @@ +"""Shared fixtures for the test suite.""" + +import pytest + + +@pytest.fixture +def github_app_config(): + from pr_generator.models import ProviderConfig + return ProviderConfig( + name="github", + type="github", + enabled=True, + owner="test-owner", + repo="test-repo", + auth_method="app", + app_id="12345", + installation_id="67890", + private_key="fake-pem", + timeout=5.0, + ) + + +@pytest.fixture +def github_pat_config(): + from pr_generator.models import ProviderConfig + return ProviderConfig( + name="github", + type="github", + enabled=True, + owner="test-owner", + repo="test-repo", + auth_method="pat", + token="ghp_testtoken123", + timeout=5.0, + ) + + +@pytest.fixture +def bitbucket_provider_config(): + from pr_generator.models import ProviderConfig + return ProviderConfig( + name="bitbucket", + type="bitbucket", + enabled=True, + workspace="test-workspace", + repo_slug="test-repo", + token="test-token", + timeout=5.0, + ) diff --git a/tests/test_config.py b/tests/test_config.py new file mode 100644 index 0000000..169784e --- /dev/null +++ b/tests/test_config.py @@ -0,0 +1,716 @@ +"""Tests for config loading.""" + +import os +import re +import textwrap +import pytest + + +def _write_config(tmp_path, content: str) -> str: + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(content)) + return str(path) + + +_FAKE_PEM = "-----BEGIN RSA PRIVATE KEY-----\nZmFrZQ==\n-----END RSA PRIVATE KEY-----" + + +class TestLoadFromFile: + def test_single_rule_both_providers(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "bb-token") + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = _write_config(tmp_path, """ + scan_frequency: 60 + log_level: DEBUG + dry_run: true + health_port: 
9090 + providers: + github: + enabled: true + owner: my-org + repo: my-repo + app_id: "111" + installation_id: "222" + private_key_path: /nonexistent + timeout: 10 + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + timeout: 15 + rules: + - pattern: "feature/.*" + destinations: + github: main + bitbucket: develop + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + + assert cfg.scan_frequency == 60 + assert cfg.log_level == "DEBUG" + assert cfg.dry_run is True + assert cfg.health_port == 9090 + assert "github" in cfg.providers + assert "bitbucket" in cfg.providers + assert cfg.providers["bitbucket"].token == "bb-token" + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "feature/.*" + assert cfg.rules[0].destinations == {"github": "main", "bitbucket": "develop"} + assert cfg.rules[0].compiled.match("feature/my-branch") + + def test_multiple_rules(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: ".*-nonpro-.*" + destinations: + bitbucket: nonpro + - pattern: ".*-pro-.*" + destinations: + bitbucket: master + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) == 2 + assert cfg.rules[0].destinations == {"bitbucket": "nonpro"} + assert cfg.rules[1].destinations == {"bitbucket": "master"} + + def test_missing_rules_raises(self, tmp_path, monkeypatch): + monkeypatch.setenv("BB_TOKEN_TEST", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: w + repo_slug: r + token_env: BB_TOKEN_TEST + rules: [] + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="no rules"): 
+ load_config() + + def test_invalid_regex_raises(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: "(" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="Invalid regex"): + load_config() + + def test_missing_github_private_key_raises(self, tmp_path, monkeypatch): + """GitHub App provider with no private key should fail at load time.""" + monkeypatch.delenv("GITHUB_APP_PRIVATE_KEY", raising=False) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "1" + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="no private key"): + load_config() + + def test_missing_bitbucket_token_raises(self, tmp_path, monkeypatch): + """Bitbucket provider with empty token env var should fail at load time.""" + monkeypatch.delenv("BB_MISSING_TOKEN", raising=False) + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BB_MISSING_TOKEN + rules: + - pattern: ".*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="BB_MISSING_TOKEN"): + load_config() + + +class TestGitHubPATConfig: + def test_pat_auth_method_from_yaml(self, tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken") + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(""" + providers: + github: + enabled: true + auth_method: pat + owner: my-org + repo: my-repo + token_env: GITHUB_TOKEN + timeout: 10 + rules: + - 
pattern: "feature/.*" + destinations: + github: main + """)) + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + cfg = load_config() + gh = cfg.providers["github"] + assert gh.auth_method == "pat" + assert gh.token == "ghp_testtoken" + assert gh.app_id == "" + assert gh.private_key == "" + + def test_pat_custom_token_env(self, tmp_path, monkeypatch): + monkeypatch.setenv("MY_GH_TOKEN", "ghp_custom") + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(""" + providers: + github: + enabled: true + auth_method: pat + owner: org + repo: repo + token_env: MY_GH_TOKEN + rules: + - pattern: ".*" + destinations: + github: main + """)) + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github"].token == "ghp_custom" + + def test_app_auth_method_default(self, tmp_path, monkeypatch): + """auth_method defaults to 'app' when not specified.""" + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(""" + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "111" + installation_id: "222" + rules: + - pattern: ".*" + destinations: + github: main + """)) + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github"].auth_method == "app" + + def test_log_format_json_from_yaml(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = tmp_path / "config.yaml" + path.write_text(textwrap.dedent(""" + log_format: json + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: ".*" + destinations: + bitbucket: main + """)) + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + assert load_config().log_format == "json" + + +class 
TestMultiOrgGitHub: + """Tests for multiple GitHub provider instances (different orgs/repos).""" + + def test_two_github_providers_different_orgs(self, tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN_ACME", "ghp_acme") + monkeypatch.setenv("GITHUB_TOKEN_SKW", "ghp_skw") + path = _write_config(tmp_path, """ + providers: + github-acme: + type: github + enabled: true + auth_method: pat + owner: acme-org + repo: backend + token_env: GITHUB_TOKEN_ACME + github-skunkworks: + type: github + enabled: true + auth_method: pat + owner: skunkworks-org + repo: platform + token_env: GITHUB_TOKEN_SKW + rules: + - pattern: "feature/.*" + destinations: + github-acme: main + github-skunkworks: develop + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + + assert set(cfg.providers.keys()) == {"github-acme", "github-skunkworks"} + acme = cfg.providers["github-acme"] + assert acme.type == "github" + assert acme.owner == "acme-org" + assert acme.repo == "backend" + assert acme.token == "ghp_acme" + skw = cfg.providers["github-skunkworks"] + assert skw.type == "github" + assert skw.owner == "skunkworks-org" + assert skw.token == "ghp_skw" + assert cfg.rules[0].destinations == { + "github-acme": "main", + "github-skunkworks": "develop", + } + + def test_named_provider_defaults_type_from_key(self, tmp_path, monkeypatch): + """Key 'github' without explicit type should still work (backward compat).""" + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "1" + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github"].type == "github" + + def test_named_provider_unknown_type_raises(self, tmp_path, monkeypatch): + """A named provider with an unrecognised type 
should raise ValueError.""" + path = _write_config(tmp_path, """ + providers: + my-provider: + type: gitlab + enabled: true + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + my-provider: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="unknown or missing type"): + load_config() + + def test_named_provider_missing_type_raises(self, tmp_path, monkeypatch): + """A non-standard provider key without 'type' should raise ValueError.""" + path = _write_config(tmp_path, """ + providers: + my-github-instance: + enabled: true + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + my-github-instance: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="unknown or missing type"): + load_config() + + def test_mixed_github_and_bitbucket_named_providers(self, tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_TOKEN_ORG", "ghp_org") + monkeypatch.setenv("BB_TOKEN", "bb_tok") + path = _write_config(tmp_path, """ + providers: + github-myorg: + type: github + enabled: true + auth_method: pat + owner: my-org + repo: app + token_env: GITHUB_TOKEN_ORG + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BB_TOKEN + rules: + - pattern: "feature/.*" + destinations: + github-myorg: main + bitbucket: develop + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github-myorg"].type == "github" + assert cfg.providers["bitbucket"].type == "bitbucket" + + +class TestConfigValidationEdgeCases: + """Cover validation branches not exercised by the main test classes.""" + + def test_no_enabled_providers_raises(self, tmp_path, monkeypatch): + """All providers disabled → ValueError about no enabled providers.""" + path = _write_config(tmp_path, """ + providers: + github: + enabled: 
false + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="no enabled providers"): + load_config() + + def test_non_dict_provider_entry_skipped(self, tmp_path, monkeypatch): + """A provider entry that isn't a dict is silently skipped.""" + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bad_entry: "not-a-dict" + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: ".*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert "bitbucket" in cfg.providers + assert "bad_entry" not in cfg.providers + + def test_disabled_provider_not_loaded(self, tmp_path, monkeypatch): + """A provider with enabled: false is excluded from the result.""" + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + github: + enabled: false + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert "github" not in cfg.providers + + def test_github_missing_owner_raises(self, tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + repo: repo + app_id: "1" + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="'owner' and 'repo' are required"): + load_config() + + def test_github_missing_app_id_raises(self, 
tmp_path, monkeypatch): + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", _FAKE_PEM) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + owner: org + repo: repo + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="'app_id' is required"): + load_config() + + def test_github_pat_missing_token_raises(self, tmp_path, monkeypatch): + monkeypatch.delenv("MISSING_GH_PAT", raising=False) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + auth_method: pat + owner: org + repo: repo + token_env: MISSING_GH_PAT + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="MISSING_GH_PAT"): + load_config() + + def test_bitbucket_missing_workspace_raises(self, tmp_path, monkeypatch): + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: ".*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="'workspace' and 'repo_slug' are required"): + load_config() + + def test_private_key_loaded_from_file(self, tmp_path, monkeypatch): + """private_key_path pointing to an existing file loads the key from disk.""" + key_file = tmp_path / "app.pem" + key_file.write_text(_FAKE_PEM) + monkeypatch.delenv("GITHUB_APP_PRIVATE_KEY", raising=False) + path = _write_config(tmp_path, f""" + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "1" + private_key_path: {key_file} + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import 
load_config + cfg = load_config() + assert cfg.providers["github"].private_key == _FAKE_PEM + + def test_private_key_base64_decoded_from_env(self, tmp_path, monkeypatch): + """GITHUB_APP_PRIVATE_KEY as base64 is decoded automatically.""" + import base64 + encoded = base64.b64encode(_FAKE_PEM.encode()).decode() + monkeypatch.setenv("GITHUB_APP_PRIVATE_KEY", encoded) + path = _write_config(tmp_path, """ + providers: + github: + enabled: true + owner: org + repo: repo + app_id: "1" + rules: + - pattern: ".*" + destinations: + github: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert cfg.providers["github"].private_key == _FAKE_PEM + + def test_rule_with_empty_pattern_skipped(self, tmp_path, monkeypatch): + """A rule with no pattern is silently skipped.""" + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: "" + destinations: + bitbucket: main + - pattern: "feature/.*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "feature/.*" + + def test_rule_with_no_destinations_skipped(self, tmp_path, monkeypatch): + """A rule with empty destinations is silently skipped.""" + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: "feature/.*" + destinations: {} + - pattern: "release/.*" + destinations: + bitbucket: main + """) + monkeypatch.setenv("CONFIG_PATH", path) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "release/.*" + + +class 
TestNullYamlValues: + """Regression tests for null/empty YAML values that previously caused AttributeError.""" + + def _base_config(self, tmp_path, monkeypatch, content: str) -> str: + monkeypatch.setenv("BITBUCKET_TOKEN", "tok") + path = _write_config(tmp_path, content) + monkeypatch.setenv("CONFIG_PATH", path) + return path + + def test_empty_yaml_file_raises(self, tmp_path, monkeypatch): + """An empty YAML file must raise ValueError, not AttributeError.""" + path = tmp_path / "config.yaml" + path.write_text("") + monkeypatch.setenv("CONFIG_PATH", str(path)) + from pr_generator.config import load_config + with pytest.raises(ValueError): + load_config() + + def test_null_providers_section_raises(self, tmp_path, monkeypatch): + """providers: with no value (null) must raise ValueError, not AttributeError.""" + self._base_config(tmp_path, monkeypatch, """ + providers: + rules: + - pattern: "feature/.*" + destinations: + bitbucket: main + """) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="no enabled providers"): + load_config() + + def test_null_rules_section_raises(self, tmp_path, monkeypatch): + """rules: with no value (null) must raise ValueError, not AttributeError.""" + self._base_config(tmp_path, monkeypatch, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + """) + from pr_generator.config import load_config + with pytest.raises(ValueError, match="no rules"): + load_config() + + def test_null_destinations_in_rule_skipped(self, tmp_path, monkeypatch): + """destinations: with no value (null) must be treated as empty and skipped.""" + self._base_config(tmp_path, monkeypatch, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - pattern: "feature/.*" + destinations: + - pattern: "release/.*" + destinations: + bitbucket: main + """) + from pr_generator.config import load_config + cfg = 
load_config() + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "release/.*" + + def test_null_rule_item_skipped(self, tmp_path, monkeypatch): + """A null entry in the rules list must be skipped, not crash.""" + self._base_config(tmp_path, monkeypatch, """ + providers: + bitbucket: + enabled: true + workspace: ws + repo_slug: rs + token_env: BITBUCKET_TOKEN + rules: + - + - pattern: "release/.*" + destinations: + bitbucket: main + """) + from pr_generator.config import load_config + cfg = load_config() + assert len(cfg.rules) == 1 + assert cfg.rules[0].pattern == "release/.*" + + def test_config_file_not_found_raises(self, monkeypatch): + """Missing config file must raise FileNotFoundError.""" + monkeypatch.setenv("CONFIG_PATH", "/nonexistent/path/config.yaml") + from pr_generator.config import load_config + with pytest.raises(FileNotFoundError): + load_config() + diff --git a/tests/test_health.py b/tests/test_health.py new file mode 100644 index 0000000..35fccd1 --- /dev/null +++ b/tests/test_health.py @@ -0,0 +1,58 @@ +"""Tests for the health server.""" + +import time +import urllib.request +from threading import Event + +import pytest + +from pr_generator.health import start_health_server + +_PORT = 18081 + + +@pytest.fixture(scope="module") +def health_server(): + stop = Event() + server, ready = start_health_server(_PORT, stop) + time.sleep(0.1) + yield stop, ready + stop.set() + server.shutdown() + + +def _get(path: str) -> int: + try: + resp = urllib.request.urlopen(f"http://127.0.0.1:{_PORT}{path}", timeout=2) + return resp.status + except urllib.error.HTTPError as exc: + return exc.code + + +class TestHealthServer: + def test_livez_returns_200(self, health_server): + stop, _ready = health_server + assert _get("/livez") == 200 + + def test_healthz_alias(self, health_server): + stop, _ready = health_server + assert _get("/healthz") == 200 + + def test_readyz_returns_503_before_ready(self, health_server): + stop, ready = health_server + 
ready.clear() + assert _get("/readyz") == 503 + + def test_readyz_returns_200_after_ready(self, health_server): + stop, ready = health_server + ready.set() + assert _get("/readyz") == 200 + + def test_livez_returns_503_when_stopping(self, health_server): + stop, _ready = health_server + stop.set() + assert _get("/livez") == 503 + stop.clear() # reset for other tests + + def test_unknown_path_returns_404(self, health_server): + assert _get("/unknown") == 404 diff --git a/tests/test_http_client.py b/tests/test_http_client.py new file mode 100644 index 0000000..0ac7115 --- /dev/null +++ b/tests/test_http_client.py @@ -0,0 +1,217 @@ +"""Tests for the shared HTTP client with retry/backoff logic.""" + +from __future__ import annotations + +import logging +from unittest.mock import MagicMock, call, patch + +import pytest +import requests + +from pr_generator.http_client import request_with_retry + + +class _TestError(Exception): + """Stub provider exception used in tests.""" + + def __init__(self, message: str, status_code: int | None = None) -> None: + super().__init__(message) + self.status_code = status_code + + +_logger = logging.getLogger("test_http_client") + + +def _make_response(status_code: int, json_data=None, text: str = ""): + resp = MagicMock() + resp.status_code = status_code + resp.text = text + resp.json.return_value = json_data or {} + return resp + + +class TestRequestWithRetrySuccess: + def test_returns_response_on_200(self): + with patch("requests.request", return_value=_make_response(200, {"ok": True})) as mock_req: + resp = request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com/api", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers={"Authorization": "Bearer tok"}, + ) + assert resp.json() == {"ok": True} + mock_req.assert_called_once() + + def test_uses_headers_factory_per_attempt(self): + call_count = 0 + + def factory(): + nonlocal call_count + call_count += 1 + 
return {"X-Attempt": str(call_count)} + + with patch("requests.request", return_value=_make_response(200)): + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers_factory=factory, + ) + assert call_count == 1 + + def test_raises_if_neither_headers_nor_factory(self): + with pytest.raises(ValueError, match="Provide either headers"): + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + ) + + +class TestRequestWithRetryHttpErrors: + def test_raises_provider_exception_on_4xx(self): + with patch("requests.request", return_value=_make_response(404, text="not found")): + with pytest.raises(_TestError) as exc_info: + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers={}, + ) + assert exc_info.value.status_code == 404 + assert "404" in str(exc_info.value) + + def test_exception_carries_status_code(self): + with patch("requests.request", return_value=_make_response(422, text="unprocessable")): + with pytest.raises(_TestError) as exc_info: + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers={}, + ) + assert exc_info.value.status_code == 422 + + def test_retries_on_500_then_succeeds(self): + responses = [_make_response(500), _make_response(200, {"ok": True})] + with patch("requests.request", side_effect=responses): + with patch("time.sleep"): # skip actual backoff delays + resp = request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + 
should_retry=lambda s, e: s is not None and s >= 500, + headers={}, + ) + assert resp.json() == {"ok": True} + + def test_raises_after_exhausting_all_retries(self): + """All 4 attempts return 503 → should raise with the last error.""" + with patch("requests.request", return_value=_make_response(503)): + with patch("time.sleep"): + with pytest.raises(_TestError) as exc_info: + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: s is not None and s >= 500, + headers={}, + ) + assert exc_info.value.status_code == 503 + + def test_no_retry_on_4xx(self): + """4xx errors should NOT be retried — only one HTTP call made.""" + with patch("requests.request", return_value=_make_response(400)) as mock_req: + with pytest.raises(_TestError): + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: s is not None and s >= 500, + headers={}, + ) + assert mock_req.call_count == 1 + + +class TestRequestWithRetryNetworkErrors: + def test_raises_on_network_exception(self): + with patch("requests.request", side_effect=requests.ConnectionError("refused")): + with pytest.raises(_TestError) as exc_info: + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: False, + headers={}, + ) + assert exc_info.value.status_code is None + assert "Request failure" in str(exc_info.value) + + def test_retries_on_network_exception_then_succeeds(self): + responses = [ + requests.ConnectionError("refused"), + _make_response(200, {"ok": True}), + ] + with patch("requests.request", side_effect=responses): + with patch("time.sleep"): + resp = request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, 
+ exception_cls=_TestError, + should_retry=lambda s, e: e is not None, + headers={}, + ) + assert resp.json() == {"ok": True} + + def test_backoff_delays_are_applied(self): + """All 4 attempts fail → sleep called 3 times with backoff delays.""" + with patch("requests.request", side_effect=requests.ConnectionError("x")): + with patch("time.sleep") as mock_sleep: + with pytest.raises(_TestError): + request_with_retry( + logger=_logger, + client_name="Test", + method="GET", + url="http://example.com", + timeout=5, + exception_cls=_TestError, + should_retry=lambda s, e: True, + headers={}, + ) + assert mock_sleep.call_count == 3 + assert mock_sleep.call_args_list == [call(0.5), call(1), call(2)] diff --git a/tests/test_logging_config.py b/tests/test_logging_config.py new file mode 100644 index 0000000..6241e68 --- /dev/null +++ b/tests/test_logging_config.py @@ -0,0 +1,75 @@ +"""Tests for logging setup.""" + +import json +import logging + +from pr_generator.logging_config import setup_logging + + +class TestSetupLogging: + def test_text_format_sets_level(self): + setup_logging("DEBUG") + assert logging.getLogger().level == logging.DEBUG + + def test_info_level(self): + setup_logging("INFO") + assert logging.getLogger().level == logging.INFO + + def test_invalid_level_falls_back_to_info(self): + setup_logging("NOTAREAL") + assert logging.getLogger().level == logging.INFO + + def test_text_format_is_plain_formatter(self): + setup_logging("INFO", json_format=False) + root = logging.getLogger() + assert len(root.handlers) == 1 + assert type(root.handlers[0].formatter) is logging.Formatter + + def test_json_format_emits_valid_json(self): + setup_logging("INFO", json_format=True) + root = logging.getLogger() + formatter = root.handlers[0].formatter + record = logging.LogRecord( + name="test", level=logging.INFO, pathname="", lineno=0, + msg="hello %s", args=("world",), exc_info=None, + ) + output = formatter.format(record) + parsed = json.loads(output) + 
assert parsed["message"] == "hello world" + assert parsed["level"] == "INFO" + assert "timestamp" in parsed + + def test_json_format_includes_exception(self): + setup_logging("INFO", json_format=True) + formatter = logging.getLogger().handlers[0].formatter + try: + raise ValueError("boom") + except ValueError: + import sys + exc_info = sys.exc_info() + record = logging.LogRecord( + name="test", level=logging.ERROR, pathname="", lineno=0, + msg="err", args=(), exc_info=exc_info, + ) + output = json.loads(formatter.format(record)) + assert "exception" in output + assert "ValueError" in output["exception"] + + def test_replaces_existing_handlers(self): + root = logging.getLogger() + root.addHandler(logging.NullHandler()) + initial_count = len(root.handlers) + setup_logging("INFO") + assert len(root.handlers) == 1 + + def test_json_format_includes_stack_info(self): + setup_logging("INFO", json_format=True) + formatter = logging.getLogger().handlers[0].formatter + record = logging.LogRecord( + name="test", level=logging.WARNING, pathname="", lineno=0, + msg="with stack", args=(), exc_info=None, + ) + record.stack_info = "Stack (most recent call last):\n File 'x.py', line 1" + output = json.loads(formatter.format(record)) + assert "stack_info" in output + assert "most recent" in output["stack_info"] diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..44f0be5 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,50 @@ +"""Tests for models and data structures.""" + +import re +import pytest +from pr_generator.models import ( + AppConfig, CycleResult, ProviderConfig, RuleResult, ScanRule, +) + + +def test_provider_config_is_immutable(): + cfg = ProviderConfig(name="github", enabled=True) + with pytest.raises(Exception): + cfg.name = "bitbucket" # type: ignore[misc] + + +def test_app_config_is_immutable(): + cfg = AppConfig( + scan_frequency=300, + log_level="INFO", + log_format="text", + dry_run=False, + health_port=8080, + 
providers={}, + rules=[], + ) + with pytest.raises(Exception): + cfg.dry_run = True # type: ignore[misc] + + +def test_scan_rule_destinations_default_empty(): + rule = ScanRule(pattern=".*", compiled=re.compile(".*")) + assert rule.destinations == {} + + +def test_rule_result_defaults(): + r = RuleResult(rule_pattern="x", provider="github", destination="main") + assert r.processed == 0 + assert r.created == 0 + assert r.skipped_existing == 0 + assert r.simulated == 0 + assert r.errors == 0 + + +def test_cycle_result_aggregation(): + r1 = RuleResult("p1", "github", "main", processed=3, created=1, skipped_existing=2) + r2 = RuleResult("p2", "bitbucket", "nonpro", processed=5, created=3, errors=1) + cycle = CycleResult(cycle_id=1, rule_results=[r1, r2]) + assert sum(r.processed for r in cycle.rule_results) == 8 + assert sum(r.created for r in cycle.rule_results) == 4 + assert sum(r.errors for r in cycle.rule_results) == 1 diff --git a/tests/test_providers.py b/tests/test_providers.py new file mode 100644 index 0000000..294526e --- /dev/null +++ b/tests/test_providers.py @@ -0,0 +1,411 @@ +"""Unit tests for GitHub and Bitbucket provider implementations.""" + +from __future__ import annotations + +import time +from unittest.mock import MagicMock, patch + +import pytest + +from pr_generator.providers.bitbucket import BitbucketError, BitbucketProvider +from pr_generator.providers.github import GitHubError, GitHubProvider + + +# ────────────────────────────────────────────────────────── +# Helpers +# ────────────────────────────────────────────────────────── + +def _mock_response(status_code: int = 200, json_data=None, text: str = ""): + resp = MagicMock() + resp.status_code = status_code + resp.text = text + resp.json.return_value = json_data if json_data is not None else {} + return resp + + +# ────────────────────────────────────────────────────────── +# GitHub App auth — token caching and JWT logic +# ────────────────────────────────────────────────────────── + +class 
TestGitHubAppAuth: + """Tests for GitHub App JWT and installation token caching.""" + + @pytest.fixture + def provider(self, github_app_config): + return GitHubProvider(github_app_config) + + def test_get_jwt_raises_without_credentials(self, provider): + provider._app_id = "" + with pytest.raises(RuntimeError, match="Missing GITHUB_APP_ID"): + provider._new_jwt() + + def test_get_jwt_cached_within_window(self, provider): + provider._jwt_cache = "cached-jwt" + provider._jwt_exp = time.time() + 300 # well within expiry + + with patch.object(provider, "_new_jwt") as mock_new_jwt: + result = provider._get_jwt() + + mock_new_jwt.assert_not_called() + assert result == "cached-jwt" + + def test_get_jwt_refreshed_when_expired(self, provider): + provider._jwt_cache = "old-jwt" + provider._jwt_exp = time.time() - 1 # already expired + + with patch.object(provider, "_new_jwt", return_value="new-jwt"): + result = provider._get_jwt() + + assert result == "new-jwt" + + def test_get_installation_token_cached(self, provider): + provider._install_token = "cached-token" + provider._install_token_exp = time.time() + 300 + + with patch.object(provider, "_request") as mock_req: + result = provider._get_installation_token() + + mock_req.assert_not_called() + assert result == "cached-token" + + def test_get_installation_token_fetched_when_missing(self, provider): + install_resp = _mock_response(201, { + "token": "ghs_fresh_token", + "expires_at": "2099-01-01T00:00:00Z", + }) + with patch.object(provider, "_request", return_value=install_resp): + result = provider._get_installation_token() + + assert result == "ghs_fresh_token" + assert provider._install_token == "ghs_fresh_token" + + def test_get_installation_token_uses_55min_default_on_bad_expiry(self, provider): + install_resp = _mock_response(201, {"token": "ghs_tok", "expires_at": "not-a-date"}) + before = time.time() + with patch.object(provider, "_request", return_value=install_resp): + provider._get_installation_token() + after = 
time.time() + + # 55 min default: expiry should be ~3300 seconds from now + assert 3290 < provider._install_token_exp - before < 3310 + (after - before) + + def test_resolve_installation_id_uses_config_value(self, provider): + """When installation_id is provided in config, no API call is made.""" + assert provider._installation_id == "67890" + with patch.object(provider, "_request") as mock_req: + result = provider._resolve_installation_id() + mock_req.assert_not_called() + assert result == "67890" + + def test_resolve_installation_id_fetches_and_caches_when_missing(self, provider): + """When installation_id is absent, it is fetched from the API and cached.""" + provider._installation_id = "" + api_resp = _mock_response(200, {"id": 99999}) + + with patch.object(provider, "_request", return_value=api_resp) as mock_req: + result1 = provider._resolve_installation_id() + # Second call should use cached value — no extra API call + result2 = provider._resolve_installation_id() + + assert result1 == "99999" + assert result2 == "99999" + assert provider._installation_id == "99999" # cached on instance + assert mock_req.call_count == 1 # only one API call total + + def test_resolve_installation_id_raises_when_api_returns_no_id(self, provider): + provider._installation_id = "" + with patch.object(provider, "_request", return_value=_mock_response(200, {})): + with pytest.raises(RuntimeError, match="Could not resolve installation id"): + provider._resolve_installation_id() + + def test_headers_use_installation_token_for_app_auth(self, provider): + with patch.object(provider, "_get_installation_token", return_value="ghs_tok"): + hdrs = provider._headers(installation=True) + assert hdrs["Authorization"] == "Bearer ghs_tok" + + def test_headers_use_jwt_for_non_installation_calls(self, provider): + with patch.object(provider, "_get_jwt", return_value="jwt.token.here"): + hdrs = provider._headers(installation=False) + assert hdrs["Authorization"] == "Bearer jwt.token.here" + + def 
test_new_jwt_generates_token_with_valid_credentials(self, provider): + with patch("pr_generator.providers.github.jwt.encode", return_value="signed.jwt") as mock_enc: + result = provider._new_jwt() + assert result == "signed.jwt" + call_payload = mock_enc.call_args[0][0] + assert call_payload["iss"] == "12345" + assert "iat" in call_payload and "exp" in call_payload + + def test_get_branches_returns_empty_when_app_config_incomplete(self): + from pr_generator.models import ProviderConfig + cfg = ProviderConfig( + name="github", type="github", enabled=True, + auth_method="app", owner="org", repo="repo", + app_id="", private_key="", # missing credentials + ) + prov = GitHubProvider(cfg) + assert prov.get_branches() == [] + + +# ────────────────────────────────────────────────────────── +# GitHub PAT provider (simpler — no token caching) +# ────────────────────────────────────────────────────────── + +class TestGitHubProviderPAT: + """Tests for GitHub provider using PAT authentication.""" + + @pytest.fixture + def provider(self, github_pat_config): + return GitHubProvider(github_pat_config) + + def test_name_matches_config(self, provider): + assert provider.name == "github" + + def test_get_branches_single_page(self, provider): + page_data = [{"name": "main"}, {"name": "feature/x"}] + with patch.object(provider, "_request", return_value=_mock_response(200, page_data)): + branches = provider.get_branches() + assert branches == ["main", "feature/x"] + + def test_get_branches_empty(self, provider): + with patch.object(provider, "_request", return_value=_mock_response(200, [])): + branches = provider.get_branches() + assert branches == [] + + def test_get_branches_multi_page(self, provider): + """Two pages: first returns 100 items (triggers next page), second returns 2.""" + page1 = [{"name": f"branch-{i}"} for i in range(100)] + page2 = [{"name": "extra-1"}, {"name": "extra-2"}] + responses = iter([_mock_response(200, page1), _mock_response(200, page2)]) + with 
patch.object(provider, "_request", side_effect=lambda *a, **kw: next(responses)): + branches = provider.get_branches() + assert len(branches) == 102 + + def test_check_existing_pr_found(self, provider): + pr_list = [{"number": 1, "title": "Merge feature/x into main"}] + with patch.object(provider, "_request", return_value=_mock_response(200, pr_list)): + assert provider.check_existing_pr("feature/x", "main") is True + + def test_check_existing_pr_not_found(self, provider): + with patch.object(provider, "_request", return_value=_mock_response(200, [])): + assert provider.check_existing_pr("feature/x", "main") is False + + def test_check_existing_pr_uses_cache(self, provider): + """Second call with same args should not make an HTTP request.""" + with patch.object(provider, "_request", return_value=_mock_response(200, [])) as mock_req: + provider.check_existing_pr("feature/x", "main") + provider.check_existing_pr("feature/x", "main") + assert mock_req.call_count == 1 + + def test_reset_cycle_cache_clears_pr_cache(self, provider): + with patch.object(provider, "_request", return_value=_mock_response(200, [])) as mock_req: + provider.check_existing_pr("feature/x", "main") + provider.reset_cycle_cache() + provider.check_existing_pr("feature/x", "main") + assert mock_req.call_count == 2 + + def test_create_pull_request_success(self, provider): + pr_resp = {"number": 42, "title": "Merge feature/x into main"} + with patch.object(provider, "_branch_exists", return_value=True), \ + patch.object(provider, "_request", return_value=_mock_response(201, pr_resp)): + provider.create_pull_request("feature/x", "main") + assert provider._pr_cache[("feature/x", "main")] is True + + def test_create_pull_request_skips_missing_branch(self, provider): + with patch.object(provider, "_branch_exists", return_value=False), \ + patch.object(provider, "_request") as mock_req: + provider.create_pull_request("feature/gone", "main") + mock_req.assert_not_called() + + def 
test_branch_exists_returns_true(self, provider): + with patch.object(provider, "_request", return_value=_mock_response(200, {"name": "feature/x"})): + assert provider._branch_exists("feature/x") is True + + def test_branch_exists_returns_false_on_404(self, provider): + err = GitHubError("GitHub API error 404: not found", status_code=404) + with patch.object(provider, "_request", side_effect=err): + assert provider._branch_exists("feature/gone") is False + + def test_branch_exists_reraises_non_404(self, provider): + err = GitHubError("GitHub API error 500: server error", status_code=500) + with patch.object(provider, "_request", side_effect=err): + with pytest.raises(GitHubError): + provider._branch_exists("feature/x") + + def test_headers_use_pat(self, provider): + hdrs = provider._headers() + assert hdrs["Authorization"] == "token ghp_testtoken123" + + def test_get_branches_returns_empty_when_config_incomplete(self, github_pat_config): + from pr_generator.models import ProviderConfig + cfg = ProviderConfig( + name="github", type="github", enabled=True, + auth_method="pat", owner="", repo="", token="", + ) + prov = GitHubProvider(cfg) + assert prov.get_branches() == [] + + def test_should_retry_true_on_exception(self, provider): + assert provider._should_retry(None, RuntimeError("conn error")) is True + + def test_should_retry_true_on_5xx(self, provider): + assert provider._should_retry(503, None) is True + + def test_should_retry_true_on_429(self, provider): + assert provider._should_retry(429, None) is True + + def test_should_retry_false_on_4xx(self, provider): + assert provider._should_retry(404, None) is False + + def test_branch_exists_uses_cache(self, provider): + provider._branch_cache["feature/cached"] = True + with patch.object(provider, "_request") as mock_req: + result = provider._branch_exists("feature/cached") + mock_req.assert_not_called() + assert result is True + + def test_request_delegates_to_retry_client(self, provider): + """_request must call 
request_with_retry (exercises the method body).""" + with patch("pr_generator.providers.github.request_with_retry", return_value=_mock_response(200)) as mock_retry: + provider._request("GET", "https://api.github.com/repos/org/repo/branches") + mock_retry.assert_called_once() + call_kw = mock_retry.call_args.kwargs + assert call_kw["method"] == "GET" + assert call_kw["exception_cls"] is GitHubError + + +# ────────────────────────────────────────────────────────── +# Bitbucket provider +# ────────────────────────────────────────────────────────── + +class TestBitbucketProvider: + """Tests for Bitbucket Cloud provider.""" + + @pytest.fixture + def provider(self, bitbucket_provider_config): + return BitbucketProvider(bitbucket_provider_config) + + def test_name_matches_config(self, provider): + assert provider.name == "bitbucket" + + def test_get_branches_single_page(self, provider): + data = {"values": [{"name": "main"}, {"name": "feature/y"}]} + with patch.object(provider, "_request", return_value=_mock_response(200, data)): + branches = provider.get_branches() + assert branches == ["main", "feature/y"] + + def test_get_branches_multi_page(self, provider): + """Uses 'next' key to determine pagination.""" + page1 = {"values": [{"name": "a"}, {"name": "b"}], "next": "http://page2"} + page2 = {"values": [{"name": "c"}]} + responses = iter([_mock_response(200, page1), _mock_response(200, page2)]) + with patch.object(provider, "_request", side_effect=lambda *a, **kw: next(responses)): + branches = provider.get_branches() + assert branches == ["a", "b", "c"] + + def test_get_branches_missing_token_returns_empty(self, bitbucket_provider_config): + from pr_generator.models import ProviderConfig + cfg = ProviderConfig( + name="bitbucket", type="bitbucket", enabled=True, + workspace="ws", repo_slug="rs", token="", + ) + prov = BitbucketProvider(cfg) + assert prov.get_branches() == [] + + def test_check_existing_pr_found(self, provider): + data = {"values": [{"id": 1}]} + with 
patch.object(provider, "_request", return_value=_mock_response(200, data)): + assert provider.check_existing_pr("feature/y", "main") is True + + def test_check_existing_pr_not_found(self, provider): + data = {"values": []} + with patch.object(provider, "_request", return_value=_mock_response(200, data)): + assert provider.check_existing_pr("feature/y", "main") is False + + def test_check_existing_pr_uses_query_filter(self, provider): + """Verify the q param is sent (efficient single-request lookup).""" + data = {"values": []} + with patch.object(provider, "_request", return_value=_mock_response(200, data)) as mock_req: + provider.check_existing_pr("feature/y", "main") + call_kwargs = mock_req.call_args + params = call_kwargs.kwargs.get("params", {}) + assert "q" in params + assert 'source.branch.name="feature/y"' in params["q"] + assert 'destination.branch.name="main"' in params["q"] + assert params.get("pagelen") == 1 + + def test_check_existing_pr_uses_cache(self, provider): + data = {"values": []} + with patch.object(provider, "_request", return_value=_mock_response(200, data)) as mock_req: + provider.check_existing_pr("feature/y", "main") + provider.check_existing_pr("feature/y", "main") + assert mock_req.call_count == 1 + + def test_reset_cycle_cache_clears_pr_cache(self, provider): + data = {"values": []} + with patch.object(provider, "_request", return_value=_mock_response(200, data)) as mock_req: + provider.check_existing_pr("feature/y", "main") + provider.reset_cycle_cache() + provider.check_existing_pr("feature/y", "main") + assert mock_req.call_count == 2 + + def test_create_pull_request_success(self, provider): + reviewers_data = {"values": [{"uuid": "{abc-123}"}]} + pr_data = {"id": 10, "title": "Merge feature/y into main"} + responses = iter([ + _mock_response(200, reviewers_data), + _mock_response(201, pr_data), + ]) + with patch.object(provider, "_request", side_effect=lambda *a, **kw: next(responses)): + provider.create_pull_request("feature/y", 
"main") + assert provider._pr_cache[("feature/y", "main")] is True + + def test_create_pull_request_includes_close_source_branch(self, provider): + """close_source_branch from config must appear in the POST payload.""" + reviewers_data = {"values": []} + pr_data = {"id": 11} + responses = iter([_mock_response(200, reviewers_data), _mock_response(201, pr_data)]) + with patch.object(provider, "_request", side_effect=lambda *a, **kw: next(responses)) as mock_req: + provider.create_pull_request("feature/y", "main") + pr_call = list(mock_req.call_args_list)[-1] + payload = pr_call.kwargs.get("json", {}) + assert "close_source_branch" in payload + assert payload["close_source_branch"] is True + + def test_bitbucket_error_carries_status_code(self): + """BitbucketError.__init__ must store the status_code attribute.""" + err = BitbucketError("boom", status_code=503) + assert str(err) == "boom" + assert err.status_code == 503 + + def test_bitbucket_error_defaults_status_code_to_none(self): + err = BitbucketError("network error") + assert err.status_code is None + + def test_should_retry_returns_true_on_exception(self, provider): + assert provider._should_retry(None, exc=ValueError("timeout")) is True + + def test_should_retry_returns_true_on_5xx(self, provider): + assert provider._should_retry(503, exc=None) is True + + def test_should_retry_returns_false_on_4xx(self, provider): + assert provider._should_retry(404, exc=None) is False + + def test_get_default_reviewers_returns_empty_on_missing_config(self): + from pr_generator.models import ProviderConfig + cfg = ProviderConfig( + name="bitbucket", type="bitbucket", enabled=True, + token="", workspace="", repo_slug="", + ) + prov = BitbucketProvider(cfg) + result = prov._get_default_reviewers() + assert result == [] + + def test_request_delegates_to_retry_client(self, provider): + """_request must call request_with_retry (exercises the method body).""" + with patch("pr_generator.providers.bitbucket.request_with_retry", 
return_value=_mock_response(200)) as mock_retry: + provider._request("GET", "https://api.bitbucket.org/2.0/repos/ws/r") + mock_retry.assert_called_once() + call_kw = mock_retry.call_args.kwargs + assert call_kw["method"] == "GET" + assert call_kw["exception_cls"] is BitbucketError diff --git a/tests/test_scanner.py b/tests/test_scanner.py new file mode 100644 index 0000000..1a2ebcd --- /dev/null +++ b/tests/test_scanner.py @@ -0,0 +1,182 @@ +"""Tests for the scan cycle orchestrator.""" + +import re +from unittest.mock import MagicMock, patch + +import pytest + +from pr_generator.models import AppConfig, CycleResult, ProviderConfig, ScanRule +from pr_generator.scanner import scan_cycle + + +def _make_config(rules, providers, dry_run=False): + return AppConfig( + scan_frequency=60, + log_level="DEBUG", + log_format="text", + dry_run=dry_run, + health_port=8080, + providers=providers, + rules=rules, + ) + + +def _mock_provider(name: str, branches: list[str], existing_prs: set | None = None): + prov = MagicMock() + prov.name = name + prov.get_branches.return_value = branches + prov.check_existing_pr.side_effect = lambda src, dst: (src, dst) in (existing_prs or set()) + prov.create_pull_request.return_value = None + prov.reset_cycle_cache.return_value = None + return prov + + +class TestScanCycle: + def test_creates_prs_for_matched_branches(self): + rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["feature/a", "feature/b", "hotfix/c", "main"]) + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + assert prov.create_pull_request.call_count == 2 + prov.create_pull_request.assert_any_call("feature/a", "main") + prov.create_pull_request.assert_any_call("feature/b", "main") + assert result.rule_results[0].created == 2 + assert result.rule_results[0].processed == 2 + + def test_skips_existing_prs(self): + 
rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["feature/a"], existing_prs={("feature/a", "main")}) + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + prov.create_pull_request.assert_not_called() + assert result.rule_results[0].skipped_existing == 1 + + def test_dry_run_does_not_create_prs(self): + rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["feature/a"]) + config = _make_config([rule], {"github": MagicMock()}, dry_run=True) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + prov.create_pull_request.assert_not_called() + assert result.rule_results[0].simulated == 1 + + def test_destination_branch_excluded_from_matches(self): + rule = ScanRule( + pattern=".*", + compiled=re.compile(".*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["main", "feature/a"]) + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + # "main" must be excluded; only "feature/a" should be processed + assert result.rule_results[0].processed == 1 + + def test_multiple_rules_processed(self): + rule1 = ScanRule( + pattern="nonpro/.*", + compiled=re.compile("nonpro/.*"), + destinations={"github": "develop"}, + ) + rule2 = ScanRule( + pattern="pro/.*", + compiled=re.compile("pro/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["nonpro/svc1", "pro/svc2", "unrelated"]) + config = _make_config([rule1, rule2], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + assert len(result.rule_results) == 2 + total_created = sum(r.created for r in result.rule_results) + assert total_created == 2 + + def test_provider_error_does_not_abort_other_rules(self): 
+ rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", ["feature/a"]) + prov.create_pull_request.side_effect = RuntimeError("API down") + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + assert result.rule_results[0].errors == 1 + + def test_unknown_provider_in_rule_is_skipped(self): + rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"unknown_provider": "main"}, + ) + prov = _mock_provider("github", ["feature/a"]) + config = _make_config([rule], {"github": MagicMock()}) + + # Should not raise; the rule simply has no matching active provider + result = scan_cycle(config, {"github": prov}, cycle_id=1) + assert result.rule_results == [] + + def test_reset_cycle_cache_called_on_all_providers(self): + rule = ScanRule(".*", re.compile(".*"), destinations={"github": "main"}) + prov = _mock_provider("github", []) + config = _make_config([rule], {"github": MagicMock()}) + + scan_cycle(config, {"github": prov}, cycle_id=1) + + prov.reset_cycle_cache.assert_called_once() + + def test_get_branches_error_returns_empty_branch_list(self): + """If get_branches raises, the provider gets an empty branch list (no crash).""" + rule = ScanRule( + pattern="feature/.*", + compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + prov = _mock_provider("github", []) + prov.get_branches.side_effect = RuntimeError("API down") + config = _make_config([rule], {"github": MagicMock()}) + + result = scan_cycle(config, {"github": prov}, cycle_id=1) + + # No branches → no PRs created, but no exception raised either + assert result.rule_results[0].processed == 0 + + def test_process_rule_unexpected_exception_logged(self): + """An exception raised outside _process_rule's inner try is caught by the futures loop.""" + rule = ScanRule( + pattern="feature/.*", + 
compiled=re.compile("feature/.*"), + destinations={"github": "main"}, + ) + # Make compiled.match raise — this happens outside the inner try in _process_rule + rule.compiled = MagicMock() + rule.compiled.match.side_effect = ValueError("regex engine failure") + prov = _mock_provider("github", ["feature/a"]) + config = _make_config([rule], {"github": MagicMock()}) + + # scan_cycle should catch the exception and not propagate it + result = scan_cycle(config, {"github": prov}, cycle_id=1) + assert result is not None + assert result.rule_results == [] From a27dc6311f6b2b0839683ed1721cf113fc72eac6 Mon Sep 17 00:00:00 2001 From: Adrian Martin Garcia Date: Wed, 25 Mar 2026 15:59:34 +0100 Subject: [PATCH 2/2] feat: upload code --- .github/workflows/docker-build.yml | 1 - README.md | 2 +- src/pr_generator/__main__.py | 3 +-- src/pr_generator/config.py | 2 +- tests/test_config.py | 16 ++++++++-------- 5 files changed, 11 insertions(+), 13 deletions(-) diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index b9bcaef..18a1a1f 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -84,7 +84,6 @@ jobs: org.opencontainers.image.vendor=devops-ia tags: | type=raw,value=${{ steps.semantic.outputs.new_release_git_tag }} - type=raw,value=latest - name: Set up QEMU if: steps.semantic.outputs.new_release_published == 'true' diff --git a/README.md b/README.md index 5ca1d39..25643a0 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ rules: | `dry_run` | bool | `false` | Simulate PR creation without API calls | | `health_port` | int | `8080` | Port for health HTTP server | | `providers..type` | string | *(key name)* | Provider implementation: `github` or `bitbucket`. Required when the key name is not `github` or `bitbucket` | -| `providers..enabled` | bool | — | Activate this provider instance | +| `providers..enabled` | bool | `false` | Activate this provider instance. 
If no providers are enabled the application starts in **idle mode** — it logs a warning and keeps running without performing any scans | | `providers..owner` | string | — | GitHub organisation or user *(GitHub only)* | | `providers..repo` | string | — | Repository name *(GitHub only)* | | `providers..app_id` | string | — | GitHub App ID *(GitHub App auth)* | diff --git a/src/pr_generator/__main__.py b/src/pr_generator/__main__.py index 20849c7..b1e5ebc 100644 --- a/src/pr_generator/__main__.py +++ b/src/pr_generator/__main__.py @@ -57,8 +57,7 @@ def main() -> None: logger.warning("[Core] Unknown provider type '%s' for '%s'; skipping.", pconf.type, pname) if not providers: - logger.error("[Core] Step: startup action=error detail=No active providers configured.") - sys.exit(1) + logger.warning("[Core] Step: startup action=warn detail=No active providers configured; running in idle mode") # Graceful shutdown stop = Event() diff --git a/src/pr_generator/config.py b/src/pr_generator/config.py index a7f0379..587ec42 100644 --- a/src/pr_generator/config.py +++ b/src/pr_generator/config.py @@ -47,7 +47,7 @@ def _load_from_file(path: str) -> AppConfig: if not rules: raise ValueError("[Core] config.yaml has no rules defined.") if not providers: - raise ValueError("[Core] config.yaml has no enabled providers.") + logger.info("[Core] Step: load_config action=warn detail=no enabled providers configured; running in idle mode") config = AppConfig( scan_frequency=int(raw.get("scan_frequency", 300)), diff --git a/tests/test_config.py b/tests/test_config.py index 169784e..15b2cf4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -389,8 +389,8 @@ def test_mixed_github_and_bitbucket_named_providers(self, tmp_path, monkeypatch) class TestConfigValidationEdgeCases: """Cover validation branches not exercised by the main test classes.""" - def test_no_enabled_providers_raises(self, tmp_path, monkeypatch): - """All providers disabled → ValueError about no enabled 
providers.""" + def test_no_enabled_providers_loads_idle(self, tmp_path, monkeypatch): + """All providers disabled → app loads successfully in idle mode (no ValueError).""" path = _write_config(tmp_path, """ providers: github: @@ -404,8 +404,8 @@ def test_no_enabled_providers_raises(self, tmp_path, monkeypatch): """) monkeypatch.setenv("CONFIG_PATH", path) from pr_generator.config import load_config - with pytest.raises(ValueError, match="no enabled providers"): - load_config() + config = load_config() + assert config.providers == {} def test_non_dict_provider_entry_skipped(self, tmp_path, monkeypatch): """A provider entry that isn't a dict is silently skipped.""" @@ -638,8 +638,8 @@ def test_empty_yaml_file_raises(self, tmp_path, monkeypatch): with pytest.raises(ValueError): load_config() - def test_null_providers_section_raises(self, tmp_path, monkeypatch): - """providers: with no value (null) must raise ValueError, not AttributeError.""" + def test_null_providers_section_loads_idle(self, tmp_path, monkeypatch): + """providers: with no value (null) loads successfully in idle mode.""" self._base_config(tmp_path, monkeypatch, """ providers: rules: @@ -648,8 +648,8 @@ def test_null_providers_section_raises(self, tmp_path, monkeypatch): bitbucket: main """) from pr_generator.config import load_config - with pytest.raises(ValueError, match="no enabled providers"): - load_config() + config = load_config() + assert config.providers == {} def test_null_rules_section_raises(self, tmp_path, monkeypatch): """rules: with no value (null) must raise ValueError, not AttributeError."""