diff --git a/.env.prod.example b/.env.prod.example
new file mode 100644
index 0000000..383e2cf
--- /dev/null
+++ b/.env.prod.example
@@ -0,0 +1,104 @@
+# =============================================================================
+# QueryWise — PRODUCTION environment (docker-compose.prod.yml)
+# =============================================================================
+#   cp .env.prod.example .env.prod   # then fill in every value marked CHANGE ME
+#   docker compose -f docker-compose.prod.yml --env-file .env.prod up -d --build
+#
+# Once copied, .env.prod holds real secrets — keep it out of version control (.gitignore already excludes it).
+# =============================================================================
+
+# -- Application --
+ENVIRONMENT=production
+DEBUG=false
+
+# -- App Database (pgvector) --
+# docker-compose.prod.yml builds DATABASE_URL from these; the app-db service
+# uses them too. For an external/managed Postgres, set DATABASE_URL directly
+# and drop the app-db service.
+POSTGRES_DB=querywise
+POSTGRES_USER=querywise
+POSTGRES_PASSWORD=CHANGE_ME_strong_db_password
+
+# -- Cache / Jobs --
+# REDIS_URL + JOB_BACKEND=arq are set by the compose file. arq worker runs as a
+# dedicated service.
+
+# -- Security --
+# Fernet key for connection-string encryption at rest. REQUIRED — rotating it
+# makes existing stored connections undecryptable.
+#   python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+ENCRYPTION_KEY=CHANGE_ME_generate_a_fernet_key
+
+# HS256 signing secret for session + magic-link JWTs. REQUIRED.
+#   python -c "import secrets; print(secrets.token_urlsafe(48))"
+JWT_SECRET=CHANGE_ME_generate_a_long_random_secret
+
+# Secrets backend: env (Fernet, default) | aws | gcp | azure | vault
+SECRETS_BACKEND=env
+
+# -- Auth --
+# NEVER true in production — this disables login entirely.
+DISABLE_AUTH=false
+AUTH_PROVIDER=local
+# Session cookie hardening. Secure cookies are only sent over HTTPS, so TLS must terminate at the edge or an upstream LB.
+AUTH_COOKIE_SECURE=true
+AUTH_COOKIE_SAMESITE=lax
+# Bootstrap admin (created on first boot). Set a password to enable local login.
+DEFAULT_ADMIN_EMAIL=admin@yourcompany.com
+DEFAULT_ADMIN_PASSWORD=CHANGE_ME_admin_password
+
+# Allowed CORS origins (JSON list) — your public frontend origin(s).
+# Same-origin (SPA + API behind one host) needs no cross-origin entry.
+CORS_ORIGINS=["https://querywise.yourcompany.com"]
+
+# -- Observability --
+LOG_LEVEL=INFO
+LOG_FORMAT=json
+ENABLE_METRICS=true
+SERVICE_NAME=querywise-backend
+OTEL_ENABLED=false
+# OTEL_EXPORTER_OTLP_ENDPOINT=http://otel-collector:4318/v1/traces
+
+# -- Scaling --
+# uvicorn worker processes per backend replica.
+UVICORN_WORKERS=4
+# Public HTTP port for the edge (map 443 + mount certs for direct TLS).
+HTTP_PORT=80
+
+# -- Sample DB auto-setup: OFF in prod (point at real warehouses instead) --
+AUTO_SETUP_SAMPLE_DB=false
+
+# -- LLM Configuration --
+# Vector dimension: 1536 for OpenAI text-embedding-3-small (also used alongside Anthropic chat models), 768 for Ollama nomic-embed-text
+EMBEDDING_DIMENSION=1536
+
+# ---- Anthropic ----
+# DEFAULT_LLM_PROVIDER=anthropic
+# DEFAULT_LLM_MODEL=claude-sonnet-4-5-20250929
+# ANTHROPIC_API_KEY=
+# OPENAI_API_KEY=                          # Required for embeddings
+# EMBEDDING_MODEL=text-embedding-3-small
+
+# ---- OpenAI ----
+DEFAULT_LLM_PROVIDER=openai
+DEFAULT_LLM_MODEL=gpt-5.2
+OPENAI_API_KEY=CHANGE_ME
+EMBEDDING_MODEL=text-embedding-3-small
+
+# ---- Azure OpenAI (in-VPC) ----
+# DEFAULT_LLM_PROVIDER=azure_openai
+# DEFAULT_LLM_MODEL=<chat-deployment>
+# AZURE_OPENAI_ENDPOINT=https://<resource>.openai.azure.com
+# AZURE_OPENAI_API_KEY=
+# AZURE_OPENAI_API_VERSION=2024-10-21
+# AZURE_OPENAI_DEPLOYMENT=<embedding-deployment>
+# EMBEDDING_MODEL=<embedding-deployment>
+
+# -- Query Defaults --
+DEFAULT_QUERY_TIMEOUT_SECONDS=30
+DEFAULT_MAX_ROWS=1000
+MAX_RETRY_ATTEMPTS=3
+
+# -- Rate Limiting --
+MAX_QUERIES_PER_MINUTE=30
+RATE_LIMIT_ENABLED=true
diff --git a/.github/actions/helm-deploy/action.yml b/.github/actions/helm-deploy/action.yml
new file mode 100644
index 0000000..1859509
--- /dev/null
+++ b/.github/actions/helm-deploy/action.yml
@@ -0,0 +1,72 @@
+name: Helm deploy
+description: Deploy QueryWise to a cluster with Helm, pinned to a specific image tag.
+
+inputs:
+  environment:
+    description: Target environment (staging | production). Selects the optional values-<env>.yaml overlay.
+    required: true
+  image_tag:
+    description: Image tag to deploy (both backend and frontend share it).
+    required: true
+  kube_config:
+    description: Base64-encoded kubeconfig for the target cluster.
+    required: true
+  namespace:
+    description: Kubernetes namespace.
+    required: false
+    default: querywise
+  release:
+    description: Helm release name.
+    required: false
+    default: querywise
+
+runs:
+  using: composite
+  steps:
+    - uses: azure/setup-helm@v4
+      with:
+        version: v3.16.0
+
+    - uses: azure/setup-kubectl@v4
+
+    - name: Write kubeconfig  # decode the base64 kubeconfig and export KUBECONFIG for the following steps
+      shell: bash
+      env: { KUBE_CONFIG_B64: "${{ inputs.kube_config }}" }  # pass the secret via env; never interpolate it into the script text
+      run: |
+        (umask 077; printf '%s' "$KUBE_CONFIG_B64" | base64 -d > "${RUNNER_TEMP}/kubeconfig")  # created 0600 from the start
+        echo "KUBECONFIG=${RUNNER_TEMP}/kubeconfig" >> "$GITHUB_ENV"
+
+    - name: Resolve per-environment values overlay  # sets outputs.arg to "--values <overlay>" if the file exists, else to empty
+      id: vals
+      shell: bash
+      run: |
+        f="deploy/helm/querywise/values-${{ inputs.environment }}.yaml"
+        if [ -f "$f" ]; then
+          echo "arg=--values $f" >> "$GITHUB_OUTPUT"
+          echo "Using overlay $f"
+        else
+          echo "arg=" >> "$GITHUB_OUTPUT"
+          echo "No overlay at $f — using chart defaults + --set."
+        fi
+
+    - name: Helm upgrade  # install-or-upgrade the release, pinning both images to TAG
+      shell: bash
+      env:
+        OWNER: ${{ github.repository_owner }}  # may contain upper-case letters; lower-cased below because image repository names must be lowercase
+        TAG: ${{ inputs.image_tag }}
+      run: |
+        helm upgrade --install "${{ inputs.release }}" deploy/helm/querywise \
+          --namespace "${{ inputs.namespace }}" --create-namespace \
+          ${{ steps.vals.outputs.arg }} \
+          --set image.backend.repository="ghcr.io/${OWNER,,}/querywise-backend" \
+          --set image.backend.tag="${TAG}" \
+          --set image.frontend.repository="ghcr.io/${OWNER,,}/querywise-frontend" \
+          --set image.frontend.tag="${TAG}" \
+          --wait --atomic --timeout 10m
+
+    - name: Rollout summary  # informational only: "|| true" keeps a failed status/get from failing the step
+      shell: bash
+      run: |
+        helm status "${{ inputs.release }}" --namespace "${{ inputs.namespace }}" || true
+        kubectl get pods -n "${{ inputs.namespace }}" \
+          -l app.kubernetes.io/instance="${{ inputs.release }}" || true
diff --git a/.github/workflows/deploy-validate.yml b/.github/workflows/deploy-validate.yml
new file mode 100644
index 0000000..9e302b3
--- /dev/null
+++ b/.github/workflows/deploy-validate.yml
@@ -0,0 +1,69 @@
+name: Deploy artifacts
+
+# Validates the deployment artifacts so a broken chart or module never merges.
+# Runs only when something under deploy/ changes (or, on pull requests, this workflow file).
+
+on:
+  pull_request:
+    paths:
+      - "deploy/**"
+      - ".github/workflows/deploy-validate.yml"
+  push:
+    branches: [main]
+    paths:
+      - "deploy/**"
+
+jobs:
+  helm:
+    name: Helm lint + kubeconform
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: azure/setup-helm@v4
+        with:
+          version: v3.16.0
+
+      - name: Helm lint
+        run: helm lint deploy/helm/querywise
+
+      - name: Install kubeconform
+        run: |
+          curl -sSL -o /tmp/kubeconform.tar.gz \
+            https://github.com/yannh/kubeconform/releases/download/v0.6.7/kubeconform-linux-amd64.tar.gz
+          tar -xzf /tmp/kubeconform.tar.gz -C /tmp
+          sudo mv /tmp/kubeconform /usr/local/bin/
+
+      - name: Render + schema-validate
+        run: |
+          set -o pipefail  # the default `bash -e` shell has no pipefail: a failed `helm template` must fail the step
+          helm template querywise deploy/helm/querywise \
+            --set secrets.data.DATABASE_URL=postgresql+asyncpg://u:p@db:5432/querywise \
+            --set secrets.data.REDIS_URL=redis://redis:6379/0 --set secrets.data.ENCRYPTION_KEY=x --set secrets.data.JWT_SECRET=y \
+          | kubeconform -strict -summary -kubernetes-version 1.29.0
+
+  terraform:
+    name: Terraform fmt + validate (${{ matrix.cloud }})
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        cloud: [aws, gcp, azure]
+    defaults:
+      run:
+        working-directory: deploy/terraform/${{ matrix.cloud }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_version: "1.9.5"
+
+      - name: Format check
+        run: terraform fmt -check -recursive
+
+      - name: Init (no backend)
+        run: terraform init -backend=false -input=false
+
+      - name: Validate
+        run: terraform validate
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..81f93b1
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,104 @@
+name: Release
+
+# Build + push the two production images, then deploy with Helm:
+#   push to main  -> build -> deploy to STAGING
+#   push tag v*   -> build -> deploy to PRODUCTION (gated by the environment's
+#                             required reviewers)
+#   manual        -> build only (workflow_dispatch)
+#
+# Required GitHub Environment secrets:
+#   staging / production:  KUBE_CONFIG  (base64-encoded kubeconfig for the cluster)
+# Images push to GHCR using the built-in GITHUB_TOKEN (packages: write).
+
+on:
+  push:
+    branches: [main]
+    tags: ["v*"]
+  workflow_dispatch:
+
+concurrency:
+  group: release-${{ github.ref }}
+  cancel-in-progress: false
+
+env:
+  REGISTRY: ghcr.io
+
+jobs:
+  images:
+    name: Build & push (${{ matrix.component }})
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    strategy:
+      matrix:
+        include:
+          - component: backend
+            context: backend
+            dockerfile: backend/Dockerfile.prod
+          - component: frontend
+            context: frontend
+            dockerfile: frontend/Dockerfile.prod
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: docker/setup-buildx-action@v3
+
+      - name: Log in to GHCR
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Image metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ github.repository_owner }}/querywise-${{ matrix.component }}
+          tags: |
+            type=raw,value=${{ github.sha }}
+            type=ref,event=branch
+            type=semver,pattern={{version}}
+            type=raw,value=latest,enable={{is_default_branch}}
+
+      - name: Build & push
+        uses: docker/build-push-action@v6
+        with:
+          context: ${{ matrix.context }}
+          file: ${{ matrix.dockerfile }}
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          # Frontend is built same-origin; nginx proxies /api to the backend.
+          build-args: ${{ matrix.component == 'frontend' && 'VITE_API_URL=' || '' }}
+          cache-from: type=gha,scope=${{ matrix.component }}  # per-component scope: matrix legs would otherwise overwrite each other's cache
+          cache-to: type=gha,mode=max,scope=${{ matrix.component }}
+
+  deploy-staging:
+    name: Deploy to staging
+    needs: images
+    if: github.event_name == 'push' && github.ref == 'refs/heads/main'  # push only: a manual workflow_dispatch from main stays build-only
+    runs-on: ubuntu-latest
+    environment: staging
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/helm-deploy
+        with:
+          environment: staging
+          image_tag: ${{ github.sha }}
+          kube_config: ${{ secrets.KUBE_CONFIG }}
+
+  deploy-prod:
+    name: Deploy to production
+    needs: images
+    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')  # push only: a manual workflow_dispatch on a tag ref stays build-only
+    runs-on: ubuntu-latest
+    environment: production
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/helm-deploy
+        with:
+          environment: production
+          image_tag: ${{ github.sha }}
+          kube_config: ${{ secrets.KUBE_CONFIG }}
diff --git a/.gitignore b/.gitignore
index e8a769b..fc77c21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,6 +48,8 @@ Thumbs.db
 
 # Environment / secrets
 .env
+.env.prod
+.env.*.local
 *.pem
 *.key
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1f6a69f..8753a21 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -98,6 +98,36 @@ product surface; all optional dependencies degrade gracefully).
   so the lineage tests run (they `importorskip` past `sqlglot` when the extra is absent).
 - **Deferred to a later milestone:** column profiling (null rate / distinct counts / sample values).
 
+### Added (Packaging & deployability)
+- **Hardened production images** — multi-stage, non-root `backend/Dockerfile.prod`
+  (builder venv → slim runtime, `curl` healthcheck, prod extras only) and
+  `frontend/Dockerfile.prod` (Vite build → unprivileged nginx serving the SPA and
+  reverse-proxying `/api` + `/mcp`). The dev `Dockerfile`s are untouched.
+- **Production compose** (`docker-compose.prod.yml`) — pgvector app-db, Redis,
+  one-shot `migrate` service (gated so backend replicas never race on Alembic),
+  backend (uvicorn), arq `worker`, and the nginx edge. Configured by `.env.prod`
+  (`.env.prod.example` template).
+- **Helm chart** (`deploy/helm/querywise/`, EKS/GKE/AKS) — backend Deployment +
+  HPA + PDB, arq `worker`, frontend + PDB, path-based ingress (`/api`+`/mcp` →
+  backend, `/` → SPA), ServiceAccount, and a `pre-install`/`pre-upgrade`
+  migration hook Job. Secrets via a chart-created Secret or `existingSecret`
+  (external-secrets seam). Validated with `helm lint` + `kubeconform`.
+- **Terraform modules** (`deploy/terraform/{aws,gcp,azure}/`) — each provisions
+  the data plane + secrets in the customer's own account/VPC: managed Postgres 16
+  (pgvector) + managed Redis + a secret store with the assembled DSNs/keys +
+  object storage + optional networking + an identity/policy for external-secrets.
+  Compute (cluster) is intentionally separate state. `terraform validate`-clean.
+- **CI/CD** (`.github/workflows/`) — `deploy-validate.yml` lints the chart
+  (`kubeconform`) and Terraform (`fmt`/`validate`) on PRs; `release.yml` builds +
+  pushes both images to GHCR and deploys with Helm (`main` → staging, tag `v*` →
+  production, `--wait --atomic`) via a reusable composite action.
+- **Ops** (`deploy/ops/`) — `backup.sh`/`restore.sh` (encrypted `pg_dump`/
+  `pg_restore`), an in-cluster backup CronJob example, a DR runbook (backup/
+  restore, region rebuild, upgrade path, quarterly credential rotation), and a
+  production config reference.
+- **Deferred:** the managed-SaaS control plane (provisioning/billing/fleet
+  upgrades) — additive, since each tenant is already an isolated instance.
+
 ## [1.0.0] - 2026-06-04
 
 First stable release: natural-language-to-SQL with a semantic metadata layer.
diff --git a/CLAUDE.md b/CLAUDE.md
index ee60f1f..3f4e0ed 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -269,3 +269,21 @@ Makes the semantic layer discoverable and trustworthy. Two milestones; migration
 - **Lineage** (`app/services/lineage_service.py`, `ArtifactDependency` model): saved-query `pinned_sql` and metric `sql_expression` are parsed with **sqlglot** (optional `[lineage]` extra; lazy import, degrades to a no-op if absent) into table/column edges, recomputed on create/update (best-effort, never blocks the write). Per-artifact "what this touches" at `GET .../{saved-queries|metrics}/{id}/lineage`; impact view "what depends on this table" at `GET .../catalog/lineage?table=&column=`. Connector type → sqlglot dialect via `dialect_for`.
 - **Endpoints:** `/connections/{id}/catalog/{search,facets,lineage}`, plus `/status`, `/versions`, `/versions/{v}`, and `/lineage` sub-resources on the metric/glossary/sample-query/saved-query routers.
 - **Heads-up:** existing rows migrate to `status='draft'`, `version=1`. The saved-query PUT routes any `status` change through the governed lifecycle (no raw status writes). sqlglot is a new optional dep — install the `[lineage]` extra (or rebuild the backend image) for lineage to populate.
+
+## Packaging & deployability (parallel track)
+
+Production deployment artifacts under `deploy/` (+ root prod compose), separate from the dev `docker-compose.yml` / `Dockerfile`s (which stay untouched for local work). The whole **Packaging & deployability** parallel track from `planfull.md` is complete: hardened images, prod compose, Helm chart, Terraform for AWS + GCP + Azure, CI/CD (build/push/deploy), and ops (backup/restore, DR runbook, config reference). The only deferred item is the **SaaS control plane** (provisioning/billing/fleet upgrades), which is additive and build-on-demand. Overview: `deploy/README.md`.
+
+- **Hardened images:** `backend/Dockerfile.prod` (multi-stage: builder venv → slim runtime, non-root uid 1001, `curl` healthcheck on `/api/v1/health/live`, `uvicorn --workers ${UVICORN_WORKERS:-4}`, prod extras only — no `[dev]`) and `frontend/Dockerfile.prod` (Vite build → `nginxinc/nginx-unprivileged:1.27-alpine`, non-root uid 101, listens 8080). `.dockerignore` in both dirs.
+- **Edge:** `frontend/nginx.conf` serves the SPA bundle (with client-route fallback) and reverse-proxies `/api`, `/mcp` (buffering off for SSE), and health to the backend **same-origin**. Uses Docker's embedded resolver (`127.0.0.11`) + a `set $backend` variable `proxy_pass` so the edge boots even while the backend is starting (a static `upstream` would make nginx refuse to start). Internal `/healthz` for the container healthcheck. TLS terminates here (mount certs + add a 443 block) or upstream at a LB.
+- **Same-origin build:** `frontend/src/api/client.ts` uses `?? 'http://localhost:8000'` (not `||`) so the prod build with `VITE_API_URL=""` calls the API at relative `/api/v1`; unset (dev) still falls back to the local backend.
+- **Prod stack:** `docker-compose.prod.yml` — `app-db` (pgvector, no host port), `redis` (cache + arq), one-shot `migrate` (`alembic upgrade head`, gated by `service_completed_successfully` so backend replicas never race), `backend` (uvicorn, `JOB_BACKEND=arq`), `worker` (`arq app.jobs.worker.WorkerSettings`), `frontend` (edge, the only published port). Run: `cp .env.prod.example .env.prod` → edit → `docker compose -f docker-compose.prod.yml --env-file .env.prod up -d --build`.
+- **Config:** `.env.prod.example` is the prod-tuned template (CHANGE_ME secrets, `DISABLE_AUTH=false`, `AUTH_COOKIE_SECURE=true`, `LOG_FORMAT=json`, `AUTO_SETUP_SAMPLE_DB=false`). `.env.prod` is gitignored.
+- **Helm chart** (`deploy/helm/querywise/`, EKS/GKE/AKS): backend Deployment (uvicorn) + HPA + PDB, dedicated arq `worker` Deployment, frontend Deployment + PDB, two Services, ingress (path-based: `/api`+`/mcp`→backend, `/`→frontend SPA — same-origin), ServiceAccount (IRSA/Workload-Identity annotations). Managed Postgres+pgvector and Redis are **expected out-of-cluster** (supply DSNs via the release Secret). Config split: non-secret env → ConfigMap, secrets → chart-created Secret **or** `secrets.existingSecret` (external-secrets/sealed-secrets seam). **Migration:** `alembic upgrade head` runs as a `pre-install`/`pre-upgrade` hook Job (weight `-5`); the ConfigMap+Secret are also hooks (weight `-10`) so they exist first, and the Job gates new backend pods so replicas never race. Validate: `helm lint` + `helm template ... | kubeconform -strict` (both pass). `values-production.example.yaml` is a realistic override; `deploy/README.md` has the install flow.
+- **Terraform** (`deploy/terraform/{aws,gcp,azure}/`): each provisions the **data plane + secrets** the chart consumes, in the customer's own account/VPC, with the **same shape** — managed Postgres 16 (pgvector via app migrations) + managed Redis (cache + arq) + a secret store holding the assembled DSNs+keys (keys map 1:1 to backend env → external-secrets `dataFrom` into the `querywise-secrets` k8s Secret) + object storage (exports/backups) + optional network + an identity/policy for external-secrets to read the secret. DB password + JWT secret auto-generate if unset; `ENCRYPTION_KEY` is required (Fernet). **Compute (EKS/GKE/AKS) is deliberately out of scope** — BYO or the upstream cluster module, kept in a separate state so cluster rebuilds never risk the DB.
+  - **AWS:** RDS (Multi-AZ, gp3, `rds.force_ssl`) + ElastiCache + Secrets Manager + S3; IAM policy for the external-secrets IRSA role.
+  - **GCP:** Cloud SQL (private IP via PSA peering) + Memorystore + Secret Manager + GCS; a service account with `secretAccessor` for Workload Identity.
+  - **Azure:** Postgres flexible server (VNet-integrated, `azure.extensions=VECTOR` allow-list) + Cache for Redis + Key Vault + Blob; a user-assigned managed identity with Key Vault read for Workload Identity.
+  - All three pass `tofu/terraform validate` + `fmt`. `*.tfvars` gitignored; lockfiles committed.
+- **CI/CD** (`.github/workflows/`): `ci.yml` (existing — backend tests gating + advisory lint/type, frontend lint/build) is unchanged. **`deploy-validate.yml`** runs on PRs touching `deploy/**` — `helm lint` + `helm template | kubeconform -strict`, and `terraform fmt -check`/`validate` across aws/gcp/azure (matrix). **`release.yml`** builds+pushes both images to GHCR (`querywise-{backend,frontend}`, tagged SHA/branch/semver/latest, gha cache) then deploys via the `.github/actions/helm-deploy` composite action: push to `main` → **staging**, tag `v*` → **production** (gate with environment required-reviewers). Deploys pin the release to the commit SHA with `--wait --atomic` (auto-rollback) and inject only image coords; per-env overlay `values-<environment>.yaml` (committed, non-secret) is applied if present. Each environment needs a `KUBE_CONFIG` secret (base64 kubeconfig); clusters run external-secrets to sync `querywise-secrets`. Lint with `actionlint`.
+- **Ops** (`deploy/ops/`): `backup.sh` (`pg_dump` custom format → AES-256/openssl PBKDF2 → `querywise-<ts>.dump.enc`, optional S3/GCS upload, local retention prune) + `restore.sh` (decrypt → `pg_restore --clean --if-exists`, guarded by `RESTORE_CONFIRM=yes`); both strip the `+asyncpg` suffix from `DATABASE_URL`, shellcheck-clean. `backup-cronjob.example.yaml` schedules backups in-cluster (postgres:16 image, script via ConfigMap, `BACKUP_PASSPHRASE`+`DATABASE_URL` from `querywise-secrets`). `RUNBOOK.md` covers backup/restore, managed-DB PITR, full-region DR rebuild, the Alembic upgrade path (migrations only run via the Helm/compose migrate hook), and quarterly credential rotation — **`ENCRYPTION_KEY` must not be blind-rotated** (it Fernet-encrypts stored connection strings; re-encrypt each connection before swapping). `config-reference.md` is the production-focused settings catalogue (the full list is in the env-vars table above / `.env.example`).
diff --git a/README.md b/README.md
index 332dc38..c33519c 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,7 @@ A full-stack application that translates natural language questions into SQL que
 - **Data catalog** — hybrid search (embeddings + keyword) across tables, columns, metrics, glossary, and knowledge, with facets and certified-first ranking
 - **Lineage** — sqlglot parses saved-query/metric SQL to show what each touches and what depends on a given table (impact view)
 - **Production hardening** — rate limiting, async job queue, OpenTelemetry tracing, structured logging, health probes
+- **Deploy anywhere** — hardened non-root images, a production Docker Compose stack, a Helm chart (HPA/PDB/ingress/migration hook), Terraform for AWS/GCP/Azure (managed Postgres+pgvector, Redis, secrets, in your own VPC), GitHub Actions CI/CD (build → staging → prod), and ops tooling (encrypted backup/restore + DR runbook) — see [`deploy/`](deploy/)
 
 
 ---
@@ -391,6 +392,28 @@ For development, `docker compose up app-db sample-db` starts both databases with
 
 ---
 
+## Production Deployment
+
+The `docker compose up` flow above is for **local development**. For production,
+QueryWise ships a full set of deployment artifacts under [`deploy/`](deploy/) —
+the same build-once images configured entirely by environment:
+
+| Target | Where | Best for |
+|--------|-------|----------|
+| **Docker Compose (prod)** | [`docker-compose.prod.yml`](docker-compose.prod.yml) | Small / on-prem, single host |
+| **Helm chart** | [`deploy/helm/querywise/`](deploy/helm/querywise) | EKS / GKE / AKS |
+| **Terraform** | [`deploy/terraform/{aws,gcp,azure}/`](deploy/terraform) | Managed Postgres+pgvector, Redis, secrets — in your own VPC |
+| **CI/CD** | [`.github/workflows/release.yml`](.github/workflows/release.yml) | Build → push images → Helm deploy (staging → prod) |
+| **Ops** | [`deploy/ops/`](deploy/ops) | Encrypted backup/restore, DR runbook, config reference |
+
+Highlights: hardened multi-stage **non-root** images, a one-shot Alembic
+migration that runs before new pods roll (replicas never race), backend
+autoscaling + PodDisruptionBudgets, secrets via the **external-secrets** seam,
+and a same-origin SPA behind an nginx edge. Start at [`deploy/README.md`](deploy/README.md);
+the production env template is [`.env.prod.example`](.env.prod.example).
+
+---
+
 ## Environment Variables
 
 | Variable | Default | Description |
@@ -425,10 +448,18 @@ For development, `docker compose up app-db sample-db` starts both databases with
 
 ```
 querywise/
-├── docker-compose.yml              # 4 services: app-db, sample-db, backend, frontend
-├── .env.example                    # Environment variable template
+├── docker-compose.yml              # Dev: app-db, sample-db, backend, frontend
+├── docker-compose.prod.yml         # Prod: + redis, migrate, arq worker, nginx edge
+├── .env.example                    # Environment variable template (dev)
+├── .env.prod.example               # Environment variable template (prod)
 ├── CLAUDE.md                       # Claude Code project conventions
+├── CHANGELOG.md                    # Release notes
 ├── README.md                       # This file
+├── deploy/                         # Production deployment artifacts
+│   ├── helm/querywise/             # Helm chart (HPA, PDB, ingress, migration hook)
+│   ├── terraform/{aws,gcp,azure}/  # Managed Postgres+pgvector, Redis, secrets
+│   └── ops/                        # backup/restore, DR runbook, config reference
+├── .github/workflows/              # CI (tests/lint) + release (build → deploy)
 │
 ├── backend/
 │   ├── Dockerfile
diff --git a/backend/.dockerignore b/backend/.dockerignore
new file mode 100644
index 0000000..979fd6b
--- /dev/null
+++ b/backend/.dockerignore
@@ -0,0 +1,16 @@
+# Keep the build context lean + avoid baking local state into the image.
+__pycache__/
+*.py[cod]
+*.egg-info/
+.mypy_cache/
+.pytest_cache/
+.ruff_cache/
+.venv/
+venv/
+tests/
+.env
+.env.*
+*.sqlite
+*.log
+.git/
+.DS_Store
diff --git a/backend/Dockerfile.prod b/backend/Dockerfile.prod
new file mode 100644
index 0000000..17b87c5
--- /dev/null
+++ b/backend/Dockerfile.prod
@@ -0,0 +1,71 @@
+# syntax=docker/dockerfile:1
+#
+# Hardened, multi-stage production image for the QueryWise backend.
+#   * builder stage compiles deps into an isolated venv (needs gcc/libpq-dev)
+#   * runtime stage carries only the venv + app + libpq runtime, runs non-root
+#
+# Build:  docker build -f Dockerfile.prod -t querywise-backend:prod .
+# Used by docker-compose.prod.yml for the backend, worker, and migrate services.
+
+# ---- builder ---------------------------------------------------------------
+FROM python:3.12-slim AS builder
+
+ENV PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        gcc libpq-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# Isolated venv we can copy wholesale into the runtime image.
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+
+WORKDIR /build
+# Copy only what the package + build needs (keeps the layer cache warm).
+COPY pyproject.toml ./
+COPY app ./app
+COPY alembic ./alembic
+COPY alembic.ini ./
+COPY scripts ./scripts
+
+# Production extras only — no [dev]. Non-editable install so the venv is
+# self-contained and the source tree isn't needed at runtime.
+RUN pip install ".[llm,bigquery,databricks,export,lineage,observability,jobs,scheduling]"
+
+# ---- runtime ---------------------------------------------------------------
+FROM python:3.12-slim AS runtime
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PATH="/opt/venv/bin:$PATH"
+
+# libpq for asyncpg/psycopg at runtime; curl for the container healthcheck.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        libpq5 curl && \
+    rm -rf /var/lib/apt/lists/* && \
+    # Non-root runtime user.
+    groupadd --system --gid 1001 querywise && \
+    useradd --system --uid 1001 --gid querywise --no-create-home querywise
+
+COPY --from=builder /opt/venv /opt/venv
+
+WORKDIR /app
+# alembic.ini + migrations are needed by the `migrate` service; app code is the
+# package itself (installed into the venv) but we keep the tree for alembic env.
+COPY --chown=querywise:querywise alembic ./alembic
+COPY --chown=querywise:querywise alembic.ini ./
+COPY --chown=querywise:querywise app ./app
+COPY --chown=querywise:querywise scripts ./scripts
+
+USER querywise
+
+EXPOSE 8000
+
+# Liveness probe hits the app's own /health/live (process-only, no DB/LLM).
+HEALTHCHECK --interval=15s --timeout=5s --start-period=20s --retries=3 \
+    CMD curl -fsS http://localhost:8000/api/v1/health/live || exit 1
+
+# Workers come from env so the same image scales without a rebuild; `exec` lets uvicorn replace sh as PID 1 so it receives SIGTERM.
+CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port 8000 --workers ${UVICORN_WORKERS:-4}"]
diff --git a/deploy/README.md b/deploy/README.md
new file mode 100644
index 0000000..079454d
--- /dev/null
+++ b/deploy/README.md
@@ -0,0 +1,128 @@
+# Deploying QueryWise
+
+Production deployment artifacts. Single-tenant per deployment; isolation is by
+workspace within the auto-created default organization. The app is a
+**build-once image configured entirely by env** — the same backend/frontend
+images run under Docker Compose, Helm, or any of the cloud targets.
+
+| Target | Where | Best for |
+|--------|-------|----------|
+| **Docker Compose (prod)** | [`../docker-compose.prod.yml`](../docker-compose.prod.yml) | Small / on-prem, single host |
+| **Helm chart** | [`helm/querywise/`](helm/querywise) | EKS / GKE / AKS |
+| **Terraform — AWS** | [`terraform/aws/`](terraform/aws) | RDS pgvector + ElastiCache + Secrets Manager + S3, in your VPC |
+| **Terraform — GCP** | [`terraform/gcp/`](terraform/gcp) | Cloud SQL pgvector + Memorystore + Secret Manager + GCS |
+| **Terraform — Azure** | [`terraform/azure/`](terraform/azure) | Postgres flexible server + Cache for Redis + Key Vault + Blob |
+| **Ops** | [`ops/`](ops) | Backup/restore, DR runbook, config reference |
+
+External dependencies (not bundled in the Helm chart): **managed PostgreSQL 16
+with the `pgvector` extension** and **Redis** (cache + the arq job queue). The
+Terraform modules provision these; for the chart you supply their DSNs via the
+release Secret.
+
+## Images
+
+Built from `backend/Dockerfile.prod` and `frontend/Dockerfile.prod`:
+
+```bash
+docker build -f backend/Dockerfile.prod  -t ghcr.io/your-org/querywise-backend:1.0.0  backend
+docker build -f frontend/Dockerfile.prod -t ghcr.io/your-org/querywise-frontend:1.0.0 frontend
+docker push ghcr.io/your-org/querywise-backend:1.0.0
+docker push ghcr.io/your-org/querywise-frontend:1.0.0
+```
+
+Both run **non-root**. The frontend image serves the SPA; the backend image runs
+uvicorn, and the same image runs the arq worker as a separate service. The SPA is
+built same-origin (`VITE_API_URL=""`), so the edge / ingress routes `/api` +
+`/mcp` to the backend and everything else to the frontend.
+
+## Helm
+
+```bash
+# 1. Provide secrets — ideally via external-secrets / sealed-secrets:
+kubectl create namespace querywise
+kubectl -n querywise create secret generic querywise-secrets \
+  --from-literal=DATABASE_URL='postgresql+asyncpg://user:pass@host:5432/querywise' \
+  --from-literal=REDIS_URL='redis://host:6379/0' \
+  --from-literal=ENCRYPTION_KEY='...' \
+  --from-literal=JWT_SECRET='...' \
+  --from-literal=OPENAI_API_KEY='...'
+
+# 2. Install (a pre-install/pre-upgrade hook runs `alembic upgrade head` before pods roll):
+helm upgrade --install querywise deploy/helm/querywise \
+  -n querywise \
+  -f deploy/helm/querywise/values-production.example.yaml \
+  --set secrets.existingSecret=querywise-secrets
+```
+
+Key chart features:
+
+- **Migration hook** — `alembic upgrade head` runs as a `pre-install`/`pre-upgrade`
+  Job (ordered after the config/secret hooks) so schema changes land before new
+  backend code serves and the N replicas never race.
+- **Scaling** — backend HPA (CPU), PodDisruptionBudgets on backend + frontend,
+  dedicated arq `worker` Deployment.
+- **Secrets seam** — `secrets.existingSecret` to bring your own (external-secrets
+  operator, sealed-secrets, cloud sync) instead of putting values in the release.
+- **Service account annotations** — for IRSA (EKS) / Workload Identity (GKE) /
+  Azure Workload Identity.
+
+See [`helm/querywise/values.yaml`](helm/querywise/values.yaml) for the full set
+of knobs and [`values-production.example.yaml`](helm/querywise/values-production.example.yaml)
+for a realistic production override.
+
+### Validate locally
+
+```bash
+helm lint deploy/helm/querywise
+helm template querywise deploy/helm/querywise | kubeconform -strict -summary
+```
+
+## CI/CD
+
+Two GitHub Actions workflows under [`../.github/workflows`](../.github/workflows):
+
+- **`deploy-validate.yml`** (PRs touching `deploy/**`) — `helm lint` + `helm
+  template | kubeconform -strict`, and `terraform fmt -check` + `validate` for
+  each of aws/gcp/azure. Keeps a broken chart or module from merging.
+- **`release.yml`** — builds + pushes both images to GHCR
+  (`ghcr.io/<owner>/querywise-{backend,frontend}`, tagged with the commit SHA,
+  branch, semver, and `latest`), then deploys with Helm via the
+  [`helm-deploy`](../.github/actions/helm-deploy) composite action:
+  - **push to `main`** → deploy to the **staging** environment
+  - **push tag `v*`** → deploy to the **production** environment (gate it with
+    required reviewers in the environment's protection rules for manual approval)
+  - **manual run** → build only
+
+Both deploys pin the release to the exact commit SHA and run Helm with
+`--wait --atomic` (so a failed rollout auto-reverts). They inject only the image
+coordinates; everything else comes from the chart defaults plus an optional
+committed overlay `deploy/helm/querywise/values-<environment>.yaml` (see the
+`values-staging.example.yaml` / `values-production.example.yaml` examples).
+
+### Required GitHub config
+
+| What | Where | Value |
+|------|-------|-------|
+| `KUBE_CONFIG` | Environment secret on **staging** and **production** | base64-encoded kubeconfig for that cluster |
+| Required reviewers | **production** environment protection rules | who approves prod deploys |
+| Packages: write | repo default `GITHUB_TOKEN` | already granted in the workflow |
+
+The clusters are expected to run the **external-secrets operator** syncing the
+cloud secret store (provisioned by Terraform) into the `querywise-secrets`
+Kubernetes Secret the chart references.
+
+## Operations
+
+Day-2 procedures live in [`ops/`](ops):
+
+- **Backups** — [`ops/backup.sh`](ops/backup.sh): `pg_dump` (custom format) →
+  AES-256 (openssl) → `querywise-<ts>.dump.enc`, with optional S3/GCS upload and
+  local retention. Schedule it in-cluster with
+  [`ops/backup-cronjob.example.yaml`](ops/backup-cronjob.example.yaml).
+- **Restore** — [`ops/restore.sh`](ops/restore.sh): decrypt → `pg_restore
+  --clean --if-exists` (guarded by `RESTORE_CONFIRM=yes`).
+- **Runbook** — [`ops/RUNBOOK.md`](ops/RUNBOOK.md): backup/restore, full-region
+  DR rebuild, the Alembic upgrade path, and quarterly credential rotation
+  (including the `ENCRYPTION_KEY` caveat).
+- **Config reference** — [`ops/config-reference.md`](ops/config-reference.md):
+  every production-critical setting, where it's set, and what's a secret.
diff --git a/deploy/helm/querywise/.helmignore b/deploy/helm/querywise/.helmignore
new file mode 100644
index 0000000..a313914
--- /dev/null
+++ b/deploy/helm/querywise/.helmignore
@@ -0,0 +1,8 @@
+.DS_Store
+.git/
+.gitignore
+*.tmproj
+*.bak
+*.swp
+*~
+ci/
diff --git a/deploy/helm/querywise/Chart.yaml b/deploy/helm/querywise/Chart.yaml
new file mode 100644
index 0000000..85c3bf1
--- /dev/null
+++ b/deploy/helm/querywise/Chart.yaml
@@ -0,0 +1,21 @@
+apiVersion: v2
+name: querywise
+description: >-
+  QueryWise — text-to-SQL with a semantic metadata layer. Single-tenant per
+  deployment; brings up the backend (uvicorn), arq worker, and frontend edge,
+  with a one-shot migration hook. Managed Postgres (pgvector) and Redis are
+  expected to be provided out-of-cluster (see the Terraform modules).
+type: application
+# Chart version — bump on chart changes.
+version: 0.1.0
+# Tracks the QueryWise app release the chart defaults target.
+appVersion: "0.1.0"
+keywords:
+  - text-to-sql
+  - semantic-layer
+  - analytics
+home: https://github.com/kosminus/querywise
+sources:
+  - https://github.com/kosminus/querywise
+maintainers:
+  - name: QueryWise
diff --git a/deploy/helm/querywise/templates/NOTES.txt b/deploy/helm/querywise/templates/NOTES.txt
new file mode 100644
index 0000000..57a5194
--- /dev/null
+++ b/deploy/helm/querywise/templates/NOTES.txt
@@ -0,0 +1,30 @@
+QueryWise {{ .Chart.AppVersion }} deployed as release "{{ .Release.Name }}".
+
+Components:
+  backend   {{ include "querywise.backendImage" . }}  (uvicorn, {{ .Values.backend.uvicornWorkers }} workers/pod)
+  worker    arq background jobs
+  frontend  {{ include "querywise.frontendImage" . }}  (nginx edge / SPA)
+
+{{ if .Values.migrate.enabled }}A pre-install/pre-upgrade hook ran `alembic upgrade head`.{{ else }}The migration hook is DISABLED (migrate.enabled=false) — `alembic upgrade head` was NOT run by this release.{{ end }}
+
+{{- if .Values.ingress.enabled }}
+
+Reach it at: http{{ if .Values.ingress.tls.enabled }}s{{ end }}://{{ .Values.ingress.host }}/
+{{- else }}
+
+Ingress is disabled. Port-forward the frontend to try it locally:
+  kubectl port-forward svc/{{ include "querywise.fullname" . }}-frontend 8080:{{ .Values.frontend.service.port }}
+  open http://localhost:8080
+{{- end }}
+
+{{- if not .Values.secrets.existingSecret }}
+{{- if not .Values.secrets.data.DATABASE_URL }}
+
+WARNING: secrets.data.DATABASE_URL is empty. Set DATABASE_URL and REDIS_URL
+(managed Postgres+pgvector / Redis), ENCRYPTION_KEY, and JWT_SECRET — or point
+secrets.existingSecret at an external-secrets-managed Secret.
+{{- end }}
+{{- end }}
+
+Check rollout:
+  kubectl get pods -l app.kubernetes.io/instance={{ .Release.Name }}
diff --git a/deploy/helm/querywise/templates/_helpers.tpl b/deploy/helm/querywise/templates/_helpers.tpl
new file mode 100644
index 0000000..6b0ca60
--- /dev/null
+++ b/deploy/helm/querywise/templates/_helpers.tpl
@@ -0,0 +1,86 @@
+{{/* Chart name (overridable). */}}
+{{- define "querywise.name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/* Fully-qualified release name. */}}
+{{- define "querywise.fullname" -}}
+{{- if .Values.fullnameOverride -}}
+{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- if contains $name .Release.Name -}}
+{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
+{{- else -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+{{- end -}}
+{{- end -}}
+
+{{- define "querywise.chart" -}}
+{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/* Common labels. */}}
+{{- define "querywise.labels" -}}
+helm.sh/chart: {{ include "querywise.chart" . }}
+{{ include "querywise.selectorLabels" . }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end -}}
+
+{{/* Selector labels (app-wide). */}}
+{{- define "querywise.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "querywise.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end -}}
+
+{{/* Per-component selector labels. Pass a dict: (dict "ctx" . "component" "backend"). */}}
+{{- define "querywise.componentSelectorLabels" -}}
+{{ include "querywise.selectorLabels" .ctx }}
+app.kubernetes.io/component: {{ .component }}
+{{- end -}}
+
+{{/* Service account name. */}}
+{{- define "querywise.serviceAccountName" -}}
+{{- if .Values.serviceAccount.create -}}
+{{- default (include "querywise.fullname" .) .Values.serviceAccount.name -}}
+{{- else -}}
+{{- default "default" .Values.serviceAccount.name -}}
+{{- end -}}
+{{- end -}}
+
+{{/* Name of the Secret to read env from (existing or chart-created). */}}
+{{- define "querywise.secretName" -}}
+{{- if .Values.secrets.existingSecret -}}
+{{- .Values.secrets.existingSecret -}}
+{{- else -}}
+{{- printf "%s-secrets" (include "querywise.fullname" .) -}}
+{{- end -}}
+{{- end -}}
+
+{{/* Name of the ConfigMap to read env from. */}}
+{{- define "querywise.configMapName" -}}
+{{- printf "%s-config" (include "querywise.fullname" .) -}}
+{{- end -}}
+
+{{/* Image refs (tag defaults to appVersion). */}}
+{{- define "querywise.backendImage" -}}
+{{- $tag := default .Chart.AppVersion .Values.image.backend.tag -}}
+{{- printf "%s:%s" .Values.image.backend.repository $tag -}}
+{{- end -}}
+
+{{- define "querywise.frontendImage" -}}
+{{- $tag := default .Chart.AppVersion .Values.image.frontend.tag -}}
+{{- printf "%s:%s" .Values.image.frontend.repository $tag -}}
+{{- end -}}
+
+{{/* envFrom block shared by backend, worker, and migrate. */}}
+{{- define "querywise.envFrom" -}}
+- configMapRef:
+    name: {{ include "querywise.configMapName" . }}
+- secretRef:
+    name: {{ include "querywise.secretName" . }}
+{{- end -}}
diff --git a/deploy/helm/querywise/templates/backend-deployment.yaml b/deploy/helm/querywise/templates/backend-deployment.yaml
new file mode 100644
index 0000000..c29fac1
--- /dev/null
+++ b/deploy/helm/querywise/templates/backend-deployment.yaml
@@ -0,0 +1,72 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "querywise.fullname" . }}-backend
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: backend
+spec:
+  {{- if not .Values.backend.autoscaling.enabled }}
+  replicas: {{ .Values.backend.replicaCount }}
+  {{- end }}
+  selector:
+    matchLabels:
+      {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "backend") | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "backend") | nindent 8 }}
+      annotations:
+        # Roll pods when config/secret content changes. With secrets.existingSecret
+        # the chart renders no Secret, so external rotations still need a manual restart.
+        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
+        checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
+    spec:
+      serviceAccountName: {{ include "querywise.serviceAccountName" . }}
+      {{- with .Values.image.pullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: backend
+          image: {{ include "querywise.backendImage" . }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          securityContext:
+            {{- toYaml .Values.backend.securityContext | nindent 12 }}
+          command:
+            - sh
+            - -c
+            - exec uvicorn app.main:app --host 0.0.0.0 --port {{ .Values.backend.containerPort }} --workers {{ .Values.backend.uvicornWorkers }}
+          ports:
+            - name: http
+              containerPort: {{ .Values.backend.containerPort }}
+          envFrom:
+            {{- include "querywise.envFrom" . | nindent 12 }}
+          livenessProbe:
+            httpGet:
+              path: /api/v1/health/live
+              port: http
+            initialDelaySeconds: 15
+            periodSeconds: 15
+          readinessProbe:
+            httpGet:
+              path: /api/v1/health/ready
+              port: http
+            initialDelaySeconds: 10
+            periodSeconds: 10
+          resources:
+            {{- toYaml .Values.backend.resources | nindent 12 }}
+      {{- with .Values.backend.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.backend.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.backend.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/deploy/helm/querywise/templates/backend-hpa.yaml b/deploy/helm/querywise/templates/backend-hpa.yaml
new file mode 100644
index 0000000..110e0a6
--- /dev/null
+++ b/deploy/helm/querywise/templates/backend-hpa.yaml
@@ -0,0 +1,23 @@
+{{- if .Values.backend.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: {{ include "querywise.fullname" . }}-backend
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: backend
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: {{ include "querywise.fullname" . }}-backend
+  minReplicas: {{ .Values.backend.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.backend.autoscaling.maxReplicas }}
+  metrics:
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.backend.autoscaling.targetCPUUtilizationPercentage }}
+{{- end }}
diff --git a/deploy/helm/querywise/templates/backend-pdb.yaml b/deploy/helm/querywise/templates/backend-pdb.yaml
new file mode 100644
index 0000000..4d2f5e6
--- /dev/null
+++ b/deploy/helm/querywise/templates/backend-pdb.yaml
@@ -0,0 +1,14 @@
+{{- if .Values.backend.podDisruptionBudget.enabled }}
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ include "querywise.fullname" . }}-backend
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: backend
+spec:
+  minAvailable: {{ .Values.backend.podDisruptionBudget.minAvailable }}
+  selector:
+    matchLabels:
+      {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "backend") | nindent 6 }}
+{{- end }}
diff --git a/deploy/helm/querywise/templates/backend-service.yaml b/deploy/helm/querywise/templates/backend-service.yaml
new file mode 100644
index 0000000..06b0ab5
--- /dev/null
+++ b/deploy/helm/querywise/templates/backend-service.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "querywise.fullname" . }}-backend
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: backend
+spec:
+  type: {{ .Values.backend.service.type }}
+  ports:
+    - name: http
+      port: {{ .Values.backend.service.port }}
+      targetPort: http
+      protocol: TCP
+  selector:
+    {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "backend") | nindent 4 }}
diff --git a/deploy/helm/querywise/templates/configmap.yaml b/deploy/helm/querywise/templates/configmap.yaml
new file mode 100644
index 0000000..848a82f
--- /dev/null
+++ b/deploy/helm/querywise/templates/configmap.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ include "querywise.configMapName" . }}
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+  annotations:
+    # Also a pre-install/pre-upgrade hook so it exists before the migrate Job
+    # (lower weight = runs earlier). Persists after success for the Deployments.
+    "helm.sh/hook": pre-install,pre-upgrade
+    "helm.sh/hook-weight": "-10"
+    "helm.sh/hook-delete-policy": before-hook-creation
+data:
+  {{- range $key, $val := .Values.config }}
+  {{ $key }}: {{ $val | quote }}
+  {{- end }}
diff --git a/deploy/helm/querywise/templates/frontend-deployment.yaml b/deploy/helm/querywise/templates/frontend-deployment.yaml
new file mode 100644
index 0000000..0815a1e
--- /dev/null
+++ b/deploy/helm/querywise/templates/frontend-deployment.yaml
@@ -0,0 +1,58 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "querywise.fullname" . }}-frontend
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: frontend
+spec:
+  replicas: {{ .Values.frontend.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "frontend") | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "frontend") | nindent 8 }}
+    spec:
+      {{- with .Values.image.pullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: frontend
+          image: {{ include "querywise.frontendImage" . }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          securityContext:
+            {{- toYaml .Values.frontend.securityContext | nindent 12 }}
+          ports:
+            - name: http
+              containerPort: {{ .Values.frontend.containerPort }}
+          livenessProbe:
+            httpGet:
+              path: /healthz
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 15
+          readinessProbe:
+            httpGet:
+              path: /healthz
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          resources:
+            {{- toYaml .Values.frontend.resources | nindent 12 }}
+      {{- with .Values.frontend.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.frontend.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.frontend.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/deploy/helm/querywise/templates/frontend-pdb.yaml b/deploy/helm/querywise/templates/frontend-pdb.yaml
new file mode 100644
index 0000000..31204ef
--- /dev/null
+++ b/deploy/helm/querywise/templates/frontend-pdb.yaml
@@ -0,0 +1,14 @@
+{{- if .Values.frontend.podDisruptionBudget.enabled }}
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: {{ include "querywise.fullname" . }}-frontend
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: frontend
+spec:
+  minAvailable: {{ .Values.frontend.podDisruptionBudget.minAvailable }}
+  selector:
+    matchLabels:
+      {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "frontend") | nindent 6 }}
+{{- end }}
diff --git a/deploy/helm/querywise/templates/frontend-service.yaml b/deploy/helm/querywise/templates/frontend-service.yaml
new file mode 100644
index 0000000..8950199
--- /dev/null
+++ b/deploy/helm/querywise/templates/frontend-service.yaml
@@ -0,0 +1,16 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "querywise.fullname" . }}-frontend
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: frontend
+spec:
+  type: {{ .Values.frontend.service.type }}
+  ports:
+    - name: http
+      port: {{ .Values.frontend.service.port }}
+      targetPort: http
+      protocol: TCP
+  selector:
+    {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "frontend") | nindent 4 }}
diff --git a/deploy/helm/querywise/templates/ingress.yaml b/deploy/helm/querywise/templates/ingress.yaml
new file mode 100644
index 0000000..9b551f3
--- /dev/null
+++ b/deploy/helm/querywise/templates/ingress.yaml
@@ -0,0 +1,52 @@
+{{- if .Values.ingress.enabled }}
+{{- $fullName := include "querywise.fullname" . -}}
+{{- $backendSvc := printf "%s-backend" $fullName -}}
+{{- $frontendSvc := printf "%s-frontend" $fullName -}}
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: {{ $fullName }}
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+  {{- with .Values.ingress.annotations }}
+  annotations:
+    {{- toYaml . | nindent 4 }}
+  {{- end }}
+spec:
+  {{- with .Values.ingress.className }}
+  ingressClassName: {{ . }}
+  {{- end }}
+  {{- if .Values.ingress.tls.enabled }}
+  tls:
+    - hosts:
+        - {{ .Values.ingress.host | quote }}
+      secretName: {{ .Values.ingress.tls.secretName }}
+  {{- end }}
+  rules:
+    - host: {{ .Values.ingress.host | quote }}
+      http:
+        paths:
+          # API + MCP go straight to the backend; the SPA calls these same-origin.
+          - path: /api
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ $backendSvc }}
+                port:
+                  number: {{ .Values.backend.service.port }}
+          - path: /mcp
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ $backendSvc }}
+                port:
+                  number: {{ .Values.backend.service.port }}
+          # Everything else is the SPA bundle.
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: {{ $frontendSvc }}
+                port:
+                  number: {{ .Values.frontend.service.port }}
+{{- end }}
diff --git a/deploy/helm/querywise/templates/migrate-job.yaml b/deploy/helm/querywise/templates/migrate-job.yaml
new file mode 100644
index 0000000..e2e6a03
--- /dev/null
+++ b/deploy/helm/querywise/templates/migrate-job.yaml
@@ -0,0 +1,42 @@
+{{- if .Values.migrate.enabled }}
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ include "querywise.fullname" . }}-migrate
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: migrate
+  annotations:
+    # Runs after the ConfigMap/Secret hooks (weight -10) and before the main
+    # phase rolls new backend pods — so migrations land before new code serves
+    # and the N backend replicas never race on `alembic upgrade`.
+    "helm.sh/hook": pre-install,pre-upgrade
+    "helm.sh/hook-weight": "-5"
+    "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded
+spec:
+  backoffLimit: {{ .Values.migrate.backoffLimit }}
+  template:
+    metadata:
+      labels:
+        {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "migrate") | nindent 8 }}
+    spec:
+      restartPolicy: Never
+      serviceAccountName: {{ include "querywise.serviceAccountName" . }}
+      {{- with .Values.image.pullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: migrate
+          image: {{ include "querywise.backendImage" . }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          securityContext:
+            {{- toYaml .Values.backend.securityContext | nindent 12 }}
+          command: ["alembic", "upgrade", "head"]
+          envFrom:
+            {{- include "querywise.envFrom" . | nindent 12 }}
+          resources:
+            {{- toYaml .Values.migrate.resources | nindent 12 }}
+{{- end }}
diff --git a/deploy/helm/querywise/templates/secret.yaml b/deploy/helm/querywise/templates/secret.yaml
new file mode 100644
index 0000000..d1a3167
--- /dev/null
+++ b/deploy/helm/querywise/templates/secret.yaml
@@ -0,0 +1,22 @@
+{{- if not .Values.secrets.existingSecret }}
+apiVersion: v1
+kind: Secret
+metadata:
+  name: {{ include "querywise.secretName" . }}
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+  annotations:
+    # Hook-ordered before the migrate Job, like the ConfigMap. For real
+    # deployments prefer `secrets.existingSecret` (external-secrets / sealed-
+    # secrets) so credentials never live in the Helm release.
+    "helm.sh/hook": pre-install,pre-upgrade
+    "helm.sh/hook-weight": "-10"
+    "helm.sh/hook-delete-policy": before-hook-creation
+type: Opaque
+stringData:
+  {{- range $key, $val := .Values.secrets.data }}
+  {{- if $val }}
+  {{ $key }}: {{ $val | quote }}
+  {{- end }}
+  {{- end }}
+{{- end }}
diff --git a/deploy/helm/querywise/templates/serviceaccount.yaml b/deploy/helm/querywise/templates/serviceaccount.yaml
new file mode 100644
index 0000000..246bad3
--- /dev/null
+++ b/deploy/helm/querywise/templates/serviceaccount.yaml
@@ -0,0 +1,18 @@
+{{- if .Values.serviceAccount.create }}
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ include "querywise.serviceAccountName" . }}
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+  annotations:
+    # The migrate Job is a pre-install hook that runs under this account, so the
+    # account must exist before the main phase. Hook-order it alongside the
+    # ConfigMap/Secret (weight -10), ahead of the migrate Job (weight -5).
+    "helm.sh/hook": pre-install,pre-upgrade
+    "helm.sh/hook-weight": "-10"
+    "helm.sh/hook-delete-policy": before-hook-creation
+    {{- with .Values.serviceAccount.annotations }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+{{- end }}
diff --git a/deploy/helm/querywise/templates/worker-deployment.yaml b/deploy/helm/querywise/templates/worker-deployment.yaml
new file mode 100644
index 0000000..7acd9ed
--- /dev/null
+++ b/deploy/helm/querywise/templates/worker-deployment.yaml
@@ -0,0 +1,51 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "querywise.fullname" . }}-worker
+  labels:
+    {{- include "querywise.labels" . | nindent 4 }}
+    app.kubernetes.io/component: worker
+spec:
+  replicas: {{ .Values.worker.replicaCount }}
+  selector:
+    matchLabels:
+      {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "worker") | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "querywise.componentSelectorLabels" (dict "ctx" . "component" "worker") | nindent 8 }}
+      annotations:
+        # Roll worker pods when the chart-rendered ConfigMap or Secret changes.
+        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
+        checksum/secret: {{ include (print $.Template.BasePath "/secret.yaml") . | sha256sum }}
+    spec:
+      serviceAccountName: {{ include "querywise.serviceAccountName" . }}
+      {{- with .Values.image.pullSecrets }}
+      imagePullSecrets:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      securityContext:
+        {{- toYaml .Values.podSecurityContext | nindent 8 }}
+      containers:
+        - name: worker
+          image: {{ include "querywise.backendImage" . }}
+          imagePullPolicy: {{ .Values.image.pullPolicy }}
+          securityContext:
+            {{- toYaml .Values.worker.securityContext | nindent 12 }}
+          command: ["arq", "app.jobs.worker.WorkerSettings"]
+          envFrom:
+            {{- include "querywise.envFrom" . | nindent 12 }}
+          resources:
+            {{- toYaml .Values.worker.resources | nindent 12 }}
+      {{- with .Values.worker.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.worker.affinity }}
+      affinity:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.worker.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
diff --git a/deploy/helm/querywise/values-production.example.yaml b/deploy/helm/querywise/values-production.example.yaml
new file mode 100644
index 0000000..7e23a9e
--- /dev/null
+++ b/deploy/helm/querywise/values-production.example.yaml
@@ -0,0 +1,61 @@
+# Example production overrides for the QueryWise chart.
+#   helm upgrade --install querywise deploy/helm/querywise \
+#     -n querywise --create-namespace \
+#     -f deploy/helm/querywise/values-production.example.yaml
+#
+# Prefer secrets.existingSecret (external-secrets / sealed-secrets) over inline
+# secret values for anything real.
+
+image:
+  backend:
+    repository: ghcr.io/your-org/querywise-backend
+    tag: "1.0.0"
+  frontend:
+    repository: ghcr.io/your-org/querywise-frontend
+    tag: "1.0.0"
+
+backend:
+  uvicornWorkers: 4
+  autoscaling:
+    enabled: true
+    minReplicas: 3
+    maxReplicas: 12
+    targetCPUUtilizationPercentage: 70
+
+worker:
+  replicaCount: 2
+
+frontend:
+  replicaCount: 3
+
+# IRSA / Workload Identity so pods can reach a managed secrets backend.
+serviceAccount:
+  annotations: {} # replace {} with one of the mappings below
+    # eks.amazonaws.com/role-arn: arn:aws:iam::123456789012:role/querywise
+    # iam.gke.io/gcp-service-account: querywise@PROJECT.iam.gserviceaccount.com
+
+ingress:
+  enabled: true
+  className: nginx
+  host: querywise.yourcompany.com
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-prod
+    # Streamable-HTTP MCP needs buffering off + long read timeout at the edge:
+    nginx.ingress.kubernetes.io/proxy-buffering: "off"
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+  tls:
+    enabled: true
+    secretName: querywise-tls
+
+config:
+  CORS_ORIGINS: '["https://querywise.yourcompany.com"]'
+  DEFAULT_ADMIN_EMAIL: admin@yourcompany.com
+  DEFAULT_LLM_PROVIDER: openai
+  DEFAULT_LLM_MODEL: gpt-5.2
+  EMBEDDING_DIMENSION: "1536"
+
+# Real deployments: sync this Secret with external-secrets and reference it.
+# It must carry DATABASE_URL, REDIS_URL, ENCRYPTION_KEY, JWT_SECRET, and your
+# LLM provider key(s).
+secrets:
+  existingSecret: querywise-secrets
diff --git a/deploy/helm/querywise/values-staging.example.yaml b/deploy/helm/querywise/values-staging.example.yaml
new file mode 100644
index 0000000..379b4cb
--- /dev/null
+++ b/deploy/helm/querywise/values-staging.example.yaml
@@ -0,0 +1,43 @@
+# Example STAGING overlay. Copy to values-staging.yaml (committed, no secrets —
+# those come via external-secrets) and the Release workflow's staging deploy
+# picks it up automatically (.github/actions/helm-deploy resolves
+# values-<environment>.yaml). Image repo/tag are injected by CI via --set.
+
+# Single-replica footprint. The chart defaults enable a PodDisruptionBudget with
+# minAvailable: 1 for backend and frontend; with only one replica that forbids
+# every voluntary eviction and stalls node drains, so it is switched off here.
+backend:
+  replicaCount: 1
+  uvicornWorkers: 2
+  autoscaling:
+    enabled: false
+  podDisruptionBudget:
+    enabled: false
+
+worker:
+  replicaCount: 1
+
+frontend:
+  replicaCount: 1
+  podDisruptionBudget:
+    enabled: false
+
+ingress:
+  enabled: true
+  className: nginx
+  host: querywise.staging.yourcompany.com
+  annotations:
+    cert-manager.io/cluster-issuer: letsencrypt-staging
+    nginx.ingress.kubernetes.io/proxy-buffering: "off"
+    nginx.ingress.kubernetes.io/proxy-read-timeout: "3600"
+  tls:
+    enabled: true
+    secretName: querywise-staging-tls
+
+config:
+  ENVIRONMENT: staging
+  CORS_ORIGINS: '["https://querywise.staging.yourcompany.com"]'
+
+# Synced from the cloud secret store by external-secrets.
+secrets:
+  existingSecret: querywise-secrets
diff --git a/deploy/helm/querywise/values.yaml b/deploy/helm/querywise/values.yaml
new file mode 100644
index 0000000..35282ca
--- /dev/null
+++ b/deploy/helm/querywise/values.yaml
@@ -0,0 +1,204 @@
+# Default values for the QueryWise chart.
+# Override per-environment with `-f my-values.yaml` or `--set`.
+
+# -- Naming -------------------------------------------------------------------
+nameOverride: ""
+fullnameOverride: ""
+
+# -- Images -------------------------------------------------------------------
+# Built from backend/Dockerfile.prod and frontend/Dockerfile.prod, pushed to
+# your registry by CI. Override repository/tag per environment.
+image:
+  pullPolicy: IfNotPresent
+  pullSecrets: [] # NOTE(review): presumably imagePullSecrets names for a private registry — confirm in the templates
+  backend:
+    repository: ghcr.io/kosminus/querywise-backend # CI overrides repository/tag via --set (see the staging overlay)
+    tag: "" # defaults to .Chart.AppVersion
+  frontend:
+    repository: ghcr.io/kosminus/querywise-frontend # CI overrides repository/tag via --set (see the staging overlay)
+    tag: "" # defaults to .Chart.AppVersion
+
+# -- Backend (uvicorn API) ----------------------------------------------------
+backend:
+  replicaCount: 2 # fixed replica count; applies when autoscaling is disabled
+  uvicornWorkers: 4 # uvicorn processes per backend pod
+  containerPort: 8000
+  service:
+    type: ClusterIP
+    port: 8000
+  resources:
+    requests:
+      cpu: 250m
+      memory: 512Mi
+    limits:
+      memory: 1Gi # memory is capped; no CPU limit is set
+  autoscaling:
+    enabled: true
+    minReplicas: 2
+    maxReplicas: 8
+    targetCPUUtilizationPercentage: 70
+  podDisruptionBudget:
+    enabled: true
+    minAvailable: 1 # NOTE: with a single replica this forbids every voluntary eviction — disable the PDB in 1-replica overlays
+  # runAsNonRoot — image runs as uid 1001.
+  securityContext:
+    runAsNonRoot: true
+    runAsUser: 1001
+    allowPrivilegeEscalation: false
+    capabilities:
+      drop: ["ALL"]
+    seccompProfile:
+      type: RuntimeDefault
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+
+# -- Worker (arq background jobs) ---------------------------------------------
+worker:
+  replicaCount: 1 # number of arq worker pods; no autoscaling or PodDisruptionBudget values are defined for the worker
+  resources:
+    requests:
+      cpu: 250m
+      memory: 512Mi
+    limits:
+      memory: 1Gi # memory is capped; no CPU limit is set
+  securityContext:
+    runAsNonRoot: true
+    runAsUser: 1001 # same uid as the backend securityContext
+    allowPrivilegeEscalation: false
+    capabilities:
+      drop: ["ALL"]
+    seccompProfile:
+      type: RuntimeDefault
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+
+# -- Frontend (nginx edge — serves the SPA) -----------------------------------
+frontend:
+  replicaCount: 2
+  containerPort: 8080 # unprivileged port — the container runs as non-root uid 101 (see securityContext)
+  service:
+    type: ClusterIP
+    port: 80 # NOTE(review): presumably forwards to containerPort 8080 — confirm in the Service template
+  resources:
+    requests:
+      cpu: 50m
+      memory: 64Mi
+    limits:
+      memory: 128Mi # memory is capped; no CPU limit is set
+  podDisruptionBudget:
+    enabled: true
+    minAvailable: 1 # NOTE: with a single replica this forbids every voluntary eviction — disable the PDB in 1-replica overlays
+  # nginx-unprivileged runs as uid 101.
+  securityContext:
+    runAsNonRoot: true
+    runAsUser: 101
+    allowPrivilegeEscalation: false
+    capabilities:
+      drop: ["ALL"]
+    seccompProfile:
+      type: RuntimeDefault
+  nodeSelector: {}
+  tolerations: []
+  affinity: {}
+
+# -- Schema migration ---------------------------------------------------------
+# Runs `alembic upgrade head` as a pre-install/pre-upgrade Helm hook Job, so
+# migrations complete before new backend pods roll and replicas never race.
+migrate:
+  enabled: true # false skips the hook Job — migrations must then be run manually (`alembic upgrade head`)
+  backoffLimit: 2 # NOTE(review): presumably the hook Job's backoffLimit — confirm in the Job template
+  resources:
+    requests:
+      cpu: 100m
+      memory: 256Mi
+    limits:
+      memory: 512Mi # memory is capped; no CPU limit is set
+
+# -- Service account ----------------------------------------------------------
+# Annotate for IRSA (EKS) / Workload Identity (GKE) / Azure Workload Identity so
+# pods can reach a managed secrets backend or the external-secrets operator.
+serviceAccount:
+  create: true
+  name: "" # NOTE(review): empty presumably falls back to a chart-generated name — confirm in the helpers template
+  annotations: {} # e.g. eks.amazonaws.com/role-arn (IRSA) or iam.gke.io/gcp-service-account (Workload Identity)
+
+# -- Ingress ------------------------------------------------------------------
+# Path-based routing: /api + /mcp -> backend, everything else -> frontend SPA.
+# The SPA is built same-origin so this is all it needs.
+ingress:
+  enabled: true
+  className: "" # the example overlays set this to nginx
+  annotations: {} # the example overlays turn nginx proxy-buffering off and raise proxy-read-timeout for streamable-HTTP MCP
+  host: querywise.example.com # placeholder — override per environment
+  tls:
+    enabled: false # the example overlays enable TLS together with a cert-manager cluster-issuer annotation
+    secretName: querywise-tls
+
+# -- Non-secret configuration (rendered into a ConfigMap, injected as env) -----
+config: # every value is a string because each key is injected as an environment variable
+  ENVIRONMENT: production
+  DEBUG: "false"
+  LOG_LEVEL: INFO
+  LOG_FORMAT: json
+  ENABLE_METRICS: "true" # Prometheus metrics at GET /metrics
+  SERVICE_NAME: querywise-backend
+  OTEL_ENABLED: "false"
+  # OTEL_EXPORTER_OTLP_ENDPOINT: http://otel-collector:4318/v1/traces
+
+  # Jobs: arq (Redis) is required for the multi-pod worker topology.
+  JOB_BACKEND: arq # needs a running worker and a reachable REDIS_URL
+
+  # Auth (Phase 1) — never disable in prod.
+  DISABLE_AUTH: "false"
+  AUTH_PROVIDER: local
+  AUTH_COOKIE_SECURE: "true" # requires HTTPS; over plain HTTP the cookie is dropped and login silently fails
+  AUTH_COOKIE_SAMESITE: lax
+  DEFAULT_ORG_SLUG: default
+  DEFAULT_ADMIN_EMAIL: admin@querywise.local # placeholder — the production example overrides it
+
+  # No sample-DB seeding in prod.
+  AUTO_SETUP_SAMPLE_DB: "false"
+
+  # CORS — same-origin needs no entries; add cross-origin frontends here.
+  CORS_ORIGINS: '["https://querywise.example.com"]' # JSON list, passed as a single string
+
+  # LLM (OpenAI default — switch provider here; keys go in `secrets`).
+  DEFAULT_LLM_PROVIDER: openai
+  DEFAULT_LLM_MODEL: gpt-5.2
+  EMBEDDING_MODEL: text-embedding-3-small
+  EMBEDDING_DIMENSION: "1536" # must match EMBEDDING_MODEL; a mismatch resizes the vector columns and nulls embeddings at startup
+
+  # Query defaults + rate limiting.
+  DEFAULT_QUERY_TIMEOUT_SECONDS: "30"
+  DEFAULT_MAX_ROWS: "1000"
+  MAX_RETRY_ATTEMPTS: "3"
+  MAX_QUERIES_PER_MINUTE: "30" # only meaningful while RATE_LIMIT_ENABLED is "true"
+  RATE_LIMIT_ENABLED: "true"
+
+# -- Secrets ------------------------------------------------------------------
+# Either:
+#  (a) point at a Secret you manage out-of-band (external-secrets operator,
+#      sealed-secrets, cloud sync) via `existingSecret`, OR
+#  (b) let the chart create one from `data` below (fine for bootstrap, but the
+#      values land in your release — prefer (a) for real deployments).
+secrets:
+  existingSecret: "" # option (a): name of a Secret managed out-of-band; leave empty for option (b)
+  data: # option (b) — NOTE(review): presumably ignored when existingSecret is set; confirm in the Secret template
+    # REQUIRED — managed Postgres (pgvector) and Redis DSNs.
+    DATABASE_URL: ""
+    REDIS_URL: ""
+    # REQUIRED — generate before install (see .env.prod.example).
+    ENCRYPTION_KEY: "" # Fernet key; changing it makes stored connection strings undecryptable
+    JWT_SECRET: "" # changing it invalidates all sessions and pending magic links
+    # Optional bootstrap admin password.
+    DEFAULT_ADMIN_PASSWORD: ""
+    # LLM provider keys (set the ones your provider needs).
+    OPENAI_API_KEY: ""
+    # ANTHROPIC_API_KEY: ""
+    # AZURE_OPENAI_API_KEY: ""
+
+# -- Pod-level security context (shared) --------------------------------------
+podSecurityContext:
+  fsGroup: 1001 # same id as the backend/worker runAsUser (1001); the frontend container runs as uid 101
diff --git a/deploy/ops/RUNBOOK.md b/deploy/ops/RUNBOOK.md
new file mode 100644
index 0000000..f5e8806
--- /dev/null
+++ b/deploy/ops/RUNBOOK.md
@@ -0,0 +1,162 @@
+# QueryWise — Operations & DR Runbook
+
+Operational procedures for a production QueryWise deployment: backups, restore /
+disaster recovery, schema upgrades, and credential rotation. Pairs with the
+deploy artifacts in [`../`](../) (Helm chart, Terraform, CI/CD).
+
+Two stateful systems hold everything that matters:
+
+| System | Holds | Recovery source |
+|--------|-------|-----------------|
+| **App Postgres (pgvector)** | metadata, semantic layer, saved queries, dashboards, users, history, embeddings | logical backups (below) + managed PITR |
+| **Cloud secret store** | `ENCRYPTION_KEY`, `JWT_SECRET`, DSNs, LLM keys | your IaC / secret manager |
+
+Redis is a **cache + transient job queue** — it is not a source of truth and
+needs no backup (embeddings regenerate; the cache repopulates).
+
+---
+
+## 1. Backups
+
+The managed databases provisioned by the Terraform modules already have
+**automated snapshots + PITR** (RDS backup retention, Cloud SQL PITR, Azure
+flexible-server backups). Logical backups via `backup.sh` are the second layer —
+portable, offsite-able, and restorable to any Postgres.
+
+**What's covered:** the entire app database (schema + data, including pgvector
+columns) in `pg_dump` custom format, AES-256 encrypted.
+
+### Run a one-off backup
+
+```bash
+export DATABASE_URL='postgresql://querywise:…@db-host:5432/querywise'
+export BACKUP_PASSPHRASE='…'         # from your secret store
+./backup.sh                          # -> ./backups/querywise-<ts>.dump.enc
+# Offsite: also set BACKUP_S3_URI=s3://… or BACKUP_GCS_URI=gs://…
+```
+
+From a cluster without DB network exposure, exec through a pod:
+
+```bash
+kubectl -n querywise exec deploy/querywise-backend -- \
+  sh -c 'DATABASE_URL="$DATABASE_URL" BACKUP_PASSPHRASE="$BACKUP_PASSPHRASE" ...'
+# or apply the scheduled CronJob — see backup-cronjob.example.yaml
+```
+
+### Scheduled backups
+
+Apply [`backup-cronjob.example.yaml`](backup-cronjob.example.yaml) for nightly
+encrypted dumps to a PVC (or offsite). **Verify restores quarterly** — an
+untested backup is not a backup (see §2.3).
+
+---
+
+## 2. Restore / Disaster Recovery
+
+**Targets:** RPO ≈ last backup / PITR window (minutes with managed PITR); RTO ≈
+time to provision a DB + restore (tens of minutes).
+
+### 2.1 Data loss / corruption (DB intact)
+
+Prefer the managed DB's **point-in-time recovery** — restore to a timestamp just
+before the bad change (RDS/Cloud SQL/Azure console or Terraform). This avoids
+losing everything since the last logical dump.
+
+### 2.2 Restore from a logical backup
+
+```bash
+export DATABASE_URL='postgresql://querywise:…@new-db-host:5432/querywise'
+export BACKUP_PASSPHRASE='…'
+RESTORE_CONFIRM=yes ./restore.sh ./backups/querywise-<ts>.dump.enc
+```
+
+Then make the schema current (the dump may predate a migration):
+
+```bash
+# Option 1 — run Alembic directly in a running backend pod:
+kubectl -n querywise exec deploy/querywise-backend -- alembic upgrade head
+# Option 2 (simplest) — re-run `helm upgrade`; the pre-upgrade hook runs the migration.
+```
+
+### 2.3 Full region/cluster loss (clean-room rebuild)
+
+1. **Infra:** `terraform apply` the relevant `deploy/terraform/<cloud>` module in
+   the recovery region → new Postgres, Redis, secret store, networking.
+2. **Secrets:** restore the cloud secret values (or re-generate — but **keep the
+   original `ENCRYPTION_KEY`**, see §4, or stored connection strings become
+   undecryptable).
+3. **Data:** `restore.sh` the latest backup into the new Postgres.
+4. **App:** point kubeconfig at the recovery cluster, install external-secrets,
+   `helm upgrade --install` the chart. The migrate hook reconciles the schema.
+5. **DNS/TLS:** repoint the hostname to the new ingress; re-issue certs.
+6. **Verify:** `GET /api/v1/health/ready` is 200; run a known query; confirm a
+   saved query + dashboard render.
+
+---
+
+## 3. Schema upgrades (Alembic)
+
+Migrations live in `backend/alembic/versions`. The normal path is automatic:
+
+- **Helm:** every `helm upgrade` runs `alembic upgrade head` as a
+  `pre-install`/`pre-upgrade` hook Job **before** new backend pods roll, so code
+  and schema move together and replicas never race (the migrate hook is the only
+  place migrations run).
+- **Compose:** the `migrate` service runs once before backend/worker start.
+
+**Manual** (rarely needed):
+
+```bash
+kubectl -n querywise exec deploy/querywise-backend -- alembic current
+kubectl -n querywise exec deploy/querywise-backend -- alembic upgrade head
+```
+
+**Rollback:** Alembic `downgrade` exists but data-dropping migrations are not
+safely reversible — prefer rolling **forward** with a fix migration, or restore
+from backup (§2). Always take a backup before a major upgrade.
+
+---
+
+## 4. Credential rotation (quarterly)
+
+Rotate on a quarterly cadence (and immediately on suspected compromise). All
+secrets live in the cloud secret store; external-secrets syncs them into the
+`querywise-secrets` Kubernetes Secret. After each change, restart the pods so they pick it up:
+
+```bash
+kubectl -n querywise rollout restart deploy/querywise-backend deploy/querywise-worker
+```
+
+| Secret | Procedure | Blast radius |
+|--------|-----------|--------------|
+| **DB password** | Change the master password on the managed DB (cloud/Terraform), update `DATABASE_URL` in the secret store, restart pods. | Brief; pods reconnect. |
+| **`JWT_SECRET`** | New random value in the secret store, restart pods. | All sessions and pending magic links are invalidated — users must log in again. |
+| **LLM API keys** | Rotate at the provider, update the secret, restart pods. | None if overlapping validity. |
+| **User API keys** | Per-user via `/api-keys` (only the SHA-256 hash is stored; plaintext shown once). | Per key. |
+| **`ENCRYPTION_KEY`** | ⚠️ **Do not blind-rotate.** This Fernet key encrypts stored DB-connection strings; a new key cannot decrypt existing ones. To rotate: decrypt each connection with the old key and re-save with the new one (or re-enter connection credentials in the UI), *then* swap the key. Keep the old key available until every connection is re-encrypted. | Connections become unusable until re-encrypted. |
+
+> Prefer cloud-managed rotation where available (e.g. Secrets Manager rotation
+> for the DB password) so rotation is automatic and audited.
+
+---
+
+## 5. Quick reference
+
+```bash
+# Health
+kubectl -n querywise get pods
+curl -fsS https://<host>/api/v1/health/ready
+
+# Logs (JSON in prod — pipe to jq)
+kubectl -n querywise logs deploy/querywise-backend --tail=200
+
+# Roll back a bad release (Helm keeps history)
+helm -n querywise history querywise
+helm -n querywise rollback querywise <REVISION>
+
+# Scale
+kubectl -n querywise scale deploy/querywise-backend --replicas=4   # if HPA disabled
+```
+
+See [`config-reference.md`](config-reference.md) for every tunable and which
+ones must change for production.
diff --git a/deploy/ops/backup-cronjob.example.yaml b/deploy/ops/backup-cronjob.example.yaml
new file mode 100644
index 0000000..528db51
--- /dev/null
+++ b/deploy/ops/backup-cronjob.example.yaml
@@ -0,0 +1,86 @@
+# Optional: scheduled encrypted backups in-cluster. Runs deploy/ops/backup.sh
+# nightly against the same Secret the Helm release uses.
+#
+# Prereqs:
+#   1. The script as a ConfigMap (keeps it in sync with deploy/ops/backup.sh):
+#        kubectl -n querywise create configmap querywise-backup-script \
+#          --from-file=backup.sh=deploy/ops/backup.sh
+#   2. A BACKUP_PASSPHRASE key in the querywise-secrets Secret (add it to your
+#      cloud secret store so external-secrets syncs it alongside DATABASE_URL).
+#   3. A StorageClass for the PVC (or swap the PVC for an emptyDir + offsite
+#      upload by setting BACKUP_S3_URI/BACKUP_GCS_URI and using an image that
+#      bundles the matching cloud CLI).
+#
+# The postgres:16 image provides pg_dump + openssl. Apply with:
+#   kubectl -n querywise apply -f backup-cronjob.example.yaml
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: querywise-backups
+  namespace: querywise
+spec:
+  accessModes: ["ReadWriteOnce"]
+  resources:
+    requests:
+      storage: 20Gi
+---
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: querywise-backup
+  namespace: querywise
+spec:
+  schedule: "0 3 * * *" # 03:00 UTC daily
+  concurrencyPolicy: Forbid
+  successfulJobsHistoryLimit: 3
+  failedJobsHistoryLimit: 3
+  jobTemplate:
+    spec:
+      backoffLimit: 2
+      template:
+        spec:
+          restartPolicy: Never
+          securityContext:
+            runAsNonRoot: true
+            runAsUser: 999
+            fsGroup: 999
+          containers:
+            - name: backup
+              image: postgres:16
+              command: ["bash", "/scripts/backup.sh"]
+              # Container hardening — the same settings the chart applies to its own
+              # workloads, and what the "restricted" Pod Security Standard requires.
+              securityContext:
+                allowPrivilegeEscalation: false
+                capabilities:
+                  drop: ["ALL"]
+                seccompProfile:
+                  type: RuntimeDefault
+              env:
+                - name: DATABASE_URL
+                  valueFrom:
+                    secretKeyRef:
+                      name: querywise-secrets
+                      key: DATABASE_URL
+                - name: BACKUP_PASSPHRASE
+                  valueFrom:
+                    secretKeyRef:
+                      name: querywise-secrets
+                      key: BACKUP_PASSPHRASE
+                - name: BACKUP_DIR
+                  value: /backups
+                - name: BACKUP_RETENTION_DAYS
+                  value: "14"
+              volumeMounts:
+                - name: script
+                  mountPath: /scripts
+                - name: backups
+                  mountPath: /backups
+          volumes:
+            - name: script
+              configMap:
+                name: querywise-backup-script
+            - name: backups
+              persistentVolumeClaim:
+                claimName: querywise-backups
diff --git a/deploy/ops/backup.sh b/deploy/ops/backup.sh
new file mode 100755
index 0000000..545614b
--- /dev/null
+++ b/deploy/ops/backup.sh
@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+#
+# Encrypted logical backup of the QueryWise app database.
+#
+#   pg_dump (custom format)  ->  AES-256 (openssl, PBKDF2)  ->  <dir>/querywise-<ts>.dump.enc
+#
+# Runs anywhere with a postgres client + openssl and reachability to the DB:
+# a cron host, a CI job, or `kubectl exec` into a pod. Decrypt/restore with
+# restore.sh using the same passphrase.
+#
+# The dump is streamed into a hidden `.partial` file inside BACKUP_DIR and only
+# renamed to its final name once pg_dump and openssl have both succeeded, so a
+# failed run never leaves a truncated file that looks like a valid backup.
+#
+# Required env:
+#   DATABASE_URL          postgresql+asyncpg://… or postgresql://… (driver suffix stripped)
+#   BACKUP_PASSPHRASE     symmetric key for encryption — store in your secret manager
+# Optional env:
+#   BACKUP_DIR            output directory          (default ./backups)
+#   BACKUP_RETENTION_DAYS prune local dumps older than N days (default 14; 0 = keep all)
+#   BACKUP_S3_URI         s3://bucket/prefix    -> uploaded with `aws`
+#   BACKUP_GCS_URI        gs://bucket/prefix    -> uploaded with `gcloud storage`
+set -euo pipefail
+
+: "${DATABASE_URL:?set DATABASE_URL}"
+: "${BACKUP_PASSPHRASE:?set BACKUP_PASSPHRASE}"
+BACKUP_DIR="${BACKUP_DIR:-./backups}"
+RETENTION="${BACKUP_RETENTION_DAYS:-14}"
+
+# Reject a malformed retention up front instead of failing after the dump.
+[[ "$RETENTION" =~ ^[0-9]+$ ]] || { echo "ERROR: BACKUP_RETENTION_DAYS must be a non-negative integer" >&2; exit 1; }
+
+command -v pg_dump >/dev/null || { echo "ERROR: pg_dump not found (install the postgresql client)" >&2; exit 1; }
+command -v openssl >/dev/null || { echo "ERROR: openssl not found" >&2; exit 1; }
+
+# pg_dump speaks plain postgresql:// — drop the SQLAlchemy +asyncpg driver suffix.
+# Only the scheme (the part before ://) is rewritten, never the credentials.
+SCHEME="${DATABASE_URL%%://*}"
+PG_URL="${SCHEME/+asyncpg/}${DATABASE_URL#"$SCHEME"}"
+
+mkdir -p "$BACKUP_DIR"
+TS="$(date -u +%Y%m%dT%H%M%SZ)"
+OUT="$BACKUP_DIR/querywise-${TS}.dump.enc"
+# Work file: the leading dot and .partial suffix keep it out of the prune glob.
+TMP="$BACKUP_DIR/.querywise-${TS}.dump.enc.partial"
+trap 'rm -f "$TMP"' EXIT
+
+echo "Backing up database -> ${OUT}"
+pg_dump --format=custom --no-owner --no-privileges "$PG_URL" \
+  | openssl enc -aes-256-cbc -pbkdf2 -salt -pass env:BACKUP_PASSPHRASE \
+  > "$TMP"
+# Publish atomically (same directory, so this is a rename, not a copy).
+mv "$TMP" "$OUT"
+
+echo "Wrote $(du -h "$OUT" | cut -f1) encrypted backup."
+
+# Optional offsite upload.
+if [[ -n "${BACKUP_S3_URI:-}" ]]; then
+  echo "Uploading to ${BACKUP_S3_URI%/}/$(basename "$OUT")"
+  aws s3 cp "$OUT" "${BACKUP_S3_URI%/}/$(basename "$OUT")"
+fi
+if [[ -n "${BACKUP_GCS_URI:-}" ]]; then
+  echo "Uploading to ${BACKUP_GCS_URI%/}/$(basename "$OUT")"
+  gcloud storage cp "$OUT" "${BACKUP_GCS_URI%/}/$(basename "$OUT")"
+fi
+
+# Prune old local backups.
+if [[ "$RETENTION" -gt 0 ]]; then
+  find "$BACKUP_DIR" -name 'querywise-*.dump.enc' -type f -mtime +"$RETENTION" -print -delete
+fi
+
+echo "Done."
diff --git a/deploy/ops/config-reference.md b/deploy/ops/config-reference.md
new file mode 100644
index 0000000..cbe2aff
--- /dev/null
+++ b/deploy/ops/config-reference.md
@@ -0,0 +1,66 @@
+# QueryWise — Production Config Reference
+
+Every backend setting is an environment variable. The **full catalogue with
+defaults** lives in [`../../.env.example`](../../.env.example) and the project
+`CLAUDE.md`; this page is the **production-focused** view — what to change, where
+to set it, and what's a secret.
+
+## Where settings come from
+
+| Layer | Carries | Source of truth |
+|-------|---------|-----------------|
+| **Compose (prod)** | everything | `.env.prod` (from `.env.prod.example`) |
+| **Helm — non-secret** | tunables, feature flags | `config:` map → ConfigMap (`values.yaml`) |
+| **Helm — secret** | keys, DSNs | `querywise-secrets` Secret via `secrets.existingSecret` |
+| **Terraform** | DSNs + keys assembled into the cloud secret store | `*.tfvars` |
+
+Secrets must **never** sit in `values.yaml` or a committed overlay — they flow
+cloud secret store → external-secrets → `querywise-secrets`.
+
+## Must-set for production
+
+| Setting | Why | Notes |
+|---------|-----|-------|
+| `DATABASE_URL` | app database | secret; managed pgvector Postgres |
+| `REDIS_URL` | cache + arq queue | secret; `JOB_BACKEND=arq` in prod |
+| `ENCRYPTION_KEY` | encrypts stored connection strings | **secret; never rotate blind** (see RUNBOOK §4) |
+| `JWT_SECRET` | session/magic-link signing | secret; rotating logs everyone out |
+| `DISABLE_AUTH=false` | enforce login | **never `true` in prod** |
+| `AUTH_COOKIE_SECURE=true` | HTTPS-only session cookie | TLS terminates at the edge/ingress |
+| `CORS_ORIGINS` | allowed browser origins | JSON list; same-origin needs none |
+| `AUTO_SETUP_SAMPLE_DB=false` | no IFRS-9 seed in prod | point at real warehouses |
+| LLM provider + key | SQL generation + embeddings | `DEFAULT_LLM_PROVIDER` + the matching `*_API_KEY` (secret) |
+| `EMBEDDING_DIMENSION` | vector column size | 1536 (OpenAI/Anthropic) / 768 (Ollama nomic) — must match the model |
+
+## Operational tunables (non-secret)
+
+| Setting | Default | Effect |
+|---------|---------|--------|
+| `UVICORN_WORKERS` | 4 | uvicorn processes per backend pod/container |
+| `LOG_FORMAT` | `json` (prod) | structured logs for aggregation |
+| `LOG_LEVEL` | `INFO` | verbosity |
+| `ENABLE_METRICS` | `true` | Prometheus at `GET /metrics` |
+| `OTEL_ENABLED` / `OTEL_EXPORTER_OTLP_ENDPOINT` | `false` / — | tracing to Jaeger/Tempo/Collector |
+| `RATE_LIMIT_ENABLED` / `MAX_QUERIES_PER_MINUTE` | `true` / 30 | `/query` throttle |
+| `DEFAULT_QUERY_TIMEOUT_SECONDS` / `DEFAULT_MAX_ROWS` | 30 / 1000 | query guardrails |
+| `SECRETS_BACKEND` | `env` | `aws`/`gcp`/`azure`/`vault` for managed connection-string encryption |
+
+## Scaling knobs (Helm `values.yaml`)
+
+| Value | Purpose |
+|-------|---------|
+| `backend.autoscaling.{enabled,minReplicas,maxReplicas,targetCPUUtilizationPercentage}` | HPA on the API |
+| `backend.replicaCount` | fixed replicas when HPA off |
+| `worker.replicaCount` | arq worker concurrency (separate pods) |
+| `frontend.replicaCount` | edge replicas |
+| `{backend,frontend}.podDisruptionBudget` | availability during node drains (the worker has none) |
+| `ingress.{host,className,annotations,tls}` | routing + TLS |
+| `image.{backend,frontend}.{repository,tag}` | which images (CI injects `tag`) |
+
+## Cross-checks
+
+- `EMBEDDING_DIMENSION` must match the embedding model, or startup resizes the
+  vector columns and nulls embeddings (they regenerate in the background).
+- `JOB_BACKEND=arq` ⇒ a running `worker` and a reachable `REDIS_URL`.
+- `AUTH_COOKIE_SECURE=true` ⇒ the app is served over HTTPS (else the cookie is
+  dropped and login silently fails).
diff --git a/deploy/ops/restore.sh b/deploy/ops/restore.sh
new file mode 100755
index 0000000..87acda4
--- /dev/null
+++ b/deploy/ops/restore.sh
@@ -0,0 +1,57 @@
+#!/usr/bin/env bash
+#
+# Restore an encrypted backup produced by backup.sh.
+#
+#   <file>.dump.enc  ->  AES-256 decrypt  ->  verify archive  ->  pg_restore --clean --if-exists
+#
+# DESTRUCTIVE: drops and recreates the objects in the target database. Guarded
+# behind RESTORE_CONFIRM=yes. After restoring, run the app's migrations to make
+# sure the schema matches the running code (`alembic upgrade head`, or just
+# redeploy — the Helm migrate hook does it).
+#
+# The backup is first decrypted to an owner-only temp file (under $TMPDIR, default
+# /tmp — it holds the PLAINTEXT dump, so that location needs room; it is removed
+# on exit) and its table of contents is read before the target is touched. A wrong
+# passphrase or a damaged file is normally caught there, before anything is dropped.
+#
+# Required env:
+#   DATABASE_URL        target database (postgresql+asyncpg://… or postgresql://…)
+#   BACKUP_PASSPHRASE   the passphrase the backup was encrypted with
+#   RESTORE_CONFIRM=yes acknowledge that this overwrites the target
+# Usage:
+#   RESTORE_CONFIRM=yes ./restore.sh ./backups/querywise-20260608T030000Z.dump.enc
+set -euo pipefail
+
+FILE="${1:-}"
+: "${DATABASE_URL:?set DATABASE_URL}"
+: "${BACKUP_PASSPHRASE:?set BACKUP_PASSPHRASE}"
+[[ -n "$FILE" ]] || { echo "usage: restore.sh <backup-file.dump.enc>" >&2; exit 2; }
+[[ -f "$FILE" ]] || { echo "ERROR: no such file: $FILE" >&2; exit 2; }
+
+if [[ "${RESTORE_CONFIRM:-}" != "yes" ]]; then
+  echo "This will OVERWRITE the database at the configured DATABASE_URL." >&2
+  echo "Re-run with RESTORE_CONFIRM=yes to proceed." >&2
+  exit 1
+fi
+
+command -v pg_restore >/dev/null || { echo "ERROR: pg_restore not found (install the postgresql client)" >&2; exit 1; }
+command -v openssl >/dev/null || { echo "ERROR: openssl not found" >&2; exit 1; }
+
+# pg_restore speaks plain postgresql:// — strip +asyncpg from the scheme only.
+SCHEME="${DATABASE_URL%%://*}"
+PG_URL="${SCHEME/+asyncpg/}${DATABASE_URL#"$SCHEME"}"
+
+PLAIN="$(mktemp "${TMPDIR:-/tmp}/querywise-restore.XXXXXX")"
+trap 'rm -f "$PLAIN"' EXIT
+
+echo "Decrypting ${FILE} ..."
+openssl enc -d -aes-256-cbc -pbkdf2 -pass env:BACKUP_PASSPHRASE -in "$FILE" -out "$PLAIN" \
+  || { echo "ERROR: decryption failed (wrong BACKUP_PASSPHRASE or corrupt file)" >&2; exit 1; }
+pg_restore --list "$PLAIN" >/dev/null \
+  || { echo "ERROR: decrypted file is not a valid pg_dump archive" >&2; exit 1; }
+
+echo "Restoring ${FILE} -> database ..."
+pg_restore --clean --if-exists --no-owner --no-privileges --dbname "$PG_URL" "$PLAIN"
+
+echo "Restore complete."
+echo "Next: ensure the schema is current — 'alembic upgrade head' or redeploy."
diff --git a/deploy/terraform/.gitignore b/deploy/terraform/.gitignore
new file mode 100644
index 0000000..52479d2
--- /dev/null
+++ b/deploy/terraform/.gitignore
@@ -0,0 +1,17 @@
+# Local provider plugins / cache
+**/.terraform/*
+
+# State (use a remote, encrypted backend — never commit state)
+*.tfstate
+*.tfstate.*
+crash.log
+
+# Variable files with secrets
+*.tfvars
+!*.tfvars.example
+
+# Plan outputs
+*.tfplan
+
+# Keep the provider lockfile committed for reproducible versions:
+!**/.terraform.lock.hcl
diff --git a/deploy/terraform/aws/.terraform.lock.hcl b/deploy/terraform/aws/.terraform.lock.hcl
new file mode 100644
index 0000000..9e6c643
--- /dev/null
+++ b/deploy/terraform/aws/.terraform.lock.hcl
@@ -0,0 +1,66 @@
+# This file is maintained automatically by "tofu init".
+# Manual edits may be lost in future updates.
+
+provider "registry.opentofu.org/hashicorp/aws" {
+  version     = "5.100.0"
+  constraints = "~> 5.40"
+  hashes = [
+    "h1:7/GgVlN+KplSVCuc8qb4ct2R7gotYooPNRd0cnj9GxE=",
+    "h1:BrNG7eFOdRrRRbHdvrTjMJ8X8Oh/tiegURiKf7J2db8=",
+    "h1:C6eM6fGJVktK2M5vH3Yhv5NnqmegcBDY0EuDHhiXoVY=",
+    "h1:C7yD4Be2zhVdjnilsKPfucYAYMG5UCJYuUSoY6FCtGQ=",
+    "h1:H8CH2vfXXP/WQgJw+Qrn72umKs9UlGYQvn+QdnwO8Nc=",
+    "h1:J7L5bgyYNRAbtwAFJl2Lj+IMI2DJTrbbL33PTK4OWVY=",
+    "h1:JJ+EJQ+sIN3XRmNmrSUnUQtR8i3P22z+AbtAf8O/cRE=",
+    "h1:Wm5Ofhc15lX1OMMCt7iDV0NY5FDIouQDjX7I1iab55s=",
+    "h1:crKvBCgX6RlMcE6Ewm8o8YVuIg6mkXqKNgt/kSFYTvQ=",
+    "h1:zef23ac/YWw9O2FepFWRs+my9iWWUkniL4dT4LnCKjU=",
+    "zh:1a41f3ee26720fee7a9a0a361890632a1701b5dc1cf5355dc651ddbe115682ff",
+    "zh:30457f36690c19307921885cc5e72b9dbeba369445815903acd5c39ac0e41e7a",
+    "zh:42c22674d5f23f6309eaf3ac3a4f1f8b66b566c1efe1dcb0dd2fb30c17ce1f78",
+    "zh:4cc271c795ff8ce6479ec2d11a8ba65a0a9ed6331def6693f4b9dccb6e662838",
+    "zh:60932aa376bb8c87cd1971240063d9d38ba6a55502c867fdbb9f5361dc93d003",
+    "zh:864e42784bde77b18393ebfcc0104cea9123da5f4392e8a059789e296952eefa",
+    "zh:9750423138bb01ecaa5cec1a6691664f7783d301fb1628d3b64a231b6b564e0e",
+    "zh:e5d30c4dec271ef9d6fe09f48237ec6cfea1036848f835b4e47f274b48bda5a7",
+    "zh:e62bd314ae97b43d782e0841b13e68a3f8ec85cc762004f973ce5ce7b6cdbfd0",
+    "zh:ea851a3c072528a4445ac6236ba2ce58ffc99ec466019b0bd0e4adde63a248e4",
+  ]
+}
+
+provider "registry.opentofu.org/hashicorp/random" {
+  version     = "3.9.0"
+  constraints = "~> 3.5"
+  hashes = [
+    "h1:8EQU5KSxezcjo/phRSe69rDOI0lk4pSaggj7FsskYp8=",
+    "h1:Lw9im2VBBJQ3RyAbHPQ0rcvcmmcZWm3x+kIOpN+Tv9s=",
+    "h1:U8KXqGCoNI9/guYbTvzgdtVk3fRthoG0UXwm1JoEpIs=",
+    "h1:YXaVd4p6qXPPVaxIBaIDNXmBwT02ZqDn0qD+tYpw8sA=",
+    "h1:cOpc03fphEt/G9Rfc4jLL/fW0D7tgvlXqiDKPF4vuww=",
+    "h1:g09RR7T1xWkeGrZwWvWMT9ncJrFGr1k3CBD585UmO7w=",
+    "h1:gGDdPPibmw2EWROx+sh1RGLjR5+nPwZyrf6/N9jXfeM=",
+    "h1:haE7/nXCOhXKP4oXeEnER3t5CaVQWqujz4nBnpeTUv4=",
+    "h1:ieSVpfZS2lKuMr05ph0QsOVpCzg7uk3cgKBaXR+Ikug=",
+    "h1:ig2s1IS9IzehorRjvVAnKIsUUj8fkgyxct1L/kswcc4=",
+    "h1:j3lS+ZEERFnoab8t1ppDrScGVP/cgWbzlCrEYKTCXYw=",
+    "h1:lxezrKmOiQIySHAM+os8qLVq7hqufDr8h3Hpzvsk+78=",
+    "h1:lzRqBJAG+NETxHbEZUJ/YP3RMEjZBinTX7VmgH3lw60=",
+    "h1:tdSNWK5ApqUsgbdYieyeYLTu6nIZUV3hR1oFqUfAuGo=",
+    "h1:xedet8yH/zI2CfdxsGlK0nlFWc/Bp61yrWsEa3fHB8g=",
+    "zh:03f1114cc20b8913523735ab76e0f0a2b16ce13c92923a53304bf85f07fc0dbc",
+    "zh:105b678ee72322a3067f105d7e05e940f6143238f377f6e87ff4ec909246ac2a",
+    "zh:55f3bbf13ea18cbace61a706566a80f25f33fe2b1780b6f3d7b582af2a05b6d2",
+    "zh:63adf996db48f082f7a6351eb485e219cd88795fc71e6ec60a837263ab0d2cb1",
+    "zh:7e99550738a4e3cc68b8a467714b0d69371025fe95e3326d5323d026d55653e9",
+    "zh:8342b54af3a18a37e075eeae61be57f4de2ba71b35d95c5075d402dd2c1f289d",
+    "zh:83ee18e32ac9dd5fc91298554b7c4cfa4c3a1db50f4c797945637cc93c0844ae",
+    "zh:993ecc0adbf6bd535a59fbc9b735d8c33950e6f6eb5e621d750da9b71d65d80a",
+    "zh:ad722bc59d4edbf1415e827fc007c0efe6e0e9462d5568bae20b34be1058a261",
+    "zh:ae9448e1f87b2f9a6c5197a0e9862162ec6b137cb3a3835e11522995d8939e7c",
+    "zh:bc9cdd3aac784f759125c6627f6f6416e8726a1c184eb9cf3e55b9edbc94c627",
+    "zh:c8e35b89572ba1c40a9b20022e033a3395fb8d42e7604d50c900f193ba10382e",
+    "zh:e2deaa8a9975ef81d9f62baed12c41286918b0a10908e0e031f13f69a3b730a1",
+    "zh:ee39707557210a0ab1098aa357d2cdfe502e5a312d0dbdffb09d08facc4d3fc5",
+    "zh:f81afe4eb63e8aa9e0ea71be6c990f0dc69cb360e7191c0742a991f4a5081b64",
+  ]
+}
diff --git a/deploy/terraform/aws/README.md b/deploy/terraform/aws/README.md
new file mode 100644
index 0000000..2724c36
--- /dev/null
+++ b/deploy/terraform/aws/README.md
@@ -0,0 +1,63 @@
+# QueryWise on AWS — Terraform (data plane + secrets)
+
+Provisions the **managed dependencies** the QueryWise Helm chart needs, in your
+own VPC — your data never leaves your account:
+
+- **RDS PostgreSQL 16** (pgvector-ready, encrypted, Multi-AZ, gp3, TLS enforced)
+- **ElastiCache Redis** (result cache + the arq job queue)
+- **Secrets Manager** secret with the assembled DSNs + keys
+- **S3** bucket for exports / `pg_dump` backups (optional)
+- **VPC + private subnets** (optional — or drop into an existing VPC)
+- **IAM policy** to read the app secret (for the external-secrets IRSA role)
+
+**Compute is intentionally out of scope.** Provision EKS (or ECS) separately —
+BYO, or the upstream [`terraform-aws-modules/eks`](https://github.com/terraform-aws-modules/terraform-aws-eks)
+module — then deploy the app with the Helm chart in [`../../helm/querywise`](../../helm/querywise).
+Keeping the data plane and the cluster in separate states means a `helm`
+rollback or cluster rebuild never risks the database.
+
+## Usage
+
+```bash
+cp terraform.tfvars.example terraform.tfvars   # then edit
+terraform init
+terraform apply
+```
+
+Wire the outputs into the cluster. The recommended path is the
+**external-secrets operator** reading the Secrets Manager secret:
+
+1. `terraform output secret_access_policy_arn` → attach to an IAM role whose
+   trust policy references your EKS OIDC provider, bound to the external-secrets
+   ServiceAccount (IRSA).
+2. Create an `ExternalSecret` that pulls `terraform output app_secret_name`
+   with a `dataFrom` extract into a Kubernetes Secret named `querywise-secrets`
+   (its keys — `DATABASE_URL`, `REDIS_URL`, `ENCRYPTION_KEY`, `JWT_SECRET`,
+   `OPENAI_API_KEY`, … — already match the backend's env).
+3. Install the chart pointing at it:
+
+   ```bash
+   helm upgrade --install querywise ../../helm/querywise -n querywise \
+     --set secrets.existingSecret=querywise-secrets \
+     --set config.AUTO_SETUP_SAMPLE_DB=false
+   ```
+
+Make sure `allowed_security_group_ids` includes the EKS node/pod security group
+so pods can reach Postgres + Redis.
+
+> **Quick-start without external-secrets:** feed the DSNs straight into the
+> chart's own Secret — but `database_url` / `redis_url` are sensitive outputs, so
+> avoid this for anything but a sandbox.
+
+## pgvector
+
+The `vector` extension ships with RDS PostgreSQL 16 and is created by the app's
+Alembic migrations (`CREATE EXTENSION IF NOT EXISTS vector`) on first
+`helm upgrade` (the migration hook). No parameter-group change required.
+
+## Notes
+
+- `db_deletion_protection = true` (default) blocks `terraform destroy` of the DB
+  and forces a final snapshot. Set to `false` for throwaway environments.
+- The master DB password and JWT secret are generated if not supplied and stored
+  only in Secrets Manager / Terraform state — keep your state backend encrypted.
diff --git a/deploy/terraform/aws/iam.tf b/deploy/terraform/aws/iam.tf
new file mode 100644
index 0000000..4b819c6
--- /dev/null
+++ b/deploy/terraform/aws/iam.tf
@@ -0,0 +1,22 @@
+# Read-only access to the app secret, for the external-secrets operator's IRSA
+# role. Attach `secret_access_policy_arn` to the IAM role you bind to the
+# external-secrets ServiceAccount (the role's trust policy references the EKS
+# OIDC provider — created with the cluster, hence kept out of this data module).
+
+data "aws_iam_policy_document" "secret_read" {
+  statement { # one statement, limited to the single app secret ARN
+    effect    = "Allow"
+    sid       = "ReadAppSecret"
+    resources = [aws_secretsmanager_secret.app.arn]
+    actions = [
+      "secretsmanager:GetSecretValue",
+      "secretsmanager:DescribeSecret",
+    ]
+  }
+}
+
+resource "aws_iam_policy" "secret_read" {
+  policy      = data.aws_iam_policy_document.secret_read.json # JSON rendered from the policy document data source
+  description = "Read the QueryWise app secret (for external-secrets IRSA)"
+  name        = "${var.name_prefix}-secret-read"
+}
diff --git a/deploy/terraform/aws/main.tf b/deploy/terraform/aws/main.tf
new file mode 100644
index 0000000..8b5f8e7
--- /dev/null
+++ b/deploy/terraform/aws/main.tf
@@ -0,0 +1,57 @@
+data "aws_caller_identity" "current" {} # account_id feeds the default bucket name (local.bucket_name)
+
+data "aws_availability_zones" "available" {
+  state = "available" # source of the default local.azs when var.availability_zones is empty
+}
+
+locals {
+  tags = merge({
+    "app.kubernetes.io/name" = "querywise"
+    "ManagedBy"              = "terraform"
+  }, var.tags) # later merge() arguments win, so caller tags override the base pair
+
+  # Default to the first two available AZs when none are supplied.
+  azs = length(var.availability_zones) > 0 ? var.availability_zones : slice(data.aws_availability_zones.available.names, 0, 2)
+
+  # Resolve network: created vs. supplied.
+  vpc_id             = var.create_vpc ? aws_vpc.this[0].id : var.vpc_id
+  private_subnet_ids = var.create_vpc ? aws_subnet.private[*].id : var.private_subnet_ids
+
+  # Master password and JWT secret: supplied or generated.
+  db_password = var.db_password != "" ? var.db_password : random_password.db[0].result
+  jwt_secret  = var.jwt_secret != "" ? var.jwt_secret : random_password.jwt[0].result
+
+  # DSNs the app/Helm chart consume. The password is percent-encoded because a supplied
+  # var.db_password may hold URL-reserved characters; urlencode() emits "+" for space, so map it to %20.
+  database_url = "postgresql+asyncpg://${var.db_username}:${replace(urlencode(local.db_password), "+", "%20")}@${aws_db_instance.this.address}:5432/${var.db_name}"
+  redis_url    = "redis://${aws_elasticache_replication_group.this.primary_endpoint_address}:6379/0"
+
+  bucket_name = var.s3_bucket_name != "" ? var.s3_bucket_name : "${var.name_prefix}-${data.aws_caller_identity.current.account_id}"
+
+  # Keys mirror what the backend reads from env / the Helm Secret. Null or empty
+  # values are dropped so optional provider keys don't create blank entries.
+  secret_payload = { for k, v in {
+    DATABASE_URL           = local.database_url
+    REDIS_URL              = local.redis_url
+    ENCRYPTION_KEY         = var.encryption_key
+    JWT_SECRET             = local.jwt_secret
+    DEFAULT_ADMIN_PASSWORD = var.default_admin_password
+    OPENAI_API_KEY         = var.openai_api_key
+    ANTHROPIC_API_KEY      = var.anthropic_api_key
+    AZURE_OPENAI_API_KEY   = var.azure_openai_api_key
+  } : k => v if v != null && v != "" }
+}
+
+resource "random_password" "db" {
+  # Generated only when the caller leaves var.db_password empty.
+  count            = var.db_password == "" ? 1 : 0
+  length           = 32
+  override_special = "-_" # URL-safe symbols so the password drops cleanly into the DSN
+  special          = true
+}
+
+resource "random_password" "jwt" {
+  count   = var.jwt_secret == "" ? 1 : 0 # skipped when var.jwt_secret is supplied
+  special = false
+  length  = 48
+}
diff --git a/deploy/terraform/aws/network.tf b/deploy/terraform/aws/network.tf
new file mode 100644
index 0000000..29cbdfa
--- /dev/null
+++ b/deploy/terraform/aws/network.tf
@@ -0,0 +1,21 @@
+# Minimal private-subnet VPC for the data plane. RDS + ElastiCache live here;
+# compute (EKS/ECS) reaches them via security-group references. Set
+# create_vpc = false to drop these into an existing VPC instead.
+
+resource "aws_vpc" "this" {
+  count = var.create_vpc ? 1 : 0 # zero instances when an existing VPC is supplied
+
+  cidr_block           = var.vpc_cidr
+  enable_dns_hostnames = true
+  enable_dns_support   = true
+  tags                 = { Name = "${var.name_prefix}-vpc" }
+}
+
+resource "aws_subnet" "private" {
+  count = var.create_vpc ? length(local.azs) : 0 # one subnet per AZ, only for a module-created VPC
+
+  vpc_id            = aws_vpc.this[0].id
+  cidr_block        = cidrsubnet(var.vpc_cidr, 4, count.index) # 4 extra prefix bits, indexed by AZ position
+  availability_zone = local.azs[count.index]
+  tags              = { Name = "${var.name_prefix}-private-${local.azs[count.index]}" }
+}
diff --git a/deploy/terraform/aws/outputs.tf b/deploy/terraform/aws/outputs.tf
new file mode 100644
index 0000000..4e4e382
--- /dev/null
+++ b/deploy/terraform/aws/outputs.tf
@@ -0,0 +1,66 @@
+# -- Network -----------------------------------------------------------------
+output "vpc_id" {
+  description = "VPC the data plane runs in."
+  value       = local.vpc_id # created VPC or the supplied var.vpc_id, per var.create_vpc
+}
+
+output "private_subnet_ids" {
+  description = "Private subnets used by RDS/ElastiCache."
+  value       = local.private_subnet_ids # created subnets or var.private_subnet_ids, per var.create_vpc
+}
+
+output "rds_security_group_id" {
+  description = "RDS security group id. App compute does not need to attach to it; exposed for rule references/debugging."
+  value       = aws_security_group.rds.id
+}
+
+output "redis_security_group_id" {
+  description = "Redis security group id."
+  value       = aws_security_group.redis.id
+}
+
+# -- Endpoints ---------------------------------------------------------------
+output "db_endpoint" {
+  description = "RDS Postgres endpoint (host)."
+  value       = aws_db_instance.this.address # host only; local.database_url pairs it with port 5432
+}
+
+output "redis_endpoint" {
+  description = "ElastiCache primary endpoint (host)."
+  value       = aws_elasticache_replication_group.this.primary_endpoint_address # host only; redis_url adds :6379
+}
+
+# -- Secrets -----------------------------------------------------------------
+output "app_secret_arn" {
+  description = "Secrets Manager ARN holding the assembled app secret (DSNs + keys). Point external-secrets at this."
+  value       = aws_secretsmanager_secret.app.arn # same ARN the secret_read policy is scoped to
+}
+
+output "app_secret_name" {
+  description = "Secrets Manager name of the app secret."
+  value       = aws_secretsmanager_secret.app.name # "<name_prefix>/app"
+}
+
+output "secret_access_policy_arn" {
+  description = "IAM policy granting read of the app secret — attach to the external-secrets IRSA role."
+  value       = aws_iam_policy.secret_read.arn # policy only; the IRSA role itself is not created by this module
+}
+
+# -- Storage -----------------------------------------------------------------
+output "s3_bucket_name" {
+  description = "Exports/backups bucket (empty if disabled)."
+  value       = var.create_s3_bucket ? aws_s3_bucket.data[0].bucket : "" # guard: data[0] only exists when the bucket is created
+}
+
+# -- Convenience: DSNs (sensitive) -------------------------------------------
+output "database_url" {
+  description = "asyncpg DSN for the backend (also stored in the app secret)."
+  value       = local.database_url
+  sensitive   = true # embeds the master DB password
+}
+
+output "redis_url" {
+  description = "Redis DSN for cache + arq (also stored in the app secret)."
+  value       = local.redis_url
+  sensitive   = true # kept out of plain `terraform output` listings alongside database_url
+}
diff --git a/deploy/terraform/aws/rds.tf b/deploy/terraform/aws/rds.tf
new file mode 100644
index 0000000..30e4d4f
--- /dev/null
+++ b/deploy/terraform/aws/rds.tf
@@ -0,0 +1,86 @@
+# PostgreSQL 16 with pgvector. The extension ships with RDS Postgres 16 and is
+# created by the app's Alembic migrations (`CREATE EXTENSION IF NOT EXISTS
+# vector`), so no parameter-group change is required.
+
+resource "aws_db_subnet_group" "this" {
+  subnet_ids = local.private_subnet_ids # module-created or caller-supplied private subnets
+  name       = "${var.name_prefix}-db"
+}
+
+resource "aws_security_group" "rds" {
+  vpc_id      = local.vpc_id
+  name        = "${var.name_prefix}-rds"
+  description = "Postgres access for QueryWise"
+
+  egress { # ingress is declared separately via the rds_from_sg / rds_from_cidr rule resources
+    description = "All egress"
+    cidr_blocks = ["0.0.0.0/0"]
+    protocol    = "-1"
+    from_port   = 0
+    to_port     = 0
+  }
+}
+
+resource "aws_security_group_rule" "rds_from_sg" {
+  count                    = length(var.allowed_security_group_ids) # one ingress rule per allowed source group
+  description              = "Postgres from app security group"
+  type                     = "ingress"
+  protocol                 = "tcp"
+  from_port                = 5432
+  to_port                  = 5432
+  source_security_group_id = var.allowed_security_group_ids[count.index]
+  security_group_id        = aws_security_group.rds.id
+}
+
+resource "aws_security_group_rule" "rds_from_cidr" {
+  count             = length(var.allowed_cidr_blocks) > 0 ? 1 : 0 # a single rule carries the whole CIDR list
+  description       = "Postgres from allowed CIDRs"
+  type              = "ingress"
+  protocol          = "tcp"
+  from_port         = 5432
+  to_port           = 5432
+  cidr_blocks       = var.allowed_cidr_blocks
+  security_group_id = aws_security_group.rds.id
+}
+
+resource "aws_db_parameter_group" "this" {
+  family = "postgres16"
+  name   = "${var.name_prefix}-pg16"
+
+  # Require TLS for client connections.
+  parameter {
+    value = "1"
+    name  = "rds.force_ssl"
+  }
+}
+
+resource "aws_db_instance" "this" {
+  identifier     = "${var.name_prefix}-pg"
+  instance_class = var.db_instance_class
+  engine         = "postgres"
+  engine_version = var.db_engine_version
+
+  username = var.db_username
+  password = local.db_password # supplied var.db_password or the generated one
+  db_name  = var.db_name
+
+  storage_type          = "gp3"
+  storage_encrypted     = true
+  allocated_storage     = var.db_allocated_storage
+  max_allocated_storage = var.db_max_allocated_storage
+
+  parameter_group_name   = aws_db_parameter_group.this.name
+  db_subnet_group_name   = aws_db_subnet_group.this.name
+  vpc_security_group_ids = [aws_security_group.rds.id]
+  multi_az               = var.db_multi_az
+
+  auto_minor_version_upgrade = true
+  backup_retention_period    = var.db_backup_retention_days
+  deletion_protection        = var.db_deletion_protection
+
+  # Destroy-time snapshot follows deletion protection: on => final snapshot, off => skipped.
+  final_snapshot_identifier = var.db_deletion_protection ? "${var.name_prefix}-pg-final" : null
+  skip_final_snapshot       = !var.db_deletion_protection
+
+  tags = { Name = "${var.name_prefix}-pg" }
+}
diff --git a/deploy/terraform/aws/redis.tf b/deploy/terraform/aws/redis.tf
new file mode 100644
index 0000000..ca583ef
--- /dev/null
+++ b/deploy/terraform/aws/redis.tf
@@ -0,0 +1,64 @@
+# ElastiCache Redis — backs the result cache + the arq job queue.
+
+resource "aws_elasticache_subnet_group" "this" {
+  subnet_ids = local.private_subnet_ids # same private subnets the RDS subnet group uses
+  name       = "${var.name_prefix}-redis"
+}
+
+resource "aws_security_group" "redis" {
+  vpc_id      = local.vpc_id
+  name        = "${var.name_prefix}-redis"
+  description = "Redis access for QueryWise"
+
+  egress { # ingress is declared separately via the redis_from_sg / redis_from_cidr rule resources
+    description = "All egress"
+    cidr_blocks = ["0.0.0.0/0"]
+    protocol    = "-1"
+    from_port   = 0
+    to_port     = 0
+  }
+}
+
+resource "aws_security_group_rule" "redis_from_sg" {
+  count                    = length(var.allowed_security_group_ids) # one ingress rule per allowed source group
+  description              = "Redis from app security group"
+  type                     = "ingress"
+  protocol                 = "tcp"
+  from_port                = 6379
+  to_port                  = 6379
+  source_security_group_id = var.allowed_security_group_ids[count.index]
+  security_group_id        = aws_security_group.redis.id
+}
+
+resource "aws_security_group_rule" "redis_from_cidr" {
+  count             = length(var.allowed_cidr_blocks) > 0 ? 1 : 0 # a single rule carries the whole CIDR list
+  description       = "Redis from allowed CIDRs"
+  type              = "ingress"
+  protocol          = "tcp"
+  from_port         = 6379
+  to_port           = 6379
+  cidr_blocks       = var.allowed_cidr_blocks
+  security_group_id = aws_security_group.redis.id
+}
+
+resource "aws_elasticache_replication_group" "this" {
+  description          = "QueryWise cache + job queue"
+  replication_group_id = "${var.name_prefix}-redis"
+
+  port           = 6379
+  node_type      = var.redis_node_type
+  engine         = "redis"
+  engine_version = var.redis_engine_version
+
+  # One primary plus var.redis_replicas replicas; failover and Multi-AZ switch on once a replica exists.
+  multi_az_enabled           = var.redis_replicas > 0
+  automatic_failover_enabled = var.redis_replicas > 0
+  num_cache_clusters         = var.redis_replicas + 1
+
+  security_group_ids = [aws_security_group.redis.id]
+  subnet_group_name  = aws_elasticache_subnet_group.this.name
+
+  at_rest_encryption_enabled = true # NOTE(review): transit encryption is not set in this block — confirm intended
+
+  tags = { Name = "${var.name_prefix}-redis" }
+}
diff --git a/deploy/terraform/aws/s3.tf b/deploy/terraform/aws/s3.tf
new file mode 100644
index 0000000..df394ca
--- /dev/null
+++ b/deploy/terraform/aws/s3.tf
@@ -0,0 +1,37 @@
+# Optional bucket for exports / pg_dump backups. Private + encrypted + versioned.
+
+resource "aws_s3_bucket" "data" {
+  count = var.create_s3_bucket ? 1 : 0 # the bucket is optional
+
+  bucket = local.bucket_name
+  tags   = { Name = local.bucket_name }
+}
+
+resource "aws_s3_bucket_public_access_block" "data" {
+  count                   = var.create_s3_bucket ? 1 : 0
+  bucket                  = aws_s3_bucket.data[0].id
+  restrict_public_buckets = true # all four public-access guards are switched on
+  block_public_policy     = true
+  block_public_acls       = true
+  ignore_public_acls      = true
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "data" {
+  bucket = aws_s3_bucket.data[0].id
+  count  = var.create_s3_bucket ? 1 : 0 # follows the optional bucket
+
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256" # default encryption for new objects
+    }
+  }
+}
+
+resource "aws_s3_bucket_versioning" "data" {
+  bucket = aws_s3_bucket.data[0].id
+  count  = var.create_s3_bucket ? 1 : 0 # follows the optional bucket
+
+  versioning_configuration {
+    status = "Enabled"
+  }
+}
diff --git a/deploy/terraform/aws/secrets.tf b/deploy/terraform/aws/secrets.tf
new file mode 100644
index 0000000..c232965
--- /dev/null
+++ b/deploy/terraform/aws/secrets.tf
@@ -0,0 +1,14 @@
+# Secrets Manager holds the assembled app secret (DSNs + keys). The
+# external-secrets operator in-cluster syncs this into the Kubernetes Secret the
+# Helm chart references (secrets.existingSecret). Keys map 1:1 to the backend's
+# env vars, so a SecretStore + ExternalSecret with a "dataFrom" extract is enough.
+
+resource "aws_secretsmanager_secret" "app" {
+  description = "QueryWise application secrets (DSNs + keys)"
+  name        = "${var.name_prefix}/app" # container only; the value lives in the secret version
+}
+
+resource "aws_secretsmanager_secret_version" "app" {
+  secret_string = jsonencode(local.secret_payload) # one JSON object; keys are the backend's env var names
+  secret_id     = aws_secretsmanager_secret.app.id
+}
diff --git a/deploy/terraform/aws/terraform.tfvars.example b/deploy/terraform/aws/terraform.tfvars.example
new file mode 100644
index 0000000..9e62df1
--- /dev/null
+++ b/deploy/terraform/aws/terraform.tfvars.example
@@ -0,0 +1,32 @@
+# Copy to terraform.tfvars and fill in. Keep secrets out of version control.
+
+region      = "us-east-1"
+name_prefix = "querywise-prod"
+
+# Network — let the module create a VPC, or set create_vpc = false and supply
+# vpc_id + private_subnet_ids to drop into your existing cluster VPC.
+create_vpc = true
+# vpc_id             = "vpc-0123456789abcdef0"
+# private_subnet_ids = ["subnet-aaa", "subnet-bbb"]
+
+# Allow your EKS node/pod security group to reach Postgres + Redis.
+allowed_security_group_ids = ["sg-0123456789abcdef0"]
+
+# Database
+db_instance_class = "db.t4g.medium"
+db_multi_az       = true
+
+# Redis
+redis_node_type = "cache.t4g.small"
+redis_replicas  = 1
+
+# REQUIRED — Fernet key for connection-string encryption. Generate with:
+#   python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+encryption_key = "CHANGE_ME"
+
+# Optional — generated if left empty.
+# jwt_secret = ""
+
+# LLM provider key(s).
+openai_api_key = "CHANGE_ME"
+# anthropic_api_key = ""
diff --git a/deploy/terraform/aws/variables.tf b/deploy/terraform/aws/variables.tf
new file mode 100644
index 0000000..72a3268
--- /dev/null
+++ b/deploy/terraform/aws/variables.tf
@@ -0,0 +1,197 @@
+# -- General -----------------------------------------------------------------
+variable "region" { # consumed by the provider "aws" block
+  description = "AWS region to deploy into."
+  type        = string
+}
+
+variable "name_prefix" { # interpolated into resource names throughout the module
+  description = "Prefix for all resource names (e.g. \"querywise-prod\")."
+  type        = string
+  default     = "querywise"
+}
+
+variable "tags" { # merged over the module's two base tags in local.tags
+  description = "Extra tags applied to every resource."
+  type        = map(string)
+  default     = {}
+}
+
+# -- Network -----------------------------------------------------------------
+# Either let the module create a VPC, or supply an existing one.
+variable "create_vpc" { # gates the count of aws_vpc.this and aws_subnet.private
+  description = "Create a VPC + private subnets. If false, supply vpc_id and private_subnet_ids."
+  type        = bool
+  default     = true
+}
+
+variable "vpc_cidr" { # private subnets are carved from it with cidrsubnet(var.vpc_cidr, 4, index)
+  description = "CIDR for the created VPC (when create_vpc = true)."
+  type        = string
+  default     = "10.42.0.0/16"
+}
+
+variable "availability_zones" { # empty list => local.azs falls back to the first two available AZs
+  description = "AZs to spread data subnets across (>= 2 for Multi-AZ / ElastiCache)."
+  type        = list(string)
+  default     = []
+}
+
+variable "vpc_id" { # only read when create_vpc = false
+  description = "Existing VPC id (when create_vpc = false)."
+  type        = string
+  default     = ""
+}
+
+variable "private_subnet_ids" { # only read when create_vpc = false
+  description = "Existing private subnet ids for RDS/ElastiCache (when create_vpc = false)."
+  type        = list(string)
+  default     = []
+}
+
+variable "allowed_security_group_ids" { # one ingress rule per entry on both the RDS and Redis groups
+  description = "Security groups (e.g. the EKS node/pod SG) allowed to reach Postgres/Redis."
+  type        = list(string)
+  default     = []
+}
+
+variable "allowed_cidr_blocks" { # non-empty => one CIDR ingress rule on each of the two groups
+  description = "CIDRs allowed to reach Postgres/Redis (use sparingly; prefer SG references)."
+  type        = list(string)
+  default     = []
+}
+
+# -- PostgreSQL (pgvector) ---------------------------------------------------
+variable "db_name" { # also the database path segment of local.database_url
+  description = "Application database name."
+  type        = string
+  default     = "querywise"
+}
+
+variable "db_username" { # also the user part of local.database_url
+  description = "Master username for the app database."
+  type        = string
+  default     = "querywise"
+}
+
+variable "db_password" { # empty => random_password.db is created and used instead
+  description = "Master password. Leave empty to generate one (stored in Secrets Manager)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "db_engine_version" { # must belong to the "postgres16" parameter group family
+  description = "PostgreSQL engine version (16.x supports the pgvector extension)."
+  type        = string
+  default     = "16.4"
+}
+
+variable "db_instance_class" {
+  description = "RDS instance class."
+  type        = string
+  default     = "db.t4g.medium"
+}
+
+variable "db_allocated_storage" {
+  description = "Initial storage (GiB)."
+  type        = number
+  default     = 50
+}
+
+variable "db_max_allocated_storage" {
+  description = "Storage autoscaling ceiling (GiB). Set equal to allocated to disable."
+  type        = number
+  default     = 200
+}
+
+variable "db_multi_az" {
+  description = "Run the database Multi-AZ for HA."
+  type        = bool
+  default     = true
+}
+
+variable "db_backup_retention_days" {
+  description = "Automated backup retention (days)."
+  type        = number
+  default     = 7
+}
+
+variable "db_deletion_protection" { # also drives skip_final_snapshot / final_snapshot_identifier
+  description = "Block accidental `terraform destroy` of the database."
+  type        = bool
+  default     = true
+}
+
+# -- ElastiCache (Redis) -----------------------------------------------------
+variable "redis_node_type" {
+  description = "ElastiCache node type."
+  type        = string
+  default     = "cache.t4g.small"
+}
+
+variable "redis_engine_version" {
+  description = "Redis engine version."
+  type        = string
+  default     = "7.1"
+}
+
+variable "redis_replicas" { # num_cache_clusters = replicas + 1; failover and Multi-AZ are enabled when > 0
+  description = "Number of replica nodes (0 = single primary, no HA)."
+  type        = number
+  default     = 1
+}
+
+# -- S3 (exports / backups) --------------------------------------------------
+variable "create_s3_bucket" { # gates the bucket and its access-block, encryption and versioning resources
+  description = "Create an S3 bucket for exports/backups."
+  type        = bool
+  default     = true
+}
+
+variable "s3_bucket_name" { # resolved in local.bucket_name
+  description = "Bucket name. Empty = \"<name_prefix>-<account_id>\"."
+  type        = string
+  default     = ""
+}
+
+# -- Application secrets (assembled into the Secrets Manager secret) ----------
+variable "encryption_key" { # no default, so Terraform requires a value; stored as ENCRYPTION_KEY
+  description = "Fernet key for connection-string encryption (REQUIRED — generate with the python one-liner in the README)."
+  type        = string
+  sensitive   = true
+}
+
+variable "jwt_secret" { # empty => random_password.jwt (48 characters, no symbols) is used
+  description = "HS256 signing secret for session/magic-link JWTs. Empty = generate one."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "default_admin_password" { # left out of the secret payload when empty
+  description = "Optional bootstrap admin password."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "openai_api_key" { # left out of the secret payload when empty
+  description = "OpenAI API key (completions + embeddings)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "anthropic_api_key" { # left out of the secret payload when empty
+  description = "Anthropic API key (optional)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "azure_openai_api_key" { # left out of the secret payload when empty
+  description = "Azure OpenAI key (optional)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
diff --git a/deploy/terraform/aws/versions.tf b/deploy/terraform/aws/versions.tf
new file mode 100644
index 0000000..e6d3a2d
--- /dev/null
+++ b/deploy/terraform/aws/versions.tf
@@ -0,0 +1,22 @@
+terraform {
+  required_version = ">= 1.5"
+
+  required_providers {
+    random = {
+      version = "~> 3.5"
+      source  = "hashicorp/random"
+    }
+    aws = {
+      version = "~> 5.40"
+      source  = "hashicorp/aws"
+    }
+  }
+}
+
+provider "aws" {
+  default_tags { # module-wide tags; resources add their own Name tag on top
+    tags = local.tags
+  }
+
+  region = var.region
+}
diff --git a/deploy/terraform/azure/.terraform.lock.hcl b/deploy/terraform/azure/.terraform.lock.hcl
new file mode 100644
index 0000000..9653214
--- /dev/null
+++ b/deploy/terraform/azure/.terraform.lock.hcl
@@ -0,0 +1,66 @@
+# This file is maintained automatically by "tofu init".
+# Manual edits may be lost in future updates.
+
+provider "registry.opentofu.org/hashicorp/azurerm" {
+  version     = "3.117.1"
+  constraints = "~> 3.110"
+  hashes = [
+    "h1:/LLqOTgyUBDgagr0Bi2C6ZMbe1ytEZHjV8vZxxIKApo=",
+    "h1:BlM2+pRV7LWN3+enHRHjztlEVQGdfQfibT3Jt6vYw4o=",
+    "h1:CACtG75Ab4OnxWIvUVTVp/8KzXzVVA1vfr6eSiGDh48=",
+    "h1:HhwEwklIk4ccwh/55Yuk2bmoqRY/zwIhYEGpZPHAV5U=",
+    "h1:II0m9urxKMoQ+spbeVYO2aC2e6fpDMIfvnwG5lV2PtU=",
+    "h1:LW4VuLN3Jxho7ieW508bPNHPDq1aCWWYVszmiw3eQKg=",
+    "h1:OXBPoQpiwe519GeBfkmbfsDXO020v706RmWTYSuuUCE=",
+    "h1:PWUq7EPQ9uDVA8We/AbeUnTKlk4/6ELYQ5uWkthUxz0=",
+    "h1:YBwLP1Vu2+/XAORcOLN8Z3s/aH4d5uGC98A2qDBSo+8=",
+    "h1:wBga8SpJzBXnt03/RQZLVUFu3AVT1EsvFa6QyL1hofg=",
+    "zh:1fedd2521c8ced1fbebd5d70fda376d42393cac5cc25c043c390b44d630d9e37",
+    "zh:634c16442fd8aaed6c3bccd0069f4a01399b141d2a993d85997e6a03f9f867cf",
+    "zh:637ae3787f87506e5b673f44a1b0f33cf75d7fa9c5353df6a2584488fc3d4328",
+    "zh:7c7741f66ff5b05051db4b6c3d9bad68c829f9e920a7f1debdca0ab8e50836a3",
+    "zh:9b454fa0b6c821db2c6a71e591a467a5b4802129509710b56f01ae7106058d86",
+    "zh:bb820ff92b4a77e9d70999ae30758d408728c6e782b4e1c8c4b6d53b8c3c8ff9",
+    "zh:d38cd7d5f99398fb96672cb27943b96ea2b7008f26d379a69e1c6c2f25051869",
+    "zh:d56f5a132181ab14e6be332996753cc11c0d3b1cfdd1a1b44ef484c67e38cc91",
+    "zh:d8a1e7cf218f46e6d0bd878ff70f92db7e800a15f01e96189a24864d10cde33b",
+    "zh:f67cf6d14d859a1d2a1dc615941a1740a14cb3f4ee2a34da672ff6729d81fa81",
+  ]
+}
+
+provider "registry.opentofu.org/hashicorp/random" {
+  version     = "3.9.0"
+  constraints = "~> 3.5"
+  hashes = [
+    "h1:8EQU5KSxezcjo/phRSe69rDOI0lk4pSaggj7FsskYp8=",
+    "h1:Lw9im2VBBJQ3RyAbHPQ0rcvcmmcZWm3x+kIOpN+Tv9s=",
+    "h1:U8KXqGCoNI9/guYbTvzgdtVk3fRthoG0UXwm1JoEpIs=",
+    "h1:YXaVd4p6qXPPVaxIBaIDNXmBwT02ZqDn0qD+tYpw8sA=",
+    "h1:cOpc03fphEt/G9Rfc4jLL/fW0D7tgvlXqiDKPF4vuww=",
+    "h1:g09RR7T1xWkeGrZwWvWMT9ncJrFGr1k3CBD585UmO7w=",
+    "h1:gGDdPPibmw2EWROx+sh1RGLjR5+nPwZyrf6/N9jXfeM=",
+    "h1:haE7/nXCOhXKP4oXeEnER3t5CaVQWqujz4nBnpeTUv4=",
+    "h1:ieSVpfZS2lKuMr05ph0QsOVpCzg7uk3cgKBaXR+Ikug=",
+    "h1:ig2s1IS9IzehorRjvVAnKIsUUj8fkgyxct1L/kswcc4=",
+    "h1:j3lS+ZEERFnoab8t1ppDrScGVP/cgWbzlCrEYKTCXYw=",
+    "h1:lxezrKmOiQIySHAM+os8qLVq7hqufDr8h3Hpzvsk+78=",
+    "h1:lzRqBJAG+NETxHbEZUJ/YP3RMEjZBinTX7VmgH3lw60=",
+    "h1:tdSNWK5ApqUsgbdYieyeYLTu6nIZUV3hR1oFqUfAuGo=",
+    "h1:xedet8yH/zI2CfdxsGlK0nlFWc/Bp61yrWsEa3fHB8g=",
+    "zh:03f1114cc20b8913523735ab76e0f0a2b16ce13c92923a53304bf85f07fc0dbc",
+    "zh:105b678ee72322a3067f105d7e05e940f6143238f377f6e87ff4ec909246ac2a",
+    "zh:55f3bbf13ea18cbace61a706566a80f25f33fe2b1780b6f3d7b582af2a05b6d2",
+    "zh:63adf996db48f082f7a6351eb485e219cd88795fc71e6ec60a837263ab0d2cb1",
+    "zh:7e99550738a4e3cc68b8a467714b0d69371025fe95e3326d5323d026d55653e9",
+    "zh:8342b54af3a18a37e075eeae61be57f4de2ba71b35d95c5075d402dd2c1f289d",
+    "zh:83ee18e32ac9dd5fc91298554b7c4cfa4c3a1db50f4c797945637cc93c0844ae",
+    "zh:993ecc0adbf6bd535a59fbc9b735d8c33950e6f6eb5e621d750da9b71d65d80a",
+    "zh:ad722bc59d4edbf1415e827fc007c0efe6e0e9462d5568bae20b34be1058a261",
+    "zh:ae9448e1f87b2f9a6c5197a0e9862162ec6b137cb3a3835e11522995d8939e7c",
+    "zh:bc9cdd3aac784f759125c6627f6f6416e8726a1c184eb9cf3e55b9edbc94c627",
+    "zh:c8e35b89572ba1c40a9b20022e033a3395fb8d42e7604d50c900f193ba10382e",
+    "zh:e2deaa8a9975ef81d9f62baed12c41286918b0a10908e0e031f13f69a3b730a1",
+    "zh:ee39707557210a0ab1098aa357d2cdfe502e5a312d0dbdffb09d08facc4d3fc5",
+    "zh:f81afe4eb63e8aa9e0ea71be6c990f0dc69cb360e7191c0742a991f4a5081b64",
+  ]
+}
diff --git a/deploy/terraform/azure/README.md b/deploy/terraform/azure/README.md
new file mode 100644
index 0000000..56fdb5f
--- /dev/null
+++ b/deploy/terraform/azure/README.md
@@ -0,0 +1,64 @@
+# QueryWise on Azure — Terraform (data plane + secrets)
+
+Provisions the managed dependencies the Helm chart needs, in your subscription:
+
+- **Azure Database for PostgreSQL flexible server 16** (pgvector allow-listed,
+  VNet-integrated/private, zone-redundant HA, TLS-only)
+- **Azure Cache for Redis** (result cache + the arq job queue, TLS-only)
+- **Key Vault** secret with the assembled DSNs + keys
+- **Storage account + container** for exports / `pg_dump` backups (optional)
+- **VNet + delegated subnet + private DNS zone** (optional — or BYO)
+- **User-assigned managed identity** with Key Vault read, for external-secrets
+
+**Compute (AKS) is out of scope** — bring your own cluster or use the upstream
+[`Azure/aks`](https://registry.terraform.io/modules/Azure/aks/azurerm/latest)
+module, then deploy with the Helm chart in [`../../helm/querywise`](../../helm/querywise).
+Keep the cluster in a separate Terraform state from the database.
+
+## Usage
+
+```bash
+az login
+cp terraform.tfvars.example terraform.tfvars   # then edit
+terraform init
+terraform apply
+```
+
+Wire it up with the external-secrets operator on AKS (Workload Identity):
+
+1. Federate the managed identity to the external-secrets KSA:
+   ```bash
+   az identity federated-credential create \
+     --identity-name "<name_prefix>-ext-secrets" \
+     --resource-group "$(terraform output -raw resource_group_name)" \
+     --issuer "$(az aks show -g <rg> -n <cluster> --query oidcIssuerProfile.issuerUrl -o tsv)" \
+     --subject system:serviceaccount:external-secrets:external-secrets \
+     --audience api://AzureADTokenExchange
+   ```
+2. Create an `ExternalSecret` (provider `azurekv`) that pulls the
+   `querywise-app` secret with a `dataFrom` extract into a Kubernetes Secret
+   named `querywise-secrets` (its keys already match the backend's env).
+3. Install the chart:
+   ```bash
+   helm upgrade --install querywise ../../helm/querywise -n querywise \
+     --set secrets.existingSecret=querywise-secrets
+   ```
+
+AKS must reach the Postgres private endpoint and the Redis host — peer its VNet
+with the one created here (or set `create_vnet = false` and deploy into the
+cluster's VNet).
+
+## pgvector
+
+`azure.extensions = VECTOR` is set here so the server permits the extension; the
+app's Alembic migrations then run `CREATE EXTENSION IF NOT EXISTS vector` on
+first `helm upgrade` (the migration hook).
+
+## Notes
+
+- The Terraform principal needs rights to assign roles on the Key Vault (it
+  grants itself **Key Vault Secrets Officer** to write the secret).
+- Key Vault has purge protection on — a destroyed vault stays soft-deleted
+  (recoverable, not purgeable) for 7 days, and its name stays reserved meanwhile.
+- Generated DB password / JWT secret live only in Key Vault + Terraform state —
+  keep your state backend encrypted.
diff --git a/deploy/terraform/azure/identity.tf b/deploy/terraform/azure/identity.tf
new file mode 100644
index 0000000..f6c0dc1
--- /dev/null
+++ b/deploy/terraform/azure/identity.tf
@@ -0,0 +1,23 @@
+# User-assigned managed identity for the external-secrets operator. Grant it
+# read on the vault, then federate it to the in-cluster external-secrets KSA
+# (the federated credential references the AKS OIDC issuer, created with the
+# cluster — hence kept out of this data module):
+#
+#   az identity federated-credential create \
+#     --identity-name <name> --resource-group <rg> \
+#     --issuer <aks-oidc-issuer-url> \
+#     --subject system:serviceaccount:external-secrets:external-secrets \
+#     --audience api://AzureADTokenExchange
+
+resource "azurerm_user_assigned_identity" "external_secrets" {
+  resource_group_name = local.rg_name
+  location            = var.location
+  name                = "${var.name_prefix}-ext-secrets" # the name to pass to `az identity federated-credential create`
+  tags                = local.tags
+}
+
+resource "azurerm_role_assignment" "es_secrets_user" {
+  principal_id         = azurerm_user_assigned_identity.external_secrets.principal_id
+  role_definition_name = "Key Vault Secrets User"
+  scope                = azurerm_key_vault.this.id # scoped to this vault only
+}
diff --git a/deploy/terraform/azure/keyvault.tf b/deploy/terraform/azure/keyvault.tf
new file mode 100644
index 0000000..0866bde
--- /dev/null
+++ b/deploy/terraform/azure/keyvault.tf
@@ -0,0 +1,35 @@
+# Key Vault holds the assembled app secret (DSNs + keys) as a JSON blob. The
+# external-secrets operator on AKS reads it (via the managed identity in
+# identity.tf, federated to its KSA) and syncs it into the Kubernetes Secret the
+# Helm chart references. Keys map 1:1 to the backend's env vars.
+
+locals { # vault names max out at 24 chars: trim the prefix (<= 14) so "-kv-" + the 6-char random suffix always survive
+  key_vault_name = "${trimsuffix(substr(var.name_prefix, 0, 14), "-")}-kv-${random_string.suffix.result}"
+}
+
+resource "azurerm_key_vault" "this" {
+  name                       = local.key_vault_name
+  resource_group_name        = local.rg_name
+  location                   = var.location
+  tenant_id                  = data.azurerm_client_config.current.tenant_id
+  sku_name                   = "standard"
+  enable_rbac_authorization  = true # access is granted through the role assignments, not access policies
+  soft_delete_retention_days = 7
+  purge_protection_enabled   = true
+  tags                       = local.tags
+}
+
+# Let the principal running Terraform write secrets (RBAC mode).
+resource "azurerm_role_assignment" "tf_secrets_officer" {
+  principal_id         = data.azurerm_client_config.current.object_id # the identity running this apply
+  role_definition_name = "Key Vault Secrets Officer"
+  scope                = azurerm_key_vault.this.id
+}
+
+resource "azurerm_key_vault_secret" "app" {
+  key_vault_id = azurerm_key_vault.this.id
+  name         = "querywise-app"
+  value        = jsonencode(local.secret_payload) # one JSON object; keys are the backend's env var names
+
+  depends_on = [azurerm_role_assignment.tf_secrets_officer] # the write needs the Secrets Officer grant first
+}
diff --git a/deploy/terraform/azure/main.tf b/deploy/terraform/azure/main.tf
new file mode 100644
index 0000000..503836f
--- /dev/null
+++ b/deploy/terraform/azure/main.tf
@@ -0,0 +1,71 @@
+data "azurerm_client_config" "current" {}
+
+locals {
+  tags = merge({
+    "app"       = "querywise"
+    "managedBy" = "terraform"
+  }, var.tags)
+
+  rg_name = var.create_resource_group ? azurerm_resource_group.this[0].name : var.resource_group_name
+
+  db_subnet_id        = var.create_vnet ? azurerm_subnet.db[0].id : var.db_subnet_id
+  private_dns_zone_id = var.create_vnet ? azurerm_private_dns_zone.pg[0].id : var.private_dns_zone_id
+
+  db_password = var.db_password != "" ? var.db_password : random_password.db[0].result
+  jwt_secret  = var.jwt_secret != "" ? var.jwt_secret : random_password.jwt[0].result
+
+  # Percent-encode the password before it goes into the DSN userinfo. Generated
+  # passwords only use [A-Za-z0-9_-] and pass through unchanged, but a
+  # user-supplied var.db_password may contain "@", "/", ":" or "#", which would
+  # otherwise corrupt the URL. urlencode() emits "+" for a space, which is only
+  # a space in query strings, so rewrite it to "%20".
+  db_password_dsn = replace(urlencode(local.db_password), "+", "%20")
+
+  # Postgres flexible server FQDN; Azure Cache for Redis is TLS-only on 6380 and
+  # authenticates with the access key (rediss:// DSN).
+  database_url = "postgresql+asyncpg://${var.db_username}:${local.db_password_dsn}@${azurerm_postgresql_flexible_server.this.fqdn}:5432/${var.db_name}"
+  redis_url    = "rediss://:${azurerm_redis_cache.this.primary_access_key}@${azurerm_redis_cache.this.hostname}:6380/0"
+
+  # Storage account name: 3-24 lowercase alphanumeric, globally unique. Cap the
+  # sanitized prefix at 18 characters so the 6-character random suffix is never
+  # truncated away.
+  storage_account_name = "${substr(replace(lower(var.name_prefix), "/[^a-z0-9]/", ""), 0, 18)}${random_string.suffix.result}"
+
+  # Drop null/empty entries so unset optional keys are omitted from the JSON.
+  secret_payload = { for k, v in {
+    DATABASE_URL           = local.database_url
+    REDIS_URL              = local.redis_url
+    ENCRYPTION_KEY         = var.encryption_key
+    JWT_SECRET             = local.jwt_secret
+    DEFAULT_ADMIN_PASSWORD = var.default_admin_password
+    OPENAI_API_KEY         = var.openai_api_key
+    ANTHROPIC_API_KEY      = var.anthropic_api_key
+    AZURE_OPENAI_API_KEY   = var.azure_openai_api_key
+  } : k => v if v != null && v != "" }
+}
+
+resource "random_string" "suffix" {
+  length  = 6
+  upper   = false
+  special = false
+}
+
+resource "random_password" "db" {
+  count            = var.db_password == "" ? 1 : 0
+  length           = 32
+  special          = true
+  override_special = "-_"
+}
+
+resource "random_password" "jwt" {
+  count   = var.jwt_secret == "" ? 1 : 0
+  length  = 48
+  special = false
+}
+
+resource "azurerm_resource_group" "this" {
+  count    = var.create_resource_group ? 1 : 0
+  name     = var.resource_group_name != "" ? var.resource_group_name : "${var.name_prefix}-rg"
+  location = var.location
+  tags     = local.tags
+}
diff --git a/deploy/terraform/azure/network.tf b/deploy/terraform/azure/network.tf
new file mode 100644
index 0000000..cbb0aeb
--- /dev/null
+++ b/deploy/terraform/azure/network.tf
@@ -0,0 +1,43 @@
+# VNet + a subnet delegated to the Postgres flexible server, plus the private
+# DNS zone it needs for VNet integration. Set create_vnet = false to supply your
+# own delegated subnet + DNS zone.
+
+resource "azurerm_virtual_network" "this" {
+  count               = var.create_vnet ? 1 : 0
+  name                = "${var.name_prefix}-vnet"
+  location            = var.location
+  resource_group_name = local.rg_name
+  address_space       = [var.vnet_cidr]
+  tags                = local.tags
+}
+
+resource "azurerm_subnet" "db" {
+  count                = var.create_vnet ? 1 : 0
+  name                 = "${var.name_prefix}-pg"
+  resource_group_name  = local.rg_name
+  virtual_network_name = azurerm_virtual_network.this[0].name
+  address_prefixes     = [var.db_subnet_cidr]
+
+  delegation {
+    name = "fs"
+    service_delegation {
+      name    = "Microsoft.DBforPostgreSQL/flexibleServers"
+      actions = ["Microsoft.Network/virtualNetworks/subnets/join/action"]
+    }
+  }
+}
+
+resource "azurerm_private_dns_zone" "pg" {
+  count               = var.create_vnet ? 1 : 0
+  name                = "${var.name_prefix}.private.postgres.database.azure.com"
+  resource_group_name = local.rg_name
+  tags                = local.tags
+}
+
+resource "azurerm_private_dns_zone_virtual_network_link" "pg" {
+  count                 = var.create_vnet ? 1 : 0
+  name                  = "${var.name_prefix}-pg-link"
+  resource_group_name   = local.rg_name
+  private_dns_zone_name = azurerm_private_dns_zone.pg[0].name
+  virtual_network_id    = azurerm_virtual_network.this[0].id
+}
diff --git a/deploy/terraform/azure/outputs.tf b/deploy/terraform/azure/outputs.tf
new file mode 100644
index 0000000..5572ce7
--- /dev/null
+++ b/deploy/terraform/azure/outputs.tf
@@ -0,0 +1,46 @@
+output "resource_group_name" {
+  description = "Resource group the data plane runs in."
+  value       = local.rg_name
+}
+
+output "db_fqdn" {
+  description = "Postgres flexible server FQDN."
+  value       = azurerm_postgresql_flexible_server.this.fqdn
+}
+
+output "redis_hostname" {
+  description = "Azure Cache for Redis hostname."
+  value       = azurerm_redis_cache.this.hostname
+}
+
+output "key_vault_name" {
+  description = "Key Vault holding the app secret. Point external-secrets at this."
+  value       = azurerm_key_vault.this.name
+}
+
+output "app_secret_name" {
+  description = "Key Vault secret name with the assembled app config (JSON)."
+  value       = azurerm_key_vault_secret.app.name
+}
+
+output "external_secrets_identity_client_id" {
+  description = "Client id of the managed identity to federate to the external-secrets KSA."
+  value       = azurerm_user_assigned_identity.external_secrets.client_id
+}
+
+output "storage_account_name" {
+  description = "Exports/backups storage account (empty if disabled)."
+  value       = var.create_storage ? azurerm_storage_account.this[0].name : ""
+}
+
+output "database_url" {
+  description = "asyncpg DSN (also stored in Key Vault)."
+  value       = local.database_url
+  sensitive   = true
+}
+
+output "redis_url" {
+  description = "Redis DSN (also stored in Key Vault)."
+  value       = local.redis_url
+  sensitive   = true
+}
diff --git a/deploy/terraform/azure/postgres.tf b/deploy/terraform/azure/postgres.tf
new file mode 100644
index 0000000..77d87cb
--- /dev/null
+++ b/deploy/terraform/azure/postgres.tf
@@ -0,0 +1,52 @@
+# Azure Database for PostgreSQL flexible server, v16. pgvector must be
+# allow-listed via the azure.extensions server parameter; the extension itself
+# is then created by the app's Alembic migrations (`CREATE EXTENSION ... vector`).
+
+resource "azurerm_postgresql_flexible_server" "this" {
+  name                = "${var.name_prefix}-pg"
+  resource_group_name = local.rg_name
+  location            = var.location
+  version             = "16"
+
+  administrator_login    = var.db_username
+  administrator_password = local.db_password
+
+  sku_name   = var.db_sku
+  storage_mb = var.db_storage_mb
+
+  # VNet-integrated (private) access. Public access must be switched off
+  # explicitly: the provider defaults public_network_access_enabled to true and
+  # rejects that combination with a delegated subnet + private DNS zone.
+  public_network_access_enabled = false
+  delegated_subnet_id           = local.db_subnet_id
+  private_dns_zone_id           = local.private_dns_zone_id
+
+  backup_retention_days = var.db_backup_retention_days
+
+  # Emit the high_availability block only when var.db_ha is true.
+  dynamic "high_availability" {
+    for_each = var.db_ha ? [1] : []
+    content {
+      mode = "ZoneRedundant"
+    }
+  }
+
+  tags = local.tags
+
+  # The private DNS zone link must exist before the server is created.
+  depends_on = [azurerm_private_dns_zone_virtual_network_link.pg]
+}
+
+resource "azurerm_postgresql_flexible_server_database" "app" {
+  name      = var.db_name
+  server_id = azurerm_postgresql_flexible_server.this.id
+  collation = "en_US.utf8"
+  charset   = "UTF8"
+}
+
+# Allow-list pgvector so the app can `CREATE EXTENSION vector`.
+resource "azurerm_postgresql_flexible_server_configuration" "extensions" {
+  name      = "azure.extensions"
+  server_id = azurerm_postgresql_flexible_server.this.id
+  value     = "VECTOR"
+}
diff --git a/deploy/terraform/azure/redis.tf b/deploy/terraform/azure/redis.tf
new file mode 100644
index 0000000..97b46df
--- /dev/null
+++ b/deploy/terraform/azure/redis.tf
@@ -0,0 +1,17 @@
+# Azure Cache for Redis — result cache + the arq job queue. TLS-only (6380);
+# the backend connects with rediss:// using the primary access key.
+
+resource "azurerm_redis_cache" "this" {
+  name                = "${var.name_prefix}-redis"
+  location            = var.location
+  resource_group_name = local.rg_name
+
+  capacity = var.redis_capacity
+  family   = var.redis_sku == "Premium" ? "P" : "C"
+  sku_name = var.redis_sku
+
+  non_ssl_port_enabled = false
+  minimum_tls_version  = "1.2"
+
+  tags = local.tags
+}
diff --git a/deploy/terraform/azure/storage.tf b/deploy/terraform/azure/storage.tf
new file mode 100644
index 0000000..7f969a1
--- /dev/null
+++ b/deploy/terraform/azure/storage.tf
@@ -0,0 +1,21 @@
+# Optional storage account + container for exports / pg_dump backups.
+
+resource "azurerm_storage_account" "this" {
+  count                    = var.create_storage ? 1 : 0
+  name                     = local.storage_account_name
+  resource_group_name      = local.rg_name
+  location                 = var.location
+  account_tier             = "Standard"
+  account_replication_type = "LRS"
+  account_kind             = "StorageV2"
+  min_tls_version          = "TLS1_2"
+
+  tags = local.tags
+}
+
+resource "azurerm_storage_container" "data" {
+  count                 = var.create_storage ? 1 : 0
+  name                  = "exports"
+  storage_account_name  = azurerm_storage_account.this[0].name
+  container_access_type = "private"
+}
diff --git a/deploy/terraform/azure/terraform.tfvars.example b/deploy/terraform/azure/terraform.tfvars.example
new file mode 100644
index 0000000..0e57e63
--- /dev/null
+++ b/deploy/terraform/azure/terraform.tfvars.example
@@ -0,0 +1,25 @@
+# Copy to terraform.tfvars and fill in. Keep secrets out of version control.
+# Authenticate first: `az login` (+ set ARM_SUBSCRIPTION_ID or subscription_id).
+
+location    = "eastus"
+name_prefix = "querywise-prod"
+# subscription_id = "00000000-0000-0000-0000-000000000000"
+
+# Network — create a VNet with a delegated subnet + private DNS zone, or set
+# create_vnet = false and supply db_subnet_id + private_dns_zone_id.
+create_vnet = true
+
+# Postgres flexible server
+db_sku  = "GP_Standard_D2ds_v5"
+db_ha   = true
+
+# Redis
+redis_sku      = "Standard"
+redis_capacity = 1
+
+# REQUIRED — Fernet key. Generate with:
+#   python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+encryption_key = "CHANGE_ME"
+
+# LLM provider key(s).
+openai_api_key = "CHANGE_ME"
diff --git a/deploy/terraform/azure/variables.tf b/deploy/terraform/azure/variables.tf
new file mode 100644
index 0000000..137b7b5
--- /dev/null
+++ b/deploy/terraform/azure/variables.tf
@@ -0,0 +1,176 @@
+# -- General -----------------------------------------------------------------
+variable "subscription_id" {
+  description = "Azure subscription id. Empty = use the provider's ambient context (ARM_SUBSCRIPTION_ID)."
+  type        = string
+  default     = ""
+}
+
+variable "location" {
+  description = "Azure region."
+  type        = string
+  default     = "eastus"
+}
+
+variable "name_prefix" {
+  description = "Prefix for resource names."
+  type        = string
+  default     = "querywise"
+}
+
+variable "tags" {
+  description = "Extra tags applied to every resource."
+  type        = map(string)
+  default     = {}
+}
+
+# -- Resource group ----------------------------------------------------------
+variable "create_resource_group" {
+  description = "Create the resource group. If false, it must already exist."
+  type        = bool
+  default     = true
+}
+
+variable "resource_group_name" {
+  description = "Resource group name. Empty = \"<name_prefix>-rg\"."
+  type        = string
+  default     = ""
+}
+
+# -- Network -----------------------------------------------------------------
+# The Postgres flexible server uses VNet integration (delegated subnet + private
+# DNS zone). Set create_vnet = false to supply your own delegated subnet.
+variable "create_vnet" {
+  description = "Create a VNet + delegated subnet + private DNS zone for Postgres."
+  type        = bool
+  default     = true
+}
+
+variable "vnet_cidr" {
+  description = "VNet CIDR (when create_vnet = true)."
+  type        = string
+  default     = "10.44.0.0/16"
+}
+
+variable "db_subnet_cidr" {
+  description = "Delegated subnet CIDR for the flexible server."
+  type        = string
+  default     = "10.44.1.0/24"
+}
+
+variable "db_subnet_id" {
+  description = "Existing delegated subnet id (when create_vnet = false)."
+  type        = string
+  default     = ""
+}
+
+variable "private_dns_zone_id" {
+  description = "Existing private DNS zone id for Postgres (when create_vnet = false)."
+  type        = string
+  default     = ""
+}
+
+# -- PostgreSQL flexible server (pgvector) -----------------------------------
+variable "db_name" {
+  description = "Application database name."
+  type        = string
+  default     = "querywise"
+}
+
+variable "db_username" {
+  description = "Administrator login."
+  type        = string
+  default     = "querywise"
+}
+
+variable "db_password" {
+  description = "Admin password. Empty = generate one (stored in Key Vault)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "db_sku" {
+  description = "Flexible server SKU."
+  type        = string
+  default     = "GP_Standard_D2ds_v5"
+}
+
+variable "db_storage_mb" {
+  description = "Storage (MB). Minimum 32768."
+  type        = number
+  default     = 65536
+}
+
+variable "db_ha" {
+  description = "Zone-redundant high availability."
+  type        = bool
+  default     = true
+}
+
+variable "db_backup_retention_days" {
+  description = "Backup retention (days)."
+  type        = number
+  default     = 7
+}
+
+# -- Redis -------------------------------------------------------------------
+variable "redis_capacity" {
+  description = "Redis cache capacity (Standard family C: 0=250MB,1=1GB,...)."
+  type        = number
+  default     = 1
+}
+
+variable "redis_sku" {
+  description = "Redis SKU (Basic | Standard | Premium)."
+  type        = string
+  default     = "Standard"
+}
+
+# -- Storage -----------------------------------------------------------------
+variable "create_storage" {
+  description = "Create a storage account + container for exports/backups."
+  type        = bool
+  default     = true
+}
+
+# -- Application secrets ------------------------------------------------------
+variable "encryption_key" {
+  description = "Fernet key for connection-string encryption (REQUIRED — see README)."
+  type        = string
+  sensitive   = true
+}
+
+variable "jwt_secret" {
+  description = "HS256 JWT signing secret. Empty = generate one."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "default_admin_password" {
+  description = "Optional bootstrap admin password."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "openai_api_key" {
+  description = "OpenAI API key."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "anthropic_api_key" {
+  description = "Anthropic API key (optional)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "azure_openai_api_key" {
+  description = "Azure OpenAI key (optional)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
diff --git a/deploy/terraform/azure/versions.tf b/deploy/terraform/azure/versions.tf
new file mode 100644
index 0000000..cea02ae
--- /dev/null
+++ b/deploy/terraform/azure/versions.tf
@@ -0,0 +1,19 @@
+terraform {
+  required_version = ">= 1.5"
+
+  required_providers {
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = "~> 3.110"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.5"
+    }
+  }
+}
+
+provider "azurerm" {
+  features {}
+  subscription_id = var.subscription_id != "" ? var.subscription_id : null
+}
diff --git a/deploy/terraform/gcp/.terraform.lock.hcl b/deploy/terraform/gcp/.terraform.lock.hcl
new file mode 100644
index 0000000..e625516
--- /dev/null
+++ b/deploy/terraform/gcp/.terraform.lock.hcl
@@ -0,0 +1,66 @@
+# This file is maintained automatically by "tofu init".
+# Manual edits may be lost in future updates.
+
+provider "registry.opentofu.org/hashicorp/google" {
+  version     = "5.45.2"
+  constraints = "~> 5.40"
+  hashes = [
+    "h1:0RjrEaRJMIWbgQ4sBfjjLOy9tZiaKaq4r5J1iVz085E=",
+    "h1:0ehpLaWItePTA1Ne3WXjtRjI4uzPkdPiNwe+M2NI5Kc=",
+    "h1:9HblaFutcY1nCoKewYVq8aRKC5wyzLIYyLeMMVuzO9M=",
+    "h1:9jeOJWcgA9fNRuBzPStcI3/N3NZ6BNoSxqS6O9dEmIE=",
+    "h1:Dm34S6/Q+8uAtrmX+tWkQZCkrvVveU6lHbk4NBkgGBc=",
+    "h1:RYRrPC1vvSyNu4aYq5MFkRpTyCx84YMan5cNP01XUUk=",
+    "h1:YEQOp7Ou1+GtpcKyCX6Cr/mAGqKIogpi85MX51GuG4s=",
+    "h1:fwPyxJ8zBHeuEyv87dn8YkRHAqXGbJ9AqLN1I8loPr8=",
+    "h1:lg2ogfA9WQfN1nKFQzWpZEqX+0+/J5yz2hvw6Mea/qk=",
+    "h1:qYSz8K/mE6U1q05/GBky/xOPaUJ4BZn4f4kyDvxcugo=",
+    "zh:0931f08e81f220ae3132169cfa4ed8e9d8d2045f29ca914afd8ee9e3e9cf56e0",
+    "zh:31afa45a4c8a0fd4abff564ecff8b69a97ac1813ead61c12f5f0bf5d33cec7f1",
+    "zh:536979e437aad59ba41465c9398d8e3d7d3702bfe2a51d80571862d48c817959",
+    "zh:748e14614be32350ece4e9249e09bc1d20e54421983734ded3a0df6d6674ea71",
+    "zh:7c8fe641666603aad6693207c8eaac679b9be15246d77090c73a1a84326d6084",
+    "zh:8095a513a0662323d99c25466b5a291c80b2b0c1857c7c7a7b1159f25dbe4439",
+    "zh:9453db86d14611cab26dba30daf56d1cfef929918207e9e3e78b58299fc8c4fe",
+    "zh:adaa5df5d40060409b6b66136c0ac37b99fb35ac2cf554c584649c236a18d95b",
+    "zh:af2f659b4bd1f44e578f203830bdab829b5e635fcf2a59ffa7e997c16e6611ad",
+    "zh:b75184fe5c162821b0524fa941d6a934c452e815d82e62675bb21bbdc9046dfc",
+  ]
+}
+
+provider "registry.opentofu.org/hashicorp/random" {
+  version     = "3.9.0"
+  constraints = "~> 3.5"
+  hashes = [
+    "h1:8EQU5KSxezcjo/phRSe69rDOI0lk4pSaggj7FsskYp8=",
+    "h1:Lw9im2VBBJQ3RyAbHPQ0rcvcmmcZWm3x+kIOpN+Tv9s=",
+    "h1:U8KXqGCoNI9/guYbTvzgdtVk3fRthoG0UXwm1JoEpIs=",
+    "h1:YXaVd4p6qXPPVaxIBaIDNXmBwT02ZqDn0qD+tYpw8sA=",
+    "h1:cOpc03fphEt/G9Rfc4jLL/fW0D7tgvlXqiDKPF4vuww=",
+    "h1:g09RR7T1xWkeGrZwWvWMT9ncJrFGr1k3CBD585UmO7w=",
+    "h1:gGDdPPibmw2EWROx+sh1RGLjR5+nPwZyrf6/N9jXfeM=",
+    "h1:haE7/nXCOhXKP4oXeEnER3t5CaVQWqujz4nBnpeTUv4=",
+    "h1:ieSVpfZS2lKuMr05ph0QsOVpCzg7uk3cgKBaXR+Ikug=",
+    "h1:ig2s1IS9IzehorRjvVAnKIsUUj8fkgyxct1L/kswcc4=",
+    "h1:j3lS+ZEERFnoab8t1ppDrScGVP/cgWbzlCrEYKTCXYw=",
+    "h1:lxezrKmOiQIySHAM+os8qLVq7hqufDr8h3Hpzvsk+78=",
+    "h1:lzRqBJAG+NETxHbEZUJ/YP3RMEjZBinTX7VmgH3lw60=",
+    "h1:tdSNWK5ApqUsgbdYieyeYLTu6nIZUV3hR1oFqUfAuGo=",
+    "h1:xedet8yH/zI2CfdxsGlK0nlFWc/Bp61yrWsEa3fHB8g=",
+    "zh:03f1114cc20b8913523735ab76e0f0a2b16ce13c92923a53304bf85f07fc0dbc",
+    "zh:105b678ee72322a3067f105d7e05e940f6143238f377f6e87ff4ec909246ac2a",
+    "zh:55f3bbf13ea18cbace61a706566a80f25f33fe2b1780b6f3d7b582af2a05b6d2",
+    "zh:63adf996db48f082f7a6351eb485e219cd88795fc71e6ec60a837263ab0d2cb1",
+    "zh:7e99550738a4e3cc68b8a467714b0d69371025fe95e3326d5323d026d55653e9",
+    "zh:8342b54af3a18a37e075eeae61be57f4de2ba71b35d95c5075d402dd2c1f289d",
+    "zh:83ee18e32ac9dd5fc91298554b7c4cfa4c3a1db50f4c797945637cc93c0844ae",
+    "zh:993ecc0adbf6bd535a59fbc9b735d8c33950e6f6eb5e621d750da9b71d65d80a",
+    "zh:ad722bc59d4edbf1415e827fc007c0efe6e0e9462d5568bae20b34be1058a261",
+    "zh:ae9448e1f87b2f9a6c5197a0e9862162ec6b137cb3a3835e11522995d8939e7c",
+    "zh:bc9cdd3aac784f759125c6627f6f6416e8726a1c184eb9cf3e55b9edbc94c627",
+    "zh:c8e35b89572ba1c40a9b20022e033a3395fb8d42e7604d50c900f193ba10382e",
+    "zh:e2deaa8a9975ef81d9f62baed12c41286918b0a10908e0e031f13f69a3b730a1",
+    "zh:ee39707557210a0ab1098aa357d2cdfe502e5a312d0dbdffb09d08facc4d3fc5",
+    "zh:f81afe4eb63e8aa9e0ea71be6c990f0dc69cb360e7191c0742a991f4a5081b64",
+  ]
+}
diff --git a/deploy/terraform/gcp/README.md b/deploy/terraform/gcp/README.md
new file mode 100644
index 0000000..605c5b5
--- /dev/null
+++ b/deploy/terraform/gcp/README.md
@@ -0,0 +1,55 @@
+# QueryWise on GCP — Terraform (data plane + secrets)
+
+Provisions the managed dependencies the Helm chart needs, in your project:
+
+- **Cloud SQL PostgreSQL 16** (pgvector-ready, private IP, regional HA, PITR, TLS-only)
+- **Memorystore for Redis** (result cache + the arq job queue)
+- **Secret Manager** secret with the assembled DSNs + keys
+- **GCS** bucket for exports / `pg_dump` backups (optional)
+- **VPC + private-services-access** peering (optional — or BYO VPC with PSA)
+- **Service account** with `secretAccessor` for the external-secrets operator
+
+**Compute (GKE / Cloud Run) is out of scope** — BYO or the upstream
+[`terraform-google-modules/kubernetes-engine`](https://github.com/terraform-google-modules/terraform-google-kubernetes-engine)
+module — then deploy with the Helm chart in [`../../helm/querywise`](../../helm/querywise),
+keeping the cluster in a separate state from the database.
+
+## Usage
+
+```bash
+cp terraform.tfvars.example terraform.tfvars   # then edit
+terraform init
+terraform apply
+```
+
+Wire it up with the external-secrets operator on GKE:
+
+1. Bind the service account to the external-secrets KSA with Workload Identity:
+   ```bash
+   gcloud iam service-accounts add-iam-policy-binding \
+     "$(terraform output -raw external_secrets_sa_email)" \
+     --role roles/iam.workloadIdentityUser \
+     --member "serviceAccount:PROJECT.svc.id.goog[external-secrets/external-secrets]"
+   ```
+2. Create an `ExternalSecret` that pulls `terraform output app_secret_id` with a
+   `dataFrom` extract into a Kubernetes Secret named `querywise-secrets` (its
+   keys already match the backend's env).
+3. Install the chart:
+   ```bash
+   helm upgrade --install querywise ../../helm/querywise -n querywise \
+     --set secrets.existingSecret=querywise-secrets
+   ```
+
+GKE must sit on the same VPC (or a peered one) so pods reach the Cloud SQL
+private IP and Memorystore host.
+
+## pgvector
+
+The `vector` extension is created by the app's Alembic migrations on first
+`helm upgrade` (the migration hook). No instance flag required.
+
+## Notes
+
+- `db_deletion_protection = true` (default) blocks destroying the instance.
+- Generated DB password / JWT secret live only in Secret Manager + Terraform
+  state — keep your state backend (a GCS bucket) encrypted and access-controlled.
diff --git a/deploy/terraform/gcp/cloudsql.tf b/deploy/terraform/gcp/cloudsql.tf
new file mode 100644
index 0000000..bf6b159
--- /dev/null
+++ b/deploy/terraform/gcp/cloudsql.tf
@@ -0,0 +1,45 @@
+# Cloud SQL for PostgreSQL 16. pgvector is available as an extension and is
+# created by the app's Alembic migrations (`CREATE EXTENSION IF NOT EXISTS
+# vector`) — no instance flag required.
+
+resource "google_sql_database_instance" "this" {
+  name                = "${var.name_prefix}-pg"
+  database_version    = "POSTGRES_16"
+  region              = var.region
+  deletion_protection = var.db_deletion_protection
+
+  # Private IP depends on the PSA peering being established first.
+  depends_on = [google_service_networking_connection.psa]
+
+  settings {
+    tier              = var.db_tier
+    availability_type = var.db_ha ? "REGIONAL" : "ZONAL"
+    disk_size         = var.db_disk_size
+    disk_autoresize   = true
+    disk_type         = "PD_SSD"
+
+    ip_configuration {
+      ipv4_enabled    = false
+      private_network = local.network_id
+      ssl_mode        = "ENCRYPTED_ONLY"
+    }
+
+    backup_configuration {
+      enabled                        = true
+      point_in_time_recovery_enabled = true
+    }
+
+    user_labels = local.labels
+  }
+}
+
+resource "google_sql_database" "app" {
+  name     = var.db_name
+  instance = google_sql_database_instance.this.name
+}
+
+resource "google_sql_user" "app" {
+  name     = var.db_username
+  instance = google_sql_database_instance.this.name
+  password = local.db_password
+}
diff --git a/deploy/terraform/gcp/gcs.tf b/deploy/terraform/gcp/gcs.tf
new file mode 100644
index 0000000..09d2a0e
--- /dev/null
+++ b/deploy/terraform/gcp/gcs.tf
@@ -0,0 +1,14 @@
+# Optional bucket for exports / pg_dump backups. Uniform access + versioned.
+
+resource "google_storage_bucket" "data" {
+  count                       = var.create_bucket ? 1 : 0
+  name                        = local.bucket_name
+  location                    = var.region
+  uniform_bucket_level_access = true
+  force_destroy               = false
+  labels                      = local.labels
+
+  versioning {
+    enabled = true
+  }
+}
diff --git a/deploy/terraform/gcp/iam.tf b/deploy/terraform/gcp/iam.tf
new file mode 100644
index 0000000..669050c
--- /dev/null
+++ b/deploy/terraform/gcp/iam.tf
@@ -0,0 +1,19 @@
+# Service account for the external-secrets operator. Grant it accessor on the
+# app secret, then bind it to the in-cluster external-secrets KSA with Workload
+# Identity (the iam.workloadIdentityUser binding references the GKE workload
+# identity pool, created with the cluster — hence kept out of this data module):
+#
+#   gcloud iam service-accounts add-iam-policy-binding <sa_email> \
+#     --role roles/iam.workloadIdentityUser \
+#     --member "serviceAccount:<project>.svc.id.goog[external-secrets/external-secrets]"
+
+resource "google_service_account" "external_secrets" {
+  account_id   = "${var.name_prefix}-ext-secrets"
+  display_name = "QueryWise external-secrets accessor"
+}
+
+resource "google_secret_manager_secret_iam_member" "accessor" {
+  secret_id = google_secret_manager_secret.app.id
+  role      = "roles/secretmanager.secretAccessor"
+  member    = "serviceAccount:${google_service_account.external_secrets.email}"
+}
diff --git a/deploy/terraform/gcp/main.tf b/deploy/terraform/gcp/main.tf
new file mode 100644
index 0000000..c2b4164
--- /dev/null
+++ b/deploy/terraform/gcp/main.tf
@@ -0,0 +1,49 @@
+locals {
+  labels = merge({
+    "app" = "querywise"
+  }, var.labels)
+
+  network_id = var.create_network ? google_compute_network.this[0].id : var.network_id
+
+  db_password = var.db_password != "" ? var.db_password : random_password.db[0].result
+  jwt_secret  = var.jwt_secret != "" ? var.jwt_secret : random_password.jwt[0].result
+
+  # Percent-encode the password before it goes into the DSN userinfo. Generated
+  # passwords only use [A-Za-z0-9_-] and pass through unchanged, but a
+  # user-supplied var.db_password may contain "@", "/", ":" or "#", which would
+  # otherwise corrupt the URL. urlencode() emits "+" for a space, which is only
+  # a space in query strings, so rewrite it to "%20".
+  db_password_dsn = replace(urlencode(local.db_password), "+", "%20")
+
+  # Cloud SQL private IP + Memorystore host. The Redis DSN carries no
+  # credentials; it is built from the instance's host and port only.
+  database_url = "postgresql+asyncpg://${var.db_username}:${local.db_password_dsn}@${google_sql_database_instance.this.private_ip_address}:5432/${var.db_name}"
+  redis_url    = "redis://${google_redis_instance.this.host}:${google_redis_instance.this.port}/0"
+
+  bucket_name = var.bucket_name != "" ? var.bucket_name : "${var.name_prefix}-${var.project_id}"
+
+  # Drop null/empty entries so unset optional keys are omitted from the JSON.
+  secret_payload = { for k, v in {
+    DATABASE_URL           = local.database_url
+    REDIS_URL              = local.redis_url
+    ENCRYPTION_KEY         = var.encryption_key
+    JWT_SECRET             = local.jwt_secret
+    DEFAULT_ADMIN_PASSWORD = var.default_admin_password
+    OPENAI_API_KEY         = var.openai_api_key
+    ANTHROPIC_API_KEY      = var.anthropic_api_key
+    AZURE_OPENAI_API_KEY   = var.azure_openai_api_key
+  } : k => v if v != null && v != "" }
+}
+
+resource "random_password" "db" {
+  count            = var.db_password == "" ? 1 : 0
+  length           = 32
+  special          = true
+  override_special = "-_"
+}
+
+resource "random_password" "jwt" {
+  count   = var.jwt_secret == "" ? 1 : 0
+  length  = 48
+  special = false
+}
diff --git a/deploy/terraform/gcp/network.tf b/deploy/terraform/gcp/network.tf
new file mode 100644
index 0000000..285157e
--- /dev/null
+++ b/deploy/terraform/gcp/network.tf
@@ -0,0 +1,34 @@
+# VPC + private-services-access peering so Cloud SQL gets a private IP. Set
+# create_network = false to use an existing VPC that already has PSA configured.
+
+resource "google_compute_network" "this" {
+  count                   = var.create_network ? 1 : 0
+  name                    = "${var.name_prefix}-vpc"
+  auto_create_subnetworks = false
+}
+
+resource "google_compute_subnetwork" "this" {
+  count                    = var.create_network ? 1 : 0
+  name                     = "${var.name_prefix}-subnet"
+  ip_cidr_range            = var.subnet_cidr
+  region                   = var.region
+  network                  = google_compute_network.this[0].id
+  private_ip_google_access = true
+}
+
+# Reserved range + connection for private services access (Cloud SQL, etc.).
+resource "google_compute_global_address" "psa" {
+  count         = var.create_network ? 1 : 0
+  name          = "${var.name_prefix}-psa"
+  purpose       = "VPC_PEERING"
+  address_type  = "INTERNAL"
+  prefix_length = 16
+  network       = google_compute_network.this[0].id
+}
+
+resource "google_service_networking_connection" "psa" {
+  count                   = var.create_network ? 1 : 0
+  network                 = google_compute_network.this[0].id
+  service                 = "servicenetworking.googleapis.com"
+  reserved_peering_ranges = [google_compute_global_address.psa[0].name]
+}
diff --git a/deploy/terraform/gcp/outputs.tf b/deploy/terraform/gcp/outputs.tf
new file mode 100644
index 0000000..4e160ff
--- /dev/null
+++ b/deploy/terraform/gcp/outputs.tf
@@ -0,0 +1,46 @@
+output "network_id" {
+  description = "VPC the data plane runs in."
+  value       = local.network_id
+}
+
+output "db_private_ip" {
+  description = "Cloud SQL private IP."
+  value       = google_sql_database_instance.this.private_ip_address
+}
+
+output "db_instance_connection_name" {
+  description = "Cloud SQL connection name (for the auth proxy, if used)."
+  value       = google_sql_database_instance.this.connection_name
+}
+
+output "redis_host" {
+  description = "Memorystore host."
+  value       = google_redis_instance.this.host
+}
+
+output "app_secret_id" {
+  description = "Secret Manager secret id holding the assembled app secret. Point external-secrets at this."
+  value       = google_secret_manager_secret.app.secret_id
+}
+
+output "external_secrets_sa_email" {
+  description = "Service account email to bind to the external-secrets KSA via Workload Identity."
+  value       = google_service_account.external_secrets.email
+}
+
+output "bucket_name" {
+  description = "Exports/backups bucket (empty if disabled)."
+  value       = var.create_bucket ? google_storage_bucket.data[0].name : ""
+}
+
+output "database_url" {
+  description = "asyncpg DSN (also stored in the app secret)."
+  value       = local.database_url
+  sensitive   = true
+}
+
+output "redis_url" {
+  description = "Redis DSN (also stored in the app secret)."
+  value       = local.redis_url
+  sensitive   = true
+}
diff --git a/deploy/terraform/gcp/redis.tf b/deploy/terraform/gcp/redis.tf
new file mode 100644
index 0000000..3137dcf
--- /dev/null
+++ b/deploy/terraform/gcp/redis.tf
@@ -0,0 +1,14 @@
+# Memorystore for Redis — result cache + the arq job queue. Reachable on the
+# authorized VPC's private IP.
+
+resource "google_redis_instance" "this" {
+  name           = "${var.name_prefix}-redis"
+  tier           = var.redis_ha ? "STANDARD_HA" : "BASIC"
+  memory_size_gb = var.redis_memory_gb
+  region         = var.region
+  redis_version  = "REDIS_7_0"
+
+  authorized_network = local.network_id
+
+  labels = local.labels
+}
diff --git a/deploy/terraform/gcp/secrets.tf b/deploy/terraform/gcp/secrets.tf
new file mode 100644
index 0000000..8485988
--- /dev/null
+++ b/deploy/terraform/gcp/secrets.tf
@@ -0,0 +1,18 @@
+# Secret Manager holds the assembled app secret (DSNs + keys) as a JSON blob.
+# The external-secrets operator on GKE reads it (via the service account in iam.tf,
+# bound with Workload Identity) and syncs it into the Kubernetes Secret the Helm
+# chart references. Keys map 1:1 to the backend's env vars.
+
+resource "google_secret_manager_secret" "app" {
+  secret_id = "${var.name_prefix}-app"
+  labels    = local.labels
+
+  replication {
+    auto {}
+  }
+}
+
+resource "google_secret_manager_secret_version" "app" {
+  secret      = google_secret_manager_secret.app.id
+  secret_data = jsonencode(local.secret_payload)
+}
diff --git a/deploy/terraform/gcp/terraform.tfvars.example b/deploy/terraform/gcp/terraform.tfvars.example
new file mode 100644
index 0000000..13e73b5
--- /dev/null
+++ b/deploy/terraform/gcp/terraform.tfvars.example
@@ -0,0 +1,25 @@
+# Copy to terraform.tfvars and fill in. Keep secrets out of version control.
+
+project_id  = "my-gcp-project"
+region      = "us-central1"
+name_prefix = "querywise-prod"
+
+# Network — create a VPC with private services access (PSA), or set
+# create_network = false and supply a network_id that already has PSA configured.
+create_network = true
+# network_id   = "projects/my-gcp-project/global/networks/my-vpc"
+
+# Cloud SQL
+db_tier = "db-custom-2-7680"
+db_ha   = true
+
+# Memorystore
+redis_memory_gb = 1
+redis_ha        = true
+
+# REQUIRED — Fernet key. Generate with:
+#   python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
+encryption_key = "CHANGE_ME"
+
+# LLM provider key(s).
+openai_api_key = "CHANGE_ME"
diff --git a/deploy/terraform/gcp/variables.tf b/deploy/terraform/gcp/variables.tf
new file mode 100644
index 0000000..dcf249b
--- /dev/null
+++ b/deploy/terraform/gcp/variables.tf
@@ -0,0 +1,155 @@
+# -- General -----------------------------------------------------------------
+variable "project_id" {
+  description = "GCP project id."
+  type        = string
+}
+
+variable "region" {
+  description = "GCP region."
+  type        = string
+  default     = "us-central1"
+}
+
+variable "name_prefix" {
+  description = "Prefix for resource names."
+  type        = string
+  default     = "querywise"
+}
+
+variable "labels" {
+  description = "Extra labels applied to resources that support them."
+  type        = map(string)
+  default     = {}
+}
+
+# -- Network -----------------------------------------------------------------
+# Cloud SQL private IP needs a VPC with a private-services-access peering range.
+variable "create_network" {
+  description = "Create a VPC + subnet + private-services-access peering. If false, supply network_id (must already have PSA configured)."
+  type        = bool
+  default     = true
+}
+
+variable "subnet_cidr" {
+  description = "Primary subnet CIDR (when create_network = true)."
+  type        = string
+  default     = "10.43.0.0/20"
+}
+
+variable "network_id" {
+  description = "Existing VPC self_link/id (when create_network = false)."
+  type        = string
+  default     = ""
+}
+
+# -- Cloud SQL (PostgreSQL + pgvector) ---------------------------------------
+variable "db_name" {
+  description = "Application database name."
+  type        = string
+  default     = "querywise"
+}
+
+variable "db_username" {
+  description = "Application database user."
+  type        = string
+  default     = "querywise"
+}
+
+variable "db_password" {
+  description = "DB password. Empty = generate one (stored in Secret Manager)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "db_tier" {
+  description = "Cloud SQL machine tier."
+  type        = string
+  default     = "db-custom-2-7680"
+}
+
+variable "db_disk_size" {
+  description = "Cloud SQL disk size (GiB)."
+  type        = number
+  default     = 50
+}
+
+variable "db_ha" {
+  description = "Regional (HA) availability instead of zonal."
+  type        = bool
+  default     = true
+}
+
+variable "db_deletion_protection" {
+  description = "Block accidental destroy of the instance."
+  type        = bool
+  default     = true
+}
+
+# -- Memorystore (Redis) -----------------------------------------------------
+variable "redis_memory_gb" {
+  description = "Memorystore capacity (GiB)."
+  type        = number
+  default     = 1
+}
+
+variable "redis_ha" {
+  description = "STANDARD_HA tier instead of BASIC."
+  type        = bool
+  default     = true
+}
+
+# -- GCS ---------------------------------------------------------------------
+variable "create_bucket" {
+  description = "Create a GCS bucket for exports/backups."
+  type        = bool
+  default     = true
+}
+
+variable "bucket_name" {
+  description = "Bucket name. Empty = \"<name_prefix>-<project_id>\"."
+  type        = string
+  default     = ""
+}
+
+# -- Application secrets ------------------------------------------------------
+variable "encryption_key" {
+  description = "Fernet key for connection-string encryption (REQUIRED — see README)."
+  type        = string
+  sensitive   = true
+}
+
+variable "jwt_secret" {
+  description = "HS256 JWT signing secret. Empty = generate one."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "default_admin_password" {
+  description = "Optional bootstrap admin password."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "openai_api_key" {
+  description = "OpenAI API key."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "anthropic_api_key" {
+  description = "Anthropic API key (optional)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
+
+variable "azure_openai_api_key" {
+  description = "Azure OpenAI key (optional)."
+  type        = string
+  default     = ""
+  sensitive   = true
+}
diff --git a/deploy/terraform/gcp/versions.tf b/deploy/terraform/gcp/versions.tf
new file mode 100644
index 0000000..0ab2d32
--- /dev/null
+++ b/deploy/terraform/gcp/versions.tf
@@ -0,0 +1,19 @@
+terraform {
+  required_version = ">= 1.5"
+
+  required_providers {
+    google = {
+      source  = "hashicorp/google"
+      version = "~> 5.40"
+    }
+    random = {
+      source  = "hashicorp/random"
+      version = "~> 3.5"
+    }
+  }
+}
+
+provider "google" {
+  project = var.project_id
+  region  = var.region
+}
diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml
new file mode 100644
index 0000000..5c38628
--- /dev/null
+++ b/docker-compose.prod.yml
@@ -0,0 +1,121 @@
+# Production stack for QueryWise (small / on-prem single-tenant).
+#
+#   cp .env.prod.example .env.prod   # then edit secrets
+#   docker compose -f docker-compose.prod.yml --env-file .env.prod up -d --build
+#
+# Topology:  frontend(nginx edge) ──> backend(uvicorn, N workers)
+#                                       ├─> app-db (pgvector)
+#                                       └─> redis  (cache + arq jobs)
+#            worker(arq) ─> redis/app-db        migrate(one-shot) ─> app-db
+#
+# TLS: terminate at the `frontend` edge by mounting certs + adding a 443 server
+# block, or front the stack with a cloud LB / external nginx. See the Helm chart
+# / Terraform modules (deploy/) for managed-ingress deployments.
+
+services:
+  app-db:
+    image: pgvector/pgvector:pg16
+    restart: unless-stopped
+    environment:
+      POSTGRES_DB: ${POSTGRES_DB:-querywise}
+      POSTGRES_USER: ${POSTGRES_USER:-querywise}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?set POSTGRES_PASSWORD in .env.prod}
+    volumes:
+      - app_db_data:/var/lib/postgresql/data
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-querywise} -d ${POSTGRES_DB:-querywise}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    # No host port by default — only the backend/worker/migrate reach it.
+
+  redis:
+    image: redis:7-alpine
+    restart: unless-stopped
+    command: ["redis-server", "--appendonly", "yes"]
+    volumes:
+      - redis_data:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  # One-shot schema migration. Backend/worker wait for this to finish cleanly
+  # so multiple backend replicas never race on `alembic upgrade`.
+  migrate:
+    build:
+      context: ./backend
+      dockerfile: Dockerfile.prod
+    image: querywise-backend:prod
+    restart: "no"
+    env_file:
+      - .env.prod
+    environment:
+      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-querywise}:${POSTGRES_PASSWORD}@app-db:5432/${POSTGRES_DB:-querywise}
+    command: ["alembic", "upgrade", "head"]
+    depends_on:
+      app-db:
+        condition: service_healthy
+
+  backend:
+    build:
+      context: ./backend
+      dockerfile: Dockerfile.prod
+    image: querywise-backend:prod
+    restart: unless-stopped
+    env_file:
+      - .env.prod
+    environment:
+      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-querywise}:${POSTGRES_PASSWORD}@app-db:5432/${POSTGRES_DB:-querywise}
+      REDIS_URL: redis://redis:6379/0
+      JOB_BACKEND: arq
+      UVICORN_WORKERS: ${UVICORN_WORKERS:-4}
+      # Auto-setup seeds the sample DB — off in prod (point at a real warehouse).
+      AUTO_SETUP_SAMPLE_DB: ${AUTO_SETUP_SAMPLE_DB:-false}
+    depends_on:
+      app-db:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+      migrate:
+        condition: service_completed_successfully
+
+  # arq worker: runs background jobs (embeddings, schedules) off the request path.
+  worker:
+    build:
+      context: ./backend
+      dockerfile: Dockerfile.prod
+    image: querywise-backend:prod
+    restart: unless-stopped
+    env_file:
+      - .env.prod
+    environment:
+      DATABASE_URL: postgresql+asyncpg://${POSTGRES_USER:-querywise}:${POSTGRES_PASSWORD}@app-db:5432/${POSTGRES_DB:-querywise}
+      REDIS_URL: redis://redis:6379/0
+      JOB_BACKEND: arq
+    command: ["arq", "app.jobs.worker.WorkerSettings"]
+    depends_on:
+      redis:
+        condition: service_healthy
+      migrate:
+        condition: service_completed_successfully
+
+  # Edge: serves the SPA bundle and reverse-proxies /api, /mcp, /health.
+  frontend:
+    build:
+      context: ./frontend
+      dockerfile: Dockerfile.prod
+      args:
+        # Empty => SPA calls the API same-origin; nginx proxies to backend.
+        VITE_API_URL: ""
+    image: querywise-frontend:prod
+    restart: unless-stopped
+    ports:
+      - "${HTTP_PORT:-80}:8080"
+    depends_on:
+      - backend
+
+volumes:
+  app_db_data:
+  redis_data:
diff --git a/frontend/.dockerignore b/frontend/.dockerignore
new file mode 100644
index 0000000..bc801d5
--- /dev/null
+++ b/frontend/.dockerignore
@@ -0,0 +1,7 @@
+node_modules/
+dist/
+.env
+.env.*
+*.log
+.git/
+.DS_Store
diff --git a/frontend/Dockerfile.prod b/frontend/Dockerfile.prod
new file mode 100644
index 0000000..853e0e9
--- /dev/null
+++ b/frontend/Dockerfile.prod
@@ -0,0 +1,38 @@
+# syntax=docker/dockerfile:1
+#
+# Hardened, multi-stage production image for the QueryWise frontend.
+#   * builder stage produces the static Vite bundle
+#   * runtime stage serves it from the unprivileged nginx image (non-root,
+#     listens on 8080) and reverse-proxies the API to the backend
+#
+# Build:  docker build -f Dockerfile.prod -t querywise-frontend:prod .
+#
+# VITE_API_URL is baked in at build time. Leave it empty ("") so the SPA calls
+# the API same-origin (/api/v1) and nginx proxies it to the backend — the
+# build-once, configure-by-deployment shape. Override only for a split-origin
+# deployment where the API lives on a different host.
+
+# ---- builder ---------------------------------------------------------------
+FROM node:20-slim AS builder
+
+WORKDIR /app
+COPY package*.json ./
+RUN npm ci
+
+COPY . .
+
+ARG VITE_API_URL=""
+ENV VITE_API_URL=${VITE_API_URL}
+RUN npm run build
+
+# ---- runtime ---------------------------------------------------------------
+# Unprivileged nginx: master + workers run as uid 101, listens on 8080.
+FROM nginxinc/nginx-unprivileged:1.27-alpine AS runtime
+
+COPY nginx.conf /etc/nginx/conf.d/default.conf
+COPY --from=builder /app/dist /usr/share/nginx/html
+
+EXPOSE 8080
+# Probe 127.0.0.1, not localhost: that may resolve to ::1, and nginx.conf only listens on IPv4.
+HEALTHCHECK --interval=15s --timeout=5s --start-period=10s --retries=3 \
+    CMD wget -qO- http://127.0.0.1:8080/healthz >/dev/null 2>&1 || exit 1
diff --git a/frontend/nginx.conf b/frontend/nginx.conf
new file mode 100644
index 0000000..53cbd74
--- /dev/null
+++ b/frontend/nginx.conf
@@ -0,0 +1,87 @@
+# QueryWise frontend edge: serves the SPA bundle and reverse-proxies the API,
+# MCP, and health endpoints to the backend. Runs in the unprivileged nginx
+# image (worker uid 101, listens on 8080); TLS is terminated upstream (a cloud
+# LB, an ingress, or an external nginx). See docker-compose.prod.yml.
+
+server {
+    listen 8080;
+    server_name _;
+
+    # Resolve the backend at request time via Docker's embedded DNS (127.0.0.11)
+    # so the edge boots even when the backend is still starting / restarting.
+    # A static `upstream` would make nginx refuse to start if it can't resolve.
+    resolver 127.0.0.11 valid=10s ipv6=off;
+    set $backend http://backend:8000;
+
+    # SPA assets.
+    root /usr/share/nginx/html;
+    index index.html;
+
+    # Don't leak the nginx version.
+    server_tokens off;
+
+    # Cap request bodies (knowledge imports / uploads are modest).
+    client_max_body_size 25m;
+
+    # Container-internal healthcheck target (see Dockerfile.prod HEALTHCHECK).
+    # default_type supplies the Content-Type for the `return` body; add_header
+    # would append a second Content-Type header next to the default one.
+    location = /healthz {
+        access_log off;
+        default_type text/plain;
+        return 200 "ok\n";
+    }
+
+    # API proxied to the backend, same-origin. proxy_pass has no URI part, so
+    # the original request URI is forwarded unchanged.
+    # NOTE(review): $scheme describes the hop into this server (plain HTTP on
+    # 8080), so X-Forwarded-Proto is "http" even behind a TLS terminator —
+    # confirm the backend does not depend on it.
+    location /api/ {
+        proxy_pass $backend;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_read_timeout 120s;
+    }
+
+    # Backend health endpoint (the file header and docker-compose.prod.yml both
+    # list it as proxied; assumes the backend serves /health — confirm). Without
+    # this location, /health falls through to the SPA fallback and answers 200
+    # with index.html.
+    location = /health {
+        proxy_pass $backend;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+    }
+
+    # MCP is streamable HTTP (SSE) — disable buffering so events flush live.
+    location /mcp {
+        proxy_pass $backend;
+        proxy_http_version 1.1;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        proxy_buffering off;
+        proxy_cache off;
+        proxy_read_timeout 3600s;
+    }
+
+    # Hashed Vite assets — cache hard.
+    location /assets/ {
+        expires 1y;
+        add_header Cache-Control "public, immutable";
+        try_files $uri =404;
+    }
+
+    # SPA fallback — every other path serves index.html for client routing.
+    location / {
+        try_files $uri $uri/ /index.html;
+    }
+}
diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts
index 1842c31..9c44ec9 100644
--- a/frontend/src/api/client.ts
+++ b/frontend/src/api/client.ts
@@ -1,6 +1,9 @@
 import axios from 'axios';
 
-const API_BASE = import.meta.env.VITE_API_URL || 'http://localhost:8000';
+// `??` (not `||`) so a deliberately-empty VITE_API_URL is honored: the prod
+// build sets it to "" so the SPA calls the API same-origin (/api/v1) behind
+// nginx. Unset (dev) still falls back to the local backend.
+const API_BASE = import.meta.env.VITE_API_URL ?? 'http://localhost:8000';
 
 export const api = axios.create({
   baseURL: `${API_BASE}/api/v1`,