From d5c0107cb77edd3f2fe0d922cee2abeed6fdd855 Mon Sep 17 00:00:00 2001 From: Yassin Kortam Date: Sat, 16 May 2026 18:44:44 -0700 Subject: [PATCH 01/10] refactor: convert AWS and GCP Terraform stacks into reusable modules with examples/default entry point MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove `provider` blocks from both AWS and GCP stack roots so the modules can be consumed with `count`, `for_each`, `depends_on`, assumed-role or aliased providers — patterns that are forbidden when a module owns its own provider configuration - Add `examples/default/` thin-root wrappers for both stacks that wire the provider (AWS) / providers (google + google-beta) and call the module with a curated variable surface, preserving the one-command deploy experience - Move `terraform.tfvars.example` files into `examples/default/` alongside the new roots; update example comments to reflect the curated variable surface - Thread `local.tags` (containing `litellm:stack`, `managed-by`, and `var.tags`) explicitly onto every taggable AWS resource since the module no longer controls the provider's `default_tags`; GCP resource labels already flow through the module's `labels` input - Add `examples/default/variables.tf` and `outputs.tf` for both stacks, exposing the most-used knobs and re-exporting all module outputs - Commit provider lock files for both examples so `terraform init` is reproducible without a network fetch - Update top-level and per-stack READMEs to document the module-first design, the `for_each` multi-tenant pattern, and the `examples/default/` quick-start path --- terraform/litellm/README.md | 30 ++++- terraform/litellm/aws/README.md | 64 +++++++++- terraform/litellm/aws/alb.tf | 18 +++ terraform/litellm/aws/bootstrap.tf | 6 + terraform/litellm/aws/ecs.tf | 22 ++++ .../aws/examples/default/.terraform.lock.hcl | 46 +++++++ .../litellm/aws/examples/default/main.tf | 40 ++++++ .../litellm/aws/examples/default/outputs.tf | 
54 ++++++++ .../litellm/aws/examples/default/providers.tf | 18 +++ .../default}/terraform.tfvars.example | 19 +-- .../litellm/aws/examples/default/variables.tf | 98 ++++++++++++++ .../litellm/aws/examples/default/versions.tf | 14 ++ terraform/litellm/aws/iam.tf | 10 ++ terraform/litellm/aws/locals.tf | 14 ++ terraform/litellm/aws/migrations.tf | 2 + terraform/litellm/aws/network.tf | 24 ++-- terraform/litellm/aws/providers.tf | 13 -- terraform/litellm/aws/rds.tf | 10 ++ terraform/litellm/aws/redis.tf | 4 + terraform/litellm/aws/s3.tf | 4 + terraform/litellm/aws/secrets.tf | 8 ++ terraform/litellm/aws/variables.tf | 2 +- terraform/litellm/gcp/README.md | 57 ++++++++- terraform/litellm/gcp/cloudsql.tf | 15 +++ .../gcp/examples/default/.terraform.lock.hcl | 63 +++++++++ .../litellm/gcp/examples/default/main.tf | 45 +++++++ .../litellm/gcp/examples/default/outputs.tf | 59 +++++++++ .../litellm/gcp/examples/default/providers.tf | 17 +++ .../default}/terraform.tfvars.example | 16 +-- .../litellm/gcp/examples/default/variables.tf | 120 ++++++++++++++++++ .../litellm/gcp/examples/default/versions.tf | 18 +++ terraform/litellm/gcp/load_balancer.tf | 13 +- terraform/litellm/gcp/providers.tf | 9 -- 33 files changed, 880 insertions(+), 72 deletions(-) create mode 100644 terraform/litellm/aws/examples/default/.terraform.lock.hcl create mode 100644 terraform/litellm/aws/examples/default/main.tf create mode 100644 terraform/litellm/aws/examples/default/outputs.tf create mode 100644 terraform/litellm/aws/examples/default/providers.tf rename terraform/litellm/aws/{ => examples/default}/terraform.tfvars.example (79%) create mode 100644 terraform/litellm/aws/examples/default/variables.tf create mode 100644 terraform/litellm/aws/examples/default/versions.tf delete mode 100644 terraform/litellm/aws/providers.tf create mode 100644 terraform/litellm/gcp/examples/default/.terraform.lock.hcl create mode 100644 terraform/litellm/gcp/examples/default/main.tf create mode 100644 
terraform/litellm/gcp/examples/default/outputs.tf create mode 100644 terraform/litellm/gcp/examples/default/providers.tf rename terraform/litellm/gcp/{ => examples/default}/terraform.tfvars.example (81%) create mode 100644 terraform/litellm/gcp/examples/default/variables.tf create mode 100644 terraform/litellm/gcp/examples/default/versions.tf delete mode 100644 terraform/litellm/gcp/providers.tf diff --git a/terraform/litellm/README.md b/terraform/litellm/README.md index 5ca704b96dd..f1fa455f65e 100644 --- a/terraform/litellm/README.md +++ b/terraform/litellm/README.md @@ -1,18 +1,34 @@ # LiteLLM Terraform stacks -Two self-contained Terraform root modules that deploy the **componentized** -LiteLLM proxy — the gateway, backend, and UI as three independent containers -(see `helm/litellm/` for the canonical chart with the same split). +Two self-contained, reusable Terraform **modules** that deploy the +**componentized** LiteLLM proxy — the gateway, backend, and UI as three +independent containers (see `helm/litellm/` for the canonical chart with the +same split). + +Each module declares **no `provider` block of its own**, so it can be called +with `count` / `for_each` / `depends_on` and the caller controls region, +assume-role / impersonation, aliases, and `default_tags`. A ready-to-run root +that wires the provider lives at `<stack>/examples/default/` — that's the +one-command deploy path. To embed a stack in your own config, call the module +by source: + +```hcl +module "litellm" { + source = "github.com/BerriAI/litellm//terraform/litellm/aws?ref=<ref>" + # ... inputs ...
+} +``` | Stack | Compute | Database (writer + reader) | Cache | Object store | Public entrypoint | | ------ | ----------- | ---------------------------------- | ----------- | ------------ | ------------------ | | `aws/` | ECS Fargate | Aurora Postgres (IAM auth) | ElastiCache | S3 | Application LB | | `gcp/` | Cloud Run | Cloud SQL Postgres (password auth) | Memorystore | GCS | External HTTPS LB | -Each stack creates its own VPC and managed data stores — drop in a tfvars -file and run `terraform apply`. Both stacks support a typed `proxy_config` -input (mirrors `helm/litellm`'s `gateway.config.proxy_config`) and per-component -extra env vars / secret-manager refs. +Each stack creates its own VPC and managed data stores — from +`<stack>/examples/default/`, drop in a tfvars file and run `terraform apply`. +Both stacks support a typed `proxy_config` input (mirrors `helm/litellm`'s +`gateway.config.proxy_config`) and per-component extra env vars / +secret-manager refs. ## Components diff --git a/terraform/litellm/aws/README.md b/terraform/litellm/aws/README.md index 8638ea800ec..80ee18c667d 100644 --- a/terraform/litellm/aws/README.md +++ b/terraform/litellm/aws/README.md @@ -132,10 +132,11 @@ pair differs: | `acme` | `prod` | `acme-litellm-prod-master-key` | | `globex` | `dev` | `globex-litellm-dev-license` | -For a per-tenant instance, the only inputs that change are the tenant -slug, env, and the two pre-issued secrets: +For a per-tenant instance via the example root, the only inputs that +change are the tenant slug, env, and the two pre-issued secrets: ```bash +cd terraform/litellm/aws/examples/default export TF_VAR_litellm_master_key="sk-..." # the tenant's master key export TF_VAR_litellm_license="lic-..."
# their LITELLM_LICENSE @@ -146,6 +147,22 @@ terraform apply \ -var "env=stage" ``` +To run *many* tenants from a single config, call the module with +`for_each` instead of one root per tenant (see "Using as a module"): + +```hcl +module "litellm" { + for_each = toset(["acme", "globex"]) + source = "github.com/BerriAI/litellm//terraform/litellm/aws?ref=<ref>" + tenant = each.key + env = "prod" + region = "us-west-2" + azs = ["us-west-2a", "us-west-2b"] +} +``` +(This `for_each` form is only possible because the module declares no +provider block — the original root-with-provider layout forbade it.) + Both `litellm_master_key` and `litellm_license` are optional: - Omit `litellm_master_key` → the stack auto-generates a random `sk-…` value (trial/dev path). @@ -159,14 +176,21 @@ example files. ## Quick start ```bash -cd terraform/litellm/aws +cd terraform/litellm/aws/examples/default cp terraform.tfvars.example terraform.tfvars -# Edit: region, tenant, env, azs, *_image, proxy_config, gateway_extra_secrets. +# Edit: region, tenant, env, azs, proxy_config, gateway_extra_secrets. terraform init terraform apply ``` +`examples/default/` is a thin root that configures the `aws` provider and +calls the module (`../../`). It exposes a curated variable surface; for +advanced knobs (per-component CPU/memory/workers, autoscaling, RDS/Redis +sizing, per-component image pins) set them on the `module "litellm"` block +in `examples/default/main.tf`, or call the module from your own config — +see "Using as a module" below. + That single apply provisions everything, runs the DB user bootstrap, runs the schema migration, and only then starts the gateway/backend services. When it returns, the stack is serving traffic. @@ -179,6 +203,34 @@ aws secretsmanager get-secret-value \ --query SecretString --output text ``` +## Using as a module + +The directory itself is a module with **no `provider` block** — the caller +owns provider config.
That means you can call it directly with `for_each` +(many tenants from one config), `count` (conditional stacks), `depends_on`, +an assume-role / aliased provider, etc.: + +```hcl +provider "aws" { + region = "us-west-2" + assume_role { role_arn = "arn:aws:iam::111122223333:role/deployer" } +} + +module "litellm" { + source = "github.com/BerriAI/litellm//terraform/litellm/aws?ref=<ref>" + + region = "us-west-2" + tenant = "acme" + env = "prod" + azs = ["us-west-2a", "us-west-2b"] + # ...any of the inputs in variables.tf... +} +``` + +Tags: the module threads its own `litellm:stack` / `managed-by` / `var.tags` +onto every taggable resource. Any `default_tags` on your provider are merged in +as well (on a key conflict the module's resource-level tag wins) — set org-wide tags there, per-deployment tags via the `tags` input. + ## Image pulls The defaults pull from `ghcr.io/berriai/litellm-<component>:v1.86.0-dev`, @@ -238,8 +290,8 @@ losing the contents. | File | What's in it | | ----------------- | --------------------------------------------------------------------- | -| `versions.tf` | Terraform + provider version constraints | -| `providers.tf` | AWS provider (region + default tags) | +| `versions.tf` | Terraform + `required_providers` constraints (module declares no provider config) | +| `examples/default/` | Thin root: `aws` provider + `default_tags` + a call to the module. The one-command deploy path.
| | `variables.tf` | All input variables | | `locals.tf` | Path-prefix lists for ALB routing (mirror of `helm/.../ingress.yaml`) | | `network.tf` | VPC, subnets, IGW, NAT, route tables, security groups | diff --git a/terraform/litellm/aws/alb.tf b/terraform/litellm/aws/alb.tf index de0d9c2310f..786b9d9a5b9 100644 --- a/terraform/litellm/aws/alb.tf +++ b/terraform/litellm/aws/alb.tf @@ -6,6 +6,8 @@ resource "aws_lb" "this" { subnets = aws_subnet.public[*].id idle_timeout = 120 + + tags = local.tags } locals { @@ -35,6 +37,8 @@ resource "aws_lb_target_group" "gateway" { } deregistration_delay = 30 + + tags = local.tags } resource "aws_lb_target_group" "backend" { @@ -54,6 +58,8 @@ resource "aws_lb_target_group" "backend" { } deregistration_delay = 30 + + tags = local.tags } resource "aws_lb_target_group" "ui" { @@ -73,6 +79,8 @@ resource "aws_lb_target_group" "ui" { } deregistration_delay = 30 + + tags = local.tags } # HTTP listener. When TLS is enabled this only serves a permanent @@ -106,6 +114,8 @@ resource "aws_lb_listener" "http" { error_message = "ALB has no HTTPS listener. Either set `acm_certificate_arn` to enable TLS, or set `allow_plaintext_alb = true` to opt into HTTP-only (trial / dev only)." } } + + tags = local.tags } # HTTPS listener. Only created when an ACM cert ARN is supplied — terminates @@ -122,6 +132,8 @@ resource "aws_lb_listener" "https" { type = "forward" target_group_arn = aws_lb_target_group.backend.arn } + + tags = local.tags } # UI exact paths (/, /favicon.ico, /ui) — priority 10. @@ -139,6 +151,8 @@ resource "aws_lb_listener_rule" "ui_exact" { values = local.ui_exact_paths } } + + tags = local.tags } # UI prefix paths (/_next/*, /litellm-asset-prefix/*, /assets/*, /ui/*) — priority 20. 
@@ -156,6 +170,8 @@ resource "aws_lb_listener_rule" "ui_prefix" { values = local.ui_path_prefixes } } + + tags = local.tags } # Gateway prefix rules — one per chunk-of-5 because ALB caps a path-pattern @@ -176,4 +192,6 @@ resource "aws_lb_listener_rule" "gateway" { values = each.value } } + + tags = local.tags } diff --git a/terraform/litellm/aws/bootstrap.tf b/terraform/litellm/aws/bootstrap.tf index e9a56dedbb5..b0bc38d44fb 100644 --- a/terraform/litellm/aws/bootstrap.tf +++ b/terraform/litellm/aws/bootstrap.tf @@ -32,6 +32,8 @@ resource "aws_iam_policy" "bootstrap_secrets" { Resource = [aws_secretsmanager_secret.db_master_password.arn] }] }) + + tags = local.tags } resource "aws_iam_role_policy_attachment" "task_execution_bootstrap_secrets" { @@ -43,6 +45,8 @@ resource "aws_iam_role_policy_attachment" "task_execution_bootstrap_secrets" { resource "aws_cloudwatch_log_group" "bootstrap_db" { name = "/ecs/${local.name}/bootstrap-db" retention_in_days = var.log_retention_days + + tags = local.tags } locals { @@ -101,6 +105,8 @@ resource "aws_ecs_task_definition" "bootstrap_db" { } } }]) + + tags = local.tags } # ---------- Bootstrap trigger ---------- diff --git a/terraform/litellm/aws/ecs.tf b/terraform/litellm/aws/ecs.tf index a6d2350c681..aee8f0cfc73 100644 --- a/terraform/litellm/aws/ecs.tf +++ b/terraform/litellm/aws/ecs.tf @@ -5,26 +5,36 @@ resource "aws_ecs_cluster" "this" { name = "containerInsights" value = "enabled" } + + tags = local.tags } resource "aws_cloudwatch_log_group" "gateway" { name = "/ecs/${local.name}/gateway" retention_in_days = var.log_retention_days + + tags = local.tags } resource "aws_cloudwatch_log_group" "backend" { name = "/ecs/${local.name}/backend" retention_in_days = var.log_retention_days + + tags = local.tags } resource "aws_cloudwatch_log_group" "ui" { name = "/ecs/${local.name}/ui" retention_in_days = var.log_retention_days + + tags = local.tags } resource "aws_cloudwatch_log_group" "migrations" { name = 
"/ecs/${local.name}/migrations" retention_in_days = var.log_retention_days + + tags = local.tags } # Shared env block fed to gateway, backend, and the migration task. Mirrors @@ -169,6 +179,8 @@ resource "aws_ecs_task_definition" "gateway" { local.gateway_proxy_overrides, ) ]) + + tags = local.tags } resource "aws_ecs_service" "gateway" { @@ -206,6 +218,8 @@ resource "aws_ecs_service" "gateway" { aws_lb_listener.https, terraform_data.migration, ] + + tags = local.tags } # ---------- Backend ---------- @@ -246,6 +260,8 @@ resource "aws_ecs_task_definition" "backend" { local.backend_proxy_overrides, ) ]) + + tags = local.tags } resource "aws_ecs_service" "backend" { @@ -279,6 +295,8 @@ resource "aws_ecs_service" "backend" { aws_lb_listener.https, terraform_data.migration, ] + + tags = local.tags } # ---------- UI ---------- @@ -312,6 +330,8 @@ resource "aws_ecs_task_definition" "ui" { } } ]) + + tags = local.tags } resource "aws_ecs_service" "ui" { @@ -344,4 +364,6 @@ resource "aws_ecs_service" "ui" { aws_lb_listener.http, aws_lb_listener.https, ] + + tags = local.tags } diff --git a/terraform/litellm/aws/examples/default/.terraform.lock.hcl b/terraform/litellm/aws/examples/default/.terraform.lock.hcl new file mode 100644 index 00000000000..4a059b2b268 --- /dev/null +++ b/terraform/litellm/aws/examples/default/.terraform.lock.hcl @@ -0,0 +1,46 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/hashicorp/aws" { + version = "5.100.0" + constraints = "~> 5.60" + hashes = [ + "h1:Ijt7pOlB7Tr7maGQIqtsLFbl7pSMIj06TVdkoSBcYOw=", + "zh:054b8dd49f0549c9a7cc27d159e45327b7b65cf404da5e5a20da154b90b8a644", + "zh:0b97bf8d5e03d15d83cc40b0530a1f84b459354939ba6f135a0086c20ebbe6b2", + "zh:1589a2266af699cbd5d80737a0fe02e54ec9cf2ca54e7e00ac51c7359056f274", + "zh:6330766f1d85f01ae6ea90d1b214b8b74cc8c1badc4696b165b36ddd4cc15f7b", + "zh:7c8c2e30d8e55291b86fcb64bdf6c25489d538688545eb48fd74ad622e5d3862", + "zh:99b1003bd9bd32ee323544da897148f46a527f622dc3971af63ea3e251596342", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:9f8b909d3ec50ade83c8062290378b1ec553edef6a447c56dadc01a99f4eaa93", + "zh:aaef921ff9aabaf8b1869a86d692ebd24fbd4e12c21205034bb679b9caf883a2", + "zh:ac882313207aba00dd5a76dbd572a0ddc818bb9cbf5c9d61b28fe30efaec951e", + "zh:bb64e8aff37becab373a1a0cc1080990785304141af42ed6aa3dd4913b000421", + "zh:dfe495f6621df5540d9c92ad40b8067376350b005c637ea6efac5dc15028add4", + "zh:f0ddf0eaf052766cfe09dea8200a946519f653c384ab4336e2a4a64fdd6310e9", + "zh:f1b7e684f4c7ae1eed272b6de7d2049bb87a0275cb04dbb7cda6636f600699c9", + "zh:ff461571e3f233699bf690db319dfe46aec75e58726636a0d97dd9ac6e32fb70", + ] +} + +provider "registry.terraform.io/hashicorp/random" { + version = "3.9.0" + constraints = "~> 3.6" + hashes = [ + "h1:OO+IuvQJSPmWdN8AyyIEvPJbLvDQpgX/zbktoa9KsJE=", + "zh:161ad0bd9a75768c82f53fb6e7172a9d8be2d4889b012645a34795031aaf1bf1", + "zh:19dc9a5b17729725ccfc4f45b0500af0ee5bc6b6b160c7adb8f2bf617d2c80ea", + "zh:269eda8fe42daa7974d5a34d166c3ba9defe80cde86c01e4dadcfdf2e1f05e5f", + "zh:373f7c65566f8f2cc7f45d698654feb9d988996957e1266a69ca00c52d6d16d0", + "zh:5599d16804c41c83009ec621b6d6b6f74e102f5827678a4750f8809055546b61", + "zh:583be0440469a22bff70dcfa56593b01566860b29607437264adb51060cf46fc", + "zh:5f211d8ec3f2e1f414870d9584bfe26e6995560ef81c748f8447a48164767398", + 
"zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:7b547fd16216761ef86efc3ed516ac5ac0c5c42b7c7eb24a08cef2d93f69ed5e", + "zh:7e7c0679daf2a382151d05068c8c3f0dae6b7b7dccf818827b73dd08638df2ef", + "zh:8089dec888a8038b9b4fb23b3df7e1057293dbc5b60b42cc47ff690d69d4b61b", + "zh:c51f15a031edfd6f23ce8ced3446ca7f8d8d647e2499890d7d5d10d5016d7257", + "zh:c94784f005708890dc6895afd53636ec00ec1e430b15d41e5aebfb1d4b39bd04", + ] +} diff --git a/terraform/litellm/aws/examples/default/main.tf b/terraform/litellm/aws/examples/default/main.tf new file mode 100644 index 00000000000..0cbd48701aa --- /dev/null +++ b/terraform/litellm/aws/examples/default/main.tf @@ -0,0 +1,40 @@ +# One-command deploy of the LiteLLM AWS stack. +# +# cd terraform/litellm/aws/examples/default +# cp terraform.tfvars.example terraform.tfvars # edit it +# terraform init +# terraform apply +# +# This root just wires the provider (see providers.tf) to the module. The +# module itself (../../) declares no provider, so it can also be consumed +# from your own config with count/for_each/aliased or assume-role providers: +# +# module "litellm" { +# source = "github.com/BerriAI/litellm//terraform/litellm/aws?ref=" +# ... +# } +# +# Knobs not surfaced as variables here (per-component sizing, autoscaling, +# RDS/Redis tuning) can be set directly on this block — see ../../variables.tf. 
+module "litellm" { + source = "../../" + + region = var.region + tenant = var.tenant + env = var.env + azs = var.azs + + litellm_master_key = var.litellm_master_key + litellm_license = var.litellm_license + ui_password = var.ui_password + + acm_certificate_arn = var.acm_certificate_arn + allow_plaintext_alb = var.allow_plaintext_alb + s3_force_destroy = var.s3_force_destroy + + proxy_config = var.proxy_config + gateway_extra_env = var.gateway_extra_env + backend_extra_env = var.backend_extra_env + gateway_extra_secrets = var.gateway_extra_secrets + backend_extra_secrets = var.backend_extra_secrets +} diff --git a/terraform/litellm/aws/examples/default/outputs.tf b/terraform/litellm/aws/examples/default/outputs.tf new file mode 100644 index 00000000000..235c069933c --- /dev/null +++ b/terraform/litellm/aws/examples/default/outputs.tf @@ -0,0 +1,54 @@ +output "alb_dns_name" { + description = "Public DNS name of the LiteLLM ALB." + value = module.litellm.alb_dns_name +} + +output "alb_url" { + description = "Proxy URL. Dashboard at /, API at /v1/*." + value = module.litellm.alb_url +} + +output "ecs_cluster" { + description = "ECS cluster name." + value = module.litellm.ecs_cluster +} + +output "aurora_writer_endpoint" { + description = "Aurora writer endpoint." + value = module.litellm.aurora_writer_endpoint +} + +output "aurora_reader_endpoint" { + description = "Aurora reader endpoint." + value = module.litellm.aurora_reader_endpoint +} + +output "redis_endpoint" { + description = "ElastiCache Redis primary endpoint (TLS)." + value = module.litellm.redis_endpoint +} + +output "s3_bucket" { + description = "S3 bucket name." + value = module.litellm.s3_bucket +} + +output "master_key_secret_arn" { + description = "Secrets Manager ARN holding LITELLM_MASTER_KEY." + value = module.litellm.master_key_secret_arn +} + +output "db_master_password_secret_arn" { + description = "Secrets Manager ARN holding the Aurora master credentials (bootstrap-only)." 
+ value = module.litellm.db_master_password_secret_arn +} + +output "db_bootstrap_sql" { + description = "Run once as the master DB user to create the IAM-authed app user." + value = module.litellm.db_bootstrap_sql +} + +output "migration_run_command" { + description = "Break-glass command to re-run the one-off prisma migration task." + value = module.litellm.migration_run_command +} diff --git a/terraform/litellm/aws/examples/default/providers.tf b/terraform/litellm/aws/examples/default/providers.tf new file mode 100644 index 00000000000..aaad4ae916d --- /dev/null +++ b/terraform/litellm/aws/examples/default/providers.tf @@ -0,0 +1,18 @@ +# The provider is configured HERE, in the root, not in the module. That is +# the whole point of the split: a module that declares its own configured +# `provider` block can't be called with count/for_each/depends_on and gives +# the caller no way to set assume-role, custom endpoints, or aliases. +# +# `default_tags` set here still flow into every resource the module creates +# (provider default_tags propagate through module calls) and merge with the +# module's own `litellm:stack` / `managed-by` / var.tags. Use this block for +# org-wide tags; use the module's `tags` input for per-deployment tags. +provider "aws" { + region = var.region + + default_tags { + tags = { + "managed-by" = "terraform" + } + } +} diff --git a/terraform/litellm/aws/terraform.tfvars.example b/terraform/litellm/aws/examples/default/terraform.tfvars.example similarity index 79% rename from terraform/litellm/aws/terraform.tfvars.example rename to terraform/litellm/aws/examples/default/terraform.tfvars.example index 2be573949ef..88d12cf26e2 100644 --- a/terraform/litellm/aws/terraform.tfvars.example +++ b/terraform/litellm/aws/examples/default/terraform.tfvars.example @@ -26,19 +26,12 @@ env = "stage" # non-empty bucket. Flip to true only for ephemeral / CI stacks. # s3_force_destroy = false -# Component images. 
Defaults pin all four to the same GHCR release tag — -# bump them together when bumping LiteLLM. Override here to pull from a -# private registry or to mix-and-match versions. -# gateway_image = "ghcr.io/berriai/litellm-gateway:1.86.0-dev" -# backend_image = "ghcr.io/berriai/litellm-backend:1.86.0-dev" -# ui_image = "ghcr.io/berriai/litellm-ui:1.86.0-dev" -# migrations_image = "ghcr.io/berriai/litellm-migrations:1.86.0-dev" - -# Per-task sizing for the gateway. Defaults are 1 vCPU / 4 GiB / 1 worker. -# uvicorn rule of thumb for CPU-bound work is (2 * vCPU) + 1 workers. -# gateway_cpu = 1024 # 1024 = 1 vCPU -# gateway_memory = 4096 # MiB -# gateway_num_workers = 1 +# Component images and per-task sizing/autoscaling are NOT exposed as +# variables in this example (it keeps the curated surface small). They +# default to working public GHCR images. To pin images or tune +# CPU/memory/workers/autoscaling, set those inputs directly on the +# `module "litellm"` block in main.tf — the full list is in +# ../../variables.tf — or call the module from your own root config. # ---------- proxy_config (mirrors helm gateway.config.proxy_config) ---------- # proxy_config = { diff --git a/terraform/litellm/aws/examples/default/variables.tf b/terraform/litellm/aws/examples/default/variables.tf new file mode 100644 index 00000000000..f8950ca2eca --- /dev/null +++ b/terraform/litellm/aws/examples/default/variables.tf @@ -0,0 +1,98 @@ +# Curated surface for the one-command deploy path. The module (../../) +# exposes far more knobs (per-component CPU/memory, autoscaling, RDS/Redis +# sizing, …). To tune those, set them directly on the `module "litellm"` +# block in main.tf, or call the module from your own root config. Full +# per-variable docs live in ../../variables.tf — the module is the source +# of truth; descriptions here are intentionally terse. + +variable "region" { + description = "AWS region to deploy into." 
+ type = string +} + +variable "tenant" { + description = "Tenant slug — prefix for every resource (-litellm-)." + type = string +} + +variable "env" { + description = "Environment suffix (stage, prod, dev)." + type = string +} + +variable "azs" { + description = "Availability zones for subnets. At least 2 (RDS + ALB)." + type = list(string) +} + +# Sensitive — prefer TF_VAR_litellm_master_key / TF_VAR_litellm_license / +# TF_VAR_ui_password so values stay out of any committed tfvars file. +variable "litellm_master_key" { + description = "Pre-existing LITELLM_MASTER_KEY (sk-…). Empty → auto-generated." + type = string + default = "" + sensitive = true +} + +variable "litellm_license" { + description = "LiteLLM enterprise license. Empty → OSS-only." + type = string + default = "" + sensitive = true +} + +variable "ui_password" { + description = "UI admin password. Empty → falls back to LITELLM_MASTER_KEY." + type = string + default = "" + sensitive = true +} + +# TLS — provide an ACM cert for production, or opt into HTTP-only for dev. +variable "acm_certificate_arn" { + description = "ACM cert ARN for the ALB HTTPS listener. Empty → no TLS." + type = string + default = "" +} + +variable "allow_plaintext_alb" { + description = "Opt into HTTP-only ALB (trial/dev only)." + type = bool + default = false +} + +variable "s3_force_destroy" { + description = "Allow destroy of a non-empty S3 bucket (ephemeral/CI only)." + type = bool + default = false +} + +variable "proxy_config" { + description = "LiteLLM proxy config (contents of config.yaml). Empty → defaults." + type = any + default = {} +} + +variable "gateway_extra_env" { + description = "Plain-text env vars layered onto the gateway." + type = map(string) + default = {} +} + +variable "backend_extra_env" { + description = "Plain-text env vars layered onto the backend." 
+ type = map(string) + default = {} +} + +variable "gateway_extra_secrets" { + description = "Gateway env vars sourced from Secrets Manager (name → ARN)." + type = map(string) + default = {} +} + +variable "backend_extra_secrets" { + description = "Backend env vars sourced from Secrets Manager (name → ARN)." + type = map(string) + default = {} +} diff --git a/terraform/litellm/aws/examples/default/versions.tf b/terraform/litellm/aws/examples/default/versions.tf new file mode 100644 index 00000000000..73b88e91dce --- /dev/null +++ b/terraform/litellm/aws/examples/default/versions.tf @@ -0,0 +1,14 @@ +terraform { + required_version = ">= 1.6.0" + + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.60" + } + random = { + source = "hashicorp/random" + version = "~> 3.6" + } + } +} diff --git a/terraform/litellm/aws/iam.tf b/terraform/litellm/aws/iam.tf index 504e0fe1d63..f425f1a00e7 100644 --- a/terraform/litellm/aws/iam.tf +++ b/terraform/litellm/aws/iam.tf @@ -13,6 +13,8 @@ data "aws_iam_policy_document" "task_assume" { resource "aws_iam_role" "task_execution" { name = "${local.name}-task-execution" assume_role_policy = data.aws_iam_policy_document.task_assume.json + + tags = local.tags } resource "aws_iam_role_policy_attachment" "task_execution" { @@ -59,6 +61,8 @@ data "aws_iam_policy_document" "secrets_access" { resource "aws_iam_policy" "secrets_access" { name = "${local.name}-secrets-access" policy = data.aws_iam_policy_document.secrets_access.json + + tags = local.tags } resource "aws_iam_role_policy_attachment" "task_execution_secrets" { @@ -75,6 +79,8 @@ resource "aws_iam_role_policy_attachment" "task_execution_secrets" { resource "aws_iam_role" "task" { name = "${local.name}-task" assume_role_policy = data.aws_iam_policy_document.task_assume.json + + tags = local.tags } data "aws_caller_identity" "current" {} @@ -91,6 +97,8 @@ data "aws_iam_policy_document" "rds_iam_connect" { resource "aws_iam_policy" "rds_iam_connect" { name = 
"${local.name}-rds-iam-connect" policy = data.aws_iam_policy_document.rds_iam_connect.json + + tags = local.tags } resource "aws_iam_role_policy_attachment" "task_rds_iam_connect" { @@ -111,4 +119,6 @@ resource "aws_iam_role_policy_attachment" "task_rds_iam_connect" { resource "aws_iam_role" "ui_task" { name = "${local.name}-ui-task" assume_role_policy = data.aws_iam_policy_document.task_assume.json + + tags = local.tags } diff --git a/terraform/litellm/aws/locals.tf b/terraform/litellm/aws/locals.tf index 85c3b6eaaad..b5e28272d04 100644 --- a/terraform/litellm/aws/locals.tf +++ b/terraform/litellm/aws/locals.tf @@ -11,6 +11,20 @@ locals { # the stack can reference local.name. name = "${var.tenant}-litellm-${var.env}" + # This is a reusable module — it declares no `provider` block, so the AWS + # provider's `default_tags` is the caller's concern, not ours. To keep the + # same per-resource tagging the stack had when it owned the provider, the + # module threads `local.tags` onto every taggable resource itself. Callers + # may layer org-wide tags on top via their own provider `default_tags` + # (those merge with these). `var.tags` is the per-deployment override. 
+ tags = merge( + { + "litellm:stack" = local.name + "managed-by" = "terraform" + }, + var.tags, + ) + gateway_path_prefixes = [ "/v1/chat/*", "/chat/*", "/v1/completions*", "/completions*", diff --git a/terraform/litellm/aws/migrations.tf b/terraform/litellm/aws/migrations.tf index fc4e2ce0cab..62880ebf165 100644 --- a/terraform/litellm/aws/migrations.tf +++ b/terraform/litellm/aws/migrations.tf @@ -42,4 +42,6 @@ resource "aws_ecs_task_definition" "migrations" { } } }]) + + tags = local.tags } diff --git a/terraform/litellm/aws/network.tf b/terraform/litellm/aws/network.tf index d5ed49c1b8a..2f104da6a6b 100644 --- a/terraform/litellm/aws/network.tf +++ b/terraform/litellm/aws/network.tf @@ -7,12 +7,12 @@ resource "aws_vpc" "this" { enable_dns_hostnames = true enable_dns_support = true - tags = { Name = local.name } + tags = merge(local.tags, { Name = local.name }) } resource "aws_internet_gateway" "this" { vpc_id = aws_vpc.this.id - tags = { Name = local.name } + tags = merge(local.tags, { Name = local.name }) } # Public subnets (ALB + NAT). One per AZ. @@ -23,7 +23,7 @@ resource "aws_subnet" "public" { availability_zone = var.azs[count.index] map_public_ip_on_launch = true - tags = { Name = "${local.name}-public-${var.azs[count.index]}" } + tags = merge(local.tags, { Name = "${local.name}-public-${var.azs[count.index]}" }) } # Private subnets (ECS tasks, RDS, ElastiCache). 
One per AZ, separate from @@ -34,12 +34,12 @@ resource "aws_subnet" "private" { cidr_block = cidrsubnet(var.vpc_cidr, 8, count.index + 10) availability_zone = var.azs[count.index] - tags = { Name = "${local.name}-private-${var.azs[count.index]}" } + tags = merge(local.tags, { Name = "${local.name}-private-${var.azs[count.index]}" }) } resource "aws_eip" "nat" { domain = "vpc" - tags = { Name = "${local.name}-nat" } + tags = merge(local.tags, { Name = "${local.name}-nat" }) depends_on = [aws_internet_gateway.this] } @@ -50,7 +50,7 @@ resource "aws_nat_gateway" "this" { allocation_id = aws_eip.nat.id subnet_id = aws_subnet.public[0].id - tags = { Name = local.name } + tags = merge(local.tags, { Name = local.name }) depends_on = [aws_internet_gateway.this] } @@ -63,7 +63,7 @@ resource "aws_route_table" "public" { gateway_id = aws_internet_gateway.this.id } - tags = { Name = "${local.name}-public" } + tags = merge(local.tags, { Name = "${local.name}-public" }) } resource "aws_route_table_association" "public" { @@ -80,7 +80,7 @@ resource "aws_route_table" "private" { nat_gateway_id = aws_nat_gateway.this.id } - tags = { Name = "${local.name}-private" } + tags = merge(local.tags, { Name = "${local.name}-private" }) } resource "aws_route_table_association" "private" { @@ -119,6 +119,8 @@ resource "aws_security_group" "alb" { protocol = "-1" cidr_blocks = ["0.0.0.0/0"] } + + tags = local.tags } resource "aws_security_group" "tasks" { @@ -141,6 +143,8 @@ resource "aws_security_group" "tasks" { protocol = "-1" cidr_blocks = ["0.0.0.0/0"] } + + tags = local.tags } resource "aws_security_group" "rds" { @@ -155,6 +159,8 @@ resource "aws_security_group" "rds" { protocol = "tcp" security_groups = [aws_security_group.tasks.id] } + + tags = local.tags } resource "aws_security_group" "redis" { @@ -169,4 +175,6 @@ resource "aws_security_group" "redis" { protocol = "tcp" security_groups = [aws_security_group.tasks.id] } + + tags = local.tags } diff --git 
a/terraform/litellm/aws/providers.tf b/terraform/litellm/aws/providers.tf deleted file mode 100644 index 5e7d506c23f..00000000000 --- a/terraform/litellm/aws/providers.tf +++ /dev/null @@ -1,13 +0,0 @@ -provider "aws" { - region = var.region - - default_tags { - tags = merge( - { - "litellm:stack" = local.name - "managed-by" = "terraform" - }, - var.tags, - ) - } -} diff --git a/terraform/litellm/aws/rds.tf b/terraform/litellm/aws/rds.tf index 8e3b70a8d62..d9b7351a805 100644 --- a/terraform/litellm/aws/rds.tf +++ b/terraform/litellm/aws/rds.tf @@ -19,12 +19,16 @@ resource "aws_db_subnet_group" "this" { name = "${local.name}-db" subnet_ids = aws_subnet.private[*].id + + tags = local.tags } resource "aws_rds_cluster_parameter_group" "this" { name = "${local.name}-cluster-pg" family = "aurora-postgresql${split(".", var.db_engine_version)[0]}" description = "LiteLLM Aurora Postgres cluster parameters." + + tags = local.tags } resource "aws_rds_cluster" "this" { @@ -52,6 +56,8 @@ resource "aws_rds_cluster" "this" { backup_retention_period = 7 preferred_backup_window = "07:00-09:00" + + tags = local.tags } resource "aws_rds_cluster_instance" "writer" { @@ -67,6 +73,8 @@ resource "aws_rds_cluster_instance" "writer" { # Promotion tier 0 — first in line during failover, so this instance stays # the writer unless it goes unhealthy. promotion_tier = 0 + + tags = local.tags } resource "aws_rds_cluster_instance" "reader" { @@ -82,4 +90,6 @@ resource "aws_rds_cluster_instance" "reader" { # Higher promotion tier — won't be picked as writer during a failover # unless the writer instance itself is gone. 
promotion_tier = 15 + + tags = local.tags } diff --git a/terraform/litellm/aws/redis.tf b/terraform/litellm/aws/redis.tf index 2a6fab2d89f..071cbc6d46f 100644 --- a/terraform/litellm/aws/redis.tf +++ b/terraform/litellm/aws/redis.tf @@ -1,6 +1,8 @@ resource "aws_elasticache_subnet_group" "this" { name = "${local.name}-redis" subnet_ids = aws_subnet.private[*].id + + tags = local.tags } # Replication group (not aws_elasticache_cluster, which is the @@ -30,4 +32,6 @@ resource "aws_elasticache_replication_group" "this" { transit_encryption_enabled = true apply_immediately = true + + tags = local.tags } diff --git a/terraform/litellm/aws/s3.tf b/terraform/litellm/aws/s3.tf index 375bc73bb71..218949ebd03 100644 --- a/terraform/litellm/aws/s3.tf +++ b/terraform/litellm/aws/s3.tf @@ -18,6 +18,8 @@ resource "aws_s3_bucket" "this" { # cached responses, archived request logs, and /v1/files storage stay put. # Flip to true only for ephemeral / CI stacks (`var.s3_force_destroy`). force_destroy = var.s3_force_destroy + + tags = local.tags } resource "aws_s3_bucket_versioning" "this" { @@ -72,6 +74,8 @@ data "aws_iam_policy_document" "s3_access" { resource "aws_iam_policy" "s3_access" { name = "${local.name}-s3-access" policy = data.aws_iam_policy_document.s3_access.json + + tags = local.tags } resource "aws_iam_role_policy_attachment" "task_s3_access" { diff --git a/terraform/litellm/aws/secrets.tf b/terraform/litellm/aws/secrets.tf index dd13fdc1239..300d38e4053 100644 --- a/terraform/litellm/aws/secrets.tf +++ b/terraform/litellm/aws/secrets.tf @@ -22,6 +22,8 @@ resource "aws_secretsmanager_secret" "master_key" { name = "${local.name}-master-key" description = "LITELLM_MASTER_KEY for gateway + backend." recovery_window_in_days = 0 + + tags = local.tags } resource "aws_secretsmanager_secret_version" "master_key" { @@ -40,6 +42,8 @@ resource "aws_secretsmanager_secret" "license" { name = "${local.name}-license" description = "LITELLM_LICENSE for gateway + backend." 
recovery_window_in_days = 0 + + tags = local.tags } resource "aws_secretsmanager_secret_version" "license" { @@ -59,6 +63,8 @@ resource "aws_secretsmanager_secret" "ui_password" { name = "${local.name}-ui-password" description = "UI_PASSWORD for the backend (UI admin login)." recovery_window_in_days = 0 + + tags = local.tags } resource "aws_secretsmanager_secret_version" "ui_password" { @@ -72,6 +78,8 @@ resource "aws_secretsmanager_secret" "db_master_password" { name = "${local.name}-db-master-password" description = "Aurora master-user password - bootstrap only. Runtime auth is IAM-token." recovery_window_in_days = 0 + + tags = local.tags } resource "aws_secretsmanager_secret_version" "db_master_password" { diff --git a/terraform/litellm/aws/variables.tf b/terraform/litellm/aws/variables.tf index 946cd7ebbf3..8bd505eb9a0 100644 --- a/terraform/litellm/aws/variables.tf +++ b/terraform/litellm/aws/variables.tf @@ -24,7 +24,7 @@ variable "env" { } variable "tags" { - description = "Additional tags merged into the provider default_tags." + description = "Per-deployment tags applied to every taggable resource the module creates, on top of the module's own `litellm:stack` / `managed-by` tags. Caller-level provider `default_tags` (if any) merge with these." type = map(string) default = {} } diff --git a/terraform/litellm/gcp/README.md b/terraform/litellm/gcp/README.md index 504cfa066e4..140741bcff6 100644 --- a/terraform/litellm/gcp/README.md +++ b/terraform/litellm/gcp/README.md @@ -173,10 +173,11 @@ pair differs: | `acme` | `prod` | `acme-litellm-prod-master-key` | | `globex` | `dev` | `globex-litellm-dev-license` | -For a per-tenant instance, the only inputs that change are the tenant -slug, env, and the two pre-issued secrets: +For a per-tenant instance via the example root, the only inputs that +change are the tenant slug, env, and the two pre-issued secrets: ```bash +cd terraform/litellm/gcp/examples/default export TF_VAR_litellm_master_key="sk-..." 
# the tenant's master key export TF_VAR_litellm_license="lic-..." # their LITELLM_LICENSE @@ -187,6 +188,10 @@ terraform apply \ -var "env=stage" ``` +To run *many* tenants from a single config, call the module with +`for_each` instead of one root per tenant — only possible because the +module declares no provider block (see "Using as a module"). + Both `litellm_master_key` and `litellm_license` are optional: - Omit `litellm_master_key` → the stack auto-generates a random `sk-…` value (trial/dev path). @@ -200,14 +205,22 @@ example files. ## Quick start ```bash -cd terraform/litellm/gcp +cd terraform/litellm/gcp/examples/default cp terraform.tfvars.example terraform.tfvars -# Edit: project, region, tenant, env, *_image, proxy_config, gateway_extra_secrets. +# Edit: project, region, tenant, env, image_registry, proxy_config, gateway_extra_secrets. terraform init terraform apply ``` +`examples/default/` is a thin root that configures the `google` / +`google-beta` providers and calls the module (`../../`). It exposes a +curated variable surface; for advanced knobs (per-component +CPU/memory/instances, Cloud SQL tier/edition, Memorystore tier, +per-component image pins) set them on the `module "litellm"` block in +`examples/default/main.tf`, or call the module from your own config — see +"Using as a module" below. + That single apply provisions everything, runs the prisma schema migration via the Cloud Run job (auto-triggered by `bootstrap.tf`), and only then starts the gateway/backend services. When it returns, the stack is serving traffic. @@ -251,6 +264,38 @@ Set `allow_plaintext_lb = true` and leave `lb_domains = []`. Without the flag, plan fails with a clear error pointing at the precondition. Intended for short-lived trial / dev stacks only. +## Using as a module + +The directory itself is a module with **no `provider` block** — the caller +owns provider config. 
You can call it directly with `for_each` (many +tenants from one config), `count`, `depends_on`, or providers configured +to impersonate a service account / target a different project: + +```hcl +provider "google" { + project = "my-gcp-project" + region = "us-central1" +} +provider "google-beta" { + project = "my-gcp-project" + region = "us-central1" +} + +module "litellm" { + source = "github.com/BerriAI/litellm//terraform/litellm/gcp?ref=<tag>" + + project = "my-gcp-project" + region = "us-central1" + tenant = "acme" + env = "prod" + # ...any of the inputs in variables.tf... +} +``` + +Both the default `google` and `google-beta` configs are inherited by the +module automatically through the call — declare both in the caller. +Resource labels are controlled by the module's `labels` input. + ## Storage and database retention Two opt-in tripwires guard against accidental data loss on @@ -281,8 +326,8 @@ or point them at your own CA. | File | What's in it | | ----------------- | -------------------------------------------------------------------- | -| `versions.tf` | Terraform + provider version constraints | -| `providers.tf` | Google + Google-Beta providers | +| `versions.tf` | Terraform + `required_providers` constraints (module declares no provider config) | +| `examples/default/` | Thin root: `google` / `google-beta` providers + a call to the module. The one-command deploy path. 
| | `variables.tf` | All input variables | | `locals.tf` | Path-prefix lists (mirror of `helm/.../ingress.yaml`) + proxy_config helpers | | `network.tf` | VPC, subnet, PSA range, Serverless VPC connector | diff --git a/terraform/litellm/gcp/cloudsql.tf b/terraform/litellm/gcp/cloudsql.tf index 70939c049c3..e3394fefc0f 100644 --- a/terraform/litellm/gcp/cloudsql.tf +++ b/terraform/litellm/gcp/cloudsql.tf @@ -45,6 +45,15 @@ resource "google_sql_database_instance" "writer" { } deletion_protection = var.cloudsql_deletion_protection + + lifecycle { + # disk_autoresize grows storage but never shrinks it. Without this, + # the first plan after any auto-grow reads disk_size as a shrink, which + # is an immutable change and forces a destroy/recreate of the instance + # (full data loss). Set the initial size only; let Cloud SQL own it + # thereafter. + ignore_changes = [settings[0].disk_size] + } } resource "google_sql_database_instance" "reader" { @@ -68,6 +77,12 @@ resource "google_sql_database_instance" "reader" { } deletion_protection = var.cloudsql_deletion_protection + + lifecycle { + # Same autoresize footgun as the writer — the replica grows its disk + # independently. Never let a perceived shrink replace the instance. + ignore_changes = [settings[0].disk_size] + } } resource "google_sql_database" "this" { diff --git a/terraform/litellm/gcp/examples/default/.terraform.lock.hcl b/terraform/litellm/gcp/examples/default/.terraform.lock.hcl new file mode 100644 index 00000000000..e6285567315 --- /dev/null +++ b/terraform/litellm/gcp/examples/default/.terraform.lock.hcl @@ -0,0 +1,63 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/hashicorp/google" { + version = "6.50.0" + constraints = "~> 6.10" + hashes = [ + "h1:79CwMTsp3Ud1nOl5hFS5mxQHyT0fGVye7pqpU0PPlHI=", + "zh:1f3513fcfcbf7ca53d667a168c5067a4dd91a4d4cccd19743e248ff31065503c", + "zh:3da7db8fc2c51a77dd958ea8baaa05c29cd7f829bd8941c26e2ea9cb3aadc1e5", + "zh:3e09ac3f6ca8111cbb659d38c251771829f4347ab159a12db195e211c76068bb", + "zh:7bb9e41c568df15ccf1a8946037355eefb4dfb4e35e3b190808bb7c4abae547d", + "zh:81e5d78bdec7778e6d67b5c3544777505db40a826b6eb5abe9b86d4ba396866b", + "zh:8d309d020fb321525883f5c4ea864df3d5942b6087f6656d6d8b3a1377f340fc", + "zh:93e112559655ab95a523193158f4a4ac0f2bfed7eeaa712010b85ebb551d5071", + "zh:d3efe589ffd625b300cef5917c4629513f77e3a7b111c9df65075f76a46a63c7", + "zh:d4a4d672bbef756a870d8f32b35925f8ce2ef4f6bbd5b71a3cb764f1b6c85421", + "zh:e13a86bca299ba8a118e80d5f84fbdd708fe600ecdceea1a13d4919c068379fe", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + "zh:fec30c095647b583a246c39d557704947195a1b7d41f81e369ba377d997faef6", + ] +} + +provider "registry.terraform.io/hashicorp/google-beta" { + version = "6.50.0" + constraints = "~> 6.10" + hashes = [ + "h1:P2GiUJM1frlPtBViwKn1A9V2dVBdGuWcX80w9TdH8ZE=", + "zh:18b442bd0a05321d39dda1e9e3f1bdede4e61bc2ac62cc7a67037a3864f75101", + "zh:2e387c51455862828bec923a3ec81abf63a4d998da470cf00e09003bda53d668", + "zh:3942e708fa84ebe54996086f4b1398cb747fe19cbcd0be07ace528291fb35dee", + "zh:496287dd48b34ae6197cb1f887abeafd07c33f389dbe431bb01e24846754cfdd", + "zh:6eca885419969ce5c2a706f34dce1f10bde9774757675f2d8a92d12e5a1be390", + "zh:710dbef826c3fe7f76f844dae47937e8e4c1279dd9205ec4610be04cf3327244", + "zh:777ebf44b24bfc7bdbf770dc089f1a72f143b4718fdedb8c6bd75983115a1ec2", + "zh:9c8703bba37b8c7ad857efc3513392c5a096c519397c1cb822d7612f38e4262f", + "zh:c4f1d3a73de2702277c99d5348ad6d374705bcfdd367ad964ff4cfd2cf06c281", + "zh:eca8df11af3f5a948492d5b8b5d01b4ec705aad10bc30ec1524205508ae28393", + 
"zh:f41e7fd5f2628e8fd6b8ea136366923858f54428d1729898925469b862c275c2", + "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", + ] +} + +provider "registry.terraform.io/hashicorp/random" { + version = "3.9.0" + constraints = "~> 3.6" + hashes = [ + "h1:OO+IuvQJSPmWdN8AyyIEvPJbLvDQpgX/zbktoa9KsJE=", + "zh:161ad0bd9a75768c82f53fb6e7172a9d8be2d4889b012645a34795031aaf1bf1", + "zh:19dc9a5b17729725ccfc4f45b0500af0ee5bc6b6b160c7adb8f2bf617d2c80ea", + "zh:269eda8fe42daa7974d5a34d166c3ba9defe80cde86c01e4dadcfdf2e1f05e5f", + "zh:373f7c65566f8f2cc7f45d698654feb9d988996957e1266a69ca00c52d6d16d0", + "zh:5599d16804c41c83009ec621b6d6b6f74e102f5827678a4750f8809055546b61", + "zh:583be0440469a22bff70dcfa56593b01566860b29607437264adb51060cf46fc", + "zh:5f211d8ec3f2e1f414870d9584bfe26e6995560ef81c748f8447a48164767398", + "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", + "zh:7b547fd16216761ef86efc3ed516ac5ac0c5c42b7c7eb24a08cef2d93f69ed5e", + "zh:7e7c0679daf2a382151d05068c8c3f0dae6b7b7dccf818827b73dd08638df2ef", + "zh:8089dec888a8038b9b4fb23b3df7e1057293dbc5b60b42cc47ff690d69d4b61b", + "zh:c51f15a031edfd6f23ce8ced3446ca7f8d8d647e2499890d7d5d10d5016d7257", + "zh:c94784f005708890dc6895afd53636ec00ec1e430b15d41e5aebfb1d4b39bd04", + ] +} diff --git a/terraform/litellm/gcp/examples/default/main.tf b/terraform/litellm/gcp/examples/default/main.tf new file mode 100644 index 00000000000..745b79383db --- /dev/null +++ b/terraform/litellm/gcp/examples/default/main.tf @@ -0,0 +1,45 @@ +# One-command deploy of the LiteLLM GCP stack. +# +# cd terraform/litellm/gcp/examples/default +# cp terraform.tfvars.example terraform.tfvars # edit it +# terraform init +# terraform apply +# +# This root just wires the providers (see providers.tf) to the module. 
The +# module itself (../../) declares no provider, so it can also be consumed +# from your own config with count/for_each or impersonated-SA providers: +# +# module "litellm" { +# source = "github.com/BerriAI/litellm//terraform/litellm/gcp?ref=<tag>" +# ... +# } +# +# Knobs not surfaced as variables here (per-component sizing/instances, +# Cloud SQL tier/edition, Memorystore tier, per-component image overrides) +# can be set directly on this block — see ../../variables.tf. +module "litellm" { + source = "../../" + + project = var.project + region = var.region + tenant = var.tenant + env = var.env + + litellm_master_key = var.litellm_master_key + litellm_license = var.litellm_license + ui_password = var.ui_password + + image_registry = var.image_registry + image_tag = var.image_tag + + lb_domains = var.lb_domains + allow_plaintext_lb = var.allow_plaintext_lb + cloudsql_deletion_protection = var.cloudsql_deletion_protection + gcs_force_destroy = var.gcs_force_destroy + + proxy_config = var.proxy_config + gateway_extra_env = var.gateway_extra_env + backend_extra_env = var.backend_extra_env + gateway_extra_secrets = var.gateway_extra_secrets + backend_extra_secrets = var.backend_extra_secrets +} diff --git a/terraform/litellm/gcp/examples/default/outputs.tf b/terraform/litellm/gcp/examples/default/outputs.tf new file mode 100644 index 00000000000..3a9343c4850 --- /dev/null +++ b/terraform/litellm/gcp/examples/default/outputs.tf @@ -0,0 +1,59 @@ +output "lb_ip" { + description = "Global anycast IP of the external load balancer." + value = module.litellm.lb_ip +} + +output "lb_url" { + description = "Proxy URL. Dashboard at /, API at /v1/*." + value = module.litellm.lb_url +} + +output "gateway_service_url" { + description = "Default Cloud Run URL for the gateway (bypasses the LB)." + value = module.litellm.gateway_service_url +} + +output "backend_service_url" { + description = "Default Cloud Run URL for the backend (bypasses the LB)." 
+ value = module.litellm.backend_service_url +} + +output "ui_service_url" { + description = "Default Cloud Run URL for the UI (bypasses the LB)." + value = module.litellm.ui_service_url +} + +output "cloudsql_writer_ip" { + description = "Private IP of the Cloud SQL writer." + value = module.litellm.cloudsql_writer_ip +} + +output "cloudsql_reader_ip" { + description = "Private IP of the Cloud SQL read replica." + value = module.litellm.cloudsql_reader_ip +} + +output "redis_endpoint" { + description = "Memorystore Redis endpoint." + value = module.litellm.redis_endpoint +} + +output "gcs_bucket" { + description = "GCS bucket name." + value = module.litellm.gcs_bucket +} + +output "master_key_secret_id" { + description = "Secret Manager resource ID holding LITELLM_MASTER_KEY." + value = module.litellm.master_key_secret_id +} + +output "db_password_secret_id" { + description = "Secret Manager resource ID holding the Cloud SQL app-user password." + value = module.litellm.db_password_secret_id +} + +output "migration_run_command" { + description = "Break-glass command to re-run the one-off migration job." + value = module.litellm.migration_run_command +} diff --git a/terraform/litellm/gcp/examples/default/providers.tf b/terraform/litellm/gcp/examples/default/providers.tf new file mode 100644 index 00000000000..4b79367fe09 --- /dev/null +++ b/terraform/litellm/gcp/examples/default/providers.tf @@ -0,0 +1,17 @@ +# Providers are configured HERE, in the root, not in the module. A module +# that declares its own configured `provider` block can't be called with +# count/for_each/depends_on and gives the caller no way to set an +# impersonated service account, a different project, or aliases. +# +# The module's resources inherit these default (unaliased) `google` / +# `google-beta` configs automatically through the module call, so project +# and region set here flow into every resource that doesn't pass its own. 
+provider "google" { + project = var.project + region = var.region +} + +provider "google-beta" { + project = var.project + region = var.region +} diff --git a/terraform/litellm/gcp/terraform.tfvars.example b/terraform/litellm/gcp/examples/default/terraform.tfvars.example similarity index 81% rename from terraform/litellm/gcp/terraform.tfvars.example rename to terraform/litellm/gcp/examples/default/terraform.tfvars.example index 5c22a14c6d6..eff338ca240 100644 --- a/terraform/litellm/gcp/terraform.tfvars.example +++ b/terraform/litellm/gcp/examples/default/terraform.tfvars.example @@ -28,14 +28,14 @@ env = "stage" # cloudsql_deletion_protection = true # default: refuse destroy on the DB # gcs_force_destroy = false # default: refuse destroy on a non-empty bucket -# Component images. Defaults pin all four to the same GHCR release tag — -# bump them together when bumping LiteLLM. To use private images, mirror -# them into Artifact Registry first — Cloud Run only authenticates against -# AR / gcr.io. -# gateway_image = "us-central1-docker.pkg.dev/my-gcp-project/litellm/gateway:1.86.0-dev" -# backend_image = "us-central1-docker.pkg.dev/my-gcp-project/litellm/backend:1.86.0-dev" -# ui_image = "us-central1-docker.pkg.dev/my-gcp-project/litellm/ui:1.86.0-dev" -# migrations_image = "us-central1-docker.pkg.dev/my-gcp-project/litellm/migrations:1.86.0-dev" +# Images. Cloud Run rejects ghcr.io, so a real deploy must point +# image_registry at an Artifact Registry remote repo (see README "Image +# pulls"); image_tag is applied to all four litellm-* images. Per-component +# *_image overrides are NOT exposed here — set them directly on the +# `module "litellm"` block in main.tf (see ../../variables.tf) if you need +# to mix-and-match versions. 
+# image_registry = "us-central1-docker.pkg.dev/my-gcp-project/litellm/berriai" +# image_tag = "v1.86.0-dev" # ---------- proxy_config (mirrors helm gateway.config.proxy_config) ---------- # proxy_config = { diff --git a/terraform/litellm/gcp/examples/default/variables.tf b/terraform/litellm/gcp/examples/default/variables.tf new file mode 100644 index 00000000000..745a5e5d76b --- /dev/null +++ b/terraform/litellm/gcp/examples/default/variables.tf @@ -0,0 +1,120 @@ +# Curated surface for the one-command deploy path. The module (../../) +# exposes far more knobs (per-component CPU/memory/instances, Cloud SQL +# tier/edition, Memorystore tier, per-component image overrides, …). To +# tune those, set them directly on the `module "litellm"` block in +# main.tf, or call the module from your own root config. Full per-variable +# docs live in ../../variables.tf — the module is the source of truth. + +variable "project" { + description = "GCP project ID." + type = string +} + +variable "region" { + description = "GCP region for VPC, Cloud SQL, Memorystore, Cloud Run, and the LB IP." + type = string + default = "us-central1" +} + +variable "tenant" { + description = "Tenant slug — prefix for every resource (<tenant>-litellm-<env>)." + type = string +} + +variable "env" { + description = "Environment suffix (stage, prod, dev)." + type = string +} + +# Sensitive — prefer TF_VAR_litellm_master_key / TF_VAR_litellm_license / +# TF_VAR_ui_password so values stay out of any committed tfvars file. +variable "litellm_master_key" { + description = "Pre-existing LITELLM_MASTER_KEY (sk-…). Empty → auto-generated." + type = string + default = "" + sensitive = true +} + +variable "litellm_license" { + description = "LiteLLM enterprise license. Empty → OSS-only." + type = string + default = "" + sensitive = true +} + +variable "ui_password" { + description = "UI admin password. Empty → falls back to LITELLM_MASTER_KEY." + type = string + default = "" + sensitive = true +} + +# Image source. 
Cloud Run rejects ghcr.io, so a real deploy must point +# image_registry at an Artifact Registry remote repo (see README "Image +# pulls"). Per-component overrides live in ../../variables.tf. +variable "image_registry" { + description = "Registry path prefix; images composed as <registry>/litellm-<component>:<tag>." + type = string + default = "ghcr.io/berriai" +} + +variable "image_tag" { + description = "Tag applied to all four litellm-* images. Bump in lockstep." + type = string + default = "v1.86.0-dev" +} + +# TLS — provide DNS names for a managed cert, or opt into HTTP-only for dev. +variable "lb_domains" { + description = "DNS names (already pointing at lb_ip) for a Google-managed cert. Empty → no TLS." + type = list(string) + default = [] +} + +variable "allow_plaintext_lb" { + description = "Opt into HTTP-only LB (trial/dev only)." + type = bool + default = false +} + +variable "cloudsql_deletion_protection" { + description = "Cloud SQL deletion protection (writer + reader)." + type = bool + default = true +} + +variable "gcs_force_destroy" { + description = "Allow destroy of a non-empty GCS bucket (ephemeral/CI only)." + type = bool + default = false +} + +variable "proxy_config" { + description = "LiteLLM proxy config (contents of config.yaml). Empty → defaults." + type = any + default = {} +} + +variable "gateway_extra_env" { + description = "Plain-text env vars layered onto the gateway." + type = map(string) + default = {} +} + +variable "backend_extra_env" { + description = "Plain-text env vars layered onto the backend." + type = map(string) + default = {} +} + +variable "gateway_extra_secrets" { + description = "Gateway env vars sourced from Secret Manager (name → secret resource ID)." + type = map(string) + default = {} +} + +variable "backend_extra_secrets" { + description = "Backend env vars sourced from Secret Manager (name → secret resource ID)." 
+ type = map(string) + default = {} +} diff --git a/terraform/litellm/gcp/examples/default/versions.tf b/terraform/litellm/gcp/examples/default/versions.tf new file mode 100644 index 00000000000..a630c59afd0 --- /dev/null +++ b/terraform/litellm/gcp/examples/default/versions.tf @@ -0,0 +1,18 @@ +terraform { + required_version = ">= 1.6.0" + + required_providers { + google = { + source = "hashicorp/google" + version = "~> 6.10" + } + google-beta = { + source = "hashicorp/google-beta" + version = "~> 6.10" + } + random = { + source = "hashicorp/random" + version = "~> 3.6" + } + } +} diff --git a/terraform/litellm/gcp/load_balancer.tf b/terraform/litellm/gcp/load_balancer.tf index b0081786f13..3fce96eaf74 100644 --- a/terraform/litellm/gcp/load_balancer.tf +++ b/terraform/litellm/gcp/load_balancer.tf @@ -160,11 +160,22 @@ resource "google_compute_global_forwarding_rule" "http" { resource "google_compute_managed_ssl_certificate" "this" { count = local.tls_enabled ? 1 : 0 - name = "${local.name}-cert" + + # A managed cert's `domains` is immutable, so changing var.lb_domains + # forces replacement, and the cert is referenced by the HTTPS target + # proxy — a destroy-then-create replacement fails with + # `resourceInUseByAnotherResource`. Hashing the domains into the name + # makes the name change with the domain set, so create_before_destroy + # builds the new cert + repoints the proxy before deleting the old one. 
+ name = "${local.name}-cert-${substr(sha1(join(",", var.lb_domains)), 0, 8)}" managed { domains = var.lb_domains } + + lifecycle { + create_before_destroy = true + } } resource "google_compute_target_https_proxy" "this" { diff --git a/terraform/litellm/gcp/providers.tf b/terraform/litellm/gcp/providers.tf deleted file mode 100644 index fd1584463f8..00000000000 --- a/terraform/litellm/gcp/providers.tf +++ /dev/null @@ -1,9 +0,0 @@ -provider "google" { - project = var.project - region = var.region -} - -provider "google-beta" { - project = var.project - region = var.region -} From e10fb258e58bb889eafc9b551a166e7c38ed53ce Mon Sep 17 00:00:00 2001 From: yassin-berriai Date: Mon, 1 Jun 2026 21:27:15 +0000 Subject: [PATCH 02/10] =?UTF-8?q?docs(terraform):=20address=20review=20?= =?UTF-8?q?=E2=80=94=20state-migration=20guide,=20tag=20dedupe,=20for=5Fea?= =?UTF-8?q?ch=20note?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add 'Migrating an existing deployment' section to AWS & GCP READMEs documenting the required terraform state mv step (resource addresses now gain a module.litellm. 
prefix under the examples/default root) - Remove redundant managed-by tag from the AWS example providers.tf; reserve default_tags there for org-wide tags only - Document the for_each single-provider limitation for GCP (no configuration_aliases) in the README and example main.tf Resolves LIT-3504 --- terraform/litellm/aws/README.md | 50 ++++++++++++++++- .../litellm/aws/examples/default/providers.tf | 16 ++++-- terraform/litellm/gcp/README.md | 56 +++++++++++++++++++ .../litellm/gcp/examples/default/main.tf | 6 ++ 4 files changed, 122 insertions(+), 6 deletions(-) diff --git a/terraform/litellm/aws/README.md b/terraform/litellm/aws/README.md index 80ee18c667d..4887de517d4 100644 --- a/terraform/litellm/aws/README.md +++ b/terraform/litellm/aws/README.md @@ -231,6 +231,54 @@ Tags: the module threads its own `litellm:stack` / `managed-by` / `var.tags` onto every taggable resource. Any `default_tags` on your provider merge on top — set org-wide tags there, per-deployment tags via the `tags` input. +## Migrating an existing deployment + +**Read this before re-applying if you first deployed from `terraform/litellm/aws/` +directly** (i.e. before this module-first refactor). + +The old layout ran terraform from the stack root, so every resource lived at a +root-level address (`aws_lb.this`, `aws_ecs_cluster.this`, …). The new entry +point — `examples/default/` — wraps the stack in a `module "litellm"` block, so +the same resources now live at `module.litellm.<address>`. Terraform keys state by +address, so a plain `terraform plan` from the new root against your **existing +state** sees the old addresses as "gone" and the prefixed addresses as "new" — +it will propose a **full destroy-and-recreate of the entire stack** (database, +ALB, everything). Do not apply that plan. + +Migrate the state once so the addresses line up. From the directory holding +your existing state (`examples/default/` after you've copied your state file +there, or wherever you run terraform): + +```bash +# 1. 
List the current root-level addresses. +terraform state list + +# 2. Move each one under the module. Mechanically prefix every address +# with `module.litellm.`, e.g.: +terraform state mv 'aws_lb.this' 'module.litellm.aws_lb.this' +terraform state mv 'aws_ecs_cluster.this' 'module.litellm.aws_ecs_cluster.this' +# …repeat for every address from step 1, including indexed/for_each +# resources (keep the [key]/[index] suffix intact), e.g.: +terraform state mv 'aws_subnet.private[0]' 'module.litellm.aws_subnet.private[0]' + +# 3. Confirm the plan is now clean (no destroys/creates). +terraform plan +``` + +A scripted move over the whole list (run from the dir with the state): + +```bash +terraform state list | grep -v '^module\.litellm\.' | while read -r addr; do + terraform state mv "$addr" "module.litellm.$addr" +done +terraform plan # expect: No changes +``` + +`terraform state mv` only rewrites local state — it never touches live +infrastructure — and a clean `terraform plan` afterward confirms the addresses +line up before you apply. If you'd rather not migrate, you can keep calling the +module from your own root with the same addresses you already have. + ## Image pulls The defaults pull from `ghcr.io/berriai/litellm-:v1.86.0-dev`, @@ -291,7 +339,7 @@ losing the contents. | File | What's in it | | ----------------- | --------------------------------------------------------------------- | | `versions.tf` | Terraform + `required_providers` constraints (module declares no provider config) | -| `examples/default/` | Thin root: `aws` provider + `default_tags` + a call to the module. The one-command deploy path. | +| `examples/default/` | Thin root: `aws` provider (with an optional `default_tags` slot for org-wide tags) + a call to the module. The one-command deploy path. 
| | `variables.tf` | All input variables | | `locals.tf` | Path-prefix lists for ALB routing (mirror of `helm/.../ingress.yaml`) | | `network.tf` | VPC, subnets, IGW, NAT, route tables, security groups | diff --git a/terraform/litellm/aws/examples/default/providers.tf b/terraform/litellm/aws/examples/default/providers.tf index aaad4ae916d..92723a92769 100644 --- a/terraform/litellm/aws/examples/default/providers.tf +++ b/terraform/litellm/aws/examples/default/providers.tf @@ -10,9 +10,15 @@ provider "aws" { region = var.region - default_tags { - tags = { - "managed-by" = "terraform" - } - } + # Reserve `default_tags` for pure org-wide tags the module shouldn't know + # about (cost center, team, compliance scope, …). They propagate through the + # module call and merge with the module's own `litellm:stack` / `managed-by` + # / var.tags. The module already stamps `managed-by = "terraform"`, so don't + # duplicate it here — set per-deployment tags via the module's `tags` input. + # + # default_tags { + # tags = { + # "cost-center" = "platform" + # } + # } } diff --git a/terraform/litellm/gcp/README.md b/terraform/litellm/gcp/README.md index 140741bcff6..9740fbea7d9 100644 --- a/terraform/litellm/gcp/README.md +++ b/terraform/litellm/gcp/README.md @@ -296,6 +296,62 @@ Both the default `google` and `google-beta` configs are inherited by the module automatically through the call — declare both in the caller. Resource labels are controlled by the module's `labels` input. +**`for_each` shares one provider config.** The module's `versions.tf` declares +`google` / `google-beta` *without* `configuration_aliases`, so it only ever +receives the caller's single default (unaliased) `google` / `google-beta` +providers. That's deliberate — it keeps the one-command path simple — but it +means a `for_each` over the module runs every instance against the **same +project, region, and credentials**. 
Use `for_each` for many tenants in one +project (distinct `tenant`/`env`); it cannot fan out across projects or regions +on its own. To deploy into separate projects/regions, give each its own root +with its own provider config (one `examples/default`-style root per project), +or fork the module to add `configuration_aliases` and pass per-instance +`providers = { ... }`. + +## Migrating an existing deployment + +**Read this before re-applying if you first deployed from `terraform/litellm/gcp/` +directly** (i.e. before this module-first refactor). + +The old layout ran terraform from the stack root, so every resource lived at a +root-level address (`google_cloud_run_v2_service.gateway`, …). The new entry +point — `examples/default/` — wraps the stack in a `module "litellm"` block, so +the same resources now live at `module.litellm.`. Terraform keys state by +address, so a plain `terraform plan` from the new root against your **existing +state** sees the old addresses as "gone" and the prefixed addresses as "new" — +it will propose a **full destroy-and-recreate of the entire stack** (Cloud SQL, +load balancer, everything). Do not apply that plan. + +Migrate the state once so the addresses line up. From the directory holding +your existing state: + +```bash +# 1. List the current root-level addresses. +terraform state list + +# 2. Move each one under the module by prefixing it with `module.litellm.`, +# keeping any [key]/[index] suffix intact, e.g.: +terraform state mv 'google_cloud_run_v2_service.gateway' \ + 'module.litellm.google_cloud_run_v2_service.gateway' + +# 3. Confirm the plan is now clean (no destroys/creates). +terraform plan +``` + +A scripted move over the whole list (run from the dir with the state): + +```bash +terraform state list | grep -v '^module\.litellm\.' 
| while read -r addr; do + terraform state mv "$addr" "module.litellm.$addr" +done +terraform plan # expect: No changes +``` + +`terraform state mv` only rewrites local state — it never touches live +infrastructure — and a clean `terraform plan` afterward confirms the addresses +line up before you apply. If you'd rather not migrate, you can keep calling the +module from your own root with the same addresses you already have. + ## Storage and database retention Two opt-in tripwires guard against accidental data loss on diff --git a/terraform/litellm/gcp/examples/default/main.tf b/terraform/litellm/gcp/examples/default/main.tf index 745b79383db..a575dc4fc8a 100644 --- a/terraform/litellm/gcp/examples/default/main.tf +++ b/terraform/litellm/gcp/examples/default/main.tf @@ -14,6 +14,12 @@ # ... # } # +# Note: the module declares no `configuration_aliases`, so it receives only the +# caller's single default google/google-beta providers — a `for_each` over it +# runs every instance against the same project/region/credentials. To fan out +# across projects or regions, use one root per project. See the GCP README's +# "Using as a module" section. +# # Knobs not surfaced as variables here (per-component sizing/instances, # Cloud SQL tier/edition, Memorystore tier, per-component image overrides) # can be set directly on this block — see ../../variables.tf. From 2ead9076bf98581afa347348bb3481c352bd1137 Mon Sep 17 00:00:00 2001 From: yassin-berriai Date: Mon, 1 Jun 2026 21:42:39 +0000 Subject: [PATCH 03/10] docs(terraform/gcp): note expected SSL cert replacement in state-migration guide MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The managed SSL cert is named with a hash of lb_domains, so TLS-enabled stacks that migrated from the old un-hashed name will see one create_before_destroy cert replacement after terraform state mv — not a clean 'No changes'. Document that this single replacement is expected and safe. 
--- terraform/litellm/gcp/README.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/terraform/litellm/gcp/README.md b/terraform/litellm/gcp/README.md index 9740fbea7d9..e6ff6000ffa 100644 --- a/terraform/litellm/gcp/README.md +++ b/terraform/litellm/gcp/README.md @@ -344,12 +344,25 @@ A scripted move over the whole list (run from the dir with the state): terraform state list | grep -v '^module\.litellm\.' | while read -r addr; do terraform state mv "$addr" "module.litellm.$addr" done -terraform plan # expect: No changes +terraform plan # expect: No changes (but see the TLS note below) ``` +**TLS-enabled stacks expect one cert replacement.** The managed SSL +certificate is named with a hash of `lb_domains` +(`-cert-`) so a domain change rolls the cert safely. If you +deployed an earlier revision its cert is stored under the old un-hashed +name, so after the `state mv` above `terraform plan` will show **one** +`google_compute_managed_ssl_certificate` replacement (a create-then-destroy, +guarded by `create_before_destroy`) rather than "No changes". That single +replacement is expected and safe — the new cert is provisioned and attached +to the LB before the old one is removed. Everything else should report no +changes; if the plan shows anything beyond that one cert, an address didn't +line up — re-check step 1. + `terraform state mv` only rewrites local state — it never touches live -infrastructure — and a clean `terraform plan` afterward confirms the addresses -line up before you apply. If you'd rather not migrate, you can keep calling the +infrastructure — and a clean `terraform plan` afterward (modulo the cert +replacement above for TLS stacks) confirms the addresses line up before you +apply. If you'd rather not migrate, you can keep calling the module from your own root with the same addresses you already have. 
## Storage and database retention From b1fa2c2f427245f0f7433ffc89b341f1c231cd3e Mon Sep 17 00:00:00 2001 From: yassin-berriai Date: Tue, 2 Jun 2026 06:32:53 +0000 Subject: [PATCH 04/10] docs(terraform): drop state-migration guides The AWS/GCP stacks have never been published, so there are no existing deployments to migrate from the old root-module layout. Remove the 'Migrating an existing deployment' sections from both READMEs. --- terraform/litellm/aws/README.md | 48 --------------------------- terraform/litellm/gcp/README.md | 57 --------------------------------- 2 files changed, 105 deletions(-) diff --git a/terraform/litellm/aws/README.md b/terraform/litellm/aws/README.md index 4887de517d4..17f6cc12749 100644 --- a/terraform/litellm/aws/README.md +++ b/terraform/litellm/aws/README.md @@ -231,54 +231,6 @@ Tags: the module threads its own `litellm:stack` / `managed-by` / `var.tags` onto every taggable resource. Any `default_tags` on your provider merge on top — set org-wide tags there, per-deployment tags via the `tags` input. -## Migrating an existing deployment - -**Read this before re-applying if you first deployed from `terraform/litellm/aws/` -directly** (i.e. before this module-first refactor). - -The old layout ran terraform from the stack root, so every resource lived at a -root-level address (`aws_lb.this`, `aws_ecs_cluster.this`, …). The new entry -point — `examples/default/` — wraps the stack in a `module "litellm"` block, so -the same resources now live at `module.litellm.`. Terraform keys state by -address, so a plain `terraform plan` from the new root against your **existing -state** sees the old addresses as "gone" and the prefixed addresses as "new" — -it will propose a **full destroy-and-recreate of the entire stack** (database, -ALB, everything). Do not apply that plan. - -Migrate the state once so the addresses line up. 
From the directory holding -your existing state (`examples/default/` after you've copied your state file -there, or wherever you run terraform): - -```bash -# 1. List the current root-level addresses. -terraform state list - -# 2. Move each one under the module. Mechanically prefix every address -# with `module.litellm.`, e.g.: -terraform state mv 'aws_lb.this' 'module.litellm.aws_lb.this' -terraform state mv 'aws_ecs_cluster.this' 'module.litellm.aws_ecs_cluster.this' -# …repeat for every address from step 1, including indexed/for_each -# resources (keep the [key]/[index] suffix intact), e.g.: -terraform state mv 'aws_subnet.private[0]' 'module.litellm.aws_subnet.private[0]' - -# 3. Confirm the plan is now clean (no destroys/creates). -terraform plan -``` - -A scripted move over the whole list (run from the dir with the state): - -```bash -terraform state list | grep -v '^module\.litellm\.' | while read -r addr; do - terraform state mv "$addr" "module.litellm.$addr" -done -terraform plan # expect: No changes -``` - -`terraform state mv` only rewrites local state — it never touches live -infrastructure — and a clean `terraform plan` afterward confirms the addresses -line up before you apply. If you'd rather not migrate, you can keep calling the -module from your own root with the same addresses you already have. - ## Image pulls The defaults pull from `ghcr.io/berriai/litellm-:v1.86.0-dev`, diff --git a/terraform/litellm/gcp/README.md b/terraform/litellm/gcp/README.md index e6ff6000ffa..8b3579b572d 100644 --- a/terraform/litellm/gcp/README.md +++ b/terraform/litellm/gcp/README.md @@ -308,63 +308,6 @@ with its own provider config (one `examples/default`-style root per project), or fork the module to add `configuration_aliases` and pass per-instance `providers = { ... }`. -## Migrating an existing deployment - -**Read this before re-applying if you first deployed from `terraform/litellm/gcp/` -directly** (i.e. before this module-first refactor). 
- -The old layout ran terraform from the stack root, so every resource lived at a -root-level address (`google_cloud_run_v2_service.gateway`, …). The new entry -point — `examples/default/` — wraps the stack in a `module "litellm"` block, so -the same resources now live at `module.litellm.`. Terraform keys state by -address, so a plain `terraform plan` from the new root against your **existing -state** sees the old addresses as "gone" and the prefixed addresses as "new" — -it will propose a **full destroy-and-recreate of the entire stack** (Cloud SQL, -load balancer, everything). Do not apply that plan. - -Migrate the state once so the addresses line up. From the directory holding -your existing state: - -```bash -# 1. List the current root-level addresses. -terraform state list - -# 2. Move each one under the module by prefixing it with `module.litellm.`, -# keeping any [key]/[index] suffix intact, e.g.: -terraform state mv 'google_cloud_run_v2_service.gateway' \ - 'module.litellm.google_cloud_run_v2_service.gateway' - -# 3. Confirm the plan is now clean (no destroys/creates). -terraform plan -``` - -A scripted move over the whole list (run from the dir with the state): - -```bash -terraform state list | grep -v '^module\.litellm\.' | while read -r addr; do - terraform state mv "$addr" "module.litellm.$addr" -done -terraform plan # expect: No changes (but see the TLS note below) -``` - -**TLS-enabled stacks expect one cert replacement.** The managed SSL -certificate is named with a hash of `lb_domains` -(`-cert-`) so a domain change rolls the cert safely. If you -deployed an earlier revision its cert is stored under the old un-hashed -name, so after the `state mv` above `terraform plan` will show **one** -`google_compute_managed_ssl_certificate` replacement (a create-then-destroy, -guarded by `create_before_destroy`) rather than "No changes". 
That single -replacement is expected and safe — the new cert is provisioned and attached -to the LB before the old one is removed. Everything else should report no -changes; if the plan shows anything beyond that one cert, an address didn't -line up — re-check step 1. - -`terraform state mv` only rewrites local state — it never touches live -infrastructure — and a clean `terraform plan` afterward (modulo the cert -replacement above for TLS stacks) confirms the addresses line up before you -apply. If you'd rather not migrate, you can keep calling the -module from your own root with the same addresses you already have. - ## Storage and database retention Two opt-in tripwires guard against accidental data loss on From 70a9dc8538438dc991532c2ec2cf311d610dce57 Mon Sep 17 00:00:00 2001 From: Yassin Kortam Date: Sat, 6 Jun 2026 10:18:08 -0700 Subject: [PATCH 05/10] docs(terraform): call out image-registry override required for GCP 1-click The GCP stack's default image_registry points at ghcr.io, which Cloud Run won't authenticate against, so any real deploy (HCP Terraform no-code or otherwise) must override it. Document that as a hard requirement on the GCP README rather than a side note, and add a top-level HCP Terraform 1-click section enumerating the required inputs per stack and the migration-task caveat for HCP-hosted runners. --- terraform/litellm/README.md | 41 +++++++++++++++++++++++++++++++++ terraform/litellm/gcp/README.md | 12 ++++++---- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/terraform/litellm/README.md b/terraform/litellm/README.md index f1fa455f65e..36315a99ff3 100644 --- a/terraform/litellm/README.md +++ b/terraform/litellm/README.md @@ -173,3 +173,44 @@ gateway/backend services start serving traffic. - Observability beyond the cloud provider's defaults (CloudWatch logs on AWS, Cloud Logging on GCP). Wire your own Prometheus / Datadog / Langfuse via the `*_extra_env` variables. 
+ +## HCP Terraform no-code (1-click) deploy + +Both stacks are publishable as no-code modules in HCP Terraform's private +registry. The end-user flow is: open the no-code launch URL, fill in a +few inputs, hit *Create workspace*, and HCP runs plan/apply against your +cloud account using a variable-set of credentials (static keys or +dynamic-credentials OIDC). + +Required overrides the launcher must supply per stack: + +- **AWS** (`terraform/litellm/aws`): `region`, `azs`, `tenant`, `env`. + The image vars (`gateway_image`, `backend_image`, `ui_image`, + `migrations_image`) can be left at their defaults — the GHCR images + are anonymous-readable and ECS Fargate pulls them without extra + credentials. + +- **GCP** (`terraform/litellm/gcp`): `project`, `tenant`, `env`, **and + one of**: + - `image_registry` pointed at an Artifact Registry **remote** repository + backed by `https://ghcr.io` (e.g. + `us-central1-docker.pkg.dev/<project>/litellm/berriai`), so Cloud Run + pulls the four upstream `litellm-*` images through it; or + - all four per-component `*_image` URIs pointing at images mirrored + into a regular Artifact Registry repo. + + The defaults (`ghcr.io/berriai`) cause Cloud Run admission to reject + the service spec — Cloud Run only authenticates against Artifact + Registry, `[region.]gcr.io`, or `docker.io`. See + `terraform/litellm/gcp/README.md#image-pulls` for the + `gcloud artifacts repositories create … --mode=remote-repository` + command that sets up the passthrough repo (one-time, per project). + +What still requires a manual step regardless of HCP no-code: + +- The one-off migration task. The stacks auto-run it via `local-exec` + during `terraform apply`, but that requires the `aws` / `gcloud` CLI + on the runner. HCP-hosted runners don't have them; use an HCP agent + pool with a custom image that includes the relevant CLI, or run the + command printed in the `migration_run_command` output by hand after + the first apply.
diff --git a/terraform/litellm/gcp/README.md b/terraform/litellm/gcp/README.md index 8b3579b572d..26702e231f0 100644 --- a/terraform/litellm/gcp/README.md +++ b/terraform/litellm/gcp/README.md @@ -25,10 +25,14 @@ and `litellm-migrations` (slim image used only by the one-off Cloud Run Job — runs `prisma migrate deploy` against the writer DB and exits). Bump them together when bumping LiteLLM. -Cloud Run only accepts images from Artifact Registry, `[region.]gcr.io`, -or `docker.io` — `ghcr.io` URIs are rejected at apply time. The four -images are published to GHCR upstream, so any real deploy needs an -Artifact Registry remote repository pointed at GHCR. +**Required override.** The `image_registry` default (`ghcr.io/berriai`) +does **not** work as-is — Cloud Run only accepts images from Artifact +Registry, `[region.]gcr.io`, or `docker.io`, and rejects `ghcr.io` URIs +at apply time. Every deploy (including HCP Terraform 1-click) must +supply either `image_registry` pointed at an Artifact Registry remote +repo backed by GHCR, or full per-component `*_image` URIs against +images you've already mirrored. The default is present only so +`terraform plan` succeeds during local iteration. **One-time setup (per project):** create a remote repo and let Cloud Run pull through it. From ab2c928efb6dfdca87150936f1f781ee9fe6c48d Mon Sep 17 00:00:00 2001 From: Yassin Kortam Date: Sat, 6 Jun 2026 10:21:38 -0700 Subject: [PATCH 06/10] feat(terraform/aws): mount proxy_config from S3 and wire OpenTelemetry v2 proxy_config Drop the inline LITELLM_PROXY_CONFIG_B64 env var. Upload the YAML to S3 at config/litellm-config.yaml; gateway and backend container entrypoints download it to /tmp/litellm-config.yaml via boto3 before exec'ing uvicorn. The S3 object etag is wired into the task definition so a config edit produces a new task-def revision and a rolling redeploy. 
The existing s3_access policy already grants the task role s3:GetObject on this bucket, so no IAM changes were needed for the mount itself. OpenTelemetry v2 New variables otel_endpoint, otel_exporter, otel_service_name, and otel_headers_secret_arn. Setting otel_endpoint to a non-empty value adds LITELLM_OTEL_V2=true plus OTEL_EXPORTER / OTEL_ENDPOINT / OTEL_SERVICE_NAME / OTEL_ENVIRONMENT_NAME to the shared env block; an optional Secrets Manager ARN backs OTEL_HEADERS for collectors that need an auth header. Execution role auto-gains GetSecretValue on that ARN. Empty endpoint = nothing added, so existing deployments are unchanged. --- terraform/litellm/aws/README.md | 38 +++++++- terraform/litellm/aws/ecs.tf | 90 ++++++++++++------- .../examples/default/terraform.tfvars.example | 9 ++ terraform/litellm/aws/iam.tf | 1 + terraform/litellm/aws/s3.tf | 19 ++++ terraform/litellm/aws/variables.tf | 65 +++++++++++++- 6 files changed, 183 insertions(+), 39 deletions(-) diff --git a/terraform/litellm/aws/README.md b/terraform/litellm/aws/README.md index 17f6cc12749..cae128175f6 100644 --- a/terraform/litellm/aws/README.md +++ b/terraform/litellm/aws/README.md @@ -44,9 +44,12 @@ needs the `aws` CLI installed and authenticated. ### `proxy_config` (preferred) Mirrors the helm chart's `gateway.config.proxy_config`. The map is YAML-encoded -and base64-passed to gateway, backend, and the migration task; each container -decodes it to `/tmp/litellm-config.yaml` at startup and sets `CONFIG_FILE_PATH` -to match. +and uploaded to S3 (`config/litellm-config.yaml` in the stack's bucket); the +gateway and backend container entrypoints download it to +`/tmp/litellm-config.yaml` at task start via boto3 and set `CONFIG_FILE_PATH` +to match. The S3 object's etag is wired into the task definition, so editing +`proxy_config` produces a new task-def revision and a rolling redeploy of both +services. 
```hcl proxy_config = { @@ -119,6 +122,35 @@ aws secretsmanager create-secret \ --secret-string "sk-proj-..." ``` +### Observability (OpenTelemetry v2) + +Set `otel_endpoint` and OTel v2 +(https://docs.litellm.ai/docs/observability/opentelemetry_v2) turns on for +both gateway and backend; the stack flips `LITELLM_OTEL_V2=true` and wires +`OTEL_EXPORTER` / `OTEL_ENDPOINT` / `OTEL_SERVICE_NAME` / +`OTEL_ENVIRONMENT_NAME` into the shared env block. Leave it empty and no OTel +env vars are added. + +```hcl +otel_endpoint = "http://otel-collector.internal:4318" +otel_exporter = "otlp_http" # otlp_grpc, console +otel_service_name = "" # defaults to the stack name +``` + +For collectors that require an auth header, store the comma-separated +`key=value` string in Secrets Manager and reference it via +`otel_headers_secret_arn`. The execution role auto-gains +`secretsmanager:GetSecretValue` on that ARN. + +```hcl +otel_headers_secret_arn = "arn:aws:secretsmanager:us-west-2:111122223333:secret:honeycomb-otel-headers-AbCdEf" +``` + +Vendor presets (Arize, Phoenix, Langfuse OTel, Weave, Langtrace, Levo, +AgentOps) live under `proxy_config.litellm_settings.callbacks` and are +orthogonal to the OTLP variables above; their credentials still go in +`*_extra_secrets`. + ## Tenant deployment Every resource the stack creates is named `${tenant}-litellm-${env}` (or diff --git a/terraform/litellm/aws/ecs.tf b/terraform/litellm/aws/ecs.tf index aee8f0cfc73..c4ec767f7de 100644 --- a/terraform/litellm/aws/ecs.tf +++ b/terraform/litellm/aws/ecs.tf @@ -44,29 +44,48 @@ resource "aws_cloudwatch_log_group" "migrations" { # HOST/PORT/USER/NAME plus an IAM-signed token, so no DB password is needed # in the task definition. 
locals { - shared_env = [ - { name = "IAM_TOKEN_DB_AUTH", value = "true" }, - { name = "DATABASE_HOST", value = aws_rds_cluster.this.endpoint }, - { name = "DATABASE_PORT", value = tostring(aws_rds_cluster.this.port) }, - { name = "DATABASE_USER", value = var.db_username }, - { name = "DATABASE_NAME", value = var.db_name }, - { name = "DATABASE_HOST_READ_REPLICA", value = aws_rds_cluster.this.reader_endpoint }, - { name = "DATABASE_PORT_READ_REPLICA", value = tostring(aws_rds_cluster.this.port) }, - { name = "REDIS_HOST", value = aws_elasticache_replication_group.this.primary_endpoint_address }, - { name = "REDIS_PORT", value = tostring(aws_elasticache_replication_group.this.port) }, - # transit_encryption_enabled = true on the replication group means the - # proxy must connect via rediss://. _redis.get_redis_url_from_environment - # honors REDIS_SSL to flip the scheme. - { name = "REDIS_SSL", value = "true" }, - # S3 bucket — referenced from proxy_config via os.environ/S3_BUCKET_NAME - # (e.g. cache backend, request log archival, /files passthrough). - { name = "S3_BUCKET_NAME", value = aws_s3_bucket.this.bucket }, - { name = "S3_REGION_NAME", value = var.region }, - # boto3 inside generate_iam_auth_token reads AWS_REGION_NAME first, then - # AWS_REGION. Set both for compatibility. - { name = "AWS_REGION", value = var.region }, - { name = "AWS_REGION_NAME", value = var.region }, - ] + # OTel v2 is off by default: no OTel env vars are added until + # otel_endpoint is set. When an endpoint is supplied and + # otel_headers_secret_arn is non-empty, OTEL_HEADERS is sourced from Secrets + # Manager (otel_secrets is appended to shared_secrets below). + otel_enabled = var.otel_endpoint != "" + otel_env = local.otel_enabled ? [ + { name = "LITELLM_OTEL_V2", value = "true" }, + { name = "OTEL_EXPORTER", value = var.otel_exporter }, + { name = "OTEL_ENDPOINT", value = var.otel_endpoint }, + { name = "OTEL_SERVICE_NAME", value = var.otel_service_name != "" ?
var.otel_service_name : local.name }, + { name = "OTEL_ENVIRONMENT_NAME", value = var.env }, + ] : [] + otel_secrets = local.otel_enabled && var.otel_headers_secret_arn != "" ? [ + { name = "OTEL_HEADERS", valueFrom = var.otel_headers_secret_arn }, + ] : [] + + shared_env = concat( + [ + { name = "IAM_TOKEN_DB_AUTH", value = "true" }, + { name = "DATABASE_HOST", value = aws_rds_cluster.this.endpoint }, + { name = "DATABASE_PORT", value = tostring(aws_rds_cluster.this.port) }, + { name = "DATABASE_USER", value = var.db_username }, + { name = "DATABASE_NAME", value = var.db_name }, + { name = "DATABASE_HOST_READ_REPLICA", value = aws_rds_cluster.this.reader_endpoint }, + { name = "DATABASE_PORT_READ_REPLICA", value = tostring(aws_rds_cluster.this.port) }, + { name = "REDIS_HOST", value = aws_elasticache_replication_group.this.primary_endpoint_address }, + { name = "REDIS_PORT", value = tostring(aws_elasticache_replication_group.this.port) }, + # transit_encryption_enabled = true on the replication group means the + # proxy must connect via rediss://. _redis.get_redis_url_from_environment + # honors REDIS_SSL to flip the scheme. + { name = "REDIS_SSL", value = "true" }, + # S3 bucket — referenced from proxy_config via os.environ/S3_BUCKET_NAME + # (e.g. cache backend, request log archival, /files passthrough). + { name = "S3_BUCKET_NAME", value = aws_s3_bucket.this.bucket }, + { name = "S3_REGION_NAME", value = var.region }, + # boto3 inside generate_iam_auth_token reads AWS_REGION_NAME first, then + # AWS_REGION. Set both for compatibility. + { name = "AWS_REGION", value = var.region }, + { name = "AWS_REGION_NAME", value = var.region }, + ], + local.otel_env, + ) shared_secrets = concat( [ @@ -75,6 +94,7 @@ locals { var.litellm_license == "" ? [] : [ { name = "LITELLM_LICENSE", valueFrom = aws_secretsmanager_secret.license[0].arn }, ], + local.otel_secrets, ) # Backend-only managed secrets. 
UI_PASSWORD is consumed by the management @@ -101,20 +121,26 @@ locals { ] # Mirrors the helm chart's gateway.config.create / configmap pattern. - # ECS Fargate has no ConfigMap analogue, so we pass the YAML as a - # base64-encoded env var and decode it at container start via a tiny - # python shim that prepends the image's normal uvicorn entrypoint. + # ECS Fargate has no ConfigMap analogue, so the YAML is uploaded to S3 + # (see aws_s3_object.proxy_config in s3.tf) and the container entrypoint + # downloads it to /tmp/litellm-config.yaml via boto3 before exec'ing + # uvicorn. The S3 object's etag is embedded in the task definition so a + # config edit forces a new task-def revision and a rolling redeploy. proxy_config_enabled = length(keys(var.proxy_config)) > 0 - proxy_config_b64 = local.proxy_config_enabled ? base64encode(yamlencode(var.proxy_config)) : "" + proxy_config_path = "/tmp/litellm-config.yaml" proxy_config_env = local.proxy_config_enabled ? [ - { name = "LITELLM_PROXY_CONFIG_B64", value = local.proxy_config_b64 }, - { name = "CONFIG_FILE_PATH", value = "/tmp/litellm-config.yaml" }, + { name = "CONFIG_FILE_PATH", value = local.proxy_config_path }, + { name = "LITELLM_PROXY_CONFIG_S3_BUCKET", value = aws_s3_bucket.this.bucket }, + { name = "LITELLM_PROXY_CONFIG_S3_KEY", value = aws_s3_object.proxy_config[0].key }, + { name = "LITELLM_PROXY_CONFIG_S3_ETAG", value = aws_s3_object.proxy_config[0].etag }, ] : [] + proxy_config_fetch_cmd = "python -c \"import os, boto3; boto3.client('s3', region_name=os.environ['AWS_REGION']).download_file(os.environ['LITELLM_PROXY_CONFIG_S3_BUCKET'], os.environ['LITELLM_PROXY_CONFIG_S3_KEY'], os.environ['CONFIG_FILE_PATH'])\"" + # Gateway always needs --workers wired in (no NUM_WORKERS env var support # in the image entrypoint). 
When proxy_config is enabled we also have to - # decode the base64 config first, so the command goes through `sh -c`; + # pull the config from S3 first, so the command goes through `sh -c`; # otherwise we keep the image's ENTRYPOINT and only override `command`. gateway_uvicorn_args = "--host 0.0.0.0 --port 4000 --workers ${var.gateway_num_workers}" backend_uvicorn_args = "--host 0.0.0.0 --port 4001" @@ -122,7 +148,7 @@ locals { gateway_proxy_overrides = local.proxy_config_enabled ? { entryPoint = ["sh", "-c"] command = [ - "python -c \"import os, base64, pathlib; pathlib.Path(os.environ['CONFIG_FILE_PATH']).write_bytes(base64.b64decode(os.environ['LITELLM_PROXY_CONFIG_B64']))\" && exec uvicorn gateway.main:app ${local.gateway_uvicorn_args}" + "${local.proxy_config_fetch_cmd} && exec uvicorn gateway.main:app ${local.gateway_uvicorn_args}" ] } : { # Mirror the image's ENTRYPOINT so we can append --workers via command. @@ -133,7 +159,7 @@ locals { backend_proxy_overrides = local.proxy_config_enabled ? 
{ entryPoint = ["sh", "-c"] command = [ - "python -c \"import os, base64, pathlib; pathlib.Path(os.environ['CONFIG_FILE_PATH']).write_bytes(base64.b64decode(os.environ['LITELLM_PROXY_CONFIG_B64']))\" && exec uvicorn backend.main:app ${local.backend_uvicorn_args}" + "${local.proxy_config_fetch_cmd} && exec uvicorn backend.main:app ${local.backend_uvicorn_args}" ] } : {} } diff --git a/terraform/litellm/aws/examples/default/terraform.tfvars.example b/terraform/litellm/aws/examples/default/terraform.tfvars.example index 88d12cf26e2..19eee59a0e1 100644 --- a/terraform/litellm/aws/examples/default/terraform.tfvars.example +++ b/terraform/litellm/aws/examples/default/terraform.tfvars.example @@ -79,3 +79,12 @@ env = "stage" # OPENAI_API_KEY = "arn:aws:secretsmanager:us-west-2:111122223333:secret:openai-api-key-AbCdEf" # ANTHROPIC_API_KEY = "arn:aws:secretsmanager:us-west-2:111122223333:secret:anthropic-api-key-GhIjKl" # } + +# ---------- OpenTelemetry v2 ---------- +# Set otel_endpoint to a non-empty value to enable OTel v2 on gateway and +# backend (LITELLM_OTEL_V2=true + OTEL_EXPORTER/OTEL_ENDPOINT/OTEL_SERVICE_NAME/ +# OTEL_ENVIRONMENT_NAME). Empty disables it entirely. +# otel_endpoint = "http://otel-collector.internal:4318" +# otel_exporter = "otlp_http" # otlp_grpc, console +# otel_service_name = "" # defaults to "<tenant>-litellm-<env>" +# otel_headers_secret_arn = "arn:aws:secretsmanager:us-west-2:111122223333:secret:honeycomb-otel-headers-AbCdEf" diff --git a/terraform/litellm/aws/iam.tf b/terraform/litellm/aws/iam.tf index f425f1a00e7..64e1b1ad5f9 100644 --- a/terraform/litellm/aws/iam.tf +++ b/terraform/litellm/aws/iam.tf @@ -54,6 +54,7 @@ data "aws_iam_policy_document" "secrets_access" { aws_secretsmanager_secret.license[*].arn, aws_secretsmanager_secret.ui_password[*].arn, local.extra_secret_arns, + var.otel_headers_secret_arn == "" ?
[] : [var.otel_headers_secret_arn], ) } } diff --git a/terraform/litellm/aws/s3.tf b/terraform/litellm/aws/s3.tf index 218949ebd03..a666a790c0c 100644 --- a/terraform/litellm/aws/s3.tf +++ b/terraform/litellm/aws/s3.tf @@ -82,3 +82,22 @@ resource "aws_iam_role_policy_attachment" "task_s3_access" { role = aws_iam_role.task.name policy_arn = aws_iam_policy.s3_access.arn } + +# proxy_config is uploaded as an S3 object so the gateway and backend +# containers can fetch it at startup instead of carrying the YAML inline +# as a base64 env var. ECS Fargate has no native S3 volume type, so +# "mount" here is: container entrypoint runs a boto3 download_file into +# /tmp/litellm-config.yaml before exec'ing uvicorn. The task role already +# has s3:GetObject on this bucket via aws_iam_policy.s3_access. +# +# etag flows into the task definition (see locals.proxy_config_env in +# ecs.tf) so a config edit produces a new task-def revision and ECS rolls +# both services automatically. +resource "aws_s3_object" "proxy_config" { + count = length(keys(var.proxy_config)) > 0 ? 1 : 0 + + bucket = aws_s3_bucket.this.id + key = "config/litellm-config.yaml" + content = yamlencode(var.proxy_config) + content_type = "application/yaml" +} diff --git a/terraform/litellm/aws/variables.tf b/terraform/litellm/aws/variables.tf index 8bd505eb9a0..8a2a487b642 100644 --- a/terraform/litellm/aws/variables.tf +++ b/terraform/litellm/aws/variables.tf @@ -420,10 +420,12 @@ variable "backend_extra_secrets" { variable "proxy_config" { description = <<-EOT LiteLLM proxy config (the contents of config.yaml). Mirrors the helm - chart's `gateway.config.proxy_config` value. Passed to gateway, backend, - and the migration task as a base64-encoded env var and decoded to - /tmp/litellm-config.yaml at container start; CONFIG_FILE_PATH is set - automatically. + chart's `gateway.config.proxy_config` value. 
Uploaded to S3 under + `config/litellm-config.yaml` in the stack's bucket; gateway and backend + container entrypoints download it to /tmp/litellm-config.yaml at task + start (CONFIG_FILE_PATH is set automatically). The S3 object's etag is + wired into the task definition, so editing this value produces a new + task-def revision and a rolling redeploy. Example: proxy_config = { @@ -456,3 +458,58 @@ variable "log_retention_days" { type = number default = 30 } + +# ---------- OpenTelemetry v2 ---------- +# +# https://docs.litellm.ai/docs/observability/opentelemetry_v2 +# +# Setting otel_endpoint to a non-empty value turns OTel v2 on for both gateway +# and backend (LITELLM_OTEL_V2=true plus OTEL_EXPORTER/OTEL_ENDPOINT/ +# OTEL_SERVICE_NAME/OTEL_ENVIRONMENT_NAME are added to shared_env). Empty +# endpoint = nothing added to the container env. + +variable "otel_endpoint" { + description = <<-EOT + OTLP collector endpoint (sets OTEL_ENDPOINT / OTEL_EXPORTER_OTLP_ENDPOINT). + Empty disables OTel export. Point at any OTLP-compatible backend + (self-hosted collector, Grafana Tempo, Honeycomb, Datadog, etc.). Example: + "http://otel-collector.internal:4318" for OTLP/HTTP. + EOT + type = string + default = "" +} + +variable "otel_exporter" { + description = <<-EOT + OTLP exporter protocol. One of "otlp_http", "otlp_grpc", or "console" + (stdout, useful for verifying instrumentation against CloudWatch logs). + Ignored when otel_endpoint is empty. + EOT + type = string + default = "otlp_http" + + validation { + condition = contains(["otlp_http", "otlp_grpc", "console"], var.otel_exporter) + error_message = "otel_exporter must be one of: otlp_http, otlp_grpc, console." + } +} + +variable "otel_service_name" { + description = <<-EOT + OTEL_SERVICE_NAME resource attribute. Defaults to the stack name + (`<tenant>-litellm-<env>`).
+ EOT + type = string + default = "" +} + +variable "otel_headers_secret_arn" { + description = <<-EOT + Secrets Manager ARN whose plaintext value becomes OTEL_HEADERS + (comma-separated `key=value` pairs, typically used to pass an API key + header to a managed collector). The execution role auto-gains + secretsmanager:GetSecretValue on this ARN. Empty omits OTEL_HEADERS. + EOT + type = string + default = "" +} From 91996ef43b4c5876c9ae177961cff747071751c5 Mon Sep 17 00:00:00 2001 From: Yassin Kortam Date: Sat, 6 Jun 2026 10:23:41 -0700 Subject: [PATCH 07/10] feat(terraform/gcp): add DeployStack one-click installer Wires up a Cloud Shell "Open in Cloud Shell" badge backed by the GoogleCloudPlatform DeployStack flow so examples/default can be installed from a click in the README without a local terraform setup. - examples/default/deploystack.json drives project/region collection plus prompts for tenant, env, image_tag, and allow_plaintext_lb. Complex inputs (proxy_config, *_extra_secrets, lb_domains) and sensitive vars (litellm_master_key, litellm_license, ui_password) stay tfvars / env only so they never land in a committed file. - examples/default/TUTORIAL.md is a Cloud Shell walkthrough that enables required APIs, creates the GHCR-passthrough Artifact Registry repo, optionally exports the TF_VAR_* secrets, runs `deploystack install`, and shows how to fetch the master key plus migrate from plaintext LB to TLS. - Renames var.project to var.project_id across the module and the examples/default wrapper to match the variable DeployStack injects from `collect_project: true`. Breaking rename for anyone with a `project = ...` line in terraform.tfvars; the fix is one line. 
--- terraform/litellm/gcp/README.md | 6 +- terraform/litellm/gcp/bootstrap.tf | 2 +- terraform/litellm/gcp/cloudrun.tf | 6 +- .../litellm/gcp/examples/default/TUTORIAL.md | 134 ++++++++++++++++++ .../gcp/examples/default/deploystack.json | 37 +++++ .../litellm/gcp/examples/default/main.tf | 8 +- .../litellm/gcp/examples/default/providers.tf | 4 +- .../examples/default/terraform.tfvars.example | 4 +- .../litellm/gcp/examples/default/variables.tf | 2 +- terraform/litellm/gcp/gcs.tf | 2 +- terraform/litellm/gcp/iam.tf | 2 +- terraform/litellm/gcp/outputs.tf | 2 +- terraform/litellm/gcp/variables.tf | 2 +- 13 files changed, 193 insertions(+), 18 deletions(-) create mode 100644 terraform/litellm/gcp/examples/default/TUTORIAL.md create mode 100644 terraform/litellm/gcp/examples/default/deploystack.json diff --git a/terraform/litellm/gcp/README.md b/terraform/litellm/gcp/README.md index 26702e231f0..9ab1723a8b5 100644 --- a/terraform/litellm/gcp/README.md +++ b/terraform/litellm/gcp/README.md @@ -1,5 +1,9 @@ # LiteLLM on GCP (Cloud Run) +[![Open in Cloud Shell](https://gstatic.com/cloudssh/images/open-btn.svg)](https://ssh.cloud.google.com/cloudshell/editor?cloudshell_git_repo=https%3A%2F%2Fgithub.com%2FBerriAI%2Flitellm&cloudshell_workspace=terraform%2Flitellm%2Fgcp%2Fexamples%2Fdefault&cloudshell_tutorial=TUTORIAL.md&cloudshell_image=gcr.io/ds-artifacts-cloudshell/deploystack_custom_image&shellonly=true) + +The button above opens the [DeployStack](https://github.com/GoogleCloudPlatform/deploystack) installer in Cloud Shell, walks you through `TUTORIAL.md`, and runs `terraform apply` once you've answered the prompts. The rest of this README is the manual / advanced path. + Deploys the componentized LiteLLM proxy on GCP: - **VPC** + Private Services Access range + a Serverless VPC Access connector @@ -186,7 +190,7 @@ export TF_VAR_litellm_master_key="sk-..." # the tenant's master key export TF_VAR_litellm_license="lic-..." 
# their LITELLM_LICENSE terraform apply \ - -var "project=my-gcp-project" \ + -var "project_id=my-gcp-project" \ -var "region=us-central1" \ -var "tenant=acme" \ -var "env=stage" diff --git a/terraform/litellm/gcp/bootstrap.tf b/terraform/litellm/gcp/bootstrap.tf index 47ad885ff12..b929c4d76f3 100644 --- a/terraform/litellm/gcp/bootstrap.tf +++ b/terraform/litellm/gcp/bootstrap.tf @@ -25,7 +25,7 @@ resource "terraform_data" "migration" { environment = { JOB = google_cloud_run_v2_job.migrations.name REGION = var.region - PROJECT = var.project + PROJECT = var.project_id } command = <<-EOT set -euo pipefail diff --git a/terraform/litellm/gcp/cloudrun.tf b/terraform/litellm/gcp/cloudrun.tf index 28e1145b081..a0c415cf459 100644 --- a/terraform/litellm/gcp/cloudrun.tf +++ b/terraform/litellm/gcp/cloudrun.tf @@ -344,7 +344,7 @@ resource "google_cloud_run_v2_service" "ui" { # (LITELLM_MASTER_KEY); these IAM bindings just open up Cloud Run's invoker # gate so the LB request makes it to the container. 
resource "google_cloud_run_v2_service_iam_member" "gateway_allusers" { - project = var.project + project = var.project_id location = google_cloud_run_v2_service.gateway.location name = google_cloud_run_v2_service.gateway.name role = "roles/run.invoker" @@ -352,7 +352,7 @@ resource "google_cloud_run_v2_service_iam_member" "gateway_allusers" { } resource "google_cloud_run_v2_service_iam_member" "backend_allusers" { - project = var.project + project = var.project_id location = google_cloud_run_v2_service.backend.location name = google_cloud_run_v2_service.backend.name role = "roles/run.invoker" @@ -360,7 +360,7 @@ resource "google_cloud_run_v2_service_iam_member" "backend_allusers" { } resource "google_cloud_run_v2_service_iam_member" "ui_allusers" { - project = var.project + project = var.project_id location = google_cloud_run_v2_service.ui.location name = google_cloud_run_v2_service.ui.name role = "roles/run.invoker" diff --git a/terraform/litellm/gcp/examples/default/TUTORIAL.md b/terraform/litellm/gcp/examples/default/TUTORIAL.md new file mode 100644 index 00000000000..9026207563d --- /dev/null +++ b/terraform/litellm/gcp/examples/default/TUTORIAL.md @@ -0,0 +1,134 @@ +# Deploy LiteLLM on GCP + + + +This walkthrough provisions the full LiteLLM stack on GCP via Cloud Run, Cloud SQL, Memorystore Redis, and an external HTTPS load balancer. You'll answer a few prompts; DeployStack writes a `terraform.tfvars` and runs `terraform apply` against the project you select. + +## Prerequisites + + + +Pick the GCP project you want to deploy into, then make sure billing is enabled on it. The stack provisions paid resources (Cloud SQL, Memorystore, an LB anycast IP). + +## Enable required APIs + +The stack needs these APIs enabled in the target project. Click to enable, or run the gcloud command below. 
+ + +```bash +gcloud services enable \ + run.googleapis.com \ + sqladmin.googleapis.com \ + redis.googleapis.com \ + secretmanager.googleapis.com \ + vpcaccess.googleapis.com \ + compute.googleapis.com \ + servicenetworking.googleapis.com \ + storage.googleapis.com \ + artifactregistry.googleapis.com +``` + +## Create the Artifact Registry passthrough to GHCR + +Cloud Run only pulls from Artifact Registry, `gcr.io`, or `docker.io`; it rejects `ghcr.io` URIs at apply time. The four LiteLLM images live on GHCR, so the stack needs a remote Artifact Registry repo pointed at GHCR. This is a one-time setup per project. + +```bash +gcloud artifacts repositories create litellm \ + --repository-format=docker \ + --location=<region> \ + --mode=remote-repository \ + --remote-repo-config-desc="GitHub Container Registry passthrough" \ + --remote-docker-repo=https://ghcr.io +``` + +If the repo already exists, this command exits with a clear error and you can move on. Then set `image_registry` in `terraform.tfvars` to `<region>-docker.pkg.dev/<project-id>/litellm/berriai` before applying. + +## (Optional) Set tenant secrets + +The stack auto-generates a `LITELLM_MASTER_KEY` if you don't supply one. If you have an enterprise license or want a pre-chosen master key, export them as `TF_VAR_*` env vars before running the installer so they end up in Secret Manager but not in `terraform.tfvars`. + +```bash +export TF_VAR_litellm_master_key="sk-..." # optional; auto-generated if omitted +export TF_VAR_litellm_license="lic-..." # optional; OSS-only without it +export TF_VAR_ui_password="..." # optional; falls back to master_key for UI login +``` + +Skip this step entirely for a trial deploy. + +## Run the installer + +DeployStack will prompt for project, region, tenant, env, image tag, and TLS posture, then run `terraform apply`. Open `deploystack.json` if you want to see the prompt definitions first. 
+ +```bash +deploystack install +``` + +The first apply takes 20-25 minutes; most of that is Cloud SQL provisioning. The migration Cloud Run Job runs automatically once the database is ready, and only then do gateway, backend, and UI start. + +## Grab the LB URL + +```bash +terraform output lb_url +``` + +For trial deploys (`allow_plaintext_lb=true`), this is `http://<lb-ip>`. The UI lives at `/ui`; sign in with username `admin` and the master key: + +```bash +gcloud secrets versions access latest \ + --secret="$(terraform output -raw master_key_secret_id)" +``` + +## Going to TLS + +If you picked `allow_plaintext_lb=true` to bootstrap but want HTTPS for real, point a DNS A record at the LB IP, then re-run terraform with `lb_domains` set and `allow_plaintext_lb` removed: + +```bash +terraform apply \ + -var 'lb_domains=["proxy.example.com"]' +``` + +Google-managed certs sit in `PROVISIONING` for 15-60 minutes after DNS propagates. You can watch the state with `gcloud compute ssl-certificates describe <tenant>-litellm-<env>-cert`. + +## Adding provider API keys + +Provider keys (OpenAI, Anthropic, etc.) belong in Secret Manager, not in `terraform.tfvars`. Create the secret first, then reference its resource ID from `gateway_extra_secrets` and re-apply: + +```bash +echo -n "sk-proj-..." | gcloud secrets create openai-api-key --data-file=- +``` + +Edit `terraform.tfvars`: + +```hcl +gateway_extra_secrets = { + OPENAI_API_KEY = "projects/<project-id>/secrets/openai-api-key" +} +proxy_config = { + model_list = [ + { + model_name = "gpt-4o" + litellm_params = { + model = "openai/gpt-4o" + api_key = "os.environ/OPENAI_API_KEY" + } + }, + ] +} +``` + +Then `terraform apply`. + +## Tearing it all down + +```bash +deploystack uninstall +``` + +`cloudsql_deletion_protection` is `true` by default; flip it to `false` in `terraform.tfvars` and apply before uninstalling if you actually want the DB gone. Same goes for `gcs_force_destroy` on the bucket. 
+ +## You're done + + + +Full configuration reference is in `README.md`, and every input variable on the underlying module lives in `variables.tf`. diff --git a/terraform/litellm/gcp/examples/default/deploystack.json b/terraform/litellm/gcp/examples/default/deploystack.json new file mode 100644 index 00000000000..6e5339272e7 --- /dev/null +++ b/terraform/litellm/gcp/examples/default/deploystack.json @@ -0,0 +1,37 @@ +{ + "title": "LiteLLM on GCP (Cloud Run)", + "name": "litellm-gcp", + "description": "Deploys the LiteLLM proxy on GCP: Cloud Run gateway/backend/UI, Cloud SQL with a read replica, Memorystore Redis, a GCS bucket, Secret Manager entries, and an external HTTPS load balancer. Takes ~20-25 minutes on the first apply.", + "duration": 25, + "documentation_link": "https://github.com/BerriAI/litellm/blob/main/terraform/litellm/gcp/README.md", + "collect_project": true, + "collect_region": true, + "region_type": "run", + "region_default": "us-central1", + "collect_zone": false, + "custom_settings": [ + { + "name": "tenant", + "description": "Tenant slug used as the prefix for every GCP resource the stack creates (e.g. 'acme' produces 'acme-litellm-<env>-gateway'). 1-21 lowercase chars starting with a letter", + "default": "acme", + "validation": "^[a-z][a-z0-9-]{0,20}$" + }, + { + "name": "env", + "description": "Environment suffix appended to every resource name (e.g. 'stage', 'prod', 'dev'). 1-9 lowercase chars starting with a letter", + "default": "stage", + "validation": "^[a-z][a-z0-9-]{0,8}$" + }, + { + "name": "image_tag", + "description": "Tag for the four litellm-* images (gateway, backend, ui, migrations). Bump together when bumping LiteLLM", + "default": "v1.86.0-dev" + }, + { + "name": "allow_plaintext_lb", + "description": "Skip TLS on the load balancer (HTTP-only). Set true for trial/dev. 
For production, leave false and add lb_domains to terraform.tfvars after the first apply", + "default": "true", + "options": ["true", "false"] + } + ] +} diff --git a/terraform/litellm/gcp/examples/default/main.tf b/terraform/litellm/gcp/examples/default/main.tf index a575dc4fc8a..8760d445f0c 100644 --- a/terraform/litellm/gcp/examples/default/main.tf +++ b/terraform/litellm/gcp/examples/default/main.tf @@ -26,10 +26,10 @@ module "litellm" { source = "../../" - project = var.project - region = var.region - tenant = var.tenant - env = var.env + project_id = var.project_id + region = var.region + tenant = var.tenant + env = var.env litellm_master_key = var.litellm_master_key litellm_license = var.litellm_license diff --git a/terraform/litellm/gcp/examples/default/providers.tf b/terraform/litellm/gcp/examples/default/providers.tf index 4b79367fe09..d4a9836e887 100644 --- a/terraform/litellm/gcp/examples/default/providers.tf +++ b/terraform/litellm/gcp/examples/default/providers.tf @@ -7,11 +7,11 @@ # `google-beta` configs automatically through the module call, so project # and region set here flow into every resource that doesn't pass its own. provider "google" { - project = var.project + project = var.project_id region = var.region } provider "google-beta" { - project = var.project + project = var.project_id region = var.region } diff --git a/terraform/litellm/gcp/examples/default/terraform.tfvars.example b/terraform/litellm/gcp/examples/default/terraform.tfvars.example index eff338ca240..bc505bbbf4d 100644 --- a/terraform/litellm/gcp/examples/default/terraform.tfvars.example +++ b/terraform/litellm/gcp/examples/default/terraform.tfvars.example @@ -1,5 +1,5 @@ -project = "my-gcp-project" -region = "us-central1" +project_id = "my-gcp-project" +region = "us-central1" # Resource naming: every GCP resource the stack creates is named # `${tenant}-litellm-${env}` (or that plus a per-resource suffix). E.g. 
diff --git a/terraform/litellm/gcp/examples/default/variables.tf b/terraform/litellm/gcp/examples/default/variables.tf index 745a5e5d76b..56e5ec88ef8 100644 --- a/terraform/litellm/gcp/examples/default/variables.tf +++ b/terraform/litellm/gcp/examples/default/variables.tf @@ -5,7 +5,7 @@ # main.tf, or call the module from your own root config. Full per-variable # docs live in ../../variables.tf — the module is the source of truth. -variable "project" { +variable "project_id" { description = "GCP project ID." type = string } diff --git a/terraform/litellm/gcp/gcs.tf b/terraform/litellm/gcp/gcs.tf index 86511d38a31..7504ac5e50e 100644 --- a/terraform/litellm/gcp/gcs.tf +++ b/terraform/litellm/gcp/gcs.tf @@ -7,7 +7,7 @@ resource "random_id" "bucket_suffix" { } resource "google_storage_bucket" "this" { - name = "${var.project}-${local.name}-${random_id.bucket_suffix.hex}" + name = "${var.project_id}-${local.name}-${random_id.bucket_suffix.hex}" location = var.region uniform_bucket_level_access = true force_destroy = var.gcs_force_destroy diff --git a/terraform/litellm/gcp/iam.tf b/terraform/litellm/gcp/iam.tf index 93a9997ed7a..252f5de25ac 100644 --- a/terraform/litellm/gcp/iam.tf +++ b/terraform/litellm/gcp/iam.tf @@ -21,7 +21,7 @@ resource "google_service_account" "ui_runtime" { # Cloud SQL client — lets the Cloud Run services connect to the instance # over private IP via the VPC connector. 
resource "google_project_iam_member" "runtime_cloudsql" { - project = var.project + project = var.project_id role = "roles/cloudsql.client" member = "serviceAccount:${google_service_account.runtime.email}" } diff --git a/terraform/litellm/gcp/outputs.tf b/terraform/litellm/gcp/outputs.tf index df25215adcc..6f1f1d5ccf4 100644 --- a/terraform/litellm/gcp/outputs.tf +++ b/terraform/litellm/gcp/outputs.tf @@ -59,6 +59,6 @@ output "migration_run_command" { "gcloud run jobs execute %s --region %s --project %s --wait", google_cloud_run_v2_job.migrations.name, var.region, - var.project, + var.project_id, ) } diff --git a/terraform/litellm/gcp/variables.tf b/terraform/litellm/gcp/variables.tf index fe726b0317a..b99e1f95e18 100644 --- a/terraform/litellm/gcp/variables.tf +++ b/terraform/litellm/gcp/variables.tf @@ -1,4 +1,4 @@ -variable "project" { +variable "project_id" { description = "GCP project ID." type = string } From c1492a51f27e5ffdf533f9742dca7344059cc3da Mon Sep 17 00:00:00 2001 From: Yassin Kortam Date: Sat, 6 Jun 2026 10:29:27 -0700 Subject: [PATCH 08/10] feat(terraform/gcp): mount proxy_config from GCS and wire OpenTelemetry v2 proxy_config Drop the inline LITELLM_PROXY_CONFIG_B64 env var and the python-decode startup fragment. Upload the YAML to a dedicated GCS bucket as config.yaml, then mount it read-only into the gateway and backend at /etc/litellm via Cloud Run v2's gcsfuse volume. CONFIG_FILE_PATH points at the mount; an md5 of the YAML rides along as PROXY_CONFIG_HASH so a config-only edit forces a new Cloud Run revision (gcsfuse only surfaces new objects on container restart, so without the hash an updated proxy_config would sit in the bucket unread). The config bucket is separate from the data-plane bucket so the runtime SA can hold objectViewer here (read-only at runtime) while keeping objectAdmin on the data-plane bucket. Both bucket and IAM binding are gated on proxy_config != {}; an empty config skips bucket creation and mounts nothing. 
OpenTelemetry v2 LITELLM_OTEL_V2=true is now wired into shared_env_kv unconditionally so both the gateway and backend boot with the integration enabled. It's dormant until otel_endpoint is non-empty; setting it injects OTEL_EXPORTER / OTEL_ENDPOINT / OTEL_ENVIRONMENT_NAME plus a per-component OTEL_SERVICE_NAME (\${tenant}-litellm-\${env}-{gateway,backend}) so spans land tagged with the right hop. otel_headers_secret takes a Secret Manager resource ID for OTEL_HEADERS (collector auth); the runtime SA auto-gains roles/secretmanager.secretAccessor on it. otel_capture_message_content defaults to no_content matching the litellm default. Any OTEL_* key set in *_extra_env wins over the defaults so Cloud Run doesn't reject the apply on the duplicate-env-name check. --- terraform/litellm/gcp/README.md | 37 +++++++- terraform/litellm/gcp/cloudrun.tf | 90 ++++++++++++++++--- .../examples/default/terraform.tfvars.example | 8 ++ terraform/litellm/gcp/gcs.tf | 40 +++++++++ terraform/litellm/gcp/iam.tf | 10 +++ terraform/litellm/gcp/locals.tf | 13 ++- terraform/litellm/gcp/variables.tf | 87 ++++++++++++++++-- 7 files changed, 261 insertions(+), 24 deletions(-) diff --git a/terraform/litellm/gcp/README.md b/terraform/litellm/gcp/README.md index 9ab1723a8b5..79f624d4b26 100644 --- a/terraform/litellm/gcp/README.md +++ b/terraform/litellm/gcp/README.md @@ -110,9 +110,13 @@ Unix socket. ### `proxy_config` Mirrors the helm chart's `gateway.config.proxy_config`. The map is -YAML-encoded and base64-passed to gateway, backend, and the migration job; -each container decodes it to `/tmp/litellm-config.yaml` at startup and sets -`CONFIG_FILE_PATH`. +YAML-encoded and uploaded to a dedicated GCS bucket as `config.yaml`, then +mounted read-only into the gateway and backend at `/etc/litellm` via Cloud +Run v2's gcsfuse volume. `CONFIG_FILE_PATH` points at the mount path. 
A +hash of the YAML rides along as an env var so an edit to `proxy_config` +forces a new Cloud Run revision; without it the new file would sit in the +bucket unread until the next unrelated revision rollover. The migrations +job doesn't get the config (it only runs `prisma migrate deploy`). ```hcl proxy_config = { @@ -168,6 +172,33 @@ reject the version suffix; version is always resolved as `latest`. If you need a pinned version, edit `local.gateway_extra_secret_kv` in `cloudrun.tf` directly to set `version = "3"` for the entry in question. +### OpenTelemetry v2 + +`LITELLM_OTEL_V2=true` is wired into both the gateway and backend by default +(see [OpenTelemetry v2 docs](https://docs.litellm.ai/docs/observability/opentelemetry_v2)). +The flag is dormant until `otel_endpoint` is non-empty; with an empty +endpoint nothing exports and the integration is effectively off. + +```hcl +otel_endpoint = "https://otel.example.com:4318" +otel_exporter = "otlp_http" # or otlp_grpc +otel_environment_name = "prod" # default: var.env +otel_headers_secret = "projects/my-gcp-project/secrets/otel-headers" +``` + +`OTEL_SERVICE_NAME` is set per component (`${tenant}-litellm-${env}-gateway` +and `-backend`) so spans land tagged with the right hop. `OTEL_HEADERS` +is wired as a Secret Manager `secret_key_ref` since it typically carries +the collector's auth token; create the secret with the literal header +string, e.g. `Authorization=Bearer <token>`. Any `OTEL_*` key set in +`gateway_extra_env` / `backend_extra_env` overrides the default for that +service. + +`OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` defaults to +`no_content` — flip `otel_capture_message_content = +"prompt_and_completion"` only after auditing what lands in the backend, +since prompts and completions are typically sensitive. 
+ ## Tenant deployment Every resource the stack creates is named `${tenant}-litellm-${env}` (or diff --git a/terraform/litellm/gcp/cloudrun.tf b/terraform/litellm/gcp/cloudrun.tf index a0c415cf459..cb8d0a9da2d 100644 --- a/terraform/litellm/gcp/cloudrun.tf +++ b/terraform/litellm/gcp/cloudrun.tf @@ -24,8 +24,39 @@ locals { { name = "REDIS_SSL_CA_CERTS", value = "/tmp/redis-ca.pem" }, { name = "REDIS_CA_PEM_B64", value = local.redis_ca_pem_b64 }, { name = "GCS_BUCKET_NAME", value = google_storage_bucket.this.name }, + # OTel v2 master switch. Dormant until otel_endpoint is set; see + # otel_env below and the otel_endpoint variable for the gate. + { name = "LITELLM_OTEL_V2", value = "true" }, ] + otel_enabled = var.otel_endpoint != "" + otel_environment_name = var.otel_environment_name != "" ? var.otel_environment_name : var.env + otel_capture_kv = local.otel_enabled ? [{ name = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", value = var.otel_capture_message_content }] : [] + otel_shared_endpoint_kv = local.otel_enabled ? [ + { name = "OTEL_EXPORTER", value = var.otel_exporter }, + { name = "OTEL_ENDPOINT", value = var.otel_endpoint }, + { name = "OTEL_ENVIRONMENT_NAME", value = local.otel_environment_name }, + ] : [] + # OTel defaults are filtered out when the same key appears in + # *_extra_env, so a caller-supplied OTEL_SERVICE_NAME (or any other + # OTEL_*) takes precedence without colliding at Cloud Run apply time + # (Cloud Run rejects duplicate env var names). + gateway_otel_env_kv_raw = concat(local.otel_shared_endpoint_kv, local.otel_capture_kv, local.otel_enabled ? [ + { name = "OTEL_SERVICE_NAME", value = "${local.name}-gateway" }, + ] : []) + backend_otel_env_kv_raw = concat(local.otel_shared_endpoint_kv, local.otel_capture_kv, local.otel_enabled ? 
[ + { name = "OTEL_SERVICE_NAME", value = "${local.name}-backend" }, + ] : []) + gateway_otel_env_kv = [ + for e in local.gateway_otel_env_kv_raw : e if !contains(keys(var.gateway_extra_env), e.name) + ] + backend_otel_env_kv = [ + for e in local.backend_otel_env_kv_raw : e if !contains(keys(var.backend_extra_env), e.name) + ] + otel_env_secrets = var.otel_headers_secret != "" ? [ + { name = "OTEL_HEADERS", secret = var.otel_headers_secret, version = "latest" }, + ] : [] + # Cloud Run v2 secret env vars use value_source.secret_key_ref pointing at a # secret resource ID. Shared between gateway and backend (the migrations # job has its own narrower env list — see migrations_env_secrets below). @@ -63,13 +94,6 @@ locals { for k, v in var.backend_extra_secrets : { name = k, secret = v, version = "latest" } ] - # Shell fragments composed with && so any failure short-circuits the - # whole startup instead of falling through to `exec uvicorn`. The - # python step is only included when the caller provided a proxy_config. - proxy_config_fragment = local.proxy_config_enabled ? [ - "python -c \"import os, base64, pathlib; pathlib.Path(os.environ['CONFIG_FILE_PATH']).write_bytes(base64.b64decode(os.environ['LITELLM_PROXY_CONFIG_B64']))\"" - ] : [] - # Decode the Memorystore CA cert (passed as REDIS_CA_PEM_B64) to the # path REDIS_SSL_CA_CERTS points at, so the redis-py client can validate # the rediss:// handshake. 
@@ -83,14 +107,12 @@ locals { ] gateway_args = join(" && ", concat( - local.proxy_config_fragment, local.redis_ca_fragment, local.database_url_fragment, ["exec uvicorn gateway.main:app --host 0.0.0.0 --port 4000"], )) backend_args = join(" && ", concat( - local.proxy_config_fragment, local.redis_ca_fragment, local.database_url_fragment, ["exec uvicorn backend.main:app --host 0.0.0.0 --port 4001"], @@ -149,7 +171,7 @@ resource "google_cloud_run_v2_service" "gateway" { } dynamic "env" { - for_each = concat(local.shared_env_kv, local.gateway_extra_env_kv, local.proxy_config_env) + for_each = concat(local.shared_env_kv, local.gateway_otel_env_kv, local.gateway_extra_env_kv, local.proxy_config_env) content { name = env.value.name value = env.value.value @@ -157,7 +179,7 @@ resource "google_cloud_run_v2_service" "gateway" { } dynamic "env" { - for_each = concat(local.shared_env_secrets, local.gateway_extra_secret_kv) + for_each = concat(local.shared_env_secrets, local.otel_env_secrets, local.gateway_extra_secret_kv) content { name = env.value.name value_source { @@ -169,6 +191,14 @@ resource "google_cloud_run_v2_service" "gateway" { } } + dynamic "volume_mounts" { + for_each = local.proxy_config_enabled ? [1] : [] + content { + name = local.proxy_config_volume + mount_path = local.proxy_config_mount_path + } + } + startup_probe { http_get { path = "/health/readiness" @@ -189,6 +219,17 @@ resource "google_cloud_run_v2_service" "gateway" { timeout_seconds = 5 } } + + dynamic "volumes" { + for_each = local.proxy_config_enabled ? 
[1] : [] + content { + name = local.proxy_config_volume + gcs { + bucket = google_storage_bucket.proxy_config[0].name + read_only = true + } + } + } } depends_on = [ @@ -196,6 +237,8 @@ resource "google_cloud_run_v2_service" "gateway" { google_secret_manager_secret_iam_member.db_password, google_secret_manager_secret_iam_member.license, google_secret_manager_secret_iam_member.extras, + google_secret_manager_secret_iam_member.otel_headers, + google_storage_bucket_iam_member.proxy_config_runtime, google_sql_user.app, # Don't go live until the schema is migrated; otherwise the proxy boots, # fails on missing tables, and Cloud Run keeps cold-restarting. @@ -240,7 +283,7 @@ resource "google_cloud_run_v2_service" "backend" { } dynamic "env" { - for_each = concat(local.shared_env_kv, local.backend_default_env_kv, local.backend_extra_env_kv, local.proxy_config_env) + for_each = concat(local.shared_env_kv, local.backend_default_env_kv, local.backend_otel_env_kv, local.backend_extra_env_kv, local.proxy_config_env) content { name = env.value.name value = env.value.value @@ -248,7 +291,7 @@ resource "google_cloud_run_v2_service" "backend" { } dynamic "env" { - for_each = concat(local.shared_env_secrets, local.backend_managed_env_secrets, local.backend_extra_secret_kv) + for_each = concat(local.shared_env_secrets, local.backend_managed_env_secrets, local.otel_env_secrets, local.backend_extra_secret_kv) content { name = env.value.name value_source { @@ -260,6 +303,14 @@ resource "google_cloud_run_v2_service" "backend" { } } + dynamic "volume_mounts" { + for_each = local.proxy_config_enabled ? [1] : [] + content { + name = local.proxy_config_volume + mount_path = local.proxy_config_mount_path + } + } + startup_probe { http_get { path = "/health/readiness" @@ -280,6 +331,17 @@ resource "google_cloud_run_v2_service" "backend" { timeout_seconds = 5 } } + + dynamic "volumes" { + for_each = local.proxy_config_enabled ? 
[1] : [] + content { + name = local.proxy_config_volume + gcs { + bucket = google_storage_bucket.proxy_config[0].name + read_only = true + } + } + } } depends_on = [ @@ -288,6 +350,8 @@ resource "google_cloud_run_v2_service" "backend" { google_secret_manager_secret_iam_member.license, google_secret_manager_secret_iam_member.ui_password, google_secret_manager_secret_iam_member.extras, + google_secret_manager_secret_iam_member.otel_headers, + google_storage_bucket_iam_member.proxy_config_runtime, google_sql_user.app, terraform_data.migration, ] diff --git a/terraform/litellm/gcp/examples/default/terraform.tfvars.example b/terraform/litellm/gcp/examples/default/terraform.tfvars.example index bc505bbbf4d..7be8cec062a 100644 --- a/terraform/litellm/gcp/examples/default/terraform.tfvars.example +++ b/terraform/litellm/gcp/examples/default/terraform.tfvars.example @@ -75,3 +75,11 @@ env = "stage" # OPENAI_API_KEY = "projects/my-gcp-project/secrets/openai-api-key" # ANTHROPIC_API_KEY = "projects/my-gcp-project/secrets/anthropic-api-key" # } + +# ---------- OpenTelemetry v2 ---------- +# LITELLM_OTEL_V2=true is always set on the gateway and backend; nothing +# exports until you point them at a collector. To enable export, set +# otel_endpoint (and optionally otel_exporter, otel_headers_secret, +# otel_environment_name, otel_capture_message_content) directly on the +# `module "litellm"` block in main.tf — these aren't wrapper vars in this +# example. Full docs in ../../variables.tf. diff --git a/terraform/litellm/gcp/gcs.tf b/terraform/litellm/gcp/gcs.tf index 7504ac5e50e..43ee6c7b2f6 100644 --- a/terraform/litellm/gcp/gcs.tf +++ b/terraform/litellm/gcp/gcs.tf @@ -27,3 +27,43 @@ resource "google_storage_bucket_iam_member" "runtime" { role = "roles/storage.objectAdmin" member = "serviceAccount:${google_service_account.runtime.email}" } + +# Dedicated bucket holding only config.yaml. Mounted read-only into the +# gateway and backend via Cloud Run v2's gcsfuse volume. 
Kept separate from +# the data-plane bucket above so the runtime SA can hold a narrower +# objectViewer binding here (config is read-only at runtime) while keeping +# objectAdmin on the data-plane bucket. Only created when proxy_config is +# non-empty. +resource "google_storage_bucket" "proxy_config" { + count = local.proxy_config_enabled ? 1 : 0 + + name = "${var.project_id}-${local.name}-config-${random_id.bucket_suffix.hex}" + location = var.region + uniform_bucket_level_access = true + force_destroy = var.gcs_force_destroy + + versioning { + enabled = true + } + + public_access_prevention = "enforced" + + labels = var.labels +} + +resource "google_storage_bucket_object" "proxy_config" { + count = local.proxy_config_enabled ? 1 : 0 + + name = local.proxy_config_file_name + bucket = google_storage_bucket.proxy_config[0].name + content = local.proxy_config_yaml + content_type = "application/yaml" +} + +resource "google_storage_bucket_iam_member" "proxy_config_runtime" { + count = local.proxy_config_enabled ? 1 : 0 + + bucket = google_storage_bucket.proxy_config[0].name + role = "roles/storage.objectViewer" + member = "serviceAccount:${google_service_account.runtime.email}" +} diff --git a/terraform/litellm/gcp/iam.tf b/terraform/litellm/gcp/iam.tf index 252f5de25ac..dc3ae5e0912 100644 --- a/terraform/litellm/gcp/iam.tf +++ b/terraform/litellm/gcp/iam.tf @@ -69,3 +69,13 @@ resource "google_secret_manager_secret_iam_member" "extras" { role = "roles/secretmanager.secretAccessor" member = "serviceAccount:${google_service_account.runtime.email}" } + +# OTEL_HEADERS secret accessor — only created when var.otel_headers_secret +# is set. Carries the OTLP collector's auth header(s). +resource "google_secret_manager_secret_iam_member" "otel_headers" { + count = var.otel_headers_secret == "" ? 
0 : 1 + + secret_id = var.otel_headers_secret + role = "roles/secretmanager.secretAccessor" + member = "serviceAccount:${google_service_account.runtime.email}" +} diff --git a/terraform/litellm/gcp/locals.tf b/terraform/litellm/gcp/locals.tf index 2d1231fb197..a43920ed31d 100644 --- a/terraform/litellm/gcp/locals.tf +++ b/terraform/litellm/gcp/locals.tf @@ -62,11 +62,18 @@ locals { ] proxy_config_enabled = length(keys(var.proxy_config)) > 0 - proxy_config_b64 = local.proxy_config_enabled ? base64encode(yamlencode(var.proxy_config)) : "" + proxy_config_yaml = local.proxy_config_enabled ? yamlencode(var.proxy_config) : "" + + proxy_config_mount_path = "/etc/litellm" + proxy_config_file_name = "config.yaml" + proxy_config_volume = "proxy-config" proxy_config_env = local.proxy_config_enabled ? [ - { name = "LITELLM_PROXY_CONFIG_B64", value = local.proxy_config_b64 }, - { name = "CONFIG_FILE_PATH", value = "/tmp/litellm-config.yaml" }, + { name = "CONFIG_FILE_PATH", value = "${local.proxy_config_mount_path}/${local.proxy_config_file_name}" }, + # Forces a new Cloud Run revision when the YAML changes; gcsfuse only + # surfaces the new object on container restart, so without this an + # updated proxy_config would sit in the bucket unread. + { name = "PROXY_CONFIG_HASH", value = md5(local.proxy_config_yaml) }, ] : [] # Resolved image URIs: per-component override wins, otherwise compose diff --git a/terraform/litellm/gcp/variables.tf b/terraform/litellm/gcp/variables.tf index b99e1f95e18..798ded525cd 100644 --- a/terraform/litellm/gcp/variables.tf +++ b/terraform/litellm/gcp/variables.tf @@ -394,12 +394,89 @@ variable "backend_extra_secrets" { variable "proxy_config" { description = <<-EOT LiteLLM proxy config (contents of config.yaml). Mirrors the helm chart's - `gateway.config.proxy_config`. Passed to gateway, backend, and the - migration job as a base64-encoded env var and decoded to - /tmp/litellm-config.yaml at container start; CONFIG_FILE_PATH is set - automatically. 
Reference env-injected secrets from the YAML via - `os.environ/`. Leave empty ({}) to skip. + `gateway.config.proxy_config`. YAML-encoded and uploaded to a dedicated + GCS bucket as `config.yaml`, then mounted read-only into the gateway + and backend at `/etc/litellm` via Cloud Run v2's gcsfuse volume; + CONFIG_FILE_PATH is set automatically. A hash of the YAML is wired in + as an env var so a config-only edit forces a new revision (gcsfuse + surfaces the new object on container restart). Reference env-injected + secrets from the YAML via `os.environ/`. Leave empty ({}) to + skip — the bucket isn't created and no volume is mounted. EOT type = any default = {} } + +# ---------- OpenTelemetry v2 ---------- +# +# https://docs.litellm.ai/docs/observability/opentelemetry_v2 +# +# OTel v2 is wired into the gateway and backend by default (LITELLM_OTEL_V2=true +# is added to shared_env) but is dormant until otel_endpoint is non-empty. Leave +# otel_endpoint = "" to ship without any OTel exporter; the proxy will boot with +# the flag flipped on but no destination configured, which is functionally +# equivalent to "off" because nothing is exported. + +variable "otel_endpoint" { + description = <<-EOT + OTLP collector URL (e.g. https://otel.example.com:4318 for HTTP, or + your collector's :4317 for gRPC). When empty the gateway/backend boot + with LITELLM_OTEL_V2=true but no exporter wired in, which is + functionally off. When set, OTEL_EXPORTER and OTEL_ENDPOINT are + injected and spans ship to the collector. + EOT + type = string + default = "" +} + +variable "otel_exporter" { + description = <<-EOT + OTel exporter protocol. Ignored when otel_endpoint is empty. `otlp_http` + is the safer default (works through a vanilla L7 ingress); `otlp_grpc` + needs the collector reachable over h2 and the `grpcio` extra installed + in the proxy image. 
+ EOT + type = string + default = "otlp_http" + validation { + condition = contains(["otlp_http", "otlp_grpc", "console"], var.otel_exporter) + error_message = "otel_exporter must be one of: otlp_http, otlp_grpc, console." + } +} + +variable "otel_headers_secret" { + description = <<-EOT + Optional Secret Manager secret resource ID + (`projects/<project>/secrets/<name>`) whose latest version is the + value of OTEL_HEADERS — used for collector auth, e.g. + `Authorization=Bearer <token>`. Mounted as an env-var secret_key_ref; + the runtime SA auto-gains roles/secretmanager.secretAccessor. + EOT + type = string + default = "" +} + +variable "otel_environment_name" { + description = <<-EOT + Value for OTEL_ENVIRONMENT_NAME (becomes `deployment.environment` on + every span). Defaults to var.env so spans land tagged with the + deployment env without extra wiring. + EOT + type = string + default = "" +} + +variable "otel_capture_message_content" { + description = <<-EOT + Value for OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT. Default + `no_content` matches the litellm default; flip to `prompt_and_completion` + only when you've audited what's about to land in your observability + backend, because raw prompts/completions are typically sensitive. + EOT + type = string + default = "no_content" + validation { + condition = contains(["no_content", "prompt_and_completion"], var.otel_capture_message_content) + error_message = "otel_capture_message_content must be one of: no_content, prompt_and_completion." + } +} From 9c04aa10e49269ee976c094be41b0068230da693 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 6 Jun 2026 18:32:01 +0000 Subject: [PATCH 09/10] refactor(terraform): make AWS and GCP stacks behave identically Bring both modules to the same surface and the same runtime behavior so swapping clouds (or reading either README) is symmetric. Labels and tags.
GCP previously stamped var.labels onto only the two GCS buckets, leaving Cloud Run, Cloud SQL, Memorystore, Secret Manager, and the LB resources unlabeled; the variable description claimed full coverage. Now the module computes local.labels (litellm-stack + managed-by + var.labels, mirroring AWS's local.tags) and threads it onto every label-supporting resource: Cloud Run services and the migrations job, Cloud SQL writer and reader (via user_labels), Memorystore, Secret Manager entries (master_key, license, ui_password, db_password), both GCS buckets, the global LB address, and the http/https forwarding rules. GCP keys use 'litellm-stack' instead of AWS's 'litellm:stack' because GCP label keys forbid colons; var.labels now defaults to {}. OpenTelemetry v2 is opt-in on both stacks. AWS already gated everything on otel_endpoint; GCP previously stamped LITELLM_OTEL_V2=true into shared_env unconditionally and only gated the OTEL_* block. Both stacks now do the same thing: leave otel_endpoint empty and nothing OTel-related lands in the container env; set it and gateway and backend get LITELLM_OTEL_V2=true plus OTEL_EXPORTER, OTEL_ENDPOINT, OTEL_ENVIRONMENT_NAME, OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, and a per-component OTEL_SERVICE_NAME (${tenant}-litellm-${env}-gateway or -backend) so spans land tagged with the right hop. AWS picks up the richer GCP surface: otel_environment_name (defaults to var.env), otel_capture_message_content (defaults to no_content), and *_extra_env override filtering so a caller-set OTEL_* key wins over the default for that service (ECS allows duplicates, but the filter gives the same predictable caller-wins behavior as the GCP stack, where Cloud Run rejects duplicate env names outright). var.otel_service_name on AWS is gone, replaced by the per-component naming. uvicorn workers. GCP gains gateway_num_workers, matching AWS; threads into the gateway args as --workers ${var.gateway_num_workers}.
Docs reflect the parity: each README's OTel section, the GCP 'Using as a module' Labels paragraph, and a new feature-parity table in the top-level README that lays out the AWS/GCP input mapping side by side. --- terraform/litellm/README.md | 36 +++++++- terraform/litellm/aws/README.md | 25 ++++-- terraform/litellm/aws/ecs.tf | 83 +++++++++++-------- .../examples/default/terraform.tfvars.example | 15 ++-- terraform/litellm/aws/variables.tf | 42 +++++++--- terraform/litellm/gcp/README.md | 43 ++++++---- terraform/litellm/gcp/cloudrun.tf | 22 +++-- terraform/litellm/gcp/cloudsql.tf | 5 ++ .../examples/default/terraform.tfvars.example | 14 ++-- terraform/litellm/gcp/gcs.tf | 4 +- terraform/litellm/gcp/load_balancer.tf | 5 +- terraform/litellm/gcp/locals.tf | 13 +++ terraform/litellm/gcp/redis.tf | 2 + terraform/litellm/gcp/secrets.tf | 3 + terraform/litellm/gcp/variables.tf | 36 +++++--- 15 files changed, 240 insertions(+), 108 deletions(-) diff --git a/terraform/litellm/README.md b/terraform/litellm/README.md index 36315a99ff3..8f09cb53407 100644 --- a/terraform/litellm/README.md +++ b/terraform/litellm/README.md @@ -163,6 +163,39 @@ against the backend image: Run the migration job once after the first `terraform apply` and before the gateway/backend services start serving traffic. +## Feature parity between stacks + +The two modules expose the same conceptual surface; concrete inputs differ +only where the underlying cloud forces it. 
+ +| Capability | AWS input(s) | GCP input(s) | +| -------------------------------- | ------------------------------------------------------- | --------------------------------------------------------- | +| Tenant + env naming | `tenant`, `env` | `tenant`, `env` | +| Pre-shared master key / license | `litellm_master_key`, `litellm_license` | `litellm_master_key`, `litellm_license` | +| UI admin password | `ui_password` | `ui_password` | +| Per-deployment tags / labels | `tags` (`map(string)`) | `labels` (`map(string)`) | +| TLS posture | `acm_certificate_arn`, `allow_plaintext_alb` | `lb_domains`, `allow_plaintext_lb` | +| Force destroy of object store | `s3_force_destroy` | `gcs_force_destroy` | +| Database deletion protection | `skip_final_snapshot` | `cloudsql_deletion_protection` | +| `proxy_config` (typed YAML map) | `proxy_config` | `proxy_config` | +| Extra plain env per component | `gateway_extra_env`, `backend_extra_env` | `gateway_extra_env`, `backend_extra_env` | +| Extra secret-backed env | `gateway_extra_secrets`, `backend_extra_secrets` (ARNs) | `gateway_extra_secrets`, `backend_extra_secrets` (resource IDs) | +| Uvicorn `--workers` on gateway | `gateway_num_workers` | `gateway_num_workers` | +| OpenTelemetry v2 (opt-in) | `otel_endpoint`, `otel_exporter`, `otel_environment_name`, `otel_capture_message_content`, `otel_headers_secret_arn` | `otel_endpoint`, `otel_exporter`, `otel_environment_name`, `otel_capture_message_content`, `otel_headers_secret` | + +Each module stamps its own stack-identity tag (`litellm:stack` on AWS, +`litellm-stack` on GCP — GCP label keys forbid colons) plus +`managed-by = "terraform"` onto every taggable / labelable resource and +merges `var.tags` / `var.labels` on top. Provider `default_tags` on AWS +still apply underneath; these resource-level tags win on a key collision.
+ +OTel is opt-in on both clouds: leave `otel_endpoint` empty and nothing +OTel-related is added to the container env; set it and both gateway and +backend get `LITELLM_OTEL_V2=true` plus the full `OTEL_*` block, with +`OTEL_SERVICE_NAME` stamped per component +(`<tenant>-litellm-<env>-gateway` and `-backend`). Any `OTEL_*` key set +in `gateway_extra_env` / `backend_extra_env` wins for that service. + ## What's not included - TLS certificates / custom domains. Both stacks expose plain-HTTP load @@ -172,7 +205,8 @@ gateway/backend services start serving traffic. backend block to `versions.tf` when graduating to a team environment. - Observability beyond the cloud provider's defaults (CloudWatch logs on AWS, Cloud Logging on GCP). Wire your own Prometheus / Datadog / Langfuse - via the `*_extra_env` variables. + via the `*_extra_env` variables, or turn on OTel v2 (see the parity + table above). ## HCP Terraform no-code (1-click) deploy diff --git a/terraform/litellm/aws/README.md b/terraform/litellm/aws/README.md index cae128175f6..7d4ef0a14fb 100644 --- a/terraform/litellm/aws/README.md +++ b/terraform/litellm/aws/README.md @@ -124,17 +124,19 @@ aws secretsmanager create-secret \ ### Observability (OpenTelemetry v2) -Set `otel_endpoint` and OTel v2 -(https://docs.litellm.ai/docs/observability/opentelemetry_v2) turns on for -both gateway and backend; the stack flips `LITELLM_OTEL_V2=true` and wires -`OTEL_EXPORTER` / `OTEL_ENDPOINT` / `OTEL_SERVICE_NAME` / -`OTEL_ENVIRONMENT_NAME` into the shared env block. Leave it empty and no OTel -env vars are added. +OTel v2 (https://docs.litellm.ai/docs/observability/opentelemetry_v2) is +opt-in and gated entirely on `otel_endpoint`. Empty (default) and nothing +OTel-related is added to the container env. Set it and both gateway and +backend gain `LITELLM_OTEL_V2=true` plus the `OTEL_*` block, with +`OTEL_SERVICE_NAME` stamped per component (`${tenant}-litellm-${env}-gateway` +and `-backend`) so spans land tagged with the right hop.
Any `OTEL_*` key +set in `gateway_extra_env` / `backend_extra_env` overrides the default for +that service. ```hcl -otel_endpoint = "http://otel-collector.internal:4318" -otel_exporter = "otlp_http" # otlp_grpc, console -otel_service_name = "" # defaults to the stack name +otel_endpoint = "http://otel-collector.internal:4318" +otel_exporter = "otlp_http" # otlp_grpc, console +otel_environment_name = "prod" # defaults to var.env ``` For collectors that require an auth header, store the comma-separated @@ -146,6 +148,11 @@ For collectors that require an auth header, store the comma-separated otel_headers_secret_arn = "arn:aws:secretsmanager:us-west-2:111122223333:secret:honeycomb-otel-headers-AbCdEf" ``` +`OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` defaults to +`no_content`; flip `otel_capture_message_content = "prompt_and_completion"` +only after auditing what lands in the backend, since prompts and +completions are typically sensitive. + Vendor presets (Arize, Phoenix, Langfuse OTel, Weave, Langtrace, Levo, AgentOps) live under `proxy_config.litellm_settings.callbacks` and are orthogonal to the OTLP variables above; their credentials still go in diff --git a/terraform/litellm/aws/ecs.tf b/terraform/litellm/aws/ecs.tf index c4ec767f7de..54ab80de9f4 100644 --- a/terraform/litellm/aws/ecs.tf +++ b/terraform/litellm/aws/ecs.tf @@ -44,48 +44,61 @@ resource "aws_cloudwatch_log_group" "migrations" { # HOST/PORT/USER/NAME plus an IAM-signed token, so no DB password is needed # in the task definition. locals { - # OTel v2 is wired on by default; the proxy stays inert until - # otel_endpoint is set (no exporter is configured). When an endpoint is - # supplied, OTEL_HEADERS is sourced from Secrets Manager (otel_headers env - # injection lives under shared_secrets). - otel_enabled = var.otel_endpoint != "" - otel_env = local.otel_enabled ? [ + # OTel v2 is opt-in and gated on otel_endpoint, matching the GCP stack. 
+ # When set, LITELLM_OTEL_V2 flips on alongside the OTEL_* block, with + # OTEL_SERVICE_NAME stamped per component so spans land tagged with the + # right hop. Any OTEL_* key set in *_extra_env wins over the default for + # that service (ECS allows duplicates but last-wins is undefined, so we + # filter here for the same predictable behavior GCP gets from Cloud Run's + # hard duplicate-rejection). + otel_enabled = var.otel_endpoint != "" + otel_environment_name = var.otel_environment_name != "" ? var.otel_environment_name : var.env + otel_shared_env = local.otel_enabled ? [ { name = "LITELLM_OTEL_V2", value = "true" }, { name = "OTEL_EXPORTER", value = var.otel_exporter }, { name = "OTEL_ENDPOINT", value = var.otel_endpoint }, - { name = "OTEL_SERVICE_NAME", value = var.otel_service_name != "" ? var.otel_service_name : local.name }, - { name = "OTEL_ENVIRONMENT_NAME", value = var.env }, + { name = "OTEL_ENVIRONMENT_NAME", value = local.otel_environment_name }, + { name = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", value = var.otel_capture_message_content }, ] : [] + gateway_otel_env_raw = concat(local.otel_shared_env, local.otel_enabled ? [ + { name = "OTEL_SERVICE_NAME", value = "${local.name}-gateway" }, + ] : []) + backend_otel_env_raw = concat(local.otel_shared_env, local.otel_enabled ? [ + { name = "OTEL_SERVICE_NAME", value = "${local.name}-backend" }, + ] : []) + gateway_otel_env = [ + for e in local.gateway_otel_env_raw : e if !contains(keys(var.gateway_extra_env), e.name) + ] + backend_otel_env = [ + for e in local.backend_otel_env_raw : e if !contains(keys(var.backend_extra_env), e.name) + ] otel_secrets = local.otel_enabled && var.otel_headers_secret_arn != "" ? 
[ { name = "OTEL_HEADERS", valueFrom = var.otel_headers_secret_arn }, ] : [] - shared_env = concat( - [ - { name = "IAM_TOKEN_DB_AUTH", value = "true" }, - { name = "DATABASE_HOST", value = aws_rds_cluster.this.endpoint }, - { name = "DATABASE_PORT", value = tostring(aws_rds_cluster.this.port) }, - { name = "DATABASE_USER", value = var.db_username }, - { name = "DATABASE_NAME", value = var.db_name }, - { name = "DATABASE_HOST_READ_REPLICA", value = aws_rds_cluster.this.reader_endpoint }, - { name = "DATABASE_PORT_READ_REPLICA", value = tostring(aws_rds_cluster.this.port) }, - { name = "REDIS_HOST", value = aws_elasticache_replication_group.this.primary_endpoint_address }, - { name = "REDIS_PORT", value = tostring(aws_elasticache_replication_group.this.port) }, - # transit_encryption_enabled = true on the replication group means the - # proxy must connect via rediss://. _redis.get_redis_url_from_environment - # honors REDIS_SSL to flip the scheme. - { name = "REDIS_SSL", value = "true" }, - # S3 bucket — referenced from proxy_config via os.environ/S3_BUCKET_NAME - # (e.g. cache backend, request log archival, /files passthrough). - { name = "S3_BUCKET_NAME", value = aws_s3_bucket.this.bucket }, - { name = "S3_REGION_NAME", value = var.region }, - # boto3 inside generate_iam_auth_token reads AWS_REGION_NAME first, then - # AWS_REGION. Set both for compatibility. 
- { name = "AWS_REGION", value = var.region }, - { name = "AWS_REGION_NAME", value = var.region }, - ], - local.otel_env, - ) + shared_env = [ + { name = "IAM_TOKEN_DB_AUTH", value = "true" }, + { name = "DATABASE_HOST", value = aws_rds_cluster.this.endpoint }, + { name = "DATABASE_PORT", value = tostring(aws_rds_cluster.this.port) }, + { name = "DATABASE_USER", value = var.db_username }, + { name = "DATABASE_NAME", value = var.db_name }, + { name = "DATABASE_HOST_READ_REPLICA", value = aws_rds_cluster.this.reader_endpoint }, + { name = "DATABASE_PORT_READ_REPLICA", value = tostring(aws_rds_cluster.this.port) }, + { name = "REDIS_HOST", value = aws_elasticache_replication_group.this.primary_endpoint_address }, + { name = "REDIS_PORT", value = tostring(aws_elasticache_replication_group.this.port) }, + # transit_encryption_enabled = true on the replication group means the + # proxy must connect via rediss://. _redis.get_redis_url_from_environment + # honors REDIS_SSL to flip the scheme. + { name = "REDIS_SSL", value = "true" }, + # S3 bucket — referenced from proxy_config via os.environ/S3_BUCKET_NAME + # (e.g. cache backend, request log archival, /files passthrough). + { name = "S3_BUCKET_NAME", value = aws_s3_bucket.this.bucket }, + { name = "S3_REGION_NAME", value = var.region }, + # boto3 inside generate_iam_auth_token reads AWS_REGION_NAME first, then + # AWS_REGION. Set both for compatibility. 
+ { name = "AWS_REGION", value = var.region }, + { name = "AWS_REGION_NAME", value = var.region }, + ] shared_secrets = concat( [ @@ -184,6 +197,7 @@ resource "aws_ecs_task_definition" "gateway" { portMappings = [{ containerPort = 4000, protocol = "tcp" }] environment = concat( local.shared_env, + local.gateway_otel_env, local.gateway_extra_env_list, local.proxy_config_env, ) @@ -269,6 +283,7 @@ resource "aws_ecs_task_definition" "backend" { environment = concat( local.shared_env, local.backend_default_env, + local.backend_otel_env, local.backend_extra_env_list, local.proxy_config_env, ) diff --git a/terraform/litellm/aws/examples/default/terraform.tfvars.example b/terraform/litellm/aws/examples/default/terraform.tfvars.example index 19eee59a0e1..3952b06c6a9 100644 --- a/terraform/litellm/aws/examples/default/terraform.tfvars.example +++ b/terraform/litellm/aws/examples/default/terraform.tfvars.example @@ -81,10 +81,11 @@ env = "stage" # } # ---------- OpenTelemetry v2 ---------- -# Set otel_endpoint to a non-empty value to enable OTel v2 on gateway and -# backend (LITELLM_OTEL_V2=true + OTEL_EXPORTER/OTEL_ENDPOINT/OTEL_SERVICE_NAME/ -# OTEL_ENVIRONMENT_NAME). Empty disables it entirely. -# otel_endpoint = "http://otel-collector.internal:4318" -# otel_exporter = "otlp_http" # otlp_grpc, console -# otel_service_name = "" # defaults to "-litellm-" -# otel_headers_secret_arn = "arn:aws:secretsmanager:us-west-2:111122223333:secret:honeycomb-otel-headers-AbCdEf" +# OTel is gated on otel_endpoint: empty (default) and nothing is added to +# the container env; set it and both gateway and backend gain +# LITELLM_OTEL_V2=true plus the OTEL_* block (with OTEL_SERVICE_NAME +# stamped per component). The knobs aren't surfaced as wrapper vars in +# this example; set them directly on the `module "litellm"` block in +# main.tf (otel_endpoint, otel_exporter, otel_environment_name, +# otel_capture_message_content, otel_headers_secret_arn). Full docs in +# ../../variables.tf. 
diff --git a/terraform/litellm/aws/variables.tf b/terraform/litellm/aws/variables.tf index 8a2a487b642..8db4935664b 100644 --- a/terraform/litellm/aws/variables.tf +++ b/terraform/litellm/aws/variables.tf @@ -463,17 +463,20 @@ variable "log_retention_days" { # # https://docs.litellm.ai/docs/observability/opentelemetry_v2 # -# Setting otel_endpoint to a non-empty value turns OTel v2 on for both gateway -# and backend (LITELLM_OTEL_V2=true plus OTEL_EXPORTER/OTEL_ENDPOINT/ -# OTEL_SERVICE_NAME/OTEL_ENVIRONMENT_NAME are added to shared_env). Empty -# endpoint = nothing added to the container env. +# OTel v2 is opt-in and gated entirely on otel_endpoint, matching the GCP +# stack. Leave otel_endpoint = "" and nothing OTel-related lands in the +# container env. Set it and the gateway and backend gain LITELLM_OTEL_V2=true +# plus the OTEL_* block (per-component OTEL_SERVICE_NAME, exporter, endpoint, +# environment name, capture-content), with OTEL_HEADERS sourced from +# otel_headers_secret_arn when provided. variable "otel_endpoint" { description = <<-EOT - OTLP collector endpoint (sets OTEL_ENDPOINT / OTEL_EXPORTER_OTLP_ENDPOINT). - Empty disables OTel export. Point at any OTLP-compatible backend - (self-hosted collector, Grafana Tempo, Honeycomb, Datadog, etc.). Example: - "http://otel-collector.internal:4318" for OTLP/HTTP. + OTLP collector endpoint (sets OTEL_ENDPOINT). Empty disables OTel + entirely (no LITELLM_OTEL_V2, no OTEL_* env). Point at any + OTLP-compatible backend (self-hosted collector, Grafana Tempo, + Honeycomb, Datadog). Example: "http://otel-collector.internal:4318" + for OTLP/HTTP. EOT type = string default = "" @@ -494,15 +497,32 @@ variable "otel_exporter" { } } -variable "otel_service_name" { +variable "otel_environment_name" { description = <<-EOT - OTEL_SERVICE_NAME resource attribute. Defaults to the stack name - (`-litellm-`). + Value for OTEL_ENVIRONMENT_NAME (becomes `deployment.environment` on + every span). 
Defaults to var.env when empty so spans land tagged with + the deployment env without extra wiring. EOT type = string default = "" } +variable "otel_capture_message_content" { + description = <<-EOT + Value for OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT. Default + `no_content` matches the litellm default; flip to `prompt_and_completion` + only when you've audited what's about to land in your observability + backend, because raw prompts/completions are typically sensitive. + EOT + type = string + default = "no_content" + + validation { + condition = contains(["no_content", "prompt_and_completion"], var.otel_capture_message_content) + error_message = "otel_capture_message_content must be one of: no_content, prompt_and_completion." + } +} + variable "otel_headers_secret_arn" { description = <<-EOT Secrets Manager ARN whose plaintext value becomes OTEL_HEADERS diff --git a/terraform/litellm/gcp/README.md b/terraform/litellm/gcp/README.md index 79f624d4b26..1e0bf4319df 100644 --- a/terraform/litellm/gcp/README.md +++ b/terraform/litellm/gcp/README.md @@ -174,10 +174,15 @@ you need a pinned version, edit `local.gateway_extra_secret_kv` in ### OpenTelemetry v2 -`LITELLM_OTEL_V2=true` is wired into both the gateway and backend by default -(see [OpenTelemetry v2 docs](https://docs.litellm.ai/docs/observability/opentelemetry_v2)). -The flag is dormant until `otel_endpoint` is non-empty; with an empty -endpoint nothing exports and the integration is effectively off. +OTel v2 (https://docs.litellm.ai/docs/observability/opentelemetry_v2) is +opt-in and gated entirely on `otel_endpoint`. Empty (default) and nothing +OTel-related lands in the container env. Set it and both gateway and +backend gain `LITELLM_OTEL_V2=true` plus the `OTEL_*` block, with +`OTEL_SERVICE_NAME` stamped per component (`${tenant}-litellm-${env}-gateway` +and `-backend`) so spans land tagged with the right hop. 
Any `OTEL_*` key +set in `gateway_extra_env` / `backend_extra_env` overrides the default for +that service (Cloud Run rejects duplicate env names, so the override is +predictable). ```hcl otel_endpoint = "https://otel.example.com:4318" @@ -186,18 +191,18 @@ otel_environment_name = "prod" # default: var.env otel_headers_secret = "projects/my-gcp-project/secrets/otel-headers" ``` -`OTEL_SERVICE_NAME` is set per component (`${tenant}-litellm-${env}-gateway` -and `-backend`) so spans land tagged with the right hop. `OTEL_HEADERS` -is wired as a Secret Manager `secret_key_ref` since it typically carries -the collector's auth token; create the secret with the literal header -string, e.g. `Authorization=Bearer `. Any `OTEL_*` key set in -`gateway_extra_env` / `backend_extra_env` overrides the default for that -service. +`OTEL_HEADERS` is wired as a Secret Manager `secret_key_ref` since it +typically carries the collector's auth token; create the secret with the +literal header string, e.g. `Authorization=Bearer `. `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` defaults to -`no_content` — flip `otel_capture_message_content = -"prompt_and_completion"` only after auditing what lands in the backend, -since prompts and completions are typically sensitive. +`no_content`; flip `otel_capture_message_content = "prompt_and_completion"` +only after auditing what lands in the backend, since prompts and +completions are typically sensitive. + +Behavior matches the AWS stack 1:1; the only naming differences are +`otel_headers_secret` (a Secret Manager resource ID) vs AWS's +`otel_headers_secret_arn` (a Secrets Manager ARN). ## Tenant deployment @@ -332,8 +337,14 @@ module "litellm" { ``` Both the default `google` and `google-beta` configs are inherited by the -module automatically through the call — declare both in the caller. -Resource labels are controlled by the module's `labels` input. +module automatically through the call; declare both in the caller. 
+ +Labels: the module stamps its own `litellm-stack` and `managed-by` labels +onto every label-supporting resource (Cloud Run services and the +migrations job, Cloud SQL writer and reader, Memorystore, Secret Manager +entries, GCS buckets, the LB global address and forwarding rules) and +merges `var.labels` on top. Use the `labels` input for per-deployment +labels; mirrors the AWS stack's `tags` input. **`for_each` shares one provider config.** The module's `versions.tf` declares `google` / `google-beta` *without* `configuration_aliases`, so it only ever diff --git a/terraform/litellm/gcp/cloudrun.tf b/terraform/litellm/gcp/cloudrun.tf index cb8d0a9da2d..3e3f5f6924b 100644 --- a/terraform/litellm/gcp/cloudrun.tf +++ b/terraform/litellm/gcp/cloudrun.tf @@ -24,27 +24,29 @@ locals { { name = "REDIS_SSL_CA_CERTS", value = "/tmp/redis-ca.pem" }, { name = "REDIS_CA_PEM_B64", value = local.redis_ca_pem_b64 }, { name = "GCS_BUCKET_NAME", value = google_storage_bucket.this.name }, - # OTel v2 master switch. Dormant until otel_endpoint is set; see - # otel_env below and the otel_endpoint variable for the gate. - { name = "LITELLM_OTEL_V2", value = "true" }, ] + # OTel v2 is opt-in and gated on otel_endpoint, matching the AWS stack — + # nothing OTel-related is added to the container env until an endpoint is + # set. LITELLM_OTEL_V2 flips on alongside the OTEL_* block so the proxy + # never boots the instrumentation with no exporter wired in. otel_enabled = var.otel_endpoint != "" otel_environment_name = var.otel_environment_name != "" ? var.otel_environment_name : var.env - otel_capture_kv = local.otel_enabled ? [{ name = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", value = var.otel_capture_message_content }] : [] otel_shared_endpoint_kv = local.otel_enabled ? 
[ + { name = "LITELLM_OTEL_V2", value = "true" }, { name = "OTEL_EXPORTER", value = var.otel_exporter }, { name = "OTEL_ENDPOINT", value = var.otel_endpoint }, { name = "OTEL_ENVIRONMENT_NAME", value = local.otel_environment_name }, + { name = "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", value = var.otel_capture_message_content }, ] : [] # OTel defaults are filtered out when the same key appears in # *_extra_env, so a caller-supplied OTEL_SERVICE_NAME (or any other # OTEL_*) takes precedence without colliding at Cloud Run apply time # (Cloud Run rejects duplicate env var names). - gateway_otel_env_kv_raw = concat(local.otel_shared_endpoint_kv, local.otel_capture_kv, local.otel_enabled ? [ + gateway_otel_env_kv_raw = concat(local.otel_shared_endpoint_kv, local.otel_enabled ? [ { name = "OTEL_SERVICE_NAME", value = "${local.name}-gateway" }, ] : []) - backend_otel_env_kv_raw = concat(local.otel_shared_endpoint_kv, local.otel_capture_kv, local.otel_enabled ? [ + backend_otel_env_kv_raw = concat(local.otel_shared_endpoint_kv, local.otel_enabled ? [ { name = "OTEL_SERVICE_NAME", value = "${local.name}-backend" }, ] : []) gateway_otel_env_kv = [ @@ -53,7 +55,7 @@ locals { backend_otel_env_kv = [ for e in local.backend_otel_env_kv_raw : e if !contains(keys(var.backend_extra_env), e.name) ] - otel_env_secrets = var.otel_headers_secret != "" ? [ + otel_env_secrets = local.otel_enabled && var.otel_headers_secret != "" ? 
[ { name = "OTEL_HEADERS", secret = var.otel_headers_secret, version = "latest" }, ] : [] @@ -109,7 +111,7 @@ locals { gateway_args = join(" && ", concat( local.redis_ca_fragment, local.database_url_fragment, - ["exec uvicorn gateway.main:app --host 0.0.0.0 --port 4000"], + ["exec uvicorn gateway.main:app --host 0.0.0.0 --port 4000 --workers ${var.gateway_num_workers}"], )) backend_args = join(" && ", concat( @@ -139,6 +141,7 @@ resource "google_cloud_run_v2_service" "gateway" { name = "${local.name}-gateway" location = var.region ingress = "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER" + labels = local.labels template { service_account = google_service_account.runtime.email @@ -251,6 +254,7 @@ resource "google_cloud_run_v2_service" "backend" { name = "${local.name}-backend" location = var.region ingress = "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER" + labels = local.labels template { service_account = google_service_account.runtime.email @@ -365,6 +369,7 @@ resource "google_cloud_run_v2_service" "ui" { name = "${local.name}-ui" location = var.region ingress = "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER" + labels = local.labels template { service_account = google_service_account.ui_runtime.email @@ -438,6 +443,7 @@ resource "google_cloud_run_v2_service_iam_member" "ui_allusers" { resource "google_cloud_run_v2_job" "migrations" { name = "${local.name}-migrations" location = var.region + labels = local.labels template { template { diff --git a/terraform/litellm/gcp/cloudsql.tf b/terraform/litellm/gcp/cloudsql.tf index e3394fefc0f..0af15eec22f 100644 --- a/terraform/litellm/gcp/cloudsql.tf +++ b/terraform/litellm/gcp/cloudsql.tf @@ -26,6 +26,8 @@ resource "google_sql_database_instance" "writer" { disk_size = 20 disk_autoresize = true + user_labels = local.labels + backup_configuration { enabled = true point_in_time_recovery_enabled = true @@ -70,6 +72,8 @@ resource "google_sql_database_instance" "reader" { availability_type = "ZONAL" disk_autoresize = true + user_labels = 
local.labels + ip_configuration { ipv4_enabled = false private_network = google_compute_network.this.id @@ -106,6 +110,7 @@ resource "google_sql_user" "app" { resource "google_secret_manager_secret" "db_password" { secret_id = "${local.name}-db-password" + labels = local.labels replication { auto {} } diff --git a/terraform/litellm/gcp/examples/default/terraform.tfvars.example b/terraform/litellm/gcp/examples/default/terraform.tfvars.example index 7be8cec062a..6358ec96e6d 100644 --- a/terraform/litellm/gcp/examples/default/terraform.tfvars.example +++ b/terraform/litellm/gcp/examples/default/terraform.tfvars.example @@ -77,9 +77,11 @@ env = "stage" # } # ---------- OpenTelemetry v2 ---------- -# LITELLM_OTEL_V2=true is always set on the gateway and backend; nothing -# exports until you point them at a collector. To enable export, set -# otel_endpoint (and optionally otel_exporter, otel_headers_secret, -# otel_environment_name, otel_capture_message_content) directly on the -# `module "litellm"` block in main.tf — these aren't wrapper vars in this -# example. Full docs in ../../variables.tf. +# OTel is gated on otel_endpoint: empty (default) and nothing is added to +# the container env; set it and both gateway and backend gain +# LITELLM_OTEL_V2=true plus the OTEL_* block (with OTEL_SERVICE_NAME +# stamped per component). These knobs aren't surfaced as wrapper vars in +# this example; set them directly on the `module "litellm"` block in +# main.tf (otel_endpoint, otel_exporter, otel_environment_name, +# otel_capture_message_content, otel_headers_secret). Full docs in +# ../../variables.tf. 
diff --git a/terraform/litellm/gcp/gcs.tf b/terraform/litellm/gcp/gcs.tf index 43ee6c7b2f6..3ba1f482219 100644 --- a/terraform/litellm/gcp/gcs.tf +++ b/terraform/litellm/gcp/gcs.tf @@ -18,7 +18,7 @@ resource "google_storage_bucket" "this" { public_access_prevention = "enforced" - labels = var.labels + labels = local.labels } # Cloud Run runtime SA gains object admin on this bucket only. @@ -48,7 +48,7 @@ resource "google_storage_bucket" "proxy_config" { public_access_prevention = "enforced" - labels = var.labels + labels = local.labels } resource "google_storage_bucket_object" "proxy_config" { diff --git a/terraform/litellm/gcp/load_balancer.tf b/terraform/litellm/gcp/load_balancer.tf index 3fce96eaf74..11f30d0f944 100644 --- a/terraform/litellm/gcp/load_balancer.tf +++ b/terraform/litellm/gcp/load_balancer.tf @@ -14,7 +14,8 @@ locals { } resource "google_compute_global_address" "lb" { - name = "${local.name}-lb-ip" + name = "${local.name}-lb-ip" + labels = local.labels } # Serverless NEGs — one per Cloud Run service. @@ -148,6 +149,7 @@ resource "google_compute_global_forwarding_rule" "http" { load_balancing_scheme = "EXTERNAL_MANAGED" ip_address = google_compute_global_address.lb.address target = google_compute_target_http_proxy.this.id + labels = local.labels } # ---------- HTTPS (gated on var.lb_domains) ---------- @@ -193,4 +195,5 @@ resource "google_compute_global_forwarding_rule" "https" { load_balancing_scheme = "EXTERNAL_MANAGED" ip_address = google_compute_global_address.lb.address target = google_compute_target_https_proxy.this[0].id + labels = local.labels } diff --git a/terraform/litellm/gcp/locals.tf b/terraform/litellm/gcp/locals.tf index a43920ed31d..732b4ce7d6b 100644 --- a/terraform/litellm/gcp/locals.tf +++ b/terraform/litellm/gcp/locals.tf @@ -8,6 +8,19 @@ locals { # the stack can reference local.name. 
name = "${var.tenant}-litellm-${var.env}" + # Mirrors the AWS stack's local.tags: the module stamps its own + # `litellm-stack` / `managed-by` labels onto every labelled resource + # (Cloud Run, Cloud SQL, Memorystore, Secret Manager, GCS, LB address and + # forwarding rules); var.labels is merged last, so caller keys win. GCP + # labels are lower-kebab/snake only: `litellm-stack`, not `litellm:stack`. + labels = merge( + { + "litellm-stack" = local.name + "managed-by" = "terraform" + }, + var.labels, + ) + gateway_path_prefixes = [ "/v1/chat/*", "/chat/*", "/v1/completions*", "/completions*", diff --git a/terraform/litellm/gcp/redis.tf b/terraform/litellm/gcp/redis.tf index f7e174ecbae..0e07c416e85 100644 --- a/terraform/litellm/gcp/redis.tf +++ b/terraform/litellm/gcp/redis.tf @@ -9,6 +9,8 @@ resource "google_redis_instance" "this" { redis_version = "REDIS_7_0" + labels = local.labels + # In-transit encryption between Cloud Run and Memorystore. The instance # exposes its self-signed CA via `server_ca_certs` (read in cloudrun.tf # and passed to the proxy as REDIS_CA_PEM_B64); the proxy decodes it to diff --git a/terraform/litellm/gcp/secrets.tf b/terraform/litellm/gcp/secrets.tf index 80312e06a91..f93514bb70b 100644 --- a/terraform/litellm/gcp/secrets.tf +++ b/terraform/litellm/gcp/secrets.tf @@ -10,6 +10,7 @@ resource "random_password" "master_key" { # account gets accessor permission on it (see iam.tf). resource "google_secret_manager_secret" "master_key" { secret_id = "${local.name}-master-key" + labels = local.labels replication { auto {} } @@ -29,6 +30,7 @@ resource "google_secret_manager_secret" "license" { count = var.litellm_license == "" ? 0 : 1 secret_id = "${local.name}-license" + labels = local.labels replication { auto {} } @@ -49,6 +51,7 @@ resource "google_secret_manager_secret" "ui_password" { count = var.ui_password == "" ?
0 : 1 secret_id = "${local.name}-ui-password" + labels = local.labels replication { auto {} } diff --git a/terraform/litellm/gcp/variables.tf b/terraform/litellm/gcp/variables.tf index 798ded525cd..4355192e9f1 100644 --- a/terraform/litellm/gcp/variables.tf +++ b/terraform/litellm/gcp/variables.tf @@ -30,11 +30,9 @@ variable "env" { } variable "labels" { - description = "Resource labels merged into every label-supporting resource." + description = "Per-deployment labels applied to every label-supporting resource the module creates, on top of the module's own `litellm-stack` / `managed-by` labels. Mirrors the AWS stack's `tags` input." type = map(string) - default = { - "managed-by" = "terraform" - } + default = {} } # ---------- Tenant-supplied secrets ---------- @@ -171,6 +169,17 @@ variable "gateway_memory" { default = "4Gi" } +variable "gateway_num_workers" { + description = "uvicorn worker processes per gateway instance (passed as --workers). Size relative to gateway_cpu — uvicorn recommends ~(2 × vCPU) + 1 for CPU-bound work. Mirrors the AWS stack's gateway_num_workers." + type = number + default = 1 + + validation { + condition = var.gateway_num_workers >= 1 && floor(var.gateway_num_workers) == var.gateway_num_workers + error_message = "gateway_num_workers must be a whole number >= 1." + } +} + # Cloud Run autoscales out of the box (request-rate driven). The min/max # bounds mirror the HPA replica bounds in helm/litellm/values.yaml so each # stack scales over the same range. Cloud Run has no direct CPU-utilization @@ -411,19 +420,20 @@ variable "proxy_config" { # # https://docs.litellm.ai/docs/observability/opentelemetry_v2 # -# OTel v2 is wired into the gateway and backend by default (LITELLM_OTEL_V2=true -# is added to shared_env) but is dormant until otel_endpoint is non-empty. Leave -# otel_endpoint = "" to ship without any OTel exporter; the proxy will boot with -# the flag flipped on but no destination configured, which is functionally -# equivalent to "off" because nothing is exported.
+# OTel v2 is opt-in and gated entirely on otel_endpoint, matching the AWS +# stack. Leave otel_endpoint = "" and nothing OTel-related is added to the +# container env. Set it and the gateway/backend gain LITELLM_OTEL_V2=true +# plus the OTEL_* block (per-component OTEL_SERVICE_NAME, exporter, endpoint, +# environment name, capture-content), with OTEL_HEADERS sourced from +# otel_headers_secret when provided. variable "otel_endpoint" { description = <<-EOT OTLP collector URL (e.g. https://otel.example.com:4318 for HTTP, or - your collector's :4317 for gRPC). When empty the gateway/backend boot - with LITELLM_OTEL_V2=true but no exporter wired in, which is - functionally off. When set, OTEL_EXPORTER and OTEL_ENDPOINT are - injected and spans ship to the collector. + your collector's :4317 for gRPC). Empty disables OTel entirely (no + LITELLM_OTEL_V2, no OTEL_* env). When set, LITELLM_OTEL_V2=true plus + OTEL_EXPORTER / OTEL_ENDPOINT are injected and spans ship to the + collector. EOT type = string default = "" From 1edeba1a31cb848635e344283a5ba3321a4b5b32 Mon Sep 17 00:00:00 2001 From: Yassin Kortam Date: Sat, 6 Jun 2026 12:44:10 -0700 Subject: [PATCH 10/10] fix(terraform/aws): expose skip_final_snapshot through the default example The example wrapper already exposed `s3_force_destroy` so ephemeral / CI stacks could destroy the S3 bucket without manual cleanup, but the matching Aurora knob (`skip_final_snapshot`) was hidden behind the module surface. That meant a `terraform destroy` on a trial stack still produced a `-final-` snapshot, with no opt-out short of editing the module call. Adds `var.skip_final_snapshot` to the example (default `false`, preserving the data-loss tripwire) and threads it through to the module input, mirroring the existing `s3_force_destroy` pattern. Documented alongside it in the tfvars example. 
Verified by deploying the example end-to-end against a clean AWS account (VPC + Aurora w/ IAM auth + Redis + ALB + 3 ECS services), confirming all services reach steady state and the data plane serves traffic, then running `terraform destroy` with `skip_final_snapshot = true` to a clean teardown (93 destroyed, no Aurora snapshot left behind, no leftover billable resources). --- terraform/litellm/aws/examples/default/main.tf | 1 + .../litellm/aws/examples/default/terraform.tfvars.example | 6 ++++-- terraform/litellm/aws/examples/default/variables.tf | 6 ++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/terraform/litellm/aws/examples/default/main.tf b/terraform/litellm/aws/examples/default/main.tf index 0cbd48701aa..3d421099aed 100644 --- a/terraform/litellm/aws/examples/default/main.tf +++ b/terraform/litellm/aws/examples/default/main.tf @@ -31,6 +31,7 @@ module "litellm" { acm_certificate_arn = var.acm_certificate_arn allow_plaintext_alb = var.allow_plaintext_alb s3_force_destroy = var.s3_force_destroy + skip_final_snapshot = var.skip_final_snapshot proxy_config = var.proxy_config gateway_extra_env = var.gateway_extra_env diff --git a/terraform/litellm/aws/examples/default/terraform.tfvars.example b/terraform/litellm/aws/examples/default/terraform.tfvars.example index 3952b06c6a9..4fdfb47e678 100644 --- a/terraform/litellm/aws/examples/default/terraform.tfvars.example +++ b/terraform/litellm/aws/examples/default/terraform.tfvars.example @@ -23,8 +23,10 @@ env = "stage" # allow_plaintext_alb = true # Storage retention: false (default) makes `terraform destroy` refuse on a -# non-empty bucket. Flip to true only for ephemeral / CI stacks. -# s3_force_destroy = false +# non-empty bucket / take an Aurora final snapshot. Flip to true only for +# ephemeral / CI stacks where you accept losing the data. 
+# s3_force_destroy = false +# skip_final_snapshot = false # Component images and per-task sizing/autoscaling are NOT exposed as # variables in this example (it keeps the curated surface small). They diff --git a/terraform/litellm/aws/examples/default/variables.tf b/terraform/litellm/aws/examples/default/variables.tf index f8950ca2eca..74522118a93 100644 --- a/terraform/litellm/aws/examples/default/variables.tf +++ b/terraform/litellm/aws/examples/default/variables.tf @@ -67,6 +67,12 @@ variable "s3_force_destroy" { default = false } +variable "skip_final_snapshot" { + description = "Skip the Aurora final snapshot on destroy (ephemeral/CI only)." + type = bool + default = false +} + variable "proxy_config" { description = "LiteLLM proxy config (contents of config.yaml). Empty → defaults." type = any