From bae40a4804d76ac1e8764f074e8bd270c5a81880 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 14:44:14 -0400 Subject: [PATCH 01/15] install external secret manager with helm --- .../stacks/dpe-k8s-deployments/main.tf | 6 + modules/argo-cd/main.tf | 1 + modules/external-secrets/main.tf | 44 ++ .../external-secrets/templates/values.yaml | 610 ++++++++++++++++++ modules/external-secrets/variables.tf | 23 + 5 files changed, 684 insertions(+) create mode 100644 modules/external-secrets/main.tf create mode 100644 modules/external-secrets/templates/values.yaml create mode 100644 modules/external-secrets/variables.tf diff --git a/deployments/stacks/dpe-k8s-deployments/main.tf b/deployments/stacks/dpe-k8s-deployments/main.tf index 16c39074..23453b8b 100644 --- a/deployments/stacks/dpe-k8s-deployments/main.tf +++ b/deployments/stacks/dpe-k8s-deployments/main.tf @@ -29,6 +29,12 @@ module "argo-cd" { source = "../../../modules/argo-cd" } +module "external_secrets" { + source = "../../modules/external-secrets" + region = var.region + aws_account_id = var.aws_account_id +} + module "flux-cd" { depends_on = [module.sage-aws-eks-autoscaler] source = "../../../modules/flux-cd" diff --git a/modules/argo-cd/main.tf b/modules/argo-cd/main.tf index 3fc30291..6c2d0d53 100644 --- a/modules/argo-cd/main.tf +++ b/modules/argo-cd/main.tf @@ -14,3 +14,4 @@ resource "helm_release" "argo-cd" { values = [templatefile("${path.module}/templates/values.yaml", {})] } + diff --git a/modules/external-secrets/main.tf b/modules/external-secrets/main.tf new file mode 100644 index 00000000..14187791 --- /dev/null +++ b/modules/external-secrets/main.tf @@ -0,0 +1,44 @@ +resource "kubernetes_namespace" "external_secrets" { + metadata { name = var.namespace } +} + + +# Argo CD Application that installs ESO from the official Helm repo +resource "kubectl_manifest" "external_secrets_app" { + yaml_body = < Date: Fri, 8 Aug 2025 14:50:16 -0400 Subject: [PATCH 02/15] use variable to control 
prune --- modules/external-secrets/main.tf | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/modules/external-secrets/main.tf b/modules/external-secrets/main.tf index 14187791..a36e1c27 100644 --- a/modules/external-secrets/main.tf +++ b/modules/external-secrets/main.tf @@ -15,9 +15,11 @@ metadata: argocd.argoproj.io/sync-wave: "0" spec: project: default + %{if var.auto_deploy} syncPolicy: automated: - prune: true + prune: ${var.auto_prune} + %{endif} sources: - repoURL: 'https://charts.external-secrets.io' chart: external-secrets @@ -33,12 +35,4 @@ spec: server: 'https://kubernetes.default.svc' namespace: external-secrets YAML -} - -resource "local_file" "eso_values" { - content = templatefile( - "${path.module}/templates/values.yaml.tmpl", - { account_id = data.aws_caller_identity.current.account_id } - ) - filename = "${path.module}/templates/values.yaml" -} +} \ No newline at end of file From 8b7cd88383530fe7ebe246cdff138f3ffa07829c Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 14:53:58 -0400 Subject: [PATCH 03/15] update module directory --- deployments/stacks/dpe-k8s-deployments/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployments/stacks/dpe-k8s-deployments/main.tf b/deployments/stacks/dpe-k8s-deployments/main.tf index 23453b8b..4a1104f0 100644 --- a/deployments/stacks/dpe-k8s-deployments/main.tf +++ b/deployments/stacks/dpe-k8s-deployments/main.tf @@ -30,7 +30,7 @@ module "argo-cd" { } module "external_secrets" { - source = "../../modules/external-secrets" + source = "../../../modules/external-secrets" region = var.region aws_account_id = var.aws_account_id } From c019568f9831461275ef6b03a1c8a771aee64322 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 15:59:07 -0400 Subject: [PATCH 04/15] add version.tf --- modules/external-secrets/versions.tf | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 modules/external-secrets/versions.tf 
diff --git a/modules/external-secrets/versions.tf b/modules/external-secrets/versions.tf new file mode 100644 index 00000000..9f47d757 --- /dev/null +++ b/modules/external-secrets/versions.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 5.0" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "~> 2.0" + } + kubectl = { + source = "gavinbunney/kubectl" + version = ">= 1.16.0" + } + } +} From 2baba687f4134303be663cca9da2d7c0fbd11499 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 16:06:34 -0400 Subject: [PATCH 05/15] add region and aws_account_id variable --- modules/external-secrets/variables.tf | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/modules/external-secrets/variables.tf b/modules/external-secrets/variables.tf index 7e3c87f1..50b6e4d2 100644 --- a/modules/external-secrets/variables.tf +++ b/modules/external-secrets/variables.tf @@ -21,3 +21,12 @@ variable "namespace" { type = string } +variable "region" { + description = "AWS region for External Secrets" + type = string +} + +variable "aws_account_id" { + description = "AWS account ID for IRSA role" + type = string +} \ No newline at end of file From ce26095d1f8e55972158af3fddecdafc675a6057 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 16:42:12 -0400 Subject: [PATCH 06/15] add a default namespace --- modules/external-secrets/variables.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/external-secrets/variables.tf b/modules/external-secrets/variables.tf index 50b6e4d2..00a4b77d 100644 --- a/modules/external-secrets/variables.tf +++ b/modules/external-secrets/variables.tf @@ -19,6 +19,7 @@ variable "auto_prune" { variable "namespace" { description = "The namespace to deploy into" type = string + default = "external_secrets" } variable "region" { From fb515f0364b3cf1829537d7e40368eb2038e852c Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 16:57:17 -0400 
Subject: [PATCH 07/15] external secrets name space --- deployments/stacks/dpe-k8s-deployments/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/deployments/stacks/dpe-k8s-deployments/main.tf b/deployments/stacks/dpe-k8s-deployments/main.tf index 4a1104f0..de4b0b55 100644 --- a/deployments/stacks/dpe-k8s-deployments/main.tf +++ b/deployments/stacks/dpe-k8s-deployments/main.tf @@ -33,6 +33,7 @@ module "external_secrets" { source = "../../../modules/external-secrets" region = var.region aws_account_id = var.aws_account_id + namespace = "external-secrets" } module "flux-cd" { From 099760675d6d68393f4812aa2f82b4f39e58bac8 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 17:00:45 -0400 Subject: [PATCH 08/15] revert to use 1.32 as cluster version --- modules/sage-aws-eks/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sage-aws-eks/variables.tf b/modules/sage-aws-eks/variables.tf index 1edd27b7..7c7e85ed 100644 --- a/modules/sage-aws-eks/variables.tf +++ b/modules/sage-aws-eks/variables.tf @@ -6,7 +6,7 @@ variable "cluster_name" { variable "cluster_version" { description = "Version of K8 cluster" type = string - default = "1.33" + default = "1.32" } variable "region" { From fe990317b95a422bb2963250a3d548b2fe2721d6 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 17:04:29 -0400 Subject: [PATCH 09/15] use external-secret instead of external_secret --- deployments/stacks/dpe-k8s-deployments/main.tf | 2 +- modules/external-secrets/main.tf | 4 ++-- modules/external-secrets/variables.tf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/deployments/stacks/dpe-k8s-deployments/main.tf b/deployments/stacks/dpe-k8s-deployments/main.tf index de4b0b55..31a1eb05 100644 --- a/deployments/stacks/dpe-k8s-deployments/main.tf +++ b/deployments/stacks/dpe-k8s-deployments/main.tf @@ -29,7 +29,7 @@ module "argo-cd" { source = "../../../modules/argo-cd" } -module "external_secrets" { +module 
"external-secrets" { source = "../../../modules/external-secrets" region = var.region aws_account_id = var.aws_account_id diff --git a/modules/external-secrets/main.tf b/modules/external-secrets/main.tf index a36e1c27..da2176b1 100644 --- a/modules/external-secrets/main.tf +++ b/modules/external-secrets/main.tf @@ -1,10 +1,10 @@ -resource "kubernetes_namespace" "external_secrets" { +resource "kubernetes_namespace" "external-secrets" { metadata { name = var.namespace } } # Argo CD Application that installs ESO from the official Helm repo -resource "kubectl_manifest" "external_secrets_app" { +resource "kubectl_manifest" "external-secrets-app" { yaml_body = < Date: Fri, 8 Aug 2025 17:16:40 -0400 Subject: [PATCH 10/15] try patch because of IAM error due to using the wrong cluster version --- modules/sage-aws-k8s-node-autoscaler/data.tf | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modules/sage-aws-k8s-node-autoscaler/data.tf b/modules/sage-aws-k8s-node-autoscaler/data.tf index 2d884fa9..37dfcc7f 100644 --- a/modules/sage-aws-k8s-node-autoscaler/data.tf +++ b/modules/sage-aws-k8s-node-autoscaler/data.tf @@ -14,3 +14,17 @@ data "aws_secretsmanager_secret_version" "secret_credentials" { secret_id = data.aws_secretsmanager_secret.spotinst_token.id } +# patch +# Discover the cluster version (or let a var override it) +data "aws_eks_cluster" "this" { + name = var.cluster_name +} + +locals { + k8s_version = coalesce(var.cluster_version, data.aws_eks_cluster.this.version) # e.g., "1.29" +} + +# Amazon Linux 2 EKS-optimized AMI via SSM (non-GPU) +data "aws_ssm_parameter" "eks_worker_ami" { + name = "/aws/service/eks/optimized-ami/${local.k8s_version}/amazon-linux-2/recommended/image_id" +} \ No newline at end of file From fa096fb3a8162ffd04bc61d48974a728b4358d1a Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 17:21:53 -0400 Subject: [PATCH 11/15] move patch to main.tf --- modules/sage-aws-k8s-node-autoscaler/data.tf | 15 --------------- 
modules/sage-aws-k8s-node-autoscaler/main.tf | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/modules/sage-aws-k8s-node-autoscaler/data.tf b/modules/sage-aws-k8s-node-autoscaler/data.tf index 37dfcc7f..c1d0e997 100644 --- a/modules/sage-aws-k8s-node-autoscaler/data.tf +++ b/modules/sage-aws-k8s-node-autoscaler/data.tf @@ -12,19 +12,4 @@ data "aws_secretsmanager_secret" "spotinst_token" { data "aws_secretsmanager_secret_version" "secret_credentials" { secret_id = data.aws_secretsmanager_secret.spotinst_token.id -} - -# patch -# Discover the cluster version (or let a var override it) -data "aws_eks_cluster" "this" { - name = var.cluster_name -} - -locals { - k8s_version = coalesce(var.cluster_version, data.aws_eks_cluster.this.version) # e.g., "1.29" -} - -# Amazon Linux 2 EKS-optimized AMI via SSM (non-GPU) -data "aws_ssm_parameter" "eks_worker_ami" { - name = "/aws/service/eks/optimized-ami/${local.k8s_version}/amazon-linux-2/recommended/image_id" } \ No newline at end of file diff --git a/modules/sage-aws-k8s-node-autoscaler/main.tf b/modules/sage-aws-k8s-node-autoscaler/main.tf index 2d6fd448..a8cdd3d3 100644 --- a/modules/sage-aws-k8s-node-autoscaler/main.tf +++ b/modules/sage-aws-k8s-node-autoscaler/main.tf @@ -104,6 +104,23 @@ resource "helm_release" "ocean-kubernetes-controller" { } +# -----------patch------------------------- +# --- Discover the current cluster version --- +data "aws_eks_cluster" "this" { + name = var.cluster_name +} + +locals { + # Use var.cluster_version if set, else detect from live cluster + k8s_version = coalesce(var.cluster_version, data.aws_eks_cluster.this.version) +} + +# --- Lookup recommended AL2 AMI from SSM --- +data "aws_ssm_parameter" "eks_worker_ami" { + name = "/aws/service/eks/optimized-ami/${local.k8s_version}/amazon-linux-2/recommended/image_id" +} +# -----------patch------------------------- + module "ocean-aws-k8s" { source = "spotinst/ocean-aws-k8s/spotinst" version = "1.4.0" From 
5ecd582cb54fe65ba101a276281a7ee2f7b8cebb Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 18:47:54 -0400 Subject: [PATCH 12/15] add an environment variable to control if auto scaler get deployed; add a env variable in PR run in spacelift to skip it for now --- modules/sage-aws-k8s-node-autoscaler/main.tf | 19 +------------------ .../sage-aws-k8s-node-autoscaler/variables.tf | 5 +++++ 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/modules/sage-aws-k8s-node-autoscaler/main.tf b/modules/sage-aws-k8s-node-autoscaler/main.tf index a8cdd3d3..a7465ed6 100644 --- a/modules/sage-aws-k8s-node-autoscaler/main.tf +++ b/modules/sage-aws-k8s-node-autoscaler/main.tf @@ -103,25 +103,8 @@ resource "helm_release" "ocean-kubernetes-controller" { } } - -# -----------patch------------------------- -# --- Discover the current cluster version --- -data "aws_eks_cluster" "this" { - name = var.cluster_name -} - -locals { - # Use var.cluster_version if set, else detect from live cluster - k8s_version = coalesce(var.cluster_version, data.aws_eks_cluster.this.version) -} - -# --- Lookup recommended AL2 AMI from SSM --- -data "aws_ssm_parameter" "eks_worker_ami" { - name = "/aws/service/eks/optimized-ami/${local.k8s_version}/amazon-linux-2/recommended/image_id" -} -# -----------patch------------------------- - module "ocean-aws-k8s" { + count = var.enable_autoscaler ? 
1 : 0 source = "spotinst/ocean-aws-k8s/spotinst" version = "1.4.0" diff --git a/modules/sage-aws-k8s-node-autoscaler/variables.tf b/modules/sage-aws-k8s-node-autoscaler/variables.tf index adbe0837..9cabc1e6 100644 --- a/modules/sage-aws-k8s-node-autoscaler/variables.tf +++ b/modules/sage-aws-k8s-node-autoscaler/variables.tf @@ -54,3 +54,8 @@ variable "single_az" { description = "Single AZ" type = bool } + +variable "enable_autoscaler" { + type = bool + default = true +} \ No newline at end of file From 7e80f4c03681560e4b1ee10813ba478c66215f68 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 18:58:56 -0400 Subject: [PATCH 13/15] try to guard the root --- deployments/stacks/dpe-k8s-deployments/main.tf | 1 + deployments/stacks/dpe-k8s-deployments/variables.tf | 5 +++++ modules/sage-aws-k8s-node-autoscaler/main.tf | 1 - modules/sage-aws-k8s-node-autoscaler/variables.tf | 5 ----- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/deployments/stacks/dpe-k8s-deployments/main.tf b/deployments/stacks/dpe-k8s-deployments/main.tf index 31a1eb05..4400ff21 100644 --- a/deployments/stacks/dpe-k8s-deployments/main.tf +++ b/deployments/stacks/dpe-k8s-deployments/main.tf @@ -2,6 +2,7 @@ locals { git_revision = var.git_revision } module "sage-aws-eks-autoscaler" { + count = var.enable_autoscaler ? 
1 : 0 source = "spacelift.io/sagebionetworks/sage-aws-eks-autoscaler/aws" version = "0.9.0" cluster_name = var.cluster_name diff --git a/deployments/stacks/dpe-k8s-deployments/variables.tf b/deployments/stacks/dpe-k8s-deployments/variables.tf index 69961893..107185d8 100644 --- a/deployments/stacks/dpe-k8s-deployments/variables.tf +++ b/deployments/stacks/dpe-k8s-deployments/variables.tf @@ -119,3 +119,8 @@ variable "docker_access_token" { type = string default = "" } + +variable "enable_autoscaler" { + type = bool + default = true +} diff --git a/modules/sage-aws-k8s-node-autoscaler/main.tf b/modules/sage-aws-k8s-node-autoscaler/main.tf index a7465ed6..f7cc9111 100644 --- a/modules/sage-aws-k8s-node-autoscaler/main.tf +++ b/modules/sage-aws-k8s-node-autoscaler/main.tf @@ -104,7 +104,6 @@ resource "helm_release" "ocean-kubernetes-controller" { } module "ocean-aws-k8s" { - count = var.enable_autoscaler ? 1 : 0 source = "spotinst/ocean-aws-k8s/spotinst" version = "1.4.0" diff --git a/modules/sage-aws-k8s-node-autoscaler/variables.tf b/modules/sage-aws-k8s-node-autoscaler/variables.tf index 9cabc1e6..4455eada 100644 --- a/modules/sage-aws-k8s-node-autoscaler/variables.tf +++ b/modules/sage-aws-k8s-node-autoscaler/variables.tf @@ -53,9 +53,4 @@ variable "desired_capacity" { variable "single_az" { description = "Single AZ" type = bool -} - -variable "enable_autoscaler" { - type = bool - default = true } \ No newline at end of file From 29bcdf5f53637036cc48b2a73b9b5be4cfa0ffa4 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Fri, 8 Aug 2025 23:44:07 -0400 Subject: [PATCH 14/15] try use a different source for ocean aws k8s; remove temporary patch --- deployments/stacks/dpe-k8s-deployments/main.tf | 1 - deployments/stacks/dpe-k8s-deployments/variables.tf | 7 +------ modules/sage-aws-k8s-node-autoscaler/main.tf | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/deployments/stacks/dpe-k8s-deployments/main.tf 
b/deployments/stacks/dpe-k8s-deployments/main.tf index 4400ff21..31a1eb05 100644 --- a/deployments/stacks/dpe-k8s-deployments/main.tf +++ b/deployments/stacks/dpe-k8s-deployments/main.tf @@ -2,7 +2,6 @@ locals { git_revision = var.git_revision } module "sage-aws-eks-autoscaler" { - count = var.enable_autoscaler ? 1 : 0 source = "spacelift.io/sagebionetworks/sage-aws-eks-autoscaler/aws" version = "0.9.0" cluster_name = var.cluster_name diff --git a/deployments/stacks/dpe-k8s-deployments/variables.tf b/deployments/stacks/dpe-k8s-deployments/variables.tf index 107185d8..e986cc74 100644 --- a/deployments/stacks/dpe-k8s-deployments/variables.tf +++ b/deployments/stacks/dpe-k8s-deployments/variables.tf @@ -118,9 +118,4 @@ variable "docker_access_token" { description = "The access token to use for docker authenticated pulls. Created via by setting 'TF_VAR_docker_access_token' within spacelift as an environment variable" type = string default = "" -} - -variable "enable_autoscaler" { - type = bool - default = true -} +} \ No newline at end of file diff --git a/modules/sage-aws-k8s-node-autoscaler/main.tf b/modules/sage-aws-k8s-node-autoscaler/main.tf index f7cc9111..dcb0135d 100644 --- a/modules/sage-aws-k8s-node-autoscaler/main.tf +++ b/modules/sage-aws-k8s-node-autoscaler/main.tf @@ -104,7 +104,7 @@ resource "helm_release" "ocean-kubernetes-controller" { } module "ocean-aws-k8s" { - source = "spotinst/ocean-aws-k8s/spotinst" + source = "git::https://github.com/spotinst/terraform-spotinst-ocean-aws-k8s.git?ref=7a30a60b4d0af3a7847467ac373511f3da58e40a" version = "1.4.0" # Configuration From 6016818053b51f624f938f80648365ee7b2c0da8 Mon Sep 17 00:00:00 2001 From: Lingling Peng Date: Mon, 11 Aug 2025 10:25:13 -0400 Subject: [PATCH 15/15] remove quotes; remove version --- modules/sage-aws-k8s-node-autoscaler/main.tf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/sage-aws-k8s-node-autoscaler/main.tf b/modules/sage-aws-k8s-node-autoscaler/main.tf 
index dcb0135d..3655d9c9 100644 --- a/modules/sage-aws-k8s-node-autoscaler/main.tf +++ b/modules/sage-aws-k8s-node-autoscaler/main.tf @@ -104,8 +104,7 @@ resource "helm_release" "ocean-kubernetes-controller" { } module "ocean-aws-k8s" { - source = "git::https://github.com/spotinst/terraform-spotinst-ocean-aws-k8s.git?ref=7a30a60b4d0af3a7847467ac373511f3da58e40a" - version = "1.4.0" + source = "git::https://github.com/spotinst/terraform-spotinst-ocean-aws-k8s.git?ref=7a30a60b4d0af3a7847467ac373511f3da58e40a" # Configuration cluster_name = var.cluster_name