Add the stls-bootstrap-nap perf-eval scenario for Azure.

  * modules/terraform/azure: new optional `tags` variable (map(string), default {}),
    merged into local.tags. The built-in tag map is the last merge() argument,
    so its keys take precedence over caller-supplied keys of the same name.
  * pipelines: the stage condition is keyed on the cron schedule's displayName,
    so the string in the condition must be identical to the schedule's
    displayName ("3:00 AM Daily"); otherwise scheduled runs skip the stage.
  * scenarios/perf-eval/stls-bootstrap-nap: Karpenter AKSNodeClass/NodePools,
    Terraform inputs and Terraform test inputs.

diff --git a/modules/terraform/azure/main.tf b/modules/terraform/azure/main.tf
index 76e278c0d5..6090a91b28 100644
--- a/modules/terraform/azure/main.tf
+++ b/modules/terraform/azure/main.tf
@@ -13,14 +13,17 @@ locals {
   aks_aad_enabled                   = lookup(var.json_input, "aks_aad_enabled", false)
   enable_apiserver_vnet_integration = lookup(var.json_input, "enable_apiserver_vnet_integration", false)
 
-  tags = {
-    "owner"             = var.owner
-    "scenario"          = "${var.scenario_type}-${var.scenario_name}"
-    "creation_time"     = timestamp()
-    "deletion_due_time" = timeadd(timestamp(), var.deletion_delay)
-    "run_id"            = local.run_id
-    "SkipAKSCluster"    = "1"
-  }
+  tags = merge(
+    var.tags,
+    {
+      "owner"             = var.owner
+      "scenario"          = "${var.scenario_type}-${var.scenario_name}"
+      "creation_time"     = timestamp()
+      "deletion_due_time" = timeadd(timestamp(), var.deletion_delay)
+      "run_id"            = local.run_id
+      "SkipAKSCluster"    = "1"
+    }
+  )
 
   network_config_map = { for network in var.network_config_list : network.role => network }
 
diff --git a/modules/terraform/azure/variables.tf b/modules/terraform/azure/variables.tf
index 8495bd64df..c1dea6d316 100644
--- a/modules/terraform/azure/variables.tf
+++ b/modules/terraform/azure/variables.tf
@@ -72,6 +72,12 @@ variable "deletion_delay" {
   default     = "2h"
 }
 
+variable "tags" {
+  description = "Optional tags to apply to all resources"
+  type        = map(string)
+  default     = {}
+}
+
 variable "public_ip_config_list" {
   description = "A list of public IP names"
   type = list(object({
diff --git a/pipelines/perf-eval/Secure TLS Bootstrap Benchmark/node-auto-provisioning.yml b/pipelines/perf-eval/Secure TLS Bootstrap Benchmark/node-auto-provisioning.yml
new file mode 100644
index 0000000000..91f776130b
--- /dev/null
+++ b/pipelines/perf-eval/Secure TLS Bootstrap Benchmark/node-auto-provisioning.yml
@@ -0,0 +1,52 @@
+trigger: none
+schedules:
+  # Azure Large Scale Schedule (displayName must match the stage condition below)
+  - cron: "0 3 * * *"
+    displayName: "3:00 AM Daily"
+    branches:
+      include:
+        - main
+    always: true
+
+variables:
+  SCENARIO_TYPE: perf-eval
+  SCENARIO_NAME: stls-bootstrap-nap
+
+stages:
+  - stage: azure_westus2_large
+    condition: |
+      or(
+        eq(variables['Build.CronSchedule.DisplayName'], '3:00 AM Daily'),
+        eq(variables['Build.Reason'], 'Manual')
+      )
+    dependsOn: []
+    jobs:
+      - template: /jobs/competitive-test.yml
+        parameters:
+          cloud: azure
+          regions:
+            - westus2
+          terraform_input_file_mapping:
+            - westus2: "scenarios/perf-eval/stls-bootstrap-nap/terraform-inputs/azure.tfvars"
+          engine: clusterloader2
+          engine_input:
+            image: "ghcr.io/azure/clusterloader2:v20250423"
+          topology: karpenter
+          matrix:
+            large-scale-on-demand:
+              cpu_per_node: 2
+              node_count: 1000
+              pod_count: 1000
+              scale_up_timeout: "60m"
+              scale_down_timeout: "60m"
+              node_label_selector: "karpenter.sh/nodepool = default"
+              node_selector: "{karpenter.sh/nodepool: default}"
+              loop_count: 1
+              warmup_deployment: true
+              warmup_deployment_template: warmup_deployment.yaml
+              vm_size: Standard_D2ds_v4
+              capacity_type: on-demand
+          max_parallel: 1
+          timeout_in_minutes: 360
+          credential_type: service_connection
+          ssh_key_enabled: false
diff --git a/pipelines/system/new-pipeline-test.yml b/pipelines/system/new-pipeline-test.yml
index 63d55f02d9..9e4cf1b9d6 100644
--- a/pipelines/system/new-pipeline-test.yml
+++ b/pipelines/system/new-pipeline-test.yml
@@ -1,25 +1,52 @@
 trigger: none
+schedules:
+  # Azure Large Scale Schedule (displayName must match the stage condition below)
+  - cron: "0 3 * * *"
+    displayName: "3:00 AM Daily"
+    branches:
+      include:
+        - main
+    always: true
 
 variables:
-  SCENARIO_TYPE:
-  SCENARIO_NAME:
+  SCENARIO_TYPE: perf-eval
+  SCENARIO_NAME: stls-bootstrap-nap
 
 stages:
-  - stage: # format: [_]+ (e.g. azure_eastus2, aws_eastus_westus)
+  - stage: azure_westus2_large
+    condition: |
+      or(
+        eq(variables['Build.CronSchedule.DisplayName'], '3:00 AM Daily'),
+        eq(variables['Build.Reason'], 'Manual')
+      )
     dependsOn: []
     jobs:
-      - template: /jobs/competitive-test.yml # must keep as is
+      - template: /jobs/competitive-test.yml
         parameters:
-          cloud: # e.g. azure, aws
-          regions: # list of regions
-            - region1 # e.g. eastus2
-          topology: # e.g. cluster-autoscaler
-          engine: # e.g. clusterloader2
-          matrix: # list of test parameters to customize the provisioned resources
-            :
-              :
-              :
-          max_parallel: # required
-          credential_type: service_connection # required
+          cloud: azure
+          regions:
+            - westus2
+          terraform_input_file_mapping:
+            - westus2: "scenarios/perf-eval/stls-bootstrap-nap/terraform-inputs/azure.tfvars"
+          engine: clusterloader2
+          engine_input:
+            image: "ghcr.io/azure/clusterloader2:v20250423"
+          topology: karpenter
+          matrix:
+            large-scale-on-demand:
+              cpu_per_node: 2
+              node_count: 50
+              pod_count: 50
+              scale_up_timeout: "60m"
+              scale_down_timeout: "60m"
+              node_label_selector: "karpenter.sh/nodepool = default"
+              node_selector: "{karpenter.sh/nodepool: default}"
+              loop_count: 1
+              warmup_deployment: true
+              warmup_deployment_template: warmup_deployment.yaml
+              vm_size: Standard_D2ds_v4
+              capacity_type: on-demand
+          max_parallel: 1
+          timeout_in_minutes: 360
+          credential_type: service_connection
           ssh_key_enabled: false
-          timeout_in_minutes: 60 # if not specified, default is 60
diff --git a/scenarios/perf-eval/stls-bootstrap-nap/kubernetes/karpenter_nodepool.azure.yml b/scenarios/perf-eval/stls-bootstrap-nap/kubernetes/karpenter_nodepool.azure.yml
new file mode 100644
index 0000000000..deb2653f35
--- /dev/null
+++ b/scenarios/perf-eval/stls-bootstrap-nap/kubernetes/karpenter_nodepool.azure.yml
@@ -0,0 +1,74 @@
+# Shared AKSNodeClass (common for both Spot and On-Demand)
+---
+apiVersion: karpenter.azure.com/v1alpha2
+kind: AKSNodeClass
+metadata:
+  name: default
+  annotations:
+    kubernetes.io/description: "General purpose AKSNodeClass for running Ubuntu2204 nodes"
+spec:
+  imageFamily: Ubuntu2204
+
+# On-Demand NodePool (default)
+---
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: default
+  annotations:
+    kubernetes.io/description: "General purpose On-Demand NodePool"
+spec:
+  disruption:
+    consolidationPolicy: WhenEmpty
+    consolidateAfter: 1s
+    budgets:
+      - nodes: "100%"
+  template:
+    spec:
+      nodeClassRef:
+        group: karpenter.azure.com
+        kind: AKSNodeClass
+        name: default
+      expireAfter: Never
+      requirements:
+        - key: kubernetes.io/os
+          operator: In
+          values: ["linux"]
+        - key: karpenter.sh/capacity-type
+          operator: In
+          values: ["on-demand"]
+        - key: karpenter.azure.com/sku-name
+          operator: In
+          values: [Standard_D2_v5]
+
+# Spot NodePool
+---
+apiVersion: karpenter.sh/v1
+kind: NodePool
+metadata:
+  name: spot
+  annotations:
+    kubernetes.io/description: "Spot NodePool for burstable cost-efficient workloads"
+spec:
+  disruption:
+    consolidationPolicy: WhenEmpty
+    consolidateAfter: 1s
+    budgets:
+      - nodes: "100%"
+  template:
+    spec:
+      nodeClassRef:
+        group: karpenter.azure.com
+        kind: AKSNodeClass
+        name: default
+      expireAfter: Never
+      requirements:
+        - key: kubernetes.io/os
+          operator: In
+          values: ["linux"]
+        - key: karpenter.sh/capacity-type
+          operator: In
+          values: ["spot"]
+        - key: karpenter.azure.com/sku-name
+          operator: In
+          values: [Standard_D2_v5]
diff --git a/scenarios/perf-eval/stls-bootstrap-nap/terraform-inputs/azure.tfvars b/scenarios/perf-eval/stls-bootstrap-nap/terraform-inputs/azure.tfvars
new file mode 100644
index 0000000000..d3110ea868
--- /dev/null
+++ b/scenarios/perf-eval/stls-bootstrap-nap/terraform-inputs/azure.tfvars
@@ -0,0 +1,49 @@
+scenario_type  = "perf-eval"
+scenario_name  = "stls-bootstrap-nap"
+deletion_delay = "2h"
+owner          = "aks"
+tags = {
+  enable-stls-nap = "True"
+}
+
+aks_config_list = []
+
+aks_cli_config_list = [
+  {
+    role     = "nap"
+    aks_name = "nap"
+    sku_tier = "standard"
+    aks_custom_headers = [
+      "AKSHTTPCustomFeatures=Microsoft.ContainerService/EnableSecureTLSBootstrapping"
+    ]
+    kubernetes_version = "1.33"
+    default_node_pool = {
+      name       = "system"
+      node_count = 5
+      vm_size    = "Standard_D4_v5"
+    }
+    extra_node_pool = []
+    optional_parameters = [
+      {
+        name  = "node-provisioning-mode"
+        value = "Auto"
+      },
+      {
+        name  = "network-plugin"
+        value = "azure"
+      },
+      {
+        name  = "network-plugin-mode"
+        value = "overlay"
+      },
+      {
+        name  = "node-init-taints"
+        value = "CriticalAddonsOnly=true:NoSchedule"
+      },
+      {
+        name  = "pod-cidr"
+        value = "10.128.0.0/11"
+      }
+    ]
+  }
+]
diff --git a/scenarios/perf-eval/stls-bootstrap-nap/terraform-test-inputs/azure.json b/scenarios/perf-eval/stls-bootstrap-nap/terraform-test-inputs/azure.json
new file mode 100644
index 0000000000..fe80d2d474
--- /dev/null
+++ b/scenarios/perf-eval/stls-bootstrap-nap/terraform-test-inputs/azure.json
@@ -0,0 +1,4 @@
+{
+  "run_id" : "123456789",
+  "region" : "westus2"
+}