Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions modules/python/clusterloader2/cri/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ name: resource-consumer
{{$loadType := DefaultParam .CL2_LOAD_TYPE "memory"}}

{{$provider := DefaultParam .CL2_PROVIDER "aks"}}
{{$registry := DefaultParam .CL2_REGISTRY "akscritelescope" }}
{{$osType := DefaultParam .CL2_OS_TYPE "linux"}}
{{$scrapeKubelets := DefaultParam .CL2_SCRAPE_KUBELETS false}}
{{$hostNetwork := DefaultParam .CL2_HOST_NETWORK "true"}}
Expand Down Expand Up @@ -99,6 +100,7 @@ steps:
CPURequest: {{$cpu}}m
LoadType: {{$loadType}}
Provider: {{$provider}}
Registry: {{$registry}}
OSType: {{$osType}}
HostNetwork: {{$hostNetwork}}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
{{$CPURequest := DefaultParam .CPURequest "100m"}}
{{$LoadType := DefaultParam .LoadType "memory"}}
{{$Provider := DefaultParam .Provider "aks"}}
{{$Registry := DefaultParam .Registry "akscritelescope"}}
{{$OSType := DefaultParam .OSType "linux"}}
{{$HostNetwork := DefaultParam .HostNetwork "true"}}

Expand Down Expand Up @@ -32,7 +33,7 @@ spec:
- name: resource-consumer-memory
imagePullPolicy: IfNotPresent
{{if eq $OSType "windows"}}
image: akscritelescope.azurecr.io/e2e-test-images/resource-consumer:1.13-windows-amd64-ltsc2022
image: {{$Registry}}.azurecr.io/e2e-test-images/resource-consumer:1.13-windows-amd64-ltsc2022
command:
- testlimit.exe
args:
Expand All @@ -46,7 +47,7 @@ spec:
- "1"
{{else}}
{{if eq $Provider "aks"}}
image: akscritelescope.azurecr.io/e2e-test-images/resource-consumer:1.13
image: {{$Registry}}.azurecr.io/e2e-test-images/resource-consumer:1.13
{{else}}
image: registry.k8s.io/e2e-test-images/resource-consumer:1.13
{{end}}
Expand All @@ -70,12 +71,12 @@ spec:
- name: resource-consumer-cpu
imagePullPolicy: IfNotPresent
{{if eq $OSType "windows"}}
image: akscritelescope.azurecr.io/e2e-test-images/resource-consumer:1.13-windows-amd64-ltsc2022
image: {{$Registry}}.azurecr.io/e2e-test-images/resource-consumer:1.13-windows-amd64-ltsc2022
command:
- /consume-cpu/consume-cpu.exe
{{else}}
{{if eq $Provider "aks"}}
image: akscritelescope.azurecr.io/e2e-test-images/resource-consumer:1.13
image: {{$Registry}}.azurecr.io/e2e-test-images/resource-consumer:1.13
{{else}}
image: registry.k8s.io/e2e-test-images/resource-consumer:1.13
{{end}}
Expand Down
13 changes: 11 additions & 2 deletions modules/python/clusterloader2/cri/cri.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
def override_config_clusterloader2(
node_count, node_per_step, max_pods, repeats, operation_timeout,
load_type, scale_enabled, pod_startup_latency_threshold, provider,
os_type, scrape_kubelets, host_network, override_file):
registry, os_type, scrape_kubelets, host_network, override_file):
client = KubernetesClient(os.path.expanduser("~/.kube/config"))
nodes = client.get_nodes(label_selector="cri-resource-consume=true")
if len(nodes) == 0:
Expand Down Expand Up @@ -88,6 +88,7 @@ def override_config_clusterloader2(
file.write("CL2_PROMETHEUS_NODE_SELECTOR: \"prometheus: \\\"true\\\"\"\n")
file.write(f"CL2_POD_STARTUP_LATENCY_THRESHOLD: {pod_startup_latency_threshold}\n")
file.write(f"CL2_PROVIDER: {provider}\n")
file.write(f"CL2_REGISTRY: {registry}\n")
file.write(f"CL2_OS_TYPE: {os_type}\n")
file.write(f"CL2_SCRAPE_KUBELETS: {str(scrape_kubelets).lower()}\n")
file.write(f"CL2_HOST_NETWORK: {str(host_network).lower()}\n")
Expand Down Expand Up @@ -141,10 +142,16 @@ def collect_clusterloader2(
run_id,
run_url,
result_file,
scrape_kubelets
scrape_kubelets,
scrape_acr_info=False,
acr_info
):
if scrape_kubelets:
verify_measurement()

if scrape_acr_info:
# attach ACR info to cloud_info
# append acr info to cloud_info

details = parse_xml_to_json(os.path.join(cl2_report_dir, "junit.xml"), indent = 2)
json_data = json.loads(details)
Expand Down Expand Up @@ -248,6 +255,7 @@ def main():
help="Pod startup latency threshold",
)
parser_override.add_argument("--provider", type=str, help="Cloud provider name")
parser_override.add_argument("--registry", type=str, help="Container image registry")
parser_override.add_argument(
"--os_type", type=str, choices=["linux", "windows"], default="linux"
)
Expand Down Expand Up @@ -342,6 +350,7 @@ def main():
args.scale_enabled,
args.pod_startup_latency_threshold,
args.provider,
args.registry,
args.os_type,
args.scrape_kubelets,
args.host_network,
Expand Down
9 changes: 7 additions & 2 deletions modules/python/tests/test_cri.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def test_override_config_clusterloader2(self, mock_kubernetes_client, mock_open)
scale_enabled=False,
pod_startup_latency_threshold="15s",
provider="aks",
registry="akscritelescope",
os_type="linux",
scrape_kubelets=True,
host_network=True,
Expand All @@ -86,6 +87,7 @@ def test_override_config_clusterloader2(self, mock_kubernetes_client, mock_open)
handle.write.assert_any_call("CL2_PROMETHEUS_NODE_SELECTOR: \"prometheus: \\\"true\\\"\"\n")
handle.write.assert_any_call("CL2_POD_STARTUP_LATENCY_THRESHOLD: 15s\n")
handle.write.assert_any_call("CL2_PROVIDER: aks\n")
handle.write.assert_any_call("CL2_REGISTRY: akscritelescope\n")
handle.write.assert_any_call("CL2_OS_TYPE: linux\n")
handle.write.assert_any_call("CL2_SCRAPE_KUBELETS: true\n")
handle.write.assert_any_call("CL2_HOST_NETWORK: true\n")
Expand Down Expand Up @@ -114,6 +116,7 @@ def test_override_config_clusterloader2_host_network_false(self, mock_kubernetes
scale_enabled=False,
pod_startup_latency_threshold="15s",
provider="aks",
registry="akscritelescope",
os_type="linux",
scrape_kubelets=False,
host_network=False,
Expand Down Expand Up @@ -228,6 +231,7 @@ def test_override_command(self, mock_override):
"--scale_enabled", "True",
"--pod_startup_latency_threshold", "10s",
"--provider", "aws",
"--registry", "",
"--os_type", "linux",
"--scrape_kubelets", "False",
"--host_network", "False",
Expand All @@ -236,7 +240,7 @@ def test_override_command(self, mock_override):
with patch.object(sys, 'argv', test_args):
main()
mock_override.assert_called_once_with(
5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "linux", False, False, "/tmp/override.yaml"
5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "", "linux", False, False, "/tmp/override.yaml"
)

@patch("clusterloader2.cri.cri.override_config_clusterloader2")
Expand All @@ -253,14 +257,15 @@ def test_override_command_default_host_network(self, mock_override):
"--scale_enabled", "True",
"--pod_startup_latency_threshold", "10s",
"--provider", "aws",
"--registry", "",
"--os_type", "linux",
"--scrape_kubelets", "False",
"--cl2_override_file", "/tmp/override.yaml"
]
with patch.object(sys, 'argv', test_args):
main()
mock_override.assert_called_once_with(
5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "linux", False, True, "/tmp/override.yaml"
5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "", "linux", False, True, "/tmp/override.yaml"
)

@patch("clusterloader2.cri.cri.execute_clusterloader2")
Expand Down
41 changes: 25 additions & 16 deletions pipelines/system/new-pipeline-test.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,34 @@
trigger: none

variables:
SCENARIO_TYPE: <scenario-type>
SCENARIO_NAME: <scenario-name>
SCENARIO_TYPE: perf-eval
SCENARIO_NAME: cri-resource-consume

stages:
- stage: <stage-name> # format: <cloud>[_<region>]+ (e.g. azure_eastus2, aws_eastus_westus)
- stage: azure_swedencentral_managed_vs_ephemeral_1_31
dependsOn: []
jobs:
- template: /jobs/competitive-test.yml # must keep as is
- template: /jobs/competitive-test.yml
parameters:
cloud: <cloud> # e.g. azure, aws
regions: # list of regions
- region1 # e.g. eastus2
topology: <topology> # e.g. cluster-autoscaler
engine: <engine> # e.g. clusterloader2
matrix: # list of test parameters to customize the provisioned resources
<case-name>:
<key1>: <value1>
<key2>: <value2>
max_parallel: <number of concurrent jobs> # required
credential_type: service_connection # required
cloud: azure
regions:
- swedencentral
engine: clusterloader2
engine_input:
image: "ghcr.io/azure/clusterloader2:v20250513"
topology: cri-resource-consume
matrix:
n10-p300-memory-ephemeral:
node_count: 10
max_pods: 30
repeats: 1
operation_timeout: 3m
load_type: memory
kubernetes_version: "1.31"
scrape_kubelets: True
scrape_acr: True
registry_endpoint: "acrtelescope.azurecr.io"
max_parallel: 3
timeout_in_minutes: 120
credential_type: service_connection
ssh_key_enabled: false
timeout_in_minutes: 60 # if not specified, default is 60
7 changes: 6 additions & 1 deletion steps/engine/clusterloader2/cri/collect.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ parameters:
default: {}
- name: region
type: string
- name: registry_info
type: string
default: "registry info collected"

steps:
- template: /steps/cloud/${{ parameters.cloud }}/collect-cloud-info.yml
Expand All @@ -25,7 +28,9 @@ steps:
--run_id $RUN_ID \
--run_url $RUN_URL \
--result_file $TEST_RESULTS_FILE \
--scrape_kubelets ${SCRAPE_KUBELETS:-False}
--scrape_kubelets ${SCRAPE_KUBELETS:-False} \
--scrape_acr_info ${SCRAPE_ACR:-False} \
--registry_info ${REGISTRY_INFO:-""}
workingDirectory: modules/python
env:
CLOUD: ${{ parameters.cloud }}
Expand Down
1 change: 1 addition & 0 deletions steps/engine/clusterloader2/cri/execute.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ steps:
--scale_enabled ${SCALE_ENABLED:-False} \
--pod_startup_latency_threshold ${POD_STARTUP_LATENCY_THRESHOLD:-15s} \
--provider $CLOUD \
--registry $REGISTRY_ENDPOINT \
--os_type ${OS_TYPE:-linux} \
--scrape_kubelets ${SCRAPE_KUBELETS:-False} \
--host_network ${HOST_NETWORK:-True} \
Expand Down
34 changes: 34 additions & 0 deletions steps/topology/image-pull-acr/collect-clusterloader2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Topology step template: optionally gathers container-registry (ACR)
# information, then hands over to the shared CRI ClusterLoader2 collect steps.
#
# Parameters:
#   cloud        - cloud name, forwarded to the CRI collect template and
#                  exposed to the script as $CLOUD.
#   engine_input - engine configuration object, forwarded unchanged.
#   regions      - list of regions; only the first entry is forwarded.
parameters:
- name: cloud
  type: string
  default: ''
- name: engine_input
  type: object
  default: {}
- name: regions
  type: object
  default: {}

steps:
- script: |
    # The flag may arrive capitalised (e.g. a matrix entry `scrape_acr: True`),
    # so normalise the casing before comparing; default to "false" when unset.
    scrape_acr="$(printf '%s' "${SCRAPE_ACR:-false}" | tr '[:upper:]' '[:lower:]')"

    # Define the value up front so the output variable is always set
    # explicitly, including on the skip path.
    registry_info=""

    if [ "$scrape_acr" = "true" ]; then
      echo "Scraping ACR info of $REGISTRY_ENDPOINT..."
      # TODO: placeholder value - replace with the real registry query.
      registry_info="acr info for bla bla bla"
    else
      echo "SCRAPE_ACR is false — skipping"
    fi

    # Publish the value as an output variable of this step (name: collectRegistry).
    echo "##vso[task.setvariable variable=registry_info;isOutput=true]$registry_info"
  name: collectRegistry
  workingDirectory: modules/python
  env:
    CLOUD: ${{ parameters.cloud }}
    RUN_URL: $(RUN_URL)
  displayName: "Collect ACR Info"
- template: /steps/engine/clusterloader2/cri/collect.yml
  parameters:
    cloud: ${{ parameters.cloud }}
    engine_input: ${{ parameters.engine_input }}
    region: ${{ parameters.regions[0] }}
    # A step output consumed later in the same job is referenced with macro
    # syntax <stepName>.<variable>; the `dependencies` context is not
    # available inside a compile-time ${{ }} template expression.
    registry_info: $(collectRegistry.registry_info)

18 changes: 18 additions & 0 deletions steps/topology/image-pull-acr/execute-clusterloader2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Topology step template: delegates execution to the shared CRI
# ClusterLoader2 execute steps, forwarding the cloud, the engine input and
# the first configured region.
parameters:
  - name: cloud
    type: string
    default: ""
  - name: engine_input
    type: object
    default: {}
  - name: regions
    type: object
    default: {}

steps:
  - template: /steps/engine/clusterloader2/cri/execute.yml
    parameters:
      cloud: ${{ parameters.cloud }}
      engine_input: ${{ parameters.engine_input }}
      # Only the first region in the list is used.
      region: ${{ parameters.regions[0] }}

16 changes: 16 additions & 0 deletions steps/topology/image-pull-acr/validate-resources.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Topology step template: refreshes the kubeconfig for the client role in the
# first region, then runs the large-cluster ClusterLoader2 validation.
parameters:
  - name: cloud
    type: string
  # Declared for interface compatibility; not referenced in this template.
  - name: engine
    type: string
  - name: regions
    type: object

steps:
  - template: /steps/cloud/${{ parameters.cloud }}/update-kubeconfig.yml
    parameters:
      role: client
      region: ${{ parameters.regions[0] }}
  - template: /steps/engine/clusterloader2/large-cluster/validate.yml
    parameters:
      # NOTE(review): fixed node count - confirm it matches the cluster size
      # provisioned for this topology.
      desired_nodes: 14
Loading