From 8b32cc1beb7b88846fcb1239765acab12ac2d373 Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Tue, 5 May 2026 20:16:24 -0400
Subject: [PATCH 1/7] test: Add Torch AOTI Tests

This change:

- Creates a new L0_torch_aoti test suite.
- Adds complex Torch AOTI model generation to qa/common/gen_qa_models.py.
- Cleans up existing AOTI model generation in qa/common/gen_qa_models.py.
- Enables torchvision AOTI model generation in qa/common/gen_qa_model_repository.
---
 qa/L0_torch_aoti/.gitignore               |   7 +
 qa/L0_torch_aoti/test.sh                  | 147 +++++++
 qa/L0_torch_aoti/torch_aoti_infer_test.py | 306 ++++++++++++++
 qa/common/gen_qa_model_repository         |   2 +-
 qa/common/gen_qa_models.py                | 466 ++++++++++++++++++----
 5 files changed, 842 insertions(+), 86 deletions(-)
 create mode 100644 qa/L0_torch_aoti/.gitignore
 create mode 100755 qa/L0_torch_aoti/test.sh
 create mode 100755 qa/L0_torch_aoti/torch_aoti_infer_test.py

diff --git a/qa/L0_torch_aoti/.gitignore b/qa/L0_torch_aoti/.gitignore
new file mode 100644
index 0000000000..ffea82cd8f
--- /dev/null
+++ b/qa/L0_torch_aoti/.gitignore
@@ -0,0 +1,7 @@
+models/
+
+*.log
+
+1
+2
+test_results
\ No newline at end of file
diff --git a/qa/L0_torch_aoti/test.sh b/qa/L0_torch_aoti/test.sh
new file mode 100755
index 0000000000..67da22cd78
--- /dev/null
+++ b/qa/L0_torch_aoti/test.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+source ../common/util.sh
+
+if [[ "${DEBUG}" == "true" ]]; then
+    set -x
+else
+    set +x
+fi
+
+COLOR_DARK="\033[90m"
+COLOR_ERROR="\033[31m"
+COLOR_INFO="\033[94m"
+COLOR_RESET="\033[0m"
+COLOR_STATUS="\033[36m"
+COLOR_SUCCESS="\033[32m"
+COLOR_WARNING="\033[33m"
+RET=0
+
+REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
+if [[ "$#" -ge 1 ]]; then
+    REPO_VERSION=$1
+fi
+if [[ -z "$REPO_VERSION" ]]; then
+    echo -e "${COLOR_ERROR}Repository version must be specified${COLOR_RESET}" &1>2
+    echo -e "${COLOR_ERROR}\n***\n*** Test Failed\n***${COLOR_RESET}" &1>2
+    exit 1
+fi
+if [[ ! 
-z "$TEST_REPO_ARCH" ]]; then
+    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
+fi
+
+export CUDA_VISIBLE_DEVICES=0
+
+MODELDIR=${MODELDIR:=`pwd`/models}
+DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
+TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
+SERVER=${TRITON_DIR}/bin/tritonserver
+BACKEND_DIR=${TRITON_DIR}/backends
+
+# PyTorch on SBSA requires libgomp to be loaded first. See the following
+# GitHub issue for more information:
+# https://github.com/pytorch/pytorch/issues/2575
+arch=`uname -m`
+echo -e "${COLOR_DARK}Detected architecture: ${arch}${COLOR_RESET}"
+if [[ "${arch}" == "aarch64" ]]; then
+    SERVER_LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libgomp.so.1
+    echo -e "${COLOR_DARK}SERVER_LD_PRELOAD=${SERVER_LD_PRELOAD}${COLOR_RESET}"
+fi
+
+# If BACKENDS is not specified, default to the pytorch backend
+BACKENDS=${BACKENDS:="pytorch"}
+export BACKENDS
+
+# Copy the models into the model repository
+echo -e "${COLOR_DARK}Setting up model repository in ${MODELDIR}${COLOR_RESET}"
+rm -rf ${MODELDIR} && mkdir -p ${MODELDIR}
+models=(
+    "torch_aoti_complex_index"
+    "torch_aoti_complex_named"
+    "torch_aoti_int8_int8"
+    "torch_aoti_int16_int16"
+    "torch_aoti_int32_int32"
+    "torch_aoti_int64_int64"
+    "torch_aoti_float16_float16"
+    "torch_aoti_float32_float32"
+    "torchvision_aoti"
+)
+for model in "${models[@]}"; do
+    cp -r ${DATADIR}/qa_model_repository/${model} ${MODELDIR}/${model}
+    echo -e "${COLOR_DARK}ls ${MODELDIR}/${model}${COLOR_RESET}"
+    ls -lha ${MODELDIR}/${model}
+done
+echo -e "${COLOR_DARK}ls ${MODELDIR}${COLOR_RESET}"
+ls -lha ${MODELDIR}
+
+SERVER_ARGS="--model-repository=${MODELDIR} --log-verbose=1"
+SERVER_LOG="./torch_aoti_complex_named-server.log"
+CLIENT_LOG="./torch_aoti_complex_named-client.log"
+
+echo -e "${COLOR_DARK}Running ${SERVER} with model repository ${MODELDIR}${COLOR_RESET}"
+run_server
+if [[ "${SERVER_PID}" -eq 0 ]]; then
+    echo -e "${COLOR_ERROR}\n***\n*** Failed to start ${SERVER}\n***${COLOR_RESET}" 1>&2
+    cat ${SERVER_LOG} 1>&2
+    echo -e "\n" 1>&2
+    exit 1
+fi
+
+# Install the torch framework
+echo -e "${COLOR_DARK}Installing PyTorch framework required by tests${COLOR_RESET}"
+pip install torch
+
+# Run the Tests
+TEST_NAME="torch_aoti_infer_test"
+python3 ./${TEST_NAME}.py >> ${CLIENT_LOG} 2>&1
+EXIT_CODE=$?
+if [[ ${EXIT_CODE} -ne 0 ]]; then
+    echo -e "${COLOR_ERROR}\n***\n*** Test '${TEST_NAME}' Failed with exit code ${EXIT_CODE}\n***${COLOR_RESET}" 1>&2
+    cat ${CLIENT_LOG} 1>&2
+    echo -e "\n" 1>&2
+    RET=1
+else
+    echo -e "${COLOR_INFO}\n***\n*** Test '${TEST_NAME}' Passed\n***${COLOR_RESET}"
+fi
+
+# Cleanup
+echo -e "${COLOR_DARK}Killing server (pid: ${SERVER_PID})${COLOR_RESET}"
+kill -s SIGINT ${SERVER_PID}
+echo -e "${COLOR_DARK}Removing model repository${COLOR_RESET}"
+for model in "${models[@]}"; do
+    rm -rf ${MODELDIR}/${model}
+done
+
+# Report results and exit.
+if [[ ${RET} -ne 0 ]]; then
+    echo -e "${COLOR_ERROR}\n***\n*** Test Suite FAILED\n***${COLOR_RESET}" 1>&2
+else
+    echo -e "${COLOR_SUCCESS}\n***\n*** Test Suite PASSED\n***${COLOR_RESET}"
fi
+
+exit ${RET}
diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py
new file mode 100755
index 0000000000..e04a0949b0
--- /dev/null
+++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py
@@ -0,0 +1,306 @@
+#!/usr/bin/python
+# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import sys +import time + +sys.path.append("../common") + +import unittest + +import numpy as np +import test_util as tu +import torch +import tritonclient.http as http + + +class TorchAotiTest(tu.TestResultCollector): + def _get_complex_input_shape(self): + return (1, 16) + + def _get_complex_output_shape(self): + return (1, 16) + + def _get_complex_input_data(self, shape): + return [ + torch.randint(low=0, high=127, size=shape, dtype=torch.int8).numpy(), + torch.randint(low=0, high=127, size=shape, dtype=torch.int8).numpy(), + torch.randint(low=0, high=127, size=shape, dtype=torch.int8).numpy(), + torch.randint(low=0, high=127, size=shape, dtype=torch.int8).numpy(), + ] + + def _get_simple_input_data(self, shape, io_type): + if io_type in [torch.int8, torch.int16, torch.int32, torch.int64]: + return torch.randint(low=0, high=127, size=shape, dtype=io_type).numpy() + elif io_type in [torch.float16, torch.float32, torch.float64]: + return torch.randn(size=shape, dtype=io_type).numpy() + else: + raise ValueError(f"Unsupported data type: {io_type}") + + def _get_torchvision_input_data(self, shape): + return torch.randn(size=shape, dtype=torch.float32).numpy() + + def _dtype_to_triton_dtype(self, dtype): + if dtype == torch.int8: + return "INT8" + elif dtype == torch.int16: + return "INT16" + elif dtype == torch.int32: + return "INT32" + elif dtype == torch.int64: + return "INT64" + elif dtype == torch.float16: + return "FP16" + elif dtype == torch.float32: + return "FP32" + else: + raise ValueError(f"Unsupported data type: {dtype}") + + def _get_simple_model_name(self, io_type): + if io_type == torch.int8: + return "torch_aoti_int8_int8" + elif io_type == torch.int16: + return "torch_aoti_int16_int16" + elif io_type == torch.int32: + return "torch_aoti_int32_int32" + elif io_type == torch.int64: + return "torch_aoti_int64_int64" + elif io_type == torch.float16: + return "torch_aoti_float16_float16" + elif io_type == torch.float32: + return "torch_aoti_float32_float32" + else: + raise ValueError(f"Unsupported data type: {io_type}") + + def 
test_complex_index(self): + MODEL_NAME = "torch_aoti_complex_index" + INPUT_SHAPE = self._get_complex_input_shape() + OUTPUT_SHAPE = self._get_complex_output_shape() + + input_data = self._get_complex_input_data(INPUT_SHAPE) + + start = time.time() + + with http.InferenceServerClient("localhost:8000") as client: + inputs = [ + http.InferInput("INPUT__0", input_data[0].shape, "INT8"), + http.InferInput("INPUT__1", input_data[1].shape, "INT8"), + http.InferInput("INPUT__2", input_data[2].shape, "INT8"), + http.InferInput("INPUT__3", input_data[3].shape, "INT8"), + ] + + inputs[0].set_data_from_numpy(input_data[0], binary_data=True) + inputs[1].set_data_from_numpy(input_data[1], binary_data=True) + inputs[2].set_data_from_numpy(input_data[2], binary_data=True) + inputs[3].set_data_from_numpy(input_data[3], binary_data=True) + + output_names = [ + "OUTPUT__0", + "OUTPUT__1", + "OUTPUT__2", + "OUTPUT__3", + "OUTPUT__4", + "OUTPUT__5", + ] + + outputs = [] + for output_name in output_names: + outputs.append(http.InferRequestedOutput(output_name, binary_data=True)) + + output_data = [] + results = client.infer(MODEL_NAME, inputs, outputs=outputs) + + for output_name in output_names: + output_data.append(results.as_numpy(output_name)) + + assert len(outputs) == len(output_data) + for data in output_data: + assert data.shape == OUTPUT_SHAPE + + assert (output_data[0] == (input_data[0] + input_data[1])).all() + assert (output_data[1] == input_data[0] - input_data[1]).all() + assert (output_data[2] == input_data[0]).all() + assert (output_data[3] == input_data[1]).all() + assert (output_data[4] == input_data[2]).all() + assert (output_data[5] == input_data[3]).all() + + end = time.time() + assert (end - start) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + + def test_complex_named(self): + MODEL_NAME = "torch_aoti_complex_named" + INPUT_SHAPE = self._get_complex_input_shape() + OUTPUT_SHAPE = self._get_complex_output_shape() + + input_data = self._get_complex_input_data(INPUT_SHAPE) + + start = time.time() + + with http.InferenceServerClient("localhost:8000") as client: + inputs = [ + http.InferInput("ARGS[0]", input_data[0].shape, "INT8"), + http.InferInput("ARGS[1]", input_data[1].shape, "INT8"), + http.InferInput("ARGS[2][option1]", input_data[2].shape, "INT8"), + http.InferInput("ARGS[2][option2]", input_data[3].shape, "INT8"), + ] + + inputs[0].set_data_from_numpy(input_data[0], binary_data=True) + inputs[1].set_data_from_numpy(input_data[1], binary_data=True) + inputs[2].set_data_from_numpy(input_data[2], binary_data=True) + inputs[3].set_data_from_numpy(input_data[3], binary_data=True) + + output_names = [ + "RESULT[AAA]", + "RESULT[BBB][0]", + "RESULT[BBB][1]", + "RESULT[CCC][option1]", + "RESULT[CCC][option2]", + "RESULT[ZZZ]", + ] + + outputs = [] + for output_name in output_names: + outputs.append(http.InferRequestedOutput(output_name, binary_data=True)) + + output_data = [] + results = client.infer(MODEL_NAME, inputs, outputs=outputs) + + for output_name in output_names: + output_data.append(results.as_numpy(output_name)) + + assert len(outputs) == len(output_data) + for data in output_data: + assert data.shape == OUTPUT_SHAPE + + assert (output_data[0] == (input_data[0] + input_data[1])).all() + assert (output_data[1] == input_data[0]).all() + assert (output_data[2] == input_data[1]).all() + assert (output_data[3] == input_data[2]).all() + assert (output_data[4] == input_data[3]).all() + assert (output_data[5] == (input_data[0] - input_data[1])).all() + + end = time.time() 
+ assert (end - start) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + + def test_simple_model(self): + io_types = [ + torch.int8, + torch.int16, + torch.int32, + torch.int64, + torch.float16, + torch.float32, + ] + for io_type in io_types: + MODEL_NAME = self._get_simple_model_name(io_type) + INPUT_SHAPE = (16,) + OUTPUT_SHAPE = (16,) + TRITON_IO_TYPE = self._dtype_to_triton_dtype(io_type) + + input_data = ( + self._get_simple_input_data(INPUT_SHAPE, io_type), + self._get_simple_input_data(INPUT_SHAPE, io_type), + ) + + start = time.time() + + with http.InferenceServerClient("localhost:8000") as client: + inputs = [ + http.InferInput("ARGS[0]", input_data[0].shape, TRITON_IO_TYPE), + http.InferInput("ARGS[1]", input_data[1].shape, TRITON_IO_TYPE), + ] + + inputs[0].set_data_from_numpy(input_data[0], binary_data=True) + inputs[1].set_data_from_numpy(input_data[1], binary_data=True) + + output_names = [ + "RESULT", + ] + + outputs = [] + for output_name in output_names: + outputs.append( + http.InferRequestedOutput(output_name, binary_data=True) + ) + + output_data = [] + results = client.infer(MODEL_NAME, inputs, outputs=outputs) + + for output_name in output_names: + output_data.append(results.as_numpy(output_name)) + + assert len(outputs) == len(output_data) + for data in output_data: + assert data.shape == OUTPUT_SHAPE + assert (data == input_data[0] + input_data[1]).all() + + end = time.time() + assert ( + end - start + ) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + + def test_torchvision(self): + MODEL_NAME = "torchvision_aoti" + INPUT_SHAPE = (1, 3, 224, 224) + OUTPUT_SHAPE = (1, 1000) + + input_data = self._get_torchvision_input_data(INPUT_SHAPE) + input_data[0][0] = 1.0 + + start = time.time() + + with http.InferenceServerClient("localhost:8000") as client: + inputs = [ + http.InferInput("ARGS[0]", input_data.shape, "FP32"), + ] + + inputs[0].set_data_from_numpy(input_data, binary_data=True) + + output_names = [ + "RESULT", + ] + + outputs = [] + for output_name in output_names: + outputs.append(http.InferRequestedOutput(output_name, binary_data=True)) + + output_data = [] + results = client.infer(MODEL_NAME, inputs, outputs=outputs) + + for output_name in output_names: + output_data.append(results.as_numpy(output_name)) + + assert len(outputs) == len(output_data) + for data in output_data: + assert data.shape == OUTPUT_SHAPE + + end = time.time() + assert (end - start) < 0.2, f"Inference time {end - start} time exceeds 200ms" + + +if __name__ == "__main__": + unittest.main() diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 328f42bbe0..d6ed5738f8 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -263,9 +263,9 @@ set -e PATH=$PATH:/usr/local/cuda-13.0/bin python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --models_dir=$TRITON_MDLS_QA_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --models_dir=$TRITON_MDLS_QA_MODEL +python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torchvision-aoti --models_dir=$TRITON_MDLS_QA_MODEL chmod -R 777 $TRITON_MDLS_QA_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL -python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL chmod -R 777 
$TRITON_MDLS_QA_IDENTITY_MODEL diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index d509562bff..cbfce101a6 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -47,6 +47,7 @@ from typing import List, Tuple _color_blue = "\033[94m" +_color_cyan = "\033[36m" _color_green = "\033[32m" _color_magenta = "\033[35m" _color_red = "\033[31m" @@ -1298,69 +1299,44 @@ def generate_sample_inputs( input_shape = [abs(ips) for ips in input_shape] if input_dtype == np.int8: - input0 = torch.randint(-128, 127, input_shape, dtype=torch.int8, device=device) - input1 = torch.randint(-128, 127, input_shape, dtype=torch.int8, device=device) + input0 = torch.zeros(input_shape, dtype=torch.int8, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int8, device=device) elif input_dtype == np.int16: - input0 = torch.randint( - -32768, 32767, input_shape, dtype=torch.int16, device=device - ) - input1 = torch.randint( - -32768, 32767, input_shape, dtype=torch.int16, device=device - ) + input0 = torch.zeros(input_shape, dtype=torch.int16, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int16, device=device) elif input_dtype == np.int32: - input0 = torch.randint( - -2147483648, 2147483647, input_shape, dtype=torch.int32, device=device - ) - input1 = torch.randint( - -2147483648, 2147483647, input_shape, dtype=torch.int32, device=device - ) + input0 = torch.zeros(input_shape, dtype=torch.int32, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int32, device=device) elif input_dtype == np.int64: - input0 = torch.randint( - -9223372036854775808, - 9223372036854775807, - input_shape, - dtype=torch.int64, - device=device, - ) - input1 = torch.randint( - -9223372036854775808, - 9223372036854775807, - input_shape, - dtype=torch.int64, - device=device, - ) + input0 = torch.zeros(input_shape, dtype=torch.int64, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int64, device=device) elif input_dtype == np.float16: - input0 = torch.randn(*input_shape, dtype=torch.float16, device=device) - input1 = torch.randn(*input_shape, dtype=torch.float16, device=device) + input0 = torch.zeros(input_shape, dtype=torch.float16, device=device) + input1 = torch.zeros(input_shape, dtype=torch.float16, device=device) elif input_dtype == np.float32: - input0 = torch.randn(*input_shape, dtype=torch.float32, device=device) - input1 = torch.randn(*input_shape, dtype=torch.float32, device=device) + input0 = torch.zeros(input_shape, dtype=torch.float32, device=device) + input1 = torch.zeros(input_shape, dtype=torch.float32, device=device) elif input_dtype == np.float64: - input0 = torch.randn(*input_shape, dtype=torch.float64, device=device) - input1 = torch.randn(*input_shape, dtype=torch.float64, device=device) + input0 = torch.zeros(input_shape, dtype=torch.float64, device=device) + input1 = torch.zeros(input_shape, dtype=torch.float64, device=device) elif input_dtype == np.uint8: - input0 = torch.randint(0, 255, input_shape, dtype=torch.uint8, device=device) - input1 = torch.randint(0, 255, input_shape, dtype=torch.uint8, device=device) + input0 = torch.zeros(input_shape, dtype=torch.uint8, device=device) + input1 = torch.zeros(input_shape, dtype=torch.uint8, device=device) elif input_dtype == np.uint16: - input0 = torch.randint(0, 65535, input_shape, dtype=torch.uint16, device=device) - input1 = torch.randint(0, 65535, input_shape, dtype=torch.uint16, device=device) + input0 = torch.zeros(input_shape, dtype=torch.uint16, device=device) + input1 = 
torch.zeros(input_shape, dtype=torch.uint16, device=device) elif input_dtype == np.uint32: - input0 = torch.randint( - 0, 4294967295, input_shape, dtype=torch.uint32, device=device - ) - input1 = torch.randint( - 0, 4294967295, input_shape, dtype=torch.uint32, device=device - ) + input0 = torch.zeros(input_shape, dtype=torch.uint32, device=device) + input1 = torch.zeros(input_shape, dtype=torch.uint32, device=device) elif input_dtype == np.uint64: - input0 = torch.randint( - 0, 18446744073709551615, input_shape, dtype=torch.uint64, device=device - ) - input1 = torch.randint( - 0, 18446744073709551615, input_shape, dtype=torch.uint64, device=device - ) + input0 = torch.zeros(input_shape, dtype=torch.uint64, device=device) + input1 = torch.zeros(input_shape, dtype=torch.uint64, device=device) else: - input0 = torch.randn(*input_shape, device=device) - input1 = torch.randn(*input_shape, device=device) + print( + f"{_color_yellow}warning: dtype {input_dtype} is unsupported; falling back to torch.int32{_color_reset}" + ) + input0 = torch.zeros(input_shape, dtype=torch.int32, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int32, device=device) return (input0, input1) @@ -1418,7 +1394,7 @@ def create_torch_aoti_modelfile( ) return False - model_version_dir = f"{models_dir}/{model_name}/{model_version}" + model_version_dir = os.path.join(models_dir, model_name, str(model_version)) print(f"{_color_green}Creating model {model_name}{_color_reset}") @@ -1465,13 +1441,14 @@ def forward(self, INPUT0: torch.Tensor, INPUT1: torch.Tensor) -> torch.Tensor: model.to(device) model = model.eval() - sample_input = generate_sample_inputs(input_shape, input_dtype, device) + sample_inputs = generate_sample_inputs(input_shape, input_dtype, device) + package_path = os.path.join(model_version_dir, "model.pt2") try: - ep = torch.export.export(model, sample_input) + exported_model = torch.export.export(model, sample_inputs) torch._inductor.aoti_compile_and_package( - ep, - package_path=f"{model_version_dir}/model.pt2", + exported_model, + package_path=package_path, ) except Exception as e: print( @@ -1484,13 +1461,162 @@ def forward(self, INPUT0: torch.Tensor, INPUT1: torch.Tensor) -> torch.Tensor: return True +def create_torch_aoti_complex_modelfile( + models_dir: str, +): + base_name = "torch_aoti_complex" + model_names = [ + f"{base_name}_named", + f"{base_name}_index", + ] + model_version_dirs = [ + os.path.join(models_dir, model_names[0], "1"), + os.path.join(models_dir, model_names[1], "1"), + ] + + for model_version_dir in model_version_dirs: + try: + os.makedirs(model_version_dir) + except OSError: + pass # ignore existing dir + + print(f"{_color_green}Creating model {base_name}{_color_reset}") + + class TorchAotiComplex(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward( + self, + hdata: torch.Tensor, + vdata: torch.Tensor, + options: dict[str, torch.Tensor], + ) -> dict[ + str, + torch.Tensor | tuple[torch.Tensor, torch.Tensor] | dict[str, torch.Tensor], + ]: + out = { + "AAA": hdata + vdata, + "ZZZ": hdata - vdata, + "BBB": ( + hdata, + vdata, + ), + "CCC": options, + } + + return out + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = TorchAotiComplex() + model.to(device) + model = model.eval() + + SHAPE = (1, 16) + + sample_args = ( + torch.zeros(SHAPE, dtype=torch.int8, device=device), + torch.zeros(SHAPE, dtype=torch.int8, device=device), + { + "option1": torch.zeros(SHAPE, dtype=torch.int8, device=device), + "option2": 
torch.zeros(SHAPE, dtype=torch.int8, device=device), + }, + ) + + # Export and package the model + print(f"{_color_green}Exporting and packaging the model...{_color_reset}") + + model_file_name = "model.pt2" + package_paths = [ + os.path.join(model_version_dirs[0], model_file_name), + os.path.join(model_version_dirs[1], model_file_name), + ] + + try: + exported_model = torch.export.export(model, sample_args) + torch._inductor.aoti_compile_and_package( + exported_model, + package_path=package_paths[0], + ) + except Exception as e: + print( + f"{_color_red}error: Failed to create model {base_name}{_color_reset}", + file=sys.stderr, + ) + print(f"\n{_color_red}{e}{_color_reset}\n", file=sys.stderr) + return False + + try: + # Now load and run the packaged model + print(f"{_color_cyan}Loading and running the packaged model...{_color_reset}") + + compiled_model = torch._inductor.aoti_load_package(package_paths[0]) + + print(f"{_color_cyan}Compiled model call spec:{_color_reset}") + + for elem in compiled_model.loader.get_call_spec(): + print(elem) + + print(f"{_color_cyan}Running the compiled model...{_color_reset}") + + with torch.inference_mode(): + hdata = torch.randint( + low=0, + high=127, + size=SHAPE, + dtype=torch.int8, + device=device, + ) + vdata = torch.randint( + low=0, + high=127, + size=SHAPE, + dtype=torch.int8, + device=device, + ) + options = { + "option1": torch.randint( + low=0, + high=127, + size=SHAPE, + dtype=torch.int8, + device=device, + ), + "option2": torch.randint( + low=0, + high=127, + size=SHAPE, + dtype=torch.int8, + device=device, + ), + } + + _ = compiled_model(hdata, vdata, options) + + print( + f'{_color_green}Model "{base_name}" successfully executed.{_color_reset}' + ) + except Exception as e: + print( + f"{_color_red}error: Failed to validate model {base_name}{_color_reset}", + file=sys.stderr, + ) + print(f"\n{_color_red}{e}{_color_reset}\n", file=sys.stderr) + return False + + # Copy the compiled model package to the alternate model folder. + # Both the named and ordinal addressing versions of the model (from Triton's point-of-view) use the same compiled model. 
+ shutil.copy(package_paths[0], package_paths[1]) + + return True + + def create_torchvision_aoti_modelfile( models_dir: str, max_batch: int, - model_version: int, ): model_name = "torchvision_aoti" - model_version_dir = f"{models_dir}/{model_name}/{model_version}" + model_version_dir = os.path.join(models_dir, model_name, "1") try: os.makedirs(model_version_dir) @@ -1504,16 +1630,16 @@ def create_torchvision_aoti_modelfile( model = model.to(device) model = model.eval() + SHAPE = (max_batch, 3, 244, 244) + # Example input tensor with batch size 1 and 3 color channels (RGB), height and width of 224 - input_tensor = torch.randn(max_batch, 3, 224, 224, device=device) + sample_inputs = (torch.zeros(SHAPE, dtype=torch.float32, device=device),) - try: - ep = torch.export.export(model, (input_tensor,)) + package_path = os.path.join(model_version_dir, "model.pt2") - torch._inductor.aoti_compile_and_package( - ep, - package_path=f"{model_version_dir}/model.pt2", - ) + try: + ep = torch.export.export(model, sample_inputs) + torch._inductor.aoti_compile_and_package(ep, package_path=package_path) except Exception as e: print( f"{_color_red}error: Failed to create model {model_name}{_color_reset}", @@ -1609,9 +1735,11 @@ def create_libtorch_modelconfig( except OSError: pass # ignore existing dir - with open(f"{config_dir}/config.pbtxt", "w") as file: + config_path = os.path.join(config_dir, "config.pbtxt") + + with open(config_path, "w") as file: file.write(config) - print(f"Created {config_dir}/config.pbtxt") + print(f"Created {config_path}") with open(f"{config_dir}/{label_filename}", "w") as file: for l in range(output0_label_cnt): @@ -1650,7 +1778,7 @@ def create_torch_aoti_modelconfig( print(f"{_color_green}Creating config for {model_name}{_color_reset}") label_filename = "output_labels.txt" - config_dir = f"{models_dir}/{model_name}" + config_dir = os.path.join(models_dir, model_name) config = f""" backend: "pytorch" name: "{model_name}" @@ -1658,19 +1786,19 @@ def create_torch_aoti_modelconfig( version_policy: {version_policy_str} input [ {{ - name: "INPUT0" + name: "ARGS[0]" data_type: {np_to_model_dtype(input_dtype)} dims: [ {tu.shape_to_dims_str(input_shape)} ] }}, {{ - name: "INPUT1" + name: "ARGS[1]" data_type: {np_to_model_dtype(input_dtype)} dims: [ {tu.shape_to_dims_str(input_shape)} ] }} ] output [ {{ - name: "OUTPUT__0" + name: "RESULT" data_type: {np_to_model_dtype(output_dtype)} dims: [ {tu.shape_to_dims_str(output_shape)} ] label_filename: "{label_filename}" @@ -1684,14 +1812,170 @@ def create_torch_aoti_modelconfig( except OSError: pass # ignore existing dir - with open(f"{config_dir}/config.pbtxt", "w") as file: + config_path = os.path.join(config_dir, "config.pbtxt") + + with open(config_path, "w") as file: file.write(config) - print(f"Created {config_dir}/config.pbtxt") + print(f"Created {config_path}") - with open(f"{config_dir}/{label_filename}", "w") as file: + label_path = os.path.join(config_dir, label_filename) + + with open(label_path, "w") as file: for l in range(output_label_cnt): file.write(f"label{l}\n") - print(f"Created {config_dir}/{label_filename}") + print(f"Created {label_path}") + + +def create_torch_aoti_complex_modelconfig( + models_dir, +): + base_name = "torch_aoti_complex" + model_names = [ + f"{base_name}_named", + f"{base_name}_index", + ] + + print(f"{_color_green}Creating config for {base_name}{_color_reset}") + + config_dirs = [ + os.path.join(models_dir, model_names[0]), + os.path.join(models_dir, model_names[1]), + ] + configs = [ + f""" +backend: 
"pytorch" +platform: "torch_aoti" +name: "{model_names[0]}" +input: [ + {{ + name: "ARGS[0]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "ARGS[1]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "ARGS[2][option1]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "ARGS[2][option2]" + data_type: TYPE_INT8 + dims: [1, 16] + }} +] +output: [ + {{ + name: "RESULT[AAA]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[BBB][0]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[BBB][1]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[CCC][option1]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[CCC][option2]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[ZZZ]" + data_type: TYPE_INT8 + dims: [1, 16] + }} +] +instance_group [{{ kind: {"KIND_GPU" if torch.cuda.is_available() else "KIND_CPU"} }}] +""", + f""" +backend: "pytorch" +name: "{model_names[1]}" +platform: "torch_aoti" +input: [ + {{ + name: "INPUT__0" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "INPUT__1" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "INPUT__2" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "INPUT__3" + data_type: TYPE_INT8 + dims: [1, 16] + }} +] +output: [ + {{ + name: "OUTPUT__0" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__1" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__2" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__3" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__4" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__5" + data_type: TYPE_INT8 + dims: [1, 16] + }} +] +instance_group [{{ kind: {"KIND_GPU" if torch.cuda.is_available() else "KIND_CPU"} }}] +""", + ] + + for i in range(2): + config_dir = config_dirs[i] + try: + os.makedirs(config_dir) + except OSError: + pass # ignore existing dir + + config_path = os.path.join(config_dir, "config.pbtxt") + + with open(config_path, "w") as file: + file.write(configs[i]) + print(f"Created {config_path}") def create_torchvision_aoti_modelconfig( @@ -1703,7 +1987,7 @@ def create_torchvision_aoti_modelconfig( print(f"{_color_green}Creating config for {model_name}{_color_reset}") - config_dir = f"{models_dir}/{model_name}" + config_dir = os.path.join(models_dir, model_name) config = f""" backend: "pytorch" name: "{model_name}" @@ -1711,14 +1995,13 @@ def create_torchvision_aoti_modelconfig( max_batch_size: {max_batch} input [ {{ - name: "INPUT__0" + name: "ARGS[0]" data_type: TYPE_FP32 - format: FORMAT_NCHW dims: [ 3, 224, 224 ] }}] output [ {{ - name: "OUTPUT__0" + name: "RESULT" data_type: TYPE_FP32 dims: [ 1000 ] label_filename: "{label_filename}" @@ -1732,15 +2015,19 @@ def create_torchvision_aoti_modelconfig( except OSError: pass # ignore existing dir - with open(f"{config_dir}/config.pbtxt", "w") as file: + config_path = os.path.join(config_dir, "config.pbtxt") + + with open(config_path, "w") as file: file.write(config) - print(f"Created {config_dir}/config.pbtxt") + print(f"Created {config_path}") source_path = os.environ.get("TRITON_GENSRCDIR", default="gen_srcdir") source_filename = os.path.join(source_path, RESNET50_LABEL_FILE) - shutil.copyfile(source_filename, f"{config_dir}/{label_filename}") - print(f"Created {config_dir}/{label_filename}") + target_path = os.path.join(config_dir, label_filename) + + shutil.copyfile(source_filename, target_path) + print(f"Created {target_path}") def 
create_openvino_modelfile(
@@ -2352,6 +2639,8 @@ def create_fixed_models(
     if FLAGS.onnx:
         import onnx
     if FLAGS.libtorch or FLAGS.torch_aoti:
+        import shutil
+
         import torch
         from torch import nn
     if FLAGS.torchvision_aoti:
@@ -2747,7 +3036,14 @@ def create_fixed_models(
             for model_shape in [(-1,), (-1, -1), (-1, -1, -1)]:
                 emu.create_nop_modelconfig(FLAGS.models_dir, model_shape, model_dtype)
 
+    if FLAGS.torch_aoti:
+        print(
+            f"{_color_magenta}PyTorch: Complex AOTI model generation requested{_color_reset}"
+        )
+        if create_torch_aoti_complex_modelfile(FLAGS.models_dir):
+            create_torch_aoti_complex_modelconfig(FLAGS.models_dir)
+
     if FLAGS.torchvision_aoti:
         print(f"{_color_blue}TorchVision AOTI model generation requested{_color_reset}")
-        if create_torchvision_aoti_modelfile(FLAGS.models_dir, 1, 1):
+        if create_torchvision_aoti_modelfile(FLAGS.models_dir, 1):
             create_torchvision_aoti_modelconfig(FLAGS.models_dir, 1)

From 76f247c4e1e6a309447cb59f3afb6b857a0c069b Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Fri, 8 May 2026 12:32:37 -0400
Subject: [PATCH 2/7] Potential fix for pull request finding 'CodeQL / Unused
 import'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
---
 qa/L0_torch_aoti/torch_aoti_infer_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py
index e04a0949b0..cd6691fe70 100755
--- a/qa/L0_torch_aoti/torch_aoti_infer_test.py
+++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py
@@ -32,7 +32,6 @@
 
 import unittest
 
-import numpy as np
 import test_util as tu
 import torch
 import tritonclient.http as http

From 726fdc31ea485a62b3c47e7248e62d3da0bf0c2e Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Tue, 5 May 2026 20:16:24 -0400
Subject: [PATCH 3/7] test: Remove timing assertions from the torchvision AOTI
 test

This change removes the wall-clock inference-timing assertion from
test_torchvision in qa/L0_torch_aoti/torch_aoti_infer_test.py.
---
 qa/L0_torch_aoti/torch_aoti_infer_test.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py
index cd6691fe70..f7c75323d2 100755
--- a/qa/L0_torch_aoti/torch_aoti_infer_test.py
+++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py
@@ -270,8 +270,6 @@ def test_torchvision(self):
         input_data = self._get_torchvision_input_data(INPUT_SHAPE)
         input_data[0][0] = 1.0
 
-        start = time.time()
-
         with http.InferenceServerClient("localhost:8000") as client:
             inputs = [
                 http.InferInput("ARGS[0]", input_data.shape, "FP32"),
@@ -297,9 +295,6 @@ def test_torchvision(self):
         for data in output_data:
             assert data.shape == OUTPUT_SHAPE
 
-        end = time.time()
-        assert (end - start) < 0.2, f"Inference time {end - start} time exceeds 200ms"
-
 
 if __name__ == "__main__":
     unittest.main()

From 676e7f9b4617f18599da8506bc4b58528ac2a326 Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Mon, 11 May 2026 12:27:18 -0400
Subject: [PATCH 4/7] remove gitignore

---
 qa/L0_torch_aoti/.gitignore | 7 -------
 1 file changed, 7 deletions(-)
 delete mode 100644 qa/L0_torch_aoti/.gitignore

diff --git a/qa/L0_torch_aoti/.gitignore b/qa/L0_torch_aoti/.gitignore
deleted file mode 100644
index ffea82cd8f..0000000000
--- a/qa/L0_torch_aoti/.gitignore
+++ /dev/null
@@ -1,7 +0,0 @@
-models/
-
-*.log
-
-1
-2
-test_results
\ No newline at end of file

From 48a3375791b360e2301610cc521806d174dcef76 Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Tue, 12 May 2026 16:19:10 -0400
Subject: [PATCH 5/7] adopt recommended changes from Copilot

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 qa/L0_torch_aoti/test.sh                  |  5 +-
 qa/L0_torch_aoti/torch_aoti_infer_test.py | 61 +++++++++--------------
 qa/common/gen_qa_models.py                |  3 +-
 3 files changed, 28 insertions(+), 41 deletions(-)

diff --git a/qa/L0_torch_aoti/test.sh b/qa/L0_torch_aoti/test.sh
index 67da22cd78..f37751c55e 100755
--- a/qa/L0_torch_aoti/test.sh
+++ b/qa/L0_torch_aoti/test.sh
@@ -47,8 +47,8 @@ if [[ "$#" -ge 1 ]]; then
     REPO_VERSION=$1
 fi
 if [[ -z "$REPO_VERSION" ]]; then
-    echo -e "${COLOR_ERROR}Repository version must be specified${COLOR_RESET}" &1>2
-    echo -e "${COLOR_ERROR}\n***\n*** Test Failed\n***${COLOR_RESET}" &1>2
+    echo -e "${COLOR_ERROR}Repository version must be specified${COLOR_RESET}" 1>&2
+    echo -e "${COLOR_ERROR}\n***\n*** Test Failed\n***${COLOR_RESET}" 1>&2
     exit 1
 fi
 if [[ ! 
-z "$TEST_REPO_ARCH" ]]; then @@ -132,6 +132,7 @@ fi # Cleanup echo -e "${COLOR_DARK}Killing server (pid: ${SERVER_PID})${COLOR_RESET}" kill -s SIGINT ${SERVER_PID} +wait ${SERVER_PID} || true echo -e "${COLOR_DARK}Removing model repository${COLOR_RESET}" for model in "${models[@]}"; do rm -rf ${MODELDIR}/${model} diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py index f7c75323d2..cabc6a305e 100755 --- a/qa/L0_torch_aoti/torch_aoti_infer_test.py +++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py @@ -102,8 +102,6 @@ def test_complex_index(self): input_data = self._get_complex_input_data(INPUT_SHAPE) - start = time.time() - with http.InferenceServerClient("localhost:8000") as client: inputs = [ http.InferInput("INPUT__0", input_data[0].shape, "INT8"), @@ -136,19 +134,16 @@ def test_complex_index(self): for output_name in output_names: output_data.append(results.as_numpy(output_name)) - assert len(outputs) == len(output_data) + self.assertEqual(len(outputs), len(output_data)) for data in output_data: - assert data.shape == OUTPUT_SHAPE - - assert (output_data[0] == (input_data[0] + input_data[1])).all() - assert (output_data[1] == input_data[0] - input_data[1]).all() - assert (output_data[2] == input_data[0]).all() - assert (output_data[3] == input_data[1]).all() - assert (output_data[4] == input_data[2]).all() - assert (output_data[5] == input_data[3]).all() + self.assertEqual(data.shape, OUTPUT_SHAPE) - end = time.time() - assert (end - start) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + self.assertTrue((output_data[0] == (input_data[0] + input_data[1])).all()) + self.assertTrue((output_data[1] == input_data[0] - input_data[1]).all()) + self.assertTrue((output_data[2] == input_data[0]).all()) + self.assertTrue((output_data[3] == input_data[1]).all()) + self.assertTrue((output_data[4] == input_data[2]).all()) + self.assertTrue((output_data[5] == input_data[3]).all()) def test_complex_named(self): MODEL_NAME = "torch_aoti_complex_named" @@ -157,8 +152,6 @@ def test_complex_named(self): input_data = self._get_complex_input_data(INPUT_SHAPE) - start = time.time() - with http.InferenceServerClient("localhost:8000") as client: inputs = [ http.InferInput("ARGS[0]", input_data[0].shape, "INT8"), @@ -191,19 +184,16 @@ def test_complex_named(self): for output_name in output_names: output_data.append(results.as_numpy(output_name)) - assert len(outputs) == len(output_data) + self.assertEqual(len(outputs), len(output_data)) for data in output_data: - assert data.shape == OUTPUT_SHAPE + self.assertEqual(data.shape, OUTPUT_SHAPE) - assert (output_data[0] == (input_data[0] + input_data[1])).all() - assert (output_data[1] == input_data[0]).all() - assert (output_data[2] == input_data[1]).all() - assert (output_data[3] == input_data[2]).all() - assert (output_data[4] == input_data[3]).all() - assert (output_data[5] == (input_data[0] - input_data[1])).all() - - end = time.time() - assert (end - start) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + self.assertTrue((output_data[0] == (input_data[0] + input_data[1])).all()) + self.assertTrue((output_data[1] == input_data[0]).all()) + self.assertTrue((output_data[2] == input_data[1]).all()) + self.assertTrue((output_data[3] == input_data[2]).all()) + self.assertTrue((output_data[4] == input_data[3]).all()) + self.assertTrue((output_data[5] == (input_data[0] - input_data[1])).all()) def test_simple_model(self): io_types = [ @@ -225,8 +215,6 @@ def test_simple_model(self): 
self._get_simple_input_data(INPUT_SHAPE, io_type), ) - start = time.time() - with http.InferenceServerClient("localhost:8000") as client: inputs = [ http.InferInput("ARGS[0]", input_data[0].shape, TRITON_IO_TYPE), @@ -252,15 +240,10 @@ def test_simple_model(self): for output_name in output_names: output_data.append(results.as_numpy(output_name)) - assert len(outputs) == len(output_data) + self.assertEqual(len(outputs), len(output_data)) for data in output_data: - assert data.shape == OUTPUT_SHAPE - assert (data == input_data[0] + input_data[1]).all() - - end = time.time() - assert ( - end - start - ) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + self.assertEqual(data.shape, OUTPUT_SHAPE) + self.assertTrue((data == input_data[0] + input_data[1]).all()) def test_torchvision(self): MODEL_NAME = "torchvision_aoti" @@ -291,9 +274,11 @@ def test_torchvision(self): for output_name in output_names: output_data.append(results.as_numpy(output_name)) - assert len(outputs) == len(output_data) + self.assertEqual(len(outputs), len(output_data)) for data in output_data: - assert data.shape == OUTPUT_SHAPE + self.assertEqual(data.shape, OUTPUT_SHAPE) + output_tensor = torch.from_numpy(data) + self.assertTrue(torch.isfinite(output_tensor).all().item()) if __name__ == "__main__": diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index cbfce101a6..7f2a92f160 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -1630,7 +1630,7 @@ def create_torchvision_aoti_modelfile( model = model.to(device) model = model.eval() - SHAPE = (max_batch, 3, 244, 244) + SHAPE = (max_batch, 3, 224, 224) # Example input tensor with batch size 1 and 3 color channels (RGB), height and width of 224 sample_inputs = (torch.zeros(SHAPE, dtype=torch.float32, device=device),) @@ -3044,6 +3044,7 @@ def create_fixed_models( create_torch_aoti_complex_modelconfig(FLAGS.models_dir) if FLAGS.torchvision_aoti: + # TODO: Add support for variable batch size and version policy for torchvision AOTI models. print(f"{_color_blue}TorchVision AOTI model generation requested{_color_reset}") if create_torchvision_aoti_modelfile(FLAGS.models_dir, 1): create_torchvision_aoti_modelconfig(FLAGS.models_dir, 1) From 4c94fa72db57206a2c387ebbfb75e46df2519db9 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Tue, 12 May 2026 18:09:28 -0400 Subject: [PATCH 6/7] Potential fix for pull request finding 'CodeQL / Unused import' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- qa/L0_torch_aoti/torch_aoti_infer_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py index cabc6a305e..2b93f31a48 100755 --- a/qa/L0_torch_aoti/torch_aoti_infer_test.py +++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py @@ -26,7 +26,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys -import time sys.path.append("../common") From 891098d33f440539468841241d2e601a17dd33f3 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Thu, 14 May 2026 13:54:34 -0400 Subject: [PATCH 7/7] adopt changes requested by @yingeeh. 
--- qa/common/gen_qa_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index 7f2a92f160..0340265677 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -1290,7 +1290,7 @@ def forward(self, INPUT0, INPUT1): traced.save(f"{model_version_dir}/model.pt") -def generate_sample_inputs( +def generate_torch_aoti_sample_inputs( input_shape, input_dtype, device, @@ -1441,7 +1441,7 @@ def forward(self, INPUT0: torch.Tensor, INPUT1: torch.Tensor) -> torch.Tensor: model.to(device) model = model.eval() - sample_inputs = generate_sample_inputs(input_shape, input_dtype, device) + sample_inputs = generate_torch_aoti_sample_inputs(input_shape, input_dtype, device) package_path = os.path.join(model_version_dir, "model.pt2") try:
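
Note on the AOTI flow these patches exercise: model generation reduces to a
short round trip -- export the eager module, compile and package it into a
model.pt2 archive, then load and call the package to validate it, which is
what the generator scripts above do for the complex model. A minimal,
self-contained sketch using only the entry points that appear in the patches
(torch.export.export, torch._inductor.aoti_compile_and_package,
torch._inductor.aoti_load_package); the Add module, shapes, and package path
below are illustrative and not part of the change:

    import torch

    class Add(torch.nn.Module):
        def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
            return a + b

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Add().to(device).eval()
    sample_inputs = (
        torch.zeros((16,), dtype=torch.int32, device=device),
        torch.zeros((16,), dtype=torch.int32, device=device),
    )

    # Export to an ExportedProgram, then compile and package it as a .pt2
    # archive, the artifact the pytorch backend loads for these models.
    exported = torch.export.export(model, sample_inputs)
    torch._inductor.aoti_compile_and_package(exported, package_path="model.pt2")

    # Load the package and run it once to confirm the compiled artifact is
    # callable before it is copied into the model repository.
    compiled = torch._inductor.aoti_load_package("model.pt2")
    with torch.inference_mode():
        out = compiled(*sample_inputs)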