From 8b32cc1beb7b88846fcb1239765acab12ac2d373 Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Tue, 5 May 2026 20:16:24 -0400
Subject: [PATCH 1/7] test: Add Torch AOTI Tests

This change:

- Creates a new L0_torch_aoti test suite.
- Adds complex Torch AOTI model generation to qa/common/gen_qa_models.py.
- Cleans up existing AOTI model generation in qa/common/gen_qa_models.py.
- Enables torchvision AOTI model generation in qa/common/gen_qa_model_repository.
---
 qa/L0_torch_aoti/.gitignore               |   7 +
 qa/L0_torch_aoti/test.sh                  | 147 +++++++
 qa/L0_torch_aoti/torch_aoti_infer_test.py | 306 ++++++++++++++
 qa/common/gen_qa_model_repository         |   2 +-
 qa/common/gen_qa_models.py                | 466 ++++++++++++++++++----
 5 files changed, 842 insertions(+), 86 deletions(-)
 create mode 100644 qa/L0_torch_aoti/.gitignore
 create mode 100755 qa/L0_torch_aoti/test.sh
 create mode 100755 qa/L0_torch_aoti/torch_aoti_infer_test.py

diff --git a/qa/L0_torch_aoti/.gitignore b/qa/L0_torch_aoti/.gitignore
new file mode 100644
index 0000000000..ffea82cd8f
--- /dev/null
+++ b/qa/L0_torch_aoti/.gitignore
@@ -0,0 +1,7 @@
+models/
+
+*.log
+
+1
+2
+test_results
\ No newline at end of file
diff --git a/qa/L0_torch_aoti/test.sh b/qa/L0_torch_aoti/test.sh
new file mode 100755
index 0000000000..67da22cd78
--- /dev/null
+++ b/qa/L0_torch_aoti/test.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+source ../common/util.sh
+
+if [[ "${DEBUG}" == "true" ]]; then
+    set -x
+else
+    set +x
+fi
+
+COLOR_DARK="\033[90m"
+COLOR_ERROR="\033[31m"
+COLOR_INFO="\033[94m"
+COLOR_RESET="\033[0m"
+COLOR_STATUS="\033[36m"
+COLOR_SUCCESS="\033[32m"
+COLOR_WARNING="\033[33m"
+RET=0
+
+REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
+if [[ "$#" -ge 1 ]]; then
+    REPO_VERSION=$1
+fi
+if [[ -z "$REPO_VERSION" ]]; then
+    echo -e "${COLOR_ERROR}Repository version must be specified${COLOR_RESET}" &1>2
+    echo -e "${COLOR_ERROR}\n***\n*** Test Failed\n***${COLOR_RESET}" &1>2
+    exit 1
+fi
+if [[ ! 
-z "$TEST_REPO_ARCH" ]]; then
+    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
+fi
+
+export CUDA_VISIBLE_DEVICES=0
+
+MODELDIR=${MODELDIR:=`pwd`/models}
+DATADIR=${DATADIR:="/data/inferenceserver/${REPO_VERSION}"}
+TRITON_DIR=${TRITON_DIR:="/opt/tritonserver"}
+SERVER=${TRITON_DIR}/bin/tritonserver
+BACKEND_DIR=${TRITON_DIR}/backends
+
+# PyTorch on SBSA requires libgomp to be loaded first. See the following
+# GitHub issue for more information:
+# https://github.com/pytorch/pytorch/issues/2575
+arch=`uname -m`
+echo -e "${COLOR_DARK}Detected architecture: ${arch}${COLOR_RESET}"
+if [[ "${arch}" == "aarch64" ]]; then
+    SERVER_LD_PRELOAD=/usr/lib/$(uname -m)-linux-gnu/libgomp.so.1
+    echo -e "${COLOR_DARK}SERVER_LD_PRELOAD=${SERVER_LD_PRELOAD}${COLOR_RESET}"
+fi
+
+# If BACKENDS is not specified, default to the pytorch backend
+BACKENDS=${BACKENDS:="pytorch"}
+export BACKENDS
+
+# Copy the models into the model repository
+echo -e "${COLOR_DARK}Setting up model repository in ${MODELDIR}${COLOR_RESET}"
+rm -rf ${MODELDIR} && mkdir -p ${MODELDIR}
+models=(
+    "torch_aoti_complex_index"
+    "torch_aoti_complex_named"
+    "torch_aoti_int8_int8"
+    "torch_aoti_int16_int16"
+    "torch_aoti_int32_int32"
+    "torch_aoti_int64_int64"
+    "torch_aoti_float16_float16"
+    "torch_aoti_float32_float32"
+    "torchvision_aoti"
+)
+for model in "${models[@]}"; do
+    cp -r ${DATADIR}/qa_model_repository/${model} ${MODELDIR}/${model}
+    echo -e "${COLOR_DARK}ls ${MODELDIR}/${model}${COLOR_RESET}"
+    ls -lha ${MODELDIR}/${model}
+done
+echo -e "${COLOR_DARK}ls ${MODELDIR}${COLOR_RESET}"
+ls -lha ${MODELDIR}
+
+SERVER_ARGS="--model-repository=${MODELDIR} --log-verbose=1"
+SERVER_LOG="./torch_aoti_complex_named-server.log"
+CLIENT_LOG="./torch_aoti_complex_named-client.log"
+
+echo -e "${COLOR_DARK}Running ${SERVER} with model repository ${MODELDIR}${COLOR_RESET}"
+run_server
+if [[ "${SERVER_PID}" -eq 0 ]]; then
+    echo -e "${COLOR_ERROR}\n***\n*** Failed to start ${SERVER}\n***${COLOR_RESET}" 1>&2
+    cat ${SERVER_LOG} 1>&2
+    echo -e "\n" 1>&2
+    exit 1
+fi
+
+# Install the torch framework
+echo -e "${COLOR_DARK}Installing PyTorch framework required by tests${COLOR_RESET}"
+pip install torch
+
+# Run the Tests
+TEST_NAME="torch_aoti_infer_test"
+python3 ./${TEST_NAME}.py >> ${CLIENT_LOG} 2>&1
+EXIT_CODE=$?
+if [[ ${EXIT_CODE} -ne 0 ]]; then
+    echo -e "${COLOR_ERROR}\n***\n*** Test '${TEST_NAME}' Failed with exit code ${EXIT_CODE}\n***${COLOR_RESET}" 1>&2
+    cat ${CLIENT_LOG} 1>&2
+    echo -e "\n" 1>&2
+    RET=1
+else
+    echo -e "${COLOR_INFO}\n***\n*** Test '${TEST_NAME}' Passed\n***${COLOR_RESET}"
+fi
+
+# Cleanup
+echo -e "${COLOR_DARK}Killing server (pid: ${SERVER_PID})${COLOR_RESET}"
+kill -s SIGINT ${SERVER_PID}
+echo -e "${COLOR_DARK}Removing model repository${COLOR_RESET}"
+for model in "${models[@]}"; do
+    rm -rf ${MODELDIR}/${model}
+done
+
+# Report results and exit.
+if [[ ${RET} -ne 0 ]]; then
+    echo -e "${COLOR_ERROR}\n***\n*** Test Suite FAILED\n***${COLOR_RESET}" 1>&2
+else
+    echo -e "${COLOR_SUCCESS}\n***\n*** Test Suite PASSED\n***${COLOR_RESET}"
fi
+
+exit ${RET}
diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py
new file mode 100755
index 0000000000..e04a0949b0
--- /dev/null
+++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py
@@ -0,0 +1,306 @@
+#!/usr/bin/python
+# Copyright 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import sys +import time + +sys.path.append("../common") + +import unittest + +import numpy as np +import test_util as tu +import torch +import tritonclient.http as http + + +class TorchAotiTest(tu.TestResultCollector): + def _get_complex_input_shape(self): + return (1, 16) + + def _get_complex_output_shape(self): + return (1, 16) + + def _get_complex_input_data(self, shape): + return [ + torch.randint(low=0, high=127, size=shape, dtype=torch.int8).numpy(), + torch.randint(low=0, high=127, size=shape, dtype=torch.int8).numpy(), + torch.randint(low=0, high=127, size=shape, dtype=torch.int8).numpy(), + torch.randint(low=0, high=127, size=shape, dtype=torch.int8).numpy(), + ] + + def _get_simple_input_data(self, shape, io_type): + if io_type in [torch.int8, torch.int16, torch.int32, torch.int64]: + return torch.randint(low=0, high=127, size=shape, dtype=io_type).numpy() + elif io_type in [torch.float16, torch.float32, torch.float64]: + return torch.randn(size=shape, dtype=io_type).numpy() + else: + raise ValueError(f"Unsupported data type: {io_type}") + + def _get_torchvision_input_data(self, shape): + return torch.randn(size=shape, dtype=torch.float32).numpy() + + def _dtype_to_triton_dtype(self, dtype): + if dtype == torch.int8: + return "INT8" + elif dtype == torch.int16: + return "INT16" + elif dtype == torch.int32: + return "INT32" + elif dtype == torch.int64: + return "INT64" + elif dtype == torch.float16: + return "FP16" + elif dtype == torch.float32: + return "FP32" + else: + raise ValueError(f"Unsupported data type: {dtype}") + + def _get_simple_model_name(self, io_type): + if io_type == torch.int8: + return "torch_aoti_int8_int8" + elif io_type == torch.int16: + return "torch_aoti_int16_int16" + elif io_type == torch.int32: + return "torch_aoti_int32_int32" + elif io_type == torch.int64: + return "torch_aoti_int64_int64" + elif io_type == torch.float16: + return "torch_aoti_float16_float16" + elif io_type == torch.float32: + return "torch_aoti_float32_float32" + else: + raise ValueError(f"Unsupported data type: {io_type}") + + def 
test_complex_index(self): + MODEL_NAME = "torch_aoti_complex_index" + INPUT_SHAPE = self._get_complex_input_shape() + OUTPUT_SHAPE = self._get_complex_output_shape() + + input_data = self._get_complex_input_data(INPUT_SHAPE) + + start = time.time() + + with http.InferenceServerClient("localhost:8000") as client: + inputs = [ + http.InferInput("INPUT__0", input_data[0].shape, "INT8"), + http.InferInput("INPUT__1", input_data[1].shape, "INT8"), + http.InferInput("INPUT__2", input_data[2].shape, "INT8"), + http.InferInput("INPUT__3", input_data[3].shape, "INT8"), + ] + + inputs[0].set_data_from_numpy(input_data[0], binary_data=True) + inputs[1].set_data_from_numpy(input_data[1], binary_data=True) + inputs[2].set_data_from_numpy(input_data[2], binary_data=True) + inputs[3].set_data_from_numpy(input_data[3], binary_data=True) + + output_names = [ + "OUTPUT__0", + "OUTPUT__1", + "OUTPUT__2", + "OUTPUT__3", + "OUTPUT__4", + "OUTPUT__5", + ] + + outputs = [] + for output_name in output_names: + outputs.append(http.InferRequestedOutput(output_name, binary_data=True)) + + output_data = [] + results = client.infer(MODEL_NAME, inputs, outputs=outputs) + + for output_name in output_names: + output_data.append(results.as_numpy(output_name)) + + assert len(outputs) == len(output_data) + for data in output_data: + assert data.shape == OUTPUT_SHAPE + + assert (output_data[0] == (input_data[0] + input_data[1])).all() + assert (output_data[1] == input_data[0] - input_data[1]).all() + assert (output_data[2] == input_data[0]).all() + assert (output_data[3] == input_data[1]).all() + assert (output_data[4] == input_data[2]).all() + assert (output_data[5] == input_data[3]).all() + + end = time.time() + assert (end - start) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + + def test_complex_named(self): + MODEL_NAME = "torch_aoti_complex_named" + INPUT_SHAPE = self._get_complex_input_shape() + OUTPUT_SHAPE = self._get_complex_output_shape() + + input_data = self._get_complex_input_data(INPUT_SHAPE) + + start = time.time() + + with http.InferenceServerClient("localhost:8000") as client: + inputs = [ + http.InferInput("ARGS[0]", input_data[0].shape, "INT8"), + http.InferInput("ARGS[1]", input_data[1].shape, "INT8"), + http.InferInput("ARGS[2][option1]", input_data[2].shape, "INT8"), + http.InferInput("ARGS[2][option2]", input_data[3].shape, "INT8"), + ] + + inputs[0].set_data_from_numpy(input_data[0], binary_data=True) + inputs[1].set_data_from_numpy(input_data[1], binary_data=True) + inputs[2].set_data_from_numpy(input_data[2], binary_data=True) + inputs[3].set_data_from_numpy(input_data[3], binary_data=True) + + output_names = [ + "RESULT[AAA]", + "RESULT[BBB][0]", + "RESULT[BBB][1]", + "RESULT[CCC][option1]", + "RESULT[CCC][option2]", + "RESULT[ZZZ]", + ] + + outputs = [] + for output_name in output_names: + outputs.append(http.InferRequestedOutput(output_name, binary_data=True)) + + output_data = [] + results = client.infer(MODEL_NAME, inputs, outputs=outputs) + + for output_name in output_names: + output_data.append(results.as_numpy(output_name)) + + assert len(outputs) == len(output_data) + for data in output_data: + assert data.shape == OUTPUT_SHAPE + + assert (output_data[0] == (input_data[0] + input_data[1])).all() + assert (output_data[1] == input_data[0]).all() + assert (output_data[2] == input_data[1]).all() + assert (output_data[3] == input_data[2]).all() + assert (output_data[4] == input_data[3]).all() + assert (output_data[5] == (input_data[0] - input_data[1])).all() + + end = time.time() 
+ assert (end - start) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + + def test_simple_model(self): + io_types = [ + torch.int8, + torch.int16, + torch.int32, + torch.int64, + torch.float16, + torch.float32, + ] + for io_type in io_types: + MODEL_NAME = self._get_simple_model_name(io_type) + INPUT_SHAPE = (16,) + OUTPUT_SHAPE = (16,) + TRITON_IO_TYPE = self._dtype_to_triton_dtype(io_type) + + input_data = ( + self._get_simple_input_data(INPUT_SHAPE, io_type), + self._get_simple_input_data(INPUT_SHAPE, io_type), + ) + + start = time.time() + + with http.InferenceServerClient("localhost:8000") as client: + inputs = [ + http.InferInput("ARGS[0]", input_data[0].shape, TRITON_IO_TYPE), + http.InferInput("ARGS[1]", input_data[1].shape, TRITON_IO_TYPE), + ] + + inputs[0].set_data_from_numpy(input_data[0], binary_data=True) + inputs[1].set_data_from_numpy(input_data[1], binary_data=True) + + output_names = [ + "RESULT", + ] + + outputs = [] + for output_name in output_names: + outputs.append( + http.InferRequestedOutput(output_name, binary_data=True) + ) + + output_data = [] + results = client.infer(MODEL_NAME, inputs, outputs=outputs) + + for output_name in output_names: + output_data.append(results.as_numpy(output_name)) + + assert len(outputs) == len(output_data) + for data in output_data: + assert data.shape == OUTPUT_SHAPE + assert (data == input_data[0] + input_data[1]).all() + + end = time.time() + assert ( + end - start + ) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + + def test_torchvision(self): + MODEL_NAME = "torchvision_aoti" + INPUT_SHAPE = (1, 3, 224, 224) + OUTPUT_SHAPE = (1, 1000) + + input_data = self._get_torchvision_input_data(INPUT_SHAPE) + input_data[0][0] = 1.0 + + start = time.time() + + with http.InferenceServerClient("localhost:8000") as client: + inputs = [ + http.InferInput("ARGS[0]", input_data.shape, "FP32"), + ] + + inputs[0].set_data_from_numpy(input_data, binary_data=True) + + output_names = [ + "RESULT", + ] + + outputs = [] + for output_name in output_names: + outputs.append(http.InferRequestedOutput(output_name, binary_data=True)) + + output_data = [] + results = client.infer(MODEL_NAME, inputs, outputs=outputs) + + for output_name in output_names: + output_data.append(results.as_numpy(output_name)) + + assert len(outputs) == len(output_data) + for data in output_data: + assert data.shape == OUTPUT_SHAPE + + end = time.time() + assert (end - start) < 0.2, f"Inference time {end - start} time exceeds 200ms" + + +if __name__ == "__main__": + unittest.main() diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 328f42bbe0..d6ed5738f8 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -263,9 +263,9 @@ set -e PATH=$PATH:/usr/local/cuda-13.0/bin python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --models_dir=$TRITON_MDLS_QA_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --models_dir=$TRITON_MDLS_QA_MODEL +python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torchvision-aoti --models_dir=$TRITON_MDLS_QA_MODEL chmod -R 777 $TRITON_MDLS_QA_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --libtorch --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL -python3 $TRITON_MDLS_SRC_DIR/gen_qa_models.py --torch-aoti --variable --models_dir=$TRITON_MDLS_QA_VARIABLE_MODEL chmod -R 777 $TRITON_MDLS_QA_VARIABLE_MODEL python3 $TRITON_MDLS_SRC_DIR/gen_qa_identity_models.py --libtorch --models_dir=$TRITON_MDLS_QA_IDENTITY_MODEL chmod -R 777 
$TRITON_MDLS_QA_IDENTITY_MODEL diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index d509562bff..cbfce101a6 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -47,6 +47,7 @@ from typing import List, Tuple _color_blue = "\033[94m" +_color_cyan = "\033[36m" _color_green = "\033[32m" _color_magenta = "\033[35m" _color_red = "\033[31m" @@ -1298,69 +1299,44 @@ def generate_sample_inputs( input_shape = [abs(ips) for ips in input_shape] if input_dtype == np.int8: - input0 = torch.randint(-128, 127, input_shape, dtype=torch.int8, device=device) - input1 = torch.randint(-128, 127, input_shape, dtype=torch.int8, device=device) + input0 = torch.zeros(input_shape, dtype=torch.int8, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int8, device=device) elif input_dtype == np.int16: - input0 = torch.randint( - -32768, 32767, input_shape, dtype=torch.int16, device=device - ) - input1 = torch.randint( - -32768, 32767, input_shape, dtype=torch.int16, device=device - ) + input0 = torch.zeros(input_shape, dtype=torch.int16, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int16, device=device) elif input_dtype == np.int32: - input0 = torch.randint( - -2147483648, 2147483647, input_shape, dtype=torch.int32, device=device - ) - input1 = torch.randint( - -2147483648, 2147483647, input_shape, dtype=torch.int32, device=device - ) + input0 = torch.zeros(input_shape, dtype=torch.int32, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int32, device=device) elif input_dtype == np.int64: - input0 = torch.randint( - -9223372036854775808, - 9223372036854775807, - input_shape, - dtype=torch.int64, - device=device, - ) - input1 = torch.randint( - -9223372036854775808, - 9223372036854775807, - input_shape, - dtype=torch.int64, - device=device, - ) + input0 = torch.zeros(input_shape, dtype=torch.int64, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int64, device=device) elif input_dtype == np.float16: - input0 = torch.randn(*input_shape, dtype=torch.float16, device=device) - input1 = torch.randn(*input_shape, dtype=torch.float16, device=device) + input0 = torch.zeros(input_shape, dtype=torch.float16, device=device) + input1 = torch.zeros(input_shape, dtype=torch.float16, device=device) elif input_dtype == np.float32: - input0 = torch.randn(*input_shape, dtype=torch.float32, device=device) - input1 = torch.randn(*input_shape, dtype=torch.float32, device=device) + input0 = torch.zeros(input_shape, dtype=torch.float32, device=device) + input1 = torch.zeros(input_shape, dtype=torch.float32, device=device) elif input_dtype == np.float64: - input0 = torch.randn(*input_shape, dtype=torch.float64, device=device) - input1 = torch.randn(*input_shape, dtype=torch.float64, device=device) + input0 = torch.zeros(input_shape, dtype=torch.float64, device=device) + input1 = torch.zeros(input_shape, dtype=torch.float64, device=device) elif input_dtype == np.uint8: - input0 = torch.randint(0, 255, input_shape, dtype=torch.uint8, device=device) - input1 = torch.randint(0, 255, input_shape, dtype=torch.uint8, device=device) + input0 = torch.zeros(input_shape, dtype=torch.uint8, device=device) + input1 = torch.zeros(input_shape, dtype=torch.uint8, device=device) elif input_dtype == np.uint16: - input0 = torch.randint(0, 65535, input_shape, dtype=torch.uint16, device=device) - input1 = torch.randint(0, 65535, input_shape, dtype=torch.uint16, device=device) + input0 = torch.zeros(input_shape, dtype=torch.uint16, device=device) + input1 = 
torch.zeros(input_shape, dtype=torch.uint16, device=device) elif input_dtype == np.uint32: - input0 = torch.randint( - 0, 4294967295, input_shape, dtype=torch.uint32, device=device - ) - input1 = torch.randint( - 0, 4294967295, input_shape, dtype=torch.uint32, device=device - ) + input0 = torch.zeros(input_shape, dtype=torch.uint32, device=device) + input1 = torch.zeros(input_shape, dtype=torch.uint32, device=device) elif input_dtype == np.uint64: - input0 = torch.randint( - 0, 18446744073709551615, input_shape, dtype=torch.uint64, device=device - ) - input1 = torch.randint( - 0, 18446744073709551615, input_shape, dtype=torch.uint64, device=device - ) + input0 = torch.zeros(input_shape, dtype=torch.uint64, device=device) + input1 = torch.zeros(input_shape, dtype=torch.uint64, device=device) else: - input0 = torch.randn(*input_shape, device=device) - input1 = torch.randn(*input_shape, device=device) + print( + f"{_color_yellow}warning: dtype {input_dtype} is unsupported; falling back to torch.int32{_color_reset}" + ) + input0 = torch.zeros(input_shape, dtype=torch.int32, device=device) + input1 = torch.zeros(input_shape, dtype=torch.int32, device=device) return (input0, input1) @@ -1418,7 +1394,7 @@ def create_torch_aoti_modelfile( ) return False - model_version_dir = f"{models_dir}/{model_name}/{model_version}" + model_version_dir = os.path.join(models_dir, model_name, str(model_version)) print(f"{_color_green}Creating model {model_name}{_color_reset}") @@ -1465,13 +1441,14 @@ def forward(self, INPUT0: torch.Tensor, INPUT1: torch.Tensor) -> torch.Tensor: model.to(device) model = model.eval() - sample_input = generate_sample_inputs(input_shape, input_dtype, device) + sample_inputs = generate_sample_inputs(input_shape, input_dtype, device) + package_path = os.path.join(model_version_dir, "model.pt2") try: - ep = torch.export.export(model, sample_input) + exported_model = torch.export.export(model, sample_inputs) torch._inductor.aoti_compile_and_package( - ep, - package_path=f"{model_version_dir}/model.pt2", + exported_model, + package_path=package_path, ) except Exception as e: print( @@ -1484,13 +1461,162 @@ def forward(self, INPUT0: torch.Tensor, INPUT1: torch.Tensor) -> torch.Tensor: return True +def create_torch_aoti_complex_modelfile( + models_dir: str, +): + base_name = "torch_aoti_complex" + model_names = [ + f"{base_name}_named", + f"{base_name}_index", + ] + model_version_dirs = [ + os.path.join(models_dir, model_names[0], "1"), + os.path.join(models_dir, model_names[1], "1"), + ] + + for model_version_dir in model_version_dirs: + try: + os.makedirs(model_version_dir) + except OSError: + pass # ignore existing dir + + print(f"{_color_green}Creating model {base_name}{_color_reset}") + + class TorchAotiComplex(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward( + self, + hdata: torch.Tensor, + vdata: torch.Tensor, + options: dict[str, torch.Tensor], + ) -> dict[ + str, + torch.Tensor | tuple[torch.Tensor, torch.Tensor] | dict[str, torch.Tensor], + ]: + out = { + "AAA": hdata + vdata, + "ZZZ": hdata - vdata, + "BBB": ( + hdata, + vdata, + ), + "CCC": options, + } + + return out + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + model = TorchAotiComplex() + model.to(device) + model = model.eval() + + SHAPE = (1, 16) + + sample_args = ( + torch.zeros(SHAPE, dtype=torch.int8, device=device), + torch.zeros(SHAPE, dtype=torch.int8, device=device), + { + "option1": torch.zeros(SHAPE, dtype=torch.int8, device=device), + "option2": 
torch.zeros(SHAPE, dtype=torch.int8, device=device), + }, + ) + + # Export and package the model + print(f"{_color_green}Exporting and packaging the model...{_color_reset}") + + model_file_name = "model.pt2" + package_paths = [ + os.path.join(model_version_dirs[0], model_file_name), + os.path.join(model_version_dirs[1], model_file_name), + ] + + try: + exported_model = torch.export.export(model, sample_args) + torch._inductor.aoti_compile_and_package( + exported_model, + package_path=package_paths[0], + ) + except Exception as e: + print( + f"{_color_red}error: Failed to create model {base_name}{_color_reset}", + file=sys.stderr, + ) + print(f"\n{_color_red}{e}{_color_reset}\n", file=sys.stderr) + return False + + try: + # Now load and run the packaged model + print(f"{_color_cyan}Loading and running the packaged model...{_color_reset}") + + compiled_model = torch._inductor.aoti_load_package(package_paths[0]) + + print(f"{_color_cyan}Compiled model call spec:{_color_reset}") + + for elem in compiled_model.loader.get_call_spec(): + print(elem) + + print(f"{_color_cyan}Running the compiled model...{_color_reset}") + + with torch.inference_mode(): + hdata = torch.randint( + low=0, + high=127, + size=SHAPE, + dtype=torch.int8, + device=device, + ) + vdata = torch.randint( + low=0, + high=127, + size=SHAPE, + dtype=torch.int8, + device=device, + ) + options = { + "option1": torch.randint( + low=0, + high=127, + size=SHAPE, + dtype=torch.int8, + device=device, + ), + "option2": torch.randint( + low=0, + high=127, + size=SHAPE, + dtype=torch.int8, + device=device, + ), + } + + _ = compiled_model(hdata, vdata, options) + + print( + f'{_color_green}Model "{base_name}" successfully executed.{_color_reset}' + ) + except Exception as e: + print( + f"{_color_red}error: Failed to validate model {base_name}{_color_reset}", + file=sys.stderr, + ) + print(f"\n{_color_red}{e}{_color_reset}\n", file=sys.stderr) + return False + + # Copy the compiled model package to the alternate model folder. + # Both the named and ordinal addressing versions of the model (from Triton's point-of-view) use the same compiled model. 
+ shutil.copy(package_paths[0], package_paths[1]) + + return True + + def create_torchvision_aoti_modelfile( models_dir: str, max_batch: int, - model_version: int, ): model_name = "torchvision_aoti" - model_version_dir = f"{models_dir}/{model_name}/{model_version}" + model_version_dir = os.path.join(models_dir, model_name, "1") try: os.makedirs(model_version_dir) @@ -1504,16 +1630,16 @@ def create_torchvision_aoti_modelfile( model = model.to(device) model = model.eval() + SHAPE = (max_batch, 3, 244, 244) + # Example input tensor with batch size 1 and 3 color channels (RGB), height and width of 224 - input_tensor = torch.randn(max_batch, 3, 224, 224, device=device) + sample_inputs = (torch.zeros(SHAPE, dtype=torch.float32, device=device),) - try: - ep = torch.export.export(model, (input_tensor,)) + package_path = os.path.join(model_version_dir, "model.pt2") - torch._inductor.aoti_compile_and_package( - ep, - package_path=f"{model_version_dir}/model.pt2", - ) + try: + ep = torch.export.export(model, sample_inputs) + torch._inductor.aoti_compile_and_package(ep, package_path=package_path) except Exception as e: print( f"{_color_red}error: Failed to create model {model_name}{_color_reset}", @@ -1609,9 +1735,11 @@ def create_libtorch_modelconfig( except OSError: pass # ignore existing dir - with open(f"{config_dir}/config.pbtxt", "w") as file: + config_path = os.path.join(config_dir, "config.pbtxt") + + with open(config_path, "w") as file: file.write(config) - print(f"Created {config_dir}/config.pbtxt") + print(f"Created {config_path}") with open(f"{config_dir}/{label_filename}", "w") as file: for l in range(output0_label_cnt): @@ -1650,7 +1778,7 @@ def create_torch_aoti_modelconfig( print(f"{_color_green}Creating config for {model_name}{_color_reset}") label_filename = "output_labels.txt" - config_dir = f"{models_dir}/{model_name}" + config_dir = os.path.join(models_dir, model_name) config = f""" backend: "pytorch" name: "{model_name}" @@ -1658,19 +1786,19 @@ def create_torch_aoti_modelconfig( version_policy: {version_policy_str} input [ {{ - name: "INPUT0" + name: "ARGS[0]" data_type: {np_to_model_dtype(input_dtype)} dims: [ {tu.shape_to_dims_str(input_shape)} ] }}, {{ - name: "INPUT1" + name: "ARGS[1]" data_type: {np_to_model_dtype(input_dtype)} dims: [ {tu.shape_to_dims_str(input_shape)} ] }} ] output [ {{ - name: "OUTPUT__0" + name: "RESULT" data_type: {np_to_model_dtype(output_dtype)} dims: [ {tu.shape_to_dims_str(output_shape)} ] label_filename: "{label_filename}" @@ -1684,14 +1812,170 @@ def create_torch_aoti_modelconfig( except OSError: pass # ignore existing dir - with open(f"{config_dir}/config.pbtxt", "w") as file: + config_path = os.path.join(config_dir, "config.pbtxt") + + with open(config_path, "w") as file: file.write(config) - print(f"Created {config_dir}/config.pbtxt") + print(f"Created {config_path}") - with open(f"{config_dir}/{label_filename}", "w") as file: + label_path = os.path.join(config_dir, label_filename) + + with open(label_path, "w") as file: for l in range(output_label_cnt): file.write(f"label{l}\n") - print(f"Created {config_dir}/{label_filename}") + print(f"Created {label_path}") + + +def create_torch_aoti_complex_modelconfig( + models_dir, +): + base_name = "torch_aoti_complex" + model_names = [ + f"{base_name}_named", + f"{base_name}_index", + ] + + print(f"{_color_green}Creating config for {base_name}{_color_reset}") + + config_dirs = [ + os.path.join(models_dir, model_names[0]), + os.path.join(models_dir, model_names[1]), + ] + configs = [ + f""" +backend: 
"pytorch" +platform: "torch_aoti" +name: "{model_names[0]}" +input: [ + {{ + name: "ARGS[0]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "ARGS[1]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "ARGS[2][option1]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "ARGS[2][option2]" + data_type: TYPE_INT8 + dims: [1, 16] + }} +] +output: [ + {{ + name: "RESULT[AAA]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[BBB][0]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[BBB][1]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[CCC][option1]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[CCC][option2]" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "RESULT[ZZZ]" + data_type: TYPE_INT8 + dims: [1, 16] + }} +] +instance_group [{{ kind: {"KIND_GPU" if torch.cuda.is_available() else "KIND_CPU"} }}] +""", + f""" +backend: "pytorch" +name: "{model_names[1]}" +platform: "torch_aoti" +input: [ + {{ + name: "INPUT__0" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "INPUT__1" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "INPUT__2" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "INPUT__3" + data_type: TYPE_INT8 + dims: [1, 16] + }} +] +output: [ + {{ + name: "OUTPUT__0" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__1" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__2" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__3" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__4" + data_type: TYPE_INT8 + dims: [1, 16] + }}, + {{ + name: "OUTPUT__5" + data_type: TYPE_INT8 + dims: [1, 16] + }} +] +instance_group [{{ kind: {"KIND_GPU" if torch.cuda.is_available() else "KIND_CPU"} }}] +""", + ] + + for i in range(2): + config_dir = config_dirs[i] + try: + os.makedirs(config_dir) + except OSError: + pass # ignore existing dir + + config_path = os.path.join(config_dir, "config.pbtxt") + + with open(config_path, "w") as file: + file.write(configs[i]) + print(f"Created {config_path}") def create_torchvision_aoti_modelconfig( @@ -1703,7 +1987,7 @@ def create_torchvision_aoti_modelconfig( print(f"{_color_green}Creating config for {model_name}{_color_reset}") - config_dir = f"{models_dir}/{model_name}" + config_dir = os.path.join(models_dir, model_name) config = f""" backend: "pytorch" name: "{model_name}" @@ -1711,14 +1995,13 @@ def create_torchvision_aoti_modelconfig( max_batch_size: {max_batch} input [ {{ - name: "INPUT__0" + name: "ARGS[0]" data_type: TYPE_FP32 - format: FORMAT_NCHW dims: [ 3, 224, 224 ] }}] output [ {{ - name: "OUTPUT__0" + name: "RESULT" data_type: TYPE_FP32 dims: [ 1000 ] label_filename: "{label_filename}" @@ -1732,15 +2015,19 @@ def create_torchvision_aoti_modelconfig( except OSError: pass # ignore existing dir - with open(f"{config_dir}/config.pbtxt", "w") as file: + config_path = os.path.join(config_dir, "config.pbtxt") + + with open(config_path, "w") as file: file.write(config) - print(f"Created {config_dir}/config.pbtxt") + print(f"Created {config_path}") source_path = os.environ.get("TRITON_GENSRCDIR", default="gen_srcdir") source_filename = os.path.join(source_path, RESNET50_LABEL_FILE) - shutil.copyfile(source_filename, f"{config_dir}/{label_filename}") - print(f"Created {config_dir}/{label_filename}") + target_path = os.path.join(config_dir, label_filename) + + shutil.copyfile(source_filename, target_path) + print(f"Created {target_path}") def 
create_openvino_modelfile(
@@ -2352,6 +2639,8 @@ def create_fixed_models(
     if FLAGS.onnx:
         import onnx
     if FLAGS.libtorch or FLAGS.torch_aoti:
+        import shutil
+
         import torch
         from torch import nn
     if FLAGS.torchvision_aoti:
@@ -2747,7 +3036,14 @@ def create_fixed_models(
             for model_shape in [(-1,), (-1, -1), (-1, -1, -1)]:
                 emu.create_nop_modelconfig(FLAGS.models_dir, model_shape, model_dtype)
 
+    if FLAGS.torch_aoti:
+        print(
+            f"{_color_magenta}PyTorch: Complex AOTI model generation requested{_color_reset}"
+        )
+        if create_torch_aoti_complex_modelfile(FLAGS.models_dir):
+            create_torch_aoti_complex_modelconfig(FLAGS.models_dir)
+
     if FLAGS.torchvision_aoti:
         print(f"{_color_blue}TorchVision AOTI model generation requested{_color_reset}")
-        if create_torchvision_aoti_modelfile(FLAGS.models_dir, 1, 1):
+        if create_torchvision_aoti_modelfile(FLAGS.models_dir, 1):
             create_torchvision_aoti_modelconfig(FLAGS.models_dir, 1)

From 76f247c4e1e6a309447cb59f3afb6b857a0c069b Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Fri, 8 May 2026 12:32:37 -0400
Subject: [PATCH 2/7] Potential fix for pull request finding 'CodeQL / Unused
 import'

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
---
 qa/L0_torch_aoti/torch_aoti_infer_test.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py
index e04a0949b0..cd6691fe70 100755
--- a/qa/L0_torch_aoti/torch_aoti_infer_test.py
+++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py
@@ -32,7 +32,6 @@
 
 import unittest
 
-import numpy as np
 import test_util as tu
 import torch
 import tritonclient.http as http

From 726fdc31ea485a62b3c47e7248e62d3da0bf0c2e Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Tue, 5 May 2026 20:16:24 -0400
Subject: [PATCH 3/7] test: Remove timing assertions from the torchvision AOTI
 test

This change removes the wall-clock inference-timing assertion from
test_torchvision in qa/L0_torch_aoti/torch_aoti_infer_test.py.
---
 qa/L0_torch_aoti/torch_aoti_infer_test.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py
index cd6691fe70..f7c75323d2 100755
--- a/qa/L0_torch_aoti/torch_aoti_infer_test.py
+++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py
@@ -270,8 +270,6 @@ def test_torchvision(self):
         input_data = self._get_torchvision_input_data(INPUT_SHAPE)
         input_data[0][0] = 1.0
 
-        start = time.time()
-
         with http.InferenceServerClient("localhost:8000") as client:
             inputs = [
                 http.InferInput("ARGS[0]", input_data.shape, "FP32"),
@@ -297,9 +295,6 @@ def test_torchvision(self):
         for data in output_data:
             assert data.shape == OUTPUT_SHAPE
 
-        end = time.time()
-        assert (end - start) < 0.2, f"Inference time {end - start} time exceeds 200ms"
-
 
 if __name__ == "__main__":
     unittest.main()

From 676e7f9b4617f18599da8506bc4b58528ac2a326 Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Mon, 11 May 2026 12:27:18 -0400
Subject: [PATCH 4/7] remove gitignore

---
 qa/L0_torch_aoti/.gitignore | 7 -------
 1 file changed, 7 deletions(-)
 delete mode 100644 qa/L0_torch_aoti/.gitignore

diff --git a/qa/L0_torch_aoti/.gitignore b/qa/L0_torch_aoti/.gitignore
deleted file mode 100644
index ffea82cd8f..0000000000
--- a/qa/L0_torch_aoti/.gitignore
+++ /dev/null
@@ -1,7 +0,0 @@
-models/
-
-*.log
-
-1
-2
-test_results
\ No newline at end of file

From 48a3375791b360e2301610cc521806d174dcef76 Mon Sep 17 00:00:00 2001
From: J Wyman
Date: Tue, 12 May 2026 16:19:10 -0400
Subject: [PATCH 5/7] adopt recommended changes from Copilot

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
---
 qa/L0_torch_aoti/test.sh                  |  5 +-
 qa/L0_torch_aoti/torch_aoti_infer_test.py | 61 +++++++++--------------
 qa/common/gen_qa_models.py                |  3 +-
 3 files changed, 28 insertions(+), 41 deletions(-)

diff --git a/qa/L0_torch_aoti/test.sh b/qa/L0_torch_aoti/test.sh
index 67da22cd78..f37751c55e 100755
--- a/qa/L0_torch_aoti/test.sh
+++ b/qa/L0_torch_aoti/test.sh
@@ -47,8 +47,8 @@ if [[ "$#" -ge 1 ]]; then
     REPO_VERSION=$1
 fi
 if [[ -z "$REPO_VERSION" ]]; then
-    echo -e "${COLOR_ERROR}Repository version must be specified${COLOR_RESET}" &1>2
-    echo -e "${COLOR_ERROR}\n***\n*** Test Failed\n***${COLOR_RESET}" &1>2
+    echo -e "${COLOR_ERROR}Repository version must be specified${COLOR_RESET}" 1>&2
+    echo -e "${COLOR_ERROR}\n***\n*** Test Failed\n***${COLOR_RESET}" 1>&2
     exit 1
 fi
 if [[ ! 
-z "$TEST_REPO_ARCH" ]]; then @@ -132,6 +132,7 @@ fi # Cleanup echo -e "${COLOR_DARK}Killing server (pid: ${SERVER_PID})${COLOR_RESET}" kill -s SIGINT ${SERVER_PID} +wait ${SERVER_PID} || true echo -e "${COLOR_DARK}Removing model repository${COLOR_RESET}" for model in "${models[@]}"; do rm -rf ${MODELDIR}/${model} diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py index f7c75323d2..cabc6a305e 100755 --- a/qa/L0_torch_aoti/torch_aoti_infer_test.py +++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py @@ -102,8 +102,6 @@ def test_complex_index(self): input_data = self._get_complex_input_data(INPUT_SHAPE) - start = time.time() - with http.InferenceServerClient("localhost:8000") as client: inputs = [ http.InferInput("INPUT__0", input_data[0].shape, "INT8"), @@ -136,19 +134,16 @@ def test_complex_index(self): for output_name in output_names: output_data.append(results.as_numpy(output_name)) - assert len(outputs) == len(output_data) + self.assertEqual(len(outputs), len(output_data)) for data in output_data: - assert data.shape == OUTPUT_SHAPE - - assert (output_data[0] == (input_data[0] + input_data[1])).all() - assert (output_data[1] == input_data[0] - input_data[1]).all() - assert (output_data[2] == input_data[0]).all() - assert (output_data[3] == input_data[1]).all() - assert (output_data[4] == input_data[2]).all() - assert (output_data[5] == input_data[3]).all() + self.assertEqual(data.shape, OUTPUT_SHAPE) - end = time.time() - assert (end - start) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + self.assertTrue((output_data[0] == (input_data[0] + input_data[1])).all()) + self.assertTrue((output_data[1] == input_data[0] - input_data[1]).all()) + self.assertTrue((output_data[2] == input_data[0]).all()) + self.assertTrue((output_data[3] == input_data[1]).all()) + self.assertTrue((output_data[4] == input_data[2]).all()) + self.assertTrue((output_data[5] == input_data[3]).all()) def test_complex_named(self): MODEL_NAME = "torch_aoti_complex_named" @@ -157,8 +152,6 @@ def test_complex_named(self): input_data = self._get_complex_input_data(INPUT_SHAPE) - start = time.time() - with http.InferenceServerClient("localhost:8000") as client: inputs = [ http.InferInput("ARGS[0]", input_data[0].shape, "INT8"), @@ -191,19 +184,16 @@ def test_complex_named(self): for output_name in output_names: output_data.append(results.as_numpy(output_name)) - assert len(outputs) == len(output_data) + self.assertEqual(len(outputs), len(output_data)) for data in output_data: - assert data.shape == OUTPUT_SHAPE + self.assertEqual(data.shape, OUTPUT_SHAPE) - assert (output_data[0] == (input_data[0] + input_data[1])).all() - assert (output_data[1] == input_data[0]).all() - assert (output_data[2] == input_data[1]).all() - assert (output_data[3] == input_data[2]).all() - assert (output_data[4] == input_data[3]).all() - assert (output_data[5] == (input_data[0] - input_data[1])).all() - - end = time.time() - assert (end - start) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + self.assertTrue((output_data[0] == (input_data[0] + input_data[1])).all()) + self.assertTrue((output_data[1] == input_data[0]).all()) + self.assertTrue((output_data[2] == input_data[1]).all()) + self.assertTrue((output_data[3] == input_data[2]).all()) + self.assertTrue((output_data[4] == input_data[3]).all()) + self.assertTrue((output_data[5] == (input_data[0] - input_data[1])).all()) def test_simple_model(self): io_types = [ @@ -225,8 +215,6 @@ def test_simple_model(self): 
self._get_simple_input_data(INPUT_SHAPE, io_type), ) - start = time.time() - with http.InferenceServerClient("localhost:8000") as client: inputs = [ http.InferInput("ARGS[0]", input_data[0].shape, TRITON_IO_TYPE), @@ -252,15 +240,10 @@ def test_simple_model(self): for output_name in output_names: output_data.append(results.as_numpy(output_name)) - assert len(outputs) == len(output_data) + self.assertEqual(len(outputs), len(output_data)) for data in output_data: - assert data.shape == OUTPUT_SHAPE - assert (data == input_data[0] + input_data[1]).all() - - end = time.time() - assert ( - end - start - ) < 0.0333, f"Inference time {end - start} time exceeds 33ms" + self.assertEqual(data.shape, OUTPUT_SHAPE) + self.assertTrue((data == input_data[0] + input_data[1]).all()) def test_torchvision(self): MODEL_NAME = "torchvision_aoti" @@ -291,9 +274,11 @@ def test_torchvision(self): for output_name in output_names: output_data.append(results.as_numpy(output_name)) - assert len(outputs) == len(output_data) + self.assertEqual(len(outputs), len(output_data)) for data in output_data: - assert data.shape == OUTPUT_SHAPE + self.assertEqual(data.shape, OUTPUT_SHAPE) + output_tensor = torch.from_numpy(data) + self.assertTrue(torch.isfinite(output_tensor).all().item()) if __name__ == "__main__": diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index cbfce101a6..7f2a92f160 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -1630,7 +1630,7 @@ def create_torchvision_aoti_modelfile( model = model.to(device) model = model.eval() - SHAPE = (max_batch, 3, 244, 244) + SHAPE = (max_batch, 3, 224, 224) # Example input tensor with batch size 1 and 3 color channels (RGB), height and width of 224 sample_inputs = (torch.zeros(SHAPE, dtype=torch.float32, device=device),) @@ -3044,6 +3044,7 @@ def create_fixed_models( create_torch_aoti_complex_modelconfig(FLAGS.models_dir) if FLAGS.torchvision_aoti: + # TODO: Add support for variable batch size and version policy for torchvision AOTI models. print(f"{_color_blue}TorchVision AOTI model generation requested{_color_reset}") if create_torchvision_aoti_modelfile(FLAGS.models_dir, 1): create_torchvision_aoti_modelconfig(FLAGS.models_dir, 1) From 4c94fa72db57206a2c387ebbfb75e46df2519db9 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Tue, 12 May 2026 18:09:28 -0400 Subject: [PATCH 6/7] Potential fix for pull request finding 'CodeQL / Unused import' Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- qa/L0_torch_aoti/torch_aoti_infer_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qa/L0_torch_aoti/torch_aoti_infer_test.py b/qa/L0_torch_aoti/torch_aoti_infer_test.py index cabc6a305e..2b93f31a48 100755 --- a/qa/L0_torch_aoti/torch_aoti_infer_test.py +++ b/qa/L0_torch_aoti/torch_aoti_infer_test.py @@ -26,7 +26,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys -import time sys.path.append("../common") From 891098d33f440539468841241d2e601a17dd33f3 Mon Sep 17 00:00:00 2001 From: J Wyman Date: Thu, 14 May 2026 13:54:34 -0400 Subject: [PATCH 7/7] adopt changes requested by @yingeeh. 
--- qa/common/gen_qa_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index 7f2a92f160..0340265677 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -1290,7 +1290,7 @@ def forward(self, INPUT0, INPUT1): traced.save(f"{model_version_dir}/model.pt") -def generate_sample_inputs( +def generate_torch_aoti_sample_inputs( input_shape, input_dtype, device, @@ -1441,7 +1441,7 @@ def forward(self, INPUT0: torch.Tensor, INPUT1: torch.Tensor) -> torch.Tensor: model.to(device) model = model.eval() - sample_inputs = generate_sample_inputs(input_shape, input_dtype, device) + sample_inputs = generate_torch_aoti_sample_inputs(input_shape, input_dtype, device) package_path = os.path.join(model_version_dir, "model.pt2") try:
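
Note on the AOTI flow these patches exercise: model generation reduces to a
short round trip -- export the eager module, compile and package it into a
model.pt2 archive, then load and call the package to validate it, which is
what the generator scripts above do for the complex model. A minimal,
self-contained sketch using only the entry points that appear in the patches
(torch.export.export, torch._inductor.aoti_compile_and_package,
torch._inductor.aoti_load_package); the Add module, shapes, and package path
below are illustrative and not part of the change:

    import torch

    class Add(torch.nn.Module):
        def forward(self, a: torch.Tensor, b: torch.Tensor) -> torch.Tensor:
            return a + b

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = Add().to(device).eval()
    sample_inputs = (
        torch.zeros((16,), dtype=torch.int32, device=device),
        torch.zeros((16,), dtype=torch.int32, device=device),
    )

    # Export to an ExportedProgram, then compile and package it as a .pt2
    # archive, the artifact the pytorch backend loads for these models.
    exported = torch.export.export(model, sample_inputs)
    torch._inductor.aoti_compile_and_package(exported, package_path="model.pt2")

    # Load the package and run it once to confirm the compiled artifact is
    # callable before it is copied into the model repository.
    compiled = torch._inductor.aoti_load_package("model.pt2")
    with torch.inference_mode():
        out = compiled(*sample_inputs)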