From 8da633c521c0e27bd9db4a250b08b203a6842a59 Mon Sep 17 00:00:00 2001 From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com> Date: Fri, 15 May 2026 16:34:53 +0000 Subject: [PATCH 1/5] Addressing TensorRT API deprecation --- qa/common/gen_qa_dyna_sequence_implicit_models.py | 3 ++- qa/common/gen_qa_dyna_sequence_models.py | 6 ++++-- qa/common/gen_qa_identity_models.py | 6 ++++-- qa/common/gen_qa_implicit_models.py | 3 ++- qa/common/gen_qa_models.py | 6 ++++-- qa/common/gen_qa_sequence_models.py | 6 ++++-- qa/common/gen_qa_trt_format_models.py | 3 ++- 7 files changed, 22 insertions(+), 11 deletions(-) diff --git a/qa/common/gen_qa_dyna_sequence_implicit_models.py b/qa/common/gen_qa_dyna_sequence_implicit_models.py index 32e4ebea13..c69ca28eab 100755 --- a/qa/common/gen_qa_dyna_sequence_implicit_models.py +++ b/qa/common/gen_qa_dyna_sequence_implicit_models.py @@ -575,7 +575,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) diff --git a/qa/common/gen_qa_dyna_sequence_models.py b/qa/common/gen_qa_dyna_sequence_models.py index 7f8459b0da..1a26890f32 100755 --- a/qa/common/gen_qa_dyna_sequence_models.py +++ b/qa/common/gen_qa_dyna_sequence_models.py @@ -129,7 +129,8 @@ def create_plan_shape_tensor_modelfile( flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) @@ -369,7 +370,8 @@ def 
create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) diff --git a/qa/common/gen_qa_identity_models.py b/qa/common/gen_qa_identity_models.py index 248126bcc2..426d939d9e 100755 --- a/qa/common/gen_qa_identity_models.py +++ b/qa/common/gen_qa_identity_models.py @@ -584,7 +584,8 @@ def create_plan_dynamic_rf_modelfile( flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_dtype]) for dt in datatype_set: if dt == trt.int8: @@ -707,7 +708,8 @@ def create_plan_shape_tensor_modelfile( flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_dtype]) for dt in datatype_set: if dt == trt.int8: diff --git a/qa/common/gen_qa_implicit_models.py b/qa/common/gen_qa_implicit_models.py index 06b86ee5c2..c0800098ec 100755 --- a/qa/common/gen_qa_implicit_models.py +++ b/qa/common/gen_qa_implicit_models.py @@ -1066,7 +1066,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, 
"REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index d509562bff..12d2a7225f 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -159,7 +159,8 @@ def create_plan_dynamic_rf_modelfile( profile.set_shape("INPUT1", min_shape, opt_shape, max_shape) flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype]) for dt in datatype_set: @@ -449,7 +450,8 @@ def create_plan_fixed_rf_modelfile( profile.set_shape("INPUT1", min_shape, opt_shape, max_shape) flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype]) for dt in datatype_set: diff --git a/qa/common/gen_qa_sequence_models.py b/qa/common/gen_qa_sequence_models.py index bf83a3a5f3..f8d89a5f9e 100755 --- a/qa/common/gen_qa_sequence_models.py +++ b/qa/common/gen_qa_sequence_models.py @@ -118,7 +118,8 @@ def create_plan_shape_tensor_modelfile( flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) @@ -320,7 +321,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) flags = 1 << 
int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) diff --git a/qa/common/gen_qa_trt_format_models.py b/qa/common/gen_qa_trt_format_models.py index 5645a7178c..5f2cadd69e 100755 --- a/qa/common/gen_qa_trt_format_models.py +++ b/qa/common/gen_qa_trt_format_models.py @@ -147,7 +147,8 @@ def create_plan_modelfile( # The build will fail if TensorRT cannot build an engine without introducing such reformatting. The failure may happen only for some target platforms, because of what formats are supported by kernels for those platforms. # flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype]) for dt in datatype_set: if dt == trt.int8: From 9d1f9d09584432fabf60fe43b6b727554453df86 Mon Sep 17 00:00:00 2001 From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com> Date: Fri, 15 May 2026 17:26:33 +0000 Subject: [PATCH 2/5] Source CustomHardmax plugin from internal TensorRT mirror The upstream onnx_custom_plugin sample on github.com/NVIDIA/TensorRT ships only the legacy IPluginV2DynamicExt implementation, which TRT 11 no longer loads -- the V2 plugin registry surface was removed. The internal NVIDIA mirror has a V3 port cherry-picked to rel-11.0 while rel-10.x branches keep V2. - gen_qa_model_repository now clones the TensorRT samples from gitlab-master.nvidia.com/TensorRT/TensorRT at rel-${TRT_BRANCH} matching the runtime TRT version. 
If the branch is not available the plugin model generation is skipped with a warning so the rest of the QA model repository is still produced. CI_JOB_TOKEN is passed through the docker run that executes the TRT script. - gen_qa_trt_plugin_models.py picks the V2 or V3 plugin API at runtime via hasattr (plugin_creator_list / add_plugin_v2 on TRT 10.x with V2 source, all_creators / add_plugin_v3 with phase=BUILD on TRT 11 with V3 source). --- qa/common/gen_qa_model_repository | 57 +++++++++++---------------- qa/common/gen_qa_trt_plugin_models.py | 37 +++++++++++++---- 2 files changed, 52 insertions(+), 42 deletions(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 328f42bbe0..278f1bd5e5 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -352,43 +352,29 @@ python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_format_models.py --models_dir=$TRITON_MD chmod -R 777 $TRITON_MDLS_QA_TRT_FORMAT_MODEL nvidia-smi --query-gpu=compute_cap | grep -qzE '10\.7|11\.0' && echo -e '${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for data dependent shape (NonZero not supported on this GPU)${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL -# Make shared library for custom Hardmax plugin. -if [ -d "/usr/src/tensorrt/samples/python/onnx_custom_plugin" ]; then - cd /usr/src/tensorrt/samples/python/onnx_custom_plugin +# Build the custom Hardmax plugin used by L0_trt_plugin. The plugin source is +# pulled from the internal NVIDIA TensorRT mirror at the rel- +# branch matching the runtime TRT version (rel-10.x ships the legacy V2 +# implementation, rel-11.0 ships the IPluginV3 port). 
If the branch does not +# exist for this TRT version, the plugin model generation is skipped with a +# warning -- L0_trt_plugin will report missing artifacts but the rest of the +# QA model repository is still produced. +TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2) +TRTSRC=/workspace/TensorRT +rm -rf \$TRTSRC +if git clone --depth 1 -b rel-\${TRT_BRANCH} \ + https://gitlab-ci-token:\${CI_JOB_TOKEN}@gitlab-master.nvidia.com/TensorRT/TensorRT.git \ + \$TRTSRC; then + cd \$TRTSRC/samples/python/onnx_custom_plugin && \ + rm -rf build && mkdir build && cd build && cmake .. && make -j && \ + cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/. + LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so \ + python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py \ + --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL + chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL else - TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2) - if ! git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT; then - MAJOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 1) - MINOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 2) - if [ -n "\$MINOR" ] && [ "\$MINOR" -gt 0 ] 2>/dev/null; then - TRT_BRANCH="\${MAJOR}.\$((MINOR - 1))" - echo "Fallback: cloning TensorRT release/\${TRT_BRANCH} (previous minor)" - git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT - elif [ -n "\$MAJOR" ] && [ "\$MAJOR" -gt 0 ] 2>/dev/null; then - PREV_MAJOR=\$((MAJOR - 1)) - echo "Fallback: MINOR is 0, querying remote for latest release/\${PREV_MAJOR}.x branch" - TRT_BRANCH=\$(git ls-remote --heads https://github.com/NVIDIA/TensorRT.git "refs/heads/release/\${PREV_MAJOR}.*" \ - | awk -F'refs/heads/release/' '{print \$2}' \ - | awk -F. 
'{print \$2, \$0}' \ - | sort -k1,1n \ - | tail -1 \ - | awk '{print \$2}') - if [ -n "\$TRT_BRANCH" ]; then - echo "Fallback: cloning TensorRT release/\${TRT_BRANCH}" - git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT - else - exit 1 - fi - else - exit 1 - fi - fi - cd /workspace/TensorRT/samples/python/onnx_custom_plugin + echo "[WARNING] TensorRT rel-\${TRT_BRANCH} not available on internal mirror; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version." fi -rm -rf build && mkdir build && \ -cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/. -LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL -chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL exit 0 EOF @@ -569,6 +555,7 @@ if [ "$TRITON_MODELS_USE_DOCKER" -eq 1 ] && which docker ; then -v $DOCKER_VOLUME:/mnt \ -t \ -e TRT_VERBOSE \ + -e CI_JOB_TOKEN \ $TENSORRT_IMAGE \ bash -e $TRITON_MDLS_SRC_DIR/$TRTSCRIPT diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py index 83ed9e82a7..c492fc76dd 100755 --- a/qa/common/gen_qa_trt_plugin_models.py +++ b/qa/common/gen_qa_trt_plugin_models.py @@ -44,7 +44,14 @@ def get_trt_plugin(plugin_name): plugin = None field_collection = None - plugin_creators = trt.get_plugin_registry().plugin_creator_list + # The upstream onnx_custom_plugin sample is V2 on TRT 10.x release + # branches and V3 on rel-11.0 (and TRT 11 removed the V2 plugin + # registry surface). Pick the matching API at runtime. 
+ registry = trt.get_plugin_registry() + use_v3 = not hasattr(registry, "plugin_creator_list") + plugin_creators = ( + registry.all_creators if use_v3 else registry.plugin_creator_list + ) for plugin_creator in plugin_creators: if (plugin_creator.name == "CustomHardmax") and ( plugin_name == "CustomHardmax" @@ -57,9 +64,16 @@ def get_trt_plugin(plugin_name): if field_collection is None: raise RuntimeError("Plugin not found: " + plugin_name) - plugin = plugin_creator.create_plugin( - name=plugin_name, field_collection=field_collection - ) + if use_v3: + plugin = plugin_creator.create_plugin( + name=plugin_name, + field_collection=field_collection, + phase=trt.TensorRTPhase.BUILD, + ) + else: + plugin = plugin_creator.create_plugin( + name=plugin_name, field_collection=field_collection + ) return plugin @@ -104,9 +118,18 @@ def create_plan_modelfile( input_layer = network.add_input( name="INPUT0", dtype=trt_input_dtype, shape=input_with_batchsize ) - plugin_layer = network.add_plugin_v2( - inputs=[input_layer], plugin=get_trt_plugin(plugin_name) - ) + # add_plugin_v2 was removed in TRT 11; add_plugin_v3 has existed since + # TRT 10.0. Pick the API that exists on this TRT install; the plugin + # object returned by get_trt_plugin() is matched to the same version. + plugin_obj = get_trt_plugin(plugin_name) + if hasattr(network, "add_plugin_v2"): + plugin_layer = network.add_plugin_v2( + inputs=[input_layer], plugin=plugin_obj + ) + else: + plugin_layer = network.add_plugin_v3( + inputs=[input_layer], shape_inputs=[], plugin=plugin_obj + ) plugin_layer.get_output(0).name = "OUTPUT0" network.mark_output(plugin_layer.get_output(0)) From ed9eaf3c78bddfa7b5bf76e117b14b668c862b89 Mon Sep 17 00:00:00 2001 From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com> Date: Fri, 15 May 2026 17:57:34 +0000 Subject: [PATCH 3/5] Use public TensorRT mirror for CustomHardmax, skip if branch missing Reverts the previous switch to the internal NVIDIA GitLab mirror. 
The public github.com/NVIDIA/TensorRT remains the source of truth; when a release/<major>.<minor> branch is not yet published for the runtime TRT version (e.g. release/11.0 ahead of the public sync), the plugin model generation skips with a warning instead of trying the fallback ladder. L0_trt_plugin loses coverage for that TRT version until the branch becomes available, but the rest of the QA model repository is still produced. Also drops the CI_JOB_TOKEN passthrough on the TRT docker run since the public clone needs no authentication. --- qa/common/gen_qa_model_repository | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 278f1bd5e5..6db9542e75 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -353,18 +353,16 @@ chmod -R 777 $TRITON_MDLS_QA_TRT_FORMAT_MODEL nvidia-smi --query-gpu=compute_cap | grep -qzE '10\.7|11\.0' && echo -e '${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for data dependent shape (NonZero not supported on this GPU)${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL # Build the custom Hardmax plugin used by L0_trt_plugin. The plugin source is -# pulled from the internal NVIDIA TensorRT mirror at the rel- -# branch matching the runtime TRT version (rel-10.x ships the legacy V2 -# implementation, rel-11.0 ships the IPluginV3 port). If the branch does not -# exist for this TRT version, the plugin model generation is skipped with a -# warning -- L0_trt_plugin will report missing artifacts but the rest of the -# QA model repository is still produced. +# pulled from the public NVIDIA/TensorRT repo at the release/<major>.<minor> +# branch matching the runtime TRT version.
If the branch is not published +# yet for this TRT version, the plugin model generation is skipped with a +# warning -- L0_trt_plugin will report missing artifacts but the rest of +# the QA model repository is still produced. TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2) TRTSRC=/workspace/TensorRT rm -rf \$TRTSRC -if git clone --depth 1 -b rel-\${TRT_BRANCH} \ - https://gitlab-ci-token:\${CI_JOB_TOKEN}@gitlab-master.nvidia.com/TensorRT/TensorRT.git \ - \$TRTSRC; then +if git clone --depth 1 -b release/\${TRT_BRANCH} \ + https://github.com/NVIDIA/TensorRT.git \$TRTSRC; then cd \$TRTSRC/samples/python/onnx_custom_plugin && \ rm -rf build && mkdir build && cd build && cmake .. && make -j && \ cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/. @@ -373,7 +371,7 @@ if git clone --depth 1 -b rel-\${TRT_BRANCH} \ --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL else - echo "[WARNING] TensorRT rel-\${TRT_BRANCH} not available on internal mirror; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version." + echo "[WARNING] TensorRT release/\${TRT_BRANCH} not available on github.com/NVIDIA/TensorRT; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version." fi exit 0 EOF @@ -555,7 +553,6 @@ if [ "$TRITON_MODELS_USE_DOCKER" -eq 1 ] && which docker ; then -v $DOCKER_VOLUME:/mnt \ -t \ -e TRT_VERBOSE \ - -e CI_JOB_TOKEN \ $TENSORRT_IMAGE \ bash -e $TRITON_MDLS_SRC_DIR/$TRTSCRIPT From c44f888792304eb5d73fafde3eec47f051fbc5f1 Mon Sep 17 00:00:00 2001 From: "M. 
Chornyi" <99709299+mc-nv@users.noreply.github.com> Date: Fri, 15 May 2026 17:59:42 +0000 Subject: [PATCH 4/5] Wrap bash variables in CustomHardmax plugin block as ${VAR} --- qa/common/gen_qa_model_repository | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 6db9542e75..d490f8c530 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -358,18 +358,18 @@ chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL # yet for this TRT version, the plugin model generation is skipped with a # warning -- L0_trt_plugin will report missing artifacts but the rest of # the QA model repository is still produced. -TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2) +TRT_BRANCH=\$(echo \${TRT_VERSION} | cut -d . -f -2) TRTSRC=/workspace/TensorRT -rm -rf \$TRTSRC +rm -rf \${TRTSRC} if git clone --depth 1 -b release/\${TRT_BRANCH} \ - https://github.com/NVIDIA/TensorRT.git \$TRTSRC; then - cd \$TRTSRC/samples/python/onnx_custom_plugin && \ + https://github.com/NVIDIA/TensorRT.git \${TRTSRC}; then + cd \${TRTSRC}/samples/python/onnx_custom_plugin && \ rm -rf build && mkdir build && cd build && cmake .. && make -j && \ - cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/. - LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so \ - python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py \ - --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL - chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL + cp libcustomHardmaxPlugin.so ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/. 
+ LD_PRELOAD=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/libcustomHardmaxPlugin.so \ + python3 ${TRITON_MDLS_SRC_DIR}/gen_qa_trt_plugin_models.py \ + --models_dir=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL} + chmod -R 777 ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL} else echo "[WARNING] TensorRT release/\${TRT_BRANCH} not available on github.com/NVIDIA/TensorRT; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version." fi From 0d1b75240df9b4ea167b03f6ee5de0915fb02254 Mon Sep 17 00:00:00 2001 From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com> Date: Fri, 15 May 2026 19:19:02 +0000 Subject: [PATCH 5/5] Address pre-commit issue --- qa/common/gen_qa_trt_plugin_models.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py index c492fc76dd..9fd23d92a8 100755 --- a/qa/common/gen_qa_trt_plugin_models.py +++ b/qa/common/gen_qa_trt_plugin_models.py @@ -49,9 +49,7 @@ def get_trt_plugin(plugin_name): # registry surface). Pick the matching API at runtime. registry = trt.get_plugin_registry() use_v3 = not hasattr(registry, "plugin_creator_list") - plugin_creators = ( - registry.all_creators if use_v3 else registry.plugin_creator_list - ) + plugin_creators = registry.all_creators if use_v3 else registry.plugin_creator_list for plugin_creator in plugin_creators: if (plugin_creator.name == "CustomHardmax") and ( plugin_name == "CustomHardmax" @@ -123,9 +121,7 @@ def create_plan_modelfile( # object returned by get_trt_plugin() is matched to the same version. plugin_obj = get_trt_plugin(plugin_name) if hasattr(network, "add_plugin_v2"): - plugin_layer = network.add_plugin_v2( - inputs=[input_layer], plugin=plugin_obj - ) + plugin_layer = network.add_plugin_v2(inputs=[input_layer], plugin=plugin_obj) else: plugin_layer = network.add_plugin_v3( inputs=[input_layer], shape_inputs=[], plugin=plugin_obj