From 8da633c521c0e27bd9db4a250b08b203a6842a59 Mon Sep 17 00:00:00 2001
From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com>
Date: Fri, 15 May 2026 16:34:53 +0000
Subject: [PATCH 1/5] Addressing TensorRT API deprecation

---
 qa/common/gen_qa_dyna_sequence_implicit_models.py | 3 ++-
 qa/common/gen_qa_dyna_sequence_models.py          | 6 ++++--
 qa/common/gen_qa_identity_models.py               | 6 ++++--
 qa/common/gen_qa_implicit_models.py               | 3 ++-
 qa/common/gen_qa_models.py                        | 6 ++++--
 qa/common/gen_qa_sequence_models.py               | 6 ++++--
 qa/common/gen_qa_trt_format_models.py             | 3 ++-
 7 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/qa/common/gen_qa_dyna_sequence_implicit_models.py b/qa/common/gen_qa_dyna_sequence_implicit_models.py
index 32e4ebea13..c69ca28eab 100755
--- a/qa/common/gen_qa_dyna_sequence_implicit_models.py
+++ b/qa/common/gen_qa_dyna_sequence_implicit_models.py
@@ -575,7 +575,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)
diff --git a/qa/common/gen_qa_dyna_sequence_models.py b/qa/common/gen_qa_dyna_sequence_models.py
index 7f8459b0da..1a26890f32 100755
--- a/qa/common/gen_qa_dyna_sequence_models.py
+++ b/qa/common/gen_qa_dyna_sequence_models.py
@@ -129,7 +129,8 @@ def create_plan_shape_tensor_modelfile(
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)
@@ -369,7 +370,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)
diff --git a/qa/common/gen_qa_identity_models.py b/qa/common/gen_qa_identity_models.py
index 248126bcc2..426d939d9e 100755
--- a/qa/common/gen_qa_identity_models.py
+++ b/qa/common/gen_qa_identity_models.py
@@ -584,7 +584,8 @@ def create_plan_dynamic_rf_modelfile(
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
     datatype_set = set([trt_dtype])
     for dt in datatype_set:
         if dt == trt.int8:
@@ -707,7 +708,8 @@ def create_plan_shape_tensor_modelfile(
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
     datatype_set = set([trt_dtype])
     for dt in datatype_set:
         if dt == trt.int8:
diff --git a/qa/common/gen_qa_implicit_models.py b/qa/common/gen_qa_implicit_models.py
index 06b86ee5c2..c0800098ec 100755
--- a/qa/common/gen_qa_implicit_models.py
+++ b/qa/common/gen_qa_implicit_models.py
@@ -1066,7 +1066,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)
diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py
index d509562bff..12d2a7225f 100755
--- a/qa/common/gen_qa_models.py
+++ b/qa/common/gen_qa_models.py
@@ -159,7 +159,8 @@ def create_plan_dynamic_rf_modelfile(
     profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)
 
     flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
     for dt in datatype_set:
@@ -449,7 +450,8 @@ def create_plan_fixed_rf_modelfile(
     profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)
 
     flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
     for dt in datatype_set:
diff --git a/qa/common/gen_qa_sequence_models.py b/qa/common/gen_qa_sequence_models.py
index bf83a3a5f3..f8d89a5f9e 100755
--- a/qa/common/gen_qa_sequence_models.py
+++ b/qa/common/gen_qa_sequence_models.py
@@ -118,7 +118,8 @@ def create_plan_shape_tensor_modelfile(
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)
@@ -320,7 +321,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)
diff --git a/qa/common/gen_qa_trt_format_models.py b/qa/common/gen_qa_trt_format_models.py
index 5645a7178c..5f2cadd69e 100755
--- a/qa/common/gen_qa_trt_format_models.py
+++ b/qa/common/gen_qa_trt_format_models.py
@@ -147,7 +147,8 @@ def create_plan_modelfile(
     # The build will fail if TensorRT cannot build an engine without introducing such reformatting. The failure may happen only for some target platforms, because of what formats are supported by kernels for those platforms.
     # flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
     datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
     for dt in datatype_set:
         if dt == trt.int8:

From 9d1f9d09584432fabf60fe43b6b727554453df86 Mon Sep 17 00:00:00 2001
From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com>
Date: Fri, 15 May 2026 17:26:33 +0000
Subject: [PATCH 2/5] Source CustomHardmax plugin from internal TensorRT mirror

The upstream onnx_custom_plugin sample on github.com/NVIDIA/TensorRT
ships only the legacy IPluginV2DynamicExt implementation, which TRT 11
no longer loads -- the V2 plugin registry surface was removed. The
internal NVIDIA mirror has a V3 port cherry-picked to rel-11.0 while
rel-10.x branches keep V2.

- gen_qa_model_repository now clones the TensorRT samples from
  gitlab-master.nvidia.com/TensorRT/TensorRT at rel-${TRT_BRANCH}
  matching the runtime TRT version. If the branch is not available
  the plugin model generation is skipped with a warning so the rest
  of the QA model repository is still produced. CI_JOB_TOKEN is
  passed through the docker run that executes the TRT script.

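- gen_qa_trt_plugin_models.py picks the V2 or V3 plugin API at
  runtime via hasattr. When the registry still exposes
  plugin_creator_list (TRT 10.x with V2 source) it uses that list and
  the two-argument create_plugin(); otherwise (TRT 11 with V3 source)
  it uses all_creators and create_plugin(..., phase=BUILD). The layer
  is added with add_plugin_v2 when the network has it, else with
  add_plugin_v3.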
---
 qa/common/gen_qa_model_repository     | 57 +++++++++++----------------
 qa/common/gen_qa_trt_plugin_models.py | 37 +++++++++++++----
 2 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 328f42bbe0..278f1bd5e5 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -352,43 +352,29 @@ python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_format_models.py --models_dir=$TRITON_MD
 chmod -R 777 $TRITON_MDLS_QA_TRT_FORMAT_MODEL
 nvidia-smi --query-gpu=compute_cap | grep -qzE '10\.7|11\.0' && echo -e '${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for data dependent shape (NonZero not supported on this GPU)${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
 chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
-# Make shared library for custom Hardmax plugin.
-if [ -d "/usr/src/tensorrt/samples/python/onnx_custom_plugin" ]; then
-    cd /usr/src/tensorrt/samples/python/onnx_custom_plugin
+# Build the custom Hardmax plugin used by L0_trt_plugin. The plugin source is
+# pulled from the internal NVIDIA TensorRT mirror at the rel-<major.minor>
+# branch matching the runtime TRT version (rel-10.x ships the legacy V2
+# implementation, rel-11.0 ships the IPluginV3 port). If the branch does not
+# exist for this TRT version, the plugin model generation is skipped with a
+# warning -- L0_trt_plugin will report missing artifacts but the rest of the
+# QA model repository is still produced.
+TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2)
+TRTSRC=/workspace/TensorRT
+rm -rf \$TRTSRC
+if git clone --depth 1 -b rel-\${TRT_BRANCH} \
+     https://gitlab-ci-token:\${CI_JOB_TOKEN}@gitlab-master.nvidia.com/TensorRT/TensorRT.git \
+     \$TRTSRC; then
+  cd \$TRTSRC/samples/python/onnx_custom_plugin && \
+    rm -rf build && mkdir build && cd build && cmake .. && make -j && \
+    cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/.
+  LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so \
+    python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py \
+    --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL
+  chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL
 else
-    TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2)
-    if ! git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT; then
-      MAJOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 1)
-      MINOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 2)
-      if [ -n "\$MINOR" ] && [ "\$MINOR" -gt 0 ] 2>/dev/null; then
-        TRT_BRANCH="\${MAJOR}.\$((MINOR - 1))"
-        echo "Fallback: cloning TensorRT release/\${TRT_BRANCH} (previous minor)"
-        git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT
-      elif [ -n "\$MAJOR" ] && [ "\$MAJOR" -gt 0 ] 2>/dev/null; then
-        PREV_MAJOR=\$((MAJOR - 1))
-        echo "Fallback: MINOR is 0, querying remote for latest release/\${PREV_MAJOR}.x branch"
-        TRT_BRANCH=\$(git ls-remote --heads https://github.com/NVIDIA/TensorRT.git "refs/heads/release/\${PREV_MAJOR}.*" \
-          | awk -F'refs/heads/release/' '{print \$2}' \
-          | awk -F. '{print \$2, \$0}' \
-          | sort -k1,1n \
-          | tail -1 \
-          | awk '{print \$2}')
-        if [ -n "\$TRT_BRANCH" ]; then
-          echo "Fallback: cloning TensorRT release/\${TRT_BRANCH}"
-          git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT
-        else
-          exit 1
-        fi
-      else
-        exit 1
-      fi
-    fi
-    cd /workspace/TensorRT/samples/python/onnx_custom_plugin
+  echo "[WARNING] TensorRT rel-\${TRT_BRANCH} not available on internal mirror; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version."
 fi
-rm -rf build && mkdir build && \
-cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/.
-LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL
-chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL
 exit 0
 EOF
 
@@ -569,6 +555,7 @@ if [ "$TRITON_MODELS_USE_DOCKER" -eq 1 ] && which docker ; then
             -v $DOCKER_VOLUME:/mnt \
             -t \
             -e TRT_VERBOSE \
+            -e CI_JOB_TOKEN \
             $TENSORRT_IMAGE \
             bash -e $TRITON_MDLS_SRC_DIR/$TRTSCRIPT
 
diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py
index 83ed9e82a7..c492fc76dd 100755
--- a/qa/common/gen_qa_trt_plugin_models.py
+++ b/qa/common/gen_qa_trt_plugin_models.py
@@ -44,7 +44,14 @@
 def get_trt_plugin(plugin_name):
     plugin = None
     field_collection = None
-    plugin_creators = trt.get_plugin_registry().plugin_creator_list
+    # The upstream onnx_custom_plugin sample is V2 on TRT 10.x release
+    # branches and V3 on rel-11.0 (and TRT 11 removed the V2 plugin
+    # registry surface). Pick the matching API at runtime.
+    registry = trt.get_plugin_registry()
+    use_v3 = not hasattr(registry, "plugin_creator_list")
+    plugin_creators = (
+        registry.all_creators if use_v3 else registry.plugin_creator_list
+    )
     for plugin_creator in plugin_creators:
         if (plugin_creator.name == "CustomHardmax") and (
             plugin_name == "CustomHardmax"
@@ -57,9 +64,16 @@ def get_trt_plugin(plugin_name):
 
     if field_collection is None:
         raise RuntimeError("Plugin not found: " + plugin_name)
-    plugin = plugin_creator.create_plugin(
-        name=plugin_name, field_collection=field_collection
-    )
+    if use_v3:
+        plugin = plugin_creator.create_plugin(
+            name=plugin_name,
+            field_collection=field_collection,
+            phase=trt.TensorRTPhase.BUILD,
+        )
+    else:
+        plugin = plugin_creator.create_plugin(
+            name=plugin_name, field_collection=field_collection
+        )
 
     return plugin
 
@@ -104,9 +118,18 @@ def create_plan_modelfile(
     input_layer = network.add_input(
         name="INPUT0", dtype=trt_input_dtype, shape=input_with_batchsize
     )
-    plugin_layer = network.add_plugin_v2(
-        inputs=[input_layer], plugin=get_trt_plugin(plugin_name)
-    )
+    # add_plugin_v2 was removed in TRT 11; add_plugin_v3 has existed since
+    # TRT 10.0. Pick the API that exists on this TRT install; the plugin
+    # object returned by get_trt_plugin() is matched to the same version.
+    plugin_obj = get_trt_plugin(plugin_name)
+    if hasattr(network, "add_plugin_v2"):
+        plugin_layer = network.add_plugin_v2(
+            inputs=[input_layer], plugin=plugin_obj
+        )
+    else:
+        plugin_layer = network.add_plugin_v3(
+            inputs=[input_layer], shape_inputs=[], plugin=plugin_obj
+        )
     plugin_layer.get_output(0).name = "OUTPUT0"
     network.mark_output(plugin_layer.get_output(0))
 

From ed9eaf3c78bddfa7b5bf76e117b14b668c862b89 Mon Sep 17 00:00:00 2001
From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com>
Date: Fri, 15 May 2026 17:57:34 +0000
Subject: [PATCH 3/5] Use public TensorRT mirror for CustomHardmax, skip if
 branch missing

Reverts the previous switch to the internal NVIDIA GitLab mirror.
The public github.com/NVIDIA/TensorRT remains the source of truth;
when a release/<major.minor> branch is not yet published for the
runtime TRT version (e.g. release/11.0 ahead of the public sync),
the plugin model generation is skipped with a warning instead of
walking the old fallback ladder (previous minor, then latest branch
of the previous major). L0_trt_plugin loses coverage for that TRT
version until the branch becomes available, but the rest of the
QA model repository is still produced.

Also drops the CI_JOB_TOKEN passthrough on the TRT docker run since
the public clone needs no authentication.
---
 qa/common/gen_qa_model_repository | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 278f1bd5e5..6db9542e75 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -353,18 +353,16 @@ chmod -R 777 $TRITON_MDLS_QA_TRT_FORMAT_MODEL
 nvidia-smi --query-gpu=compute_cap | grep -qzE '10\.7|11\.0' && echo -e '${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for data dependent shape (NonZero not supported on this GPU)${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
 chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
 # Build the custom Hardmax plugin used by L0_trt_plugin. The plugin source is
-# pulled from the internal NVIDIA TensorRT mirror at the rel-<major.minor>
-# branch matching the runtime TRT version (rel-10.x ships the legacy V2
-# implementation, rel-11.0 ships the IPluginV3 port). If the branch does not
-# exist for this TRT version, the plugin model generation is skipped with a
-# warning -- L0_trt_plugin will report missing artifacts but the rest of the
-# QA model repository is still produced.
+# pulled from the public NVIDIA/TensorRT repo at the release/<major.minor>
+# branch matching the runtime TRT version. If the branch is not published
+# yet for this TRT version, the plugin model generation is skipped with a
+# warning -- L0_trt_plugin will report missing artifacts but the rest of
+# the QA model repository is still produced.
 TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2)
 TRTSRC=/workspace/TensorRT
 rm -rf \$TRTSRC
-if git clone --depth 1 -b rel-\${TRT_BRANCH} \
-     https://gitlab-ci-token:\${CI_JOB_TOKEN}@gitlab-master.nvidia.com/TensorRT/TensorRT.git \
-     \$TRTSRC; then
+if git clone --depth 1 -b release/\${TRT_BRANCH} \
+     https://github.com/NVIDIA/TensorRT.git \$TRTSRC; then
   cd \$TRTSRC/samples/python/onnx_custom_plugin && \
     rm -rf build && mkdir build && cd build && cmake .. && make -j && \
     cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/.
@@ -373,7 +371,7 @@ if git clone --depth 1 -b rel-\${TRT_BRANCH} \
     --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL
   chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL
 else
-  echo "[WARNING] TensorRT rel-\${TRT_BRANCH} not available on internal mirror; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version."
+  echo "[WARNING] TensorRT release/\${TRT_BRANCH} not available on github.com/NVIDIA/TensorRT; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version."
 fi
 exit 0
 EOF
@@ -555,7 +553,6 @@ if [ "$TRITON_MODELS_USE_DOCKER" -eq 1 ] && which docker ; then
             -v $DOCKER_VOLUME:/mnt \
             -t \
             -e TRT_VERBOSE \
-            -e CI_JOB_TOKEN \
             $TENSORRT_IMAGE \
             bash -e $TRITON_MDLS_SRC_DIR/$TRTSCRIPT
 

From c44f888792304eb5d73fafde3eec47f051fbc5f1 Mon Sep 17 00:00:00 2001
From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com>
Date: Fri, 15 May 2026 17:59:42 +0000
Subject: [PATCH 4/5] Wrap bash variables in CustomHardmax plugin block as
 ${VAR}

---
 qa/common/gen_qa_model_repository | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
index 6db9542e75..d490f8c530 100755
--- a/qa/common/gen_qa_model_repository
+++ b/qa/common/gen_qa_model_repository
@@ -358,18 +358,18 @@ chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
 # yet for this TRT version, the plugin model generation is skipped with a
 # warning -- L0_trt_plugin will report missing artifacts but the rest of
 # the QA model repository is still produced.
-TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2)
+TRT_BRANCH=\$(echo \${TRT_VERSION} | cut -d . -f -2)
 TRTSRC=/workspace/TensorRT
-rm -rf \$TRTSRC
+rm -rf \${TRTSRC}
 if git clone --depth 1 -b release/\${TRT_BRANCH} \
-     https://github.com/NVIDIA/TensorRT.git \$TRTSRC; then
-  cd \$TRTSRC/samples/python/onnx_custom_plugin && \
+     https://github.com/NVIDIA/TensorRT.git \${TRTSRC}; then
+  cd \${TRTSRC}/samples/python/onnx_custom_plugin && \
     rm -rf build && mkdir build && cd build && cmake .. && make -j && \
-    cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/.
-  LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so \
-    python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py \
-    --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL
-  chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL
+    cp libcustomHardmaxPlugin.so ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/.
+  LD_PRELOAD=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/libcustomHardmaxPlugin.so \
+    python3 ${TRITON_MDLS_SRC_DIR}/gen_qa_trt_plugin_models.py \
+    --models_dir=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}
+  chmod -R 777 ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}
 else
   echo "[WARNING] TensorRT release/\${TRT_BRANCH} not available on github.com/NVIDIA/TensorRT; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version."
 fi

From 0d1b75240df9b4ea167b03f6ee5de0915fb02254 Mon Sep 17 00:00:00 2001
From: "M. Chornyi" <99709299+mc-nv@users.noreply.github.com>
Date: Fri, 15 May 2026 19:19:02 +0000
Subject: [PATCH 5/5] Address pre-commit issue

---
 qa/common/gen_qa_trt_plugin_models.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py
index c492fc76dd..9fd23d92a8 100755
--- a/qa/common/gen_qa_trt_plugin_models.py
+++ b/qa/common/gen_qa_trt_plugin_models.py
@@ -49,9 +49,7 @@ def get_trt_plugin(plugin_name):
     # registry surface). Pick the matching API at runtime.
     registry = trt.get_plugin_registry()
     use_v3 = not hasattr(registry, "plugin_creator_list")
-    plugin_creators = (
-        registry.all_creators if use_v3 else registry.plugin_creator_list
-    )
+    plugin_creators = registry.all_creators if use_v3 else registry.plugin_creator_list
     for plugin_creator in plugin_creators:
         if (plugin_creator.name == "CustomHardmax") and (
             plugin_name == "CustomHardmax"
@@ -123,9 +121,7 @@ def create_plan_modelfile(
     # object returned by get_trt_plugin() is matched to the same version.
     plugin_obj = get_trt_plugin(plugin_name)
     if hasattr(network, "add_plugin_v2"):
-        plugin_layer = network.add_plugin_v2(
-            inputs=[input_layer], plugin=plugin_obj
-        )
+        plugin_layer = network.add_plugin_v2(inputs=[input_layer], plugin=plugin_obj)
     else:
         plugin_layer = network.add_plugin_v3(
             inputs=[input_layer], shape_inputs=[], plugin=plugin_obj