triton-inference-server · mc-nv · May 15, 2026 · May 15, 2026 · May 15, 2026 · May 15, 2026
diff --git a/qa/common/gen_qa_dyna_sequence_implicit_models.py b/qa/common/gen_qa_dyna_sequence_implicit_models.py
@@ -575,7 +575,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)

diff --git a/qa/common/gen_qa_dyna_sequence_models.py b/qa/common/gen_qa_dyna_sequence_models.py
@@ -129,7 +129,8 @@ def create_plan_shape_tensor_modelfile(
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)
@@ -369,7 +370,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)

diff --git a/qa/common/gen_qa_identity_models.py b/qa/common/gen_qa_identity_models.py
@@ -584,7 +584,8 @@ def create_plan_dynamic_rf_modelfile(
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
     datatype_set = set([trt_dtype])
     for dt in datatype_set:
         if dt == trt.int8:
@@ -707,7 +708,8 @@ def create_plan_shape_tensor_modelfile(
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
     datatype_set = set([trt_dtype])
     for dt in datatype_set:
         if dt == trt.int8:

diff --git a/qa/common/gen_qa_implicit_models.py b/qa/common/gen_qa_implicit_models.py
@@ -1066,7 +1066,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)

diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository
@@ -352,43 +352,27 @@ python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_format_models.py --models_dir=$TRITON_MD
 chmod -R 777 $TRITON_MDLS_QA_TRT_FORMAT_MODEL
 nvidia-smi --query-gpu=compute_cap | grep -qzE '10\.7|11\.0' && echo -e '${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for data dependent shape (NonZero not supported on this GPU)${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
 chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
-# Make shared library for custom Hardmax plugin.
-if [ -d "/usr/src/tensorrt/samples/python/onnx_custom_plugin" ]; then
-    cd /usr/src/tensorrt/samples/python/onnx_custom_plugin
+# Build the custom Hardmax plugin used by L0_trt_plugin. The plugin source is
+# pulled from the public NVIDIA/TensorRT repo at the release/<major.minor>
+# branch matching the runtime TRT version. If the clone fails for any reason
+# (e.g. the branch is not published yet), plugin model generation is skipped
+# with a warning -- L0_trt_plugin will report missing artifacts but the rest
+# of the QA model repository is still produced.
+TRT_BRANCH=\$(echo \${TRT_VERSION} | cut -d . -f -2)
+TRTSRC=/workspace/TensorRT
+rm -rf \${TRTSRC}
+if git clone --depth 1 -b release/\${TRT_BRANCH} \
+     https://github.com/NVIDIA/TensorRT.git \${TRTSRC}; then
+  cd \${TRTSRC}/samples/python/onnx_custom_plugin && \
+    rm -rf build && mkdir build && cd build && cmake .. && make -j && \
+    cp libcustomHardmaxPlugin.so ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/.
+  LD_PRELOAD=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/libcustomHardmaxPlugin.so \
+    python3 ${TRITON_MDLS_SRC_DIR}/gen_qa_trt_plugin_models.py \
+    --models_dir=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}
+  chmod -R 777 ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}
 else
-    TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2)
-    if ! git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT; then
-      MAJOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 1)
-      MINOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 2)
-      if [ -n "\$MINOR" ] && [ "\$MINOR" -gt 0 ] 2>/dev/null; then
-        TRT_BRANCH="\${MAJOR}.\$((MINOR - 1))"
-        echo "Fallback: cloning TensorRT release/\${TRT_BRANCH} (previous minor)"
-        git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT
-      elif [ -n "\$MAJOR" ] && [ "\$MAJOR" -gt 0 ] 2>/dev/null; then
-        PREV_MAJOR=\$((MAJOR - 1))
-        echo "Fallback: MINOR is 0, querying remote for latest release/\${PREV_MAJOR}.x branch"
-        TRT_BRANCH=\$(git ls-remote --heads https://github.com/NVIDIA/TensorRT.git "refs/heads/release/\${PREV_MAJOR}.*" \
-          | awk -F'refs/heads/release/' '{print \$2}' \
-          | awk -F. '{print \$2, \$0}' \
-          | sort -k1,1n \
-          | tail -1 \
-          | awk '{print \$2}')
-        if [ -n "\$TRT_BRANCH" ]; then
-          echo "Fallback: cloning TensorRT release/\${TRT_BRANCH}"
-          git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT
-        else
-          exit 1
-        fi
-      else
-        exit 1
-      fi
-    fi
-    cd /workspace/TensorRT/samples/python/onnx_custom_plugin
+  echo "[WARNING] TensorRT release/\${TRT_BRANCH} not available on github.com/NVIDIA/TensorRT; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version."
 fi
-rm -rf build && mkdir build && \
-cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/.
-LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL
-chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL
 exit 0
 EOF
 

diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py
@@ -159,7 +159,8 @@ def create_plan_dynamic_rf_modelfile(
     profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)
 
     flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
     for dt in datatype_set:
@@ -449,7 +450,8 @@ def create_plan_fixed_rf_modelfile(
     profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)
 
     flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
     for dt in datatype_set:

diff --git a/qa/common/gen_qa_sequence_models.py b/qa/common/gen_qa_sequence_models.py
@@ -118,7 +118,8 @@ def create_plan_shape_tensor_modelfile(
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)
@@ -320,7 +321,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
 
     flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
 
     if trt_dtype == trt.int8:
         flags |= 1 << int(trt.BuilderFlag.INT8)

diff --git a/qa/common/gen_qa_trt_format_models.py b/qa/common/gen_qa_trt_format_models.py
@@ -147,7 +147,8 @@ def create_plan_modelfile(
     # The build will fail if TensorRT cannot build an engine without introducing such reformatting. The failure may happen only for some target platforms, because of what formats are supported by kernels for those platforms.
     # flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
     flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
-    flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
+    if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
+        flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
     datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
     for dt in datatype_set:
         if dt == trt.int8:

diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py
@@ -44,7 +44,12 @@
 def get_trt_plugin(plugin_name):
     plugin = None
     field_collection = None
-    plugin_creators = trt.get_plugin_registry().plugin_creator_list
+    # The upstream onnx_custom_plugin sample is V2 on TRT 10.x release
+    # branches and V3 on rel-11.0 (and TRT 11 removed the V2 plugin
+    # registry surface). Pick the matching API at runtime.
+    registry = trt.get_plugin_registry()
+    use_v3 = not hasattr(registry, "plugin_creator_list")
+    plugin_creators = registry.all_creators if use_v3 else registry.plugin_creator_list
     for plugin_creator in plugin_creators:
         if (plugin_creator.name == "CustomHardmax") and (
             plugin_name == "CustomHardmax"
@@ -57,9 +62,16 @@ def get_trt_plugin(plugin_name):
 
     if field_collection is None:
         raise RuntimeError("Plugin not found: " + plugin_name)
-    plugin = plugin_creator.create_plugin(
-        name=plugin_name, field_collection=field_collection
-    )
+    if use_v3:
+        plugin = plugin_creator.create_plugin(
+            name=plugin_name,
+            field_collection=field_collection,
+            phase=trt.TensorRTPhase.BUILD,
+        )
+    else:
+        plugin = plugin_creator.create_plugin(
+            name=plugin_name, field_collection=field_collection
+        )
 
     return plugin
 
@@ -104,9 +116,16 @@ def create_plan_modelfile(
     input_layer = network.add_input(
         name="INPUT0", dtype=trt_input_dtype, shape=input_with_batchsize
     )
-    plugin_layer = network.add_plugin_v2(
-        inputs=[input_layer], plugin=get_trt_plugin(plugin_name)
-    )
+    # add_plugin_v2 was removed in TRT 11; add_plugin_v3 has existed since
+    # TRT 10.0. Prefer add_plugin_v2 when this TRT install still has it;
+    # get_trt_plugin() picks V2 vs V3 by its own registry attribute probe.
+    plugin_obj = get_trt_plugin(plugin_name)
+    if hasattr(network, "add_plugin_v2"):
+        plugin_layer = network.add_plugin_v2(inputs=[input_layer], plugin=plugin_obj)
+    else:
+        plugin_layer = network.add_plugin_v3(
+            inputs=[input_layer], shape_inputs=[], plugin=plugin_obj
+        )
     plugin_layer.get_output(0).name = "OUTPUT0"
     network.mark_output(plugin_layer.get_output(0))