diff --git a/qa/common/gen_qa_dyna_sequence_implicit_models.py b/qa/common/gen_qa_dyna_sequence_implicit_models.py index 32e4ebea13..c69ca28eab 100755 --- a/qa/common/gen_qa_dyna_sequence_implicit_models.py +++ b/qa/common/gen_qa_dyna_sequence_implicit_models.py @@ -575,7 +575,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) diff --git a/qa/common/gen_qa_dyna_sequence_models.py b/qa/common/gen_qa_dyna_sequence_models.py index 7f8459b0da..1a26890f32 100755 --- a/qa/common/gen_qa_dyna_sequence_models.py +++ b/qa/common/gen_qa_dyna_sequence_models.py @@ -129,7 +129,8 @@ def create_plan_shape_tensor_modelfile( flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) @@ -369,7 +370,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) diff --git a/qa/common/gen_qa_identity_models.py b/qa/common/gen_qa_identity_models.py index 248126bcc2..426d939d9e 100755 --- a/qa/common/gen_qa_identity_models.py +++ 
b/qa/common/gen_qa_identity_models.py @@ -584,7 +584,8 @@ def create_plan_dynamic_rf_modelfile( flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_dtype]) for dt in datatype_set: if dt == trt.int8: @@ -707,7 +708,8 @@ def create_plan_shape_tensor_modelfile( flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_dtype]) for dt in datatype_set: if dt == trt.int8: diff --git a/qa/common/gen_qa_implicit_models.py b/qa/common/gen_qa_implicit_models.py index 06b86ee5c2..c0800098ec 100755 --- a/qa/common/gen_qa_implicit_models.py +++ b/qa/common/gen_qa_implicit_models.py @@ -1066,7 +1066,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 328f42bbe0..d490f8c530 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -352,43 +352,27 @@ python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_format_models.py --models_dir=$TRITON_MD chmod -R 777 $TRITON_MDLS_QA_TRT_FORMAT_MODEL nvidia-smi --query-gpu=compute_cap | grep -qzE '10\.7|11\.0' && echo -e 
'${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for data dependent shape (NonZero not supported on this GPU)${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL -# Make shared library for custom Hardmax plugin. -if [ -d "/usr/src/tensorrt/samples/python/onnx_custom_plugin" ]; then - cd /usr/src/tensorrt/samples/python/onnx_custom_plugin +# Build the custom Hardmax plugin used by L0_trt_plugin. The plugin source is +# pulled from the public NVIDIA/TensorRT repo at the release/MAJOR.MINOR +# branch matching the runtime TRT version. If the branch is not published +# yet for this TRT version, the plugin model generation is skipped with a +# warning -- L0_trt_plugin will report missing artifacts but the rest of +# the QA model repository is still produced. +TRT_BRANCH=\$(echo \${TRT_VERSION} | cut -d . -f -2) +TRTSRC=/workspace/TensorRT +rm -rf \${TRTSRC} +if git clone --depth 1 -b release/\${TRT_BRANCH} \ + https://github.com/NVIDIA/TensorRT.git \${TRTSRC}; then + cd \${TRTSRC}/samples/python/onnx_custom_plugin && \ + rm -rf build && mkdir build && cd build && cmake .. && make -j && \ + cp libcustomHardmaxPlugin.so ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/. + LD_PRELOAD=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/libcustomHardmaxPlugin.so \ + python3 ${TRITON_MDLS_SRC_DIR}/gen_qa_trt_plugin_models.py \ + --models_dir=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL} + chmod -R 777 ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL} else - TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2) - if ! git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT; then - MAJOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 1) - MINOR=\$(echo "\$TRT_BRANCH" | cut -d . 
-f 2) - if [ -n "\$MINOR" ] && [ "\$MINOR" -gt 0 ] 2>/dev/null; then - TRT_BRANCH="\${MAJOR}.\$((MINOR - 1))" - echo "Fallback: cloning TensorRT release/\${TRT_BRANCH} (previous minor)" - git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT - elif [ -n "\$MAJOR" ] && [ "\$MAJOR" -gt 0 ] 2>/dev/null; then - PREV_MAJOR=\$((MAJOR - 1)) - echo "Fallback: MINOR is 0, querying remote for latest release/\${PREV_MAJOR}.x branch" - TRT_BRANCH=\$(git ls-remote --heads https://github.com/NVIDIA/TensorRT.git "refs/heads/release/\${PREV_MAJOR}.*" \ - | awk -F'refs/heads/release/' '{print \$2}' \ - | awk -F. '{print \$2, \$0}' \ - | sort -k1,1n \ - | tail -1 \ - | awk '{print \$2}') - if [ -n "\$TRT_BRANCH" ]; then - echo "Fallback: cloning TensorRT release/\${TRT_BRANCH}" - git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT - else - exit 1 - fi - else - exit 1 - fi - fi - cd /workspace/TensorRT/samples/python/onnx_custom_plugin + echo "[WARNING] TensorRT release/\${TRT_BRANCH} not available on github.com/NVIDIA/TensorRT; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version." fi -rm -rf build && mkdir build && \ -cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/. 
-LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL -chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL exit 0 EOF diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index d509562bff..12d2a7225f 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -159,7 +159,8 @@ def create_plan_dynamic_rf_modelfile( profile.set_shape("INPUT1", min_shape, opt_shape, max_shape) flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype]) for dt in datatype_set: @@ -449,7 +450,8 @@ def create_plan_fixed_rf_modelfile( profile.set_shape("INPUT1", min_shape, opt_shape, max_shape) flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype]) for dt in datatype_set: diff --git a/qa/common/gen_qa_sequence_models.py b/qa/common/gen_qa_sequence_models.py index bf83a3a5f3..f8d89a5f9e 100755 --- a/qa/common/gen_qa_sequence_models.py +++ b/qa/common/gen_qa_sequence_models.py @@ -118,7 +118,8 @@ def create_plan_shape_tensor_modelfile( flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) @@ -320,7 +321,8 @@ def 
create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) if trt_dtype == trt.int8: flags |= 1 << int(trt.BuilderFlag.INT8) diff --git a/qa/common/gen_qa_trt_format_models.py b/qa/common/gen_qa_trt_format_models.py index 5645a7178c..5f2cadd69e 100755 --- a/qa/common/gen_qa_trt_format_models.py +++ b/qa/common/gen_qa_trt_format_models.py @@ -147,7 +147,8 @@ def create_plan_modelfile( # The build will fail if TensorRT cannot build an engine without introducing such reformatting. The failure may happen only for some target platforms, because of what formats are supported by kernels for those platforms. # flags = 1 << int(trt.BuilderFlag.DIRECT_IO) flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS) - flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) + if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"): + flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS) datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype]) for dt in datatype_set: if dt == trt.int8: diff --git a/qa/common/gen_qa_trt_plugin_models.py b/qa/common/gen_qa_trt_plugin_models.py index 83ed9e82a7..9fd23d92a8 100755 --- a/qa/common/gen_qa_trt_plugin_models.py +++ b/qa/common/gen_qa_trt_plugin_models.py @@ -44,7 +44,12 @@ def get_trt_plugin(plugin_name): plugin = None field_collection = None - plugin_creators = trt.get_plugin_registry().plugin_creator_list + # The upstream onnx_custom_plugin sample is V2 on TRT 10.x release + # branches and V3 on rel-11.0 (and TRT 11 removed the V2 plugin + # registry surface). Pick the matching API at runtime. 
+ registry = trt.get_plugin_registry() + use_v3 = not hasattr(registry, "plugin_creator_list") + plugin_creators = registry.all_creators if use_v3 else registry.plugin_creator_list for plugin_creator in plugin_creators: if (plugin_creator.name == "CustomHardmax") and ( plugin_name == "CustomHardmax" @@ -57,9 +62,16 @@ def get_trt_plugin(plugin_name): if field_collection is None: raise RuntimeError("Plugin not found: " + plugin_name) - plugin = plugin_creator.create_plugin( - name=plugin_name, field_collection=field_collection - ) + if use_v3: + plugin = plugin_creator.create_plugin( + name=plugin_name, + field_collection=field_collection, + phase=trt.TensorRTPhase.BUILD, + ) + else: + plugin = plugin_creator.create_plugin( + name=plugin_name, field_collection=field_collection + ) return plugin @@ -104,9 +116,16 @@ def create_plan_modelfile( input_layer = network.add_input( name="INPUT0", dtype=trt_input_dtype, shape=input_with_batchsize ) - plugin_layer = network.add_plugin_v2( - inputs=[input_layer], plugin=get_trt_plugin(plugin_name) - ) + # add_plugin_v2 was removed in TRT 11; add_plugin_v3 has existed since + # TRT 10.0. Pick the API that exists on this TRT install; the plugin + # object returned by get_trt_plugin() is matched to the same version. + plugin_obj = get_trt_plugin(plugin_name) + if hasattr(network, "add_plugin_v2"): + plugin_layer = network.add_plugin_v2(inputs=[input_layer], plugin=plugin_obj) + else: + plugin_layer = network.add_plugin_v3( + inputs=[input_layer], shape_inputs=[], plugin=plugin_obj + ) plugin_layer.get_output(0).name = "OUTPUT0" network.mark_output(plugin_layer.get_output(0))