Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion qa/common/gen_qa_dyna_sequence_implicit_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -575,7 +575,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)

flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

if trt_dtype == trt.int8:
flags |= 1 << int(trt.BuilderFlag.INT8)
Expand Down
6 changes: 4 additions & 2 deletions qa/common/gen_qa_dyna_sequence_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ def create_plan_shape_tensor_modelfile(

flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

if trt_dtype == trt.int8:
flags |= 1 << int(trt.BuilderFlag.INT8)
Expand Down Expand Up @@ -369,7 +370,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)

flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

if trt_dtype == trt.int8:
flags |= 1 << int(trt.BuilderFlag.INT8)
Expand Down
6 changes: 4 additions & 2 deletions qa/common/gen_qa_identity_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,8 @@ def create_plan_dynamic_rf_modelfile(

flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
datatype_set = set([trt_dtype])
for dt in datatype_set:
if dt == trt.int8:
Expand Down Expand Up @@ -707,7 +708,8 @@ def create_plan_shape_tensor_modelfile(

flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
datatype_set = set([trt_dtype])
for dt in datatype_set:
if dt == trt.int8:
Expand Down
3 changes: 2 additions & 1 deletion qa/common/gen_qa_implicit_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,7 +1066,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)

flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

if trt_dtype == trt.int8:
flags |= 1 << int(trt.BuilderFlag.INT8)
Expand Down
54 changes: 19 additions & 35 deletions qa/common/gen_qa_model_repository
Original file line number Diff line number Diff line change
Expand Up @@ -352,43 +352,27 @@ python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_format_models.py --models_dir=$TRITON_MD
chmod -R 777 $TRITON_MDLS_QA_TRT_FORMAT_MODEL
nvidia-smi --query-gpu=compute_cap | grep -qzE '10\.7|11\.0' && echo -e '${COLOR_WARNING}[WARNING]${COLOR_RESET} Skipping model generation for data dependent shape (NonZero not supported on this GPU)${COLOR_RESET}' || python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_data_dependent_shape.py --models_dir=$TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
chmod -R 777 $TRITON_MDLS_QA_TRT_DATA_DEPENDENT_MODEL
# Make shared library for custom Hardmax plugin.
if [ -d "/usr/src/tensorrt/samples/python/onnx_custom_plugin" ]; then
cd /usr/src/tensorrt/samples/python/onnx_custom_plugin
# Build the custom Hardmax plugin used by L0_trt_plugin. The plugin source is
# pulled from the public NVIDIA/TensorRT repo at the release/<major.minor>
# branch matching the runtime TRT version. If the branch is not published
# yet for this TRT version, the plugin model generation is skipped with a
# warning -- L0_trt_plugin will report missing artifacts but the rest of
# the QA model repository is still produced.
TRT_BRANCH=\$(echo \${TRT_VERSION} | cut -d . -f -2)
TRTSRC=/workspace/TensorRT
rm -rf \${TRTSRC}
if git clone --depth 1 -b release/\${TRT_BRANCH} \
https://github.com/NVIDIA/TensorRT.git \${TRTSRC}; then
cd \${TRTSRC}/samples/python/onnx_custom_plugin && \
rm -rf build && mkdir build && cd build && cmake .. && make -j && \
cp libcustomHardmaxPlugin.so ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/.
LD_PRELOAD=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}/libcustomHardmaxPlugin.so \
python3 ${TRITON_MDLS_SRC_DIR}/gen_qa_trt_plugin_models.py \
--models_dir=${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}
chmod -R 777 ${TRITON_MDLS_QA_TRT_PLUGIN_MODEL}
else
TRT_BRANCH=\$(echo \$TRT_VERSION | cut -d . -f -2)
if ! git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT; then
MAJOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 1)
MINOR=\$(echo "\$TRT_BRANCH" | cut -d . -f 2)
if [ -n "\$MINOR" ] && [ "\$MINOR" -gt 0 ] 2>/dev/null; then
TRT_BRANCH="\${MAJOR}.\$((MINOR - 1))"
echo "Fallback: cloning TensorRT release/\${TRT_BRANCH} (previous minor)"
git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT
elif [ -n "\$MAJOR" ] && [ "\$MAJOR" -gt 0 ] 2>/dev/null; then
PREV_MAJOR=\$((MAJOR - 1))
echo "Fallback: MINOR is 0, querying remote for latest release/\${PREV_MAJOR}.x branch"
TRT_BRANCH=\$(git ls-remote --heads https://github.com/NVIDIA/TensorRT.git "refs/heads/release/\${PREV_MAJOR}.*" \
| awk -F'refs/heads/release/' '{print \$2}' \
| awk -F. '{print \$2, \$0}' \
| sort -k1,1n \
| tail -1 \
| awk '{print \$2}')
if [ -n "\$TRT_BRANCH" ]; then
echo "Fallback: cloning TensorRT release/\${TRT_BRANCH}"
git clone -b release/\${TRT_BRANCH} --depth 1 https://github.com/NVIDIA/TensorRT.git /workspace/TensorRT
else
exit 1
fi
else
exit 1
fi
fi
cd /workspace/TensorRT/samples/python/onnx_custom_plugin
echo "[WARNING] TensorRT release/\${TRT_BRANCH} not available on github.com/NVIDIA/TensorRT; skipping CustomHardmax plugin model generation. L0_trt_plugin coverage will be missing for this TRT version."
Comment thread
mc-nv marked this conversation as resolved.
fi
rm -rf build && mkdir build && \
cd build && cmake .. && make -j && cp libcustomHardmaxPlugin.so $TRITON_MDLS_QA_TRT_PLUGIN_MODEL/.
LD_PRELOAD=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL/libcustomHardmaxPlugin.so python3 $TRITON_MDLS_SRC_DIR/gen_qa_trt_plugin_models.py --models_dir=$TRITON_MDLS_QA_TRT_PLUGIN_MODEL
chmod -R 777 $TRITON_MDLS_QA_TRT_PLUGIN_MODEL
exit 0
EOF

Expand Down
6 changes: 4 additions & 2 deletions qa/common/gen_qa_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ def create_plan_dynamic_rf_modelfile(
profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)

flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
for dt in datatype_set:
Expand Down Expand Up @@ -449,7 +450,8 @@ def create_plan_fixed_rf_modelfile(
profile.set_shape("INPUT1", min_shape, opt_shape, max_shape)

flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
for dt in datatype_set:
Expand Down
6 changes: 4 additions & 2 deletions qa/common/gen_qa_sequence_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ def create_plan_shape_tensor_modelfile(

flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

if trt_dtype == trt.int8:
flags |= 1 << int(trt.BuilderFlag.INT8)
Expand Down Expand Up @@ -320,7 +321,8 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)

flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags |= 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)

if trt_dtype == trt.int8:
flags |= 1 << int(trt.BuilderFlag.INT8)
Expand Down
3 changes: 2 additions & 1 deletion qa/common/gen_qa_trt_format_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ def create_plan_modelfile(
# The build will fail if TensorRT cannot build an engine without introducing such reformatting. The failure may happen only for some target platforms, because of what formats are supported by kernels for those platforms.
# flags = 1 << int(trt.BuilderFlag.DIRECT_IO)
flags = 1 << int(trt.BuilderFlag.PREFER_PRECISION_CONSTRAINTS)
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
if hasattr(trt.BuilderFlag, "REJECT_EMPTY_ALGORITHMS"):
flags |= 1 << int(trt.BuilderFlag.REJECT_EMPTY_ALGORITHMS)
datatype_set = set([trt_input_dtype, trt_output0_dtype, trt_output1_dtype])
for dt in datatype_set:
if dt == trt.int8:
Expand Down
33 changes: 26 additions & 7 deletions qa/common/gen_qa_trt_plugin_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@
def get_trt_plugin(plugin_name):
plugin = None
field_collection = None
plugin_creators = trt.get_plugin_registry().plugin_creator_list
# The upstream onnx_custom_plugin sample is a V2 plugin on the TRT 10.x
# release branches and a V3 plugin on release/11.0; TRT 11 also removed
# the V2 plugin registry surface. Pick the matching API at runtime.
registry = trt.get_plugin_registry()
use_v3 = not hasattr(registry, "plugin_creator_list")
plugin_creators = registry.all_creators if use_v3 else registry.plugin_creator_list
for plugin_creator in plugin_creators:
if (plugin_creator.name == "CustomHardmax") and (
plugin_name == "CustomHardmax"
Expand All @@ -57,9 +62,16 @@ def get_trt_plugin(plugin_name):

if field_collection is None:
raise RuntimeError("Plugin not found: " + plugin_name)
plugin = plugin_creator.create_plugin(
name=plugin_name, field_collection=field_collection
)
if use_v3:
plugin = plugin_creator.create_plugin(
name=plugin_name,
field_collection=field_collection,
phase=trt.TensorRTPhase.BUILD,
)
else:
plugin = plugin_creator.create_plugin(
name=plugin_name, field_collection=field_collection
)

return plugin

Expand Down Expand Up @@ -104,9 +116,16 @@ def create_plan_modelfile(
input_layer = network.add_input(
name="INPUT0", dtype=trt_input_dtype, shape=input_with_batchsize
)
plugin_layer = network.add_plugin_v2(
inputs=[input_layer], plugin=get_trt_plugin(plugin_name)
)
# add_plugin_v2 was removed in TRT 11; add_plugin_v3 has existed since
# TRT 10.0. Pick the API that exists on this TRT install; the plugin
# object returned by get_trt_plugin() is matched to the same version.
plugin_obj = get_trt_plugin(plugin_name)
if hasattr(network, "add_plugin_v2"):
plugin_layer = network.add_plugin_v2(inputs=[input_layer], plugin=plugin_obj)
else:
plugin_layer = network.add_plugin_v3(
inputs=[input_layer], shape_inputs=[], plugin=plugin_obj
)
plugin_layer.get_output(0).name = "OUTPUT0"
network.mark_output(plugin_layer.get_output(0))

Expand Down
Loading