triton-inference-server · yinggeh · May 14, 2026 · mudit-eng · May 15, 2026 · yinggeh
diff --git a/docs/user_guide/request_cancellation.md b/docs/user_guide/request_cancellation.md
@@ -28,14 +28,14 @@
 
 # Request Cancellation
 
-Starting from r23.10, Triton supports handling request cancellation received
-from the gRPC client or a C API user. Long running inference requests such
-as for auto generative large language models may run for an indeterminate
-amount of time or indeterminate number of steps. Additionally clients may
-enqueue a large number of requests as part of a sequence or request stream
-and later determine the results are no longer needed. Continuing to process
-requests whose results are no longer required can significantly impact server
-resources.
+Triton supports handling request cancellation received from the gRPC Python
+client or a C API user (since r23.10), and from the gRPC C++ client (since
+r26.05). Long-running inference requests, such as those for auto-generative
+large language models, may run for an indeterminate amount of time or number
+of steps. Additionally, clients may enqueue a large number of requests as part
+of a sequence or request stream and later determine the results are no longer
+needed. Continuing to process requests whose results are no longer required
+can significantly impact server resources.
 
 ## Issuing Request Cancellation
 
@@ -51,8 +51,7 @@ about the APIs in [tritonserver.h](https://github.com/triton-inference-server/co
 
 In addition, [gRPC endpoint](../customization_guide/inference_protocols.md#httprest-and-grpc-protocols) can
 now detect cancellation from the client and attempt to terminate request.
-At present, only gRPC python client supports issuing request cancellation
-to the server endpoint. See [request-cancellation](https://github.com/triton-inference-server/client#request-cancellation)
+See [request-cancellation](https://github.com/triton-inference-server/client#request-cancellation)
 for more details on how to issue requests from the client-side.
 See gRPC guide on RPC [cancellation](https://grpc.io/docs/guides/cancellation/) for
 finer details.

diff --git a/qa/L0_request_cancellation/test.sh b/qa/L0_request_cancellation/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -42,8 +42,10 @@ export CUDA_VISIBLE_DEVICES=0
 
 SERVER=/opt/tritonserver/bin/tritonserver
 source ../common/util.sh
+CANCEL_LOG_LINE="Cancellation notification received for"
 LOG_VERBOSE(1) << "Cancellation notification received for " << Name() 
                << ", rpc_ok=" << rpc_ok << ", context " 
                << state->context_->unique_id_ << " step " 
                << state->context_->step_ << ", state " 
                << state->unique_id_ << " step " << state->step_; 
 LOG_VERBOSE(1) << "Cancellation notification received for " << Name() 
                << ", rpc_ok=" << rpc_ok << ", context " 
                << state->context_->unique_id_ << " step " 
                << state->context_->step_ << ", state " 
                << state->unique_id_ << " step " << state->step_; 
 LOG_VERBOSE(1) << "Cancellation notification received for " << Name() 
                << ", rpc_ok=" << rpc_ok << ", context " 
                << state->context_->unique_id_ << " step " 
                << state->context_->step_ << ", state " 
                << state->unique_id_ << " step " << state->step_; 
 LOG_VERBOSE(1) << "Cancellation notification received for " << Name() 
                << ", rpc_ok=" << rpc_ok << ", context " 
                << state->context_->unique_id_ << " step " 
                << state->context_->step_ << ", state " 
                << state->unique_id_ << " step " << state->step_; 
 
 RET=0
+rm -f *.log
 
 #
 # Unit tests
@@ -66,7 +68,7 @@ if [ $? -ne 0 ]; then
 fi
 
 #
-# gRPC cancellation tests
+# Python gRPC cancellation tests
 #
 rm -rf models && mkdir models
 mkdir -p models/custom_identity_int32/1 && (cd models/custom_identity_int32 && \
@@ -121,7 +123,7 @@ for TEST_CASE in "test_grpc_async_infer" \
         RET=1
     fi
 
-    count=$(grep -o "Cancellation notification received for" $SERVER_LOG | wc -l)
+    count=$(grep -o "$CANCEL_LOG_LINE" $SERVER_LOG | wc -l)
     if [ $count == 0 ]; then
         echo -e "\n***\n*** Cancellation not received by server on $TEST_CASE\n***"
         cat $SERVER_LOG
@@ -170,6 +172,66 @@ for TEST_CASE in "test_grpc_async_infer" \
     fi
 done
 
+#
+# C++ gRPC cancellation tests
+#
+GRPC_CANCELLATION_TEST_CPP=../clients/grpc_cancellation_test
+# Each entry is "<gtest case> <expected count of server cancellation log lines>".
+for ENTRY in "TestGrpcAsyncInfer 1" \
+             "TestGrpcAsyncInferCancelAfterCompletionIsNoOp 0" \
+             "TestGrpcAsyncInferWithoutContextStillCompletes 0" \
+             "TestGrpcAsyncInferMulti 2" \
+             "TestGrpcStreamInfer 1" \
+             "TestGrpcStreamCancelWithoutInfer 1" \
+             "TestGrpcStreamCancelThenRestart 1"; do
+    read -r TEST_CASE EXPECTED_CANCEL_COUNT <<< "$ENTRY"
+
+    TEST_LOG="./grpc_cancellation_test_cpp.$TEST_CASE.log"
+    SERVER_LOG="./grpc_cancellation_test_cpp.$TEST_CASE.server.log"
+
+    # AsyncInferMulti fans out N concurrent requests; bump to 3 CPU
+    # instances so each can execute in parallel. Reverted after the test
+    # so subsequent cases keep the default single-instance config.
+    if [ "$TEST_CASE" == "TestGrpcAsyncInferMulti" ]; then
+        sed -i 's|instance_group .*|instance_group [{ count: 3, kind: KIND_CPU }]|' \
+            models/custom_identity_int32/config.pbtxt
+    fi
+
+    SERVER_ARGS="--model-repository=$(pwd)/models --log-verbose=2"
+    run_server
+    if [ "$SERVER_PID" == "0" ]; then
+        echo -e "\n***\n*** Failed to start $SERVER\n***"
+        cat "$SERVER_LOG"
+        exit 1
+    fi
+
+    set +e
+    LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH \
+        "$GRPC_CANCELLATION_TEST_CPP" \
+            --gtest_filter="GrpcCancellationTest.$TEST_CASE" > "$TEST_LOG" 2>&1
+    if [ $? -ne 0 ]; then
+        echo -e "\n***\n*** C++ gRPC Cancellation Tests Failed on $TEST_CASE\n***"
+        cat "$TEST_LOG"
+        RET=1
+    fi
+    # grep -c prints nothing if the log is unreadable; default to 0 so the check still fires.
+    cancel_count=$(grep -c "$CANCEL_LOG_LINE" "$SERVER_LOG" || true)
+    if [ "${cancel_count:=0}" -ne "$EXPECTED_CANCEL_COUNT" ]; then
+        echo -e "\n***\n*** Unexpected cancellation count on $TEST_CASE. Expected $EXPECTED_CANCEL_COUNT but received $cancel_count.\n***"
+        cat "$SERVER_LOG"
+        RET=1
+    fi
+    set -e
+
+    kill "$SERVER_PID"
+    wait "$SERVER_PID"
+
+    if [ "$TEST_CASE" == "TestGrpcAsyncInferMulti" ]; then
+        sed -i 's|instance_group .*|instance_group [{ kind: KIND_CPU }]|' \
+            models/custom_identity_int32/config.pbtxt
+    fi
+done
+
 #
 # End-to-end scheduler tests
 #