From 3e4031292c328735984a1d2e1df4d07a8d6c47ce Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 3 Jun 2025 05:46:25 -0700 Subject: [PATCH 01/43] Add capability method to query worker attributes --- cpp/include/ucxx/worker.h | 13 ++++++++++++- cpp/src/worker.cpp | 10 ++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/cpp/include/ucxx/worker.h b/cpp/include/ucxx/worker.h index 300f37973..9668ab8c3 100644 --- a/cpp/include/ucxx/worker.h +++ b/cpp/include/ucxx/worker.h @@ -935,7 +935,7 @@ class Worker : public Component { * * Using a Python future may be requested by specifying `enablePythonFuture`. If a * Python future is requested, the Python application must then await on this future to - * ensure the transfer has completed. Requires UCXX Python support. + * ensure the transfer has completed. * * @param[in] enablePythonFuture whether a python future should be created and * subsequently notified. @@ -948,6 +948,17 @@ class Worker : public Component { const bool enablePythonFuture = false, RequestCallbackUserFunction callbackFunction = nullptr, RequestCallbackUserData callbackData = nullptr); + + /** + * @brief Query worker attributes. + * + * Queries the worker attributes using ucp_worker_query. This provides information about + * the worker's thread mode and other attributes. + * + * @returns The worker attributes structure. + * @throws ucxx::Error if an error occurred while querying worker attributes. + */ + [[nodiscard]] ucp_worker_attr_t queryAttributes() const; }; } // namespace ucxx diff --git a/cpp/src/worker.cpp b/cpp/src/worker.cpp index b4398174d..732833eca 100644 --- a/cpp/src/worker.cpp +++ b/cpp/src/worker.cpp @@ -195,6 +195,16 @@ std::string Worker::getInfo() return utils::decodeTextFileDescriptor(TextFileDescriptor); } +ucp_worker_attr_t Worker::queryAttributes() const +{ + ucp_worker_attr_t attr = { + .field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE // Request thread mode info + }; + + utils::ucsErrorThrow(ucp_worker_query(_handle, &attr)); + return attr; +} + bool Worker::isDelayedRequestSubmissionEnabled() const { return _delayedSubmissionCollection->isDelayedRequestSubmissionEnabled(); From 0f20908ff5e5131cc0d86b0e0e3545793cb14ed9 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 5 Jun 2025 02:41:14 -0700 Subject: [PATCH 02/43] Add tests for --- cpp/tests/worker.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/cpp/tests/worker.cpp b/cpp/tests/worker.cpp index 9bc6bfbbf..3841432d7 100644 --- a/cpp/tests/worker.cpp +++ b/cpp/tests/worker.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include @@ -95,6 +95,17 @@ class WorkerGenericCallbackSingleTest : public WorkerProgressTest {}; TEST_F(WorkerTest, HandleIsValid) { ASSERT_TRUE(_worker->getHandle() != nullptr); } +TEST_F(WorkerTest, QueryAttributes) +{ + auto attrs = _worker->queryAttributes(); + + // Verify that the thread mode field was requested and returned + ASSERT_TRUE(attrs.field_mask & UCP_WORKER_ATTR_FIELD_THREAD_MODE); + + // The worker was created with UCS_THREAD_MODE_MULTI in the constructor + ASSERT_EQ(attrs.thread_mode, UCS_THREAD_MODE_MULTI); +} + TEST_P(WorkerCapabilityTest, CheckCapability) { ASSERT_EQ(_worker->isDelayedRequestSubmissionEnabled(), _enableDelayedSubmission); From f9ee7566aea720a02dbf3d116912324b61b2892b Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 5 Jun 2025 02:43:53 -0700 Subject: [PATCH 03/43] Add method to debug information --- cpp/include/ucxx/request.h | 34 ++++++++++++++++++ cpp/src/request.cpp | 73 +++++++++++++++++++++++++++++++++++++- cpp/src/worker.cpp | 3 +- 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index bcfdae543..8934bb785 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -224,6 +225,39 @@ class Request : public Component { * @return The received buffer (if applicable) or `nullptr`. */ [[nodiscard]] virtual std::shared_ptr getRecvBuffer(); + + /** + * @brief Get a debug string containing information about the request. + * + * Returns a detailed string containing information about the request's current state + * by querying the underlying UCP request using ucp_request_query. The information includes: + * - Request memory address + * - Operation name + * - Current status + * - Owner information + * - UCP request handle + * - Completion status + * - Request status from UCP (if available) + * - Memory type from UCP (if available) + * - Python future status + * - Callback presence + * + * @return A string containing debug information about the request. + */ + [[nodiscard]] std::string getDebugString() const; + + protected: + /** + * @brief Query the UCP request attributes. + * + * Helper method that queries the UCP request for its attributes using ucp_request_query. + * Currently queries for: + * - Request status + * - Memory type + * + * @return A pair containing the query status and the request attributes. + */ + [[nodiscard]] std::pair queryRequestAttributes() const; }; } // namespace ucxx diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 22e135e0e..bea0532d1 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -1,11 +1,13 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include #include #include #include +#include +#include #include @@ -238,6 +240,75 @@ void Request::setStatus(ucs_status_t status) const std::string& Request::getOwnerString() const { return _ownerString; } +std::pair Request::queryRequestAttributes() const +{ + ucp_request_attr_t attr; + + // Get the debug string size from worker attributes + auto worker_attr = _worker->queryAttributes(); + + // Allocate buffer for debug string with size from worker attributes + std::vector debug_str(worker_attr.max_debug_string, '\0'); + + attr.field_mask = UCP_REQUEST_ATTR_FIELD_STATUS | // Request status + UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type + UCP_REQUEST_ATTR_FIELD_INFO_STRING | // Debug string + UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; // Debug string size + + // Set up the debug string buffer + attr.debug_string = debug_str.data(); + attr.debug_string_size = debug_str.size(); + + ucs_status_t status = UCS_OK; + if (UCS_PTR_IS_PTR(_request)) { + status = ucp_request_query(_request, &attr); + } else { + status = UCS_ERR_INVALID_PARAM; + } + + return {status, attr}; +} + +std::string Request::getDebugString() const +{ + std::stringstream ss; + ss << "Request[" << this << "] {\n" + << " operation: " << _operationName << "\n" + << " status: " << _status << " (" << ucs_status_string(_status) << ")\n" + << " owner: " << _ownerString << "\n" + << " UCP handle: " << _request << "\n" + << " completed: " << (_status != UCS_INPROGRESS ? "yes" : "no") << "\n"; + + if (!_status_msg.empty()) { ss << " status_msg: " << _status_msg << "\n"; } + + // Query UCP request attributes if available + if (UCS_PTR_IS_PTR(_request)) { + auto [query_status, attr] = queryRequestAttributes(); + if (query_status == UCS_OK) { + if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_STATUS) { + ss << " request_status: " << attr.status << " (" << ucs_status_string(attr.status) + << ")\n"; + } + if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_MEM_TYPE) { + ss << " memory_type: " << attr.mem_type << "\n"; + } + if ((attr.field_mask & UCP_REQUEST_ATTR_FIELD_INFO_STRING) && + (attr.field_mask & UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE) && + attr.debug_string != nullptr) { + ss << " debug_string: " << attr.debug_string << "\n"; + } + } else { + ss << " request_query_failed: " << ucs_status_string(query_status) << "\n"; + } + } + + ss << " python_future: " << (_enablePythonFuture ? "enabled" : "disabled") << "\n" + << " has_callback: " << (_callback ? "yes" : "no") << "\n" + << "}"; + + return ss.str(); +} + std::shared_ptr Request::getRecvBuffer() { return nullptr; } } // namespace ucxx diff --git a/cpp/src/worker.cpp b/cpp/src/worker.cpp index 732833eca..513ac9999 100644 --- a/cpp/src/worker.cpp +++ b/cpp/src/worker.cpp @@ -198,7 +198,8 @@ std::string Worker::getInfo() ucp_worker_attr_t Worker::queryAttributes() const { ucp_worker_attr_t attr = { - .field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE // Request thread mode info + .field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE | // Request thread mode info + UCP_WORKER_ATTR_FIELD_MAX_INFO_STRING // Request debug string size }; utils::ucsErrorThrow(ucp_worker_query(_handle, &attr)); From e77c9be4ccc9d0ceaf587714ea1407677574c255 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 5 Jun 2025 04:35:48 -0700 Subject: [PATCH 04/43] Cache attributes before completion --- cpp/include/ucxx/request.h | 15 ++++++++-- cpp/src/request.cpp | 61 ++++++++++++++++++++++++-------------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index 8934bb785..f53d54f1a 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -38,6 +38,13 @@ namespace ucxx { */ class Request : public Component { protected: + /// Structure to hold cached request attributes including the debug string + struct CachedRequestAttributes { + ucs_status_t query_status{UCS_INPROGRESS}; ///< Status of the query operation + ucp_request_attr_t attributes{}; ///< UCP request attributes + std::string debug_string{}; ///< Stored debug string + }; + ucs_status_t _status{UCS_INPROGRESS}; ///< Requests status std::string _status_msg{}; ///< Human-readable status message void* _request{nullptr}; ///< Pointer to UCP request @@ -55,6 +62,8 @@ class Request : public Component { bool _enablePythonFuture{true}; ///< Whether Python future is enabled for this request RequestCallbackUserFunction _callback{nullptr}; ///< Completion callback RequestCallbackUserData _callbackData{nullptr}; ///< Completion callback data + CachedRequestAttributes + _cached_request_attr{}; ///< Cached request attributes queried before request is freed /** * @brief Protected constructor of an abstract `ucxx::Request`. @@ -254,10 +263,12 @@ class Request : public Component { * Currently queries for: * - Request status * - Memory type + * - Debug string * - * @return A pair containing the query status and the request attributes. + * @return A CachedRequestAttributes containing the query status, request attributes and debug + * string. */ - [[nodiscard]] std::pair queryRequestAttributes() const; + [[nodiscard]] CachedRequestAttributes queryRequestAttributes() const; }; } // namespace ucxx diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index bea0532d1..cb4de2a85 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -149,7 +149,11 @@ void Request::callback(void* request, ucs_status_t status) status, ucs_status_string(status)); - if (UCS_PTR_IS_PTR(_request)) ucp_request_free(request); + if (UCS_PTR_IS_PTR(_request)) { + // Query request attributes before freeing + _cached_request_attr = queryRequestAttributes(); + ucp_request_free(request); + } ucxx_trace_req_f(_ownerString.c_str(), this, _request, _operationName.c_str(), "completed"); setStatus(status); @@ -240,9 +244,9 @@ void Request::setStatus(ucs_status_t status) const std::string& Request::getOwnerString() const { return _ownerString; } -std::pair Request::queryRequestAttributes() const +Request::CachedRequestAttributes Request::queryRequestAttributes() const { - ucp_request_attr_t attr; + CachedRequestAttributes result; // Get the debug string size from worker attributes auto worker_attr = _worker->queryAttributes(); @@ -250,23 +254,25 @@ std::pair Request::queryRequestAttributes() co // Allocate buffer for debug string with size from worker attributes std::vector debug_str(worker_attr.max_debug_string, '\0'); - attr.field_mask = UCP_REQUEST_ATTR_FIELD_STATUS | // Request status - UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type - UCP_REQUEST_ATTR_FIELD_INFO_STRING | // Debug string - UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; // Debug string size + result.attributes.field_mask = UCP_REQUEST_ATTR_FIELD_STATUS | // Request status + UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type + UCP_REQUEST_ATTR_FIELD_INFO_STRING | // Debug string + UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; // Debug string size // Set up the debug string buffer - attr.debug_string = debug_str.data(); - attr.debug_string_size = debug_str.size(); + result.attributes.debug_string = debug_str.data(); + result.attributes.debug_string_size = debug_str.size(); - ucs_status_t status = UCS_OK; if (UCS_PTR_IS_PTR(_request)) { - status = ucp_request_query(_request, &attr); + result.query_status = ucp_request_query(_request, &result.attributes); + if (result.query_status == UCS_OK && result.attributes.debug_string != nullptr) { + result.debug_string = std::string(result.attributes.debug_string); + } } else { - status = UCS_ERR_INVALID_PARAM; + result.query_status = UCS_ERR_INVALID_PARAM; } - return {status, attr}; + return result; } std::string Request::getDebugString() const @@ -281,10 +287,23 @@ std::string Request::getDebugString() const if (!_status_msg.empty()) { ss << " status_msg: " << _status_msg << "\n"; } - // Query UCP request attributes if available - if (UCS_PTR_IS_PTR(_request)) { - auto [query_status, attr] = queryRequestAttributes(); - if (query_status == UCS_OK) { + // Use cached request attributes if available + if (_cached_request_attr.query_status == UCS_OK) { + const auto& attr = _cached_request_attr.attributes; + if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_STATUS) { + ss << " request_status: " << attr.status << " (" << ucs_status_string(attr.status) << ")\n"; + } + if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_MEM_TYPE) { + ss << " memory_type: " << attr.mem_type << "\n"; + } + if (!_cached_request_attr.debug_string.empty()) { + ss << " debug_string: " << _cached_request_attr.debug_string << "\n"; + } + } else if (UCS_PTR_IS_PTR(_request)) { + // If request is still available, query it directly + auto result = queryRequestAttributes(); + if (result.query_status == UCS_OK) { + const auto& attr = result.attributes; if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_STATUS) { ss << " request_status: " << attr.status << " (" << ucs_status_string(attr.status) << ")\n"; @@ -292,13 +311,9 @@ std::string Request::getDebugString() const if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_MEM_TYPE) { ss << " memory_type: " << attr.mem_type << "\n"; } - if ((attr.field_mask & UCP_REQUEST_ATTR_FIELD_INFO_STRING) && - (attr.field_mask & UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE) && - attr.debug_string != nullptr) { - ss << " debug_string: " << attr.debug_string << "\n"; - } + if (!result.debug_string.empty()) { ss << " debug_string: " << result.debug_string << "\n"; } } else { - ss << " request_query_failed: " << ucs_status_string(query_status) << "\n"; + ss << " request_query_failed: " << ucs_status_string(result.query_status) << "\n"; } } From a5f6b9b181a4814fe341fa455436412c190bd7c5 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 5 Jun 2025 14:00:35 -0700 Subject: [PATCH 05/43] [WIP] Always enable debug info --- cpp/src/request.cpp | 5 +++-- cpp/src/request_tag.cpp | 10 ++++++---- cpp/tests/request.cpp | 6 +++++- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index cb4de2a85..90bee1a9c 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -151,7 +151,7 @@ void Request::callback(void* request, ucs_status_t status) if (UCS_PTR_IS_PTR(_request)) { // Query request attributes before freeing - _cached_request_attr = queryRequestAttributes(); + // _cached_request_attr = queryRequestAttributes(); ucp_request_free(request); } @@ -288,7 +288,8 @@ std::string Request::getDebugString() const if (!_status_msg.empty()) { ss << " status_msg: " << _status_msg << "\n"; } // Use cached request attributes if available - if (_cached_request_attr.query_status == UCS_OK) { + // if (_cached_request_attr.query_status == UCS_OK) { + if (true) { const auto& attr = _cached_request_attr.attributes; if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_STATUS) { ss << " request_status: " << attr.status << " (" << ucs_status_string(attr.status) << ")\n"; diff --git a/cpp/src/request_tag.cpp b/cpp/src/request_tag.cpp index 9655c441a..087af1f65 100644 --- a/cpp/src/request_tag.cpp +++ b/cpp/src/request_tag.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include @@ -123,20 +123,22 @@ void RequestTag::request() _endpoint->getHandle(), tagSend._buffer, tagSend._length, tagSend._tag, ¶m); }, [this, &request, ¶m](data::TagReceive tagReceive) { - param.cb.recv = tagRecvCallback; - request = ucp_tag_recv_nbx(_worker->getHandle(), + param.cb.recv = tagRecvCallback; + request = ucp_tag_recv_nbx(_worker->getHandle(), tagReceive._buffer, tagReceive._length, tagReceive._tag, tagReceive._tagMask, ¶m); + _cached_request_attr = queryRequestAttributes(); }, [](auto) { throw std::runtime_error("Unreachable"); }, }, _requestData); std::lock_guard lock(_mutex); - _request = request; + _request = request; + _cached_request_attr = queryRequestAttributes(); } void RequestTag::populateDelayedSubmission() diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 5349bdc86..0e49d401c 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include @@ -295,6 +295,10 @@ TEST_P(RequestTest, ProgressTag) requests.push_back(_ep->tagRecv(_recvPtr[0], _messageSize, ucxx::Tag{0}, ucxx::TagMaskFull)); waitRequests(_worker, requests, _progressWorker); + for (const auto& request : requests) { + std::cout << request->getDebugString() << std::endl; + } + copyResults(); // Assert data correctness From 34b805c3f0a2723487dd14617245239236afa298 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Fri, 6 Jun 2025 09:35:13 -0700 Subject: [PATCH 06/43] Improve query, add user-facing getter --- cpp/include/ucxx/request.h | 38 +++++++--------- cpp/src/request.cpp | 91 +++++++++++--------------------------- cpp/src/request_tag.cpp | 9 ++-- cpp/tests/request.cpp | 8 +++- 4 files changed, 51 insertions(+), 95 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index f53d54f1a..f7402cb91 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -39,10 +39,10 @@ namespace ucxx { class Request : public Component { protected: /// Structure to hold cached request attributes including the debug string - struct CachedRequestAttributes { - ucs_status_t query_status{UCS_INPROGRESS}; ///< Status of the query operation - ucp_request_attr_t attributes{}; ///< UCP request attributes - std::string debug_string{}; ///< Stored debug string + struct RequestAttributes { + ucs_status_t status{UCS_INPROGRESS}; ///< Status of the request + ucs_memory_type memoryType{UCS_MEMORY_TYPE_UNKNOWN}; ///< Memory type of the request + std::string debugString{}; ///< Stored debug string }; ucs_status_t _status{UCS_INPROGRESS}; ///< Requests status @@ -62,8 +62,8 @@ class Request : public Component { bool _enablePythonFuture{true}; ///< Whether Python future is enabled for this request RequestCallbackUserFunction _callback{nullptr}; ///< Completion callback RequestCallbackUserData _callbackData{nullptr}; ///< Completion callback data - CachedRequestAttributes - _cached_request_attr{}; ///< Cached request attributes queried before request is freed + RequestAttributes _requestAttr{}; ///< Request attributes queried when request is posted + bool _isRequestAttrValid{false}; ///< Whether the request attributes are valid /** * @brief Protected constructor of an abstract `ucxx::Request`. @@ -236,24 +236,16 @@ class Request : public Component { [[nodiscard]] virtual std::shared_ptr getRecvBuffer(); /** - * @brief Get a debug string containing information about the request. + * @brief Get the request attributes. * - * Returns a detailed string containing information about the request's current state - * by querying the underlying UCP request using ucp_request_query. The information includes: - * - Request memory address - * - Operation name - * - Current status - * - Owner information - * - UCP request handle - * - Completion status - * - Request status from UCP (if available) - * - Memory type from UCP (if available) - * - Python future status - * - Callback presence + * Get the request attributes. If the request attributes are not available yet, this + * method will throw an error. * - * @return A string containing debug information about the request. + * @throw ucxx::Error if the request attributes are not available yet. + * + * @return A RequestAttributes containing the request attributes. */ - [[nodiscard]] std::string getDebugString() const; + [[nodiscard]] RequestAttributes getRequestAttributes(); protected: /** @@ -265,10 +257,10 @@ class Request : public Component { * - Memory type * - Debug string * - * @return A CachedRequestAttributes containing the query status, request attributes and debug + * @return A RequestAttributes containing the query status, request attributes and debug * string. */ - [[nodiscard]] CachedRequestAttributes queryRequestAttributes() const; + void queryRequestAttributes(); }; } // namespace ucxx diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 90bee1a9c..43ba7a02b 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -149,11 +149,7 @@ void Request::callback(void* request, ucs_status_t status) status, ucs_status_string(status)); - if (UCS_PTR_IS_PTR(_request)) { - // Query request attributes before freeing - // _cached_request_attr = queryRequestAttributes(); - ucp_request_free(request); - } + if (UCS_PTR_IS_PTR(_request)) ucp_request_free(request); ucxx_trace_req_f(_ownerString.c_str(), this, _request, _operationName.c_str(), "completed"); setStatus(status); @@ -244,9 +240,13 @@ void Request::setStatus(ucs_status_t status) const std::string& Request::getOwnerString() const { return _ownerString; } -Request::CachedRequestAttributes Request::queryRequestAttributes() const +void Request::queryRequestAttributes() { - CachedRequestAttributes result; + std::lock_guard lock(_mutex); + + if (_isRequestAttrValid) return; + + ucp_request_attr_t result; // Get the debug string size from worker attributes auto worker_attr = _worker->queryAttributes(); @@ -254,75 +254,34 @@ Request::CachedRequestAttributes Request::queryRequestAttributes() const // Allocate buffer for debug string with size from worker attributes std::vector debug_str(worker_attr.max_debug_string, '\0'); - result.attributes.field_mask = UCP_REQUEST_ATTR_FIELD_STATUS | // Request status - UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type - UCP_REQUEST_ATTR_FIELD_INFO_STRING | // Debug string - UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; // Debug string size + result.field_mask = UCP_REQUEST_ATTR_FIELD_STATUS | // Request status + UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type + UCP_REQUEST_ATTR_FIELD_INFO_STRING | // Debug string + UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; // Debug string size // Set up the debug string buffer - result.attributes.debug_string = debug_str.data(); - result.attributes.debug_string_size = debug_str.size(); + result.debug_string = debug_str.data(); + result.debug_string_size = debug_str.size(); if (UCS_PTR_IS_PTR(_request)) { - result.query_status = ucp_request_query(_request, &result.attributes); - if (result.query_status == UCS_OK && result.attributes.debug_string != nullptr) { - result.debug_string = std::string(result.attributes.debug_string); + result.status = ucp_request_query(_request, &result); + if (result.status == UCS_OK && result.debug_string != nullptr) { + _requestAttr.debugString = std::string(result.debug_string); + _requestAttr.memoryType = result.mem_type; + _requestAttr.status = result.status; + _isRequestAttrValid = true; } - } else { - result.query_status = UCS_ERR_INVALID_PARAM; } - - return result; } -std::string Request::getDebugString() const +Request::RequestAttributes Request::getRequestAttributes() { - std::stringstream ss; - ss << "Request[" << this << "] {\n" - << " operation: " << _operationName << "\n" - << " status: " << _status << " (" << ucs_status_string(_status) << ")\n" - << " owner: " << _ownerString << "\n" - << " UCP handle: " << _request << "\n" - << " completed: " << (_status != UCS_INPROGRESS ? "yes" : "no") << "\n"; - - if (!_status_msg.empty()) { ss << " status_msg: " << _status_msg << "\n"; } - - // Use cached request attributes if available - // if (_cached_request_attr.query_status == UCS_OK) { - if (true) { - const auto& attr = _cached_request_attr.attributes; - if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_STATUS) { - ss << " request_status: " << attr.status << " (" << ucs_status_string(attr.status) << ")\n"; - } - if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_MEM_TYPE) { - ss << " memory_type: " << attr.mem_type << "\n"; - } - if (!_cached_request_attr.debug_string.empty()) { - ss << " debug_string: " << _cached_request_attr.debug_string << "\n"; - } - } else if (UCS_PTR_IS_PTR(_request)) { - // If request is still available, query it directly - auto result = queryRequestAttributes(); - if (result.query_status == UCS_OK) { - const auto& attr = result.attributes; - if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_STATUS) { - ss << " request_status: " << attr.status << " (" << ucs_status_string(attr.status) - << ")\n"; - } - if (attr.field_mask & UCP_REQUEST_ATTR_FIELD_MEM_TYPE) { - ss << " memory_type: " << attr.mem_type << "\n"; - } - if (!result.debug_string.empty()) { ss << " debug_string: " << result.debug_string << "\n"; } - } else { - ss << " request_query_failed: " << ucs_status_string(result.query_status) << "\n"; - } - } - - ss << " python_future: " << (_enablePythonFuture ? "enabled" : "disabled") << "\n" - << " has_callback: " << (_callback ? "yes" : "no") << "\n" - << "}"; + std::lock_guard lock(_mutex); - return ss.str(); + if (_isRequestAttrValid) + return _requestAttr; + else + throw ucxx::Error("Request attributes not available yet"); } std::shared_ptr Request::getRecvBuffer() { return nullptr; } diff --git a/cpp/src/request_tag.cpp b/cpp/src/request_tag.cpp index 087af1f65..1717dbe5d 100644 --- a/cpp/src/request_tag.cpp +++ b/cpp/src/request_tag.cpp @@ -123,22 +123,21 @@ void RequestTag::request() _endpoint->getHandle(), tagSend._buffer, tagSend._length, tagSend._tag, ¶m); }, [this, &request, ¶m](data::TagReceive tagReceive) { - param.cb.recv = tagRecvCallback; - request = ucp_tag_recv_nbx(_worker->getHandle(), + param.cb.recv = tagRecvCallback; + request = ucp_tag_recv_nbx(_worker->getHandle(), tagReceive._buffer, tagReceive._length, tagReceive._tag, tagReceive._tagMask, ¶m); - _cached_request_attr = queryRequestAttributes(); }, [](auto) { throw std::runtime_error("Unreachable"); }, }, _requestData); std::lock_guard lock(_mutex); - _request = request; - _cached_request_attr = queryRequestAttributes(); + _request = request; + queryRequestAttributes(); } void RequestTag::populateDelayedSubmission() diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 0e49d401c..a53a96ee1 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -296,7 +297,12 @@ TEST_P(RequestTest, ProgressTag) waitRequests(_worker, requests, _progressWorker); for (const auto& request : requests) { - std::cout << request->getDebugString() << std::endl; + auto debugString = request->getRequestAttributes().debugString; + // Check that debugString contains the expected host memory length substring + std::string expectedSubstring = "length " + std::to_string(_messageSize); + ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); + ASSERT_THAT(debugString, + ::testing::HasSubstr(_memoryType == UCS_MEMORY_TYPE_HOST ? "host" : "cuda")); } copyResults(); From 4becdab07bae419efb74e2c95b69365cc0a9646e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 28 Apr 2026 20:17:54 +0000 Subject: [PATCH 07/43] Add requestAttributes to ucxx::experimental::WorkerBuilder --- .../ucxx/experimental/worker_builder.h | 13 +++++++++ cpp/include/ucxx/request.h | 9 ++++-- cpp/include/ucxx/worker.h | 15 ++++++++++ cpp/src/experimental/worker_builder.cpp | 12 +++++++- cpp/src/request.cpp | 1 + cpp/src/worker.cpp | 2 ++ cpp/tests/request.cpp | 5 +++- cpp/tests/worker.cpp | 29 +++++++++++++++++++ 8 files changed, 81 insertions(+), 5 deletions(-) diff --git a/cpp/include/ucxx/experimental/worker_builder.h b/cpp/include/ucxx/experimental/worker_builder.h index 832e1b52d..75ee949e9 100644 --- a/cpp/include/ucxx/experimental/worker_builder.h +++ b/cpp/include/ucxx/experimental/worker_builder.h @@ -88,6 +88,19 @@ class WorkerBuilder final { */ WorkerBuilder& pythonFuture(bool enable = true); + /** + * @brief Configure request attributes querying. + * + * When enabled, each `ucxx::Request` created from the worker will have its UCP + * attributes (such as the debug string) queried immediately after submission, making + * them available via `ucxx::Request::getRequestAttributes()`. This may have + * non-negligible runtime cost and is therefore disabled by default. + * + * @param[in] enable whether request attributes querying is enabled (default: true). + * @return Reference to this builder for method chaining. + */ + WorkerBuilder& requestAttributes(bool enable = true); + /** * @brief Build and return the `Worker`. * diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index 3b16801c0..9ffdb832f 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -249,10 +249,13 @@ class Request : public Component { /** * @brief Get the request attributes. * - * Get the request attributes. If the request attributes are not available yet, this - * method will throw an error. + * Get the request attributes. The owning `ucxx::Worker` must have been created with + * request attributes querying enabled (see + * `ucxx::experimental::WorkerBuilder::requestAttributes()`); otherwise the attributes + * are never populated and this method throws. * - * @throw ucxx::Error if the request attributes are not available yet. + * @throw ucxx::Error if the request attributes are not available yet, including when + * request attributes querying is disabled on the owning worker. * * @return A RequestAttributes containing the request attributes. */ diff --git a/cpp/include/ucxx/worker.h b/cpp/include/ucxx/worker.h index 0e0e8742f..0eada0275 100644 --- a/cpp/include/ucxx/worker.h +++ b/cpp/include/ucxx/worker.h @@ -78,6 +78,8 @@ class Worker : public Component { protected: bool _enableFuture{ false}; ///< Boolean identifying whether the worker was created with future capability + bool _enableRequestAttributes{ + false}; ///< Whether request attributes (e.g. UCP debug info) are queried for each request std::mutex _futuresPoolMutex{}; ///< Mutex to access the futures pool std::queue> _futuresPool{}; ///< Futures pool to prevent running out of fresh futures @@ -492,6 +494,19 @@ class Worker : public Component { */ [[nodiscard]] bool isFutureEnabled() const; + /** + * @brief Inquire if worker has been created with request attributes querying enabled. + * + * Check whether the worker has been created with request attributes querying enabled. + * When enabled, each `ucxx::Request` will have its UCP attributes (such as the debug + * string) queried immediately after submission, making them available via + * `ucxx::Request::getRequestAttributes()`. Querying request attributes has a + * non-negligible runtime cost and is therefore disabled by default. + * + * @returns `true` if request attributes querying is enabled, `false` otherwise. + */ + [[nodiscard]] bool isRequestAttributesEnabled() const; + /** * @brief Populate the futures pool. * diff --git a/cpp/src/experimental/worker_builder.cpp b/cpp/src/experimental/worker_builder.cpp index ce11a467f..77a48bc3e 100644 --- a/cpp/src/experimental/worker_builder.cpp +++ b/cpp/src/experimental/worker_builder.cpp @@ -17,6 +17,7 @@ struct WorkerBuilder::Impl { std::shared_ptr context; bool enableDelayedSubmission{false}; bool enableFuture{false}; + bool enableRequestAttributes{false}; explicit Impl(std::shared_ptr ctx) : context(std::move(ctx)) {} }; @@ -40,9 +41,18 @@ WorkerBuilder& WorkerBuilder::pythonFuture(bool enable) return *this; } +WorkerBuilder& WorkerBuilder::requestAttributes(bool enable) +{ + _impl->enableRequestAttributes = enable; + return *this; +} + std::shared_ptr WorkerBuilder::build() const { - return ucxx::createWorker(_impl->context, _impl->enableDelayedSubmission, _impl->enableFuture); + auto worker = + ucxx::createWorker(_impl->context, _impl->enableDelayedSubmission, _impl->enableFuture); + worker->_enableRequestAttributes = _impl->enableRequestAttributes; + return worker; } } // namespace experimental diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 0967d5e38..d6c6f83a0 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -246,6 +246,7 @@ void Request::queryRequestAttributes() std::lock_guard lock(_mutex); if (_isRequestAttrValid) return; + if (!_worker->isRequestAttributesEnabled()) return; ucp_request_attr_t result; diff --git a/cpp/src/worker.cpp b/cpp/src/worker.cpp index 92a8d2d71..4ac4bc9fd 100644 --- a/cpp/src/worker.cpp +++ b/cpp/src/worker.cpp @@ -214,6 +214,8 @@ bool Worker::isDelayedRequestSubmissionEnabled() const bool Worker::isFutureEnabled() const { return _enableFuture; } +bool Worker::isRequestAttributesEnabled() const { return _enableRequestAttributes; } + void Worker::initBlockingProgressMode() { // In blocking progress mode, we create an epoll file diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index f829f0939..7aa1462a0 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -78,7 +78,10 @@ class RequestTest : public ::testing::TestWithParam< _context = ucxx::createContext({{"RNDV_THRESH", std::to_string(_rndvThresh)}}, ucxx::Context::defaultFeatureFlags); - _worker = _context->createWorker(_enableDelayedSubmission); + _worker = ucxx::experimental::createWorker(_context) + .delayedSubmission(_enableDelayedSubmission) + .requestAttributes(true) + .build(); if (_progressMode == ProgressMode::Blocking) { _worker->initBlockingProgressMode(); diff --git a/cpp/tests/worker.cpp b/cpp/tests/worker.cpp index dd5e749ed..8e57603ce 100644 --- a/cpp/tests/worker.cpp +++ b/cpp/tests/worker.cpp @@ -887,6 +887,35 @@ TEST(WorkerBuilderTest, BuilderBackwardCompatibility) ASSERT_TRUE(worker2->isFutureEnabled()); } +TEST(WorkerBuilderTest, RequestAttributesDefaultDisabled) +{ + auto context = ucxx::experimental::createContext(ucxx::Context::defaultFeatureFlags).build(); + auto worker = ucxx::experimental::createWorker(context).build(); + + ASSERT_TRUE(worker != nullptr); + ASSERT_FALSE(worker->isRequestAttributesEnabled()); +} + +TEST(WorkerBuilderTest, RequestAttributesEnabled) +{ + auto context = ucxx::experimental::createContext(ucxx::Context::defaultFeatureFlags).build(); + auto worker = ucxx::experimental::createWorker(context).requestAttributes(true).build(); + + ASSERT_TRUE(worker != nullptr); + ASSERT_TRUE(worker->isRequestAttributesEnabled()); + ASSERT_FALSE(worker->isDelayedRequestSubmissionEnabled()); + ASSERT_FALSE(worker->isFutureEnabled()); +} + +TEST(WorkerBuilderTest, RequestAttributesExplicitDisable) +{ + auto context = ucxx::experimental::createContext(ucxx::Context::defaultFeatureFlags).build(); + auto worker = ucxx::experimental::createWorker(context).requestAttributes(false).build(); + + ASSERT_TRUE(worker != nullptr); + ASSERT_FALSE(worker->isRequestAttributesEnabled()); +} + TEST(AmReceiverCallbackOwnerTypeTest, DefaultConstructsEmpty) { ucxx::AmReceiverCallbackOwnerType owner; From cd13fc55f7dc57394a618c70ba5ec02af6e6681e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 28 Apr 2026 20:34:25 +0000 Subject: [PATCH 08/43] Improve tests --- cpp/tests/request.cpp | 70 ++++++++++++++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 18 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 7aa1462a0..4fccbac4e 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -58,6 +58,36 @@ class RequestTest : public ::testing::TestWithParam< std::vector _sendPtr{nullptr}; std::vector _recvPtr{nullptr}; + void buildWorker(bool enableRequestAttributes) + { + _worker = ucxx::experimental::createWorker(_context) + .delayedSubmission(_enableDelayedSubmission) + .requestAttributes(enableRequestAttributes) + .build(); + + if (_progressMode == ProgressMode::Blocking) { + _worker->initBlockingProgressMode(); + } else if (_progressMode == ProgressMode::ThreadPolling || + _progressMode == ProgressMode::ThreadBlocking) { + _worker->setProgressThreadStartCallback(::createCudaContextCallback, nullptr); + + if (_progressMode == ProgressMode::ThreadPolling) _worker->startProgressThread(true); + if (_progressMode == ProgressMode::ThreadBlocking) _worker->startProgressThread(false); + } + + _progressWorker = getProgressFunction(_worker, _progressMode); + + _ep = _worker->createEndpointFromWorkerAddress(_worker->getAddress()); + } + + void rebuildWorker(bool enableRequestAttributes) + { + if (_worker && _worker->isProgressThreadRunning()) _worker->stopProgressThread(); + _ep.reset(); + _worker.reset(); + buildWorker(enableRequestAttributes); + } + void SetUp() { std::tie(_bufferType, @@ -78,24 +108,7 @@ class RequestTest : public ::testing::TestWithParam< _context = ucxx::createContext({{"RNDV_THRESH", std::to_string(_rndvThresh)}}, ucxx::Context::defaultFeatureFlags); - _worker = ucxx::experimental::createWorker(_context) - .delayedSubmission(_enableDelayedSubmission) - .requestAttributes(true) - .build(); - - if (_progressMode == ProgressMode::Blocking) { - _worker->initBlockingProgressMode(); - } else if (_progressMode == ProgressMode::ThreadPolling || - _progressMode == ProgressMode::ThreadBlocking) { - _worker->setProgressThreadStartCallback(::createCudaContextCallback, nullptr); - - if (_progressMode == ProgressMode::ThreadPolling) _worker->startProgressThread(true); - if (_progressMode == ProgressMode::ThreadBlocking) _worker->startProgressThread(false); - } - - _progressWorker = getProgressFunction(_worker, _progressMode); - - _ep = _worker->createEndpointFromWorkerAddress(_worker->getAddress()); + buildWorker(false); } void TearDown() @@ -484,6 +497,8 @@ TEST_P(RequestTest, ProgressStream) TEST_P(RequestTest, ProgressTag) { + rebuildWorker(true); + allocate(); // Submit and wait for transfers to complete @@ -507,6 +522,25 @@ TEST_P(RequestTest, ProgressTag) ASSERT_THAT(_recv[0], ContainerEq(_send[0])); } +TEST_P(RequestTest, ProgressTagRequestAttributesDisabled) +{ + ASSERT_FALSE(_worker->isRequestAttributesEnabled()); + + allocate(); + + std::vector> requests; + requests.push_back(_ep->tagSend(_sendPtr[0], _messageSize, ucxx::Tag{0})); + requests.push_back(_ep->tagRecv(_recvPtr[0], _messageSize, ucxx::Tag{0}, ucxx::TagMaskFull)); + waitRequests(_worker, requests, _progressWorker); + + for (const auto& request : requests) { + EXPECT_THROW(std::ignore = request->getRequestAttributes(), ucxx::Error); + } + + copyResults(); + ASSERT_THAT(_recv[0], ContainerEq(_send[0])); +} + TEST_P(RequestTest, ProgressTagMulti) { if (_progressMode == ProgressMode::Wait) { From dc3445ef19da75a1e59fbdd25184f1d22100f2a3 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 29 Apr 2026 13:12:00 +0000 Subject: [PATCH 09/43] Query request attributes for other request types, add tests --- cpp/src/request_am.cpp | 2 + cpp/src/request_mem.cpp | 3 +- cpp/src/request_stream.cpp | 3 +- cpp/tests/request.cpp | 85 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 2 deletions(-) diff --git a/cpp/src/request_am.cpp b/cpp/src/request_am.cpp index d77ac2bb9..3bb914b7c 100644 --- a/cpp/src/request_am.cpp +++ b/cpp/src/request_am.cpp @@ -369,6 +369,7 @@ ucs_status_t RequestAm::recvCallback(void* arg, } else { // The request will be handled by the callback recvAmMessage->setUcpRequest(status); + req->queryRequestAttributes(); amData->_registerInflightRequest(req); { @@ -472,6 +473,7 @@ void RequestAm::request() std::lock_guard lock(_mutex); _request = request; + queryRequestAttributes(); }, [](auto) { throw ucxx::UnsupportedError("Only send active messages can call request()"); }, }, diff --git a/cpp/src/request_mem.cpp b/cpp/src/request_mem.cpp index bff6caeee..8f608710e 100644 --- a/cpp/src/request_mem.cpp +++ b/cpp/src/request_mem.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include @@ -129,6 +129,7 @@ void RequestMem::request() std::lock_guard lock(_mutex); _request = request; + queryRequestAttributes(); } void RequestMem::populateDelayedSubmission() diff --git a/cpp/src/request_stream.cpp b/cpp/src/request_stream.cpp index 4327cc407..cff84ea5c 100644 --- a/cpp/src/request_stream.cpp +++ b/cpp/src/request_stream.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include @@ -93,6 +93,7 @@ void RequestStream::request() std::lock_guard lock(_mutex); _request = request; + queryRequestAttributes(); } void RequestStream::populateDelayedSubmission() diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 4fccbac4e..b0a4196aa 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -541,6 +541,91 @@ TEST_P(RequestTest, ProgressTagRequestAttributesDisabled) ASSERT_THAT(_recv[0], ContainerEq(_send[0])); } +TEST_P(RequestTest, ProgressStreamRequestAttributes) +{ + if (_messageSize == 0) GTEST_SKIP() << "Stream rejects zero-length transfers"; + if (_progressMode == ProgressMode::ThreadPolling || _progressMode == ProgressMode::ThreadBlocking) + GTEST_SKIP() << "Threaded progress modes can race the attribute query"; + + rebuildWorker(/* enableRequestAttributes */ true); + + allocate(); + + std::vector> requests; + requests.push_back(_ep->streamSend(_sendPtr[0], _messageSize, 0)); + requests.push_back(_ep->streamRecv(_recvPtr[0], _messageSize, 0)); + waitRequests(_worker, requests, _progressWorker); + + for (const auto& request : requests) { + auto debugString = request->getRequestAttributes().debugString; + ASSERT_FALSE(debugString.empty()); + } + + copyResults(); + ASSERT_THAT(_recv[0], ContainerEq(_send[0])); +} + +TEST_P(RequestTest, ProgressAmRequestAttributes) +{ + if (_messageSize == 0) GTEST_SKIP() << "Zero-length AM completes without a UCP request"; + if (_messageSize < _rndvThresh) + GTEST_SKIP() << "Eager AM completes inline without a UCP request to query"; + if (_progressMode == ProgressMode::Wait) + GTEST_SKIP() << "Interrupting UCP worker progress operation in wait mode is not possible"; + if (_progressMode == ProgressMode::ThreadPolling || _progressMode == ProgressMode::ThreadBlocking) + GTEST_SKIP() << "Threaded progress modes can race the attribute query"; + + rebuildWorker(/* enableRequestAttributes */ true); + + allocate(1, false); + + std::vector> requests; + requests.push_back(_ep->amSend(_sendPtr[0], _messageSize, _memoryType)); + requests.push_back(_ep->amRecv()); + waitRequests(_worker, requests, _progressWorker); + + for (const auto& request : requests) { + auto debugString = request->getRequestAttributes().debugString; + ASSERT_FALSE(debugString.empty()); + } + + auto recvReq = requests[1]; + _recvPtr[0] = recvReq->getRecvBuffer()->data(); + copyResults(); + ASSERT_THAT(_recv[0], ContainerEq(_send[0])); +} + +TEST_P(RequestTest, MemoryGetRequestAttributes) +{ + if (_messageSize == 0) GTEST_SKIP() << "Zero-length memGet completes without a UCP request"; + if (_progressMode == ProgressMode::ThreadPolling || _progressMode == ProgressMode::ThreadBlocking) + GTEST_SKIP() << "Threaded progress modes can race the attribute query"; + + rebuildWorker(/* enableRequestAttributes */ true); + + allocate(); + + auto memoryHandle = _context->createMemoryHandle(_messageSize, nullptr, _memoryType); + copyMemoryTypeAware( + reinterpret_cast(memoryHandle->getBaseAddress()), _sendPtr[0], _messageSize); + + auto localRemoteKey = memoryHandle->createRemoteKey(); + auto serializedRemoteKey = localRemoteKey->serialize(); + auto remoteKey = ucxx::createRemoteKeyFromSerialized(_ep, serializedRemoteKey); + + auto request = _ep->memGet(_recvPtr[0], _messageSize, remoteKey); + std::vector> requests; + requests.push_back(request); + requests.push_back(_ep->flush()); + waitRequests(_worker, requests, _progressWorker); + + auto debugString = request->getRequestAttributes().debugString; + ASSERT_FALSE(debugString.empty()); + + copyResults(); + ASSERT_THAT(_recv[0], ContainerEq(_send[0])); +} + TEST_P(RequestTest, ProgressTagMulti) { if (_progressMode == ProgressMode::Wait) { From d51f84454f0ffba38e9437dab2e7c9e848a1fce1 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 29 Apr 2026 20:02:46 +0000 Subject: [PATCH 10/43] Make request-attribute querying race-free via lock-coordinated free ucp_request_query is safe at any point in a UCP request's lifetime between obtaining a UCS_PTR_IS_PTR handle and calling ucp_request_free. The previous code freed inside Request::callback (progress thread, outside any lock) while queryRequestAttributes ran on the submit thread under _mutex, causing a race in threaded progress modes that could free the request out from under the query. Move ucp_request_free into Request::setStatus, where it now executes inside the lock that the submit thread already holds during publish + query. Introduce a small Request::publishRequest helper so every submit site (Tag, Stream, Mem, Am send and rendezvous-recv, Flush, EpClose) performs the "store _request, query attributes" pair atomically under the same _mutex. With both sides serialized on the same recursive mutex the race is gone with no atomics, no new locking in the callback path beyond what setStatus already takes, and no behavioral change to the disabled (default) configuration. --- cpp/include/ucxx/internal/request_am.h | 9 ----- cpp/include/ucxx/request.h | 17 ++++++++++ cpp/src/internal/request_am.cpp | 2 -- cpp/src/request.cpp | 23 ++++++++++--- cpp/src/request_am.cpp | 7 ++-- cpp/src/request_endpoint_close.cpp | 5 ++- cpp/src/request_flush.cpp | 5 ++- cpp/src/request_mem.cpp | 4 +-- cpp/src/request_stream.cpp | 4 +-- cpp/src/request_tag.cpp | 6 ++-- cpp/tests/request.cpp | 47 ++++++++++++++++++++------ 11 files changed, 81 insertions(+), 48 deletions(-) diff --git a/cpp/include/ucxx/internal/request_am.h b/cpp/include/ucxx/internal/request_am.h index fe3e8ac49..37d7dd972 100644 --- a/cpp/include/ucxx/internal/request_am.h +++ b/cpp/include/ucxx/internal/request_am.h @@ -67,15 +67,6 @@ class RecvAmMessage { AmReceiverCallbackType receiverCallback = AmReceiverCallbackType(), std::vector userHeader = {}); - /** - * @brief Set the UCP request. - * - * Set the underlying UCP request (`_request` attribute) of the `RequestAm`. - * - * @param[in] request the UCP request associated to the active message receive operation. - */ - void setUcpRequest(void* request); - /** * @brief Execute the `ucxx::Request::callback()`. * diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index 9ffdb832f..c6e077e0a 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -275,6 +275,23 @@ class Request : public Component { * string. */ void queryRequestAttributes(); + + /** + * @brief Publish the UCP request handle and capture its attributes. + * + * Single critical section that stores the UCP request pointer in `_request` and, when + * the owning worker has request attributes querying enabled, immediately queries those + * attributes. The completion path frees the UCP request inside `setStatus` under the + * same `_mutex`, so this helper guarantees the query and the free are mutually + * exclusive and that there are no use-after-free in threaded progress modes. + * + * Every submit site (all `request` methods from child classes and the AM + * rendezvous-receive path) calls this after obtaining the request handle from the + * corresponding `ucp_*_nbx` function. + * + * @param[in] request the UCP request pointer returned by a non-blocking submit. + */ + void publishRequest(void* request); }; } // namespace ucxx diff --git a/cpp/src/internal/request_am.cpp b/cpp/src/internal/request_am.cpp index e2eb65c25..6fbe7d922 100644 --- a/cpp/src/internal/request_am.cpp +++ b/cpp/src/internal/request_am.cpp @@ -40,8 +40,6 @@ RecvAmMessage::RecvAmMessage(internal::AmData* amData, } } -void RecvAmMessage::setUcpRequest(void* request) { _request->_request = request; } - void RecvAmMessage::callback(void* request, ucs_status_t status) { std::visit(data::dispatch{ diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index d6c6f83a0..290654447 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -141,7 +141,7 @@ void Request::callback(void* request, ucs_status_t status) if (_status != UCS_INPROGRESS) ucxx_trace_req_f(_ownerString.c_str(), this, - _request, + request, _operationName.c_str(), "has status already set to %d (%s), callback setting %d (%s)", _status, @@ -149,12 +149,10 @@ void Request::callback(void* request, ucs_status_t status) status, ucs_status_string(status)); - if (UCS_PTR_IS_PTR(_request)) ucp_request_free(request); - - ucxx_trace_req_f(_ownerString.c_str(), this, _request, _operationName.c_str(), "completed"); + ucxx_trace_req_f(_ownerString.c_str(), this, request, _operationName.c_str(), "completed"); setStatus(status); ucxx_trace_req_f( - _ownerString.c_str(), this, _request, _operationName.c_str(), "isCompleted: %d", isCompleted()); + _ownerString.c_str(), this, request, _operationName.c_str(), "isCompleted: %d", isCompleted()); } void Request::process() @@ -236,6 +234,14 @@ void Request::setStatus(ucs_status_t status) _ownerString.c_str(), this, _request, _operationName.c_str(), "invoking user callback"); _callback(status, _callbackData); } + + // Free the UCP request inside the lock so it is mutually exclusive with + // `publishRequest()`/`queryRequestAttributes()` on the submit thread. Clearing + // `_request` afterwards keeps this idempotent if `setStatus` ever re-enters. + if (UCS_PTR_IS_PTR(_request)) { + ucp_request_free(_request); + _request = nullptr; + } } } @@ -276,6 +282,13 @@ void Request::queryRequestAttributes() } } +void Request::publishRequest(void* request) +{ + std::lock_guard lock(_mutex); + _request = request; + queryRequestAttributes(); +} + Request::RequestAttributes Request::getRequestAttributes() { std::lock_guard lock(_mutex); diff --git a/cpp/src/request_am.cpp b/cpp/src/request_am.cpp index 3bb914b7c..4ccdae333 100644 --- a/cpp/src/request_am.cpp +++ b/cpp/src/request_am.cpp @@ -368,8 +368,7 @@ ucs_status_t RequestAm::recvCallback(void* arg, return s; } else { // The request will be handled by the callback - recvAmMessage->setUcpRequest(status); - req->queryRequestAttributes(); + req->publishRequest(status); amData->_registerInflightRequest(req); { @@ -471,9 +470,7 @@ void RequestAm::request() amSend._count, ¶m); - std::lock_guard lock(_mutex); - _request = request; - queryRequestAttributes(); + publishRequest(request); }, [](auto) { throw ucxx::UnsupportedError("Only send active messages can call request()"); }, }, diff --git a/cpp/src/request_endpoint_close.cpp b/cpp/src/request_endpoint_close.cpp index 3e1175824..05f83845d 100644 --- a/cpp/src/request_endpoint_close.cpp +++ b/cpp/src/request_endpoint_close.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include "ucxx/request_data.h" @@ -78,8 +78,7 @@ void RequestEndpointClose::request() else throw ucxx::Error("A valid endpoint or worker is required for a close operation."); - std::lock_guard lock(_mutex); - _request = request; + publishRequest(request); } void RequestEndpointClose::populateDelayedSubmission() diff --git a/cpp/src/request_flush.cpp b/cpp/src/request_flush.cpp index 1dcb3a936..bb20491be 100644 --- a/cpp/src/request_flush.cpp +++ b/cpp/src/request_flush.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include @@ -77,8 +77,7 @@ void RequestFlush::request() else throw ucxx::Error("A valid endpoint or worker is required for a flush operation."); - std::lock_guard lock(_mutex); - _request = request; + publishRequest(request); } void RequestFlush::populateDelayedSubmission() diff --git a/cpp/src/request_mem.cpp b/cpp/src/request_mem.cpp index 8f608710e..16f157e59 100644 --- a/cpp/src/request_mem.cpp +++ b/cpp/src/request_mem.cpp @@ -127,9 +127,7 @@ void RequestMem::request() }, _requestData); - std::lock_guard lock(_mutex); - _request = request; - queryRequestAttributes(); + publishRequest(request); } void RequestMem::populateDelayedSubmission() diff --git a/cpp/src/request_stream.cpp b/cpp/src/request_stream.cpp index cff84ea5c..30b42f60d 100644 --- a/cpp/src/request_stream.cpp +++ b/cpp/src/request_stream.cpp @@ -91,9 +91,7 @@ void RequestStream::request() }, _requestData); - std::lock_guard lock(_mutex); - _request = request; - queryRequestAttributes(); + publishRequest(request); } void RequestStream::populateDelayedSubmission() diff --git a/cpp/src/request_tag.cpp b/cpp/src/request_tag.cpp index 1253478a8..edc0f8627 100644 --- a/cpp/src/request_tag.cpp +++ b/cpp/src/request_tag.cpp @@ -1,5 +1,5 @@ /** - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. * SPDX-License-Identifier: BSD-3-Clause */ #include @@ -161,9 +161,7 @@ void RequestTag::request() }, _requestData); - std::lock_guard lock(_mutex); - _request = request; - queryRequestAttributes(); + publishRequest(request); } void RequestTag::populateDelayedSubmission() diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index b0a4196aa..6a60333c7 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -545,7 +545,10 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) { if (_messageSize == 0) GTEST_SKIP() << "Stream rejects zero-length transfers"; if (_progressMode == ProgressMode::ThreadPolling || _progressMode == ProgressMode::ThreadBlocking) - GTEST_SKIP() << "Threaded progress modes can race the attribute query"; + GTEST_SKIP() << "On loopback with a worker progress thread, UCX often completes both " + "stream send and recv inline (returns UCS_OK_PTR), so no UCP request " + "handle exists to query. The feature is correctly reporting no " + "attributes are available rather than racing."; rebuildWorker(/* enableRequestAttributes */ true); @@ -556,10 +559,22 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) requests.push_back(_ep->streamRecv(_recvPtr[0], _messageSize, 0)); waitRequests(_worker, requests, _progressWorker); + // UCX may complete a request inline (e.g. very small loopback sends), in which case + // there is no UCP request handle to query and `getRequestAttributes()` legitimately + // throws. Accept that, but require at least one request (typically the receive) to + // expose a queryable handle so we know the feature wired up. + size_t requestsWithAttributes = 0; for (const auto& request : requests) { - auto debugString = request->getRequestAttributes().debugString; - ASSERT_FALSE(debugString.empty()); + try { + auto debugString = request->getRequestAttributes().debugString; + EXPECT_FALSE(debugString.empty()); + ++requestsWithAttributes; + } catch (const ucxx::Error&) { + // Inline completion: no UCP request, nothing to query. + } } + EXPECT_GT(requestsWithAttributes, 0u) + << "Expected at least one request to expose queryable attributes"; copyResults(); ASSERT_THAT(_recv[0], ContainerEq(_send[0])); @@ -572,8 +587,6 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) GTEST_SKIP() << "Eager AM completes inline without a UCP request to query"; if (_progressMode == ProgressMode::Wait) GTEST_SKIP() << "Interrupting UCP worker progress operation in wait mode is not possible"; - if (_progressMode == ProgressMode::ThreadPolling || _progressMode == ProgressMode::ThreadBlocking) - GTEST_SKIP() << "Threaded progress modes can race the attribute query"; rebuildWorker(/* enableRequestAttributes */ true); @@ -584,10 +597,19 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) requests.push_back(_ep->amRecv()); waitRequests(_worker, requests, _progressWorker); + // Inline completion is acceptable (no UCP handle to query); require at least one + // request to have populated attributes. + size_t requestsWithAttributes = 0; for (const auto& request : requests) { - auto debugString = request->getRequestAttributes().debugString; - ASSERT_FALSE(debugString.empty()); + try { + auto debugString = request->getRequestAttributes().debugString; + EXPECT_FALSE(debugString.empty()); + ++requestsWithAttributes; + } catch (const ucxx::Error&) { + } } + EXPECT_GT(requestsWithAttributes, 0u) + << "Expected at least one request to expose queryable attributes"; auto recvReq = requests[1]; _recvPtr[0] = recvReq->getRecvBuffer()->data(); @@ -598,8 +620,6 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) TEST_P(RequestTest, MemoryGetRequestAttributes) { if (_messageSize == 0) GTEST_SKIP() << "Zero-length memGet completes without a UCP request"; - if (_progressMode == ProgressMode::ThreadPolling || _progressMode == ProgressMode::ThreadBlocking) - GTEST_SKIP() << "Threaded progress modes can race the attribute query"; rebuildWorker(/* enableRequestAttributes */ true); @@ -619,8 +639,13 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) requests.push_back(_ep->flush()); waitRequests(_worker, requests, _progressWorker); - auto debugString = request->getRequestAttributes().debugString; - ASSERT_FALSE(debugString.empty()); + // The memGet may complete inline on loopback (no UCP handle to query); accept that + // and exercise the path either way. + try { + auto debugString = request->getRequestAttributes().debugString; + EXPECT_FALSE(debugString.empty()); + } catch (const ucxx::Error&) { + } copyResults(); ASSERT_THAT(_recv[0], ContainerEq(_send[0])); From 15b086ecfecc71fc66c732019867aabe5ebfca94 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 30 Apr 2026 09:29:30 +0000 Subject: [PATCH 11/43] Skip ProgressTag configurations that have no UCP request to query --- cpp/tests/request.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 6a60333c7..ecc7d77f4 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -497,6 +497,25 @@ TEST_P(RequestTest, ProgressStream) TEST_P(RequestTest, ProgressTag) { + // `ucp_request_query` only works on a real UCP request handle, but UCX does not + // always allocate one. Skip the configurations where it doesn't, so the + // assertions on the debug string content are deterministic: + // - Zero-length CUDA transfers report host memtype in UCX debug strings (no + // device buffer is involved), so the "cuda" substring assertion would fail. + // - In threaded progress modes the worker progress thread advances the first + // half of an eager tag pair into the peer's unexpected queue before the + // second half is submitted; UCX then matches and completes the second + // submission inline (returning `UCS_OK_PTR`), leaving no request handle to + // query. Non-threaded modes don't progress between submissions, so both + // halves get real handles, and rendezvous-protocol messages always defer. + if (_bufferType == ucxx::BufferType::RMM && _messageSize == 0) + GTEST_SKIP() << "Zero-length CUDA transfers report host memtype in UCX debug strings"; + if ((_progressMode == ProgressMode::ThreadPolling || + _progressMode == ProgressMode::ThreadBlocking) && + _messageSize < _rndvThresh) + GTEST_SKIP() << "Worker progress thread completes eager tag pairs inline at submission " + "(UCS_OK_PTR), leaving no UCP request handle to query"; + rebuildWorker(true); allocate(); From 174fb4052f1414ca682613721ac834f5c1c159eb Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Thu, 30 Apr 2026 09:46:08 +0000 Subject: [PATCH 12/43] Reorganize tag tests --- cpp/tests/request.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index ecc7d77f4..077d2a69e 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -496,6 +496,22 @@ TEST_P(RequestTest, ProgressStream) } TEST_P(RequestTest, ProgressTag) +{ + allocate(); + + // Submit and wait for transfers to complete + std::vector> requests; + requests.push_back(_ep->tagSend(_sendPtr[0], _messageSize, ucxx::Tag{0})); + requests.push_back(_ep->tagRecv(_recvPtr[0], _messageSize, ucxx::Tag{0}, ucxx::TagMaskFull)); + waitRequests(_worker, requests, _progressWorker); + + copyResults(); + + // Assert data correctness + ASSERT_THAT(_recv[0], ContainerEq(_send[0])); +} + +TEST_P(RequestTest, ProgressTagRequestAttributes) { // `ucp_request_query` only works on a real UCP request handle, but UCX does not // always allocate one. Skip the configurations where it doesn't, so the @@ -516,19 +532,17 @@ TEST_P(RequestTest, ProgressTag) GTEST_SKIP() << "Worker progress thread completes eager tag pairs inline at submission " "(UCS_OK_PTR), leaving no UCP request handle to query"; - rebuildWorker(true); + rebuildWorker(/* enableRequestAttributes */ true); allocate(); - // Submit and wait for transfers to complete std::vector> requests; requests.push_back(_ep->tagSend(_sendPtr[0], _messageSize, ucxx::Tag{0})); requests.push_back(_ep->tagRecv(_recvPtr[0], _messageSize, ucxx::Tag{0}, ucxx::TagMaskFull)); waitRequests(_worker, requests, _progressWorker); for (const auto& request : requests) { - auto debugString = request->getRequestAttributes().debugString; - // Check that debugString contains the expected host memory length substring + auto debugString = request->getRequestAttributes().debugString; std::string expectedSubstring = "length " + std::to_string(_messageSize); ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); ASSERT_THAT(debugString, @@ -536,8 +550,6 @@ TEST_P(RequestTest, ProgressTag) } copyResults(); - - // Assert data correctness ASSERT_THAT(_recv[0], ContainerEq(_send[0])); } From 612c4bc6320fe7f09ec4cb8743acd5cac0e67d6c Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 5 May 2026 17:41:36 +0000 Subject: [PATCH 13/43] Fix rendezvous threshold skip conditions --- cpp/tests/request.cpp | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 077d2a69e..d5eab6fdc 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -210,8 +210,8 @@ TEST_P(RequestTest, ProgressAm) // Messages larger than `_rndvThresh` are rendezvous and will use custom allocator, // smaller messages are eager and will always be host-allocated. ASSERT_THAT(recvReq->getRecvBuffer()->getType(), - (_registerCustomAmAllocator && _messageSize >= _rndvThresh) ? _bufferType - : ucxx::BufferType::Host); + (_registerCustomAmAllocator && _messageSize > _rndvThresh) ? _bufferType + : ucxx::BufferType::Host); copyResults(); @@ -362,7 +362,7 @@ TEST_P(RequestTest, ProgressAmReceiverCallback) // Messages larger than `_rndvThresh` are rendezvous and will use custom allocator, // smaller messages are eager and will always be host-allocated. ASSERT_THAT(receivedRequests[0]->getRecvBuffer()->getType(), - (_registerCustomAmAllocator && _messageSize >= _rndvThresh) + (_registerCustomAmAllocator && _messageSize > _rndvThresh) ? _bufferType : ucxx::BufferType::Host); } @@ -526,11 +526,8 @@ TEST_P(RequestTest, ProgressTagRequestAttributes) // halves get real handles, and rendezvous-protocol messages always defer. if (_bufferType == ucxx::BufferType::RMM && _messageSize == 0) GTEST_SKIP() << "Zero-length CUDA transfers report host memtype in UCX debug strings"; - if ((_progressMode == ProgressMode::ThreadPolling || - _progressMode == ProgressMode::ThreadBlocking) && - _messageSize < _rndvThresh) - GTEST_SKIP() << "Worker progress thread completes eager tag pairs inline at submission " - "(UCS_OK_PTR), leaving no UCP request handle to query"; + if (_messageSize <= _rndvThresh) + GTEST_SKIP() << "Eager messages do not create a ucp_request and thus no debug info"; rebuildWorker(/* enableRequestAttributes */ true); @@ -575,11 +572,8 @@ TEST_P(RequestTest, ProgressTagRequestAttributesDisabled) TEST_P(RequestTest, ProgressStreamRequestAttributes) { if (_messageSize == 0) GTEST_SKIP() << "Stream rejects zero-length transfers"; - if (_progressMode == ProgressMode::ThreadPolling || _progressMode == ProgressMode::ThreadBlocking) - GTEST_SKIP() << "On loopback with a worker progress thread, UCX often completes both " - "stream send and recv inline (returns UCS_OK_PTR), so no UCP request " - "handle exists to query. The feature is correctly reporting no " - "attributes are available rather than racing."; + if (_messageSize <= _rndvThresh) + GTEST_SKIP() << "Eager messages complete inline without a UCP request to query"; rebuildWorker(/* enableRequestAttributes */ true); @@ -614,8 +608,8 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) TEST_P(RequestTest, ProgressAmRequestAttributes) { if (_messageSize == 0) GTEST_SKIP() << "Zero-length AM completes without a UCP request"; - if (_messageSize < _rndvThresh) - GTEST_SKIP() << "Eager AM completes inline without a UCP request to query"; + if (_messageSize <= _rndvThresh) + GTEST_SKIP() << "Eager messages complete inline without a UCP request to query"; if (_progressMode == ProgressMode::Wait) GTEST_SKIP() << "Interrupting UCP worker progress operation in wait mode is not possible"; From 718b464880ff51906f844cacdc84729b63052bf1 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 5 May 2026 19:01:36 +0000 Subject: [PATCH 14/43] Test for expected substring --- cpp/tests/request.cpp | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index d5eab6fdc..d452fbb8d 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -590,16 +590,10 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) // expose a queryable handle so we know the feature wired up. size_t requestsWithAttributes = 0; for (const auto& request : requests) { - try { - auto debugString = request->getRequestAttributes().debugString; - EXPECT_FALSE(debugString.empty()); - ++requestsWithAttributes; - } catch (const ucxx::Error&) { - // Inline completion: no UCP request, nothing to query. - } + auto debugString = request->getRequestAttributes().debugString; + std::string expectedSubstring = "length " + std::to_string(_messageSize); + ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); } - EXPECT_GT(requestsWithAttributes, 0u) - << "Expected at least one request to expose queryable attributes"; copyResults(); ASSERT_THAT(_recv[0], ContainerEq(_send[0])); @@ -626,15 +620,10 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) // request to have populated attributes. size_t requestsWithAttributes = 0; for (const auto& request : requests) { - try { - auto debugString = request->getRequestAttributes().debugString; - EXPECT_FALSE(debugString.empty()); - ++requestsWithAttributes; - } catch (const ucxx::Error&) { - } + auto debugString = request->getRequestAttributes().debugString; + std::string expectedSubstring = "length " + std::to_string(_messageSize); + ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); } - EXPECT_GT(requestsWithAttributes, 0u) - << "Expected at least one request to expose queryable attributes"; auto recvReq = requests[1]; _recvPtr[0] = recvReq->getRecvBuffer()->data(); @@ -666,11 +655,9 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) // The memGet may complete inline on loopback (no UCP handle to query); accept that // and exercise the path either way. - try { - auto debugString = request->getRequestAttributes().debugString; - EXPECT_FALSE(debugString.empty()); - } catch (const ucxx::Error&) { - } + auto debugString = request->getRequestAttributes().debugString; + std::string expectedSubstring = "length " + std::to_string(_messageSize); + ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); copyResults(); ASSERT_THAT(_recv[0], ContainerEq(_send[0])); From 95407abcdbb25916ba8ff55aa48b7797699fb2f3 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 5 May 2026 19:14:06 +0000 Subject: [PATCH 15/43] Test stream send/recv attributes against per-direction substrings --- cpp/tests/request.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index d452fbb8d..afa632ff4 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -579,21 +579,16 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) allocate(); - std::vector> requests; - requests.push_back(_ep->streamSend(_sendPtr[0], _messageSize, 0)); - requests.push_back(_ep->streamRecv(_recvPtr[0], _messageSize, 0)); + auto sendRequest = _ep->streamSend(_sendPtr[0], _messageSize, 0); + auto recvRequest = _ep->streamRecv(_recvPtr[0], _messageSize, 0); + std::vector> requests{sendRequest, recvRequest}; waitRequests(_worker, requests, _progressWorker); - // UCX may complete a request inline (e.g. very small loopback sends), in which case - // there is no UCP request handle to query and `getRequestAttributes()` legitimately - // throws. Accept that, but require at least one request (typically the receive) to - // expose a queryable handle so we know the feature wired up. - size_t requestsWithAttributes = 0; - for (const auto& request : requests) { - auto debugString = request->getRequestAttributes().debugString; - std::string expectedSubstring = "length " + std::to_string(_messageSize); - ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); - } + auto sendDebug = sendRequest->getRequestAttributes().debugString; + ASSERT_THAT(sendDebug, ::testing::HasSubstr("length " + std::to_string(_messageSize))); + + auto recvDebug = recvRequest->getRequestAttributes().debugString; + ASSERT_THAT(recvDebug, ::testing::HasSubstr("no debug info")); copyResults(); ASSERT_THAT(_recv[0], ContainerEq(_send[0])); From 07f8c484712270e9c7c7395c70775a3fc0089c68 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 6 May 2026 08:31:06 +0000 Subject: [PATCH 16/43] Test cleanup --- cpp/tests/request.cpp | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index afa632ff4..d7c0b6202 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -513,23 +513,10 @@ TEST_P(RequestTest, ProgressTag) TEST_P(RequestTest, ProgressTagRequestAttributes) { - // `ucp_request_query` only works on a real UCP request handle, but UCX does not - // always allocate one. Skip the configurations where it doesn't, so the - // assertions on the debug string content are deterministic: - // - Zero-length CUDA transfers report host memtype in UCX debug strings (no - // device buffer is involved), so the "cuda" substring assertion would fail. - // - In threaded progress modes the worker progress thread advances the first - // half of an eager tag pair into the peer's unexpected queue before the - // second half is submitted; UCX then matches and completes the second - // submission inline (returning `UCS_OK_PTR`), leaving no request handle to - // query. Non-threaded modes don't progress between submissions, so both - // halves get real handles, and rendezvous-protocol messages always defer. - if (_bufferType == ucxx::BufferType::RMM && _messageSize == 0) - GTEST_SKIP() << "Zero-length CUDA transfers report host memtype in UCX debug strings"; if (_messageSize <= _rndvThresh) GTEST_SKIP() << "Eager messages do not create a ucp_request and thus no debug info"; - rebuildWorker(/* enableRequestAttributes */ true); + rebuildWorker(true); allocate(); @@ -542,8 +529,14 @@ TEST_P(RequestTest, ProgressTagRequestAttributes) auto debugString = request->getRequestAttributes().debugString; std::string expectedSubstring = "length " + std::to_string(_messageSize); ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); - ASSERT_THAT(debugString, - ::testing::HasSubstr(_memoryType == UCS_MEMORY_TYPE_HOST ? "host" : "cuda")); + + if (_bufferType == ucxx::BufferType::RMM && _messageSize == 0) { + // Zero-length CUDA transfers report host memtype + ASSERT_THAT(debugString, ::testing::HasSubstr("host")); + } else { + ASSERT_THAT(debugString, + ::testing::HasSubstr(_memoryType == UCS_MEMORY_TYPE_HOST ? "host" : "cuda")); + } } copyResults(); @@ -575,7 +568,7 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) if (_messageSize <= _rndvThresh) GTEST_SKIP() << "Eager messages complete inline without a UCP request to query"; - rebuildWorker(/* enableRequestAttributes */ true); + rebuildWorker(true); allocate(); @@ -602,7 +595,7 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) if (_progressMode == ProgressMode::Wait) GTEST_SKIP() << "Interrupting UCP worker progress operation in wait mode is not possible"; - rebuildWorker(/* enableRequestAttributes */ true); + rebuildWorker(true); allocate(1, false); @@ -611,8 +604,6 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) requests.push_back(_ep->amRecv()); waitRequests(_worker, requests, _progressWorker); - // Inline completion is acceptable (no UCP handle to query); require at least one - // request to have populated attributes. size_t requestsWithAttributes = 0; for (const auto& request : requests) { auto debugString = request->getRequestAttributes().debugString; @@ -630,7 +621,7 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) { if (_messageSize == 0) GTEST_SKIP() << "Zero-length memGet completes without a UCP request"; - rebuildWorker(/* enableRequestAttributes */ true); + rebuildWorker(true); allocate(); @@ -648,8 +639,6 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) requests.push_back(_ep->flush()); waitRequests(_worker, requests, _progressWorker); - // The memGet may complete inline on loopback (no UCP handle to query); accept that - // and exercise the path either way. auto debugString = request->getRequestAttributes().debugString; std::string expectedSubstring = "length " + std::to_string(_messageSize); ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); From 588e4f35ba4769eb41bd292348d51c1051e801ab Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 6 May 2026 08:29:16 +0000 Subject: [PATCH 17/43] Accept either outcome on stream recv attribute query --- cpp/tests/request.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index d7c0b6202..9134b1a9d 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -580,8 +580,14 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) auto sendDebug = sendRequest->getRequestAttributes().debugString; ASSERT_THAT(sendDebug, ::testing::HasSubstr("length " + std::to_string(_messageSize))); - auto recvDebug = recvRequest->getRequestAttributes().debugString; - ASSERT_THAT(recvDebug, ::testing::HasSubstr("no debug info")); + try { + // Stream recv requests have no rendezvous path, thus debug info cannot be generated. + auto recvDebug = recvRequest->getRequestAttributes().debugString; + EXPECT_THAT(recvDebug, ::testing::HasSubstr("no debug info")); + } catch (const ucxx::Error&) { + // Recv completed inline (send completed before recv was posted); no UCP request to + // query. + } copyResults(); ASSERT_THAT(_recv[0], ContainerEq(_send[0])); From 771bbb77c024602e72a9decff5bce34a069a200d Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 6 May 2026 08:43:00 +0000 Subject: [PATCH 18/43] Make ProgressTagRequestAttributesDisabled non-parameterized --- cpp/tests/request.cpp | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 9134b1a9d..52f1514c3 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -543,23 +543,36 @@ TEST_P(RequestTest, ProgressTagRequestAttributes) ASSERT_THAT(_recv[0], ContainerEq(_send[0])); } -TEST_P(RequestTest, ProgressTagRequestAttributesDisabled) +TEST(RequestTagRequestAttributesDisabledTest, GetRequestAttributesThrows) { - ASSERT_FALSE(_worker->isRequestAttributesEnabled()); - - allocate(); + // Whether the request attributes query is gated on the worker flag is invariant + // across progress modes, buffer types and message sizes — there is nothing to + // gain from running this assertion across the full RequestTest matrix. Use a + // single fixed configuration: a default-built worker (requestAttributes disabled) + // and a small host loopback tag transfer. + auto context = ucxx::createContext({}, ucxx::Context::defaultFeatureFlags); + auto worker = ucxx::experimental::createWorker(context).build(); + ASSERT_FALSE(worker->isRequestAttributesEnabled()); + + auto ep = worker->createEndpointFromWorkerAddress(worker->getAddress()); + auto progressWorker = getProgressFunction(worker, ProgressMode::Polling); + + constexpr size_t messageLength = 1024; + constexpr size_t messageSize = messageLength * sizeof(int); + std::vector sendBuf(messageLength); + std::vector recvBuf(messageLength); + std::iota(sendBuf.begin(), sendBuf.end(), 0); std::vector> requests; - requests.push_back(_ep->tagSend(_sendPtr[0], _messageSize, ucxx::Tag{0})); - requests.push_back(_ep->tagRecv(_recvPtr[0], _messageSize, ucxx::Tag{0}, ucxx::TagMaskFull)); - waitRequests(_worker, requests, _progressWorker); + requests.push_back(ep->tagSend(sendBuf.data(), messageSize, ucxx::Tag{0})); + requests.push_back(ep->tagRecv(recvBuf.data(), messageSize, ucxx::Tag{0}, ucxx::TagMaskFull)); + waitRequests(worker, requests, progressWorker); for (const auto& request : requests) { EXPECT_THROW(std::ignore = request->getRequestAttributes(), ucxx::Error); } - copyResults(); - ASSERT_THAT(_recv[0], ContainerEq(_send[0])); + ASSERT_THAT(recvBuf, ::testing::ContainerEq(sendBuf)); } TEST_P(RequestTest, ProgressStreamRequestAttributes) From 110049ef701de4146c3dd0f862eda6248c7aec46 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 6 May 2026 08:52:54 +0000 Subject: [PATCH 19/43] Add disabled-attribute tests for Stream/AM/MemoryGet/MemoryPut --- cpp/tests/request.cpp | 130 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 105 insertions(+), 25 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 52f1514c3..48ede2824 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -543,36 +543,116 @@ TEST_P(RequestTest, ProgressTagRequestAttributes) ASSERT_THAT(_recv[0], ContainerEq(_send[0])); } -TEST(RequestTagRequestAttributesDisabledTest, GetRequestAttributesThrows) +// Whether `getRequestAttributes()` throws when the worker has the feature disabled +// is invariant across progress modes, buffer types and message sizes — there is +// nothing to gain from running these assertions across the full RequestTest matrix. +// Each test below uses the same fixture-built loopback (default-disabled worker, +// polling progress, host buffers) and only varies the request type submitted. +class RequestAttributesDisabledTest : public ::testing::Test { + protected: + static constexpr size_t kMessageLength = 1024; + static constexpr size_t kMessageSize = kMessageLength * sizeof(int); + + std::shared_ptr _context; + std::shared_ptr _worker; + std::shared_ptr _ep; + std::function _progressWorker; + std::vector _sendBuf; + std::vector _recvBuf; + + void SetUp() override + { + _context = ucxx::createContext({}, ucxx::Context::defaultFeatureFlags); + _worker = ucxx::experimental::createWorker(_context).build(); + ASSERT_FALSE(_worker->isRequestAttributesEnabled()); + + _ep = _worker->createEndpointFromWorkerAddress(_worker->getAddress()); + _progressWorker = getProgressFunction(_worker, ProgressMode::Polling); + + _sendBuf.resize(kMessageLength); + _recvBuf.resize(kMessageLength); + std::iota(_sendBuf.begin(), _sendBuf.end(), 0); + } + + void expectAllThrow(const std::vector>& requests) const + { + for (const auto& request : requests) { + EXPECT_THROW(std::ignore = request->getRequestAttributes(), ucxx::Error); + } + } +}; + +TEST_F(RequestAttributesDisabledTest, Tag) { - // Whether the request attributes query is gated on the worker flag is invariant - // across progress modes, buffer types and message sizes — there is nothing to - // gain from running this assertion across the full RequestTest matrix. Use a - // single fixed configuration: a default-built worker (requestAttributes disabled) - // and a small host loopback tag transfer. - auto context = ucxx::createContext({}, ucxx::Context::defaultFeatureFlags); - auto worker = ucxx::experimental::createWorker(context).build(); - ASSERT_FALSE(worker->isRequestAttributesEnabled()); - - auto ep = worker->createEndpointFromWorkerAddress(worker->getAddress()); - auto progressWorker = getProgressFunction(worker, ProgressMode::Polling); - - constexpr size_t messageLength = 1024; - constexpr size_t messageSize = messageLength * sizeof(int); - std::vector sendBuf(messageLength); - std::vector recvBuf(messageLength); - std::iota(sendBuf.begin(), sendBuf.end(), 0); + std::vector> requests; + requests.push_back(_ep->tagSend(_sendBuf.data(), kMessageSize, ucxx::Tag{0})); + requests.push_back(_ep->tagRecv(_recvBuf.data(), kMessageSize, ucxx::Tag{0}, ucxx::TagMaskFull)); + waitRequests(_worker, requests, _progressWorker); + + expectAllThrow(requests); + ASSERT_THAT(_recvBuf, ::testing::ContainerEq(_sendBuf)); +} +TEST_F(RequestAttributesDisabledTest, Stream) +{ std::vector> requests; - requests.push_back(ep->tagSend(sendBuf.data(), messageSize, ucxx::Tag{0})); - requests.push_back(ep->tagRecv(recvBuf.data(), messageSize, ucxx::Tag{0}, ucxx::TagMaskFull)); - waitRequests(worker, requests, progressWorker); + requests.push_back(_ep->streamSend(_sendBuf.data(), kMessageSize, 0)); + requests.push_back(_ep->streamRecv(_recvBuf.data(), kMessageSize, 0)); + waitRequests(_worker, requests, _progressWorker); - for (const auto& request : requests) { - EXPECT_THROW(std::ignore = request->getRequestAttributes(), ucxx::Error); - } + expectAllThrow(requests); + ASSERT_THAT(_recvBuf, ::testing::ContainerEq(_sendBuf)); +} + +TEST_F(RequestAttributesDisabledTest, Am) +{ + std::vector> requests; + requests.push_back(_ep->amSend(_sendBuf.data(), kMessageSize, UCS_MEMORY_TYPE_HOST)); + requests.push_back(_ep->amRecv()); + waitRequests(_worker, requests, _progressWorker); + + expectAllThrow(requests); + + auto recvBuffer = requests[1]->getRecvBuffer(); + ASSERT_EQ(recvBuffer->getSize(), kMessageSize); + std::vector received(reinterpret_cast(recvBuffer->data()), + reinterpret_cast(recvBuffer->data()) + kMessageLength); + ASSERT_THAT(received, ::testing::ContainerEq(_sendBuf)); +} + +TEST_F(RequestAttributesDisabledTest, MemoryGet) +{ + auto memoryHandle = _context->createMemoryHandle(kMessageSize, nullptr, UCS_MEMORY_TYPE_HOST); + std::memcpy( + reinterpret_cast(memoryHandle->getBaseAddress()), _sendBuf.data(), kMessageSize); + + auto serializedRemoteKey = memoryHandle->createRemoteKey()->serialize(); + auto remoteKey = ucxx::createRemoteKeyFromSerialized(_ep, serializedRemoteKey); + + auto request = _ep->memGet(_recvBuf.data(), kMessageSize, remoteKey); + std::vector> requests{request, _ep->flush()}; + waitRequests(_worker, requests, _progressWorker); + + expectAllThrow({request}); + ASSERT_THAT(_recvBuf, ::testing::ContainerEq(_sendBuf)); +} + +TEST_F(RequestAttributesDisabledTest, MemoryPut) +{ + auto memoryHandle = _context->createMemoryHandle(kMessageSize, nullptr, UCS_MEMORY_TYPE_HOST); + + auto serializedRemoteKey = memoryHandle->createRemoteKey()->serialize(); + auto remoteKey = ucxx::createRemoteKeyFromSerialized(_ep, serializedRemoteKey); + + auto request = _ep->memPut(_sendBuf.data(), kMessageSize, remoteKey); + std::vector> requests{request, _ep->flush()}; + waitRequests(_worker, requests, _progressWorker); + + expectAllThrow({request}); - ASSERT_THAT(recvBuf, ::testing::ContainerEq(sendBuf)); + std::memcpy( + _recvBuf.data(), reinterpret_cast(memoryHandle->getBaseAddress()), kMessageSize); + ASSERT_THAT(_recvBuf, ::testing::ContainerEq(_sendBuf)); } TEST_P(RequestTest, ProgressStreamRequestAttributes) From 08f1832f481bd3a309b67ae11b0e65852ae90239 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 6 May 2026 09:12:36 +0000 Subject: [PATCH 20/43] Add MemoryPutRequestAttributes test --- cpp/tests/request.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 48ede2824..b6bc130c2 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -746,6 +746,40 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) ASSERT_THAT(_recv[0], ContainerEq(_send[0])); } +TEST_P(RequestTest, MemoryPutRequestAttributes) +{ + if (_messageSize == 0) GTEST_SKIP() << "Zero-length memPut completes without a UCP request"; + if (_messageSize <= _rndvThresh) + GTEST_SKIP() << "Eager messages complete inline without a UCP request to query"; + + rebuildWorker(true); + + allocate(); + + auto memoryHandle = _context->createMemoryHandle(_messageSize, nullptr, _memoryType); + + auto localRemoteKey = memoryHandle->createRemoteKey(); + auto serializedRemoteKey = localRemoteKey->serialize(); + auto remoteKey = ucxx::createRemoteKeyFromSerialized(_ep, serializedRemoteKey); + + auto request = _ep->memPut(_sendPtr[0], _messageSize, remoteKey); + std::vector> requests; + requests.push_back(request); + requests.push_back(_ep->flush()); + waitRequests(_worker, requests, _progressWorker); + + auto debugString = request->getRequestAttributes().debugString; + std::string expectedSubstring = "length " + std::to_string(_messageSize); + ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); + + // Copy memory handle data to receive buffer to verify data correctness. + copyMemoryTypeAware( + _recvPtr[0], reinterpret_cast(memoryHandle->getBaseAddress()), _messageSize); + + copyResults(); + ASSERT_THAT(_recv[0], ContainerEq(_send[0])); +} + TEST_P(RequestTest, ProgressTagMulti) { if (_progressMode == ProgressMode::Wait) { From 247bd8283b8792c3596bd31e61b5a9146f3ef800 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 6 May 2026 09:19:50 +0000 Subject: [PATCH 21/43] Restore inclusive rndv threshold in AM allocator-type assertion --- cpp/tests/request.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index b6bc130c2..ad07f8aee 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -207,11 +207,11 @@ TEST_P(RequestTest, ProgressAm) auto recvReq = requests[1]; _recvPtr[0] = recvReq->getRecvBuffer()->data(); - // Messages larger than `_rndvThresh` are rendezvous and will use custom allocator, - // smaller messages are eager and will always be host-allocated. + // Messages of size `_rndvThresh` or larger are rendezvous and will use the custom + // allocator, smaller messages are eager and will always be host-allocated. ASSERT_THAT(recvReq->getRecvBuffer()->getType(), - (_registerCustomAmAllocator && _messageSize > _rndvThresh) ? _bufferType - : ucxx::BufferType::Host); + (_registerCustomAmAllocator && _messageSize >= _rndvThresh) ? _bufferType + : ucxx::BufferType::Host); copyResults(); @@ -362,7 +362,7 @@ TEST_P(RequestTest, ProgressAmReceiverCallback) // Messages larger than `_rndvThresh` are rendezvous and will use custom allocator, // smaller messages are eager and will always be host-allocated. ASSERT_THAT(receivedRequests[0]->getRecvBuffer()->getType(), - (_registerCustomAmAllocator && _messageSize > _rndvThresh) + (_registerCustomAmAllocator && _messageSize >= _rndvThresh) ? _bufferType : ucxx::BufferType::Host); } From 6e020495a0558f89c8b383e4d633295ea282f412 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 6 May 2026 09:31:15 +0000 Subject: [PATCH 22/43] Tighten attribute-test eager skip for Tag and AM to strict-less-than --- cpp/tests/request.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index ad07f8aee..2f78fbe38 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -513,7 +513,7 @@ TEST_P(RequestTest, ProgressTag) TEST_P(RequestTest, ProgressTagRequestAttributes) { - if (_messageSize <= _rndvThresh) + if (_messageSize < _rndvThresh) GTEST_SKIP() << "Eager messages do not create a ucp_request and thus no debug info"; rebuildWorker(true); @@ -689,7 +689,7 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) TEST_P(RequestTest, ProgressAmRequestAttributes) { if (_messageSize == 0) GTEST_SKIP() << "Zero-length AM completes without a UCP request"; - if (_messageSize <= _rndvThresh) + if (_messageSize < _rndvThresh) GTEST_SKIP() << "Eager messages complete inline without a UCP request to query"; if (_progressMode == ProgressMode::Wait) GTEST_SKIP() << "Interrupting UCP worker progress operation in wait mode is not possible"; From 600c9c7216200ed84ff8fa87e79759568182b5a2 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 6 May 2026 10:09:39 +0000 Subject: [PATCH 23/43] Make attribute tests transport-agnostic via lenient assertions --- cpp/tests/request.cpp | 60 +++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 37 deletions(-) diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 2f78fbe38..4b9de4677 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -526,28 +526,16 @@ TEST_P(RequestTest, ProgressTagRequestAttributes) waitRequests(_worker, requests, _progressWorker); for (const auto& request : requests) { - auto debugString = request->getRequestAttributes().debugString; - std::string expectedSubstring = "length " + std::to_string(_messageSize); - ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); - - if (_bufferType == ucxx::BufferType::RMM && _messageSize == 0) { - // Zero-length CUDA transfers report host memtype - ASSERT_THAT(debugString, ::testing::HasSubstr("host")); - } else { - ASSERT_THAT(debugString, - ::testing::HasSubstr(_memoryType == UCS_MEMORY_TYPE_HOST ? "host" : "cuda")); - } + auto debugString = request->getRequestAttributes().debugString; + ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); + ASSERT_THAT(debugString, + ::testing::HasSubstr(_memoryType == UCS_MEMORY_TYPE_HOST ? "host" : "cuda")); } copyResults(); ASSERT_THAT(_recv[0], ContainerEq(_send[0])); } -// Whether `getRequestAttributes()` throws when the worker has the feature disabled -// is invariant across progress modes, buffer types and message sizes — there is -// nothing to gain from running these assertions across the full RequestTest matrix. -// Each test below uses the same fixture-built loopback (default-disabled worker, -// polling progress, host buffers) and only varies the request type submitted. class RequestAttributesDisabledTest : public ::testing::Test { protected: static constexpr size_t kMessageLength = 1024; @@ -658,8 +646,6 @@ TEST_F(RequestAttributesDisabledTest, MemoryPut) TEST_P(RequestTest, ProgressStreamRequestAttributes) { if (_messageSize == 0) GTEST_SKIP() << "Stream rejects zero-length transfers"; - if (_messageSize <= _rndvThresh) - GTEST_SKIP() << "Eager messages complete inline without a UCP request to query"; rebuildWorker(true); @@ -670,16 +656,19 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) std::vector> requests{sendRequest, recvRequest}; waitRequests(_worker, requests, _progressWorker); - auto sendDebug = sendRequest->getRequestAttributes().debugString; - ASSERT_THAT(sendDebug, ::testing::HasSubstr("length " + std::to_string(_messageSize))); + try { + auto sendDebug = sendRequest->getRequestAttributes().debugString; + EXPECT_FALSE(sendDebug.empty()); + EXPECT_THAT(sendDebug, ::testing::HasSubstr("length " + std::to_string(_messageSize))); + } catch (const ucxx::Error&) { + // Send completed inline; no UCP request handle to query. + } try { - // Stream recv requests have no rendezvous path, thus debug info cannot be generated. auto recvDebug = recvRequest->getRequestAttributes().debugString; EXPECT_THAT(recvDebug, ::testing::HasSubstr("no debug info")); } catch (const ucxx::Error&) { - // Recv completed inline (send completed before recv was posted); no UCP request to - // query. + // Recv completed inline; no UCP request handle to query. } copyResults(); @@ -688,7 +677,6 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) TEST_P(RequestTest, ProgressAmRequestAttributes) { - if (_messageSize == 0) GTEST_SKIP() << "Zero-length AM completes without a UCP request"; if (_messageSize < _rndvThresh) GTEST_SKIP() << "Eager messages complete inline without a UCP request to query"; if (_progressMode == ProgressMode::Wait) @@ -703,11 +691,9 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) requests.push_back(_ep->amRecv()); waitRequests(_worker, requests, _progressWorker); - size_t requestsWithAttributes = 0; for (const auto& request : requests) { - auto debugString = request->getRequestAttributes().debugString; - std::string expectedSubstring = "length " + std::to_string(_messageSize); - ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); + auto debugString = request->getRequestAttributes().debugString; + ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); } auto recvReq = requests[1]; @@ -738,9 +724,8 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) requests.push_back(_ep->flush()); waitRequests(_worker, requests, _progressWorker); - auto debugString = request->getRequestAttributes().debugString; - std::string expectedSubstring = "length " + std::to_string(_messageSize); - ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); + auto debugString = request->getRequestAttributes().debugString; + ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); copyResults(); ASSERT_THAT(_recv[0], ContainerEq(_send[0])); @@ -749,8 +734,6 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) TEST_P(RequestTest, MemoryPutRequestAttributes) { if (_messageSize == 0) GTEST_SKIP() << "Zero-length memPut completes without a UCP request"; - if (_messageSize <= _rndvThresh) - GTEST_SKIP() << "Eager messages complete inline without a UCP request to query"; rebuildWorker(true); @@ -768,11 +751,14 @@ TEST_P(RequestTest, MemoryPutRequestAttributes) requests.push_back(_ep->flush()); waitRequests(_worker, requests, _progressWorker); - auto debugString = request->getRequestAttributes().debugString; - std::string expectedSubstring = "length " + std::to_string(_messageSize); - ASSERT_THAT(debugString, ::testing::HasSubstr(expectedSubstring)); + try { + auto debugString = request->getRequestAttributes().debugString; + EXPECT_FALSE(debugString.empty()); + EXPECT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); + } catch (const ucxx::Error&) { + // Request completed inline; no UCP request handle to query. + } - // Copy memory handle data to receive buffer to verify data correctness. copyMemoryTypeAware( _recvPtr[0], reinterpret_cast(memoryHandle->getBaseAddress()), _messageSize); From 53e4b9bbf2b68e78d818dd30cdf59598ece4e0dc Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 13:35:42 +0000 Subject: [PATCH 24/43] Return a C++ struct from Worker::queryAttributes and query every field --- cpp/include/ucxx/worker.h | 31 ++++++++++++++++++++++++++----- cpp/src/request.cpp | 2 +- cpp/src/worker.cpp | 16 +++++++++++----- cpp/tests/worker.cpp | 14 +++++++++----- 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/cpp/include/ucxx/worker.h b/cpp/include/ucxx/worker.h index 0eada0275..5843117e1 100644 --- a/cpp/include/ucxx/worker.h +++ b/cpp/include/ucxx/worker.h @@ -1011,15 +1011,36 @@ class Worker : public Component { RequestCallbackUserData callbackData = nullptr); /** - * @brief Query worker attributes. + * @brief Idiomatic C++ snapshot of the worker attributes reported by `ucp_worker_query`. * - * Queries the worker attributes using ucp_worker_query. This provides information about - * the worker's thread mode and other attributes. + * Returned by `queryAttributes()`. The address attributes (`address` / + * `address_length`) are intentionally omitted: ucxx already exposes the worker + * address via `getAddress()` as a `std::shared_ptr
` with proper RAII, + * and folding the raw pointer here would either duplicate that or force the + * caller to remember `ucp_worker_release_address`. + */ + struct WorkerAttributes { + /// Thread safety level the worker was created with. + ucs_thread_mode_t threadMode{UCS_THREAD_MODE_MULTI}; + /// Maximum allowed header size for `ucp_am_send_nbx`. + size_t maxAmHeader{0}; + /// Worker name used by tracing and analysis tools. + std::string name{}; + /// Maximum debug-string buffer size accepted by `ucp_request_query`. + size_t maxDebugString{0}; + }; + + /** + * @brief Query the worker's attributes. + * + * Wraps `ucp_worker_query` and returns the populated attributes as a C++ struct. + * All non-address fields exposed by UCP are queried; see `WorkerAttributes` for + * the field list and the rationale for omitting the address. * - * @returns The worker attributes structure. + * @returns A `WorkerAttributes` filled with all queried fields. * @throws ucxx::Error if an error occurred while querying worker attributes. */ - [[nodiscard]] ucp_worker_attr_t queryAttributes() const; + [[nodiscard]] WorkerAttributes queryAttributes() const; }; /** diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 290654447..ba6af7ebd 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -260,7 +260,7 @@ void Request::queryRequestAttributes() auto worker_attr = _worker->queryAttributes(); // Allocate buffer for debug string with size from worker attributes - std::vector debug_str(worker_attr.max_debug_string, '\0'); + std::vector debug_str(worker_attr.maxDebugString, '\0'); result.field_mask = UCP_REQUEST_ATTR_FIELD_STATUS | // Request status UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type diff --git a/cpp/src/worker.cpp b/cpp/src/worker.cpp index 4ac4bc9fd..7c6b61dc2 100644 --- a/cpp/src/worker.cpp +++ b/cpp/src/worker.cpp @@ -3,6 +3,7 @@ * SPDX-License-Identifier: BSD-3-Clause */ #include +#include #include #include #include @@ -196,15 +197,20 @@ std::string Worker::getInfo() return utils::decodeTextFileDescriptor(TextFileDescriptor); } -ucp_worker_attr_t Worker::queryAttributes() const +Worker::WorkerAttributes Worker::queryAttributes() const { ucp_worker_attr_t attr = { - .field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE | // Request thread mode info - UCP_WORKER_ATTR_FIELD_MAX_INFO_STRING // Request debug string size - }; + .field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE | UCP_WORKER_ATTR_FIELD_MAX_AM_HEADER | + UCP_WORKER_ATTR_FIELD_NAME | UCP_WORKER_ATTR_FIELD_MAX_INFO_STRING}; utils::ucsErrorThrow(ucp_worker_query(_handle, &attr)); - return attr; + + return WorkerAttributes{ + .threadMode = attr.thread_mode, + .maxAmHeader = attr.max_am_header, + .name = std::string(attr.name, ::strnlen(attr.name, sizeof(attr.name))), + .maxDebugString = attr.max_debug_string, + }; } bool Worker::isDelayedRequestSubmissionEnabled() const diff --git a/cpp/tests/worker.cpp b/cpp/tests/worker.cpp index 8e57603ce..1749c8c7b 100644 --- a/cpp/tests/worker.cpp +++ b/cpp/tests/worker.cpp @@ -112,11 +112,15 @@ TEST_F(WorkerTest, QueryAttributes) { auto attrs = _worker->queryAttributes(); - // Verify that the thread mode field was requested and returned - ASSERT_TRUE(attrs.field_mask & UCP_WORKER_ATTR_FIELD_THREAD_MODE); - - // The worker was created with UCS_THREAD_MODE_MULTI in the constructor - ASSERT_EQ(attrs.thread_mode, UCS_THREAD_MODE_MULTI); + // The worker was created with UCS_THREAD_MODE_MULTI in the constructor. + EXPECT_EQ(attrs.threadMode, UCS_THREAD_MODE_MULTI); + + // The remaining fields are determined by UCX configuration, so the strongest + // portable assertion is that they were populated with non-zero / non-empty + // values. + EXPECT_GT(attrs.maxAmHeader, 0u); + EXPECT_FALSE(attrs.name.empty()); + EXPECT_GT(attrs.maxDebugString, 0u); } TEST_P(WorkerCapabilityTest, CheckCapability) From 91165e6ac5b2c313227f6559c00ca9f37badbbc9 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 14:23:51 +0000 Subject: [PATCH 25/43] Align attribute APIs as getAttributes() and class-nested Attributes --- cpp/include/ucxx/request.h | 17 +++++++++-------- cpp/include/ucxx/worker.h | 19 ++++++++++--------- cpp/src/request.cpp | 4 ++-- cpp/src/worker.cpp | 4 ++-- cpp/tests/request.cpp | 14 +++++++------- cpp/tests/worker.cpp | 2 +- 6 files changed, 31 insertions(+), 29 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index c6e077e0a..66e148222 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -39,7 +39,7 @@ namespace ucxx { class Request : public Component { protected: /// Structure to hold cached request attributes including the debug string - struct RequestAttributes { + struct Attributes { ucs_status_t status{UCS_INPROGRESS}; ///< Status of the request ucs_memory_type memoryType{UCS_MEMORY_TYPE_UNKNOWN}; ///< Memory type of the request std::string debugString{}; ///< Stored debug string @@ -62,8 +62,8 @@ class Request : public Component { bool _enablePythonFuture{true}; ///< Whether Python future is enabled for this request RequestCallbackUserFunction _callback{nullptr}; ///< Completion callback RequestCallbackUserData _callbackData{nullptr}; ///< Completion callback data - RequestAttributes _requestAttr{}; ///< Request attributes queried when request is posted - bool _isRequestAttrValid{false}; ///< Whether the request attributes are valid + Attributes _requestAttr{}; ///< Request attributes queried when request is posted + bool _isRequestAttrValid{false}; ///< Whether the request attributes are valid /** * @brief Protected constructor of an abstract `ucxx::Request`. @@ -252,14 +252,15 @@ class Request : public Component { * Get the request attributes. The owning `ucxx::Worker` must have been created with * request attributes querying enabled (see * `ucxx::experimental::WorkerBuilder::requestAttributes()`); otherwise the attributes - * are never populated and this method throws. + * are never populated and this method throws. Querying the underlying UCP request is + * an implementation detail performed eagerly when the request is submitted. * * @throw ucxx::Error if the request attributes are not available yet, including when * request attributes querying is disabled on the owning worker. * - * @return A RequestAttributes containing the request attributes. + * @return An `Attributes` containing the request attributes. */ - [[nodiscard]] RequestAttributes getRequestAttributes(); + [[nodiscard]] Attributes getAttributes(); protected: /** @@ -271,8 +272,8 @@ class Request : public Component { * - Memory type * - Debug string * - * @return A RequestAttributes containing the query status, request attributes and debug - * string. + * Internal companion to `getAttributes()`: this is the side that actually calls into + * UCP and populates the cached attributes; `getAttributes()` only returns the cache. */ void queryRequestAttributes(); diff --git a/cpp/include/ucxx/worker.h b/cpp/include/ucxx/worker.h index 5843117e1..8d139b3d3 100644 --- a/cpp/include/ucxx/worker.h +++ b/cpp/include/ucxx/worker.h @@ -500,7 +500,7 @@ class Worker : public Component { * Check whether the worker has been created with request attributes querying enabled. * When enabled, each `ucxx::Request` will have its UCP attributes (such as the debug * string) queried immediately after submission, making them available via - * `ucxx::Request::getRequestAttributes()`. Querying request attributes has a + * `ucxx::Request::getAttributes()`. Querying request attributes has a * non-negligible runtime cost and is therefore disabled by default. * * @returns `true` if request attributes querying is enabled, `false` otherwise. @@ -1013,13 +1013,13 @@ class Worker : public Component { /** * @brief Idiomatic C++ snapshot of the worker attributes reported by `ucp_worker_query`. * - * Returned by `queryAttributes()`. The address attributes (`address` / + * Returned by `getAttributes()`. The address attributes (`address` / * `address_length`) are intentionally omitted: ucxx already exposes the worker * address via `getAddress()` as a `std::shared_ptr
` with proper RAII, * and folding the raw pointer here would either duplicate that or force the * caller to remember `ucp_worker_release_address`. */ - struct WorkerAttributes { + struct Attributes { /// Thread safety level the worker was created with. ucs_thread_mode_t threadMode{UCS_THREAD_MODE_MULTI}; /// Maximum allowed header size for `ucp_am_send_nbx`. @@ -1031,16 +1031,17 @@ class Worker : public Component { }; /** - * @brief Query the worker's attributes. + * @brief Get the worker's attributes. * - * Wraps `ucp_worker_query` and returns the populated attributes as a C++ struct. - * All non-address fields exposed by UCP are queried; see `WorkerAttributes` for - * the field list and the rationale for omitting the address. + * Returns the worker attributes as a C++ struct, querying UCP via + * `ucp_worker_query` under the hood. All non-address fields exposed by UCP are + * queried; see `Attributes` for the field list and the rationale for omitting + * the address. * - * @returns A `WorkerAttributes` filled with all queried fields. + * @returns An `Attributes` filled with all queried fields. * @throws ucxx::Error if an error occurred while querying worker attributes. */ - [[nodiscard]] WorkerAttributes queryAttributes() const; + [[nodiscard]] Attributes getAttributes() const; }; /** diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index ba6af7ebd..a2594eba5 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -257,7 +257,7 @@ void Request::queryRequestAttributes() ucp_request_attr_t result; // Get the debug string size from worker attributes - auto worker_attr = _worker->queryAttributes(); + auto worker_attr = _worker->getAttributes(); // Allocate buffer for debug string with size from worker attributes std::vector debug_str(worker_attr.maxDebugString, '\0'); @@ -289,7 +289,7 @@ void Request::publishRequest(void* request) queryRequestAttributes(); } -Request::RequestAttributes Request::getRequestAttributes() +Request::Attributes Request::getAttributes() { std::lock_guard lock(_mutex); diff --git a/cpp/src/worker.cpp b/cpp/src/worker.cpp index 7c6b61dc2..86eea8b84 100644 --- a/cpp/src/worker.cpp +++ b/cpp/src/worker.cpp @@ -197,7 +197,7 @@ std::string Worker::getInfo() return utils::decodeTextFileDescriptor(TextFileDescriptor); } -Worker::WorkerAttributes Worker::queryAttributes() const +Worker::Attributes Worker::getAttributes() const { ucp_worker_attr_t attr = { .field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE | UCP_WORKER_ATTR_FIELD_MAX_AM_HEADER | @@ -205,7 +205,7 @@ Worker::WorkerAttributes Worker::queryAttributes() const utils::ucsErrorThrow(ucp_worker_query(_handle, &attr)); - return WorkerAttributes{ + return Attributes{ .threadMode = attr.thread_mode, .maxAmHeader = attr.max_am_header, .name = std::string(attr.name, ::strnlen(attr.name, sizeof(attr.name))), diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 4b9de4677..74100abe0 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -526,7 +526,7 @@ TEST_P(RequestTest, ProgressTagRequestAttributes) waitRequests(_worker, requests, _progressWorker); for (const auto& request : requests) { - auto debugString = request->getRequestAttributes().debugString; + auto debugString = request->getAttributes().debugString; ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); ASSERT_THAT(debugString, ::testing::HasSubstr(_memoryType == UCS_MEMORY_TYPE_HOST ? "host" : "cuda")); @@ -565,7 +565,7 @@ class RequestAttributesDisabledTest : public ::testing::Test { void expectAllThrow(const std::vector>& requests) const { for (const auto& request : requests) { - EXPECT_THROW(std::ignore = request->getRequestAttributes(), ucxx::Error); + EXPECT_THROW(std::ignore = request->getAttributes(), ucxx::Error); } } }; @@ -657,7 +657,7 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) waitRequests(_worker, requests, _progressWorker); try { - auto sendDebug = sendRequest->getRequestAttributes().debugString; + auto sendDebug = sendRequest->getAttributes().debugString; EXPECT_FALSE(sendDebug.empty()); EXPECT_THAT(sendDebug, ::testing::HasSubstr("length " + std::to_string(_messageSize))); } catch (const ucxx::Error&) { @@ -665,7 +665,7 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) } try { - auto recvDebug = recvRequest->getRequestAttributes().debugString; + auto recvDebug = recvRequest->getAttributes().debugString; EXPECT_THAT(recvDebug, ::testing::HasSubstr("no debug info")); } catch (const ucxx::Error&) { // Recv completed inline; no UCP request handle to query. @@ -692,7 +692,7 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) waitRequests(_worker, requests, _progressWorker); for (const auto& request : requests) { - auto debugString = request->getRequestAttributes().debugString; + auto debugString = request->getAttributes().debugString; ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); } @@ -724,7 +724,7 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) requests.push_back(_ep->flush()); waitRequests(_worker, requests, _progressWorker); - auto debugString = request->getRequestAttributes().debugString; + auto debugString = request->getAttributes().debugString; ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); copyResults(); @@ -752,7 +752,7 @@ TEST_P(RequestTest, MemoryPutRequestAttributes) waitRequests(_worker, requests, _progressWorker); try { - auto debugString = request->getRequestAttributes().debugString; + auto debugString = request->getAttributes().debugString; EXPECT_FALSE(debugString.empty()); EXPECT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); } catch (const ucxx::Error&) { diff --git a/cpp/tests/worker.cpp b/cpp/tests/worker.cpp index 1749c8c7b..52b6c685d 100644 --- a/cpp/tests/worker.cpp +++ b/cpp/tests/worker.cpp @@ -110,7 +110,7 @@ TEST_F(WorkerTest, HandleIsValid) { ASSERT_TRUE(_worker->getHandle() != nullptr) TEST_F(WorkerTest, QueryAttributes) { - auto attrs = _worker->queryAttributes(); + auto attrs = _worker->getAttributes(); // The worker was created with UCS_THREAD_MODE_MULTI in the constructor. EXPECT_EQ(attrs.threadMode, UCS_THREAD_MODE_MULTI); From 6691758b2a4d3fdad538a5368de70af0ff9a3385 Mon Sep 17 00:00:00 2001 From: Horde Date: Tue, 28 Apr 2026 20:01:35 +0000 Subject: [PATCH 26/43] Cancel inflight requests and submit force-close atomically in a single pre-callback --- cpp/src/endpoint.cpp | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/cpp/src/endpoint.cpp b/cpp/src/endpoint.cpp index 5d880c359..cf682900b 100644 --- a/cpp/src/endpoint.cpp +++ b/cpp/src/endpoint.cpp @@ -288,8 +288,33 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) bool submitted = false; for (uint64_t i = 0; i < maxAttempts && !closeSuccess; ++i) { if (!submitted) { + // Cancel inflight requests and submit FORCE close ATOMICALLY in a + // single pre-callback, with no ucp_worker_progress() between them. + // + // Why cancel here at all (UCX FORCE close already cancels endpoint + // operations): + // tag_recv requests are worker-scoped (ucp_tag_recv_nbx(worker, ...)), + // not endpoint-scoped, so ucp_ep_close_nbx(FORCE) leaves them pending. + // Without ucp_request_cancel() here, an `await ep.close()` running + // alongside an outstanding `await ep.recv()` would hang forever. + // See test_shutdown.py::test_{server,client}_shutdown. + // + // Why atomic with FORCE close (not as a separate pre-callback): + // When cancelAll and FORCE close were separate pre-callbacks (the + // old cancelInflightRequestsBlocking path), a full ucp_worker_progress() + // ran between them. That intermediate progress could leave UCT-level + // TCP pending entries half-dispatched (mid-cuMemcpyAsync staging of + // a CUDA send); the next progress after FORCE close then crashed + // dispatching them on a freed staging buffer (uct_cuda_copy_ep_get_short + // -> cuMemcpyAsync -> SIGSEGV). Running them in a single pre-callback + // matches the safe single-threaded ordering proven by the regression + // test in cpp/tests/endpoint_close_force_tcp_cuda_race.cpp. if (!worker->registerGenericPre( - [this, &status, ¶m]() { status = ucp_ep_close_nbx(_handle, ¶m); }, period)) + [this, &status, ¶m]() { + _inflightRequests->cancelAll(); + status = ucp_ep_close_nbx(_handle, ¶m); + }, + period)) continue; submitted = true; } @@ -326,6 +351,10 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) _handle); } } else { + // No progress thread: cancel inflight + FORCE close back-to-back, then + // drive progress here. Same atomicity reasoning as the progress-thread + // path above (no ucp_worker_progress() between cancel and FORCE close). + _inflightRequests->cancelAll(); status = ucp_ep_close_nbx(_handle, ¶m); if (UCS_PTR_IS_PTR(status)) { ucs_status_t s; From 41d80c28246c0dd4abcbdb9d8e6124f0435ff28f Mon Sep 17 00:00:00 2001 From: Horde Date: Wed, 29 Apr 2026 12:19:29 +0000 Subject: [PATCH 27/43] Fix invalid _handle usage --- cpp/src/endpoint.cpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/cpp/src/endpoint.cpp b/cpp/src/endpoint.cpp index cf682900b..a5bea7bae 100644 --- a/cpp/src/endpoint.cpp +++ b/cpp/src/endpoint.cpp @@ -313,6 +313,11 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) [this, &status, ¶m]() { _inflightRequests->cancelAll(); status = ucp_ep_close_nbx(_handle, ¶m); + // Invalidate _handle synchronously immediately, to prevent + // time window where _handle` points to freed UCP memory, usually + // observed in `populateDelayedSubmission()`. + _originalHandle = _handle; + _handle = nullptr; }, period)) continue; @@ -331,7 +336,7 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) "endpoint: %s", __func__, this, - _handle, + _originalHandle, ucs_status_string(UCS_PTR_STATUS(status))); } }, @@ -348,14 +353,16 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) "ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, all attempts to close timed out", __func__, this, - _handle); + _originalHandle != nullptr ? _originalHandle : _handle); } } else { // No progress thread: cancel inflight + FORCE close back-to-back, then // drive progress here. Same atomicity reasoning as the progress-thread // path above (no ucp_worker_progress() between cancel and FORCE close). _inflightRequests->cancelAll(); - status = ucp_ep_close_nbx(_handle, ¶m); + status = ucp_ep_close_nbx(_handle, ¶m); + _originalHandle = _handle; + _handle = nullptr; if (UCS_PTR_IS_PTR(status)) { ucs_status_t s; while ((s = ucp_request_check_status(status)) == UCS_INPROGRESS) @@ -366,11 +373,12 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) "ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, Error while closing endpoint: %s", __func__, this, - _handle, + _originalHandle, ucs_status_string(UCS_PTR_STATUS(status))); } } - ucxx_trace("ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, closed", __func__, this, _handle); + ucxx_trace( + "ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, closed", __func__, this, _originalHandle); if (UCS_PTR_IS_PTR(status)) ucp_request_free(status); @@ -380,14 +388,12 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) ucxx_debug("ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, calling user close callback", __func__, this, - _handle); + _originalHandle); _closeCallback(_status, _closeCallbackArg); _closeCallback = nullptr; _closeCallbackArg = nullptr; } } - - std::swap(_handle, _originalHandle); } ucp_ep_h Endpoint::getHandle() { return _handle; } From 16091bd01d135bff603f8931e936f1d2632cbeb3 Mon Sep 17 00:00:00 2001 From: Horde Date: Tue, 12 May 2026 14:56:47 +0000 Subject: [PATCH 28/43] Rename getAttributes back to queryAttributes --- cpp/include/ucxx/request.h | 6 +++--- cpp/include/ucxx/worker.h | 6 +++--- cpp/src/request.cpp | 4 ++-- cpp/src/worker.cpp | 2 +- cpp/tests/request.cpp | 14 +++++++------- cpp/tests/worker.cpp | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index 66e148222..cda5122ba 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -260,7 +260,7 @@ class Request : public Component { * * @return An `Attributes` containing the request attributes. */ - [[nodiscard]] Attributes getAttributes(); + [[nodiscard]] Attributes queryAttributes(); protected: /** @@ -272,8 +272,8 @@ class Request : public Component { * - Memory type * - Debug string * - * Internal companion to `getAttributes()`: this is the side that actually calls into - * UCP and populates the cached attributes; `getAttributes()` only returns the cache. + * Internal companion to `queryAttributes()`: this is the side that actually calls into + * UCP and populates the cached attributes; `queryAttributes()` only returns the cache. */ void queryRequestAttributes(); diff --git a/cpp/include/ucxx/worker.h b/cpp/include/ucxx/worker.h index 8d139b3d3..09f1cedc8 100644 --- a/cpp/include/ucxx/worker.h +++ b/cpp/include/ucxx/worker.h @@ -500,7 +500,7 @@ class Worker : public Component { * Check whether the worker has been created with request attributes querying enabled. * When enabled, each `ucxx::Request` will have its UCP attributes (such as the debug * string) queried immediately after submission, making them available via - * `ucxx::Request::getAttributes()`. Querying request attributes has a + * `ucxx::Request::queryAttributes()`. Querying request attributes has a * non-negligible runtime cost and is therefore disabled by default. * * @returns `true` if request attributes querying is enabled, `false` otherwise. @@ -1013,7 +1013,7 @@ class Worker : public Component { /** * @brief Idiomatic C++ snapshot of the worker attributes reported by `ucp_worker_query`. * - * Returned by `getAttributes()`. The address attributes (`address` / + * Returned by `queryAttributes()`. The address attributes (`address` / * `address_length`) are intentionally omitted: ucxx already exposes the worker * address via `getAddress()` as a `std::shared_ptr
` with proper RAII, * and folding the raw pointer here would either duplicate that or force the @@ -1041,7 +1041,7 @@ class Worker : public Component { * @returns An `Attributes` filled with all queried fields. * @throws ucxx::Error if an error occurred while querying worker attributes. */ - [[nodiscard]] Attributes getAttributes() const; + [[nodiscard]] Attributes queryAttributes() const; }; /** diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index a2594eba5..12e43f5cc 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -257,7 +257,7 @@ void Request::queryRequestAttributes() ucp_request_attr_t result; // Get the debug string size from worker attributes - auto worker_attr = _worker->getAttributes(); + auto worker_attr = _worker->queryAttributes(); // Allocate buffer for debug string with size from worker attributes std::vector debug_str(worker_attr.maxDebugString, '\0'); @@ -289,7 +289,7 @@ void Request::publishRequest(void* request) queryRequestAttributes(); } -Request::Attributes Request::getAttributes() +Request::Attributes Request::queryAttributes() { std::lock_guard lock(_mutex); diff --git a/cpp/src/worker.cpp b/cpp/src/worker.cpp index 86eea8b84..a3ebb03a7 100644 --- a/cpp/src/worker.cpp +++ b/cpp/src/worker.cpp @@ -197,7 +197,7 @@ std::string Worker::getInfo() return utils::decodeTextFileDescriptor(TextFileDescriptor); } -Worker::Attributes Worker::getAttributes() const +Worker::Attributes Worker::queryAttributes() const { ucp_worker_attr_t attr = { .field_mask = UCP_WORKER_ATTR_FIELD_THREAD_MODE | UCP_WORKER_ATTR_FIELD_MAX_AM_HEADER | diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 74100abe0..292cb0730 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -526,7 +526,7 @@ TEST_P(RequestTest, ProgressTagRequestAttributes) waitRequests(_worker, requests, _progressWorker); for (const auto& request : requests) { - auto debugString = request->getAttributes().debugString; + auto debugString = request->queryAttributes().debugString; ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); ASSERT_THAT(debugString, ::testing::HasSubstr(_memoryType == UCS_MEMORY_TYPE_HOST ? "host" : "cuda")); @@ -565,7 +565,7 @@ class RequestAttributesDisabledTest : public ::testing::Test { void expectAllThrow(const std::vector>& requests) const { for (const auto& request : requests) { - EXPECT_THROW(std::ignore = request->getAttributes(), ucxx::Error); + EXPECT_THROW(std::ignore = request->queryAttributes(), ucxx::Error); } } }; @@ -657,7 +657,7 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) waitRequests(_worker, requests, _progressWorker); try { - auto sendDebug = sendRequest->getAttributes().debugString; + auto sendDebug = sendRequest->queryAttributes().debugString; EXPECT_FALSE(sendDebug.empty()); EXPECT_THAT(sendDebug, ::testing::HasSubstr("length " + std::to_string(_messageSize))); } catch (const ucxx::Error&) { @@ -665,7 +665,7 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) } try { - auto recvDebug = recvRequest->getAttributes().debugString; + auto recvDebug = recvRequest->queryAttributes().debugString; EXPECT_THAT(recvDebug, ::testing::HasSubstr("no debug info")); } catch (const ucxx::Error&) { // Recv completed inline; no UCP request handle to query. @@ -692,7 +692,7 @@ TEST_P(RequestTest, ProgressAmRequestAttributes) waitRequests(_worker, requests, _progressWorker); for (const auto& request : requests) { - auto debugString = request->getAttributes().debugString; + auto debugString = request->queryAttributes().debugString; ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); } @@ -724,7 +724,7 @@ TEST_P(RequestTest, MemoryGetRequestAttributes) requests.push_back(_ep->flush()); waitRequests(_worker, requests, _progressWorker); - auto debugString = request->getAttributes().debugString; + auto debugString = request->queryAttributes().debugString; ASSERT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); copyResults(); @@ -752,7 +752,7 @@ TEST_P(RequestTest, MemoryPutRequestAttributes) waitRequests(_worker, requests, _progressWorker); try { - auto debugString = request->getAttributes().debugString; + auto debugString = request->queryAttributes().debugString; EXPECT_FALSE(debugString.empty()); EXPECT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); } catch (const ucxx::Error&) { diff --git a/cpp/tests/worker.cpp b/cpp/tests/worker.cpp index 52b6c685d..1749c8c7b 100644 --- a/cpp/tests/worker.cpp +++ b/cpp/tests/worker.cpp @@ -110,7 +110,7 @@ TEST_F(WorkerTest, HandleIsValid) { ASSERT_TRUE(_worker->getHandle() != nullptr) TEST_F(WorkerTest, QueryAttributes) { - auto attrs = _worker->getAttributes(); + auto attrs = _worker->queryAttributes(); // The worker was created with UCS_THREAD_MODE_MULTI in the constructor. EXPECT_EQ(attrs.threadMode, UCS_THREAD_MODE_MULTI); From 2e6310a544c22431563810452a0d245fe7c3624d Mon Sep 17 00:00:00 2001 From: Horde Date: Tue, 12 May 2026 15:24:24 +0000 Subject: [PATCH 29/43] Throw ucxx::NoElemError from Request::queryAttributes --- cpp/include/ucxx/request.h | 6 ++++-- cpp/src/request.cpp | 2 +- cpp/tests/request.cpp | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index cda5122ba..41938cd93 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -255,8 +255,10 @@ class Request : public Component { * are never populated and this method throws. Querying the underlying UCP request is * an implementation detail performed eagerly when the request is submitted. * - * @throw ucxx::Error if the request attributes are not available yet, including when - * request attributes querying is disabled on the owning worker. + * @throw ucxx::NoElemError if the request attributes are not available, either + * because the feature is disabled on the owning worker, or + * because UCX took an inline-completion path that produced + * no UCP request handle to query. * * @return An `Attributes` containing the request attributes. */ diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 12e43f5cc..a76a2e0ec 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -296,7 +296,7 @@ Request::Attributes Request::queryAttributes() if (_isRequestAttrValid) return _requestAttr; else - throw ucxx::Error("Request attributes not available yet"); + throw ucxx::NoElemError("Request attributes not available yet"); } std::shared_ptr Request::getRecvBuffer() { return nullptr; } diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index 292cb0730..eb2a3a144 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -565,7 +565,7 @@ class RequestAttributesDisabledTest : public ::testing::Test { void expectAllThrow(const std::vector>& requests) const { for (const auto& request : requests) { - EXPECT_THROW(std::ignore = request->queryAttributes(), ucxx::Error); + EXPECT_THROW(std::ignore = request->queryAttributes(), ucxx::NoElemError); } } }; @@ -660,14 +660,14 @@ TEST_P(RequestTest, ProgressStreamRequestAttributes) auto sendDebug = sendRequest->queryAttributes().debugString; EXPECT_FALSE(sendDebug.empty()); EXPECT_THAT(sendDebug, ::testing::HasSubstr("length " + std::to_string(_messageSize))); - } catch (const ucxx::Error&) { + } catch (const ucxx::NoElemError&) { // Send completed inline; no UCP request handle to query. } try { auto recvDebug = recvRequest->queryAttributes().debugString; EXPECT_THAT(recvDebug, ::testing::HasSubstr("no debug info")); - } catch (const ucxx::Error&) { + } catch (const ucxx::NoElemError&) { // Recv completed inline; no UCP request handle to query. } @@ -755,7 +755,7 @@ TEST_P(RequestTest, MemoryPutRequestAttributes) auto debugString = request->queryAttributes().debugString; EXPECT_FALSE(debugString.empty()); EXPECT_THAT(debugString, ::testing::HasSubstr("length " + std::to_string(_messageSize))); - } catch (const ucxx::Error&) { + } catch (const ucxx::NoElemError&) { // Request completed inline; no UCP request handle to query. } From be015a1d826a10ba1be3de574e363164cc5d04f0 Mon Sep 17 00:00:00 2001 From: Horde Date: Tue, 12 May 2026 15:31:02 +0000 Subject: [PATCH 30/43] Split disabled vs runtime-unavailable in Request::queryAttributes --- cpp/include/ucxx/request.h | 14 ++++++++++---- cpp/src/request.cpp | 14 ++++++++++---- cpp/tests/request.cpp | 2 +- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index 41938cd93..ef957bff7 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -255,10 +255,16 @@ class Request : public Component { * are never populated and this method throws. Querying the underlying UCP request is * an implementation detail performed eagerly when the request is submitted. * - * @throw ucxx::NoElemError if the request attributes are not available, either - * because the feature is disabled on the owning worker, or - * because UCX took an inline-completion path that produced - * no UCP request handle to query. + * @throw ucxx::UnsupportedError if the owning worker was not built with request + * attributes querying enabled. Requires `Worker` + * created with + * `ucxx::experimental::WorkerBuilder::requestAttributes(true)`. + * @throw ucxx::NoElemError if attributes are unavailable for this specific + * request: either because UCX took an inline-completion + * path that produced no UCP request to query, or because + * the request has not completed yet. Callers can + * distinguish the latter from the former by checking + * `isCompleted()`. * * @return An `Attributes` containing the request attributes. */ diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index a76a2e0ec..5aedf17ab 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -293,10 +293,16 @@ Request::Attributes Request::queryAttributes() { std::lock_guard lock(_mutex); - if (_isRequestAttrValid) - return _requestAttr; - else - throw ucxx::NoElemError("Request attributes not available yet"); + if (_isRequestAttrValid) return _requestAttr; + + if (!_worker->isRequestAttributesEnabled()) + throw ucxx::UnsupportedError( + "Request attributes querying is disabled on the owning worker; build the worker " + "with `ucxx::experimental::WorkerBuilder::requestAttributes(true)` to enable it"); + + throw ucxx::NoElemError( + "Request attributes are not available for this request: UCX took an inline-completion " + "path with no queryable UCP request, or the request has not completed yet"); } std::shared_ptr Request::getRecvBuffer() { return nullptr; } diff --git a/cpp/tests/request.cpp b/cpp/tests/request.cpp index eb2a3a144..57fa77543 100644 --- a/cpp/tests/request.cpp +++ b/cpp/tests/request.cpp @@ -565,7 +565,7 @@ class RequestAttributesDisabledTest : public ::testing::Test { void expectAllThrow(const std::vector>& requests) const { for (const auto& request : requests) { - EXPECT_THROW(std::ignore = request->queryAttributes(), ucxx::NoElemError); + EXPECT_THROW(std::ignore = request->queryAttributes(), ucxx::UnsupportedError); } } }; From ed307b3f595b5e52620a912dc4cc5db1fbd1161b Mon Sep 17 00:00:00 2001 From: Horde Date: Tue, 12 May 2026 16:03:10 +0000 Subject: [PATCH 31/43] Drop redundant status field from Request::Attributes --- cpp/include/ucxx/request.h | 14 ++++++++------ cpp/include/ucxx/worker.h | 15 ++++----------- cpp/src/request.cpp | 8 +++----- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index ef957bff7..c9f3166cd 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -38,9 +38,10 @@ namespace ucxx { */ class Request : public Component { protected: - /// Structure to hold cached request attributes including the debug string + /** + * @brief Request attributes reported by `ucp_request_query`. + */ struct Attributes { - ucs_status_t status{UCS_INPROGRESS}; ///< Status of the request ucs_memory_type memoryType{UCS_MEMORY_TYPE_UNKNOWN}; ///< Memory type of the request std::string debugString{}; ///< Stored debug string }; @@ -247,13 +248,14 @@ class Request : public Component { [[nodiscard]] virtual std::string getRecvHeader(); /** - * @brief Get the request attributes. + * @brief Get the requests's attributes. * - * Get the request attributes. The owning `ucxx::Worker` must have been created with - * request attributes querying enabled (see + * Returns the request attributes as a struct. The owning `ucxx::Worker` must have been + * created with request attributes querying enabled (see * `ucxx::experimental::WorkerBuilder::requestAttributes()`); otherwise the attributes * are never populated and this method throws. Querying the underlying UCP request is - * an implementation detail performed eagerly when the request is submitted. + * an implementation detail performed eagerly when the request is submitted. All + * non-status fields exposed by UCP are queried, use `getStatus()` to obtain the status. * * @throw ucxx::UnsupportedError if the owning worker was not built with request * attributes querying enabled. Requires `Worker` diff --git a/cpp/include/ucxx/worker.h b/cpp/include/ucxx/worker.h index 09f1cedc8..71e0c3fed 100644 --- a/cpp/include/ucxx/worker.h +++ b/cpp/include/ucxx/worker.h @@ -1011,13 +1011,7 @@ class Worker : public Component { RequestCallbackUserData callbackData = nullptr); /** - * @brief Idiomatic C++ snapshot of the worker attributes reported by `ucp_worker_query`. - * - * Returned by `queryAttributes()`. The address attributes (`address` / - * `address_length`) are intentionally omitted: ucxx already exposes the worker - * address via `getAddress()` as a `std::shared_ptr
` with proper RAII, - * and folding the raw pointer here would either duplicate that or force the - * caller to remember `ucp_worker_release_address`. + * @brief Worker attributes reported by `ucp_worker_query`. */ struct Attributes { /// Thread safety level the worker was created with. @@ -1033,10 +1027,9 @@ class Worker : public Component { /** * @brief Get the worker's attributes. * - * Returns the worker attributes as a C++ struct, querying UCP via - * `ucp_worker_query` under the hood. All non-address fields exposed by UCP are - * queried; see `Attributes` for the field list and the rationale for omitting - * the address. + * Returns the worker attributes as a struct, querying UCP via `ucp_worker_query` under + * the hood. All non-address fields exposed by UCP are queried, use `getAddress()` to + * obtain the address. * * @returns An `Attributes` filled with all queried fields. * @throws ucxx::Error if an error occurred while querying worker attributes. diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 5aedf17ab..063b3d325 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -262,8 +262,7 @@ void Request::queryRequestAttributes() // Allocate buffer for debug string with size from worker attributes std::vector debug_str(worker_attr.maxDebugString, '\0'); - result.field_mask = UCP_REQUEST_ATTR_FIELD_STATUS | // Request status - UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type + result.field_mask = UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type UCP_REQUEST_ATTR_FIELD_INFO_STRING | // Debug string UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; // Debug string size @@ -272,11 +271,10 @@ void Request::queryRequestAttributes() result.debug_string_size = debug_str.size(); if (UCS_PTR_IS_PTR(_request)) { - result.status = ucp_request_query(_request, &result); - if (result.status == UCS_OK && result.debug_string != nullptr) { + auto queryStatus = ucp_request_query(_request, &result); + if (queryStatus == UCS_OK && result.debug_string != nullptr) { _requestAttr.debugString = std::string(result.debug_string); _requestAttr.memoryType = result.mem_type; - _requestAttr.status = result.status; _isRequestAttrValid = true; } } From 024561b7333c101438df05e3ca92bf3b16cbb2fc Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 13:09:25 -0700 Subject: [PATCH 32/43] Revert "Fix invalid _handle usage" This reverts commit 41d80c28246c0dd4abcbdb9d8e6124f0435ff28f. --- cpp/src/endpoint.cpp | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/cpp/src/endpoint.cpp b/cpp/src/endpoint.cpp index a5bea7bae..cf682900b 100644 --- a/cpp/src/endpoint.cpp +++ b/cpp/src/endpoint.cpp @@ -313,11 +313,6 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) [this, &status, ¶m]() { _inflightRequests->cancelAll(); status = ucp_ep_close_nbx(_handle, ¶m); - // Invalidate _handle synchronously immediately, to prevent - // time window where _handle` points to freed UCP memory, usually - // observed in `populateDelayedSubmission()`. - _originalHandle = _handle; - _handle = nullptr; }, period)) continue; @@ -336,7 +331,7 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) "endpoint: %s", __func__, this, - _originalHandle, + _handle, ucs_status_string(UCS_PTR_STATUS(status))); } }, @@ -353,16 +348,14 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) "ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, all attempts to close timed out", __func__, this, - _originalHandle != nullptr ? _originalHandle : _handle); + _handle); } } else { // No progress thread: cancel inflight + FORCE close back-to-back, then // drive progress here. Same atomicity reasoning as the progress-thread // path above (no ucp_worker_progress() between cancel and FORCE close). _inflightRequests->cancelAll(); - status = ucp_ep_close_nbx(_handle, ¶m); - _originalHandle = _handle; - _handle = nullptr; + status = ucp_ep_close_nbx(_handle, ¶m); if (UCS_PTR_IS_PTR(status)) { ucs_status_t s; while ((s = ucp_request_check_status(status)) == UCS_INPROGRESS) @@ -373,12 +366,11 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) "ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, Error while closing endpoint: %s", __func__, this, - _originalHandle, + _handle, ucs_status_string(UCS_PTR_STATUS(status))); } } - ucxx_trace( - "ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, closed", __func__, this, _originalHandle); + ucxx_trace("ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, closed", __func__, this, _handle); if (UCS_PTR_IS_PTR(status)) ucp_request_free(status); @@ -388,12 +380,14 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) ucxx_debug("ucxx::Endpoint::%s, Endpoint: %p, UCP handle: %p, calling user close callback", __func__, this, - _originalHandle); + _handle); _closeCallback(_status, _closeCallbackArg); _closeCallback = nullptr; _closeCallbackArg = nullptr; } } + + std::swap(_handle, _originalHandle); } ucp_ep_h Endpoint::getHandle() { return _handle; } From 9cd6e1fe9f4368707047691c7fbc3c7a5f970fd2 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 13:09:30 -0700 Subject: [PATCH 33/43] Revert "Cancel inflight requests and submit force-close atomically in a single pre-callback" This reverts commit 6691758b2a4d3fdad538a5368de70af0ff9a3385. --- cpp/src/endpoint.cpp | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/cpp/src/endpoint.cpp b/cpp/src/endpoint.cpp index cf682900b..5d880c359 100644 --- a/cpp/src/endpoint.cpp +++ b/cpp/src/endpoint.cpp @@ -288,33 +288,8 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) bool submitted = false; for (uint64_t i = 0; i < maxAttempts && !closeSuccess; ++i) { if (!submitted) { - // Cancel inflight requests and submit FORCE close ATOMICALLY in a - // single pre-callback, with no ucp_worker_progress() between them. - // - // Why cancel here at all (UCX FORCE close already cancels endpoint - // operations): - // tag_recv requests are worker-scoped (ucp_tag_recv_nbx(worker, ...)), - // not endpoint-scoped, so ucp_ep_close_nbx(FORCE) leaves them pending. - // Without ucp_request_cancel() here, an `await ep.close()` running - // alongside an outstanding `await ep.recv()` would hang forever. - // See test_shutdown.py::test_{server,client}_shutdown. - // - // Why atomic with FORCE close (not as a separate pre-callback): - // When cancelAll and FORCE close were separate pre-callbacks (the - // old cancelInflightRequestsBlocking path), a full ucp_worker_progress() - // ran between them. That intermediate progress could leave UCT-level - // TCP pending entries half-dispatched (mid-cuMemcpyAsync staging of - // a CUDA send); the next progress after FORCE close then crashed - // dispatching them on a freed staging buffer (uct_cuda_copy_ep_get_short - // -> cuMemcpyAsync -> SIGSEGV). Running them in a single pre-callback - // matches the safe single-threaded ordering proven by the regression - // test in cpp/tests/endpoint_close_force_tcp_cuda_race.cpp. if (!worker->registerGenericPre( - [this, &status, ¶m]() { - _inflightRequests->cancelAll(); - status = ucp_ep_close_nbx(_handle, ¶m); - }, - period)) + [this, &status, ¶m]() { status = ucp_ep_close_nbx(_handle, ¶m); }, period)) continue; submitted = true; } @@ -351,10 +326,6 @@ void Endpoint::closeBlocking(uint64_t period, uint64_t maxAttempts) _handle); } } else { - // No progress thread: cancel inflight + FORCE close back-to-back, then - // drive progress here. Same atomicity reasoning as the progress-thread - // path above (no ucp_worker_progress() between cancel and FORCE close). - _inflightRequests->cancelAll(); status = ucp_ep_close_nbx(_handle, ¶m); if (UCS_PTR_IS_PTR(status)) { ucs_status_t s; From 5502e7a0831f20b2b546d1c18245cc04f204a0ad Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 22:18:07 +0200 Subject: [PATCH 34/43] Cleanup Co-authored-by: Lawrence Mitchell --- cpp/include/ucxx/worker.h | 2 +- cpp/src/request.cpp | 12 ++++-------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/cpp/include/ucxx/worker.h b/cpp/include/ucxx/worker.h index 71e0c3fed..ba58b8ac2 100644 --- a/cpp/include/ucxx/worker.h +++ b/cpp/include/ucxx/worker.h @@ -505,7 +505,7 @@ class Worker : public Component { * * @returns `true` if request attributes querying is enabled, `false` otherwise. */ - [[nodiscard]] bool isRequestAttributesEnabled() const; + [[nodiscard]] bool isRequestAttributesEnabled() const noexcept; /** * @brief Populate the futures pool. diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 063b3d325..18f5b1fa5 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -236,8 +236,7 @@ void Request::setStatus(ucs_status_t status) } // Free the UCP request inside the lock so it is mutually exclusive with - // `publishRequest()`/`queryRequestAttributes()` on the submit thread. Clearing - // `_request` afterwards keeps this idempotent if `setStatus` ever re-enters. + // `publishRequest()`/`queryRequestAttributes()` on the submit thread. if (UCS_PTR_IS_PTR(_request)) { ucp_request_free(_request); _request = nullptr; @@ -256,17 +255,14 @@ void Request::queryRequestAttributes() ucp_request_attr_t result; - // Get the debug string size from worker attributes auto worker_attr = _worker->queryAttributes(); - // Allocate buffer for debug string with size from worker attributes std::vector debug_str(worker_attr.maxDebugString, '\0'); - result.field_mask = UCP_REQUEST_ATTR_FIELD_MEM_TYPE | // Memory type - UCP_REQUEST_ATTR_FIELD_INFO_STRING | // Debug string - UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; // Debug string size + result.field_mask = UCP_REQUEST_ATTR_FIELD_MEM_TYPE | + UCP_REQUEST_ATTR_FIELD_INFO_STRING | + UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; - // Set up the debug string buffer result.debug_string = debug_str.data(); result.debug_string_size = debug_str.size(); From 3716e5d6c0c75c35ab6dce8f9087e4357b2e27c8 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 13:19:19 -0700 Subject: [PATCH 35/43] Docstring cleanup --- cpp/include/ucxx/request.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index c9f3166cd..aae631e05 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -296,9 +296,8 @@ class Request : public Component { * same `_mutex`, so this helper guarantees the query and the free are mutually * exclusive and that there are no use-after-free in threaded progress modes. * - * Every submit site (all `request` methods from child classes and the AM - * rendezvous-receive path) calls this after obtaining the request handle from the - * corresponding `ucp_*_nbx` function. + * Every submit site calls this after obtaining the request handle from the corresponding + * `ucp_*_nbx` function. * * @param[in] request the UCP request pointer returned by a non-blocking submit. */ From d2a60b0d76a4498b00aced1ffedb4c4967e5f34b Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 20:32:48 +0000 Subject: [PATCH 36/43] Fix noexcept mismatch on Worker::isRequestAttributesEnabled --- cpp/src/worker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/worker.cpp b/cpp/src/worker.cpp index a3ebb03a7..14c3be57e 100644 --- a/cpp/src/worker.cpp +++ b/cpp/src/worker.cpp @@ -220,7 +220,7 @@ bool Worker::isDelayedRequestSubmissionEnabled() const bool Worker::isFutureEnabled() const { return _enableFuture; } -bool Worker::isRequestAttributesEnabled() const { return _enableRequestAttributes; } +bool Worker::isRequestAttributesEnabled() const noexcept { return _enableRequestAttributes; } void Worker::initBlockingProgressMode() { From 6cd44e5b090a3713b4ef2aca762bc38297f09df6 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 20:33:10 +0000 Subject: [PATCH 37/43] Use memoryType sentinel instead of _isRequestAttrValid flag --- cpp/include/ucxx/request.h | 5 +++-- cpp/src/request.cpp | 8 +++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index aae631e05..2d1adea7c 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -63,8 +63,9 @@ class Request : public Component { bool _enablePythonFuture{true}; ///< Whether Python future is enabled for this request RequestCallbackUserFunction _callback{nullptr}; ///< Completion callback RequestCallbackUserData _callbackData{nullptr}; ///< Completion callback data - Attributes _requestAttr{}; ///< Request attributes queried when request is posted - bool _isRequestAttrValid{false}; ///< Whether the request attributes are valid + Attributes _requestAttr{}; ///< Request attributes queried when request is posted; the + ///< default `memoryType == UCS_MEMORY_TYPE_UNKNOWN` doubles + ///< as the "not populated yet" sentinel /** * @brief Protected constructor of an abstract `ucxx::Request`. diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 18f5b1fa5..468855079 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -250,7 +250,7 @@ void Request::queryRequestAttributes() { std::lock_guard lock(_mutex); - if (_isRequestAttrValid) return; + if (_requestAttr.memoryType != UCS_MEMORY_TYPE_UNKNOWN) return; if (!_worker->isRequestAttributesEnabled()) return; ucp_request_attr_t result; @@ -259,8 +259,7 @@ void Request::queryRequestAttributes() std::vector debug_str(worker_attr.maxDebugString, '\0'); - result.field_mask = UCP_REQUEST_ATTR_FIELD_MEM_TYPE | - UCP_REQUEST_ATTR_FIELD_INFO_STRING | + result.field_mask = UCP_REQUEST_ATTR_FIELD_MEM_TYPE | UCP_REQUEST_ATTR_FIELD_INFO_STRING | UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; result.debug_string = debug_str.data(); @@ -271,7 +270,6 @@ void Request::queryRequestAttributes() if (queryStatus == UCS_OK && result.debug_string != nullptr) { _requestAttr.debugString = std::string(result.debug_string); _requestAttr.memoryType = result.mem_type; - _isRequestAttrValid = true; } } } @@ -287,7 +285,7 @@ Request::Attributes Request::queryAttributes() { std::lock_guard lock(_mutex); - if (_isRequestAttrValid) return _requestAttr; + if (_requestAttr.memoryType != UCS_MEMORY_TYPE_UNKNOWN) return _requestAttr; if (!_worker->isRequestAttributesEnabled()) throw ucxx::UnsupportedError( From a37d5e020bd0c5f886fba2acc669323988b8f673 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 20:34:48 +0000 Subject: [PATCH 38/43] Inline queryRequestAttributes into publishRequest --- cpp/include/ucxx/request.h | 14 -------------- cpp/src/request.cpp | 10 ++-------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/cpp/include/ucxx/request.h b/cpp/include/ucxx/request.h index 2d1adea7c..c718ebca4 100644 --- a/cpp/include/ucxx/request.h +++ b/cpp/include/ucxx/request.h @@ -274,20 +274,6 @@ class Request : public Component { [[nodiscard]] Attributes queryAttributes(); protected: - /** - * @brief Query the UCP request attributes. - * - * Helper method that queries the UCP request for its attributes using ucp_request_query. - * Currently queries for: - * - Request status - * - Memory type - * - Debug string - * - * Internal companion to `queryAttributes()`: this is the side that actually calls into - * UCP and populates the cached attributes; `queryAttributes()` only returns the cache. - */ - void queryRequestAttributes(); - /** * @brief Publish the UCP request handle and capture its attributes. * diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 468855079..fd8911271 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -246,9 +246,10 @@ void Request::setStatus(ucs_status_t status) const std::string& Request::getOwnerString() const { return _ownerString; } -void Request::queryRequestAttributes() +void Request::publishRequest(void* request) { std::lock_guard lock(_mutex); + _request = request; if (_requestAttr.memoryType != UCS_MEMORY_TYPE_UNKNOWN) return; if (!_worker->isRequestAttributesEnabled()) return; @@ -274,13 +275,6 @@ void Request::queryRequestAttributes() } } -void Request::publishRequest(void* request) -{ - std::lock_guard lock(_mutex); - _request = request; - queryRequestAttributes(); -} - Request::Attributes Request::queryAttributes() { std::lock_guard lock(_mutex); From 3e92f4ba1487f7499c85b815d572d91f502a9b9e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 20:35:52 +0000 Subject: [PATCH 39/43] Move enabled-check before mutex in publishRequest and queryAttributes --- cpp/src/request.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index fd8911271..76b35e45f 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -248,11 +248,16 @@ const std::string& Request::getOwnerString() const { return _ownerString; } void Request::publishRequest(void* request) { + if (!_worker->isRequestAttributesEnabled()) { + std::lock_guard lock(_mutex); + _request = request; + return; + } + std::lock_guard lock(_mutex); _request = request; if (_requestAttr.memoryType != UCS_MEMORY_TYPE_UNKNOWN) return; - if (!_worker->isRequestAttributesEnabled()) return; ucp_request_attr_t result; @@ -277,15 +282,15 @@ void Request::publishRequest(void* request) Request::Attributes Request::queryAttributes() { - std::lock_guard lock(_mutex); - - if (_requestAttr.memoryType != UCS_MEMORY_TYPE_UNKNOWN) return _requestAttr; - if (!_worker->isRequestAttributesEnabled()) throw ucxx::UnsupportedError( "Request attributes querying is disabled on the owning worker; build the worker " "with `ucxx::experimental::WorkerBuilder::requestAttributes(true)` to enable it"); + std::lock_guard lock(_mutex); + + if (_requestAttr.memoryType != UCS_MEMORY_TYPE_UNKNOWN) return _requestAttr; + throw ucxx::NoElemError( "Request attributes are not available for this request: UCX took an inline-completion " "path with no queryable UCP request, or the request has not completed yet"); From a1a5c177f8fb64fae2a6f497c1e88fdf7d69453e Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 12 May 2026 20:37:36 +0000 Subject: [PATCH 40/43] Move debug string into cached attribute instead of copying --- cpp/src/request.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 76b35e45f..624d45422 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -3,11 +3,11 @@ * SPDX-License-Identifier: BSD-3-Clause */ #include +#include #include #include #include #include -#include #include @@ -263,18 +263,19 @@ void Request::publishRequest(void* request) auto worker_attr = _worker->queryAttributes(); - std::vector debug_str(worker_attr.maxDebugString, '\0'); + std::string debugString(worker_attr.maxDebugString, '\0'); result.field_mask = UCP_REQUEST_ATTR_FIELD_MEM_TYPE | UCP_REQUEST_ATTR_FIELD_INFO_STRING | UCP_REQUEST_ATTR_FIELD_INFO_STRING_SIZE; - result.debug_string = debug_str.data(); - result.debug_string_size = debug_str.size(); + result.debug_string = debugString.data(); + result.debug_string_size = debugString.size(); if (UCS_PTR_IS_PTR(_request)) { auto queryStatus = ucp_request_query(_request, &result); if (queryStatus == UCS_OK && result.debug_string != nullptr) { - _requestAttr.debugString = std::string(result.debug_string); + debug_str.resize(std::strlen(debugString.c_str())); + _requestAttr.debugString = std::move(debug_str); _requestAttr.memoryType = result.mem_type; } } From f1d488ddf092067f2fb0ca7888e27acf94362f25 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 13 May 2026 05:07:41 -0700 Subject: [PATCH 41/43] Fix outdated variable name --- cpp/src/request.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 624d45422..79d31a7f5 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -274,7 +274,7 @@ void Request::publishRequest(void* request) if (UCS_PTR_IS_PTR(_request)) { auto queryStatus = ucp_request_query(_request, &result); if (queryStatus == UCS_OK && result.debug_string != nullptr) { - debug_str.resize(std::strlen(debugString.c_str())); + debugString.resize(std::strlen(debugString.c_str())); _requestAttr.debugString = std::move(debug_str); _requestAttr.memoryType = result.mem_type; } From 0c35d5eff1d2a6f780984df8c11f4c30e1b42cdb Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 13 May 2026 05:43:32 -0700 Subject: [PATCH 42/43] More fixes --- cpp/src/request.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/request.cpp b/cpp/src/request.cpp index 79d31a7f5..05eb48f58 100644 --- a/cpp/src/request.cpp +++ b/cpp/src/request.cpp @@ -275,7 +275,7 @@ void Request::publishRequest(void* request) auto queryStatus = ucp_request_query(_request, &result); if (queryStatus == UCS_OK && result.debug_string != nullptr) { debugString.resize(std::strlen(debugString.c_str())); - _requestAttr.debugString = std::move(debug_str); + _requestAttr.debugString = std::move(debugString); _requestAttr.memoryType = result.mem_type; } } From 8dfa5aabfd93da5cc0bc8e19b757bff2bf53808a Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Wed, 13 May 2026 07:05:17 -0700 Subject: [PATCH 43/43] Increase C++ test timeout --- ci/run_cpp.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_cpp.sh b/ci/run_cpp.sh index 0d0a4ab14..bf725e8f6 100755 --- a/ci/run_cpp.sh +++ b/ci/run_cpp.sh @@ -45,7 +45,7 @@ else fi run_cpp_tests() { - CMD_LINE="python ${TIMEOUT_TOOL_PATH} $((10*60)) ${GTESTS_PATH}/UCXX_TEST" + CMD_LINE="python ${TIMEOUT_TOOL_PATH} $((20*60)) ${GTESTS_PATH}/UCXX_TEST" log_command "${CMD_LINE}" UCX_TCP_CM_REUSEADDR=y ${CMD_LINE}