From dfe14d7bd6b7092a735f3f8358a2fb65119d614e Mon Sep 17 00:00:00 2001 From: marcoSanti Date: Sun, 26 Apr 2026 13:20:01 +0200 Subject: [PATCH 01/13] Added Capio Discovery Service This commit adds the CAPIO discovery service which allows a CAPIO server instance to detect other running server instances. It also moves the CAPIO canary flag to be managed by the discovery service, and changes the CLI parsing logic accordingly to correctly manage the startup process. --- capio/common/shm.hpp | 1 + capio/server/capio_server.cpp | 6 +- capio/server/include/remote/backend.hpp | 6 + capio/server/include/remote/backend/mpi.hpp | 1 + capio/server/include/remote/backend/none.hpp | 1 + capio/server/include/remote/discovery.hpp | 60 ++++++++++ capio/server/include/utils/signals.hpp | 10 +- capio/server/src/discovery_service.cpp | 109 +++++++++++++++++++ capio/server/src/mpi_backend.cpp | 1 + capio/server/src/none_backend.cpp | 3 +- capio/server/src/shm_canary.cpp | 27 +++++ capio/tests/unit/server/src/capio_file.cpp | 1 + 12 files changed, 220 insertions(+), 6 deletions(-) create mode 100644 capio/server/include/remote/discovery.hpp create mode 100644 capio/server/src/discovery_service.cpp create mode 100644 capio/server/src/shm_canary.cpp diff --git a/capio/common/shm.hpp b/capio/common/shm.hpp index c425e080b..99c113289 100644 --- a/capio/common/shm.hpp +++ b/capio/common/shm.hpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/capio/server/capio_server.cpp b/capio/server/capio_server.cpp index 403ec8f49..5ee3f08cb 100644 --- a/capio/server/capio_server.cpp +++ b/capio/server/capio_server.cpp @@ -28,6 +28,7 @@ #include "common/requests.hpp" #include "common/semaphore.hpp" #include "remote/backend.hpp" +#include "remote/discovery.hpp" #include "storage/capio_file.hpp" #include "utils/common.hpp" #include "utils/env.hpp" @@ -36,6 +37,7 @@ ClientManager *client_manager; StorageManager *storage_manager; Backend *backend; +DiscoveryService 
*discovery_service; #include "handlers.hpp" #include "utils/cli_parser.hpp" @@ -140,13 +142,13 @@ int main(int argc, char **argv) { capio_cl_engine->print(); - backend = select_backend(configuration.backend_name, argc, argv); + discovery_service = new DiscoveryService(); + backend = select_backend(configuration.backend_name, argc, argv); START_LOG(gettid(), "call()"); open_files_location(); - shm_canary = new CapioShmCanary(capio_cl_engine->getWorkflowName()); storage_manager = new StorageManager(); client_manager = new ClientManager(); diff --git a/capio/server/include/remote/backend.hpp b/capio/server/include/remote/backend.hpp index f1c827821..50b336b73 100644 --- a/capio/server/include/remote/backend.hpp +++ b/capio/server/include/remote/backend.hpp @@ -87,6 +87,12 @@ class Backend { * @param target */ virtual void send_request(const char *message, int message_len, const std::string &target) = 0; + + /** + * Connect this server instance to a remote server instance + * @param target Remote server instance identification + */ + virtual void connect_to(const std::string &target) = 0; }; #endif // CAPIO_SERVER_REMOTE_BACKEND_HPP diff --git a/capio/server/include/remote/backend/mpi.hpp b/capio/server/include/remote/backend/mpi.hpp index fca12a752..be185f01e 100644 --- a/capio/server/include/remote/backend/mpi.hpp +++ b/capio/server/include/remote/backend/mpi.hpp @@ -27,6 +27,7 @@ class MPIBackend : public Backend { void send_file(char *shm, long int nbytes, const std::string &target) override; void send_request(const char *message, int message_len, const std::string &target) override; void recv_file(char *shm, const std::string &source, long int bytes_expected) override; + void connect_to(const std::string &target) override; }; class MPISYNCBackend final : public MPIBackend { diff --git a/capio/server/include/remote/backend/none.hpp b/capio/server/include/remote/backend/none.hpp index 3faae205f..c0cd2298a 100644 --- a/capio/server/include/remote/backend/none.hpp +++ 
b/capio/server/include/remote/backend/none.hpp @@ -11,5 +11,6 @@ class NoneBackend final : public Backend { void send_file(char *shm, long int nbytes, const std::string &target) override; void send_request(const char *message, int message_len, const std::string &target) override; void recv_file(char *shm, const std::string &source, long int bytes_expected) override; + void connect_to(const std::string &target) override; }; #endif // CAPIO_SERVER_REMOTE_BACKEND_NONE_HPP diff --git a/capio/server/include/remote/discovery.hpp b/capio/server/include/remote/discovery.hpp new file mode 100644 index 000000000..eefc95f90 --- /dev/null +++ b/capio/server/include/remote/discovery.hpp @@ -0,0 +1,60 @@ +#ifndef CAPIO_DISCOVERY_HPP +#define CAPIO_DISCOVERY_HPP + +#include "common/shm.hpp" +#include +#include + +class CapioShmCanary { + int _shm_id; + std::string _canary_name; + + public: + explicit CapioShmCanary(std::string capio_workflow_name); + ~CapioShmCanary(); +}; + +class DiscoveryService { + bool terminate = false; + + /// @brief Handle for thread listening for other server instances + std::thread *listener_thread = nullptr; + /// @brief Handle for thread advertising this server instance + std::thread *advertisement_thread = nullptr; + + /// @brief Token to be advertised by this server + std::string advertisement_token; + + /// @brief Canary variable to detect other server instances running locally that are logically + /// equivalent to the one starting up + CapioShmCanary *shm_canary; + + public: + DiscoveryService(); + ~DiscoveryService(); + + /** + * Set the token to be advertised so that other server instance may connect to this instance. 
+ * Token needs to be provided by an instance of a backend, according to backend specification + * for incoming connection + * @param token + */ + void setAdvertisementToken(const std::string &token); + + /** + * Start to advertise the token, and to scan for tokens from other servers + * @param adv_delay Delay between each advertisement. + */ + void start(unsigned int adv_delay); + + /** + * Stop current server instance from advertising itself and from receiving advertisements from + * other server instances. + * + * NOTE: this method does not destroy the CAPIO canary variable. for that the destruction of the + * class instance is required. + */ + void stop(); +}; + +#endif // CAPIO_DISCOVERY_HPP \ No newline at end of file diff --git a/capio/server/include/utils/signals.hpp b/capio/server/include/utils/signals.hpp index 94660c0a1..0f711611d 100644 --- a/capio/server/include/utils/signals.hpp +++ b/capio/server/include/utils/signals.hpp @@ -4,6 +4,7 @@ #include #include "remote/backend.hpp" +#include "remote/discovery.hpp" #include "server_println.hpp" #ifdef CAPIO_COVERAGE @@ -23,19 +24,22 @@ void sig_term_handler(int signum, siginfo_t *info, void *ptr) { } // free all the memory used - + discovery_service->stop(); delete client_manager; delete storage_manager; server_println("data_buffers cleanup completed", CapioCLEngine::get().getWorkflowName(), CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, __func__); + delete backend; + delete client_manager; + delete storage_manager; + #ifdef CAPIO_COVERAGE __gcov_dump(); #endif - delete backend; - delete shm_canary; + delete discovery_service; server_println("shutdown completed", CapioCLEngine::get().getWorkflowName(), CAPIO_LOG_SERVER_CLI_LEVEL_INFO, __func__); diff --git a/capio/server/src/discovery_service.cpp b/capio/server/src/discovery_service.cpp new file mode 100644 index 000000000..2a337819e --- /dev/null +++ b/capio/server/src/discovery_service.cpp @@ -0,0 +1,109 @@ +#include +#include + +#include "common/logger.hpp" 
+#include "remote/backend.hpp" +#include "remote/discovery.hpp" +#include "utils/capiocl_adapter.hpp" +#include "utils/common.hpp" + +extern Backend *backend; + +constexpr char CAPIO_MULTICAST_ADDRESS[] = "224.0.0.2"; +constexpr int CAPIO_MULTICAST_PORT = 22334; +int REUSE_MCAST_SOCKET = 1; + +void advertise(const bool *terminate, const unsigned int delay_ms, + const std::string &advertisement_token) { + const int advert_sock_fd = socket(AF_INET, SOCK_DGRAM, 0); + sockaddr_in advert_multicast_addr{}; + advert_multicast_addr.sin_family = AF_INET; + advert_multicast_addr.sin_port = htons(CAPIO_MULTICAST_PORT); + advert_multicast_addr.sin_addr.s_addr = inet_addr(CAPIO_MULTICAST_ADDRESS); + + while (!*terminate) { + std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms)); + sendto(advert_sock_fd, advertisement_token.data(), advertisement_token.size(), 0, + reinterpret_cast(&advert_multicast_addr), sizeof(advert_multicast_addr)); + } + + close(advert_sock_fd); +} + +void thread_discovery_service(const bool *terminate) { + START_LOG(gettid(), "call()"); + + int sockfd = socket(AF_INET, SOCK_DGRAM, 0); + + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &REUSE_MCAST_SOCKET, sizeof(REUSE_MCAST_SOCKET)); + + timeval tv{}; + tv.tv_sec = 0; + tv.tv_usec = 100000; // 100,000 microseconds = 100ms + setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + + sockaddr_in local_addr{}; + local_addr.sin_family = AF_INET; + local_addr.sin_port = htons(CAPIO_MULTICAST_PORT); + local_addr.sin_addr.s_addr = htonl(INADDR_ANY); + bind(sockfd, reinterpret_cast(&local_addr), sizeof(local_addr)); + + ip_mreq mreq{}; + mreq.imr_multiaddr.s_addr = inet_addr(CAPIO_MULTICAST_ADDRESS); + mreq.imr_interface.s_addr = htonl(INADDR_ANY); + setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); + + char incoming_token[2 * HOST_NAME_MAX] = {0}; + + while (!*terminate) { + + bzero(incoming_token, 2 * HOST_NAME_MAX); + + if (recvfrom(sockfd, incoming_token, 
sizeof(incoming_token) - 1, 0, nullptr, nullptr) > 0) { + backend->connect_to(incoming_token); + } + } + close(sockfd); +} + +void DiscoveryService::start(unsigned int adv_delay) { + if (advertisement_token.empty()) { + throw std::runtime_error("Advertisement token is empty"); + } + + listener_thread = new std::thread(thread_discovery_service, &terminate); + advertisement_thread = + new std::thread(advertise, &terminate, adv_delay, std::ref(advertisement_token)); + + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "DiscoveryService will advertise " + + advertisement_token + " every " + + std::to_string(adv_delay) + "ms."); +} +void DiscoveryService::stop() { + terminate = true; + + if (listener_thread != nullptr && listener_thread->joinable()) { + listener_thread->join(); + listener_thread = nullptr; + } + + if (listener_thread != nullptr && advertisement_thread->joinable()) { + advertisement_thread->join(); + advertisement_thread = nullptr; + } +} + +DiscoveryService::DiscoveryService() { + shm_canary = new CapioShmCanary(CapioCLEngine::get().getWorkflowName()); +} + +DiscoveryService::~DiscoveryService() { + if (!terminate) { + stop(); + } + delete shm_canary; +} + +void DiscoveryService::setAdvertisementToken(const std::string &token) { + this->advertisement_token = token; +} \ No newline at end of file diff --git a/capio/server/src/mpi_backend.cpp b/capio/server/src/mpi_backend.cpp index ead84a42c..00ebe0aec 100644 --- a/capio/server/src/mpi_backend.cpp +++ b/capio/server/src/mpi_backend.cpp @@ -119,6 +119,7 @@ void MPIBackend::recv_file(char *shm, const std::string &source, long int bytes_ LOG("Chunk size is %ld bytes", bytes_received); } } +void MPIBackend::connect_to(const std::string &target) { return; } MPISYNCBackend::MPISYNCBackend(int argc, char *argv[]) : MPIBackend(argc, argv) { START_LOG(gettid(), "call()"); diff --git a/capio/server/src/none_backend.cpp b/capio/server/src/none_backend.cpp index e0e8e3208..588d5d843 100644 --- 
a/capio/server/src/none_backend.cpp +++ b/capio/server/src/none_backend.cpp @@ -30,4 +30,5 @@ void NoneBackend::send_request(const char *message, const int message_len, void NoneBackend::recv_file(char *shm, const std::string &source, const long int bytes_expected) { START_LOG(gettid(), "call(shm=%ld, source=%s, bytes_expected=%ld)", shm, source.c_str(), bytes_expected); -} \ No newline at end of file +} +void NoneBackend::connect_to(const std::string &target) { return; } \ No newline at end of file diff --git a/capio/server/src/shm_canary.cpp b/capio/server/src/shm_canary.cpp new file mode 100644 index 000000000..1c9321880 --- /dev/null +++ b/capio/server/src/shm_canary.cpp @@ -0,0 +1,27 @@ +#include "common/env.hpp" +#include "common/logger.hpp" +#include "remote/discovery.hpp" +#include "utils/common.hpp" + +CapioShmCanary::CapioShmCanary(std::string capio_workflow_name) + : _canary_name(capio_workflow_name) { + START_LOG(capio_syscall(SYS_gettid), "call(capio_workflow_name: %s)", _canary_name.data()); + if (_canary_name.empty()) { + _canary_name = get_capio_workflow_name(); + } + _shm_id = shm_open(_canary_name.data(), O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + if (_shm_id == -1) { + + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_ERROR, + "Error: canary variable " + _canary_name + " already exists!"); + LOG(CAPIO_SHM_CANARY_ERROR, _canary_name.data()); + ERR_EXIT("ERR: shm canary flag already exists"); + } +} + +CapioShmCanary::~CapioShmCanary() { + START_LOG(capio_syscall(SYS_gettid), "call()"); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, "Removing shared memory canary flag"); + close(_shm_id); + SHM_DESTROY_CHECK(_canary_name.c_str()); +} diff --git a/capio/tests/unit/server/src/capio_file.cpp b/capio/tests/unit/server/src/capio_file.cpp index db7ead849..28095dfe9 100644 --- a/capio/tests/unit/server/src/capio_file.cpp +++ b/capio/tests/unit/server/src/capio_file.cpp @@ -338,6 +338,7 @@ class MockBackend : public Backend { RemoteRequest read_next_request() 
override { return {nullptr, ""}; } void send_file(char *shm, long int nbytes, const std::string &target) override {} void send_request(const char *message, int message_len, const std::string &target) override {} + void connect_to(const std::string &target) override {} }; class MockBackendTestFixture : public ::testing::Test { From 5f76798556caec6ac79a41442786befef9b2fe9b Mon Sep 17 00:00:00 2001 From: marcoSanti Date: Sun, 26 Apr 2026 14:08:14 +0200 Subject: [PATCH 02/13] Began work to add FS based service discovery --- .gitignore | 2 ++ capio/server/include/remote/discovery.hpp | 27 ++++++++++++++--------- capio/server/include/utils/shm_canary.hpp | 13 +++++++++++ capio/server/src/discovery_service.cpp | 15 +++++++++++++ capio/server/src/shm_canary.cpp | 5 +++-- 5 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 capio/server/include/utils/shm_canary.hpp diff --git a/.gitignore b/.gitignore index d29456e74..20776d91a 100644 --- a/.gitignore +++ b/.gitignore @@ -48,4 +48,6 @@ capio_logs debug build +.capio_tokens + cmake_test_discovery*.json diff --git a/capio/server/include/remote/discovery.hpp b/capio/server/include/remote/discovery.hpp index eefc95f90..6b8630844 100644 --- a/capio/server/include/remote/discovery.hpp +++ b/capio/server/include/remote/discovery.hpp @@ -1,20 +1,14 @@ #ifndef CAPIO_DISCOVERY_HPP #define CAPIO_DISCOVERY_HPP -#include "common/shm.hpp" #include #include -class CapioShmCanary { - int _shm_id; - std::string _canary_name; - - public: - explicit CapioShmCanary(std::string capio_workflow_name); - ~CapioShmCanary(); -}; +#include "utils/shm_canary.hpp" class DiscoveryService { + + /// @brief Variable used to signal termination to child threads bool terminate = false; /// @brief Handle for thread listening for other server instances @@ -29,20 +23,31 @@ class DiscoveryService { /// equivalent to the one starting up CapioShmCanary *shm_canary; + std::filesystem::path token_directory_path = ".capio_tokens/"; + 
std::filesystem::path token_filename; + public: + /// @brief Default constructor DiscoveryService(); + + /// @brief Default destructor ~DiscoveryService(); /** * Set the token to be advertised so that other server instance may connect to this instance. * Token needs to be provided by an instance of a backend, according to backend specification - * for incoming connection + * for incoming connection. + * + * Once the token is set, a new hidden file with the current token is stored within a hidden + * directory. * @param token */ void setAdvertisementToken(const std::string &token); /** - * Start to advertise the token, and to scan for tokens from other servers + * Start to advertise the token, and to scan for tokens from other servers. Advertisement works + * by sending multicast traffic, and by scanning files contained within the hidden directory + * with aliveness tokens. * @param adv_delay Delay between each advertisement. */ void start(unsigned int adv_delay); diff --git a/capio/server/include/utils/shm_canary.hpp b/capio/server/include/utils/shm_canary.hpp new file mode 100644 index 000000000..765ab7c4c --- /dev/null +++ b/capio/server/include/utils/shm_canary.hpp @@ -0,0 +1,13 @@ +#ifndef CAPIO_SHM_CANARY_HPP +#define CAPIO_SHM_CANARY_HPP +#include + +class CapioShmCanary { + int _shm_id; + std::string _canary_name; + + public: + explicit CapioShmCanary(const std::string &capio_workflow_name); + ~CapioShmCanary(); +}; +#endif // CAPIO_SHM_CANARY_HPP diff --git a/capio/server/src/discovery_service.cpp b/capio/server/src/discovery_service.cpp index 2a337819e..d90dc6366 100644 --- a/capio/server/src/discovery_service.cpp +++ b/capio/server/src/discovery_service.cpp @@ -95,15 +95,30 @@ void DiscoveryService::stop() { DiscoveryService::DiscoveryService() { shm_canary = new CapioShmCanary(CapioCLEngine::get().getWorkflowName()); + + if (!std::filesystem::exists(token_directory_path)) { + std::filesystem::create_directory(token_directory_path); + } + + std::string 
node_name(HOST_NAME_MAX, '\0'); + gethostname(node_name.data(), node_name.size()); + node_name.resize(strlen(node_name.data())); + + token_filename = node_name + ".capio"; } DiscoveryService::~DiscoveryService() { if (!terminate) { stop(); } + std::filesystem::remove(token_directory_path / token_filename); delete shm_canary; } void DiscoveryService::setAdvertisementToken(const std::string &token) { this->advertisement_token = token; + + std::ofstream token_file(token_directory_path / token_filename); + token_file << token; + token_file.close(); } \ No newline at end of file diff --git a/capio/server/src/shm_canary.cpp b/capio/server/src/shm_canary.cpp index 1c9321880..4c0bdceab 100644 --- a/capio/server/src/shm_canary.cpp +++ b/capio/server/src/shm_canary.cpp @@ -1,9 +1,10 @@ +#include "utils/shm_canary.hpp" + #include "common/env.hpp" #include "common/logger.hpp" -#include "remote/discovery.hpp" #include "utils/common.hpp" -CapioShmCanary::CapioShmCanary(std::string capio_workflow_name) +CapioShmCanary::CapioShmCanary(const std::string &capio_workflow_name) : _canary_name(capio_workflow_name) { START_LOG(capio_syscall(SYS_gettid), "call(capio_workflow_name: %s)", _canary_name.data()); if (_canary_name.empty()) { From 58b3585de9ab76fe2e0240d85f9b00b752ab25f1 Mon Sep 17 00:00:00 2001 From: marcoSanti Date: Sun, 26 Apr 2026 14:27:06 +0200 Subject: [PATCH 03/13] Added FS based discovery service --- capio/server/include/remote/discovery.hpp | 8 +++-- capio/server/src/discovery_service.cpp | 38 +++++++++++++++++++---- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/capio/server/include/remote/discovery.hpp b/capio/server/include/remote/discovery.hpp index 6b8630844..9fe6b7d4e 100644 --- a/capio/server/include/remote/discovery.hpp +++ b/capio/server/include/remote/discovery.hpp @@ -11,10 +11,12 @@ class DiscoveryService { /// @brief Variable used to signal termination to child threads bool terminate = false; - /// @brief Handle for thread listening for 
other server instances - std::thread *listener_thread = nullptr; + /// @brief Handle for multicast based discovery thread + std::thread *mcast_listener_thread = nullptr; + /// @brief Handle for file system based discovery thread + std::thread *fs_listener_thread = nullptr; /// @brief Handle for thread advertising this server instance - std::thread *advertisement_thread = nullptr; + std::thread *advertisement_thread = nullptr; /// @brief Token to be advertised by this server std::string advertisement_token; diff --git a/capio/server/src/discovery_service.cpp b/capio/server/src/discovery_service.cpp index d90dc6366..753c35fd4 100644 --- a/capio/server/src/discovery_service.cpp +++ b/capio/server/src/discovery_service.cpp @@ -30,7 +30,7 @@ void advertise(const bool *terminate, const unsigned int delay_ms, close(advert_sock_fd); } -void thread_discovery_service(const bool *terminate) { +void mcast_thread_discovery_service(const bool *terminate) { START_LOG(gettid(), "call()"); int sockfd = socket(AF_INET, SOCK_DGRAM, 0); @@ -66,12 +66,33 @@ void thread_discovery_service(const bool *terminate) { close(sockfd); } +void fs_discovery_service(const bool *terminate, const std::filesystem::path &token_directory_path, + const unsigned int delay_ms) { + std::vector found_paths; + + while (!*terminate) { + for (auto &entry : std::filesystem::directory_iterator(token_directory_path)) { + if (std::find(found_paths.begin(), found_paths.end(), entry.path().string()) == + found_paths.end()) { + found_paths.push_back(entry.path().string()); + std::ifstream input(entry.path()); + std::string token; + input >> token; + backend->connect_to(token); + } + } + std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms)); + } +} + void DiscoveryService::start(unsigned int adv_delay) { if (advertisement_token.empty()) { throw std::runtime_error("Advertisement token is empty"); } - listener_thread = new std::thread(thread_discovery_service, &terminate); + mcast_listener_thread = new 
std::thread(mcast_thread_discovery_service, &terminate); + fs_listener_thread = + new std::thread(fs_discovery_service, &terminate, token_directory_path, adv_delay); advertisement_thread = new std::thread(advertise, &terminate, adv_delay, std::ref(advertisement_token)); @@ -82,12 +103,17 @@ void DiscoveryService::start(unsigned int adv_delay) { void DiscoveryService::stop() { terminate = true; - if (listener_thread != nullptr && listener_thread->joinable()) { - listener_thread->join(); - listener_thread = nullptr; + if (mcast_listener_thread != nullptr && mcast_listener_thread->joinable()) { + mcast_listener_thread->join(); + mcast_listener_thread = nullptr; + } + + if (fs_listener_thread != nullptr && fs_listener_thread->joinable()) { + fs_listener_thread->join(); + fs_listener_thread = nullptr; } - if (listener_thread != nullptr && advertisement_thread->joinable()) { + if (advertisement_thread != nullptr && advertisement_thread->joinable()) { advertisement_thread->join(); advertisement_thread = nullptr; } From 0f64ed8c0636b03d03ad9bfd6065061a4addcba4 Mon Sep 17 00:00:00 2001 From: marcoSanti Date: Mon, 27 Apr 2026 11:41:05 +0200 Subject: [PATCH 04/13] Cleanup and comments --- capio/common/constants.hpp | 4 + capio/common/shm.hpp | 43 --------- capio/server/include/remote/discovery.hpp | 57 ++++++++---- capio/server/src/discovery_service.cpp | 104 ++++++++++++++-------- 4 files changed, 109 insertions(+), 99 deletions(-) diff --git a/capio/common/constants.hpp b/capio/common/constants.hpp index 3a230f4bc..1c5e8d66e 100644 --- a/capio/common/constants.hpp +++ b/capio/common/constants.hpp @@ -11,6 +11,10 @@ constexpr size_t CAPIO_DEFAULT_DIR_INITIAL_SIZE = 1024L * 1024 * 1024; constexpr off64_t CAPIO_DEFAULT_FILE_INITIAL_SIZE = 1024L * 1024 * 1024 * 4; +// CAPIO backend constants +constexpr char CAPIO_MCAST_ADV_DEFAULT_ADDR[] = "224.0.0.2"; +constexpr unsigned int CAPIO_MCAST_ADV_DEFAULT_PORT = 22334; + // CAPIO default values for shared memory constexpr char 
CAPIO_DEFAULT_WORKFLOW_NAME[] = "CAPIO"; constexpr char CAPIO_DEFAULT_APP_NAME[] = "default_app"; diff --git a/capio/common/shm.hpp b/capio/common/shm.hpp index 99c113289..7f0c0515d 100644 --- a/capio/common/shm.hpp +++ b/capio/common/shm.hpp @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -44,48 +43,6 @@ #endif -class CapioShmCanary { - int _shm_id; - std::string _canary_name; - - public: - explicit CapioShmCanary(std::string capio_workflow_name) : _canary_name(capio_workflow_name) { - START_LOG(capio_syscall(SYS_gettid), "call(capio_workflow_name: %s)", _canary_name.data()); - if (_canary_name.empty()) { - _canary_name = get_capio_workflow_name(); - } - _shm_id = shm_open(_canary_name.data(), O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); - if (_shm_id == -1) { - LOG(CAPIO_SHM_CANARY_ERROR, _canary_name.data()); -#ifndef __CAPIO_POSIX - const auto message = new char[strlen(CAPIO_SHM_CANARY_ERROR)]; - sprintf(message, CAPIO_SHM_CANARY_ERROR, _canary_name.data()); - server_println(message, capio_workflow_name, CAPIO_LOG_SERVER_CLI_LEVEL_ERROR, - "CapioShmCanary"); - delete[] message; -#endif - ERR_EXIT("ERR: shm canary flag already exists"); - } -#ifndef __CAPIO_POSIX - server_println("Created Capio SHM canary: " + _canary_name, capio_workflow_name, - CAPIO_LOG_SERVER_CLI_LEVEL_STATUS, "CapioShmCanary"); -#endif - }; - - ~CapioShmCanary() { - START_LOG(capio_syscall(SYS_gettid), "call()"); -#ifndef __CAPIO_POSIX - server_println("Removing shared memory canary flag", get_capio_workflow_name(), - CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, "CapioShmCanary"); -#endif - close(_shm_id); - SHM_DESTROY_CHECK(_canary_name.c_str()); - } -}; - -// FIXME: Remove the inline specifier by using extern -inline CapioShmCanary *shm_canary; - inline void *create_shm(const std::string &shm_name, const long int size) { START_LOG(capio_syscall(SYS_gettid), "call(shm_name=%s, size=%ld)", shm_name.c_str(), size); diff --git a/capio/server/include/remote/discovery.hpp 
b/capio/server/include/remote/discovery.hpp index 9fe6b7d4e..4454a808b 100644 --- a/capio/server/include/remote/discovery.hpp +++ b/capio/server/include/remote/discovery.hpp @@ -6,6 +6,13 @@ #include "utils/shm_canary.hpp" +/** + * Discovery service. Responsible for: + * - Detect other server instances running in the same node with the same workflow name (and halts + * startup if it finds one) + * - Detect other remote running server instances of capio servers and issue commands to the backend + * to open a connection with them as soon as they are found. + */ class DiscoveryService { /// @brief Variable used to signal termination to child threads @@ -25,34 +32,48 @@ class DiscoveryService { /// equivalent to the one starting up CapioShmCanary *shm_canary; - std::filesystem::path token_directory_path = ".capio_tokens/"; + /// @brief Directory to look into for CAPIO tokens + std::filesystem::path token_directory_path; + /// @brief This server instance token filename std::filesystem::path token_filename; + /// @brief Multicast address + const std::string capio_multicast_adv_address; + + /// @brief multicast port + const unsigned int capio_multicast_adv_port; + public: - /// @brief Default constructor - DiscoveryService(); + /** + * Construct a new Discovery Service class + * @param mcast_addr Address to send and receive aliveness token from other servers + * @param mcast_port Port to send and receive aliveness token from other servers + */ + explicit DiscoveryService(const std::string &mcast_addr = CAPIO_MCAST_ADV_DEFAULT_ADDR, + unsigned int mcast_port = CAPIO_MCAST_ADV_DEFAULT_PORT); /// @brief Default destructor ~DiscoveryService(); /** - * Set the token to be advertised so that other server instance may connect to this instance. - * Token needs to be provided by an instance of a backend, according to backend specification - * for incoming connection. + * @brief Configures and starts the discovery service to advertise and scan for tokens. 
* - * Once the token is set, a new hidden file with the current token is stored within a hidden - * directory. - * @param token - */ - void setAdvertisementToken(const std::string &token); - - /** - * Start to advertise the token, and to scan for tokens from other servers. Advertisement works - * by sending multicast traffic, and by scanning files contained within the hidden directory - * with aliveness tokens. - * @param adv_delay Delay between each advertisement. + * Sets the advertisement token used by other server instances to establish a connection. + * The token must conform to the specific backend requirements for incoming connections. + * * @note The token is not passed via the constructor because the Discovery Service + * must be instantiated before the Backend provides the token. + * + * Once called, this method: + * 1. Stores the current token in a hidden file within a designated directory. + * 2. Initiates multicast traffic to advertise the local token. + * 3. Scans the hidden directory for aliveness tokens from other servers. + * + * @param adv_delay The interval (in milliseconds/seconds) between advertisement broadcasts. + * @param token The authentication or identification string provided by the backend. 
+ * @param token_directory directory to store capio aliveness tokens */ - void start(unsigned int adv_delay); + void start(unsigned int adv_delay, const std::string &token, + const std::string &token_directory = ".capio_tokens/"); /** * Stop current server instance from advertising itself and from receiving advertisements from diff --git a/capio/server/src/discovery_service.cpp b/capio/server/src/discovery_service.cpp index 753c35fd4..5bf856f70 100644 --- a/capio/server/src/discovery_service.cpp +++ b/capio/server/src/discovery_service.cpp @@ -9,17 +9,17 @@ extern Backend *backend; -constexpr char CAPIO_MULTICAST_ADDRESS[] = "224.0.0.2"; -constexpr int CAPIO_MULTICAST_PORT = 22334; -int REUSE_MCAST_SOCKET = 1; +// constant required by setsockopt() +int REUSE_MCAST_SOCKET = 1; void advertise(const bool *terminate, const unsigned int delay_ms, - const std::string &advertisement_token) { + const std::string &advertisement_token, const std::string &adv_addr, + const unsigned int adv_port) { const int advert_sock_fd = socket(AF_INET, SOCK_DGRAM, 0); sockaddr_in advert_multicast_addr{}; advert_multicast_addr.sin_family = AF_INET; - advert_multicast_addr.sin_port = htons(CAPIO_MULTICAST_PORT); - advert_multicast_addr.sin_addr.s_addr = inet_addr(CAPIO_MULTICAST_ADDRESS); + advert_multicast_addr.sin_port = htons(adv_port); + advert_multicast_addr.sin_addr.s_addr = inet_addr(adv_addr.c_str()); while (!*terminate) { std::this_thread::sleep_for(std::chrono::milliseconds(delay_ms)); @@ -30,7 +30,8 @@ void advertise(const bool *terminate, const unsigned int delay_ms, close(advert_sock_fd); } -void mcast_thread_discovery_service(const bool *terminate) { +void mcast_thread_discovery_service(const bool *terminate, const std::string &adv_addr, + const unsigned int adv_port) { START_LOG(gettid(), "call()"); int sockfd = socket(AF_INET, SOCK_DGRAM, 0); @@ -44,12 +45,18 @@ void mcast_thread_discovery_service(const bool *terminate) { sockaddr_in local_addr{}; local_addr.sin_family = 
AF_INET; - local_addr.sin_port = htons(CAPIO_MULTICAST_PORT); + local_addr.sin_port = htons(adv_port); local_addr.sin_addr.s_addr = htonl(INADDR_ANY); - bind(sockfd, reinterpret_cast(&local_addr), sizeof(local_addr)); + if (bind(sockfd, reinterpret_cast(&local_addr), sizeof(local_addr)) == -1) { + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_ERROR, + "Error: unable to bind to multicast socket. Error is: " + + std::string(std::strerror(errno))); + // halt execution and return + return; + } ip_mreq mreq{}; - mreq.imr_multiaddr.s_addr = inet_addr(CAPIO_MULTICAST_ADDRESS); + mreq.imr_multiaddr.s_addr = inet_addr(adv_addr.c_str()); mreq.imr_interface.s_addr = htonl(INADDR_ANY); setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); @@ -68,16 +75,26 @@ void mcast_thread_discovery_service(const bool *terminate) { void fs_discovery_service(const bool *terminate, const std::filesystem::path &token_directory_path, const unsigned int delay_ms) { - std::vector found_paths; + // local cache to not reload tokens already found + // TODO: relax this by storing also last modified date, and reload in case changes occurred + // after first read + + std::vector cache; while (!*terminate) { - for (auto &entry : std::filesystem::directory_iterator(token_directory_path)) { - if (std::find(found_paths.begin(), found_paths.end(), entry.path().string()) == - found_paths.end()) { - found_paths.push_back(entry.path().string()); + const auto iterator = std::filesystem::directory_iterator(token_directory_path); + for (auto &entry : iterator) { + if (std::find(cache.begin(), cache.end(), entry.path()) == cache.end()) { + cache.push_back(entry.path()); + + // Read connection token from FS std::ifstream input(entry.path()); std::string token; input >> token; + + // Send token to backend to issue a direct connection. 
+ // NOTE: backend will refuse to connect silently if connection is already + // established backend->connect_to(token); } } @@ -85,16 +102,40 @@ void fs_discovery_service(const bool *terminate, const std::filesystem::path &to } } -void DiscoveryService::start(unsigned int adv_delay) { - if (advertisement_token.empty()) { +void DiscoveryService::start(unsigned int adv_delay, const std::string &token, + const std::string &token_directory) { + + if (token.empty()) { throw std::runtime_error("Advertisement token is empty"); } - mcast_listener_thread = new std::thread(mcast_thread_discovery_service, &terminate); + if (token_directory.empty()) { + throw std::runtime_error("Provided token directory is empty"); + } + + if (!std::filesystem::exists(token_directory)) { + std::filesystem::create_directory(token_directory); + } + + std::string node_name(HOST_NAME_MAX, '\0'); + gethostname(node_name.data(), node_name.size()); + node_name.resize(strlen(node_name.data())); + + token_directory_path = token_directory; + token_filename = node_name + ".capio"; + advertisement_token = token; + + std::ofstream token_file(token_directory_path / token_filename); + token_file << advertisement_token; + token_file.close(); + + mcast_listener_thread = new std::thread(mcast_thread_discovery_service, &terminate, + capio_multicast_adv_address, capio_multicast_adv_port); fs_listener_thread = new std::thread(fs_discovery_service, &terminate, token_directory_path, adv_delay); advertisement_thread = - new std::thread(advertise, &terminate, adv_delay, std::ref(advertisement_token)); + new std::thread(advertise, &terminate, adv_delay, std::ref(advertisement_token), + capio_multicast_adv_address, capio_multicast_adv_port); server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "DiscoveryService will advertise " + advertisement_token + " every " + @@ -119,32 +160,19 @@ void DiscoveryService::stop() { } } -DiscoveryService::DiscoveryService() { +DiscoveryService::DiscoveryService(const std::string 
&mcast_addr, const unsigned int mcast_port) + : capio_multicast_adv_address(mcast_addr), capio_multicast_adv_port(mcast_port) { shm_canary = new CapioShmCanary(CapioCLEngine::get().getWorkflowName()); - - if (!std::filesystem::exists(token_directory_path)) { - std::filesystem::create_directory(token_directory_path); - } - - std::string node_name(HOST_NAME_MAX, '\0'); - gethostname(node_name.data(), node_name.size()); - node_name.resize(strlen(node_name.data())); - - token_filename = node_name + ".capio"; } DiscoveryService::~DiscoveryService() { + // if destructor is called before stop(), then stop the service first. if (!terminate) { stop(); } + // delete aliveness token std::filesystem::remove(token_directory_path / token_filename); - delete shm_canary; -} - -void DiscoveryService::setAdvertisementToken(const std::string &token) { - this->advertisement_token = token; - std::ofstream token_file(token_directory_path / token_filename); - token_file << token; - token_file.close(); + // delete shm canary + delete shm_canary; } \ No newline at end of file From 9dfa4bd93bfe4f3bfb819cf93321638f64309f96 Mon Sep 17 00:00:00 2001 From: = <=> Date: Mon, 23 Mar 2026 10:24:50 +0000 Subject: [PATCH 05/13] Added NoBackend Added NoBackend as default CAPIO backend Split backend declaration and implementation minor fixes Fixes to tests Fixes to no_backend test fix const refs fix to segfault Messages Messages bugfix format format Final classes --- capio/server/include/remote/backend/include.hpp | 1 + capio/tests/unit/server/src/main.cpp | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/capio/server/include/remote/backend/include.hpp b/capio/server/include/remote/backend/include.hpp index 5518624cf..6dffef70d 100644 --- a/capio/server/include/remote/backend/include.hpp +++ b/capio/server/include/remote/backend/include.hpp @@ -6,4 +6,5 @@ #include "mpi.hpp" #include "none.hpp" + #endif // CAPIO_SERVER_REMOTE_BACKEND_INCLUDE_HPP diff --git
a/capio/tests/unit/server/src/main.cpp b/capio/tests/unit/server/src/main.cpp index 28cb277e4..3a3d44610 100644 --- a/capio/tests/unit/server/src/main.cpp +++ b/capio/tests/unit/server/src/main.cpp @@ -3,6 +3,7 @@ #include "capiocl.hpp" #include "capiocl/engine.h" #include "client-manager/client_manager.hpp" +#include "remote/backend/none.hpp" #include "storage/manager.hpp" #include "utils/capiocl_adapter.hpp" #include "utils/location.hpp" @@ -22,12 +23,16 @@ class ServerUnitTestEnvironment : public testing::Environment { capio_cl_engine = new capiocl::engine::Engine(false); client_manager = new ClientManager(); storage_manager = new StorageManager(); + backend = new NoneBackend(0, nullptr); + + open_files_location(); } void TearDown() override { delete storage_manager; delete client_manager; delete capio_cl_engine; + delete backend; } }; From 612ef1d280e76a448c28cd776769c6d51ccc2e3a Mon Sep 17 00:00:00 2001 From: = <=> Date: Tue, 24 Mar 2026 11:53:15 +0000 Subject: [PATCH 06/13] Dedicated test fixture with MockBackend to avoid strange pointer arithmetics WIP: MTCL backend WIP backend WIP: mtcl backend Fixed handshake First alpha implementation --- CMakeLists.txt | 6 + capio/common/requests.hpp | 2 + capio/server/CMakeLists.txt | 3 +- capio/server/capio_server.cpp | 1 - .../server/include/remote/backend/include.hpp | 1 + capio/server/include/remote/backend/mtcl.hpp | 149 +++++++ capio/server/include/remote/listener.hpp | 7 + capio/server/src/mtcl_backend.cpp | 363 ++++++++++++++++++ capio/tests/unit/server/src/main.cpp | 1 - cmake_test_discovery_310bb1f06a.json | 357 +++++++++++++++++ cmake_test_discovery_8e6c96c2a6.json | 17 + cmake_test_discovery_ad21613777.json | 42 ++ cmake_test_discovery_d1ab4fe680.json | 215 +++++++++++ docker-compose.yml | 49 +++ 14 files changed, 1210 insertions(+), 3 deletions(-) create mode 100644 capio/server/include/remote/backend/mtcl.hpp create mode 100644 capio/server/src/mtcl_backend.cpp create mode 100644 
cmake_test_discovery_310bb1f06a.json create mode 100644 cmake_test_discovery_8e6c96c2a6.json create mode 100644 cmake_test_discovery_ad21613777.json create mode 100644 cmake_test_discovery_d1ab4fe680.json create mode 100644 docker-compose.yml diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a930e879..d7dd6ce33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,6 +81,12 @@ FetchContent_Declare( GIT_TAG v1.4.0 ) +FetchContent_Declare( + mtcl + GIT_REPOSITORY https://github.com/ParaGroup/MTCL + GIT_TAG e5f2bfeea0fc3d704554c7df02e7857f0a00bbba +) + ##################################### # Targets ##################################### diff --git a/capio/common/requests.hpp b/capio/common/requests.hpp index 2ae70083a..e73f8a8c6 100644 --- a/capio/common/requests.hpp +++ b/capio/common/requests.hpp @@ -37,4 +37,6 @@ constexpr const int CAPIO_SERVER_REQUEST_STAT_REPLY = 3; constexpr const int CAPIO_SERVER_NR_REQUEST = 4; +constexpr const int BACKEND_HAVE_FINISH_SEND_REQUEST = 4; + #endif // CAPIO_COMMON_REQUESTS_HPP diff --git a/capio/server/CMakeLists.txt b/capio/server/CMakeLists.txt index 701789550..acf5a732e 100644 --- a/capio/server/CMakeLists.txt +++ b/capio/server/CMakeLists.txt @@ -19,7 +19,7 @@ FetchContent_Declare( set(ARGS_BUILD_EXAMPLE OFF CACHE INTERNAL "") set(ARGS_BUILD_UNITTESTS OFF CACHE INTERNAL "") -FetchContent_MakeAvailable(args capio_cl) +FetchContent_MakeAvailable(args capio_cl mtcl) ##################################### # Target definition @@ -38,6 +38,7 @@ target_include_directories(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include ${MPI_INCLUDE_PATH} ${capio_cl_SOURCE_DIR} + ${mtcl_SOURCE_DIR}/include ) ##################################### diff --git a/capio/server/capio_server.cpp b/capio/server/capio_server.cpp index 5ee3f08cb..782697582 100644 --- a/capio/server/capio_server.cpp +++ b/capio/server/capio_server.cpp @@ -161,6 +161,5 @@ int main(int argc, char **argv) { server_thread.join(); remote_listener_thread.join(); - delete 
backend; return 0; } \ No newline at end of file diff --git a/capio/server/include/remote/backend/include.hpp b/capio/server/include/remote/backend/include.hpp index 6dffef70d..7f04735c4 100644 --- a/capio/server/include/remote/backend/include.hpp +++ b/capio/server/include/remote/backend/include.hpp @@ -5,6 +5,7 @@ */ #include "mpi.hpp" +#include "mtcl.hpp" #include "none.hpp" #endif // CAPIO_SERVER_REMOTE_BACKEND_INCLUDE_HPP diff --git a/capio/server/include/remote/backend/mtcl.hpp b/capio/server/include/remote/backend/mtcl.hpp new file mode 100644 index 000000000..6d9b1c031 --- /dev/null +++ b/capio/server/include/remote/backend/mtcl.hpp @@ -0,0 +1,149 @@ +#ifndef MTCL_BACKEND_HPP +#define MTCL_BACKEND_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/constants.hpp" +#include "common/logger.hpp" +#include "remote/backend.hpp" + +typedef unsigned long long int capio_off64_t; + +template class AtomicQueue { + // data, sizeof(data), hostname + std::queue> _queue; + std::mutex _mutex; + std::condition_variable _lock_cond; + + bool _shutdown = false; + + public: + ~AtomicQueue() { + { + std::lock_guard lg(_mutex); + _shutdown = true; + } + _lock_cond.notify_all(); + } + + void push(T message, size_t message_size, const std::string &origin) { + { + std::lock_guard lg(_mutex); + if (_shutdown) { + return; + } + _queue.emplace(message, message_size, origin); + } + _lock_cond.notify_all(); + } + + std::tuple pop() { + std::unique_lock lock(_mutex); + _lock_cond.wait(lock, [this] { return !_queue.empty() || _shutdown; }); + auto s = std::move(_queue.front()); + _queue.pop(); + + return s; + } + + std::optional> try_pop() { + std::lock_guard lg(_mutex); + if (_queue.empty() || _shutdown) { + return std::nullopt; + } + + auto s = std::move(_queue.front()); + _queue.pop(); + return s; + } +}; + +/** + * This avoids it to include the MTCL library here as it is a header-only library. 
+ * this is equivalent to use extern in C but for class + */ +namespace MTCL { +class HandleUser; +} + +// TODO: extend backend class +class MTCLBackend : public Backend { + + std::string selfToken, connectedHostname, ownPort, usedProtocol; + std::unordered_map *> open_connections; + std::string ownHostname; + int thread_sleep_times = 0; + bool *continue_execution = new bool; + std::mutex *_guard; + std::thread *incoming_MTCL_connection_listener_thread = nullptr; + std::thread *incoming_UDP_connection_listener_thread = nullptr; + std::vector connection_threads; + bool *terminate; + + AtomicQueue incoming_request_queue; + + /** + * Waits for incoming new requests to connect to new server instances. When a new request + * arrives, it then handshakes with the remote servers, opening a new connection, and starting a + * new thread that will handle remote requests. If no request arrives within the sleep_time + * parameter, then the method will issue an advertisement on UDP multicast of its alive state + * so that other servers may instantiate a new connection with this instance. + * + * @param ownHostname + * @param ownPort + * @param usedProtocol + * @param continue_execution + * @param sleep_time + * @param open_connections + * @param guard + * @param _connection_threads + * @param terminate + */ + void static incomingMTCLConnectionListener( + const std::string &ownHostname, const std::string &ownPort, const std::string &usedProtocol, + const bool *continue_execution, int sleep_time, + std::unordered_map *> *open_connections, + std::mutex *guard, std::vector *_connection_threads, bool *terminate, + AtomicQueue *incoming_request_queue); + + /** + * Initiate a new MTCL connection with "out of band" communication through multicast + * advertisement. When a multicast advertisement is received, start the MTCL handshake with the + * remote server instance.
+ */ + static void incomingUDPConnectionListener( + bool *terminate, const std::string &ownHostname, std::string ownPort, + std::string usedProtocol, + std::unordered_map *> *open_connections, + std::vector *connection_threads, int thread_sleep_time, + AtomicQueue *incoming_request_queue, std::mutex *_guard); + + public: + explicit MTCLBackend(const std::string &proto, const std::string &port, int sleep_time); + + ~MTCLBackend() override; + + RemoteRequest read_next_request() override; + + void handshake_servers() override; + + const std::set get_nodes() override; + + void send_request(const char *message, int message_len, const std::string &target) override; + + void send_file(char *shm, long int nbytes, const std::string &target) override; + + void recv_file(char *shm, const std::string &source, long int bytes_expected) override; +}; + +#endif // MTCL_BACKEND_HPP \ No newline at end of file diff --git a/capio/server/include/remote/listener.hpp b/capio/server/include/remote/listener.hpp index 747c20efc..cb96ec6d9 100644 --- a/capio/server/include/remote/listener.hpp +++ b/capio/server/include/remote/listener.hpp @@ -41,6 +41,13 @@ inline Backend *select_backend(const std::string &backend_name, int argc, char * return new MPIBackend(argc, argv); } + if (backend_name == "mtcl") { + LOG("backend selected: MTCL"); + std::cout << CAPIO_LOG_SERVER_CLI_LEVEL_INFO << "Starting CAPIO with MTCL backend" + << std::endl; + return new MTCLBackend("TCP", "1234", 1000000); + } + if (backend_name == "mpisync") { LOG("backend selected: mpisync"); server_println("Starting CAPIO with MPI (SYNC) backend", diff --git a/capio/server/src/mtcl_backend.cpp b/capio/server/src/mtcl_backend.cpp new file mode 100644 index 000000000..bfcb77d11 --- /dev/null +++ b/capio/server/src/mtcl_backend.cpp @@ -0,0 +1,363 @@ +#include "common/logger.hpp" +#include "common/requests.hpp" +#include "remote/backend/mtcl.hpp" +#include "storage/manager.hpp" +#include "utils/common.hpp" +#include +#include + 
+// TODO: THERE IS A MASSIVE MEMORY LEAK WHEN SENDING AND RECEIVING CONST CHAR*. FIX IT BEFORE MERGE + +// TODO: CLI args (with defaults) instead of hardcoded values +constexpr char CAPIO_MULTICAST_ADDRESS[] = "224.0.0.2"; +constexpr int CAPIO_MULTICAST_PORT = 22334; +constexpr int REUSE_MCAST_SOCKET = 1; +constexpr int max_net_op = 10; + +extern StorageManager *storage_service; + +RemoteRequest MTCLBackend::read_next_request() { + START_LOG(gettid(), "call()"); + + auto optional_request = incoming_request_queue.try_pop(); + while (!optional_request.has_value()) { + std::this_thread::sleep_for(std::chrono::milliseconds(thread_sleep_times)); + optional_request = incoming_request_queue.try_pop(); + } + + auto [req, req_size, source] = optional_request.value(); + LOG("Received %s from %d", req.c_str(), source.c_str()); + return RemoteRequest(req.data(), source); +} + +/** + * @brief Manages a dedicated P2P connection to a single remote capio_server instance. + * * The communication logic follows a deterministic role-assignment algorithm: + * 1. **Initial Role Assignment:** The initial sender is determined by the lexicographical + * comparison of the two participating hostnames (the greater hostname starts as sender). + * 2. **Operational Phases:** The thread executes alternating phases of sending and receiving, + * processing up to `max_net_op` operations per phase. + * 3. **Role Switching:** Nodes synchronize a role swap using a `BACKEND_HAVE_FINISH_SEND_REQUEST` + * signal. This occurs when the current sender either exhausts its message queue or reaches + * the `max_net_op` threshold. + * 4. **Termination:** The loop persists as long as the remote handle remains valid and the + * `terminate` flag is false. + * @param HandlerPointer A valid MTCL HandlePointer for the connection. + * @param remote_hostname The hostname of the remote endpoint. + * @param queue Pointer to the communication hub containing inbound and outbound sub-queues.
+ * @param sleep_time Milliseconds to sleep between thread cycles to prevent CPU pinning. + * @param terminate Pointer to a heap-allocated boolean controlled by the main thread + * to signal execution shutdown. + * @param incoming_request_queue Queue onto which requests received from the remote endpoint are pushed. + */ +void serverConnectionHandler(MTCL::HandleUser HandlerPointer, const std::string &remote_hostname, + AtomicQueue *queue, const int sleep_time, + const bool *terminate, + AtomicQueue *incoming_request_queue) { + + char ownHostname[HOST_NAME_MAX]; + gethostname(ownHostname, HOST_NAME_MAX); + bool my_turn_to_send = ownHostname > remote_hostname; + + char request_has_finished_to_send[CAPIO_REQ_MAX_SIZE]{0}; + sprintf(request_has_finished_to_send, "%03d", BACKEND_HAVE_FINISH_SEND_REQUEST); + + START_LOG(gettid(), "call(remote_hostname=%s)", remote_hostname.c_str()); + + LOG("Will begin execution with %s phase", my_turn_to_send ? "sending" : "receiving"); + + while (HandlerPointer.isValid()) { + // execute up to N operation of send &/or receive, to avoid starvation + + if (my_turn_to_send) { + LOG("Send PHASE"); + for (int i = 0; i < max_net_op; i++) { + if (const auto request_opt = queue->try_pop(); request_opt.has_value()) { + const auto &[request, request_size, target] = request_opt.value(); + LOG("Request to be sent = %s to %s", request, target.c_str()); + + HandlerPointer.send(&request_size, sizeof(request_size)); + HandlerPointer.send(request, request_size); + } + } + LOG("Completed SEND PHASE"); + // Send message I have finished the max number of allowed consecutive io operations + HandlerPointer.send(request_has_finished_to_send, sizeof(request_has_finished_to_send)); + + } else { + + bool continue_receive_phase = true; + size_t receive_size = 0; + LOG("Receive PHASE"); + while (continue_receive_phase) { + // Receive phase + HandlerPointer.probe(receive_size, false); + if (receive_size > 0) { + LOG("A request is incoming"); + + ssize_t incoming_request_size = 0; + HandlerPointer.receive(&incoming_request_size,
sizeof(incoming_request_size)); + + const auto incoming_request = new char[incoming_request_size]; + const auto resp_size = + HandlerPointer.receive(incoming_request, incoming_request_size); + LOG("Received request with size = %ld", incoming_request_size); + + if (const auto code = + RemoteRequest{incoming_request, remote_hostname}.get_code(); + code == BACKEND_HAVE_FINISH_SEND_REQUEST) { + // Finished sending data. Set continue_receive_phase = false to go to next + // phase + LOG("CTRL MSG received: Other has finished sending phase. Switching me " + "from receive to send"); + continue_receive_phase = false; + } else { + incoming_request_queue->push(incoming_request, resp_size, remote_hostname); + } + } + } + } + + // terminate phase + if (*terminate) { + LOG("[TERM PHASE] Closing connection"); + HandlerPointer.close(); + LOG("[TERM PHASE] Terminating thread server_connection_handler"); + return; + } + + my_turn_to_send = !my_turn_to_send; + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_time)); + } +} + +void MTCLBackend::incomingMTCLConnectionListener( + const std::string &ownHostname, const std::string &ownPort, const std::string &usedProtocol, + const bool *continue_execution, int sleep_time, + std::unordered_map *> *open_connections, + std::mutex *guard, std::vector *_connection_threads, bool *terminate, + AtomicQueue *incoming_request_queue) { + + std::string selfToken = usedProtocol + ":" + ownHostname + ":" + ownPort; + + const int sockfd = socket(AF_INET, SOCK_DGRAM, 0); + sockaddr_in multicast_addr{}; + multicast_addr.sin_family = AF_INET; + multicast_addr.sin_port = htons(CAPIO_MULTICAST_PORT); + multicast_addr.sin_addr.s_addr = inet_addr(CAPIO_MULTICAST_ADDRESS); + + START_LOG(gettid(), "call(sleep_time=%d)", sleep_time); + + while (*continue_execution) { + + if (auto UserManager = MTCL::Manager::getNext(std::chrono::microseconds(sleep_time)); + UserManager.isValid()) { + // received MTCL handle + LOG("Handle user is valid"); + char 
connected_hostname[HOST_NAME_MAX] = {0}; + UserManager.receive(connected_hostname, HOST_NAME_MAX); + LOG("Received connection hostname: %s", connected_hostname); + + auto *queue = new AtomicQueue(); + { + const std::lock_guard lock(*guard); + open_connections->insert({connected_hostname, queue}); + } + _connection_threads->push_back( + new std::thread(serverConnectionHandler, std::move(UserManager), connected_hostname, + queue, sleep_time, terminate, incoming_request_queue)); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, + "Connected to " + std::string(connected_hostname)); + } else { + // broadcast ADV on multicast of me being alive by sending token named selfToken + sendto(sockfd, selfToken.data(), selfToken.size(), 0, + reinterpret_cast(&multicast_addr), sizeof(multicast_addr)); + } + } + + close(sockfd); +} +void MTCLBackend::incomingUDPConnectionListener( + bool *terminate, const std::string &ownHostname, std::string ownPort, std::string usedProtocol, + std::unordered_map *> *open_connections, + std::vector *connection_threads, int thread_sleep_time, + AtomicQueue *incoming_request_queue, std::mutex *_guard) { + START_LOG(gettid(), "call()"); + + const std::string selfToken = usedProtocol + ":" + ownHostname + ":" + ownPort; + + int sockfd = socket(AF_INET, SOCK_DGRAM, 0); + + setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &REUSE_MCAST_SOCKET, sizeof(REUSE_MCAST_SOCKET)); + + timeval tv; + tv.tv_sec = 0; + tv.tv_usec = 100000; // 100,000 microseconds = 100ms + setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + + sockaddr_in local_addr{}; + local_addr.sin_family = AF_INET; + local_addr.sin_port = htons(CAPIO_MULTICAST_PORT); + local_addr.sin_addr.s_addr = htonl(INADDR_ANY); + bind(sockfd, reinterpret_cast(&local_addr), sizeof(local_addr)); + + ip_mreq mreq{}; + mreq.imr_multiaddr.s_addr = inet_addr(CAPIO_MULTICAST_ADDRESS); + mreq.imr_interface.s_addr = htonl(INADDR_ANY); + setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); + 
+ while (!*terminate) { + char incoming_token[2 * HOST_NAME_MAX] = {0}; + + if (recvfrom(sockfd, incoming_token, sizeof(incoming_token) - 1, 0, nullptr, nullptr) <= + 0) { + continue; + } + + std::string hostname_port(incoming_token); + + if (std::string(incoming_token) == selfToken) { + LOG("Skipping to connect to self"); + continue; + } + + std::string remoteHost = + hostname_port.substr(hostname_port.find(':') + 1, // Drop proto + hostname_port.find_last_of(':') - hostname_port.find(':') - 1); + + if (open_connections->find(remoteHost) != open_connections->end()) { + LOG("Remote host %s is already connected", remoteHost.c_str()); + continue; + } + + LOG("Trying to connect on remote: %s", incoming_token); + if (auto UserManager = MTCL::Manager::connect(incoming_token); UserManager.isValid()) { + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, + std::string("Opened connection with ") + incoming_token); + LOG("Opened connection with: %s", incoming_token); + + // send my hostname + char _ownHotname_cstr[PATH_MAX]{0}; + sprintf(_ownHotname_cstr, "%s", ownHostname.c_str()); + UserManager.send(_ownHotname_cstr, HOST_NAME_MAX); + + auto *queue = new AtomicQueue(); + { + const std::lock_guard lg(*_guard); + open_connections->insert({remoteHost, queue}); + } + connection_threads->push_back( + new std::thread(serverConnectionHandler, std::move(UserManager), remoteHost, queue, + thread_sleep_time, terminate, incoming_request_queue)); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + remoteHost); + } else { + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, "Warning: tried to connect to " + + std::string(remoteHost) + + " but connection is not valid"); + } + } + close(sockfd); +} + +MTCLBackend::MTCLBackend(const std::string &proto, const std::string &port, const int sleep_time) + : Backend(HOST_NAME_MAX), selfToken(proto + ":0.0.0.0:" + port), ownPort(port), + usedProtocol(proto), thread_sleep_times(sleep_time) { + START_LOG(gettid(), "INFO: instance of 
MTCLBackend"); + + terminate = new bool; + *terminate = false; + + _guard = new std::mutex(); + + ownHostname.resize(HOST_NAME_MAX, '\0'); + gethostname(ownHostname.data(), HOST_NAME_MAX); + ownHostname.resize(strnlen(ownHostname.c_str(), HOST_NAME_MAX)); + + LOG("My hostname is %s. Starting to listen on connection %s", ownHostname.c_str(), + selfToken.c_str()); + + std::string hostname_id("server-"); + hostname_id += ownHostname; + MTCL::Manager::init(hostname_id); + + *continue_execution = true; + + MTCL::Manager::listen(selfToken); + + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "MTCL_backend initialization completed."); +} + +void MTCLBackend::handshake_servers() { + incoming_MTCL_connection_listener_thread = + new std::thread(incomingMTCLConnectionListener, ownHostname, ownPort, usedProtocol, + std::ref(continue_execution), thread_sleep_times, &open_connections, _guard, + &connection_threads, terminate, &incoming_request_queue); + + incoming_UDP_connection_listener_thread = + new std::thread(incomingUDPConnectionListener, terminate, ownHostname, ownPort, + usedProtocol, &open_connections, &connection_threads, thread_sleep_times, + &incoming_request_queue, _guard); +} + +MTCLBackend::~MTCLBackend() { + START_LOG(gettid(), "call()"); + *terminate = true; + *continue_execution = false; + + for (const auto thread : connection_threads) { + thread->join(); + } + LOG("Terminated connection threads"); + + pthread_cancel(incoming_MTCL_connection_listener_thread->native_handle()); + incoming_MTCL_connection_listener_thread->join(); + + pthread_cancel(incoming_UDP_connection_listener_thread->native_handle()); + incoming_UDP_connection_listener_thread->join(); + + delete incoming_MTCL_connection_listener_thread; + delete incoming_UDP_connection_listener_thread; + delete continue_execution; + delete terminate; + + LOG("Handler closed."); + + MTCL::Manager::finalize(); + LOG("Finalizing MTCL backend"); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "MTCL backend 
cleanup completed."); +} + +const std::set MTCLBackend::get_nodes() { + std::set keys; + for (const auto &[hostname, _handle] : open_connections) { + keys.insert(hostname); + } + + return keys; +} + +void MTCLBackend::send_request(const char *message, const int message_len, + const std::string &target) { + + START_LOG(gettid(), "call(target=%s, message=%s, message_len=%ld)", target.c_str(), message, + message_len); + + const auto queues = open_connections.at(target); + LOG("obtained access to queue"); + + queues->push(message, message_len, target); + LOG("Request pushed to output queue"); +} + +void MTCLBackend::send_file(char *shm, long int nbytes, const std::string &target) { + START_LOG(gettid(), "call(target=%s, nbytes=%ld)", target.c_str(), nbytes); + + const auto queue = open_connections.at(target); + queue->push(shm, nbytes, target); +} + +void MTCLBackend::recv_file(char *shm, const std::string &source, long int bytes_expected) { + const auto queues = open_connections.at(source); + const auto data = queues->pop(); + memcpy(shm, std::get<0>(data), bytes_expected); +} diff --git a/capio/tests/unit/server/src/main.cpp b/capio/tests/unit/server/src/main.cpp index 3a3d44610..f32dc9546 100644 --- a/capio/tests/unit/server/src/main.cpp +++ b/capio/tests/unit/server/src/main.cpp @@ -32,7 +32,6 @@ class ServerUnitTestEnvironment : public testing::Environment { delete storage_manager; delete client_manager; delete capio_cl_engine; - delete backend; } }; diff --git a/cmake_test_discovery_310bb1f06a.json b/cmake_test_discovery_310bb1f06a.json new file mode 100644 index 000000000..067195536 --- /dev/null +++ b/cmake_test_discovery_310bb1f06a.json @@ -0,0 +1,357 @@ +{ + "tests": 69, + "name": "AllTests", + "testsuites": [ + { + "name": "SystemCallTest", + "tests": 69, + "testsuite": [ + { + "name": "TestChdirOnExternalCapioDirAndThenBack", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/chdir.cpp", + "line": 7 + }, + { + "name": 
"TestThreadClone", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/clone.cpp", + "line": 37 + }, + { + "name": "TestThreadCloneProducerConsumer", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/clone.cpp", + "line": 45 + }, + { + "name": "TestForkParentChild", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/clone.cpp", + "line": 63 + }, + { + "name": "TestThreadCloneProducerConsumerWithStat", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/clone.cpp", + "line": 71 + }, + { + "name": "TestDirectoryCreateReopenClose", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", + "line": 11 + }, + { + "name": "TestDirectoryCreateReopenCloseWithMkdiratAtFdcwd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", + "line": 26 + }, + { + "name": "TestMkdirFailsIfDirectoryAlreadyExists", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", + "line": 41 + }, + { + "name": "TestDirectoryCreateReopenCloseInDifferentDirectoryWithOpenatAbsolutePath", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", + "line": 51 + }, + { + "name": "TestDirectoryCreateReopenCloseInDifferentDirectoryWithMkdiratDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", + "line": 67 + }, + { + "name": "TestGetcwd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", + "line": 86 + }, + { + "name": "TestGetcwdWithPathLongerThanSize", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", + "line": 93 + }, + { + "name": "TestDirentsOnCapioDir", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dirent.cpp", + "line": 20 + }, + { + "name": "TestDup", + "file": 
"\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", + "line": 6 + }, + { + "name": "TestDup2", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", + "line": 26 + }, + { + "name": "TestDup2WithNewfdPointingToOpenFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", + "line": 47 + }, + { + "name": "TestDup2WithTwoEqualArguments", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", + "line": 65 + }, + { + "name": "TestDup3", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", + "line": 78 + }, + { + "name": "TestDup3WithNewfdPointingToOpenFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", + "line": 99 + }, + { + "name": "TestDup3WithTwoEqualArguments", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", + "line": 117 + }, + { + "name": "TestDup3WithOCloexec", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", + "line": 131 + }, + { + "name": "TestFchmod", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/fcntl.cpp", + "line": 8 + }, + { + "name": "TestFchown", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/fcntl.cpp", + "line": 15 + }, + { + "name": "TestFgetxattr", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/fcntl.cpp", + "line": 22 + }, + { + "name": "TestStatfs", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/fcntl.cpp", + "line": 32 + }, + { + "name": "TestFileCreateReopenClose", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", + "line": 10 + }, + { + "name": "TestCreat", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", + "line": 24 + }, + { + "name": "TestFileCreateReopenCloseWithOpenatAtFdcwd", + 
"file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", + "line": 34 + }, + { + "name": "TestOpenFailsWithOExclIfFileAlreadyExists", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", + "line": 48 + }, + { + "name": "TestFileCreateReopenCloseInDifferentDirectoryWithOpenatAbsolutePath", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", + "line": 62 + }, + { + "name": "TestFileCreateReopenCloseInDifferentDirectoryWithOpenatDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", + "line": 78 + }, + { + "name": "TestFileRenameWhenNewPathDoesNotExist", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/rename.cpp", + "line": 7 + }, + { + "name": "TestFileRenameWithNewPathAlreadyExists", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/rename.cpp", + "line": 22 + }, + { + "name": "TestDirectoryRenameWhenNewPathDoesNotExist", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/rename.cpp", + "line": 39 + }, + { + "name": "TestDirectoryRenameWhenNewPathAlreadyExists", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/rename.cpp", + "line": 52 + }, + { + "name": "TestStatOnFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 17 + }, + { + "name": "TestStatOnDirectory", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 33 + }, + { + "name": "TestStatOnNonexistentFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 44 + }, + { + "name": "TestFstatOnFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 50 + }, + { + "name": "TestFstatOnDirectory", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 
66 + }, + { + "name": "TestFstatOnInvalidFd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 81 + }, + { + "name": "TestFstatatOnFileWithAtFdcwd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 87 + }, + { + "name": "TestFstatatOnDirectoryWithAtFdcwd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 103 + }, + { + "name": "TestFstatatOnFileInDifferentDirectoryWithAbsolutePath", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 114 + }, + { + "name": "TestFstatatOnDirectoryInDifferentDirectoryWithAbsolutePath", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 132 + }, + { + "name": "TestFstatatOnFileInDifferentDirectoryWithDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 144 + }, + { + "name": "TestFstatatOnDirectoryInDifferentDirectoryWithDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 162 + }, + { + "name": "TestFstatatOnFileWithAtEmptyPathAndDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 178 + }, + { + "name": "TestFstatatOnDirectoryWIthAtEmptyPathAndDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 193 + }, + { + "name": "TestFstatatOnNonexistentFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 207 + }, + { + "name": "TestFstatatOnRelativePathWithInvalidDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 213 + }, + { + "name": "TestFstatatWithEmptyPathAndNoAtEmptyPath", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 220 + }, + { + "name": 
"TestFileCreateWriteCloseWithStat", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 226 + }, + { + "name": "TestDirectoryCreateReopenCloseWithStat", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", + "line": 251 + }, + { + "name": "TestStatxOnFileWithAtFdcwd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 16 + }, + { + "name": "TestStatxOnDirectoryWithAtFdcwd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 32 + }, + { + "name": "TestStatxOnFileInDifferentDirectoryWithAbsolutePath", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 43 + }, + { + "name": "TestStatxOnDirectoryInDifferentDirectoryWithAbsoluePath", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 61 + }, + { + "name": "TestStatxOnFileInDifferentDirectoryWithDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 73 + }, + { + "name": "TestStatxOnDirectoryInDifferentDirectoryWithDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 91 + }, + { + "name": "TestStatxOnFileWithAtEmptyPathAndDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 107 + }, + { + "name": "TestStatxOnDirectoryWithAtEmptyPathAndDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 122 + }, + { + "name": "TestStatxOnNonexistentFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 136 + }, + { + "name": "TestStatxOnRelativePathWithInvalidDirfd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 143 + }, + { + "name": "TestStatxWithStatxReservedSet", + 
"file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 150 + }, + { + "name": "TestStatxWithEmptyPathAndNoEmptyPath", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", + "line": 157 + }, + { + "name": "TestFileCreateWriteClose", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/write.cpp", + "line": 11 + }, + { + "name": "TestFileCreateWriteLseekClose", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/write.cpp", + "line": 31 + }, + { + "name": "TestFileCreateBufferedWriteClose", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/write.cpp", + "line": 55 + } + ] + } + ] +} diff --git a/cmake_test_discovery_8e6c96c2a6.json b/cmake_test_discovery_8e6c96c2a6.json new file mode 100644 index 000000000..2c8385496 --- /dev/null +++ b/cmake_test_discovery_8e6c96c2a6.json @@ -0,0 +1,17 @@ +{ + "tests": 1, + "name": "AllTests", + "testsuites": [ + { + "name": "integrationTests", + "tests": 1, + "testsuite": [ + { + "name": "RunTestSplitMergeAndMapReduceFunction", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/integration\/src\/mapreduce.cpp", + "line": 351 + } + ] + } + ] +} diff --git a/cmake_test_discovery_ad21613777.json b/cmake_test_discovery_ad21613777.json new file mode 100644 index 000000000..afe02b1c1 --- /dev/null +++ b/cmake_test_discovery_ad21613777.json @@ -0,0 +1,42 @@ +{ + "tests": 6, + "name": "AllTests", + "testsuites": [ + { + "name": "RealpathPosixTest", + "tests": 6, + "testsuite": [ + { + "name": "TestAbsolutePathsInCapioDirWhenPathExists", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", + "line": 16 + }, + { + "name": "TestAbsolutePathsInCapioDirWhenPathDoesNotExist", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", + "line": 25 + }, + { + "name": "TestAbsolutePathsOutsideCapioDirWhenPathExists", + "file": 
"\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", + "line": 30 + }, + { + "name": "TestAbsolutePathOutsideCapioDirWhenPathDoesNotExist", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", + "line": 39 + }, + { + "name": "TestRelativePathsInCapioDirWhenCwdIsCapioDir", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", + "line": 44 + }, + { + "name": "TestRelativePathsInCapioDirWhenCwdIsParentOfCapioDir", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", + "line": 55 + } + ] + } + ] +} diff --git a/cmake_test_discovery_d1ab4fe680.json b/cmake_test_discovery_d1ab4fe680.json new file mode 100644 index 000000000..7906d223b --- /dev/null +++ b/cmake_test_discovery_d1ab4fe680.json @@ -0,0 +1,215 @@ +{ + "tests": 37, + "name": "AllTests", + "testsuites": [ + { + "name": "ServerTest", + "tests": 24, + "testsuite": [ + { + "name": "TestInsertSingleSector", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 14 + }, + { + "name": "TestBufferAllocation", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 22 + }, + { + "name": "TestInsertTwoNonOverlappingSectors", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 32 + }, + { + "name": "TestInsertTwoOverlappingSectors", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 44 + }, + { + "name": "TestInsertTwoOverlappingSectorsSameStart", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 53 + }, + { + "name": "TestInsertTwoOverlappingSectorsSameEnd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 62 + }, + { + "name": "TestInsertTwoOverlappingSectorsNested", + "file": 
"\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 71 + }, + { + "name": "TestDestructionOfPermanentCapioFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 80 + }, + { + "name": "TestDestructionOfPermanentCapioFileDirectory", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 88 + }, + { + "name": "TestCapioFileWaitForDataMultithreaded", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 97 + }, + { + "name": "TestCapioFileWaitForDataMultithreadedWithCommit", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 136 + }, + { + "name": "TestCapioFileWaitForCompletion", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 157 + }, + { + "name": "TestCommitCapioFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 195 + }, + { + "name": "TestDumpNotHomeNode", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 207 + }, + { + "name": "TestCommitAndDeleteDirectory", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 216 + }, + { + "name": "TesMemcpyCapioFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 227 + }, + { + "name": "TestCloseCapioFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 242 + }, + { + "name": "TestCapioFileSeekData", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 256 + }, + { + "name": "TestCapioFileSeekHole", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 273 + }, + { + "name": 
"TestAddAndRemoveFD", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 289 + }, + { + "name": "TestSetGetRealFileSize", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 302 + }, + { + "name": "TestDeletePermanentDirectory", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 309 + }, + { + "name": "TestFileSetCommitToFalse", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 318 + }, + { + "name": "TestGetSectorEnd", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 369 + } + ] + }, + { + "name": "MockBackendTestFixture", + "tests": 2, + "testsuite": [ + { + "name": "TestReadFromNodeMockBackend", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 353 + }, + { + "name": "TestSimulateDirectoryStreaming", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", + "line": 380 + } + ] + }, + { + "name": "ClientManagerTestEnvironment", + "tests": 4, + "testsuite": [ + { + "name": "testReplyToNonClient", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/client_manager.cpp", + "line": 9 + }, + { + "name": "testGetNumberOfConnectedClients", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/client_manager.cpp", + "line": 14 + }, + { + "name": "testFailedRequestCode", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/client_manager.cpp", + "line": 25 + }, + { + "name": "testAddAndRemoveProducedFiles", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/client_manager.cpp", + "line": 48 + } + ] + }, + { + "name": "StorageManagerTestEnvironment", + "tests": 7, + "testsuite": [ + { + "name": "testGetPaths", + "file": 
"\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", + "line": 14 + }, + { + "name": "testExceptions", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", + "line": 36 + }, + { + "name": "testInitDirectory", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", + "line": 42 + }, + { + "name": "testAddDirectoryFailure", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", + "line": 59 + }, + { + "name": "testRemameFile", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", + "line": 72 + }, + { + "name": "testNumberOfOpensAndCloses", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", + "line": 87 + }, + { + "name": "testNumberOfOpensAfterClone", + "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", + "line": 102 + } + ] + } + ] +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..327ab3e44 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,49 @@ +services: + node1: + image: hpio/capio:latest + container_name: node1 + hostname: "node1" + tty: true + working_dir: /shared + volumes: + - shared_data:/shared + networks: + capio_private_net: + ipv4_address: 10.10.0.10 + environment: + - CAPIO_LOG_LEVEL=-1 + - APP_TYPE=writer + - CAPIO_DIR=. + command: | + capio_server -b mtcl --no-config + + node2: + image: hpio/capio:latest + container_name: node2 + hostname: "node2" + tty: true + working_dir: /shared + volumes: + - shared_data:/shared + networks: + capio_private_net: + ipv4_address: 10.10.0.11 + environment: + - CAPIO_LOG_LEVEL=-1 + - APP_TYPE=reader + - CAPIO_DIR=. 
+ command: | + capio_server -b mtcl --no-config + +volumes: + shared_data: + +networks: + capio_private_net: + driver: macvlan + driver_opts: + parent: dummy0 + ipam: + config: + - subnet: 10.10.0.0/24 + gateway: 10.10.0.1 \ No newline at end of file From 060acdf7dda9b0780b40c21ca097138461a7151f Mon Sep 17 00:00:00 2001 From: = Date: Mon, 13 Apr 2026 15:50:31 +0100 Subject: [PATCH 07/13] Cmake fix --- capio/server/include/remote/backend/mtcl.hpp | 2 +- capio/tests/unit/server/CMakeLists.txt | 3 +- capio/tests/unit/server/src/main.cpp | 4 - cmake_test_discovery_310bb1f06a.json | 357 ------------------- cmake_test_discovery_8e6c96c2a6.json | 17 - cmake_test_discovery_ad21613777.json | 42 --- cmake_test_discovery_d1ab4fe680.json | 215 ----------- 7 files changed, 3 insertions(+), 637 deletions(-) delete mode 100644 cmake_test_discovery_310bb1f06a.json delete mode 100644 cmake_test_discovery_8e6c96c2a6.json delete mode 100644 cmake_test_discovery_ad21613777.json delete mode 100644 cmake_test_discovery_d1ab4fe680.json diff --git a/capio/server/include/remote/backend/mtcl.hpp b/capio/server/include/remote/backend/mtcl.hpp index 6d9b1c031..12060febe 100644 --- a/capio/server/include/remote/backend/mtcl.hpp +++ b/capio/server/include/remote/backend/mtcl.hpp @@ -117,7 +117,7 @@ class MTCLBackend : public Backend { AtomicQueue *incoming_request_queue); /** - * Initiate a new MTCL connection with "out of band" communication trough multicast + * Initiate a new MTCL connection with "out of band" communication through multicast * advertisement. when a multicast advertisement is received, start the MTCL handshake with the * remote server instance. 
*/ diff --git a/capio/tests/unit/server/CMakeLists.txt b/capio/tests/unit/server/CMakeLists.txt index ca41e31bc..a11ccdb27 100644 --- a/capio/tests/unit/server/CMakeLists.txt +++ b/capio/tests/unit/server/CMakeLists.txt @@ -4,7 +4,7 @@ set(TARGET_NAME capio_server_unit_tests) find_package(MPI REQUIRED) -FetchContent_MakeAvailable(capio_cl) +FetchContent_MakeAvailable(capio_cl mtcl) set(TARGET_INCLUDE_FOLDER "${PROJECT_SOURCE_DIR}/capio/server") @@ -32,6 +32,7 @@ target_sources(${TARGET_NAME} PRIVATE target_include_directories(${TARGET_NAME} PRIVATE "${TARGET_INCLUDE_FOLDER}/include" ${capio_cl_SOURCE_DIR} + ${mtcl_SOURCE_DIR}/include ) ##################################### diff --git a/capio/tests/unit/server/src/main.cpp b/capio/tests/unit/server/src/main.cpp index f32dc9546..28cb277e4 100644 --- a/capio/tests/unit/server/src/main.cpp +++ b/capio/tests/unit/server/src/main.cpp @@ -3,7 +3,6 @@ #include "capiocl.hpp" #include "capiocl/engine.h" #include "client-manager/client_manager.hpp" -#include "remote/backend/none.hpp" #include "storage/manager.hpp" #include "utils/capiocl_adapter.hpp" #include "utils/location.hpp" @@ -23,9 +22,6 @@ class ServerUnitTestEnvironment : public testing::Environment { capio_cl_engine = new capiocl::engine::Engine(false); client_manager = new ClientManager(); storage_manager = new StorageManager(); - backend = new NoneBackend(0, nullptr); - - open_files_location(); } void TearDown() override { diff --git a/cmake_test_discovery_310bb1f06a.json b/cmake_test_discovery_310bb1f06a.json deleted file mode 100644 index 067195536..000000000 --- a/cmake_test_discovery_310bb1f06a.json +++ /dev/null @@ -1,357 +0,0 @@ -{ - "tests": 69, - "name": "AllTests", - "testsuites": [ - { - "name": "SystemCallTest", - "tests": 69, - "testsuite": [ - { - "name": "TestChdirOnExternalCapioDirAndThenBack", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/chdir.cpp", - "line": 7 - }, - { - "name": "TestThreadClone", - "file": 
"\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/clone.cpp", - "line": 37 - }, - { - "name": "TestThreadCloneProducerConsumer", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/clone.cpp", - "line": 45 - }, - { - "name": "TestForkParentChild", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/clone.cpp", - "line": 63 - }, - { - "name": "TestThreadCloneProducerConsumerWithStat", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/clone.cpp", - "line": 71 - }, - { - "name": "TestDirectoryCreateReopenClose", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", - "line": 11 - }, - { - "name": "TestDirectoryCreateReopenCloseWithMkdiratAtFdcwd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", - "line": 26 - }, - { - "name": "TestMkdirFailsIfDirectoryAlreadyExists", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", - "line": 41 - }, - { - "name": "TestDirectoryCreateReopenCloseInDifferentDirectoryWithOpenatAbsolutePath", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", - "line": 51 - }, - { - "name": "TestDirectoryCreateReopenCloseInDifferentDirectoryWithMkdiratDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", - "line": 67 - }, - { - "name": "TestGetcwd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", - "line": 86 - }, - { - "name": "TestGetcwdWithPathLongerThanSize", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/directory.cpp", - "line": 93 - }, - { - "name": "TestDirentsOnCapioDir", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dirent.cpp", - "line": 20 - }, - { - "name": "TestDup", - "file": 
"\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", - "line": 6 - }, - { - "name": "TestDup2", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", - "line": 26 - }, - { - "name": "TestDup2WithNewfdPointingToOpenFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", - "line": 47 - }, - { - "name": "TestDup2WithTwoEqualArguments", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", - "line": 65 - }, - { - "name": "TestDup3", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", - "line": 78 - }, - { - "name": "TestDup3WithNewfdPointingToOpenFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", - "line": 99 - }, - { - "name": "TestDup3WithTwoEqualArguments", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", - "line": 117 - }, - { - "name": "TestDup3WithOCloexec", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/dup.cpp", - "line": 131 - }, - { - "name": "TestFchmod", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/fcntl.cpp", - "line": 8 - }, - { - "name": "TestFchown", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/fcntl.cpp", - "line": 15 - }, - { - "name": "TestFgetxattr", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/fcntl.cpp", - "line": 22 - }, - { - "name": "TestStatfs", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/fcntl.cpp", - "line": 32 - }, - { - "name": "TestFileCreateReopenClose", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", - "line": 10 - }, - { - "name": "TestCreat", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", - "line": 24 - }, - { - "name": "TestFileCreateReopenCloseWithOpenatAtFdcwd", - 
"file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", - "line": 34 - }, - { - "name": "TestOpenFailsWithOExclIfFileAlreadyExists", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", - "line": 48 - }, - { - "name": "TestFileCreateReopenCloseInDifferentDirectoryWithOpenatAbsolutePath", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", - "line": 62 - }, - { - "name": "TestFileCreateReopenCloseInDifferentDirectoryWithOpenatDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/file.cpp", - "line": 78 - }, - { - "name": "TestFileRenameWhenNewPathDoesNotExist", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/rename.cpp", - "line": 7 - }, - { - "name": "TestFileRenameWithNewPathAlreadyExists", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/rename.cpp", - "line": 22 - }, - { - "name": "TestDirectoryRenameWhenNewPathDoesNotExist", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/rename.cpp", - "line": 39 - }, - { - "name": "TestDirectoryRenameWhenNewPathAlreadyExists", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/rename.cpp", - "line": 52 - }, - { - "name": "TestStatOnFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 17 - }, - { - "name": "TestStatOnDirectory", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 33 - }, - { - "name": "TestStatOnNonexistentFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 44 - }, - { - "name": "TestFstatOnFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 50 - }, - { - "name": "TestFstatOnDirectory", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 
66 - }, - { - "name": "TestFstatOnInvalidFd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 81 - }, - { - "name": "TestFstatatOnFileWithAtFdcwd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 87 - }, - { - "name": "TestFstatatOnDirectoryWithAtFdcwd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 103 - }, - { - "name": "TestFstatatOnFileInDifferentDirectoryWithAbsolutePath", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 114 - }, - { - "name": "TestFstatatOnDirectoryInDifferentDirectoryWithAbsolutePath", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 132 - }, - { - "name": "TestFstatatOnFileInDifferentDirectoryWithDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 144 - }, - { - "name": "TestFstatatOnDirectoryInDifferentDirectoryWithDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 162 - }, - { - "name": "TestFstatatOnFileWithAtEmptyPathAndDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 178 - }, - { - "name": "TestFstatatOnDirectoryWIthAtEmptyPathAndDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 193 - }, - { - "name": "TestFstatatOnNonexistentFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 207 - }, - { - "name": "TestFstatatOnRelativePathWithInvalidDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 213 - }, - { - "name": "TestFstatatWithEmptyPathAndNoAtEmptyPath", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 220 - }, - { - "name": 
"TestFileCreateWriteCloseWithStat", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 226 - }, - { - "name": "TestDirectoryCreateReopenCloseWithStat", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/stat.cpp", - "line": 251 - }, - { - "name": "TestStatxOnFileWithAtFdcwd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 16 - }, - { - "name": "TestStatxOnDirectoryWithAtFdcwd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 32 - }, - { - "name": "TestStatxOnFileInDifferentDirectoryWithAbsolutePath", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 43 - }, - { - "name": "TestStatxOnDirectoryInDifferentDirectoryWithAbsoluePath", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 61 - }, - { - "name": "TestStatxOnFileInDifferentDirectoryWithDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 73 - }, - { - "name": "TestStatxOnDirectoryInDifferentDirectoryWithDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 91 - }, - { - "name": "TestStatxOnFileWithAtEmptyPathAndDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 107 - }, - { - "name": "TestStatxOnDirectoryWithAtEmptyPathAndDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 122 - }, - { - "name": "TestStatxOnNonexistentFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 136 - }, - { - "name": "TestStatxOnRelativePathWithInvalidDirfd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 143 - }, - { - "name": "TestStatxWithStatxReservedSet", - 
"file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 150 - }, - { - "name": "TestStatxWithEmptyPathAndNoEmptyPath", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/statx.cpp", - "line": 157 - }, - { - "name": "TestFileCreateWriteClose", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/write.cpp", - "line": 11 - }, - { - "name": "TestFileCreateWriteLseekClose", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/write.cpp", - "line": 31 - }, - { - "name": "TestFileCreateBufferedWriteClose", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/syscall\/src\/write.cpp", - "line": 55 - } - ] - } - ] -} diff --git a/cmake_test_discovery_8e6c96c2a6.json b/cmake_test_discovery_8e6c96c2a6.json deleted file mode 100644 index 2c8385496..000000000 --- a/cmake_test_discovery_8e6c96c2a6.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "tests": 1, - "name": "AllTests", - "testsuites": [ - { - "name": "integrationTests", - "tests": 1, - "testsuite": [ - { - "name": "RunTestSplitMergeAndMapReduceFunction", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/integration\/src\/mapreduce.cpp", - "line": 351 - } - ] - } - ] -} diff --git a/cmake_test_discovery_ad21613777.json b/cmake_test_discovery_ad21613777.json deleted file mode 100644 index afe02b1c1..000000000 --- a/cmake_test_discovery_ad21613777.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "tests": 6, - "name": "AllTests", - "testsuites": [ - { - "name": "RealpathPosixTest", - "tests": 6, - "testsuite": [ - { - "name": "TestAbsolutePathsInCapioDirWhenPathExists", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", - "line": 16 - }, - { - "name": "TestAbsolutePathsInCapioDirWhenPathDoesNotExist", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", - "line": 25 - }, - { - "name": "TestAbsolutePathsOutsideCapioDirWhenPathExists", - 
"file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", - "line": 30 - }, - { - "name": "TestAbsolutePathOutsideCapioDirWhenPathDoesNotExist", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", - "line": 39 - }, - { - "name": "TestRelativePathsInCapioDirWhenCwdIsCapioDir", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", - "line": 44 - }, - { - "name": "TestRelativePathsInCapioDirWhenCwdIsParentOfCapioDir", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/posix\/src\/realpath.cpp", - "line": 55 - } - ] - } - ] -} diff --git a/cmake_test_discovery_d1ab4fe680.json b/cmake_test_discovery_d1ab4fe680.json deleted file mode 100644 index 7906d223b..000000000 --- a/cmake_test_discovery_d1ab4fe680.json +++ /dev/null @@ -1,215 +0,0 @@ -{ - "tests": 37, - "name": "AllTests", - "testsuites": [ - { - "name": "ServerTest", - "tests": 24, - "testsuite": [ - { - "name": "TestInsertSingleSector", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 14 - }, - { - "name": "TestBufferAllocation", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 22 - }, - { - "name": "TestInsertTwoNonOverlappingSectors", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 32 - }, - { - "name": "TestInsertTwoOverlappingSectors", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 44 - }, - { - "name": "TestInsertTwoOverlappingSectorsSameStart", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 53 - }, - { - "name": "TestInsertTwoOverlappingSectorsSameEnd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 62 - }, - { - "name": "TestInsertTwoOverlappingSectorsNested", - "file": 
"\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 71 - }, - { - "name": "TestDestructionOfPermanentCapioFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 80 - }, - { - "name": "TestDestructionOfPermanentCapioFileDirectory", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 88 - }, - { - "name": "TestCapioFileWaitForDataMultithreaded", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 97 - }, - { - "name": "TestCapioFileWaitForDataMultithreadedWithCommit", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 136 - }, - { - "name": "TestCapioFileWaitForCompletion", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 157 - }, - { - "name": "TestCommitCapioFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 195 - }, - { - "name": "TestDumpNotHomeNode", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 207 - }, - { - "name": "TestCommitAndDeleteDirectory", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 216 - }, - { - "name": "TesMemcpyCapioFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 227 - }, - { - "name": "TestCloseCapioFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 242 - }, - { - "name": "TestCapioFileSeekData", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 256 - }, - { - "name": "TestCapioFileSeekHole", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 273 - }, - { - "name": 
"TestAddAndRemoveFD", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 289 - }, - { - "name": "TestSetGetRealFileSize", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 302 - }, - { - "name": "TestDeletePermanentDirectory", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 309 - }, - { - "name": "TestFileSetCommitToFalse", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 318 - }, - { - "name": "TestGetSectorEnd", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 369 - } - ] - }, - { - "name": "MockBackendTestFixture", - "tests": 2, - "testsuite": [ - { - "name": "TestReadFromNodeMockBackend", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 353 - }, - { - "name": "TestSimulateDirectoryStreaming", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/capio_file.cpp", - "line": 380 - } - ] - }, - { - "name": "ClientManagerTestEnvironment", - "tests": 4, - "testsuite": [ - { - "name": "testReplyToNonClient", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/client_manager.cpp", - "line": 9 - }, - { - "name": "testGetNumberOfConnectedClients", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/client_manager.cpp", - "line": 14 - }, - { - "name": "testFailedRequestCode", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/client_manager.cpp", - "line": 25 - }, - { - "name": "testAddAndRemoveProducedFiles", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/client_manager.cpp", - "line": 48 - } - ] - }, - { - "name": "StorageManagerTestEnvironment", - "tests": 7, - "testsuite": [ - { - "name": "testGetPaths", - "file": 
"\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", - "line": 14 - }, - { - "name": "testExceptions", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", - "line": 36 - }, - { - "name": "testInitDirectory", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", - "line": 42 - }, - { - "name": "testAddDirectoryFailure", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", - "line": 59 - }, - { - "name": "testRemameFile", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", - "line": 72 - }, - { - "name": "testNumberOfOpensAndCloses", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", - "line": 87 - }, - { - "name": "testNumberOfOpensAfterClone", - "file": "\/home\/marco\/Desktop\/capio\/capio\/tests\/unit\/server\/src\/storage_manager.cpp", - "line": 102 - } - ] - } - ] -} From 60d76451626e000b805321d05922081da85de0ac Mon Sep 17 00:00:00 2001 From: = Date: Tue, 14 Apr 2026 11:00:42 +0100 Subject: [PATCH 08/13] Refactor AtomicQueue --- capio/server/include/remote/atomic_queue.hpp | 72 ++++++++++++++++++++ capio/server/include/remote/backend/mtcl.hpp | 50 +------------- capio/server/src/mtcl_backend.cpp | 4 +- 3 files changed, 75 insertions(+), 51 deletions(-) create mode 100644 capio/server/include/remote/atomic_queue.hpp diff --git a/capio/server/include/remote/atomic_queue.hpp b/capio/server/include/remote/atomic_queue.hpp new file mode 100644 index 000000000..258dd1dc2 --- /dev/null +++ b/capio/server/include/remote/atomic_queue.hpp @@ -0,0 +1,72 @@ + +#ifndef CAPIO_BACKEND_ATOMIC_QUEUE_HPP +#define CAPIO_BACKEND_ATOMIC_QUEUE_HPP + +#include +#include +#include +#include +#include + +template struct AtomicQueueElement { + + AtomicQueueElement(T message, size_t message_size, const std::string &origin) { + 
this->object = message; + this->object_size = message_size; + this->target_or_source = origin; + } + + T object; + size_t object_size = 0; + std::string target_or_source; +}; + +template class AtomicQueue { + std::queue> _queue; + std::mutex _mutex; + std::condition_variable _lock_cond; + + bool _shutdown = false; + + public: + ~AtomicQueue() { + { + std::lock_guard lg(_mutex); + _shutdown = true; + } + _lock_cond.notify_all(); + } + + void push(T message, size_t message_size, const std::string &origin) { + { + std::lock_guard lg(_mutex); + if (_shutdown) { + return; + } + _queue.emplace(message, message_size, origin); + } + _lock_cond.notify_all(); + } + + AtomicQueueElement pop() { + std::unique_lock lock(_mutex); + _lock_cond.wait(lock, [this] { return !_queue.empty() || _shutdown; }); + auto s = std::move(_queue.front()); + _queue.pop(); + + return s; + } + + std::optional> try_pop() { + std::lock_guard lg(_mutex); + if (_queue.empty() || _shutdown) { + return std::nullopt; + } + + auto s = std::move(_queue.front()); + _queue.pop(); + return s; + } +}; + +#endif // CAPIO_BACKEND_ATOMIC_QUEUE_HPP diff --git a/capio/server/include/remote/backend/mtcl.hpp b/capio/server/include/remote/backend/mtcl.hpp index 12060febe..57ea288a0 100644 --- a/capio/server/include/remote/backend/mtcl.hpp +++ b/capio/server/include/remote/backend/mtcl.hpp @@ -15,59 +15,11 @@ #include "common/constants.hpp" #include "common/logger.hpp" +#include "remote/atomic_queue.hpp" #include "remote/backend.hpp" typedef unsigned long long int capio_off64_t; -template class AtomicQueue { - // data, sizeof(data), hostname - std::queue> _queue; - std::mutex _mutex; - std::condition_variable _lock_cond; - - bool _shutdown = false; - - public: - ~AtomicQueue() { - { - std::lock_guard lg(_mutex); - _shutdown = true; - } - _lock_cond.notify_all(); - } - - void push(T message, size_t message_size, const std::string &origin) { - { - std::lock_guard lg(_mutex); - if (_shutdown) { - return; - } - 
_queue.emplace(message, message_size, origin); - } - _lock_cond.notify_all(); - } - - std::tuple pop() { - std::unique_lock lock(_mutex); - _lock_cond.wait(lock, [this] { return !_queue.empty() || _shutdown; }); - auto s = std::move(_queue.front()); - _queue.pop(); - - return s; - } - - std::optional> try_pop() { - std::lock_guard lg(_mutex); - if (_queue.empty() || _shutdown) { - return std::nullopt; - } - - auto s = std::move(_queue.front()); - _queue.pop(); - return s; - } -}; - /** * This avoids it to include the MTCL library here as it is a header-only library. * this is equivalent to use extern in C but for class diff --git a/capio/server/src/mtcl_backend.cpp b/capio/server/src/mtcl_backend.cpp index bfcb77d11..180c93e58 100644 --- a/capio/server/src/mtcl_backend.cpp +++ b/capio/server/src/mtcl_backend.cpp @@ -27,7 +27,7 @@ RemoteRequest MTCLBackend::read_next_request() { auto [req, req_size, source] = optional_request.value(); LOG("Received %s from %d", req.c_str(), source.c_str()); - return RemoteRequest(req.data(), source); + return {req.data(), source}; } /** @@ -359,5 +359,5 @@ void MTCLBackend::send_file(char *shm, long int nbytes, const std::string &targe void MTCLBackend::recv_file(char *shm, const std::string &source, long int bytes_expected) { const auto queues = open_connections.at(source); const auto data = queues->pop(); - memcpy(shm, std::get<0>(data), bytes_expected); + memcpy(shm, data.object, bytes_expected); } From 5459cf6ca5e15e801e38baa19036dd79fbcd8b4f Mon Sep 17 00:00:00 2001 From: = Date: Tue, 14 Apr 2026 13:27:44 +0100 Subject: [PATCH 09/13] WIP: improved code. 
TODO: segfault after connection between two different nodes --- capio/server/include/remote/backend/mtcl.hpp | 18 +- capio/server/src/mpi_backend.cpp | 6 + capio/server/src/mtcl_backend.cpp | 171 +++++++------------ capio/tests/unit/server/src/main.cpp | 10 +- 4 files changed, 81 insertions(+), 124 deletions(-) diff --git a/capio/server/include/remote/backend/mtcl.hpp b/capio/server/include/remote/backend/mtcl.hpp index 57ea288a0..aabd6f83a 100644 --- a/capio/server/include/remote/backend/mtcl.hpp +++ b/capio/server/include/remote/backend/mtcl.hpp @@ -31,14 +31,12 @@ class HandleUser; // TODO: extend backend class class MTCLBackend : public Backend { - std::string selfToken, connectedHostname, ownPort, usedProtocol; + std::string selfToken, ownHostname, ownPort, usedProtocol; std::unordered_map *> open_connections; - std::string ownHostname; int thread_sleep_times = 0; bool *continue_execution = new bool; std::mutex *_guard; std::thread *incoming_MTCL_connection_listener_thread = nullptr; - std::thread *incoming_UDP_connection_listener_thread = nullptr; std::vector connection_threads; bool *terminate; @@ -68,18 +66,6 @@ class MTCLBackend : public Backend { std::mutex *guard, std::vector *_connection_threads, bool *terminate, AtomicQueue *incoming_request_queue); - /** - * Initiate a new MTCL connection with "out of band" communication through multicast - * advertisement. when a multicast advertisement is received, start the MTCL handshake with the - * remote server instance. 
- */ - static void incomingUDPConnectionListener( - bool *terminate, const std::string &ownHostname, std::string ownPort, - std::string usedProtocol, - std::unordered_map *> *open_connections, - std::vector *connection_threads, int thread_sleep_time, - AtomicQueue *incoming_request_queue, std::mutex *_guard); - public: explicit MTCLBackend(const std::string &proto, const std::string &port, int sleep_time); @@ -96,6 +82,8 @@ class MTCLBackend : public Backend { void send_file(char *shm, long int nbytes, const std::string &target) override; void recv_file(char *shm, const std::string &source, long int bytes_expected) override; + + void connect_to(const std::string &target_token) override; }; #endif // MTCL_BACKEND_HPP \ No newline at end of file diff --git a/capio/server/src/mpi_backend.cpp b/capio/server/src/mpi_backend.cpp index 00ebe0aec..c1deee928 100644 --- a/capio/server/src/mpi_backend.cpp +++ b/capio/server/src/mpi_backend.cpp @@ -121,6 +121,12 @@ void MPIBackend::recv_file(char *shm, const std::string &source, long int bytes_ } void MPIBackend::connect_to(const std::string &target) { return; } +void MPIBackend::connect_to(const std::string &target) { + START_LOG(gettid(), "call(target=%s)", target.c_str()); + LOG("connect_to called on backend that is not dynamic. 
ignoring call"); + return; +} + MPISYNCBackend::MPISYNCBackend(int argc, char *argv[]) : MPIBackend(argc, argv) { START_LOG(gettid(), "call()"); LOG("Wrapped MPI backend with MPISYC backend"); diff --git a/capio/server/src/mtcl_backend.cpp b/capio/server/src/mtcl_backend.cpp index 180c93e58..4b92e870d 100644 --- a/capio/server/src/mtcl_backend.cpp +++ b/capio/server/src/mtcl_backend.cpp @@ -1,19 +1,22 @@ #include "common/logger.hpp" #include "common/requests.hpp" +#include "remote/backend.hpp" #include "remote/backend/mtcl.hpp" +#include "remote/discovery.hpp" #include "storage/manager.hpp" #include "utils/common.hpp" + #include #include // TODO: THERE IS A MASSIVE MEMORY LEAK WHEN SENDING AND RECEIVING CONST CHAR*. FIX IT BEFORE MERGE // TODO: CLI args (with defaults) instead of hardcoded values -constexpr char CAPIO_MULTICAST_ADDRESS[] = "224.0.0.2"; -constexpr int CAPIO_MULTICAST_PORT = 22334; -constexpr int REUSE_MCAST_SOCKET = 1; -constexpr int max_net_op = 10; +constexpr int max_net_op = 10; + +extern Backend *backend; +extern DiscoveryService *discovery_service; extern StorageManager *storage_service; RemoteRequest MTCLBackend::read_next_request() { @@ -140,12 +143,6 @@ void MTCLBackend::incomingMTCLConnectionListener( std::string selfToken = usedProtocol + ":" + ownHostname + ":" + ownPort; - const int sockfd = socket(AF_INET, SOCK_DGRAM, 0); - sockaddr_in multicast_addr{}; - multicast_addr.sin_family = AF_INET; - multicast_addr.sin_port = htons(CAPIO_MULTICAST_PORT); - multicast_addr.sin_addr.s_addr = inet_addr(CAPIO_MULTICAST_ADDRESS); - START_LOG(gettid(), "call(sleep_time=%d)", sleep_time); while (*continue_execution) { @@ -166,97 +163,14 @@ void MTCLBackend::incomingMTCLConnectionListener( _connection_threads->push_back( new std::thread(serverConnectionHandler, std::move(UserManager), connected_hostname, queue, sleep_time, terminate, incoming_request_queue)); - server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, - "Connected to " + 
std::string(connected_hostname)); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + usedProtocol + ":" + + std::string(connected_hostname) + + ":" + ownPort); } else { // broadcast ADV on multicast of me being alive by sending token named selfToken - sendto(sockfd, selfToken.data(), selfToken.size(), 0, - reinterpret_cast(&multicast_addr), sizeof(multicast_addr)); - } - } - - close(sockfd); -} -void MTCLBackend::incomingUDPConnectionListener( - bool *terminate, const std::string &ownHostname, std::string ownPort, std::string usedProtocol, - std::unordered_map *> *open_connections, - std::vector *connection_threads, int thread_sleep_time, - AtomicQueue *incoming_request_queue, std::mutex *_guard) { - START_LOG(gettid(), "call()"); - - const std::string selfToken = usedProtocol + ":" + ownHostname + ":" + ownPort; - - int sockfd = socket(AF_INET, SOCK_DGRAM, 0); - - setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &REUSE_MCAST_SOCKET, sizeof(REUSE_MCAST_SOCKET)); - - timeval tv; - tv.tv_sec = 0; - tv.tv_usec = 100000; // 100,000 microseconds = 100ms - setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); - - sockaddr_in local_addr{}; - local_addr.sin_family = AF_INET; - local_addr.sin_port = htons(CAPIO_MULTICAST_PORT); - local_addr.sin_addr.s_addr = htonl(INADDR_ANY); - bind(sockfd, reinterpret_cast(&local_addr), sizeof(local_addr)); - - ip_mreq mreq{}; - mreq.imr_multiaddr.s_addr = inet_addr(CAPIO_MULTICAST_ADDRESS); - mreq.imr_interface.s_addr = htonl(INADDR_ANY); - setsockopt(sockfd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq, sizeof(mreq)); - - while (!*terminate) { - char incoming_token[2 * HOST_NAME_MAX] = {0}; - - if (recvfrom(sockfd, incoming_token, sizeof(incoming_token) - 1, 0, nullptr, nullptr) <= - 0) { - continue; - } - - std::string hostname_port(incoming_token); - - if (std::string(incoming_token) == selfToken) { - LOG("Skipping to connect to self"); - continue; - } - - std::string remoteHost = - 
hostname_port.substr(hostname_port.find(':') + 1, // Drop proto - hostname_port.find_last_of(':') - hostname_port.find(':') - 1); - - if (open_connections->find(remoteHost) != open_connections->end()) { - LOG("Remote host %s is already connected", remoteHost.c_str()); - continue; - } - - LOG("Trying to connect on remote: %s", incoming_token); - if (auto UserManager = MTCL::Manager::connect(incoming_token); UserManager.isValid()) { - server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, - std::string("Opened connection with ") + incoming_token); - LOG("Opened connection with: %s", incoming_token); - - // send my hostname - char _ownHotname_cstr[PATH_MAX]{0}; - sprintf(_ownHotname_cstr, "%s", ownHostname.c_str()); - UserManager.send(_ownHotname_cstr, HOST_NAME_MAX); - - auto *queue = new AtomicQueue(); - { - const std::lock_guard lg(*_guard); - open_connections->insert({remoteHost, queue}); - } - connection_threads->push_back( - new std::thread(serverConnectionHandler, std::move(UserManager), remoteHost, queue, - thread_sleep_time, terminate, incoming_request_queue)); - server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + remoteHost); - } else { - server_println(CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, "Warning: tried to connect to " + - std::string(remoteHost) + - " but connection is not valid"); + DiscoveryService::advertise(selfToken); } } - close(sockfd); } MTCLBackend::MTCLBackend(const std::string &proto, const std::string &port, const int sleep_time) @@ -292,11 +206,6 @@ void MTCLBackend::handshake_servers() { new std::thread(incomingMTCLConnectionListener, ownHostname, ownPort, usedProtocol, std::ref(continue_execution), thread_sleep_times, &open_connections, _guard, &connection_threads, terminate, &incoming_request_queue); - - incoming_UDP_connection_listener_thread = - new std::thread(incomingUDPConnectionListener, terminate, ownHostname, ownPort, - usedProtocol, &open_connections, &connection_threads, thread_sleep_times, - &incoming_request_queue, 
_guard); } MTCLBackend::~MTCLBackend() { @@ -312,11 +221,7 @@ MTCLBackend::~MTCLBackend() { pthread_cancel(incoming_MTCL_connection_listener_thread->native_handle()); incoming_MTCL_connection_listener_thread->join(); - pthread_cancel(incoming_UDP_connection_listener_thread->native_handle()); - incoming_UDP_connection_listener_thread->join(); - delete incoming_MTCL_connection_listener_thread; - delete incoming_UDP_connection_listener_thread; delete continue_execution; delete terminate; @@ -361,3 +266,57 @@ void MTCLBackend::recv_file(char *shm, const std::string &source, long int bytes const auto data = queues->pop(); memcpy(shm, data.object, bytes_expected); } + +void MTCLBackend::connect_to(const std::string &target_token) { + START_LOG(gettid(), "call(target=%s)", target_token.c_str()); + + if (std::string(target_token) == selfToken) { + LOG("Skipping to connect to self"); + return; + } + + const std::string hostname_port(target_token); + + std::string remoteHostname = hostname_port.substr( + hostname_port.find(':') + 1, // Drop proto + hostname_port.find_last_of(':') - hostname_port.find(':') - 1 // drop port + ); + + /* + * Connect to remote only if its hostname is lexically smaller than self hostname + * If current server hostname is equal to remoteHostname, avoid connection + * TODO: extend this to support also different workflows on same nodes. 
(NB: right now we expect + different MCAST groups ) + */ + if (ownHostname >= remoteHostname) { + return; + } + + if (open_connections.find(remoteHostname) != open_connections.end()) { + LOG("Remote host %s is already connected", remoteHostname.c_str()); + return; + } + + if (auto UserManager = MTCL::Manager::connect(target_token); UserManager.isValid()) { + LOG("Opened connection with: %s", target_token.c_str()); + + // send my hostname + char _ownHotname_cstr[PATH_MAX]{0}; + sprintf(_ownHotname_cstr, "%s", ownHostname.c_str()); + UserManager.send(_ownHotname_cstr, HOST_NAME_MAX); + + auto *queue = new AtomicQueue(); + { + const std::lock_guard lg(*_guard); + open_connections.insert({remoteHostname, queue}); + } + connection_threads.push_back( + new std::thread(serverConnectionHandler, std::move(UserManager), remoteHostname, queue, + thread_sleep_times, terminate, &incoming_request_queue)); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + target_token); + } else { + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, "Warning: tried to connect to " + + std::string(remoteHostname) + + " but connection is not valid"); + } +} diff --git a/capio/tests/unit/server/src/main.cpp b/capio/tests/unit/server/src/main.cpp index 28cb277e4..83be6583c 100644 --- a/capio/tests/unit/server/src/main.cpp +++ b/capio/tests/unit/server/src/main.cpp @@ -3,6 +3,7 @@ #include "capiocl.hpp" #include "capiocl/engine.h" #include "client-manager/client_manager.hpp" +#include "remote/discovery.hpp" #include "storage/manager.hpp" #include "utils/capiocl_adapter.hpp" #include "utils/location.hpp" @@ -11,6 +12,7 @@ capiocl::engine::Engine *capio_cl_engine = nullptr; StorageManager *storage_manager = nullptr; ClientManager *client_manager = nullptr; Backend *backend = nullptr; +DiscoveryService *discovery_service = nullptr; const capiocl::engine::Engine &CapioCLEngine::get() { return *capio_cl_engine; } @@ -19,15 +21,17 @@ class ServerUnitTestEnvironment : public 
testing::Environment { explicit ServerUnitTestEnvironment() = default; void SetUp() override { - capio_cl_engine = new capiocl::engine::Engine(false); - client_manager = new ClientManager(); - storage_manager = new StorageManager(); + capio_cl_engine = new capiocl::engine::Engine(false); + client_manager = new ClientManager(); + storage_manager = new StorageManager(); + discovery_service = new DiscoveryService(); } void TearDown() override { delete storage_manager; delete client_manager; delete capio_cl_engine; + delete discovery_service; } }; From 74dafd7c7000eaea956d6377c7c6d12b150b15d7 Mon Sep 17 00:00:00 2001 From: = Date: Tue, 14 Apr 2026 14:18:55 +0100 Subject: [PATCH 10/13] Cleanup code. TODO: munmap_chunk() error --- capio/server/include/remote/backend/mtcl.hpp | 22 ++-- capio/server/include/remote/listener.hpp | 2 + capio/server/src/mtcl_backend.cpp | 116 +++++++++---------- 3 files changed, 70 insertions(+), 70 deletions(-) diff --git a/capio/server/include/remote/backend/mtcl.hpp b/capio/server/include/remote/backend/mtcl.hpp index aabd6f83a..f7c633745 100644 --- a/capio/server/include/remote/backend/mtcl.hpp +++ b/capio/server/include/remote/backend/mtcl.hpp @@ -18,6 +18,8 @@ #include "remote/atomic_queue.hpp" #include "remote/backend.hpp" +#include + typedef unsigned long long int capio_off64_t; /** @@ -31,14 +33,16 @@ class HandleUser; // TODO: extend backend class class MTCLBackend : public Backend { - std::string selfToken, ownHostname, ownPort, usedProtocol; + int thread_sleep_times = 0; + bool continue_execution = true; + + const std::string selfToken, ownPort, usedProtocol; + + std::shared_mutex open_connections_lock; std::unordered_map *> open_connections; - int thread_sleep_times = 0; - bool *continue_execution = new bool; - std::mutex *_guard; - std::thread *incoming_MTCL_connection_listener_thread = nullptr; + + std::thread *incoming_connection = nullptr; std::vector connection_threads; - bool *terminate; AtomicQueue incoming_request_queue; 
@@ -55,15 +59,15 @@ class MTCLBackend : public Backend { * @param continue_execution * @param sleep_time * @param open_connections - * @param guard + * @param open_connection_guard * @param _connection_threads - * @param terminate + * @param incoming_request_queue */ void static incomingMTCLConnectionListener( const std::string &ownHostname, const std::string &ownPort, const std::string &usedProtocol, const bool *continue_execution, int sleep_time, std::unordered_map *> *open_connections, - std::mutex *guard, std::vector *_connection_threads, bool *terminate, + std::shared_mutex *open_connection_guard, std::vector *_connection_threads, AtomicQueue *incoming_request_queue); public: diff --git a/capio/server/include/remote/listener.hpp b/capio/server/include/remote/listener.hpp index cb96ec6d9..bf9a3c597 100644 --- a/capio/server/include/remote/listener.hpp +++ b/capio/server/include/remote/listener.hpp @@ -45,6 +45,8 @@ inline Backend *select_backend(const std::string &backend_name, int argc, char * LOG("backend selected: MTCL"); std::cout << CAPIO_LOG_SERVER_CLI_LEVEL_INFO << "Starting CAPIO with MTCL backend" << std::endl; + char hostname[HOST_NAME_MAX]{0}; + gethostname(hostname, HOST_NAME_MAX); return new MTCLBackend("TCP", "1234", 1000000); } diff --git a/capio/server/src/mtcl_backend.cpp b/capio/server/src/mtcl_backend.cpp index 4b92e870d..009382d70 100644 --- a/capio/server/src/mtcl_backend.cpp +++ b/capio/server/src/mtcl_backend.cpp @@ -47,15 +47,15 @@ RemoteRequest MTCLBackend::read_next_request() { * `terminate` flag is false. * @param HandlerPointer A valid MTCL HandlePointer for the connection. * @param remote_hostname The hostname of the remote endpoint. - * @param queue Pointer to the communication hub containing inbound and outbound sub-queues. + * @param queue Pointer to the communication queue containing inbound and outbound sub-queues. * @param sleep_time Microseconds to sleep between thread cycles to prevent CPU pinning. 
- * @param terminate Reference to a heap-allocated boolean controlled by the main thread + * @param continue_execution Reference to a boolean flag to know when to stop execution * to signal execution shutdown. * @param incoming_request_queue */ void serverConnectionHandler(MTCL::HandleUser HandlerPointer, const std::string &remote_hostname, AtomicQueue *queue, const int sleep_time, - const bool *terminate, + const bool *continue_execution, AtomicQueue *incoming_request_queue) { char ownHostname[HOST_NAME_MAX]; @@ -122,7 +122,7 @@ void serverConnectionHandler(MTCL::HandleUser HandlerPointer, const std::string } // terminate phase - if (*terminate) { + if (!*continue_execution) { LOG("[TERM PHASE] Closing connection"); HandlerPointer.close(); LOG("[TERM PHASE] Terminating thread server_connection_handler"); @@ -138,7 +138,7 @@ void MTCLBackend::incomingMTCLConnectionListener( const std::string &ownHostname, const std::string &ownPort, const std::string &usedProtocol, const bool *continue_execution, int sleep_time, std::unordered_map *> *open_connections, - std::mutex *guard, std::vector *_connection_threads, bool *terminate, + std::shared_mutex *open_connection_guard, std::vector *_connection_threads, AtomicQueue *incoming_request_queue) { std::string selfToken = usedProtocol + ":" + ownHostname + ":" + ownPort; @@ -151,21 +151,23 @@ void MTCLBackend::incomingMTCLConnectionListener( UserManager.isValid()) { // received MTCL handle LOG("Handle user is valid"); - char connected_hostname[HOST_NAME_MAX] = {0}; - UserManager.receive(connected_hostname, HOST_NAME_MAX); - LOG("Received connection hostname: %s", connected_hostname); + size_t remoteHostnameSize = -1; + UserManager.receive(&remoteHostnameSize, sizeof(remoteHostnameSize)); + auto remote_hostname = new char[remoteHostnameSize + 1]{0}; + UserManager.receive(remote_hostname, remoteHostnameSize); + LOG("Received connection hostname: %s", remote_hostname); auto *queue = new AtomicQueue(); { - const std::lock_guard 
lock(*guard); - open_connections->insert({connected_hostname, queue}); + const std::unique_lock lock(*open_connection_guard); + open_connections->insert({remote_hostname, queue}); } _connection_threads->push_back( - new std::thread(serverConnectionHandler, std::move(UserManager), connected_hostname, - queue, sleep_time, terminate, incoming_request_queue)); - server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + usedProtocol + ":" + - std::string(connected_hostname) + - ":" + ownPort); + new std::thread(serverConnectionHandler, std::move(UserManager), remote_hostname, + queue, sleep_time, continue_execution, incoming_request_queue)); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected from " + usedProtocol + ":" + + std::string(remote_hostname) + ":" + + ownPort); } else { // broadcast ADV on multicast of me being alive by sending token named selfToken DiscoveryService::advertise(selfToken); @@ -174,56 +176,36 @@ void MTCLBackend::incomingMTCLConnectionListener( } MTCLBackend::MTCLBackend(const std::string &proto, const std::string &port, const int sleep_time) - : Backend(HOST_NAME_MAX), selfToken(proto + ":0.0.0.0:" + port), ownPort(port), - usedProtocol(proto), thread_sleep_times(sleep_time) { + : Backend(HOST_NAME_MAX), thread_sleep_times(sleep_time), selfToken(proto + ":0.0.0.0:" + port), + ownPort(port), usedProtocol(proto) { START_LOG(gettid(), "INFO: instance of MTCLBackend"); - terminate = new bool; - *terminate = false; - - _guard = new std::mutex(); - - ownHostname.resize(HOST_NAME_MAX, '\0'); - gethostname(ownHostname.data(), HOST_NAME_MAX); - ownHostname.resize(strnlen(ownHostname.c_str(), HOST_NAME_MAX)); - - LOG("My hostname is %s. Starting to listen on connection %s", ownHostname.c_str(), + LOG("My hostname is %s. 
Starting to listen on connection %s", node_name.c_str(), selfToken.c_str()); std::string hostname_id("server-"); - hostname_id += ownHostname; + hostname_id += node_name; MTCL::Manager::init(hostname_id); - *continue_execution = true; - MTCL::Manager::listen(selfToken); server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "MTCL_backend initialization completed."); } -void MTCLBackend::handshake_servers() { - incoming_MTCL_connection_listener_thread = - new std::thread(incomingMTCLConnectionListener, ownHostname, ownPort, usedProtocol, - std::ref(continue_execution), thread_sleep_times, &open_connections, _guard, - &connection_threads, terminate, &incoming_request_queue); -} - MTCLBackend::~MTCLBackend() { START_LOG(gettid(), "call()"); - *terminate = true; - *continue_execution = false; + continue_execution = false; - for (const auto thread : connection_threads) { - thread->join(); + for (const auto t : connection_threads) { + pthread_cancel(t->native_handle()); + t->join(); } LOG("Terminated connection threads"); - pthread_cancel(incoming_MTCL_connection_listener_thread->native_handle()); - incoming_MTCL_connection_listener_thread->join(); + pthread_cancel(incoming_connection->native_handle()); + incoming_connection->join(); - delete incoming_MTCL_connection_listener_thread; - delete continue_execution; - delete terminate; + delete incoming_connection; LOG("Handler closed."); @@ -232,8 +214,16 @@ MTCLBackend::~MTCLBackend() { server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "MTCL backend cleanup completed."); } +void MTCLBackend::handshake_servers() { + incoming_connection = + new std::thread(incomingMTCLConnectionListener, node_name, ownPort, usedProtocol, + &continue_execution, thread_sleep_times, &open_connections, + &open_connections_lock, &connection_threads, &incoming_request_queue); +} + const std::set MTCLBackend::get_nodes() { std::set keys; + shared_lock_guard slg(open_connections_lock); for (const auto &[hostname, _handle] : open_connections) { 
keys.insert(hostname); } @@ -247,6 +237,7 @@ void MTCLBackend::send_request(const char *message, const int message_len, START_LOG(gettid(), "call(target=%s, message=%s, message_len=%ld)", target.c_str(), message, message_len); + shared_lock_guard slg(open_connections_lock); const auto queues = open_connections.at(target); LOG("obtained access to queue"); @@ -257,11 +248,13 @@ void MTCLBackend::send_request(const char *message, const int message_len, void MTCLBackend::send_file(char *shm, long int nbytes, const std::string &target) { START_LOG(gettid(), "call(target=%s, nbytes=%ld)", target.c_str(), nbytes); + shared_lock_guard slg(open_connections_lock); const auto queue = open_connections.at(target); queue->push(shm, nbytes, target); } void MTCLBackend::recv_file(char *shm, const std::string &source, long int bytes_expected) { + shared_lock_guard slg(open_connections_lock); const auto queues = open_connections.at(source); const auto data = queues->pop(); memcpy(shm, data.object, bytes_expected); @@ -275,12 +268,10 @@ void MTCLBackend::connect_to(const std::string &target_token) { return; } - const std::string hostname_port(target_token); - - std::string remoteHostname = hostname_port.substr( - hostname_port.find(':') + 1, // Drop proto - hostname_port.find_last_of(':') - hostname_port.find(':') - 1 // drop port - ); + std::string remoteHostname = + target_token.substr(target_token.find(':') + 1, // Drop proto + target_token.find_last_of(':') - target_token.find(':') - 1 // drop port + ); /* * Connect to remote only if its hostname is lexically smaller than self hostname @@ -288,31 +279,34 @@ void MTCLBackend::connect_to(const std::string &target_token) { * TODO: extend this to support also different workflows on same nodes. 
(NB: right now we expect different MCAST groups ) */ - if (ownHostname >= remoteHostname) { + if (node_name >= remoteHostname) { return; } - if (open_connections.find(remoteHostname) != open_connections.end()) { - LOG("Remote host %s is already connected", remoteHostname.c_str()); - return; + { + shared_lock_guard slg(open_connections_lock); + if (open_connections.find(remoteHostname) != open_connections.end()) { + LOG("Remote host %s is already connected", remoteHostname.c_str()); + return; + } } if (auto UserManager = MTCL::Manager::connect(target_token); UserManager.isValid()) { LOG("Opened connection with: %s", target_token.c_str()); // send my hostname - char _ownHotname_cstr[PATH_MAX]{0}; - sprintf(_ownHotname_cstr, "%s", ownHostname.c_str()); - UserManager.send(_ownHotname_cstr, HOST_NAME_MAX); + const size_t ownHostnameLen = node_name.size(); + UserManager.send(&ownHostnameLen, sizeof(ownHostnameLen)); + UserManager.send(node_name.c_str(), ownHostnameLen); auto *queue = new AtomicQueue(); { - const std::lock_guard lg(*_guard); + const std::lock_guard lg(open_connections_lock); open_connections.insert({remoteHostname, queue}); } connection_threads.push_back( new std::thread(serverConnectionHandler, std::move(UserManager), remoteHostname, queue, - thread_sleep_times, terminate, &incoming_request_queue)); + thread_sleep_times, &continue_execution, &incoming_request_queue)); server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + target_token); } else { server_println(CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, "Warning: tried to connect to " + From 1d042af74d57ba72ce33c527636d2e54d11f1958 Mon Sep 17 00:00:00 2001 From: = Date: Tue, 14 Apr 2026 14:38:59 +0100 Subject: [PATCH 11/13] Code cleanup. 
still problem with munmap --- capio/server/include/remote/backend/mtcl.hpp | 2 +- capio/server/src/discovery_service.cpp | 5 +++++ capio/server/src/mtcl_backend.cpp | 10 ++++------ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/capio/server/include/remote/backend/mtcl.hpp b/capio/server/include/remote/backend/mtcl.hpp index f7c633745..55b935313 100644 --- a/capio/server/include/remote/backend/mtcl.hpp +++ b/capio/server/include/remote/backend/mtcl.hpp @@ -41,7 +41,7 @@ class MTCLBackend : public Backend { std::shared_mutex open_connections_lock; std::unordered_map *> open_connections; - std::thread *incoming_connection = nullptr; + std::thread *incoming_connection_thread = nullptr; std::vector connection_threads; AtomicQueue incoming_request_queue; diff --git a/capio/server/src/discovery_service.cpp b/capio/server/src/discovery_service.cpp index 5bf856f70..065d8d2ad 100644 --- a/capio/server/src/discovery_service.cpp +++ b/capio/server/src/discovery_service.cpp @@ -143,6 +143,7 @@ void DiscoveryService::start(unsigned int adv_delay, const std::string &token, } void DiscoveryService::stop() { terminate = true; +<<<<<<< HEAD if (mcast_listener_thread != nullptr && mcast_listener_thread->joinable()) { mcast_listener_thread->join(); @@ -158,6 +159,10 @@ void DiscoveryService::stop() { advertisement_thread->join(); advertisement_thread = nullptr; } +======= + listener_thread->join(); + listener_thread = nullptr; +>>>>>>> 97ef533 (Code cleanup. 
still problem with munmap) } DiscoveryService::DiscoveryService(const std::string &mcast_addr, const unsigned int mcast_port) diff --git a/capio/server/src/mtcl_backend.cpp b/capio/server/src/mtcl_backend.cpp index 009382d70..9f4607cce 100644 --- a/capio/server/src/mtcl_backend.cpp +++ b/capio/server/src/mtcl_backend.cpp @@ -196,16 +196,14 @@ MTCLBackend::~MTCLBackend() { START_LOG(gettid(), "call()"); continue_execution = false; + incoming_connection_thread->join(); + for (const auto t : connection_threads) { - pthread_cancel(t->native_handle()); t->join(); } LOG("Terminated connection threads"); - pthread_cancel(incoming_connection->native_handle()); - incoming_connection->join(); - - delete incoming_connection; + delete incoming_connection_thread; LOG("Handler closed."); @@ -215,7 +213,7 @@ MTCLBackend::~MTCLBackend() { } void MTCLBackend::handshake_servers() { - incoming_connection = + incoming_connection_thread = new std::thread(incomingMTCLConnectionListener, node_name, ownPort, usedProtocol, &continue_execution, thread_sleep_times, &open_connections, &open_connections_lock, &connection_threads, &incoming_request_queue); From 357a44839e979ba450e9d14331ec09d0463977a3 Mon Sep 17 00:00:00 2001 From: marcoSanti Date: Sat, 25 Apr 2026 22:01:28 +0200 Subject: [PATCH 12/13] fixed discovery service --- capio/server/capio_server.cpp | 10 +++ capio/server/include/remote/backend/mtcl.hpp | 5 +- capio/server/src/discovery_service.cpp | 5 -- capio/server/src/mtcl_backend.cpp | 64 +++++++++++--------- 4 files changed, 47 insertions(+), 37 deletions(-) diff --git a/capio/server/capio_server.cpp b/capio/server/capio_server.cpp index 782697582..c333c492c 100644 --- a/capio/server/capio_server.cpp +++ b/capio/server/capio_server.cpp @@ -122,6 +122,7 @@ int main(int argc, char **argv) { server_println(line, "", "", ""); } +<<<<<<< HEAD const auto configuration = parseCLI(argc, argv); if (configuration.capio_cl_dynamic_config) { @@ -144,11 +145,20 @@ int main(int argc, char 
**argv) { discovery_service = new DiscoveryService(); backend = select_backend(configuration.backend_name, argc, argv); +======= + discovery_service = new DiscoveryService(); + + parseCLI(argc, argv); +>>>>>>> 78863f6 (fixed discovery service) START_LOG(gettid(), "call()"); open_files_location(); +<<<<<<< HEAD +======= + shm_canary = new CapioShmCanary(capio_cl_engine->getWorkflowName()); +>>>>>>> 78863f6 (fixed discovery service) storage_manager = new StorageManager(); client_manager = new ClientManager(); diff --git a/capio/server/include/remote/backend/mtcl.hpp b/capio/server/include/remote/backend/mtcl.hpp index 55b935313..c90da12ba 100644 --- a/capio/server/include/remote/backend/mtcl.hpp +++ b/capio/server/include/remote/backend/mtcl.hpp @@ -53,7 +53,6 @@ class MTCLBackend : public Backend { * parameter, then the method will issue an advertisement on UDP multicast of its alive state * so that other servers may instantiate a new connection with me. * - * @param ownHostname * @param ownPort * @param usedProtocol * @param continue_execution @@ -64,8 +63,8 @@ class MTCLBackend : public Backend { * @param incoming_request_queue */ void static incomingMTCLConnectionListener( - const std::string &ownHostname, const std::string &ownPort, const std::string &usedProtocol, - const bool *continue_execution, int sleep_time, + const std::string &ownPort, const std::string &usedProtocol, const bool *continue_execution, + int sleep_time, std::unordered_map *> *open_connections, std::shared_mutex *open_connection_guard, std::vector *_connection_threads, AtomicQueue *incoming_request_queue); diff --git a/capio/server/src/discovery_service.cpp b/capio/server/src/discovery_service.cpp index 065d8d2ad..5bf856f70 100644 --- a/capio/server/src/discovery_service.cpp +++ b/capio/server/src/discovery_service.cpp @@ -143,7 +143,6 @@ void DiscoveryService::start(unsigned int adv_delay, const std::string &token, } void DiscoveryService::stop() { terminate = true; -<<<<<<< HEAD if 
(mcast_listener_thread != nullptr && mcast_listener_thread->joinable()) { mcast_listener_thread->join(); @@ -159,10 +158,6 @@ void DiscoveryService::stop() { advertisement_thread->join(); advertisement_thread = nullptr; } -======= - listener_thread->join(); - listener_thread = nullptr; ->>>>>>> 97ef533 (Code cleanup. still problem with munmap) } DiscoveryService::DiscoveryService(const std::string &mcast_addr, const unsigned int mcast_port) diff --git a/capio/server/src/mtcl_backend.cpp b/capio/server/src/mtcl_backend.cpp index 9f4607cce..b394ad855 100644 --- a/capio/server/src/mtcl_backend.cpp +++ b/capio/server/src/mtcl_backend.cpp @@ -53,7 +53,7 @@ RemoteRequest MTCLBackend::read_next_request() { * to signal execution shutdown. * @param incoming_request_queue */ -void serverConnectionHandler(MTCL::HandleUser HandlerPointer, const std::string &remote_hostname, +void serverConnectionHandler(MTCL::HandleUser HandlerPointer, const std::string remote_hostname, AtomicQueue *queue, const int sleep_time, const bool *continue_execution, AtomicQueue *incoming_request_queue) { @@ -135,14 +135,11 @@ void serverConnectionHandler(MTCL::HandleUser HandlerPointer, const std::string } void MTCLBackend::incomingMTCLConnectionListener( - const std::string &ownHostname, const std::string &ownPort, const std::string &usedProtocol, - const bool *continue_execution, int sleep_time, - std::unordered_map *> *open_connections, + const std::string &ownPort, const std::string &usedProtocol, const bool *continue_execution, + int sleep_time, std::unordered_map *> *open_connections, std::shared_mutex *open_connection_guard, std::vector *_connection_threads, AtomicQueue *incoming_request_queue) { - std::string selfToken = usedProtocol + ":" + ownHostname + ":" + ownPort; - START_LOG(gettid(), "call(sleep_time=%d)", sleep_time); while (*continue_execution) { @@ -152,25 +149,30 @@ void MTCLBackend::incomingMTCLConnectionListener( // received MTCL handle LOG("Handle user is valid"); size_t 
remoteHostnameSize = -1; - UserManager.receive(&remoteHostnameSize, sizeof(remoteHostnameSize)); - auto remote_hostname = new char[remoteHostnameSize + 1]{0}; - UserManager.receive(remote_hostname, remoteHostnameSize); - LOG("Received connection hostname: %s", remote_hostname); + if (UserManager.receive(&remoteHostnameSize, sizeof(remoteHostnameSize)) <= 0 || + remoteHostnameSize == 0 || remoteHostnameSize > HOST_NAME_MAX) { + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, + "Remote hostname size received is zero or negative"); + UserManager.close(); + continue; + } + + std::string remote_hostname(remoteHostnameSize, '\0'); + UserManager.receive(remote_hostname.data(), remoteHostnameSize); + LOG("Received connection hostname: %s", remote_hostname.c_str()); auto *queue = new AtomicQueue(); { const std::unique_lock lock(*open_connection_guard); - open_connections->insert({remote_hostname, queue}); + (*open_connections)[remote_hostname] = queue; } - _connection_threads->push_back( - new std::thread(serverConnectionHandler, std::move(UserManager), remote_hostname, - queue, sleep_time, continue_execution, incoming_request_queue)); - server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected from " + usedProtocol + ":" + - std::string(remote_hostname) + ":" + - ownPort); - } else { - // broadcast ADV on multicast of me being alive by sending token named selfToken - DiscoveryService::advertise(selfToken); + // _connection_threads->push_back( + // new std::thread(serverConnectionHandler, std::move(UserManager), + // remote_hostname, + // queue, sleep_time, continue_execution, incoming_request_queue)); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + usedProtocol + ":" + + remote_hostname + ":" + ownPort + + " (incoming)"); } } } @@ -188,8 +190,10 @@ MTCLBackend::MTCLBackend(const std::string &proto, const std::string &port, cons MTCL::Manager::init(hostname_id); MTCL::Manager::listen(selfToken); - server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, 
"MTCL_backend initialization completed."); + + discovery_service->setAdvertisementToken(usedProtocol + ":" + node_name + ":" + ownPort); + discovery_service->start(1000); } MTCLBackend::~MTCLBackend() { @@ -214,9 +218,9 @@ MTCLBackend::~MTCLBackend() { void MTCLBackend::handshake_servers() { incoming_connection_thread = - new std::thread(incomingMTCLConnectionListener, node_name, ownPort, usedProtocol, - &continue_execution, thread_sleep_times, &open_connections, - &open_connections_lock, &connection_threads, &incoming_request_queue); + new std::thread(incomingMTCLConnectionListener, ownPort, usedProtocol, &continue_execution, + thread_sleep_times, &open_connections, &open_connections_lock, + &connection_threads, &incoming_request_queue); } const std::set MTCLBackend::get_nodes() { @@ -300,12 +304,14 @@ void MTCLBackend::connect_to(const std::string &target_token) { auto *queue = new AtomicQueue(); { const std::lock_guard lg(open_connections_lock); - open_connections.insert({remoteHostname, queue}); + open_connections[remoteHostname] = queue; } - connection_threads.push_back( - new std::thread(serverConnectionHandler, std::move(UserManager), remoteHostname, queue, - thread_sleep_times, &continue_execution, &incoming_request_queue)); - server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + target_token); + // connection_threads.push_back( + // new std::thread(serverConnectionHandler, std::move(UserManager), remoteHostname, + // queue, + // thread_sleep_times, &continue_execution, &incoming_request_queue)); + server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, + "Connected to " + target_token + " (outgoing)"); } else { server_println(CAPIO_LOG_SERVER_CLI_LEVEL_WARNING, "Warning: tried to connect to " + std::string(remoteHostname) + From 37df7602dd6b5dac1b062a3d2da25762482a2ee6 Mon Sep 17 00:00:00 2001 From: marcoSanti Date: Sun, 26 Apr 2026 11:43:53 +0200 Subject: [PATCH 13/13] WIP --- capio/server/capio_server.cpp | 10 ---------- 
capio/server/src/mpi_backend.cpp | 2 -- capio/server/src/mtcl_backend.cpp | 19 +++++++------------ 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/capio/server/capio_server.cpp b/capio/server/capio_server.cpp index c333c492c..782697582 100644 --- a/capio/server/capio_server.cpp +++ b/capio/server/capio_server.cpp @@ -122,7 +122,6 @@ int main(int argc, char **argv) { server_println(line, "", "", ""); } -<<<<<<< HEAD const auto configuration = parseCLI(argc, argv); if (configuration.capio_cl_dynamic_config) { @@ -145,20 +144,11 @@ int main(int argc, char **argv) { discovery_service = new DiscoveryService(); backend = select_backend(configuration.backend_name, argc, argv); -======= - discovery_service = new DiscoveryService(); - - parseCLI(argc, argv); ->>>>>>> 78863f6 (fixed discovery service) START_LOG(gettid(), "call()"); open_files_location(); -<<<<<<< HEAD -======= - shm_canary = new CapioShmCanary(capio_cl_engine->getWorkflowName()); ->>>>>>> 78863f6 (fixed discovery service) storage_manager = new StorageManager(); client_manager = new ClientManager(); diff --git a/capio/server/src/mpi_backend.cpp b/capio/server/src/mpi_backend.cpp index c1deee928..e62e2013f 100644 --- a/capio/server/src/mpi_backend.cpp +++ b/capio/server/src/mpi_backend.cpp @@ -119,12 +119,10 @@ void MPIBackend::recv_file(char *shm, const std::string &source, long int bytes_ LOG("Chunk size is %ld bytes", bytes_received); } } -void MPIBackend::connect_to(const std::string &target) { return; } void MPIBackend::connect_to(const std::string &target) { START_LOG(gettid(), "call(target=%s)", target.c_str()); LOG("connect_to called on backend that is not dynamic. 
ignoring call"); - return; } MPISYNCBackend::MPISYNCBackend(int argc, char *argv[]) : MPIBackend(argc, argv) { diff --git a/capio/server/src/mtcl_backend.cpp b/capio/server/src/mtcl_backend.cpp index b394ad855..95bb88489 100644 --- a/capio/server/src/mtcl_backend.cpp +++ b/capio/server/src/mtcl_backend.cpp @@ -53,7 +53,7 @@ RemoteRequest MTCLBackend::read_next_request() { * to signal execution shutdown. * @param incoming_request_queue */ -void serverConnectionHandler(MTCL::HandleUser HandlerPointer, const std::string remote_hostname, +void serverConnectionHandler(MTCL::HandleUser HandlerPointer, const std::string &remote_hostname, AtomicQueue *queue, const int sleep_time, const bool *continue_execution, AtomicQueue *incoming_request_queue) { @@ -166,10 +166,9 @@ void MTCLBackend::incomingMTCLConnectionListener( const std::unique_lock lock(*open_connection_guard); (*open_connections)[remote_hostname] = queue; } - // _connection_threads->push_back( - // new std::thread(serverConnectionHandler, std::move(UserManager), - // remote_hostname, - // queue, sleep_time, continue_execution, incoming_request_queue)); + _connection_threads->push_back( + new std::thread(serverConnectionHandler, std::move(UserManager), remote_hostname, + queue, sleep_time, continue_execution, incoming_request_queue)); server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + usedProtocol + ":" + remote_hostname + ":" + ownPort + " (incoming)"); @@ -191,9 +190,6 @@ MTCLBackend::MTCLBackend(const std::string &proto, const std::string &port, cons MTCL::Manager::listen(selfToken); server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "MTCL_backend initialization completed."); - - discovery_service->setAdvertisementToken(usedProtocol + ":" + node_name + ":" + ownPort); - discovery_service->start(1000); } MTCLBackend::~MTCLBackend() { @@ -306,10 +302,9 @@ void MTCLBackend::connect_to(const std::string &target_token) { const std::lock_guard lg(open_connections_lock); open_connections[remoteHostname] = 
queue; } - // connection_threads.push_back( - // new std::thread(serverConnectionHandler, std::move(UserManager), remoteHostname, - // queue, - // thread_sleep_times, &continue_execution, &incoming_request_queue)); + connection_threads.push_back( + new std::thread(serverConnectionHandler, std::move(UserManager), remoteHostname, queue, + thread_sleep_times, &continue_execution, &incoming_request_queue)); server_println(CAPIO_LOG_SERVER_CLI_LEVEL_INFO, "Connected to " + target_token + " (outgoing)"); } else {