From cb46f24e1ba692b06f6a825edd91b34cabbd02ff Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Fri, 6 Mar 2026 21:12:11 +0000 Subject: [PATCH 01/13] Implement remove and remove_if algorithms for datapar execution Signed-off-by: BhoomishGupta --- libs/core/algorithms/CMakeLists.txt | 2 + .../hpx/parallel/algorithms/detail/remove.hpp | 91 +++++++++++ .../hpx/parallel/algorithms/remove.hpp | 41 ++--- .../include/hpx/parallel/datapar.hpp | 1 + .../include/hpx/parallel/datapar/remove.hpp | 114 ++++++++++++++ .../unit/datapar_algorithms/CMakeLists.txt | 2 + .../datapar_algorithms/remove_datapar.cpp | 126 ++++++++++++++++ .../datapar_algorithms/remove_if_datapar.cpp | 142 ++++++++++++++++++ 8 files changed, 491 insertions(+), 28 deletions(-) create mode 100644 libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp create mode 100644 libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp create mode 100644 libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp create mode 100644 libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp diff --git a/libs/core/algorithms/CMakeLists.txt b/libs/core/algorithms/CMakeLists.txt index 1b880638f24d..50ad4b41a698 100644 --- a/libs/core/algorithms/CMakeLists.txt +++ b/libs/core/algorithms/CMakeLists.txt @@ -39,6 +39,7 @@ set(algorithms_headers hpx/parallel/algorithms/detail/pivot.hpp hpx/parallel/algorithms/detail/reduce.hpp hpx/parallel/algorithms/detail/reduce_deterministic.hpp + hpx/parallel/algorithms/detail/remove.hpp hpx/parallel/algorithms/detail/replace.hpp hpx/parallel/algorithms/detail/rfa.hpp hpx/parallel/algorithms/detail/rotate.hpp @@ -239,6 +240,7 @@ if(HPX_WITH_DATAPAR) hpx/parallel/datapar/loop.hpp hpx/parallel/datapar/mismatch.hpp hpx/parallel/datapar/reduce.hpp + hpx/parallel/datapar/remove.hpp hpx/parallel/datapar/replace.hpp hpx/parallel/datapar/search.hpp hpx/parallel/datapar/transfer.hpp diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp new file mode 100644 index 000000000000..d93cb94a4029 --- /dev/null +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp @@ -0,0 +1,91 @@ +// Copyright (c) 2026 Bhoomish Gupta +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace hpx::parallel::detail { + + /////////////////////////////////////////////////////////////////////////// + HPX_CXX_CORE_EXPORT template + struct sequential_remove_if_t final + : hpx::functional::detail::tag_fallback< + sequential_remove_if_t> + { + private: + template + friend constexpr Iter tag_fallback_invoke( + sequential_remove_if_t, ExPolicy&&, Iter first, Sent last, + Pred pred, Proj proj) + { + first = hpx::parallel::detail::sequential_find_if( + first, last, pred, proj); + + if (first != last) + { + for (Iter i = first; ++i != last;) + if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *i))) + { + *first++ = HPX_MOVE(*i); + } + } + return first; + } + }; + +#if !defined(HPX_COMPUTE_DEVICE_CODE) + HPX_CXX_CORE_EXPORT template + inline constexpr sequential_remove_if_t sequential_remove_if = + sequential_remove_if_t{}; +#else + HPX_CXX_CORE_EXPORT template + HPX_HOST_DEVICE HPX_FORCEINLINE auto sequential_remove_if(Args&&... args) + { + return sequential_remove_if_t{}(std::forward(args)...); + } +#endif + + /////////////////////////////////////////////////////////////////////////// + HPX_CXX_CORE_EXPORT template + struct sequential_remove_t final + : hpx::functional::detail::tag_fallback> + { + private: + template + friend constexpr Iter tag_fallback_invoke(sequential_remove_t, + ExPolicy&& policy, Iter first, Sent last, T const& value, + Proj proj) + { + return sequential_remove_if( + HPX_FORWARD(ExPolicy, policy), first, last, + [&value](auto const& a) { return value == a; }, + proj); + } + }; + +#if !defined(HPX_COMPUTE_DEVICE_CODE) + HPX_CXX_CORE_EXPORT template + inline constexpr sequential_remove_t sequential_remove = + sequential_remove_t{}; +#else + HPX_CXX_CORE_EXPORT template + HPX_HOST_DEVICE HPX_FORCEINLINE auto sequential_remove(Args&&... args) + { + return sequential_remove_t{}(std::forward(args)...); + } +#endif + +} // namespace hpx::parallel::detail diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp index 987066a5880b..6dcd9d8bd72e 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp @@ -223,6 +223,7 @@ namespace hpx { #include #include #include +#include #include #include #include @@ -243,25 +244,6 @@ namespace hpx::parallel { namespace detail { /// \cond NOINTERNAL - HPX_CXX_CORE_EXPORT template - constexpr Iter sequential_remove_if( - Iter first, Sent last, Pred pred, Proj proj) - { - first = hpx::parallel::detail::sequential_find_if< - hpx::execution::sequenced_policy>(first, last, pred, proj); - - if (first != last) - { - for (Iter i = first; ++i != last;) - if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *i))) - { - *first++ = std::ranges::iter_move(i); - } - } - return first; - } - HPX_CXX_CORE_EXPORT template struct remove_if : public algorithm, FwdIter> { @@ -273,9 +255,11 @@ namespace hpx::parallel { template static constexpr Iter sequential( - ExPolicy, Iter first, Sent last, Pred&& pred, Proj&& proj) + ExPolicy&& policy, Iter first, Sent last, Pred&& pred, + Proj&& proj) { - return sequential_remove_if(first, last, + return sequential_remove_if( + HPX_FORWARD(ExPolicy, policy), first, last, HPX_FORWARD(Pred, pred), HPX_FORWARD(Proj, proj)); } @@ -306,12 +290,16 @@ namespace hpx::parallel { // Note: replacing the invoke() with HPX_INVOKE() // below makes gcc generate errors + using inner_policy_type = + decltype(hpx::execution::experimental::to_non_simd( + std::declval>())); + auto f1 = [pred = HPX_FORWARD(Pred, pred), proj = HPX_FORWARD(Proj, proj)]( zip_iterator part_begin, std::size_t part_size) -> void { // MSVC complains if pred or proj is captured by ref below - util::loop_n>(part_begin, part_size, + util::loop_n(part_begin, part_size, [pred, proj](zip_iterator it) mutable { bool f = hpx::invoke( pred, hpx::invoke(proj, get<0>(*it))); @@ -325,11 +313,10 @@ namespace hpx::parallel { auto dest = first; auto part_size = count; - using execution_policy_type = std::decay_t; if (dest == get<0>(part_begin.get_iterator_tuple())) { // Self-assignment must be detected. - util::loop_n( + util::loop_n( part_begin, part_size, [&dest](zip_iterator it) { if (!get<1>(*it)) { @@ -344,7 +331,7 @@ namespace hpx::parallel { else { // Self-assignment can't be performed. - util::loop_n( + util::loop_n( part_begin, part_size, [&dest](zip_iterator it) { if (!get<1>(*it)) *dest++ = std::ranges::iter_move( @@ -446,10 +433,8 @@ namespace hpx { friend decltype(auto) tag_fallback_invoke(hpx::remove_t, ExPolicy&& policy, FwdIter first, FwdIter last, T const& value) { - using Type = typename std::iterator_traits::value_type; - return hpx::remove_if(HPX_FORWARD(ExPolicy, policy), first, last, - [value](Type const& a) -> bool { return value == a; }); + [value](auto const& a) { return value == a; }); } } remove{}; } // namespace hpx diff --git a/libs/core/algorithms/include/hpx/parallel/datapar.hpp b/libs/core/algorithms/include/hpx/parallel/datapar.hpp index 3ae490748c73..c20f8ebccd78 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp new file mode 100644 index 000000000000..d0b9997001e3 --- /dev/null +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -0,0 +1,114 @@ +// Copyright (c) 2026 Bhoomish Gupta +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include + +#if defined(HPX_HAVE_DATAPAR) +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace hpx::parallel::detail { + + /////////////////////////////////////////////////////////////////////////// + HPX_CXX_CORE_EXPORT template + struct datapar_remove_if + { + template + static inline Iter call( + ExPolicy&& policy, Iter first, Sent last, Pred pred, Proj proj) + { + first = hpx::parallel::detail::sequential_find_if( + first, last, pred, proj); + + if (first != last) + { + for (Iter i = first; ++i != last;) + if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *i))) + { + *first++ = HPX_MOVE(*i); + } + } + return first; + } + }; + + HPX_CXX_CORE_EXPORT template + requires(hpx::is_vectorpack_execution_policy_v) + HPX_HOST_DEVICE HPX_FORCEINLINE Iter tag_invoke( + sequential_remove_if_t, ExPolicy&& policy, Iter first, + Sent last, Pred pred, Proj proj) + { + if constexpr (hpx::parallel::util::detail::iterator_datapar_compatible< + Iter>::value) + { + return datapar_remove_if::call( + HPX_FORWARD(ExPolicy, policy), first, last, pred, proj); + } + else + { + using base_policy_type = + decltype((hpx::execution::experimental::to_non_simd( + std::declval()))); + return sequential_remove_if( + hpx::execution::experimental::to_non_simd(policy), first, last, + pred, proj); + } + } + + /////////////////////////////////////////////////////////////////////////// + HPX_CXX_CORE_EXPORT template + struct datapar_remove + { + template + static inline Iter call( + ExPolicy&& policy, Iter first, Sent last, T const& value, + Proj proj) + { + return datapar_remove_if::call( + HPX_FORWARD(ExPolicy, policy), first, last, + [&value](auto const& a) { return value == a; }, + proj); + } + }; + + HPX_CXX_CORE_EXPORT template + requires(hpx::is_vectorpack_execution_policy_v) + HPX_HOST_DEVICE HPX_FORCEINLINE Iter tag_invoke( + sequential_remove_t, ExPolicy&& policy, Iter first, + Sent last, T const& value, Proj proj) + { + if constexpr (hpx::parallel::util::detail::iterator_datapar_compatible< + Iter>::value) + { + return datapar_remove::call( + HPX_FORWARD(ExPolicy, policy), first, last, value, proj); + } + else + { + using base_policy_type = + decltype((hpx::execution::experimental::to_non_simd( + std::declval()))); + return sequential_remove( + hpx::execution::experimental::to_non_simd(policy), first, last, + value, proj); + } + } + +} // namespace hpx::parallel::detail + +#endif diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt b/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt index 9d4f949a5652..c4a75fac1e4b 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt @@ -36,6 +36,8 @@ if(HPX_WITH_DATAPAR) mismatch_datapar none_of_datapar reduce_datapar + remove_datapar + remove_if_datapar replace_copy_if_datapar replace_copy_datapar replace_datapar diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp new file mode 100644 index 000000000000..3929e4b906e4 --- /dev/null +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp @@ -0,0 +1,126 @@ +// Copyright (c) 2026 Bhoomish Gupta +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../algorithms/test_utils.hpp" + +/////////////////////////////////////////////////////////////////////////////// +template +void test_remove(ExPolicy policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::begin(d)); + + std::size_t idx = std::rand() % c.size(); //-V104 + int value = c[idx]; + + auto result = hpx::remove( + policy, iterator(std::begin(c)), iterator(std::end(c)), value); + auto solution = std::remove(std::begin(d), std::end(d), value); + + bool equality = + test::equal(std::begin(c), result.base(), std::begin(d), solution); + + HPX_TEST(equality); +} + +template +void test_remove_async(ExPolicy p, IteratorTag) +{ + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::begin(d)); + + std::size_t idx = std::rand() % c.size(); + int value = c[idx]; + + auto f = hpx::remove( + p, iterator(std::begin(c)), iterator(std::end(c)), value); + auto result = f.get(); + auto solution = std::remove(std::begin(d), std::end(d), value); + + bool equality = + test::equal(std::begin(c), result.base(), std::begin(d), solution); + + HPX_TEST(equality); +} + +template +void test_remove() +{ + using namespace hpx::execution; + test_remove(simd, IteratorTag()); + test_remove(par_simd, IteratorTag()); + + test_remove_async(simd(task), IteratorTag()); + test_remove_async(par_simd(task), IteratorTag()); +} + +void remove_test() +{ + test_remove(); + test_remove(); +} + +int hpx_main(hpx::program_options::variables_map& vm) +{ + unsigned int seed = (unsigned int) std::time(nullptr); + if (vm.count("seed")) + seed = vm["seed"].as(); + + std::cout << "using seed: " << seed << std::endl; + std::srand(seed); + + remove_test(); + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + // add command line option which controls the random number generator seed + using namespace hpx::program_options; + options_description desc_commandline( + "Usage: " HPX_APPLICATION_STRING " [options]"); + + desc_commandline.add_options()("seed,s", value(), + "the random number generator seed to use for this run"); + + // By default this test should run on all available cores + std::vector const cfg = {"hpx.os_threads=all"}; + + // Initialize and run HPX + hpx::local::init_params init_args; + init_args.desc_cmdline = desc_commandline; + init_args.cfg = cfg; + + HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0, + "HPX main exited with non-zero status"); + + return hpx::util::report_errors(); +} diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp new file mode 100644 index 000000000000..4c29cc112732 --- /dev/null +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp @@ -0,0 +1,142 @@ +// Copyright (c) 2026 Bhoomish Gupta +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../algorithms/test_utils.hpp" + +//////////////////////////////////////////////////////////////////////////// +struct equal_f +{ + equal_f(int val) + : val_(val) + { + } + + template + auto operator()(T lhs) const + { + return lhs == T(val_); + } + + int val_; +}; + +template +void test_remove_if(ExPolicy policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::begin(d)); + + std::size_t idx = std::rand() % c.size(); + + auto result = hpx::remove_if(policy, iterator(std::begin(c)), + iterator(std::end(c)), equal_f(c[idx])); + auto solution = + std::remove_if(std::begin(d), std::end(d), equal_f(d[idx])); + + bool equality = + test::equal(std::begin(c), result.base(), std::begin(d), solution); + + HPX_TEST(equality); +} + +template +void test_remove_if_async(ExPolicy p, IteratorTag) +{ + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::begin(d)); + + std::size_t idx = std::rand() % c.size(); + + auto f = hpx::remove_if( + p, iterator(std::begin(c)), iterator(std::end(c)), equal_f(c[idx])); + auto result = f.get(); + auto solution = + std::remove_if(std::begin(d), std::end(d), equal_f(d[idx])); + + bool equality = + test::equal(std::begin(c), result.base(), std::begin(d), solution); + + HPX_TEST(equality); +} + +template +void test_remove_if() +{ + using namespace hpx::execution; + test_remove_if(simd, IteratorTag()); + test_remove_if(par_simd, IteratorTag()); + + test_remove_if_async(simd(task), IteratorTag()); + test_remove_if_async(par_simd(task), IteratorTag()); +} + +void remove_if_test() +{ + test_remove_if(); + test_remove_if(); +} + +int hpx_main(hpx::program_options::variables_map& vm) +{ + unsigned int seed = (unsigned int) std::time(nullptr); + if (vm.count("seed")) + seed = vm["seed"].as(); + + std::cout << "using seed: " << seed << std::endl; + std::srand(seed); + + remove_if_test(); + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + // add command line option which controls the random number generator seed + using namespace hpx::program_options; + options_description desc_commandline( + "Usage: " HPX_APPLICATION_STRING " [options]"); + + desc_commandline.add_options()("seed,s", value(), + "the random number generator seed to use for this run"); + + // By default this test should run on all available cores + std::vector const cfg = {"hpx.os_threads=all"}; + + // Initialize and run HPX + hpx::local::init_params init_args; + init_args.desc_cmdline = desc_commandline; + init_args.cfg = cfg; + + HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0, + "HPX main exited with non-zero status"); + + return hpx::util::report_errors(); +} From b2b28ff054e2a6dd40d4e0aa48575a61608ab1f8 Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Fri, 20 Mar 2026 06:00:51 +0530 Subject: [PATCH 02/13] Data Parallelism in datapar_remove_if implementation Signed-off-by: BhoomishGupta --- .../include/hpx/parallel/datapar/remove.hpp | 85 ++++++++++++++++--- .../datapar/execution_policy_mappings.hpp | 54 ------------ .../executors/execution_policy_mappings.hpp | 42 +++++++++ 3 files changed, 116 insertions(+), 65 deletions(-) diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp index d0b9997001e3..e2967f4dee0a 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2026 Bhoomish Gupta +// Copyright (c) 2025 Bhoomish Gupta // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -12,36 +12,99 @@ #include #include #include -#include #include #include #include +#include +#include +#include #include #include namespace hpx::parallel::detail { /////////////////////////////////////////////////////////////////////////// + HPX_CXX_CORE_EXPORT template struct datapar_remove_if { template static inline Iter call( - ExPolicy&& policy, Iter first, Sent last, Pred pred, Proj proj) + ExPolicy&&, Iter first, Sent last, Pred pred, Proj proj) { - first = hpx::parallel::detail::sequential_find_if( - first, last, pred, proj); + using value_type = + typename std::iterator_traits::value_type; + using V = hpx::parallel::traits::vector_pack_type_t; + constexpr std::size_t size = + hpx::parallel::traits::vector_pack_size_v; + + Iter dest = first; - if (first != last) + while (first != last && !util::detail::is_data_aligned(first)) { - for (Iter i = first; ++i != last;) - if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *i))) + if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *first))) + { + if (dest != first) + *dest = HPX_MOVE(*first); + ++dest; + } + ++first; + } + + while (last - first >= static_cast(size)) //Safety + { + V tmp(hpx::parallel::traits::vector_pack_load::aligned(first)); + + auto msk = HPX_INVOKE(pred, HPX_INVOKE(proj, tmp)); + + if (hpx::parallel::traits::none_of(msk)) + { + //no elements match + if (dest != first) { - *first++ = HPX_MOVE(*i); + if (util::detail::is_data_aligned(dest)) + { + hpx::parallel::traits::vector_pack_store::aligned(tmp, dest); + } + else + { + hpx::parallel::traits::vector_pack_store::unaligned(tmp, dest); + } } + std::advance(dest, size); + } + else if (!hpx::parallel::traits::all_of(msk)) + { + //mixed + for (std::size_t i = 0; i < size; ++i) + { + if (!hpx::parallel::traits::get(msk, i)) + { + *dest++ = value_type( + hpx::parallel::traits::get(tmp, i)); + } + } + } + //all elements match + std::advance(first, size); + } + + while (first != last) + { + if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *first))) + { + if (dest != first) + *dest = HPX_MOVE(*first); + ++dest; + } + ++first; } - return first; + + return dest; } }; @@ -80,7 +143,7 @@ namespace hpx::parallel::detail { { return datapar_remove_if::call( HPX_FORWARD(ExPolicy, policy), first, last, - [&value](auto const& a) { return value == a; }, + [&value](auto const& a) { return a == value; }, proj); } }; diff --git a/libs/core/executors/include/hpx/executors/datapar/execution_policy_mappings.hpp b/libs/core/executors/include/hpx/executors/datapar/execution_policy_mappings.hpp index fbb23b4558b9..33dd89edb1b6 100644 --- a/libs/core/executors/include/hpx/executors/datapar/execution_policy_mappings.hpp +++ b/libs/core/executors/include/hpx/executors/datapar/execution_policy_mappings.hpp @@ -12,58 +12,4 @@ #if defined(HPX_HAVE_DATAPAR) #include -#include -#include -#include - -#include -#include - -namespace hpx::execution::experimental { - - /////////////////////////////////////////////////////////////////////////// - // Return the matching non-simd (vectorpack) execution policy - HPX_CXX_CORE_EXPORT inline constexpr struct to_non_simd_t final - : hpx::functional::detail::tag_fallback - { - private: - // any non-simd policy just returns itself - template - friend constexpr decltype(auto) tag_fallback_invoke( - to_non_simd_t, ExPolicy&& policy) noexcept - { - static_assert(!hpx::is_vectorpack_execution_policy_v, - "must not be a simd (vectorpack) execution policy"); - return std::forward(policy); - } - } to_non_simd{}; - - template <> - struct is_execution_policy_mapping : std::true_type - { - }; - - // Return the matching simd (vectorpack) execution policy - HPX_CXX_CORE_EXPORT inline constexpr struct to_simd_t final - : hpx::functional::detail::tag_fallback - { - private: - // any simd policy just returns itself - - template - friend constexpr decltype(auto) tag_fallback_invoke( - to_simd_t, ExPolicy&& policy) noexcept - { - static_assert(hpx::is_vectorpack_execution_policy_v, - "must be a simd (vectorpack) execution policy"); - return std::forward(policy); - } - } to_simd{}; - - template <> - struct is_execution_policy_mapping : std::true_type - { - }; -} // namespace hpx::execution::experimental - #endif diff --git a/libs/core/executors/include/hpx/executors/execution_policy_mappings.hpp b/libs/core/executors/include/hpx/executors/execution_policy_mappings.hpp index 36dc382d1671..663c3d7a67b7 100644 --- a/libs/core/executors/include/hpx/executors/execution_policy_mappings.hpp +++ b/libs/core/executors/include/hpx/executors/execution_policy_mappings.hpp @@ -156,4 +156,46 @@ namespace hpx::execution::experimental { struct is_execution_policy_mapping : std::true_type { }; + + HPX_CXX_CORE_EXPORT inline constexpr struct to_non_simd_t final + : hpx::functional::detail::tag_fallback + { + private: + // any non-simd policy just returns itself + template + friend constexpr decltype(auto) tag_fallback_invoke( + to_non_simd_t, ExPolicy&& policy) noexcept + { + static_assert(!hpx::is_vectorpack_execution_policy_v, + "must not be a simd (vectorpack) execution policy"); + return std::forward(policy); + } + } to_non_simd{}; + + template <> + struct is_execution_policy_mapping : std::true_type + { + }; + + // Return the matching simd (vectorpack) execution policy + HPX_CXX_CORE_EXPORT inline constexpr struct to_simd_t final + : hpx::functional::detail::tag_fallback + { + private: + // any simd policy just returns itself + + template + friend constexpr decltype(auto) tag_fallback_invoke( + to_simd_t, ExPolicy&& policy) noexcept + { + static_assert(hpx::is_vectorpack_execution_policy_v, + "must be a simd (vectorpack) execution policy"); + return std::forward(policy); + } + } to_simd{}; + + template <> + struct is_execution_policy_mapping : std::true_type + { + }; } // namespace hpx::execution::experimental From 255c9b487f653e42a99e31c075ebaa78662402e6 Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Tue, 31 Mar 2026 09:19:21 +0530 Subject: [PATCH 03/13] improve simd remove algorithm implementation Signed-off-by: BhoomishGupta --- .../hpx/parallel/algorithms/detail/remove.hpp | 14 ++--- .../hpx/parallel/algorithms/remove.hpp | 35 ++++++------ .../include/hpx/parallel/datapar/remove.hpp | 42 ++++++++++----- .../datapar_algorithms/remove_datapar.cpp | 6 +-- .../datapar_algorithms/remove_if_datapar.cpp | 10 ++-- .../detail/simd/vector_pack_load_store.hpp | 10 +++- .../traits/vector_pack_conditionals.hpp | 4 +- .../datapar/execution_policy_mappings.hpp | 54 +++++++++++++++++++ .../executors/execution_policy_mappings.hpp | 42 --------------- 9 files changed, 122 insertions(+), 95 deletions(-) diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp index d93cb94a4029..7a57d9c5a0b7 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp @@ -22,14 +22,12 @@ namespace hpx::parallel::detail { /////////////////////////////////////////////////////////////////////////// HPX_CXX_CORE_EXPORT template struct sequential_remove_if_t final - : hpx::functional::detail::tag_fallback< - sequential_remove_if_t> + : hpx::functional::detail::tag_fallback> { private: template - friend constexpr Iter tag_fallback_invoke( - sequential_remove_if_t, ExPolicy&&, Iter first, Sent last, - Pred pred, Proj proj) + friend constexpr Iter tag_fallback_invoke(sequential_remove_if_t, + ExPolicy&&, Iter first, Sent last, Pred pred, Proj proj) { first = hpx::parallel::detail::sequential_find_if( first, last, pred, proj); @@ -66,13 +64,11 @@ namespace hpx::parallel::detail { private: template friend constexpr Iter tag_fallback_invoke(sequential_remove_t, - ExPolicy&& policy, Iter first, Sent last, T const& value, - Proj proj) + ExPolicy&& policy, Iter first, Sent last, T const& value, Proj proj) { return sequential_remove_if( HPX_FORWARD(ExPolicy, policy), first, last, - [&value](auto const& a) { return value == a; }, - proj); + [&value](auto const& a) { return value == a; }, proj); } }; diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp index 6dcd9d8bd72e..bb2e09aa5b38 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp @@ -214,6 +214,7 @@ namespace hpx { #else // DOXYGEN #include +#include #include #include #include @@ -254,9 +255,8 @@ namespace hpx::parallel { template - static constexpr Iter sequential( - ExPolicy&& policy, Iter first, Sent last, Pred&& pred, - Proj&& proj) + static constexpr Iter sequential(ExPolicy&& policy, Iter first, + Sent last, Pred&& pred, Proj&& proj) { return sequential_remove_if( HPX_FORWARD(ExPolicy, policy), first, last, @@ -268,7 +268,9 @@ namespace hpx::parallel { static decltype(auto) parallel(ExPolicy&& policy, Iter first, Sent last, Pred&& pred, Proj&& proj) { - using zip_iterator = hpx::util::zip_iterator; + using value_t = std::decay_t< + typename std::iterator_traits::value_type>; + using zip_iterator = hpx::util::zip_iterator; using algorithm_result = util::detail::algorithm_result; using difference_type = @@ -283,28 +285,25 @@ namespace hpx::parallel { if (count == 0) return algorithm_result::get(HPX_MOVE(first)); } - - std::shared_ptr flags(new bool[count]); + std::shared_ptr flags(new value_t[count]); using hpx::get; // Note: replacing the invoke() with HPX_INVOKE() // below makes gcc generate errors - using inner_policy_type = - decltype(hpx::execution::experimental::to_non_simd( - std::declval>())); + using inner_policy_type = std::decay_t; auto f1 = [pred = HPX_FORWARD(Pred, pred), proj = HPX_FORWARD(Proj, proj)]( - zip_iterator part_begin, - std::size_t part_size) -> void { + auto part_begin, std::size_t part_size) -> void { // MSVC complains if pred or proj is captured by ref below - util::loop_n(part_begin, part_size, - [pred, proj](zip_iterator it) mutable { - bool f = hpx::invoke( + util::loop_n( + part_begin, part_size, [pred, proj](auto it) mutable { + auto f = hpx::invoke( pred, hpx::invoke(proj, get<0>(*it))); - - get<1>(*it) = f; + using V = std::decay_t(*it))>; + get<1>(*it) = + hpx::parallel::traits::choose(f, V(1), V(0)); }); }; @@ -316,7 +315,7 @@ namespace hpx::parallel { if (dest == get<0>(part_begin.get_iterator_tuple())) { // Self-assignment must be detected. - util::loop_n( + util::loop_n( part_begin, part_size, [&dest](zip_iterator it) { if (!get<1>(*it)) { @@ -331,7 +330,7 @@ namespace hpx::parallel { else { // Self-assignment can't be performed. - util::loop_n( + util::loop_n( part_begin, part_size, [&dest](zip_iterator it) { if (!get<1>(*it)) *dest++ = std::ranges::iter_move( diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp index e2967f4dee0a..bb5a79546ca2 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -9,13 +9,14 @@ #include #if defined(HPX_HAVE_DATAPAR) +#include +#include #include #include #include #include #include #include -#include #include #include @@ -33,8 +34,7 @@ namespace hpx::parallel::detail { static inline Iter call( ExPolicy&&, Iter first, Sent last, Pred pred, Proj proj) { - using value_type = - typename std::iterator_traits::value_type; + using value_type = typename std::iterator_traits::value_type; using V = hpx::parallel::traits::vector_pack_type_t; constexpr std::size_t size = hpx::parallel::traits::vector_pack_size_v; @@ -52,7 +52,8 @@ namespace hpx::parallel::detail { ++first; } - while (last - first >= static_cast(size)) //Safety + while ( + last - first >= static_cast(size)) //Safety { V tmp(hpx::parallel::traits::vector_pack_load::aligned(first)); @@ -80,12 +81,27 @@ namespace hpx::parallel::detail { else if (!hpx::parallel::traits::all_of(msk)) { //mixed - for (std::size_t i = 0; i < size; ++i) + int first = hpx::parallel::traits::find_first_of(msk); + + for (int i = 0; i < first; ++i) + { + *dest++ = + value_type(hpx::parallel::traits::get(tmp, i)); + } + + for (std::size_t i = first; i < size; ++i) { - if (!hpx::parallel::traits::get(msk, i)) + bool match = false; + if constexpr (std::is_class_v< + std::decay_t>) + match = msk[i]; + else + match = msk; + + if (!match) { - *dest++ = value_type( - hpx::parallel::traits::get(tmp, i)); + *dest++ = + value_type(hpx::parallel::traits::get(tmp, i)); } } } @@ -138,13 +154,11 @@ namespace hpx::parallel::detail { { template static inline Iter call( - ExPolicy&& policy, Iter first, Sent last, T const& value, - Proj proj) + ExPolicy&& policy, Iter first, Sent last, T const& value, Proj proj) { return datapar_remove_if::call( HPX_FORWARD(ExPolicy, policy), first, last, - [&value](auto const& a) { return a == value; }, - proj); + [&value](auto const& a) { return a == value; }, proj); } }; @@ -152,8 +166,8 @@ namespace hpx::parallel::detail { typename Sent, typename T, typename Proj> requires(hpx::is_vectorpack_execution_policy_v) HPX_HOST_DEVICE HPX_FORCEINLINE Iter tag_invoke( - sequential_remove_t, ExPolicy&& policy, Iter first, - Sent last, T const& value, Proj proj) + sequential_remove_t, ExPolicy&& policy, Iter first, Sent last, + T const& value, Proj proj) { if constexpr (hpx::parallel::util::detail::iterator_datapar_compatible< Iter>::value) diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp index 3929e4b906e4..ad5207bd456a 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp @@ -57,11 +57,11 @@ void test_remove_async(ExPolicy p, IteratorTag) std::iota(std::begin(c), std::end(c), std::rand()); std::copy(std::begin(c), std::end(c), std::begin(d)); - std::size_t idx = std::rand() % c.size(); + std::size_t idx = std::rand() % c.size(); int value = c[idx]; - auto f = hpx::remove( - p, iterator(std::begin(c)), iterator(std::end(c)), value); + auto f = + hpx::remove(p, iterator(std::begin(c)), iterator(std::end(c)), value); auto result = f.get(); auto solution = std::remove(std::begin(d), std::end(d), value); diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp index 4c29cc112732..928f3c328208 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp @@ -49,12 +49,11 @@ void test_remove_if(ExPolicy policy, IteratorTag) std::iota(std::begin(c), std::end(c), std::rand()); std::copy(std::begin(c), std::end(c), std::begin(d)); - std::size_t idx = std::rand() % c.size(); + std::size_t idx = std::rand() % c.size(); auto result = hpx::remove_if(policy, iterator(std::begin(c)), iterator(std::end(c)), equal_f(c[idx])); - auto solution = - std::remove_if(std::begin(d), std::end(d), equal_f(d[idx])); + auto solution = std::remove_if(std::begin(d), std::end(d), equal_f(d[idx])); bool equality = test::equal(std::begin(c), result.base(), std::begin(d), solution); @@ -73,13 +72,12 @@ void test_remove_if_async(ExPolicy p, IteratorTag) std::iota(std::begin(c), std::end(c), std::rand()); std::copy(std::begin(c), std::end(c), std::begin(d)); - std::size_t idx = std::rand() % c.size(); + std::size_t idx = std::rand() % c.size(); auto f = hpx::remove_if( p, iterator(std::begin(c)), iterator(std::end(c)), equal_f(c[idx])); auto result = f.get(); - auto solution = - std::remove_if(std::begin(d), std::end(d), equal_f(d[idx])); + auto solution = std::remove_if(std::begin(d), std::end(d), equal_f(d[idx])); bool equality = test::equal(std::begin(c), result.base(), std::begin(d), solution); diff --git a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp index 1228ebfa7795..d72051c35ebd 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp @@ -56,7 +56,15 @@ namespace hpx::parallel::traits { HPX_HOST_DEVICE HPX_FORCEINLINE static void unaligned( V& value, Iter& iter) { - *iter = value; + if constexpr (std::is_class_v) + { + value.copy_to( + std::addressof(*iter), std::experimental::element_aligned); + } + else + { + *iter = value; + } } }; } // namespace hpx::parallel::traits diff --git a/libs/core/execution/include/hpx/execution/traits/vector_pack_conditionals.hpp b/libs/core/execution/include/hpx/execution/traits/vector_pack_conditionals.hpp index 457324b69161..a808e5034250 100644 --- a/libs/core/execution/include/hpx/execution/traits/vector_pack_conditionals.hpp +++ b/libs/core/execution/include/hpx/execution/traits/vector_pack_conditionals.hpp @@ -8,8 +8,6 @@ #include -#if defined(HPX_HAVE_DATAPAR) - namespace hpx::parallel::traits { //////////////////////////////////////////////////////////////////// @@ -32,6 +30,8 @@ namespace hpx::parallel::traits { } } // namespace hpx::parallel::traits +#if defined(HPX_HAVE_DATAPAR) + #if !defined(__CUDACC__) #include #include diff --git a/libs/core/executors/include/hpx/executors/datapar/execution_policy_mappings.hpp b/libs/core/executors/include/hpx/executors/datapar/execution_policy_mappings.hpp index 33dd89edb1b6..fbb23b4558b9 100644 --- a/libs/core/executors/include/hpx/executors/datapar/execution_policy_mappings.hpp +++ b/libs/core/executors/include/hpx/executors/datapar/execution_policy_mappings.hpp @@ -12,4 +12,58 @@ #if defined(HPX_HAVE_DATAPAR) #include +#include +#include +#include + +#include +#include + +namespace hpx::execution::experimental { + + /////////////////////////////////////////////////////////////////////////// + // Return the matching non-simd (vectorpack) execution policy + HPX_CXX_CORE_EXPORT inline constexpr struct to_non_simd_t final + : hpx::functional::detail::tag_fallback + { + private: + // any non-simd policy just returns itself + template + friend constexpr decltype(auto) tag_fallback_invoke( + to_non_simd_t, ExPolicy&& policy) noexcept + { + static_assert(!hpx::is_vectorpack_execution_policy_v, + "must not be a simd (vectorpack) execution policy"); + return std::forward(policy); + } + } to_non_simd{}; + + template <> + struct is_execution_policy_mapping : std::true_type + { + }; + + // Return the matching simd (vectorpack) execution policy + HPX_CXX_CORE_EXPORT inline constexpr struct to_simd_t final + : hpx::functional::detail::tag_fallback + { + private: + // any simd policy just returns itself + + template + friend constexpr decltype(auto) tag_fallback_invoke( + to_simd_t, ExPolicy&& policy) noexcept + { + static_assert(hpx::is_vectorpack_execution_policy_v, + "must be a simd (vectorpack) execution policy"); + return std::forward(policy); + } + } to_simd{}; + + template <> + struct is_execution_policy_mapping : std::true_type + { + }; +} // namespace hpx::execution::experimental + #endif diff --git a/libs/core/executors/include/hpx/executors/execution_policy_mappings.hpp b/libs/core/executors/include/hpx/executors/execution_policy_mappings.hpp index 663c3d7a67b7..36dc382d1671 100644 --- a/libs/core/executors/include/hpx/executors/execution_policy_mappings.hpp +++ b/libs/core/executors/include/hpx/executors/execution_policy_mappings.hpp @@ -156,46 +156,4 @@ namespace hpx::execution::experimental { struct is_execution_policy_mapping : std::true_type { }; - - HPX_CXX_CORE_EXPORT inline constexpr struct to_non_simd_t final - : hpx::functional::detail::tag_fallback - { - private: - // any non-simd policy just returns itself - template - friend constexpr decltype(auto) tag_fallback_invoke( - to_non_simd_t, ExPolicy&& policy) noexcept - { - static_assert(!hpx::is_vectorpack_execution_policy_v, - "must not be a simd (vectorpack) execution policy"); - return std::forward(policy); - } - } to_non_simd{}; - - template <> - struct is_execution_policy_mapping : std::true_type - { - }; - - // Return the matching simd (vectorpack) execution policy - HPX_CXX_CORE_EXPORT inline constexpr struct to_simd_t final - : hpx::functional::detail::tag_fallback - { - private: - // any simd policy just returns itself - - template - friend constexpr decltype(auto) tag_fallback_invoke( - to_simd_t, ExPolicy&& policy) noexcept - { - static_assert(hpx::is_vectorpack_execution_policy_v, - "must be a simd (vectorpack) execution policy"); - return std::forward(policy); - } - } to_simd{}; - - template <> - struct is_execution_policy_mapping : std::true_type - { - }; } // namespace hpx::execution::experimental From dbac4b10df0a3d997312e55e7e184125f56a23e0 Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Tue, 31 Mar 2026 14:06:08 +0530 Subject: [PATCH 04/13] Added missing scope declarations and formatting Signed-off-by: BhoomishGupta --- .../tests/unit/datapar_algorithms/remove_datapar.cpp | 1 + .../tests/unit/datapar_algorithms/remove_if_datapar.cpp | 1 + .../execution/traits/detail/simd/vector_pack_load_store.hpp | 4 ++-- .../hpx/execution/traits/detail/simd/vector_pack_simd.hpp | 1 + test_package/test_package.cpp | 2 +- 5 files changed, 6 insertions(+), 3 deletions(-) diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp index ad5207bd456a..454b0595e05d 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp index 928f3c328208..fc91ec8c22e9 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include diff --git a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp index d72051c35ebd..0f7fa2aef595 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp @@ -58,8 +58,8 @@ namespace hpx::parallel::traits { { if constexpr (std::is_class_v) { - value.copy_to( - std::addressof(*iter), std::experimental::element_aligned); + value.copy_to(std::addressof(*iter), + datapar::experimental::element_aligned); } else { diff --git a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_simd.hpp b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_simd.hpp index b659f723c87d..70d8e26e963a 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_simd.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_simd.hpp @@ -44,6 +44,7 @@ namespace hpx::datapar::experimental { HPX_CXX_CORE_EXPORT using std::experimental::memory_alignment_v; HPX_CXX_CORE_EXPORT using std::experimental::vector_aligned; + HPX_CXX_CORE_EXPORT using std::experimental::element_aligned; HPX_CXX_CORE_EXPORT using std::experimental::all_of; HPX_CXX_CORE_EXPORT using std::experimental::any_of; diff --git a/test_package/test_package.cpp b/test_package/test_package.cpp index 5cb936ef366c..1b023bc9c9d0 100644 --- a/test_package/test_package.cpp +++ b/test_package/test_package.cpp @@ -4,9 +4,9 @@ // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#include #include #include -#include int hpx_main() { From e5d6b88dce224ba5a44b5eb83a80d19bb7db2abc Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Thu, 2 Apr 2026 14:37:03 +0530 Subject: [PATCH 05/13] Enhance predicate callable checks for remove algorithms in parallel execution Signed-off-by: BhoomishGupta --- .../hpx/parallel/algorithms/remove.hpp | 62 ++++++++++++++----- .../parallel/container_algorithms/remove.hpp | 24 +++++-- 2 files changed, 65 insertions(+), 21 deletions(-) diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp index bb2e09aa5b38..a26a5d5524c3 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp @@ -268,9 +268,21 @@ namespace hpx::parallel { static decltype(auto) parallel(ExPolicy&& policy, Iter first, Sent last, Pred&& pred, Proj&& proj) { + using inner_policy_type = std::decay_t; + constexpr bool vectorpack_policy = + hpx::is_vectorpack_execution_policy_v; + constexpr bool vectorpack_predicate_callable = + hpx::parallel::traits::is_indirect_callable_v< + inner_policy_type, Pred, + hpx::parallel::traits::projected>; + constexpr bool use_vectorpack_predicate = + vectorpack_policy && vectorpack_predicate_callable; + using value_t = std::decay_t< typename std::iterator_traits::value_type>; - using zip_iterator = hpx::util::zip_iterator; + using flag_t = + std::conditional_t; + using zip_iterator = hpx::util::zip_iterator; using algorithm_result = util::detail::algorithm_result; using difference_type = @@ -285,26 +297,40 @@ namespace hpx::parallel { if (count == 0) return algorithm_result::get(HPX_MOVE(first)); } - std::shared_ptr flags(new value_t[count]); + std::shared_ptr flags(new flag_t[count]); using hpx::get; // Note: replacing the invoke() with HPX_INVOKE() // below makes gcc generate errors - using inner_policy_type = std::decay_t; - auto f1 = [pred = HPX_FORWARD(Pred, pred), proj = HPX_FORWARD(Proj, proj)]( auto part_begin, std::size_t part_size) -> void { // MSVC complains if pred or proj is captured by ref below - util::loop_n( - part_begin, part_size, [pred, proj](auto it) mutable { - auto f = hpx::invoke( - pred, hpx::invoke(proj, get<0>(*it))); - using V = std::decay_t(*it))>; - get<1>(*it) = - hpx::parallel::traits::choose(f, V(1), V(0)); - }); + if constexpr (use_vectorpack_predicate) + { + util::loop_n(part_begin, part_size, + [pred, proj](auto it) mutable { + auto f = hpx::invoke( + pred, hpx::invoke(proj, get<0>(*it))); + using V = std::decay_t(*it))>; + get<1>(*it) = hpx::parallel::traits::choose( + f, V(1), V(0)); + }); + } + else + { + using loop_policy_type = + std::conditional_t; + + util::loop_n(part_begin, part_size, + [pred, proj](auto it) mutable { + get<1>(*it) = hpx::invoke( + pred, hpx::invoke(proj, get<0>(*it))); + }); + } }; auto f2 = [flags, first, count](auto&&...) mutable -> Iter { @@ -382,9 +408,15 @@ namespace hpx { requires ( hpx::is_execution_policy_v && hpx::traits::is_iterator_v && - hpx::is_invocable_v::value_type - > + ( + hpx::parallel::traits::is_indirect_callable_v + > || + hpx::is_invocable_v::value_type + > + ) ) // clang-format on friend decltype(auto) tag_fallback_invoke(hpx::remove_if_t, diff --git a/libs/core/algorithms/include/hpx/parallel/container_algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/container_algorithms/remove.hpp index 67c04e88798b..9bb0b1657e66 100644 --- a/libs/core/algorithms/include/hpx/parallel/container_algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/container_algorithms/remove.hpp @@ -555,9 +555,14 @@ namespace hpx::ranges { hpx::traits::is_iterator_v && std::sentinel_for && hpx::parallel::traits::is_projected_v && - hpx::parallel::traits::is_indirect_callable_v - > + ( + hpx::parallel::traits::is_indirect_callable_v + > || + hpx::is_invocable_v::value_type + > + ) ) // clang-format on friend typename parallel::util::detail::algorithm_result && std::ranges::range && hpx::parallel::traits::is_projected_range_v && - hpx::parallel::traits::is_indirect_callable_v - > + ( + hpx::parallel::traits::is_indirect_callable_v + > || + hpx::is_invocable_v + >::value_type + > + ) ) // clang-format on friend parallel::util::detail::algorithm_result_t Date: Fri, 3 Apr 2026 13:14:14 +0530 Subject: [PATCH 06/13] Included a module and changed variable name Signed-off-by: BhoomishGupta --- .../algorithms/include/hpx/parallel/algorithms/remove.hpp | 1 + .../algorithms/include/hpx/parallel/datapar/remove.hpp | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp index a26a5d5524c3..46f3df620316 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp @@ -214,6 +214,7 @@ namespace hpx { #else // DOXYGEN #include +#include #include #include #include diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp index bb5a79546ca2..9acb74f2b798 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -81,15 +81,16 @@ namespace hpx::parallel::detail { else if (!hpx::parallel::traits::all_of(msk)) { //mixed - int first = hpx::parallel::traits::find_first_of(msk); + int first_match = hpx::parallel::traits::find_first_of(msk); - for (int i = 0; i < first; ++i) + for (int i = 0; i < first_match; ++i) { *dest++ = value_type(hpx::parallel::traits::get(tmp, i)); } - for (std::size_t i = first; i < size; ++i) + for (std::size_t i = static_cast(first_match); + i < size; ++i) { bool match = false; if constexpr (std::is_class_v< From 1bffbea828776e230b7856e7f6a734d625d4fe5d Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Fri, 3 Apr 2026 17:03:34 +0530 Subject: [PATCH 07/13] Fix vector_pack_store::unaligned for EVE backend Signed-off-by: BhoomishGupta --- .../hpx/execution/traits/detail/eve/vector_pack_load_store.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp index be865300d3bb..5dcb021574ff 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp @@ -54,7 +54,7 @@ namespace hpx::parallel::traits { HPX_HOST_DEVICE HPX_FORCEINLINE static void unaligned( V& value, Iter& iter) { - *iter = value; + eve::store(value, std::addressof(*iter)); } }; } // namespace hpx::parallel::traits From 9f2bc8575497c575ceeb3562e5466f5f244b678f Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Fri, 3 Apr 2026 18:19:34 +0530 Subject: [PATCH 08/13] Fix EVE SIMD backend traits and unaligned store fallbacks Signed-off-by: BhoomishGupta --- .../include/hpx/parallel/datapar/remove.hpp | 6 +++ .../detail/eve/vector_pack_load_store.hpp | 42 ++++++++++++++++--- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp index 9acb74f2b798..270537655613 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -95,7 +95,13 @@ namespace hpx::parallel::detail { bool match = false; if constexpr (std::is_class_v< std::decay_t>) + { +#if defined(HPX_HAVE_DATAPAR_EVE) + match = msk.get(i); +#else match = msk[i]; +#endif + } else match = msk; diff --git a/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp index 5dcb021574ff..3058e646302f 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp @@ -14,6 +14,7 @@ #include #include +#include /////////////////////////////////////////////////////////////////////////////// namespace hpx::parallel::traits { @@ -26,14 +27,28 @@ namespace hpx::parallel::traits { template HPX_HOST_DEVICE HPX_FORCEINLINE static V aligned(Iter& iter) { - return V( - eve::as_aligned(std::addressof(*iter), eve::cardinal_t{})); + if constexpr (std::is_class_v) + { + return eve::load(eve::as_aligned( + std::addressof(*iter), eve::cardinal_t{})); + } + else + { + return *iter; + } } template HPX_HOST_DEVICE HPX_FORCEINLINE static V unaligned(Iter& iter) { - return *iter; + if constexpr (std::is_class_v) + { + return eve::load(std::addressof(*iter)); + } + else + { + return *iter; + } } }; @@ -46,15 +61,30 @@ namespace hpx::parallel::traits { HPX_HOST_DEVICE HPX_FORCEINLINE static void aligned( V& value, Iter& iter) { - eve::store(value, - eve::as_aligned(std::addressof(*iter), eve::cardinal_t{})); + if constexpr (std::is_class_v) + { + eve::store(value, + eve::as_aligned( + std::addressof(*iter), eve::cardinal_t{})); + } + else + { + *iter = value; + } } template HPX_HOST_DEVICE HPX_FORCEINLINE static void unaligned( V& value, Iter& iter) { - eve::store(value, std::addressof(*iter)); + if constexpr (std::is_class_v) + { + eve::store(value, std::addressof(*iter)); + } + else + { + *iter = value; + } } }; } // namespace hpx::parallel::traits From 5cd15194bbd2ee2155c235e230b0e7d0ffb2bf3f Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Sat, 4 Apr 2026 14:40:52 +0530 Subject: [PATCH 09/13] Improved mask handling Signed-off-by: BhoomishGupta --- .../hpx/parallel/algorithms/remove.hpp | 2 +- .../include/hpx/parallel/datapar/remove.hpp | 20 +++++++++++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp index 46f3df620316..34e86bcccf20 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp @@ -298,7 +298,7 @@ namespace hpx::parallel { if (count == 0) return algorithm_result::get(HPX_MOVE(first)); } - std::shared_ptr flags(new flag_t[count]); + std::shared_ptr flags(new flag_t[count]()); using hpx::get; diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp index 270537655613..ef5e724bb5ea 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -83,14 +83,22 @@ namespace hpx::parallel::detail { //mixed int first_match = hpx::parallel::traits::find_first_of(msk); - for (int i = 0; i < first_match; ++i) + std::size_t first_lane = 0; + if (first_match > 0) + { + std::size_t const first_match_lane = + static_cast(first_match); + first_lane = + first_match_lane < size ? first_match_lane : size; + } + + for (std::size_t i = 0; i < first_lane; ++i) { *dest++ = value_type(hpx::parallel::traits::get(tmp, i)); } - for (std::size_t i = static_cast(first_match); - i < size; ++i) + for (std::size_t i = first_lane; i < size; ++i) { bool match = false; if constexpr (std::is_class_v< @@ -103,7 +111,11 @@ namespace hpx::parallel::detail { #endif } else - match = msk; + { + // Mixed masks are vector masks; keep scalar fallback + // only for compatibility with non-vector paths. + match = static_cast(msk); + } if (!match) { From 26cc8062fe7f39abf11c6c99d9a441874033a20f Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Sat, 4 Apr 2026 18:56:48 +0530 Subject: [PATCH 10/13] removed if defined portion Signed-off-by: BhoomishGupta --- .../include/hpx/parallel/datapar/remove.hpp | 23 ++++--------------- 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp index ef5e724bb5ea..b89fe67436d5 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -100,27 +100,14 @@ namespace hpx::parallel::detail { for (std::size_t i = first_lane; i < size; ++i) { - bool match = false; - if constexpr (std::is_class_v< - std::decay_t>) - { -#if defined(HPX_HAVE_DATAPAR_EVE) - match = msk.get(i); -#else - match = msk[i]; -#endif - } - else - { - // Mixed masks are vector masks; keep scalar fallback - // only for compatibility with non-vector paths. - match = static_cast(msk); - } + auto scalar_val = + value_type(hpx::parallel::traits::get(tmp, i)); + bool match = + HPX_INVOKE(pred, HPX_INVOKE(proj, scalar_val)); if (!match) { - *dest++ = - value_type(hpx::parallel::traits::get(tmp, i)); + *dest++ = scalar_val; } } } From 690acd03048934d70cdf48cc4bf52656de93d37e Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Sat, 18 Apr 2026 23:35:52 +0530 Subject: [PATCH 11/13] Refactor datapar algorithms for improved alignment checks and simplify remove_if logic Signed-off-by: BhoomishGupta --- .gitignore | 1 + .../hpx/parallel/algorithms/remove.hpp | 55 ++++++------------- .../include/hpx/parallel/datapar/loop.hpp | 51 +++++++---------- .../include/hpx/parallel/datapar/remove.hpp | 20 +------ .../datapar_algorithms/remove_if_datapar.cpp | 2 +- 5 files changed, 40 insertions(+), 89 deletions(-) diff --git a/.gitignore b/.gitignore index 2aca1bb59ffc..3c5ccf46d829 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ *.pyc /build +/build-pr6877 .DS_Store .gitignore .ptp-sync-folder diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp index 34e86bcccf20..0adf615df1a9 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp @@ -272,18 +272,15 @@ namespace hpx::parallel { using inner_policy_type = std::decay_t; constexpr bool vectorpack_policy = hpx::is_vectorpack_execution_policy_v; - constexpr bool vectorpack_predicate_callable = - hpx::parallel::traits::is_indirect_callable_v< - inner_policy_type, Pred, - hpx::parallel::traits::projected>; - constexpr bool use_vectorpack_predicate = - vectorpack_policy && vectorpack_predicate_callable; - - using value_t = std::decay_t< - typename std::iterator_traits::value_type>; - using flag_t = - std::conditional_t; - using zip_iterator = hpx::util::zip_iterator; + + if constexpr (vectorpack_policy) + { + return sequential_remove_if( + HPX_FORWARD(ExPolicy, policy), first, last, + HPX_FORWARD(Pred, pred), HPX_FORWARD(Proj, proj)); + } + + using zip_iterator = hpx::util::zip_iterator; using algorithm_result = util::detail::algorithm_result; using difference_type = @@ -298,7 +295,7 @@ namespace hpx::parallel { if (count == 0) return algorithm_result::get(HPX_MOVE(first)); } - std::shared_ptr flags(new flag_t[count]()); + std::shared_ptr flags(new bool[count]); using hpx::get; @@ -306,32 +303,14 @@ namespace hpx::parallel { // below makes gcc generate errors auto f1 = [pred = HPX_FORWARD(Pred, pred), proj = HPX_FORWARD(Proj, proj)]( - auto part_begin, std::size_t part_size) -> void { + zip_iterator part_begin, + std::size_t part_size) -> void { // MSVC complains if pred or proj is captured by ref below - if constexpr (use_vectorpack_predicate) - { - util::loop_n(part_begin, part_size, - [pred, proj](auto it) mutable { - auto f = hpx::invoke( - pred, hpx::invoke(proj, get<0>(*it))); - using V = std::decay_t(*it))>; - get<1>(*it) = hpx::parallel::traits::choose( - f, V(1), V(0)); - }); - } - else - { - using loop_policy_type = - std::conditional_t; - - util::loop_n(part_begin, part_size, - [pred, proj](auto it) mutable { - get<1>(*it) = hpx::invoke( - pred, hpx::invoke(proj, get<0>(*it))); - }); - } + util::loop_n(part_begin, part_size, + [pred, proj](zip_iterator it) mutable { + get<1>(*it) = hpx::invoke( + pred, hpx::invoke(proj, get<0>(*it))); + }); }; auto f2 = [flags, first, count](auto&&...) mutable -> Iter { diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp index b208ed4b11cd..de9766537e64 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp @@ -48,15 +48,14 @@ namespace hpx::parallel::util { if constexpr (datapar_compatible) { - while (!is_data_aligned(first) && first != last) + while (first != last && !is_data_aligned(first)) { datapar_loop_step::call1(f, first); } constexpr std::size_t size = traits::vector_pack_size_v; - End const lastV = last - size + 1; - while (first < lastV) + while (last - first > static_cast(size + 1)) { datapar_loop_step::callv(f, first); } @@ -107,7 +106,7 @@ namespace hpx::parallel::util { HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Begin call( Begin first, End last, Pred&& pred) { - while (!is_data_aligned(first) && first != last) + while (first != last && !is_data_aligned(first)) { if (datapar_loop_pred_step::call1(pred, first) != -1) return first; @@ -116,9 +115,7 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; - End const lastV = last - size + 1; - - while (first < lastV) + while (last - first > static_cast(size + 1)) { int offset = datapar_loop_pred_step::callv(pred, first); @@ -160,15 +157,14 @@ namespace hpx::parallel::util { if constexpr (datapar_compatible) { - while (!is_data_aligned(first) && first != last) + while (first != last && !is_data_aligned(first)) { datapar_loop_step_ind::call1(f, first); } constexpr std::size_t size = traits::vector_pack_size_v; - End const lastV = last - size + 1; - while (first < lastV) + while (last - first > static_cast(size + 1)) { datapar_loop_step_ind::callv(f, first); } @@ -208,8 +204,8 @@ namespace hpx::parallel::util { using V = traits::vector_pack_type_t; - while ((!is_data_aligned(it1) || !is_data_aligned(it2)) && - it1 != last1) + while (it1 != last1 && + (!is_data_aligned(it1) || !is_data_aligned(it2))) { datapar_loop_step2_ind::call1( f, it1, it2); @@ -217,8 +213,7 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; - InIter1 const last1V = last1 - size + 1; - while (it1 < last1V) + while (last1 - it1 > static_cast(size + 1)) { datapar_loop_step2_ind::callv( f, it1, it2); @@ -260,7 +255,7 @@ namespace hpx::parallel::util { std::size_t len = count; // clang-format off - for (/* */; !detail::is_data_aligned(first) && len != 0; + for (/* */; len != 0 && !detail::is_data_aligned(first); --len) { datapar_loop_step::call1(f, first); @@ -268,12 +263,10 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; - for (auto len_v = - static_cast(len - size + 1); - len_v > 0; - len_v -= static_cast(size), len -= size) + while (len > size + 1) { datapar_loop_step::callv(f, first); + len -= size; } // clang-format on @@ -324,11 +317,10 @@ namespace hpx::parallel::util { } // clang-format off - for (auto len_v = static_cast(len - size + 1); - len_v > 0; - len_v -= static_cast(size), len -= size) + while (len > size + 1) { datapar_loop_step::callv(f, first); + len -= size; } // clang-format on @@ -373,7 +365,7 @@ namespace hpx::parallel::util { std::size_t len = count; // clang-format off - for (/* */; !detail::is_data_aligned(first) && len != 0; + for (/* */; len != 0 && !detail::is_data_aligned(first); --len) { datapar_loop_step_ind::call1(f, first); @@ -381,12 +373,10 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; - for (auto len_v = - static_cast(len - size + 1); - len_v > 0; - len_v -= static_cast(size), len -= size) + while (len > size + 1) { datapar_loop_step_ind::callv(f, first); + len -= size; } // clang-format on @@ -423,7 +413,7 @@ namespace hpx::parallel::util { { std::size_t len = count; - for (/* */; !detail::is_data_aligned(it) && len != 0; --len) + for (/* */; len != 0 && !detail::is_data_aligned(it); --len) { datapar_loop_idx_step::call1(f, it, base_idx); ++it; @@ -433,13 +423,12 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; // clang-format off - for (auto len_v = static_cast(len - size + 1); - len_v > 0; - len_v -= static_cast(size), len -= size) + while (len > size + 1) { datapar_loop_idx_step::callv(f, it, base_idx); std::advance(it, size); base_idx += size; + len -= size; } // clang-format on diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp index b89fe67436d5..0e5a126a4156 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -9,7 +9,6 @@ #include #if defined(HPX_HAVE_DATAPAR) -#include #include #include #include @@ -81,24 +80,7 @@ namespace hpx::parallel::detail { else if (!hpx::parallel::traits::all_of(msk)) { //mixed - int first_match = hpx::parallel::traits::find_first_of(msk); - - std::size_t first_lane = 0; - if (first_match > 0) - { - std::size_t const first_match_lane = - static_cast(first_match); - first_lane = - first_match_lane < size ? first_match_lane : size; - } - - for (std::size_t i = 0; i < first_lane; ++i) - { - *dest++ = - value_type(hpx::parallel::traits::get(tmp, i)); - } - - for (std::size_t i = first_lane; i < size; ++i) + for (std::size_t i = 0; i < size; ++i) { auto scalar_val = value_type(hpx::parallel::traits::get(tmp, i)); diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp index fc91ec8c22e9..65e81e24e9e6 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp @@ -14,8 +14,8 @@ #include #include #include -#include #include +#include #include "../algorithms/test_utils.hpp" From ff90f48fcb5d9b29a0f7044a43c58b48db7db4b0 Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Sun, 19 Apr 2026 01:25:39 +0530 Subject: [PATCH 12/13] Update remove algorithms to use std::ranges::iter_move and correct policy forwarding Signed-off-by: BhoomishGupta --- .../include/hpx/parallel/algorithms/detail/remove.hpp | 3 ++- .../core/algorithms/include/hpx/parallel/algorithms/remove.hpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp index 7a57d9c5a0b7..bf8eecfc443e 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -37,7 +38,7 @@ namespace hpx::parallel::detail { for (Iter i = first; ++i != last;) if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *i))) { - *first++ = HPX_MOVE(*i); + *first++ = std::ranges::iter_move(i); } } return first; diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp index 0adf615df1a9..e6e4a3ecabbd 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp @@ -275,7 +275,7 @@ namespace hpx::parallel { if constexpr (vectorpack_policy) { - return sequential_remove_if( + return sequential_remove_if( HPX_FORWARD(ExPolicy, policy), first, last, HPX_FORWARD(Pred, pred), HPX_FORWARD(Proj, proj)); } From 717009010951c730cf09787238b2ad6b1db140ac Mon Sep 17 00:00:00 2001 From: BhoomishGupta Date: Sun, 19 Apr 2026 01:40:35 +0530 Subject: [PATCH 13/13] formatting Signed-off-by: BhoomishGupta --- .../tests/unit/datapar_algorithms/remove_if_datapar.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp index 65e81e24e9e6..fc91ec8c22e9 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp @@ -14,8 +14,8 @@ #include #include #include -#include #include +#include #include "../algorithms/test_utils.hpp"