diff --git a/.gitignore b/.gitignore index 2aca1bb59ffc..3c5ccf46d829 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ *.pyc /build +/build-pr6877 .DS_Store .gitignore .ptp-sync-folder diff --git a/libs/core/algorithms/CMakeLists.txt b/libs/core/algorithms/CMakeLists.txt index 1b880638f24d..50ad4b41a698 100644 --- a/libs/core/algorithms/CMakeLists.txt +++ b/libs/core/algorithms/CMakeLists.txt @@ -39,6 +39,7 @@ set(algorithms_headers hpx/parallel/algorithms/detail/pivot.hpp hpx/parallel/algorithms/detail/reduce.hpp hpx/parallel/algorithms/detail/reduce_deterministic.hpp + hpx/parallel/algorithms/detail/remove.hpp hpx/parallel/algorithms/detail/replace.hpp hpx/parallel/algorithms/detail/rfa.hpp hpx/parallel/algorithms/detail/rotate.hpp @@ -239,6 +240,7 @@ if(HPX_WITH_DATAPAR) hpx/parallel/datapar/loop.hpp hpx/parallel/datapar/mismatch.hpp hpx/parallel/datapar/reduce.hpp + hpx/parallel/datapar/remove.hpp hpx/parallel/datapar/replace.hpp hpx/parallel/datapar/search.hpp hpx/parallel/datapar/transfer.hpp diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp new file mode 100644 index 000000000000..bf8eecfc443e --- /dev/null +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/detail/remove.hpp @@ -0,0 +1,88 @@ +// Copyright (c) 2026 Bhoomish Gupta +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace hpx::parallel::detail { + + /////////////////////////////////////////////////////////////////////////// + HPX_CXX_CORE_EXPORT template + struct sequential_remove_if_t final + : hpx::functional::detail::tag_fallback> + { + private: + template + friend constexpr Iter tag_fallback_invoke(sequential_remove_if_t, + ExPolicy&&, Iter first, Sent last, Pred pred, Proj proj) + { + first = hpx::parallel::detail::sequential_find_if( + first, last, pred, proj); + + if (first != last) + { + for (Iter i = first; ++i != last;) + if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *i))) + { + *first++ = std::ranges::iter_move(i); + } + } + return first; + } + }; + +#if !defined(HPX_COMPUTE_DEVICE_CODE) + HPX_CXX_CORE_EXPORT template + inline constexpr sequential_remove_if_t sequential_remove_if = + sequential_remove_if_t{}; +#else + HPX_CXX_CORE_EXPORT template + HPX_HOST_DEVICE HPX_FORCEINLINE auto sequential_remove_if(Args&&... args) + { + return sequential_remove_if_t{}(std::forward(args)...); + } +#endif + + /////////////////////////////////////////////////////////////////////////// + HPX_CXX_CORE_EXPORT template + struct sequential_remove_t final + : hpx::functional::detail::tag_fallback> + { + private: + template + friend constexpr Iter tag_fallback_invoke(sequential_remove_t, + ExPolicy&& policy, Iter first, Sent last, T const& value, Proj proj) + { + return sequential_remove_if( + HPX_FORWARD(ExPolicy, policy), first, last, + [&value](auto const& a) { return value == a; }, proj); + } + }; + +#if !defined(HPX_COMPUTE_DEVICE_CODE) + HPX_CXX_CORE_EXPORT template + inline constexpr sequential_remove_t sequential_remove = + sequential_remove_t{}; +#else + HPX_CXX_CORE_EXPORT template + HPX_HOST_DEVICE HPX_FORCEINLINE auto sequential_remove(Args&&... args) + { + return sequential_remove_t{}(std::forward(args)...); + } +#endif + +} // namespace hpx::parallel::detail diff --git a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp index 987066a5880b..e6e4a3ecabbd 100644 --- a/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp @@ -214,6 +214,8 @@ namespace hpx { #else // DOXYGEN #include +#include +#include #include #include #include @@ -223,6 +225,7 @@ namespace hpx { #include #include #include +#include #include #include #include @@ -243,25 +246,6 @@ namespace hpx::parallel { namespace detail { /// \cond NOINTERNAL - HPX_CXX_CORE_EXPORT template - constexpr Iter sequential_remove_if( - Iter first, Sent last, Pred pred, Proj proj) - { - first = hpx::parallel::detail::sequential_find_if< - hpx::execution::sequenced_policy>(first, last, pred, proj); - - if (first != last) - { - for (Iter i = first; ++i != last;) - if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *i))) - { - *first++ = std::ranges::iter_move(i); - } - } - return first; - } - HPX_CXX_CORE_EXPORT template struct remove_if : public algorithm, FwdIter> { @@ -272,10 +256,11 @@ namespace hpx::parallel { template - static constexpr Iter sequential( - ExPolicy, Iter first, Sent last, Pred&& pred, Proj&& proj) + static constexpr Iter sequential(ExPolicy&& policy, Iter first, + Sent last, Pred&& pred, Proj&& proj) { - return sequential_remove_if(first, last, + return sequential_remove_if( + HPX_FORWARD(ExPolicy, policy), first, last, HPX_FORWARD(Pred, pred), HPX_FORWARD(Proj, proj)); } @@ -284,6 +269,17 @@ namespace hpx::parallel { static decltype(auto) parallel(ExPolicy&& policy, Iter first, Sent last, Pred&& pred, Proj&& proj) { + using inner_policy_type = std::decay_t; + constexpr bool vectorpack_policy = + hpx::is_vectorpack_execution_policy_v; + + if constexpr (vectorpack_policy) + { + return sequential_remove_if( + HPX_FORWARD(ExPolicy, policy), first, last, + HPX_FORWARD(Pred, pred), HPX_FORWARD(Proj, proj)); + } + using zip_iterator = hpx::util::zip_iterator; using algorithm_result = util::detail::algorithm_result; @@ -299,7 +295,6 @@ namespace hpx::parallel { if (count == 0) return algorithm_result::get(HPX_MOVE(first)); } - std::shared_ptr flags(new bool[count]); using hpx::get; @@ -311,12 +306,10 @@ namespace hpx::parallel { zip_iterator part_begin, std::size_t part_size) -> void { // MSVC complains if pred or proj is captured by ref below - util::loop_n>(part_begin, part_size, + util::loop_n(part_begin, part_size, [pred, proj](zip_iterator it) mutable { - bool f = hpx::invoke( + get<1>(*it) = hpx::invoke( pred, hpx::invoke(proj, get<0>(*it))); - - get<1>(*it) = f; }); }; @@ -325,11 +318,10 @@ namespace hpx::parallel { auto dest = first; auto part_size = count; - using execution_policy_type = std::decay_t; if (dest == get<0>(part_begin.get_iterator_tuple())) { // Self-assignment must be detected. - util::loop_n( + util::loop_n( part_begin, part_size, [&dest](zip_iterator it) { if (!get<1>(*it)) { @@ -344,7 +336,7 @@ namespace hpx::parallel { else { // Self-assignment can't be performed. - util::loop_n( + util::loop_n( part_begin, part_size, [&dest](zip_iterator it) { if (!get<1>(*it)) *dest++ = std::ranges::iter_move( @@ -396,9 +388,15 @@ namespace hpx { requires ( hpx::is_execution_policy_v && hpx::traits::is_iterator_v && - hpx::is_invocable_v::value_type - > + ( + hpx::parallel::traits::is_indirect_callable_v + > || + hpx::is_invocable_v::value_type + > + ) ) // clang-format on friend decltype(auto) tag_fallback_invoke(hpx::remove_if_t, @@ -446,10 +444,8 @@ namespace hpx { friend decltype(auto) tag_fallback_invoke(hpx::remove_t, ExPolicy&& policy, FwdIter first, FwdIter last, T const& value) { - using Type = typename std::iterator_traits::value_type; - return hpx::remove_if(HPX_FORWARD(ExPolicy, policy), first, last, - [value](Type const& a) -> bool { return value == a; }); + [value](auto const& a) { return value == a; }); } } remove{}; } // namespace hpx diff --git a/libs/core/algorithms/include/hpx/parallel/container_algorithms/remove.hpp b/libs/core/algorithms/include/hpx/parallel/container_algorithms/remove.hpp index 67c04e88798b..9bb0b1657e66 100644 --- a/libs/core/algorithms/include/hpx/parallel/container_algorithms/remove.hpp +++ b/libs/core/algorithms/include/hpx/parallel/container_algorithms/remove.hpp @@ -555,9 +555,14 @@ namespace hpx::ranges { hpx::traits::is_iterator_v && std::sentinel_for && hpx::parallel::traits::is_projected_v && - hpx::parallel::traits::is_indirect_callable_v - > + ( + hpx::parallel::traits::is_indirect_callable_v + > || + hpx::is_invocable_v::value_type + > + ) ) // clang-format on friend typename parallel::util::detail::algorithm_result && std::ranges::range && hpx::parallel::traits::is_projected_range_v && - hpx::parallel::traits::is_indirect_callable_v - > + ( + hpx::parallel::traits::is_indirect_callable_v + > || + hpx::is_invocable_v + >::value_type + > + ) ) // clang-format on friend parallel::util::detail::algorithm_result_t #include #include +#include #include #include #include diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp index b208ed4b11cd..de9766537e64 100644 --- a/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp +++ b/libs/core/algorithms/include/hpx/parallel/datapar/loop.hpp @@ -48,15 +48,14 @@ namespace hpx::parallel::util { if constexpr (datapar_compatible) { - while (!is_data_aligned(first) && first != last) + while (first != last && !is_data_aligned(first)) { datapar_loop_step::call1(f, first); } constexpr std::size_t size = traits::vector_pack_size_v; - End const lastV = last - size + 1; - while (first < lastV) + while (last - first > static_cast(size + 1)) { datapar_loop_step::callv(f, first); } @@ -107,7 +106,7 @@ namespace hpx::parallel::util { HPX_HOST_DEVICE HPX_FORCEINLINE static constexpr Begin call( Begin first, End last, Pred&& pred) { - while (!is_data_aligned(first) && first != last) + while (first != last && !is_data_aligned(first)) { if (datapar_loop_pred_step::call1(pred, first) != -1) return first; @@ -116,9 +115,7 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; - End const lastV = last - size + 1; - - while (first < lastV) + while (last - first > static_cast(size + 1)) { int offset = datapar_loop_pred_step::callv(pred, first); @@ -160,15 +157,14 @@ namespace hpx::parallel::util { if constexpr (datapar_compatible) { - while (!is_data_aligned(first) && first != last) + while (first != last && !is_data_aligned(first)) { datapar_loop_step_ind::call1(f, first); } constexpr std::size_t size = traits::vector_pack_size_v; - End const lastV = last - size + 1; - while (first < lastV) + while (last - first > static_cast(size + 1)) { datapar_loop_step_ind::callv(f, first); } @@ -208,8 +204,8 @@ namespace hpx::parallel::util { using V = traits::vector_pack_type_t; - while ((!is_data_aligned(it1) || !is_data_aligned(it2)) && - it1 != last1) + while (it1 != last1 && + (!is_data_aligned(it1) || !is_data_aligned(it2))) { datapar_loop_step2_ind::call1( f, it1, it2); @@ -217,8 +213,7 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; - InIter1 const last1V = last1 - size + 1; - while (it1 < last1V) + while (last1 - it1 > static_cast(size + 1)) { datapar_loop_step2_ind::callv( f, it1, it2); @@ -260,7 +255,7 @@ namespace hpx::parallel::util { std::size_t len = count; // clang-format off - for (/* */; !detail::is_data_aligned(first) && len != 0; + for (/* */; len != 0 && !detail::is_data_aligned(first); --len) { datapar_loop_step::call1(f, first); @@ -268,12 +263,10 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; - for (auto len_v = - static_cast(len - size + 1); - len_v > 0; - len_v -= static_cast(size), len -= size) + while (len > size + 1) { datapar_loop_step::callv(f, first); + len -= size; } // clang-format on @@ -324,11 +317,10 @@ namespace hpx::parallel::util { } // clang-format off - for (auto len_v = static_cast(len - size + 1); - len_v > 0; - len_v -= static_cast(size), len -= size) + while (len > size + 1) { datapar_loop_step::callv(f, first); + len -= size; } // clang-format on @@ -373,7 +365,7 @@ namespace hpx::parallel::util { std::size_t len = count; // clang-format off - for (/* */; !detail::is_data_aligned(first) && len != 0; + for (/* */; len != 0 && !detail::is_data_aligned(first); --len) { datapar_loop_step_ind::call1(f, first); @@ -381,12 +373,10 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; - for (auto len_v = - static_cast(len - size + 1); - len_v > 0; - len_v -= static_cast(size), len -= size) + while (len > size + 1) { datapar_loop_step_ind::callv(f, first); + len -= size; } // clang-format on @@ -423,7 +413,7 @@ namespace hpx::parallel::util { { std::size_t len = count; - for (/* */; !detail::is_data_aligned(it) && len != 0; --len) + for (/* */; len != 0 && !detail::is_data_aligned(it); --len) { datapar_loop_idx_step::call1(f, it, base_idx); ++it; @@ -433,13 +423,12 @@ namespace hpx::parallel::util { constexpr std::size_t size = traits::vector_pack_size_v; // clang-format off - for (auto len_v = static_cast(len - size + 1); - len_v > 0; - len_v -= static_cast(size), len -= size) + while (len > size + 1) { datapar_loop_idx_step::callv(f, it, base_idx); std::advance(it, size); base_idx += size; + len -= size; } // clang-format on diff --git a/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp new file mode 100644 index 000000000000..0e5a126a4156 --- /dev/null +++ b/libs/core/algorithms/include/hpx/parallel/datapar/remove.hpp @@ -0,0 +1,179 @@ +// Copyright (c) 2025 Bhoomish Gupta +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include + +#if defined(HPX_HAVE_DATAPAR) +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace hpx::parallel::detail { + + /////////////////////////////////////////////////////////////////////////// + + HPX_CXX_CORE_EXPORT template + struct datapar_remove_if + { + template + static inline Iter call( + ExPolicy&&, Iter first, Sent last, Pred pred, Proj proj) + { + using value_type = typename std::iterator_traits::value_type; + using V = hpx::parallel::traits::vector_pack_type_t; + constexpr std::size_t size = + hpx::parallel::traits::vector_pack_size_v; + + Iter dest = first; + + while (first != last && !util::detail::is_data_aligned(first)) + { + if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *first))) + { + if (dest != first) + *dest = HPX_MOVE(*first); + ++dest; + } + ++first; + } + + while ( + last - first >= static_cast(size)) //Safety + { + V tmp(hpx::parallel::traits::vector_pack_load::aligned(first)); + + auto msk = HPX_INVOKE(pred, HPX_INVOKE(proj, tmp)); + + if (hpx::parallel::traits::none_of(msk)) + { + //no elements match + if (dest != first) + { + if (util::detail::is_data_aligned(dest)) + { + hpx::parallel::traits::vector_pack_store::aligned(tmp, dest); + } + else + { + hpx::parallel::traits::vector_pack_store::unaligned(tmp, dest); + } + } + std::advance(dest, size); + } + else if (!hpx::parallel::traits::all_of(msk)) + { + //mixed + for (std::size_t i = 0; i < size; ++i) + { + auto scalar_val = + value_type(hpx::parallel::traits::get(tmp, i)); + bool match = + HPX_INVOKE(pred, HPX_INVOKE(proj, scalar_val)); + + if (!match) + { + *dest++ = scalar_val; + } + } + } + //all elements match + std::advance(first, size); + } + + while (first != last) + { + if (!HPX_INVOKE(pred, HPX_INVOKE(proj, *first))) + { + if (dest != first) + *dest = HPX_MOVE(*first); + ++dest; + } + ++first; + } + + return dest; + } + }; + + HPX_CXX_CORE_EXPORT template + requires(hpx::is_vectorpack_execution_policy_v) + HPX_HOST_DEVICE HPX_FORCEINLINE Iter tag_invoke( + sequential_remove_if_t, ExPolicy&& policy, Iter first, + Sent last, Pred pred, Proj proj) + { + if constexpr (hpx::parallel::util::detail::iterator_datapar_compatible< + Iter>::value) + { + return datapar_remove_if::call( + HPX_FORWARD(ExPolicy, policy), first, last, pred, proj); + } + else + { + using base_policy_type = + decltype((hpx::execution::experimental::to_non_simd( + std::declval()))); + return sequential_remove_if( + hpx::execution::experimental::to_non_simd(policy), first, last, + pred, proj); + } + } + + /////////////////////////////////////////////////////////////////////////// + HPX_CXX_CORE_EXPORT template + struct datapar_remove + { + template + static inline Iter call( + ExPolicy&& policy, Iter first, Sent last, T const& value, Proj proj) + { + return datapar_remove_if::call( + HPX_FORWARD(ExPolicy, policy), first, last, + [&value](auto const& a) { return a == value; }, proj); + } + }; + + HPX_CXX_CORE_EXPORT template + requires(hpx::is_vectorpack_execution_policy_v) + HPX_HOST_DEVICE HPX_FORCEINLINE Iter tag_invoke( + sequential_remove_t, ExPolicy&& policy, Iter first, Sent last, + T const& value, Proj proj) + { + if constexpr (hpx::parallel::util::detail::iterator_datapar_compatible< + Iter>::value) + { + return datapar_remove::call( + HPX_FORWARD(ExPolicy, policy), first, last, value, proj); + } + else + { + using base_policy_type = + decltype((hpx::execution::experimental::to_non_simd( + std::declval()))); + return sequential_remove( + hpx::execution::experimental::to_non_simd(policy), first, last, + value, proj); + } + } + +} // namespace hpx::parallel::detail + +#endif diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt b/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt index 9d4f949a5652..c4a75fac1e4b 100644 --- a/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/CMakeLists.txt @@ -36,6 +36,8 @@ if(HPX_WITH_DATAPAR) mismatch_datapar none_of_datapar reduce_datapar + remove_datapar + remove_if_datapar replace_copy_if_datapar replace_copy_datapar replace_datapar diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp new file mode 100644 index 000000000000..454b0595e05d --- /dev/null +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_datapar.cpp @@ -0,0 +1,127 @@ +// Copyright (c) 2026 Bhoomish Gupta +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../algorithms/test_utils.hpp" + +/////////////////////////////////////////////////////////////////////////////// +template +void test_remove(ExPolicy policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::begin(d)); + + std::size_t idx = std::rand() % c.size(); //-V104 + int value = c[idx]; + + auto result = hpx::remove( + policy, iterator(std::begin(c)), iterator(std::end(c)), value); + auto solution = std::remove(std::begin(d), std::end(d), value); + + bool equality = + test::equal(std::begin(c), result.base(), std::begin(d), solution); + + HPX_TEST(equality); +} + +template +void test_remove_async(ExPolicy p, IteratorTag) +{ + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::begin(d)); + + std::size_t idx = std::rand() % c.size(); + int value = c[idx]; + + auto f = + hpx::remove(p, iterator(std::begin(c)), iterator(std::end(c)), value); + auto result = f.get(); + auto solution = std::remove(std::begin(d), std::end(d), value); + + bool equality = + test::equal(std::begin(c), result.base(), std::begin(d), solution); + + HPX_TEST(equality); +} + +template +void test_remove() +{ + using namespace hpx::execution; + test_remove(simd, IteratorTag()); + test_remove(par_simd, IteratorTag()); + + test_remove_async(simd(task), IteratorTag()); + test_remove_async(par_simd(task), IteratorTag()); +} + +void remove_test() +{ + test_remove(); + test_remove(); +} + +int hpx_main(hpx::program_options::variables_map& vm) +{ + unsigned int seed = (unsigned int) std::time(nullptr); + if (vm.count("seed")) + seed = vm["seed"].as(); + + std::cout << "using seed: " << seed << std::endl; + std::srand(seed); + + remove_test(); + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + // add command line option which controls the random number generator seed + using namespace hpx::program_options; + options_description desc_commandline( + "Usage: " HPX_APPLICATION_STRING " [options]"); + + desc_commandline.add_options()("seed,s", value(), + "the random number generator seed to use for this run"); + + // By default this test should run on all available cores + std::vector const cfg = {"hpx.os_threads=all"}; + + // Initialize and run HPX + hpx::local::init_params init_args; + init_args.desc_cmdline = desc_commandline; + init_args.cfg = cfg; + + HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0, + "HPX main exited with non-zero status"); + + return hpx::util::report_errors(); +} diff --git a/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp new file mode 100644 index 000000000000..fc91ec8c22e9 --- /dev/null +++ b/libs/core/algorithms/tests/unit/datapar_algorithms/remove_if_datapar.cpp @@ -0,0 +1,141 @@ +// Copyright (c) 2026 Bhoomish Gupta +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "../algorithms/test_utils.hpp" + +//////////////////////////////////////////////////////////////////////////// +struct equal_f +{ + equal_f(int val) + : val_(val) + { + } + + template + auto operator()(T lhs) const + { + return lhs == T(val_); + } + + int val_; +}; + +template +void test_remove_if(ExPolicy policy, IteratorTag) +{ + static_assert(hpx::is_execution_policy::value, + "hpx::is_execution_policy::value"); + + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::begin(d)); + + std::size_t idx = std::rand() % c.size(); + + auto result = hpx::remove_if(policy, iterator(std::begin(c)), + iterator(std::end(c)), equal_f(c[idx])); + auto solution = std::remove_if(std::begin(d), std::end(d), equal_f(d[idx])); + + bool equality = + test::equal(std::begin(c), result.base(), std::begin(d), solution); + + HPX_TEST(equality); +} + +template +void test_remove_if_async(ExPolicy p, IteratorTag) +{ + typedef std::vector::iterator base_iterator; + typedef test::test_iterator iterator; + + std::vector c(10007); + std::vector d(c.size()); + std::iota(std::begin(c), std::end(c), std::rand()); + std::copy(std::begin(c), std::end(c), std::begin(d)); + + std::size_t idx = std::rand() % c.size(); + + auto f = hpx::remove_if( + p, iterator(std::begin(c)), iterator(std::end(c)), equal_f(c[idx])); + auto result = f.get(); + auto solution = std::remove_if(std::begin(d), std::end(d), equal_f(d[idx])); + + bool equality = + test::equal(std::begin(c), result.base(), std::begin(d), solution); + + HPX_TEST(equality); +} + +template +void test_remove_if() +{ + using namespace hpx::execution; + test_remove_if(simd, IteratorTag()); + test_remove_if(par_simd, IteratorTag()); + + test_remove_if_async(simd(task), IteratorTag()); + test_remove_if_async(par_simd(task), IteratorTag()); +} + +void remove_if_test() +{ + test_remove_if(); + test_remove_if(); +} + +int hpx_main(hpx::program_options::variables_map& vm) +{ + unsigned int seed = (unsigned int) std::time(nullptr); + if (vm.count("seed")) + seed = vm["seed"].as(); + + std::cout << "using seed: " << seed << std::endl; + std::srand(seed); + + remove_if_test(); + return hpx::local::finalize(); +} + +int main(int argc, char* argv[]) +{ + // add command line option which controls the random number generator seed + using namespace hpx::program_options; + options_description desc_commandline( + "Usage: " HPX_APPLICATION_STRING " [options]"); + + desc_commandline.add_options()("seed,s", value(), + "the random number generator seed to use for this run"); + + // By default this test should run on all available cores + std::vector const cfg = {"hpx.os_threads=all"}; + + // Initialize and run HPX + hpx::local::init_params init_args; + init_args.desc_cmdline = desc_commandline; + init_args.cfg = cfg; + + HPX_TEST_EQ_MSG(hpx::local::init(hpx_main, argc, argv, init_args), 0, + "HPX main exited with non-zero status"); + + return hpx::util::report_errors(); +} diff --git a/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp index be865300d3bb..3058e646302f 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/eve/vector_pack_load_store.hpp @@ -14,6 +14,7 @@ #include #include +#include /////////////////////////////////////////////////////////////////////////////// namespace hpx::parallel::traits { @@ -26,14 +27,28 @@ namespace hpx::parallel::traits { template HPX_HOST_DEVICE HPX_FORCEINLINE static V aligned(Iter& iter) { - return V( - eve::as_aligned(std::addressof(*iter), eve::cardinal_t{})); + if constexpr (std::is_class_v) + { + return eve::load(eve::as_aligned( + std::addressof(*iter), eve::cardinal_t{})); + } + else + { + return *iter; + } } template HPX_HOST_DEVICE HPX_FORCEINLINE static V unaligned(Iter& iter) { - return *iter; + if constexpr (std::is_class_v) + { + return eve::load(std::addressof(*iter)); + } + else + { + return *iter; + } } }; @@ -46,15 +61,30 @@ namespace hpx::parallel::traits { HPX_HOST_DEVICE HPX_FORCEINLINE static void aligned( V& value, Iter& iter) { - eve::store(value, - eve::as_aligned(std::addressof(*iter), eve::cardinal_t{})); + if constexpr (std::is_class_v) + { + eve::store(value, + eve::as_aligned( + std::addressof(*iter), eve::cardinal_t{})); + } + else + { + *iter = value; + } } template HPX_HOST_DEVICE HPX_FORCEINLINE static void unaligned( V& value, Iter& iter) { - *iter = value; + if constexpr (std::is_class_v) + { + eve::store(value, std::addressof(*iter)); + } + else + { + *iter = value; + } } }; } // namespace hpx::parallel::traits diff --git a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp index 1228ebfa7795..0f7fa2aef595 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_load_store.hpp @@ -56,7 +56,15 @@ namespace hpx::parallel::traits { HPX_HOST_DEVICE HPX_FORCEINLINE static void unaligned( V& value, Iter& iter) { - *iter = value; + if constexpr (std::is_class_v) + { + value.copy_to(std::addressof(*iter), + datapar::experimental::element_aligned); + } + else + { + *iter = value; + } } }; } // namespace hpx::parallel::traits diff --git a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_simd.hpp b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_simd.hpp index b659f723c87d..70d8e26e963a 100644 --- a/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_simd.hpp +++ b/libs/core/execution/include/hpx/execution/traits/detail/simd/vector_pack_simd.hpp @@ -44,6 +44,7 @@ namespace hpx::datapar::experimental { HPX_CXX_CORE_EXPORT using std::experimental::memory_alignment_v; HPX_CXX_CORE_EXPORT using std::experimental::vector_aligned; + HPX_CXX_CORE_EXPORT using std::experimental::element_aligned; HPX_CXX_CORE_EXPORT using std::experimental::all_of; HPX_CXX_CORE_EXPORT using std::experimental::any_of; diff --git a/libs/core/execution/include/hpx/execution/traits/vector_pack_conditionals.hpp b/libs/core/execution/include/hpx/execution/traits/vector_pack_conditionals.hpp index 457324b69161..a808e5034250 100644 --- a/libs/core/execution/include/hpx/execution/traits/vector_pack_conditionals.hpp +++ b/libs/core/execution/include/hpx/execution/traits/vector_pack_conditionals.hpp @@ -8,8 +8,6 @@ #include -#if defined(HPX_HAVE_DATAPAR) - namespace hpx::parallel::traits { //////////////////////////////////////////////////////////////////// @@ -32,6 +30,8 @@ namespace hpx::parallel::traits { } } // namespace hpx::parallel::traits +#if defined(HPX_HAVE_DATAPAR) + #if !defined(__CUDACC__) #include #include diff --git a/test_package/test_package.cpp b/test_package/test_package.cpp index 5cb936ef366c..1b023bc9c9d0 100644 --- a/test_package/test_package.cpp +++ b/test_package/test_package.cpp @@ -4,9 +4,9 @@ // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#include #include #include -#include int hpx_main() {