diff --git a/adrtlib/_adrtlib.cpp b/adrtlib/_adrtlib.cpp index 8370884..5c51add 100644 --- a/adrtlib/_adrtlib.cpp +++ b/adrtlib/_adrtlib.cpp @@ -26,17 +26,23 @@ enum class Algorithm { DS, DT }; template static auto py_ids_visit(adrt::Tensor2D const &tensor, adrt::Sign sign, Recursive recursive) { - auto ids_core = adrt::ids::create(tensor.as()); + std::unique_ptr swaps; if (recursive == Recursive::Yes) { - ids_core.recursive(tensor.as(), sign); + auto ids_recursive = + adrt::ids_recursive::create(tensor.as()); + ids_recursive(tensor.as(), sign); + swaps = std::move(ids_recursive.swaps); } else { - ids_core.non_recursive(tensor.as(), sign); + auto ids_non_recursive = + adrt::ids_non_recursive::create(tensor.as()); + ids_non_recursive(tensor.as(), sign); + swaps = std::move(ids_non_recursive.swaps); } - nb::capsule swaps_owner(ids_core.swaps.get(), + nb::capsule swaps_owner(swaps.get(), [](void *p) noexcept { delete[] (int *)p; }); return nb::ndarray, nb::device::cpu>( - /* data = */ ids_core.swaps.release(), + /* data = */ swaps.release(), /* shape = */ {static_cast(tensor.height)}, /* owner = */ swaps_owner); } @@ -73,17 +79,23 @@ auto py_ids(Image2D &image, adrt::Sign sign, Recursive recursive) { template static auto py_idt_visit(adrt::Tensor2D const &tensor, adrt::Sign sign, Recursive recursive) { - auto idt_core = adrt::idt::create(tensor.as()); + std::unique_ptr swaps; if (recursive == Recursive::Yes) { - idt_core.recursive(tensor.as(), sign); + auto idt_recursive = + adrt::idt_recursive::create(tensor.as()); + idt_recursive(tensor.as(), sign); + swaps = std::move(idt_recursive.swaps); } else { - idt_core.non_recursive(tensor.as(), sign); + auto idt_non_recursive = + adrt::idt_non_recursive::create(tensor.as()); + idt_non_recursive(tensor.as(), sign); + swaps = std::move(idt_non_recursive.swaps); } - nb::capsule swaps_owner(idt_core.swaps.get(), + nb::capsule swaps_owner(swaps.get(), [](void *p) noexcept { delete[] (int *)p; }); return nb::ndarray, nb::device::cpu>( - /* data = */ idt_core.swaps.release(), + /* data = */ swaps.release(), /* shape = */ {static_cast(tensor.height)}, /* owner = */ swaps_owner); } diff --git a/adrtlib/benchmark/adrtlib_benchmark.cpp b/adrtlib/benchmark/adrtlib_benchmark.cpp index f941a8f..2c06312 100644 --- a/adrtlib/benchmark/adrtlib_benchmark.cpp +++ b/adrtlib/benchmark/adrtlib_benchmark.cpp @@ -19,15 +19,16 @@ static void BM_fht2ids(benchmark::State &state, IsRecursive is_recursive) { reinterpret_cast(src.get())}; adrt::Sign const sign = adrt::Sign::Positive; - auto ids_core = adrt::ids::create(tensor.as()); - if (is_recursive == IsRecursive::No) { + auto ids_non_recursive = + adrt::ids_non_recursive::create(tensor.as()); for (auto _ : state) { - ids_core.non_recursive(tensor.as(), sign); + ids_non_recursive(tensor.as(), sign); } } else { + auto ids_recursive = adrt::ids_recursive::create(tensor.as()); for (auto _ : state) { - ids_core.recursive(tensor.as(), sign); + ids_recursive(tensor.as(), sign); } } @@ -86,15 +87,16 @@ static void BM_fht2idt(benchmark::State &state, IsRecursive is_recursive) { reinterpret_cast(src.get())}; adrt::Sign const sign = adrt::Sign::Positive; - auto idt_code = adrt::idt::create(tensor.as()); - if (is_recursive == IsRecursive::Yes) { + auto idt_recursive = adrt::idt_recursive::create(tensor.as()); for (auto _ : state) { - idt_code.recursive(tensor.as(), sign); + idt_recursive(tensor.as(), sign); } } else { + auto idt_non_recursive = + adrt::idt_non_recursive::create(tensor.as()); for (auto _ : state) { - idt_code.non_recursive(tensor.as(), sign); + idt_non_recursive(tensor.as(), sign); } } state.SetBytesProcessed(int64_t(state.iterations()) * diff --git a/adrtlib/include/adrtlib/common.hpp b/adrtlib/include/adrtlib/common.hpp index a343b74..3b8e87e 100644 --- a/adrtlib/include/adrtlib/common.hpp +++ b/adrtlib/include/adrtlib/common.hpp @@ -33,10 +33,10 @@ #endif #endif -#ifdef __cplusplus -#define A_RESTRICT +#if defined(__cplusplus) || defined(_MSC_VER) +#define A_RESTRICT __restrict #else -#define A_RESTRICT restrict +#define A_RESTRICT #endif namespace adrt { diff --git a/adrtlib/include/adrtlib/common_algorithms.hpp b/adrtlib/include/adrtlib/common_algorithms.hpp index 07d42e5..c05f040 100644 --- a/adrtlib/include/adrtlib/common_algorithms.hpp +++ b/adrtlib/include/adrtlib/common_algorithms.hpp @@ -1,5 +1,5 @@ #pragma once -#include // memcpy +#include // std::memcpy #include "common.hpp" @@ -30,8 +30,8 @@ static inline void rotate(Scalar *A_RESTRICT dst, Scalar *A_RESTRICT src, int width, int rotation) { A_NEVER(width < 0 || rotation >= width); int const split = width - rotation; - memcpy(dst, src + split, rotation * sizeof(Scalar)); - memcpy(dst + rotation, src, split * sizeof(Scalar)); + std::memcpy(dst, src + split, rotation * sizeof(Scalar)); + std::memcpy(dst + rotation, src, split * sizeof(Scalar)); } template diff --git a/adrtlib/include/adrtlib/fht2ids.hpp b/adrtlib/include/adrtlib/fht2ids.hpp index 4c8d7b9..48013c9 100644 --- a/adrtlib/include/adrtlib/fht2ids.hpp +++ b/adrtlib/include/adrtlib/fht2ids.hpp @@ -1,4 +1,5 @@ #include // round +#include #include "common_algorithms.hpp" #include "non_recursive.hpp" @@ -58,8 +59,8 @@ static inline void fht2ids_core(int const h, Sign sign, int K[], } template -void fht2ids_recursive(Tensor2DTyped const &src, Sign sign, int swaps[], - int swaps_buffer[], Scalar line_buffer[]) { +void _fht2ids_recursive(Tensor2DTyped const &src, Sign sign, + int swaps[], int swaps_buffer[], Scalar line_buffer[]) { auto const height = src.height; if A_UNLIKELY (height <= 1) { return; @@ -69,81 +70,116 @@ void fht2ids_recursive(Tensor2DTyped const &src, Sign sign, int swaps[], Tensor2D const I_T{slice_no_checks(src, 0, h_T)}; Tensor2D const I_B{slice_no_checks(src, h_T, src.height)}; - memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0])); + std::memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0])); if (I_T.height > 1) { - fht2ids_recursive(I_T.as(), sign, swaps, swaps_buffer, line_buffer); + _fht2ids_recursive(I_T.as(), sign, swaps, swaps_buffer, + line_buffer); } if (I_B.height > 1) { - fht2ids_recursive(I_B.as(), sign, swaps + h_T, swaps_buffer + h_T, - line_buffer); + _fht2ids_recursive(I_B.as(), sign, swaps + h_T, swaps_buffer + h_T, + line_buffer); } - memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0])); + std::memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0])); fht2ids_core(height, sign, swaps, swaps_buffer + 0, swaps_buffer + h_T, line_buffer, I_T.as(), I_B.as()); } template -void fht2ids_non_recursive(Tensor2DTyped const &src, Sign sign, - int swaps[], int swaps_buffer[], - Scalar line_buffer[]) { +void _fht2ids_non_recursive(Tensor2DTyped const &src, Sign sign, + int swaps[], int swaps_buffer[], + Scalar line_buffer[], + std::vector const &tasks) { auto const height = src.height; if A_UNLIKELY (height <= 1) { return; } std::memset(swaps, 0, height * sizeof(int)); - non_recursive( - height, - [&](ADRTTask const &task) { - if (task.size < 2) { - return; - } - Tensor2D const I_T{slice_no_checks(src, task.start, task.mid)}; - Tensor2D const I_B{slice_no_checks(src, task.mid, task.stop)}; - int *cur_swaps_buffer = swaps_buffer + task.start; - int *cur_swaps = swaps + task.start; - memcpy(cur_swaps_buffer, cur_swaps, - task.size * sizeof(swaps_buffer[0])); - fht2ids_core(task.size, sign, cur_swaps, cur_swaps_buffer, - swaps_buffer + task.mid, line_buffer, I_T.as(), - I_B.as()); - }, - [](auto val) { return val / 2; }); + for (ADRTTask const &task : tasks) { + A_NEVER(task.size < 2); + Tensor2D const I_T{slice_no_checks(src, task.start, task.mid)}; + Tensor2D const I_B{slice_no_checks(src, task.mid, task.stop)}; + int *cur_swaps_buffer = swaps_buffer + task.start; + int *cur_swaps = swaps + task.start; + std::memcpy(cur_swaps_buffer, cur_swaps, + task.size * sizeof(swaps_buffer[0])); + fht2ids_core(task.size, sign, cur_swaps, cur_swaps_buffer, + swaps_buffer + task.mid, line_buffer, I_T.as(), + I_B.as()); + } } template -class ids { +class ids_recursive { std::unique_ptr line_buffer; std::unique_ptr swaps_buffer; - ids(std::unique_ptr &&line_buffer, std::unique_ptr &&swaps, - std::unique_ptr &&swaps_buffer) + ids_recursive(std::unique_ptr &&line_buffer, + std::unique_ptr &&swaps, + std::unique_ptr &&swaps_buffer) : line_buffer{std::move(line_buffer)}, swaps_buffer{std::move(swaps_buffer)}, swaps{std::move(swaps)} {} public: std::unique_ptr swaps; - static ids create(Tensor2DTyped const &prototype) { + static ids_recursive create(Tensor2DTyped const &prototype) { std::unique_ptr line_buffer{new Scalar[prototype.width]}; std::unique_ptr swaps{new int[prototype.height]}; std::unique_ptr swaps_buffer{new int[prototype.height]}; - return ids{std::move(line_buffer), std::move(swaps_buffer), - std::move(swaps)}; + + return ids_recursive{std::move(line_buffer), + std::move(swaps_buffer), std::move(swaps)}; } - void recursive(Tensor2DTyped const &src, Sign sign) const { - fht2ids_recursive(src, sign, this->swaps.get(), this->swaps_buffer.get(), - this->line_buffer.get()); + void operator()(Tensor2DTyped const &src, Sign sign) const { + _fht2ids_recursive(src, sign, this->swaps.get(), this->swaps_buffer.get(), + this->line_buffer.get()); + } +}; + +template +class ids_non_recursive { + std::unique_ptr line_buffer; + std::unique_ptr swaps_buffer; + std::vector tasks; + ids_non_recursive(std::unique_ptr &&line_buffer, + std::unique_ptr &&swaps, + std::unique_ptr &&swaps_buffer, + std::vector &&tasks) + : line_buffer{std::move(line_buffer)}, + swaps_buffer{std::move(swaps_buffer)}, + swaps{std::move(swaps)}, + tasks{std::move(tasks)} {} + + public: + std::unique_ptr swaps; + static ids_non_recursive create( + Tensor2DTyped const &prototype) { + std::unique_ptr line_buffer{new Scalar[prototype.width]}; + std::unique_ptr swaps{new int[prototype.height]}; + std::unique_ptr swaps_buffer{new int[prototype.height]}; + std::vector tasks; + adrt::non_recursive( + prototype.height, + [&](ADRTTask const &task) { tasks.emplace_back(task); }, + [](auto val) { return val / 2; }); + return ids_non_recursive{std::move(line_buffer), + std::move(swaps_buffer), std::move(swaps), + std::move(tasks)}; } - void non_recursive(Tensor2DTyped const &src, Sign sign) const { - fht2ids_non_recursive(src, sign, this->swaps.get(), - this->swaps_buffer.get(), this->line_buffer.get()); + void operator()(Tensor2DTyped const &src, Sign sign) const { + _fht2ids_non_recursive(src, sign, this->swaps.get(), + this->swaps_buffer.get(), this->line_buffer.get(), + this->tasks); } }; template -using fht2ids = ids; +using fht2ids_recursive = ids_recursive; + +template +using fht2ids_non_recursive = ids_non_recursive; } // namespace adrt diff --git a/adrtlib/include/adrtlib/fht2idt.hpp b/adrtlib/include/adrtlib/fht2idt.hpp index 2dc610f..d7d5918 100644 --- a/adrtlib/include/adrtlib/fht2idt.hpp +++ b/adrtlib/include/adrtlib/fht2idt.hpp @@ -53,7 +53,7 @@ static inline void fht2idt_core( t_T_to_check.resize(h_T); std::iota(t_T_to_check.begin(), t_T_to_check.end(), 0); t_processed.resize(h); - fill(t_processed.begin(), t_processed.end(), false); + std::fill(t_processed.begin(), t_processed.end(), false); int32_t t_B_prev = -1; double const k_T = static_cast(h_T - 1) / static_cast(h - 1); double const k_B = static_cast(h_B - 1) / static_cast(h - 1); @@ -139,11 +139,11 @@ static inline void fht2idt_core( } template -void fht2idt_recursive(Tensor2DTyped const& src, Sign sign, int swaps[], - int swaps_buffer[], Scalar line_buffer[], - OutDegree out_degrees[], std::vector& t_B_to_check, - std::vector& t_T_to_check, - std::vector& t_processed) { +void _fht2idt_recursive(Tensor2DTyped const& src, Sign sign, + int swaps[], int swaps_buffer[], Scalar line_buffer[], + OutDegree out_degrees[], std::vector& t_B_to_check, + std::vector& t_T_to_check, + std::vector& t_processed) { auto const height = src.height; if A_UNLIKELY (height <= 1) { return; @@ -153,15 +153,15 @@ void fht2idt_recursive(Tensor2DTyped const& src, Sign sign, int swaps[], Tensor2D const I_B{slice_no_checks(src, h_T, src.height)}; if (I_T.height > 1) { - fht2idt_recursive(I_T.as(), sign, swaps, swaps_buffer, line_buffer, - out_degrees, t_B_to_check, t_T_to_check, t_processed); + _fht2idt_recursive(I_T.as(), sign, swaps, swaps_buffer, line_buffer, + out_degrees, t_B_to_check, t_T_to_check, t_processed); } if (I_B.height > 1) { - fht2idt_recursive(I_B.as(), sign, swaps + h_T, swaps_buffer + h_T, - line_buffer, out_degrees, t_B_to_check, t_T_to_check, - t_processed); + _fht2idt_recursive(I_B.as(), sign, swaps + h_T, swaps_buffer + h_T, + line_buffer, out_degrees, t_B_to_check, t_T_to_check, + t_processed); } - memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0])); + std::memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0])); fht2idt_core(height, sign, swaps, swaps_buffer + 0, swaps_buffer + h_T, line_buffer, I_T.as(), I_B.as(), out_degrees, t_B_to_check, t_T_to_check, t_processed @@ -170,82 +170,121 @@ void fht2idt_recursive(Tensor2DTyped const& src, Sign sign, int swaps[], } template -void fht2idt_non_recursive(Tensor2DTyped const& src, Sign sign, - int swaps[], int swaps_buffer[], - Scalar line_buffer[], OutDegree out_degrees[], - std::vector& t_B_to_check, - std::vector& t_T_to_check, - std::vector& t_processed) { +void _fht2idt_non_recursive(Tensor2DTyped const& src, Sign sign, + int swaps[], int swaps_buffer[], + Scalar line_buffer[], OutDegree out_degrees[], + std::vector& t_B_to_check, + std::vector& t_T_to_check, + std::vector& t_processed, + std::vector const& tasks) { auto const height = src.height; if A_UNLIKELY (height <= 1) { return; } - - non_recursive( - height, - [&](ADRTTask const& task) { - A_NEVER(task.size < 2); - Tensor2D const I_T{slice_no_checks(src, task.start, task.mid)}; - Tensor2D const I_B{slice_no_checks(src, task.mid, task.stop)}; - int* cur_swaps_buffer = swaps_buffer + task.start; - int* cur_swaps = swaps + task.start; - memcpy(cur_swaps_buffer, cur_swaps, - task.size * sizeof(swaps_buffer[0])); - fht2idt_core(task.size, sign, cur_swaps, cur_swaps_buffer, - swaps_buffer + task.mid, line_buffer, I_T.as(), - I_B.as(), out_degrees, t_B_to_check, t_T_to_check, - t_processed); - }, - [](int val) { - return static_cast(div_by_pow2(static_cast(val))); - }); + for (ADRTTask const& task : tasks) { + A_NEVER(task.size < 2); + Tensor2D const I_T{slice_no_checks(src, task.start, task.mid)}; + Tensor2D const I_B{slice_no_checks(src, task.mid, task.stop)}; + int* cur_swaps_buffer = swaps_buffer + task.start; + int* cur_swaps = swaps + task.start; + std::memcpy(cur_swaps_buffer, cur_swaps, + task.size * sizeof(swaps_buffer[0])); + fht2idt_core(task.size, sign, cur_swaps, cur_swaps_buffer, + swaps_buffer + task.mid, line_buffer, I_T.as(), + I_B.as(), out_degrees, t_B_to_check, t_T_to_check, + t_processed); + } } template -class idt { +struct idt_base { std::unique_ptr swaps_buffer; std::unique_ptr line_buffer; std::unique_ptr out_degrees; std::vector t_B_to_check; std::vector t_T_to_check; std::vector t_processed; + template + idt_base(SwapsBuffer&& swaps_buffer, LineBuffer&& line_buffer, + OutDegrees&& out_degrees) + : swaps_buffer(std::forward(swaps_buffer)), + line_buffer(std::forward(line_buffer)), + out_degrees(std::forward(out_degrees)) {} - public: - std::unique_ptr swaps; - idt(std::unique_ptr&& swaps, std::unique_ptr&& swaps_buffer, - std::unique_ptr&& line_buffer, - std::unique_ptr&& out_degrees) - : swaps_buffer{std::move(swaps_buffer)}, - line_buffer{std::move(line_buffer)}, - out_degrees{std::move(out_degrees)}, - swaps{std::move(swaps)} {} - static idt create(Tensor2DTyped const& prototype) { - std::unique_ptr swaps(new int[prototype.height]); + static idt_base create(Tensor2DTyped const& prototype) { std::unique_ptr swaps_buffer(new int[prototype.height]); std::unique_ptr line_buffer(new Scalar[prototype.height]); std::unique_ptr out_degrees( new adrt::OutDegree[prototype.height]); - return idt(std::move(swaps), std::move(swaps_buffer), - std::move(line_buffer), std::move(out_degrees)); + return idt_base(std::move(swaps_buffer), std::move(line_buffer), + std::move(out_degrees)); } - void recursive(Tensor2DTyped const& src, Sign sign) { +}; + +template +class idt_recursive { + idt_base base; + + public: + std::unique_ptr swaps; + + idt_recursive(idt_base&& base, std::unique_ptr&& swaps) + : base{std::move(base)}, swaps{std::move(swaps)} {} + static idt_recursive create(Tensor2DTyped const& prototype) { + std::unique_ptr swaps(new int[prototype.height]); + return idt_recursive(idt_base::create(prototype), std::move(swaps)); + } + void operator()(Tensor2DTyped const& src, Sign sign) { std::fill(this->swaps.get(), this->swaps.get() + src.height, 0); - fht2idt_recursive(src, sign, this->swaps.get(), this->swaps_buffer.get(), - this->line_buffer.get(), this->out_degrees.get(), - this->t_B_to_check, this->t_T_to_check, - this->t_processed); + _fht2idt_recursive(src, sign, this->swaps.get(), + this->base.swaps_buffer.get(), + this->base.line_buffer.get(), + this->base.out_degrees.get(), this->base.t_B_to_check, + this->base.t_T_to_check, this->base.t_processed); } +}; + +template +class idt_non_recursive { + idt_base base; + std::vector tasks; + + public: + std::unique_ptr swaps; + idt_non_recursive(idt_base&& base, std::unique_ptr&& swaps, + std::vector&& tasks) + : base{std::move(base)}, + swaps{std::move(swaps)}, + tasks{std::move(tasks)} {} + static idt_non_recursive create( + Tensor2DTyped const& prototype) { + std::unique_ptr swaps(new int[prototype.height]); + std::vector tasks; + + non_recursive( + prototype.height, + [&](ADRTTask const& task) { tasks.emplace_back(task); }, + [](int val) { + return static_cast(div_by_pow2(static_cast(val))); + }); - void non_recursive(Tensor2DTyped const& src, Sign sign) { + return idt_non_recursive(idt_base::create(prototype), + std::move(swaps), std::move(tasks)); + } + void operator()(Tensor2DTyped const& src, Sign sign) { std::fill(this->swaps.get(), this->swaps.get() + src.height, 0); - fht2idt_non_recursive(src, sign, this->swaps.get(), - this->swaps_buffer.get(), this->line_buffer.get(), - this->out_degrees.get(), this->t_B_to_check, - this->t_T_to_check, this->t_processed); + _fht2idt_non_recursive( + src, sign, this->swaps.get(), this->base.swaps_buffer.get(), + this->base.line_buffer.get(), this->base.out_degrees.get(), + this->base.t_B_to_check, this->base.t_T_to_check, + this->base.t_processed, this->tasks); } }; template -using fht2idt = idt; +using fht2idt_recursive = idt_recursive; + +template +using fht2idt_non_recursive = idt_non_recursive; } // namespace adrt \ No newline at end of file diff --git a/adrtlib/test/adrtlib_test.cpp b/adrtlib/test/adrtlib_test.cpp index 6f54263..ba89003 100644 --- a/adrtlib/test/adrtlib_test.cpp +++ b/adrtlib/test/adrtlib_test.cpp @@ -279,33 +279,35 @@ static std::vector GenerateTestFHT2DSCases() { FunctionPair( [](adrt::Tensor2DTyped const &dst, adrt::Tensor2DTyped const &src, adrt::Sign sign) { - auto fht2ids_core = adrt::fht2ids::create(src); - fht2ids_core.recursive(src, sign); - unswap_tensor(dst, src, fht2ids_core.swaps.get()); + auto ids_recursive = adrt::ids_recursive::create(src); + ids_recursive(src, sign); + unswap_tensor(dst, src, ids_recursive.swaps.get()); }, "fht2ids_recursive", FunctionType::fht2ds, IsInplace::Yes), FunctionPair( [](adrt::Tensor2DTyped const &dst, adrt::Tensor2DTyped const &src, adrt::Sign sign) { - auto fht2ids_core = adrt::fht2ids::create(src); - fht2ids_core.non_recursive(src, sign); - unswap_tensor(dst, src, fht2ids_core.swaps.get()); + auto ids_non_recursive = + adrt::ids_non_recursive::create(src); + ids_non_recursive(src, sign); + unswap_tensor(dst, src, ids_non_recursive.swaps.get()); }, "fht2ids_non_recursive", FunctionType::fht2ds, IsInplace::Yes), FunctionPair( [](adrt::Tensor2DTyped const &dst, adrt::Tensor2DTyped const &src, adrt::Sign sign) { - auto fht2idt_core = adrt::fht2idt::create(src); - fht2idt_core.recursive(src, sign); - unswap_tensor(dst, src, fht2idt_core.swaps.get()); + auto idt_recursive = adrt::idt_recursive::create(src); + idt_recursive(src, sign); + unswap_tensor(dst, src, idt_recursive.swaps.get()); }, "fht2idt_recursive", FunctionType::fht2dt, IsInplace::Yes), FunctionPair( [](adrt::Tensor2DTyped const &dst, adrt::Tensor2DTyped const &src, adrt::Sign sign) { - auto fht2idt_core = adrt::fht2idt::create(src); - fht2idt_core.non_recursive(src, sign); - unswap_tensor(dst, src, fht2idt_core.swaps.get()); + auto idt_non_recursive = + adrt::idt_non_recursive::create(src); + idt_non_recursive(src, sign); + unswap_tensor(dst, src, idt_non_recursive.swaps.get()); }, "fht2idt_non_recursive", FunctionType::fht2dt, IsInplace::Yes), FunctionPair(