Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions adrtlib/_adrtlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,23 @@ enum class Algorithm { DS, DT };
// Runs one of the adrt "ids" functors on `tensor`, viewed as Scalar, and
// returns the functor's `swaps` array as a 1-D CPU nb::ndarray of int whose
// length is tensor.height.
//
// `recursive` selects between the recursive and the non-recursive functor;
// `sign` is forwarded unchanged to whichever one runs.
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped. The lines that use a single `ids_core` object and the lines that
// use separate `ids_recursive` / `ids_non_recursive` functors appear to be the
// two sides of the same change — confirm against the repository; read
// literally the text below does not compile.
template <typename Scalar>
static auto py_ids_visit(adrt::Tensor2D const &tensor, adrt::Sign sign,
Recursive recursive) {
auto ids_core = adrt::ids<Scalar>::create(tensor.as<Scalar>());
// Receives the swaps array moved out of whichever functor ran, so the array
// outlives the functor, which is scoped to its if/else branch.
std::unique_ptr<int[]> swaps;
if (recursive == Recursive::Yes) {
ids_core.recursive(tensor.as<Scalar>(), sign);
auto ids_recursive =
adrt::ids_recursive<Scalar>::create(tensor.as<Scalar>());
ids_recursive(tensor.as<Scalar>(), sign);
swaps = std::move(ids_recursive.swaps);
} else {
ids_core.non_recursive(tensor.as<Scalar>(), sign);
auto ids_non_recursive =
adrt::ids_non_recursive<Scalar>::create(tensor.as<Scalar>());
ids_non_recursive(tensor.as<Scalar>(), sign);
swaps = std::move(ids_non_recursive.swaps);
}
// The capsule is created on the raw pointer and its deleter delete[]s that
// pointer; it is handed to the ndarray as `owner` below.
nb::capsule swaps_owner(ids_core.swaps.get(),
nb::capsule swaps_owner(swaps.get(),
[](void *p) noexcept { delete[] (int *)p; });

// release() drops the unique_ptr's ownership, so the capsule deleter is the
// only code left that frees the array.
return nb::ndarray<nb::numpy, int, nb::ndim<1>, nb::device::cpu>(
/* data = */ ids_core.swaps.release(),
/* data = */ swaps.release(),
/* shape = */ {static_cast<size_t>(tensor.height)},
/* owner = */ swaps_owner);
}
Expand Down Expand Up @@ -73,17 +79,23 @@ auto py_ids(Image2D &image, adrt::Sign sign, Recursive recursive) {
// Runs one of the adrt "idt" functors on `tensor`, viewed as Scalar, and
// returns the functor's `swaps` array as a 1-D CPU nb::ndarray of int whose
// length is tensor.height.
//
// `recursive` selects between the recursive and the non-recursive functor;
// `sign` is forwarded unchanged to whichever one runs.
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped. The lines that use a single `idt_core` object and the lines that
// use separate `idt_recursive` / `idt_non_recursive` functors appear to be the
// two sides of the same change — confirm against the repository; read
// literally the text below does not compile.
template <typename Scalar>
static auto py_idt_visit(adrt::Tensor2D const &tensor, adrt::Sign sign,
Recursive recursive) {
auto idt_core = adrt::idt<Scalar>::create(tensor.as<Scalar>());
// Receives the swaps array moved out of whichever functor ran, so the array
// outlives the functor, which is scoped to its if/else branch.
std::unique_ptr<int[]> swaps;
if (recursive == Recursive::Yes) {
idt_core.recursive(tensor.as<Scalar>(), sign);
auto idt_recursive =
adrt::idt_recursive<Scalar>::create(tensor.as<Scalar>());
idt_recursive(tensor.as<Scalar>(), sign);
swaps = std::move(idt_recursive.swaps);
} else {
idt_core.non_recursive(tensor.as<Scalar>(), sign);
auto idt_non_recursive =
adrt::idt_non_recursive<Scalar>::create(tensor.as<Scalar>());
idt_non_recursive(tensor.as<Scalar>(), sign);
swaps = std::move(idt_non_recursive.swaps);
}
// The capsule is created on the raw pointer and its deleter delete[]s that
// pointer; it is handed to the ndarray as `owner` below.
nb::capsule swaps_owner(idt_core.swaps.get(),
nb::capsule swaps_owner(swaps.get(),
[](void *p) noexcept { delete[] (int *)p; });

// release() drops the unique_ptr's ownership, so the capsule deleter is the
// only code left that frees the array.
return nb::ndarray<nb::numpy, int, nb::ndim<1>, nb::device::cpu>(
/* data = */ idt_core.swaps.release(),
/* data = */ swaps.release(),
/* shape = */ {static_cast<size_t>(tensor.height)},
/* owner = */ swaps_owner);
}
Expand Down
18 changes: 10 additions & 8 deletions adrtlib/benchmark/adrtlib_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,16 @@ static void BM_fht2ids(benchmark::State &state, IsRecursive is_recursive) {
reinterpret_cast<uint8_t *>(src.get())};
adrt::Sign const sign = adrt::Sign::Positive;

auto ids_core = adrt::ids<float>::create(tensor.as<float>());

if (is_recursive == IsRecursive::No) {
auto ids_non_recursive =
adrt::ids_non_recursive<float>::create(tensor.as<float>());
for (auto _ : state) {
ids_core.non_recursive(tensor.as<float>(), sign);
ids_non_recursive(tensor.as<float>(), sign);
}
} else {
auto ids_recursive = adrt::ids_recursive<float>::create(tensor.as<float>());
for (auto _ : state) {
ids_core.recursive(tensor.as<float>(), sign);
ids_recursive(tensor.as<float>(), sign);
}
}

Expand Down Expand Up @@ -86,15 +87,16 @@ static void BM_fht2idt(benchmark::State &state, IsRecursive is_recursive) {
reinterpret_cast<uint8_t *>(src.get())};
adrt::Sign const sign = adrt::Sign::Positive;

auto idt_code = adrt::idt<float>::create(tensor.as<float>());

if (is_recursive == IsRecursive::Yes) {
auto idt_recursive = adrt::idt_recursive<float>::create(tensor.as<float>());
for (auto _ : state) {
idt_code.recursive(tensor.as<float>(), sign);
idt_recursive(tensor.as<float>(), sign);
}
} else {
auto idt_non_recursive =
adrt::idt_non_recursive<float>::create(tensor.as<float>());
for (auto _ : state) {
idt_code.non_recursive(tensor.as<float>(), sign);
idt_non_recursive(tensor.as<float>(), sign);
}
}
state.SetBytesProcessed(int64_t(state.iterations()) *
Expand Down
6 changes: 3 additions & 3 deletions adrtlib/include/adrtlib/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@
#endif
#endif

// A_RESTRICT: qualifier macro placed on pointer parameters (e.g. the dst/src
// pointers of rotate() in common_algorithms.hpp).
//
// NOTE(review): two versions of this conditional appear interleaved here (a
// rendered diff with its +/- markers stripped); read literally there are two
// #if openers for a single #endif. Presumably:
//   - one version expands to nothing under __cplusplus and to `restrict`
//     otherwise;
//   - the other expands to `__restrict` when __cplusplus or _MSC_VER is
//     defined and to nothing otherwise.
// Confirm against the repository which of the two is current.
#ifdef __cplusplus
#define A_RESTRICT
#if defined(__cplusplus) || defined(_MSC_VER)
#define A_RESTRICT __restrict
#else
#define A_RESTRICT restrict
#define A_RESTRICT
#endif

namespace adrt {
Expand Down
6 changes: 3 additions & 3 deletions adrtlib/include/adrtlib/common_algorithms.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once
#include <cstring> // memcpy
#include <cstring> // std::memcpy

#include "common.hpp"

Expand Down Expand Up @@ -30,8 +30,8 @@ static inline void rotate(Scalar *A_RESTRICT dst, Scalar *A_RESTRICT src,
int width, int rotation) {
A_NEVER(width < 0 || rotation >= width);
int const split = width - rotation;
memcpy(dst, src + split, rotation * sizeof(Scalar));
memcpy(dst + rotation, src, split * sizeof(Scalar));
std::memcpy(dst, src + split, rotation * sizeof(Scalar));
std::memcpy(dst + rotation, src, split * sizeof(Scalar));
}

template <typename Scalar>
Expand Down
116 changes: 76 additions & 40 deletions adrtlib/include/adrtlib/fht2ids.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <cmath> // round
#include <vector>

#include "common_algorithms.hpp"
#include "non_recursive.hpp"
Expand Down Expand Up @@ -58,8 +59,8 @@ static inline void fht2ids_core(int const h, Sign sign, int K[],
}

template <typename Scalar>
void fht2ids_recursive(Tensor2DTyped<Scalar> const &src, Sign sign, int swaps[],
int swaps_buffer[], Scalar line_buffer[]) {
void _fht2ids_recursive(Tensor2DTyped<Scalar> const &src, Sign sign,
int swaps[], int swaps_buffer[], Scalar line_buffer[]) {
auto const height = src.height;
if A_UNLIKELY (height <= 1) {
return;
Expand All @@ -69,81 +70,116 @@ void fht2ids_recursive(Tensor2DTyped<Scalar> const &src, Sign sign, int swaps[],
Tensor2D const I_T{slice_no_checks(src, 0, h_T)};
Tensor2D const I_B{slice_no_checks(src, h_T, src.height)};

memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0]));
std::memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0]));

if (I_T.height > 1) {
fht2ids_recursive(I_T.as<Scalar>(), sign, swaps, swaps_buffer, line_buffer);
_fht2ids_recursive(I_T.as<Scalar>(), sign, swaps, swaps_buffer,
line_buffer);
}
if (I_B.height > 1) {
fht2ids_recursive(I_B.as<Scalar>(), sign, swaps + h_T, swaps_buffer + h_T,
line_buffer);
_fht2ids_recursive(I_B.as<Scalar>(), sign, swaps + h_T, swaps_buffer + h_T,
line_buffer);
}
memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0]));
std::memcpy(swaps_buffer, swaps, height * sizeof(swaps_buffer[0]));
fht2ids_core(height, sign, swaps, swaps_buffer + 0, swaps_buffer + h_T,
line_buffer, I_T.as<Scalar>(), I_B.as<Scalar>());
}

// Non-recursive driver for the ids transform over the rows of `src`.
//
// Does nothing when src.height <= 1. Otherwise it zeroes `swaps` (height ints)
// and then, for each ADRTTask:
//   - slices `src` into a top part, rows [task.start, task.mid), and a bottom
//     part, rows [task.mid, task.stop);
//   - copies the task's segment of `swaps` into the same segment of
//     `swaps_buffer`;
//   - calls fht2ids_core on the two slices, writing into the task's segment
//     of `swaps` and reading the two halves of the snapshot.
//
// `swaps` and `swaps_buffer` are indexed up to src.height; `line_buffer` is
// passed straight through to fht2ids_core.
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped. Two signatures and two task-iteration forms appear interleaved:
// one generates tasks on the fly through non_recursive(height, callback,
// halving lambda), the other iterates a caller-supplied `tasks` vector.
// Confirm against the repository which is current.
//
// NOTE(review): the callback form silently skips tasks with task.size < 2,
// while the loop form states A_NEVER(task.size < 2). The create() factory of
// ids_non_recursive stores every task reported by adrt::non_recursive without
// filtering. If non_recursive can emit such tasks and A_NEVER is an optimizer
// assumption, the loop form would be undefined behaviour — verify.
template <typename Scalar>
void fht2ids_non_recursive(Tensor2DTyped<Scalar> const &src, Sign sign,
int swaps[], int swaps_buffer[],
Scalar line_buffer[]) {
void _fht2ids_non_recursive(Tensor2DTyped<Scalar> const &src, Sign sign,
int swaps[], int swaps_buffer[],
Scalar line_buffer[],
std::vector<ADRTTask> const &tasks) {
auto const height = src.height;
if A_UNLIKELY (height <= 1) {
return;
}
// Start from all-zero swaps before the first task reads them.
std::memset(swaps, 0, height * sizeof(int));

non_recursive(
height,
[&](ADRTTask const &task) {
if (task.size < 2) {
return;
}
Tensor2D const I_T{slice_no_checks(src, task.start, task.mid)};
Tensor2D const I_B{slice_no_checks(src, task.mid, task.stop)};
int *cur_swaps_buffer = swaps_buffer + task.start;
int *cur_swaps = swaps + task.start;
memcpy(cur_swaps_buffer, cur_swaps,
task.size * sizeof(swaps_buffer[0]));
fht2ids_core(task.size, sign, cur_swaps, cur_swaps_buffer,
swaps_buffer + task.mid, line_buffer, I_T.as<Scalar>(),
I_B.as<Scalar>());
},
[](auto val) { return val / 2; });
for (ADRTTask const &task : tasks) {
A_NEVER(task.size < 2);
Tensor2D const I_T{slice_no_checks(src, task.start, task.mid)};
Tensor2D const I_B{slice_no_checks(src, task.mid, task.stop)};
int *cur_swaps_buffer = swaps_buffer + task.start;
int *cur_swaps = swaps + task.start;
// Snapshot this task's swaps so fht2ids_core can read the old values from
// swaps_buffer while it writes new ones into cur_swaps.
std::memcpy(cur_swaps_buffer, cur_swaps,
task.size * sizeof(swaps_buffer[0]));
fht2ids_core(task.size, sign, cur_swaps, cur_swaps_buffer,
swaps_buffer + task.mid, line_buffer, I_T.as<Scalar>(),
I_B.as<Scalar>());
}
}

// Functor that owns the scratch buffers needed by the recursive ids transform,
// so they are allocated once in create() and reused on every call.
//
//   line_buffer   - prototype.width Scalars
//   swaps_buffer  - prototype.height ints (scratch)
//   swaps         - prototype.height ints, public so callers can read the
//                   result or move the array out
//
// The constructor is private; instances are obtained through create().
//
// NOTE(review): this span looks like a rendered diff whose +/- markers were
// stripped. The class name (`ids` / `ids_recursive`), the constructor, the
// return statement of create() and the call method (`recursive` /
// `operator()`) each appear twice — confirm against the repository which is
// current.
//
// NOTE(review): the constructor parameters are (line_buffer, swaps,
// swaps_buffer) but create() passes (line_buffer, swaps_buffer, swaps), so the
// array allocated as `swaps` ends up in the `swaps_buffer` member and vice
// versa. Both arrays are allocated with prototype.height ints, so sizes still
// match, but the order is worth aligning.
template <typename Scalar>
class ids {
class ids_recursive {
std::unique_ptr<Scalar[]> line_buffer;
std::unique_ptr<int[]> swaps_buffer;
ids(std::unique_ptr<Scalar[]> &&line_buffer, std::unique_ptr<int[]> &&swaps,
std::unique_ptr<int[]> &&swaps_buffer)
ids_recursive(std::unique_ptr<Scalar[]> &&line_buffer,
std::unique_ptr<int[]> &&swaps,
std::unique_ptr<int[]> &&swaps_buffer)
: line_buffer{std::move(line_buffer)},
swaps_buffer{std::move(swaps_buffer)},
swaps{std::move(swaps)} {}

public:
std::unique_ptr<int[]> swaps;
// Allocates the three buffers from the dimensions of `prototype`; the
// element values are not initialised here (plain new[]).
static ids<Scalar> create(Tensor2DTyped<Scalar> const &prototype) {
static ids_recursive<Scalar> create(Tensor2DTyped<Scalar> const &prototype) {
std::unique_ptr<Scalar[]> line_buffer{new Scalar[prototype.width]};
std::unique_ptr<int[]> swaps{new int[prototype.height]};
std::unique_ptr<int[]> swaps_buffer{new int[prototype.height]};
return ids<Scalar>{std::move(line_buffer), std::move(swaps_buffer),
std::move(swaps)};

return ids_recursive<Scalar>{std::move(line_buffer),
std::move(swaps_buffer), std::move(swaps)};
}

// Runs the recursive transform on `src`, using the owned buffers. The method
// is const, but the buffer contents are written through the owned pointers.
void recursive(Tensor2DTyped<Scalar> const &src, Sign sign) const {
fht2ids_recursive(src, sign, this->swaps.get(), this->swaps_buffer.get(),
this->line_buffer.get());
void operator()(Tensor2DTyped<Scalar> const &src, Sign sign) const {
_fht2ids_recursive(src, sign, this->swaps.get(), this->swaps_buffer.get(),
this->line_buffer.get());
}
};

// Functor that owns the scratch buffers and the precomputed task list needed
// by the non-recursive ids transform, so both are built once in create() and
// reused on every call.
//
//   line_buffer   - prototype.width Scalars
//   swaps_buffer  - prototype.height ints (scratch)
//   tasks         - every ADRTTask reported by adrt::non_recursive for
//                   prototype.height, in the order reported
//   swaps         - prototype.height ints, public so callers can read the
//                   result or move the array out
//
// The constructor is private; instances are obtained through create().
//
// NOTE(review): the constructor parameters are (line_buffer, swaps,
// swaps_buffer, tasks) but create() passes (line_buffer, swaps_buffer, swaps,
// tasks), so the two int arrays end up in each other's member. Both are
// allocated with prototype.height ints, so sizes still match.
//
// NOTE(review): the mem-initializer list names `swaps` before `tasks`, but
// `tasks` is declared before `swaps`; members are initialised in declaration
// order, so this only risks a -Wreorder warning.
//
// NOTE(review): the `non_recursive` method and the `operator()` method below
// appear to be the two sides of a diff whose +/- markers were stripped —
// confirm against the repository which is current.
template <typename Scalar>
class ids_non_recursive {
std::unique_ptr<Scalar[]> line_buffer;
std::unique_ptr<int[]> swaps_buffer;
std::vector<ADRTTask> tasks;
ids_non_recursive(std::unique_ptr<Scalar[]> &&line_buffer,
std::unique_ptr<int[]> &&swaps,
std::unique_ptr<int[]> &&swaps_buffer,
std::vector<ADRTTask> &&tasks)
: line_buffer{std::move(line_buffer)},
swaps_buffer{std::move(swaps_buffer)},
swaps{std::move(swaps)},
tasks{std::move(tasks)} {}

public:
std::unique_ptr<int[]> swaps;
// Allocates the buffers from the dimensions of `prototype` and records the
// task sequence for prototype.height.
static ids_non_recursive<Scalar> create(
Tensor2DTyped<Scalar> const &prototype) {
std::unique_ptr<Scalar[]> line_buffer{new Scalar[prototype.width]};
std::unique_ptr<int[]> swaps{new int[prototype.height]};
std::unique_ptr<int[]> swaps_buffer{new int[prototype.height]};
std::vector<ADRTTask> tasks;
// Every task is stored without filtering.
// NOTE(review): _fht2ids_non_recursive states A_NEVER(task.size < 2) for
// each stored task — verify adrt::non_recursive never reports smaller ones.
adrt::non_recursive(
prototype.height,
[&](ADRTTask const &task) { tasks.emplace_back(task); },
[](auto val) { return val / 2; });
return ids_non_recursive<Scalar>{std::move(line_buffer),
std::move(swaps_buffer), std::move(swaps),
std::move(tasks)};
}

// Runs the non-recursive transform on `src`, using the owned buffers. The
// method is const, but the buffer contents are written through the owned
// pointers.
void non_recursive(Tensor2DTyped<Scalar> const &src, Sign sign) const {
fht2ids_non_recursive(src, sign, this->swaps.get(),
this->swaps_buffer.get(), this->line_buffer.get());
void operator()(Tensor2DTyped<Scalar> const &src, Sign sign) const {
_fht2ids_non_recursive(src, sign, this->swaps.get(),
this->swaps_buffer.get(), this->line_buffer.get(),
this->tasks);
}
};

// Alias templates that expose the functor classes under fht2ids-prefixed
// names.
//
// NOTE(review): the `fht2ids = ids<Scalar>` line and the
// `fht2ids_recursive = ids_recursive<Scalar>` line appear to be the two sides
// of a diff whose +/- markers were stripped; read literally, two
// using-declarations follow one template header. Confirm against the
// repository which is current.
template <typename Scalar>
using fht2ids = ids<Scalar>;
using fht2ids_recursive = ids_recursive<Scalar>;

// Presumably the free functions carry a leading underscore
// (_fht2ids_non_recursive) so that this alias can use the unprefixed name.
template <typename Scalar>
using fht2ids_non_recursive = ids_non_recursive<Scalar>;

} // namespace adrt
Loading