Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions ateam_spatial/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ set_target_properties(${PROJECT_NAME} PROPERTIES
CUDA_ARCHITECTURES native
)
target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--maxrregcount 62>)
if(ATEAM_SPATIAL_CUDA_DEBUG)
target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-g -G>)
endif()
if(ATEAM_SPATIAL_CUDA_COMPILE_STATS)
target_compile_options(${PROJECT_NAME} PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-Xptxas="-v">)
endif()
Expand Down
52 changes: 28 additions & 24 deletions ateam_spatial/include/ateam_spatial/min_max_loc_kernel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,28 +72,30 @@ __global__ void max_loc_kernel(const float * buffer, const uint32_t buffer_size,
{
const auto idx = blockDim.x * blockIdx.x + threadIdx.x;
__shared__ uint32_t shared_index_data[blockSize];
if (idx < buffer_size){
shared_index_data[threadIdx.x] = idx;
__syncthreads();

/*copy to shared memory*/
shared_index_data[threadIdx.x] = idx;
__syncthreads();

for(int stride=1; stride < blockDim.x; stride *= 2) {
for(int stride=1; stride < blockDim.x; stride *= 2) {
if (idx < buffer_size) {
if (threadIdx.x % (2*stride) == 0) {
const auto lhs_block_index = threadIdx.x;
const auto rhs_block_index = threadIdx.x + stride;
const auto lhs_index = shared_index_data[lhs_block_index];
const auto rhs_index = shared_index_data[rhs_block_index];
const float lhs_value = buffer[lhs_index];
const float rhs_value = buffer[rhs_index];
if(lhs_value > rhs_value) {
shared_index_data[lhs_block_index] = lhs_index;
if (rhs_block_index < buffer_size) {
const float lhs_value = buffer[lhs_index];
const float rhs_value = buffer[rhs_index];
if(lhs_value > rhs_value) {
shared_index_data[lhs_block_index] = lhs_index;
} else {
shared_index_data[lhs_block_index] = rhs_index;
}
} else {
shared_index_data[lhs_block_index] = rhs_index;
shared_index_data[lhs_block_index] = lhs_index;
}
}
__syncthreads();
}
__syncthreads();
}
if (threadIdx.x == 0) {
atomicMax(loc_out, shared_index_data[0], buffer);
Expand All @@ -105,28 +107,30 @@ __global__ void min_loc_kernel(const float * buffer, const uint32_t buffer_size,
{
const auto idx = blockDim.x * blockIdx.x + threadIdx.x;
__shared__ uint32_t shared_index_data[blockSize];
if (idx < buffer_size){
shared_index_data[threadIdx.x] = idx;
__syncthreads();

/*copy to shared memory*/
shared_index_data[threadIdx.x] = idx;
__syncthreads();

for(int stride=1; stride < blockDim.x; stride *= 2) {
for(int stride=1; stride < blockDim.x; stride *= 2) {
if (idx < buffer_size) {
if (threadIdx.x % (2*stride) == 0) {
const auto lhs_block_index = threadIdx.x;
const auto rhs_block_index = threadIdx.x + stride;
const auto lhs_index = shared_index_data[lhs_block_index];
const auto rhs_index = shared_index_data[rhs_block_index];
const float lhs_value = buffer[lhs_index];
const float rhs_value = buffer[rhs_index];
if(lhs_value < rhs_value) {
shared_index_data[lhs_block_index] = lhs_index;
if (rhs_block_index < buffer_size) {
const float lhs_value = buffer[lhs_index];
const float rhs_value = buffer[rhs_index];
if(lhs_value < rhs_value) {
shared_index_data[lhs_block_index] = lhs_index;
} else {
shared_index_data[lhs_block_index] = rhs_index;
}
} else {
shared_index_data[lhs_block_index] = rhs_index;
shared_index_data[lhs_block_index] = lhs_index;
}
}
__syncthreads();
}
__syncthreads();
}
if (threadIdx.x == 0) {
atomicMin(loc_out, shared_index_data[0], buffer);
Expand Down
2 changes: 1 addition & 1 deletion ateam_spatial/src/layers/distance_down_field.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace ateam_spatial::layers
{

CUDA_HOSTDEV float DistanceDownField(const float x, const FieldDimensions & field_dims, const SpatialSettings & settings) {
return x + (WorldWidthReal(field_dims) / 2.0);
return x + (WorldWidthReal(field_dims) / 2.0f);
}

} // namespace ateam_spatial::layers
4 changes: 2 additions & 2 deletions ateam_spatial/src/layers/distance_from_field_edge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ namespace ateam_spatial::layers
CUDA_HOSTDEV float DistanceFromFieldEdge(const float x, const float y, const FieldDimensions & field_dims, const SpatialSettings & settings) {
const auto x_real = x;
const auto y_real = y;
const auto half_world_width = WorldWidthReal(field_dims) / 2.0;
const auto half_world_height = WorldHeightReal(field_dims) / 2.0;
const auto half_world_width = WorldWidthReal(field_dims) / 2.0f;
const auto half_world_height = WorldHeightReal(field_dims) / 2.0f;
const auto half_world_diff = half_world_width - half_world_height;

if(fabs(x_real) < half_world_diff || (fabs(x_real) - half_world_diff) < fabs(y_real)) {
Expand Down
8 changes: 4 additions & 4 deletions ateam_spatial/src/layers/in_field.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ namespace ateam_spatial::layers
{

CUDA_HOSTDEV float InField(const float x, const float y, const FieldDimensions & field_dims, const SpatialSettings & settings) {
const auto pos_x_thresh = field_dims.field_length / 2.0;
const auto neg_x_thresh = -field_dims.field_length / 2.0;
const auto pos_y_thresh = field_dims.field_width / 2.0;
const auto neg_y_thresh = -field_dims.field_width / 2.0;
const auto pos_x_thresh = field_dims.field_length / 2.0f;
const auto neg_x_thresh = -field_dims.field_length / 2.0f;
const auto pos_y_thresh = field_dims.field_width / 2.0f;
const auto neg_y_thresh = -field_dims.field_width / 2.0f;

return (x <= pos_x_thresh) && (x >= neg_x_thresh) && (y <= pos_y_thresh) && (y >= neg_y_thresh);
}
Expand Down
6 changes: 3 additions & 3 deletions ateam_spatial/src/layers/line_of_sight.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@ CUDA_HOSTDEV float PointDistance(const float x_1, const float y_1, const float x
CUDA_HOSTDEV float DistanceToSegment(const float line_1_x, const float line_1_y, const float line_2_x, const float line_2_y, const float point_x, const float point_y)
{
const auto l2 = PointDistance(line_1_x, line_1_y, line_2_x, line_2_y);
if (fabsf(l2) < 1e-8) {
if (fabsf(l2) < 1e-8f) {
return PointDistance(line_1_x, line_1_y, point_x, point_y);
}
auto t = (((point_x - line_1_x) * (line_2_x - line_1_x)) + ((point_y - line_1_y)*(line_2_y - line_1_y))) / (l2*l2);
t = max(0.0, min(1.0, t));
t = max(0.0f, min(1.0f, t));
const auto projected_x = line_1_x + (t * (line_2_x - line_1_x));
const auto projected_y = line_1_y + (t * (line_2_y - line_1_y));
return PointDistance(point_x, point_y, projected_x, projected_y);
}

CUDA_HOSTDEV float LineOfSight(const float src_x, const float src_y, const float dst_x, const float dst_y, const Robot * their_bots, const FieldDimensions & field_dims, const SpatialSettings & settings) {
const auto robot_radius = 0.09;
const auto robot_radius = 0.09f;

bool result = 1;
for(auto i = 0; i < 16; ++i) {
Expand Down
9 changes: 5 additions & 4 deletions ateam_spatial/src/layers/outside_their_defense_area.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@ namespace ateam_spatial::layers
{

CUDA_HOSTDEV float OutsideTheirDefenseArea(const float x, const float y, const FieldDimensions & field_dims, const SpatialSettings & settings) {
const auto pos_x_thresh = field_dims.field_length / 2.0;
const auto neg_x_thresh = (field_dims.field_length / 2.0) - field_dims.defense_area_depth;
const auto pos_y_thresh = field_dims.defense_area_width / 2.0;
const auto neg_y_thresh = -field_dims.defense_area_width / 2.0;
const auto robot_diameter = 0.180f;
const auto pos_x_thresh = field_dims.field_length / 2.0f;
const auto neg_x_thresh = (field_dims.field_length / 2.0f) - (field_dims.defense_area_depth + (robot_diameter * 3.0f));
const auto pos_y_thresh = (field_dims.defense_area_width / 2.0f) + (robot_diameter * 3.0f);
const auto neg_y_thresh = -pos_y_thresh;

return !((x <= pos_x_thresh) && (x >= neg_x_thresh) && (y <= pos_y_thresh) && (y >= neg_y_thresh));
}
Expand Down
10 changes: 8 additions & 2 deletions ateam_spatial/src/layers/width_of_shot_on_goal.cu
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ CUDA_HOSTDEV float WidthOfShotOnGoal(const float x, const float y, const Robot *
const auto real_x = x;
const auto real_y = y;

const auto goal_x = field_dims.field_length / 2.0;
const auto goal_y_start = -field_dims.goal_width / 2.0;
const auto goal_x = field_dims.field_length / 2.0f;
const auto goal_y_start = -field_dims.goal_width / 2.0f;

const auto step_size = field_dims.goal_width / kNumSteps;

Expand All @@ -30,6 +30,12 @@ CUDA_HOSTDEV float WidthOfShotOnGoal(const float x, const float y, const Robot *
}
result = max(result, counter);

// Avoid shots at shallow angles
const auto angle_to_goal = atan2(fabs(goal_x - real_x), fabs(real_y));
if (angle_to_goal < 0.7f) {
return 0.0f;
}

return result * step_size;
}

Expand Down
4 changes: 2 additions & 2 deletions ateam_spatial/src/maps/receiver_position_quality.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ CUDA_HOSTDEV float ReceiverPositionQuality(const int spatial_x, const int spatia
{
const auto x = SpatialToRealX(spatial_x, field_dims, settings);
const auto y = SpatialToRealY(spatial_y, field_dims, settings);
const auto max_edge_dist = 0.75;
const auto max_edge_dist = 1.25f;
const auto edge_dist_multiplier = min(layers::DistanceFromFieldEdge(x, y, field_dims, settings), max_edge_dist) / max_edge_dist;
const auto shot_width_multiplier = layers::WidthOfShotOnGoal(x, y, their_bots, field_dims, settings) / field_dims.goal_width;
const auto robot_distance_multiplier = min(layers::DistanceToTheirBots(x, y, their_bots, field_dims, settings), 1.0);
const auto robot_distance_multiplier = min(layers::DistanceToTheirBots(x, y, their_bots, field_dims, settings), 1.0f);
return layers::InField(x, y, field_dims, settings)
* layers::OutsideTheirDefenseArea(x, y, field_dims, settings)
* layers::LineOfSightBall(x, y, ball, their_bots, field_dims, settings)
Expand Down
2 changes: 1 addition & 1 deletion ateam_spatial/src/spatial_evaluator.cu
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace ateam_spatial

SpatialEvaluator::SpatialEvaluator()
{
settings_.resolution = 1e-2; // TODO(barulicm): parameterize this
settings_.resolution = 1e-1; // TODO(barulicm): parameterize this
cuda_device_available_ = IsCudaDeviceAvailable();
if(!cuda_device_available_) {
std::cerr << "Could not detect CUDA device. GPU-accelerated spatial code is disabled." << std::endl;
Expand Down
6 changes: 6 additions & 0 deletions ateam_spatial/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,9 @@ target_include_directories(ateam_spatial_tests PUBLIC
target_link_libraries(ateam_spatial_tests
${PROJECT_NAME}
)

set_target_properties(ateam_spatial_tests PROPERTIES
CUDA_SEPARABLE_COMPILATION ON
CUDA_STANDARD 17
CUDA_ARCHITECTURES native
)
4 changes: 2 additions & 2 deletions ateam_spatial/test/layers_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ TEST(LayersTests, OutsideTheirDefenseArea)
EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(-8.3, -4.8, field, settings), 1.0);
EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(7.199, 0.0, field, settings), 0.0);
EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(7.199, -3.8, field, settings), 1.0);
EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(7.199, 1.2, field, settings), 1.0);
EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(7.199, 1.6, field, settings), 1.0);
EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(-7.8, 0.0, field, settings), 1.0);
}

Expand Down Expand Up @@ -241,7 +241,7 @@ TEST(LayersTests, WidthOfShotOnGoal)
EXPECT_NEAR(WidthOfShotOnGoal(0.0, 0.0, their_robots.data(), field, settings), 0.5, kAcceptableError);
their_robots[0].x = 0.2;
their_robots[0].y = -0.09;
EXPECT_NEAR(WidthOfShotOnGoal(0.0, 0.0, their_robots.data(), field, settings), 0.5, kAcceptableError);
EXPECT_NEAR(WidthOfShotOnGoal(0.0, 0.0, their_robots.data(), field, settings), 0.49, kAcceptableError);
their_robots[0].x = 4.5;
their_robots[0].y = 0.18;
EXPECT_NEAR(WidthOfShotOnGoal(0.0, 0.0, their_robots.data(), field, settings), 0.65, kAcceptableError);
Expand Down
2 changes: 1 addition & 1 deletion ateam_spatial/test/maps_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ TEST(MapsTests, ReceiverPositionQuality)

const auto kAcceptableError = 1e-4;
EXPECT_FLOAT_EQ(ReceiverPositionQuality(0, 4800, ball, their_robots.data(), field, settings), 0.0);
EXPECT_NEAR(ReceiverPositionQuality(600, 4800, ball, their_robots.data(), field, settings), 0.48, kAcceptableError);
EXPECT_NEAR(ReceiverPositionQuality(600, 4800, ball, their_robots.data(), field, settings), 0.288, kAcceptableError);
EXPECT_NEAR(ReceiverPositionQuality(8300, 4800, ball, their_robots.data(), field, settings), 8.3, kAcceptableError);
EXPECT_NEAR(ReceiverPositionQuality(14500, 4800, ball, their_robots.data(), field, settings), 14.5, kAcceptableError);
EXPECT_FLOAT_EQ(ReceiverPositionQuality(16600, 4800, ball, their_robots.data(), field, settings), 0.0);
Expand Down
57 changes: 48 additions & 9 deletions ateam_spatial/test/min_max_loc_kernel_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@ TEST(MinMaxLocKernelTests, MinLoc)
const dim3 grid_size(std::ceil(static_cast<float>(data.size()) / block_size.x));
ateam_spatial::GpuObject<uint32_t> gpu_min_index;
ateam_spatial::min_loc_kernel<threads_per_block><<<grid_size, block_size>>>(gpu_data.Get(), gpu_data.Size(), gpu_min_index.Get());
if(const auto ret = cudaDeviceSynchronize();ret != cudaSuccess) {
if (const auto ret = cudaDeviceSynchronize(); ret != cudaSuccess)
{
FAIL() << "Failed to launch min_loc_kernel: " << cudaGetErrorString(ret);
}
if(const auto ret = cudaGetLastError(); ret != cudaSuccess) {
if (const auto ret = cudaGetLastError(); ret != cudaSuccess)
{
FAIL() << "Failed to run min_loc_kernel: " << cudaGetErrorString(ret);
}
uint32_t min_index = 0;
Expand All @@ -55,10 +57,12 @@ TEST(MinMaxLocKernelTests, MaxLoc)
const dim3 grid_size(std::ceil(static_cast<float>(data.size()) / block_size.x));
ateam_spatial::GpuObject<uint32_t> gpu_max_index;
ateam_spatial::max_loc_kernel<threads_per_block><<<grid_size, block_size>>>(gpu_data.Get(), gpu_data.Size(), gpu_max_index.Get());
if(const auto ret = cudaDeviceSynchronize();ret != cudaSuccess) {
if (const auto ret = cudaDeviceSynchronize(); ret != cudaSuccess)
{
FAIL() << "Failed to launch max_loc_kernel: " << cudaGetErrorString(ret);
}
if(const auto ret = cudaGetLastError(); ret != cudaSuccess) {
if (const auto ret = cudaGetLastError(); ret != cudaSuccess)
{
FAIL() << "Failed to run max_loc_kernel: " << cudaGetErrorString(ret);
}
uint32_t max_index = 0;
Expand All @@ -72,9 +76,8 @@ TEST(MinMaxLocKernelTests, MaxLoc_MultipleBlocks)
std::random_device rdev;
std::default_random_engine randeng(rdev());
std::uniform_real_distribution<float> randist(0.0, 200.0);
std::generate(data.begin(), data.end(), [&](){
return randist(randeng);
});
std::generate(data.begin(), data.end(), [&]()
{ return randist(randeng); });
// Fill the first half with zeros to ensure the max is not in the first block
std::fill_n(data.begin(), data.size() / 2, 0.f);
ateam_spatial::GpuArray<float, 1440000> gpu_data;
Expand All @@ -84,10 +87,46 @@ TEST(MinMaxLocKernelTests, MaxLoc_MultipleBlocks)
const dim3 grid_size(std::ceil(static_cast<float>(data.size()) / block_size.x));
ateam_spatial::GpuObject<uint32_t> gpu_max_index;
ateam_spatial::max_loc_kernel<threads_per_block><<<grid_size, block_size>>>(gpu_data.Get(), gpu_data.Size(), gpu_max_index.Get());
if(const auto ret = cudaDeviceSynchronize();ret != cudaSuccess) {
if (const auto ret = cudaDeviceSynchronize(); ret != cudaSuccess)
{
FAIL() << "Failed to launch max_loc_kernel: " << cudaGetErrorString(ret);
}
if(const auto ret = cudaGetLastError(); ret != cudaSuccess) {
if (const auto ret = cudaGetLastError(); ret != cudaSuccess)
{
FAIL() << "Failed to run max_loc_kernel: " << cudaGetErrorString(ret);
}

const auto max_iter = std::max_element(data.begin(), data.end());

ASSERT_NE(max_iter, data.end()) << "Failed to find a max value.";
const auto expected_max_index = std::distance(data.begin(), max_iter);

uint32_t max_index = 0;
gpu_max_index.CopyFromGpu(max_index);
EXPECT_EQ(max_index, expected_max_index);
}

TEST(MinMaxLocKernelTests, MaxLoc_UnalignedDataSize)
{
std::array<float, 72> data{};
std::random_device rdev;
std::default_random_engine randeng(rdev());
std::uniform_real_distribution<float> randist(0.0, 200.0);
std::generate(data.begin(), data.end(), [&]()
{ return randist(randeng); });
ateam_spatial::GpuArray<float, 72> gpu_data;
gpu_data.CopyToGpu(data);
constexpr int threads_per_block = 64;
const dim3 block_size(threads_per_block);
const dim3 grid_size(std::ceil(static_cast<float>(data.size()) / block_size.x));
ateam_spatial::GpuObject<uint32_t> gpu_max_index;
ateam_spatial::max_loc_kernel<threads_per_block><<<grid_size, block_size>>>(gpu_data.Get(), gpu_data.Size(), gpu_max_index.Get());
if (const auto ret = cudaDeviceSynchronize(); ret != cudaSuccess)
{
FAIL() << "Failed to launch max_loc_kernel: " << cudaGetErrorString(ret);
}
if (const auto ret = cudaGetLastError(); ret != cudaSuccess)
{
FAIL() << "Failed to run max_loc_kernel: " << cudaGetErrorString(ret);
}

Expand Down
9 changes: 7 additions & 2 deletions ateam_spatial/test/spatial_evaluator_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,15 @@ TEST(SpatialEvaluatorTests, Basic)

eval.CopyMapBuffer(MapId::ReceiverPositionQuality, buffer_out);

EXPECT_EQ(buffer_out.size(), 1'593'600);
const auto resolution = eval.GetSettings().resolution;
const auto expected_width = (field.field_width + (2 * field.boundary_width)) / resolution;
const auto expected_length = (field.field_length + (2 * field.boundary_width)) / resolution;
const auto expected_size = expected_length * expected_width;

EXPECT_EQ(buffer_out.size(), expected_size);

std::vector<uint8_t> rendered_buffer;
eval.RenderMapBuffer(MapId::ReceiverPositionQuality, rendered_buffer);

EXPECT_EQ(rendered_buffer.size(), 1'593'600);
EXPECT_EQ(rendered_buffer.size(), expected_size);
}