diff --git a/ateam_spatial/CMakeLists.txt b/ateam_spatial/CMakeLists.txt index c80418254..b92a44c49 100644 --- a/ateam_spatial/CMakeLists.txt +++ b/ateam_spatial/CMakeLists.txt @@ -36,6 +36,9 @@ set_target_properties(${PROJECT_NAME} PROPERTIES CUDA_ARCHITECTURES native ) target_compile_options(${PROJECT_NAME} PRIVATE $<$:--maxrregcount 62>) +if(ATEAM_SPATIAL_CUDA_DEBUG) + target_compile_options(${PROJECT_NAME} PRIVATE $<$:-g -G>) +endif() if(ATEAM_SPATIAL_CUDA_COMPILE_STATS) target_compile_options(${PROJECT_NAME} PRIVATE $<$:-Xptxas="-v">) endif() diff --git a/ateam_spatial/include/ateam_spatial/min_max_loc_kernel.hpp b/ateam_spatial/include/ateam_spatial/min_max_loc_kernel.hpp index 8a0f7c69e..432ebe2dc 100644 --- a/ateam_spatial/include/ateam_spatial/min_max_loc_kernel.hpp +++ b/ateam_spatial/include/ateam_spatial/min_max_loc_kernel.hpp @@ -72,28 +72,30 @@ __global__ void max_loc_kernel(const float * buffer, const uint32_t buffer_size, { const auto idx = blockDim.x * blockIdx.x + threadIdx.x; __shared__ uint32_t shared_index_data[blockSize]; - if (idx < buffer_size){ + shared_index_data[threadIdx.x] = idx; + __syncthreads(); - /*copy to shared memory*/ - shared_index_data[threadIdx.x] = idx; - __syncthreads(); - - for(int stride=1; stride < blockDim.x; stride *= 2) { + for(int stride=1; stride < blockDim.x; stride *= 2) { + if (idx < buffer_size) { if (threadIdx.x % (2*stride) == 0) { const auto lhs_block_index = threadIdx.x; const auto rhs_block_index = threadIdx.x + stride; const auto lhs_index = shared_index_data[lhs_block_index]; const auto rhs_index = shared_index_data[rhs_block_index]; - const float lhs_value = buffer[lhs_index]; - const float rhs_value = buffer[rhs_index]; - if(lhs_value > rhs_value) { - shared_index_data[lhs_block_index] = lhs_index; + if (rhs_block_index < buffer_size) { + const float lhs_value = buffer[lhs_index]; + const float rhs_value = buffer[rhs_index]; + if(lhs_value > rhs_value) { + shared_index_data[lhs_block_index] = lhs_index; + } else { + shared_index_data[lhs_block_index] = rhs_index; + } } else { - shared_index_data[lhs_block_index] = rhs_index; + shared_index_data[lhs_block_index] = lhs_index; } } - __syncthreads(); } + __syncthreads(); } if (threadIdx.x == 0) { atomicMax(loc_out, shared_index_data[0], buffer); @@ -105,28 +107,30 @@ __global__ void min_loc_kernel(const float * buffer, const uint32_t buffer_size, { const auto idx = blockDim.x * blockIdx.x + threadIdx.x; __shared__ uint32_t shared_index_data[blockSize]; - if (idx < buffer_size){ + shared_index_data[threadIdx.x] = idx; + __syncthreads(); - /*copy to shared memory*/ - shared_index_data[threadIdx.x] = idx; - __syncthreads(); - - for(int stride=1; stride < blockDim.x; stride *= 2) { + for(int stride=1; stride < blockDim.x; stride *= 2) { + if (idx < buffer_size) { if (threadIdx.x % (2*stride) == 0) { const auto lhs_block_index = threadIdx.x; const auto rhs_block_index = threadIdx.x + stride; const auto lhs_index = shared_index_data[lhs_block_index]; const auto rhs_index = shared_index_data[rhs_block_index]; - const float lhs_value = buffer[lhs_index]; - const float rhs_value = buffer[rhs_index]; - if(lhs_value < rhs_value) { - shared_index_data[lhs_block_index] = lhs_index; + if (rhs_block_index < buffer_size) { + const float lhs_value = buffer[lhs_index]; + const float rhs_value = buffer[rhs_index]; + if(lhs_value < rhs_value) { + shared_index_data[lhs_block_index] = lhs_index; + } else { + shared_index_data[lhs_block_index] = rhs_index; + } } else { - shared_index_data[lhs_block_index] = rhs_index; + shared_index_data[lhs_block_index] = lhs_index; } } - __syncthreads(); } + __syncthreads(); } if (threadIdx.x == 0) { atomicMin(loc_out, shared_index_data[0], buffer); diff --git a/ateam_spatial/src/layers/distance_down_field.cu b/ateam_spatial/src/layers/distance_down_field.cu index b5e66c2f6..e9df79e80 100644 --- a/ateam_spatial/src/layers/distance_down_field.cu +++ b/ateam_spatial/src/layers/distance_down_field.cu @@ -5,7 +5,7 @@ namespace ateam_spatial::layers { CUDA_HOSTDEV float DistanceDownField(const float x, const FieldDimensions & field_dims, const SpatialSettings & settings) { - return x + (WorldWidthReal(field_dims) / 2.0); + return x + (WorldWidthReal(field_dims) / 2.0f); } } // namespace ateam_spatial::layers diff --git a/ateam_spatial/src/layers/distance_from_field_edge.cu b/ateam_spatial/src/layers/distance_from_field_edge.cu index 2889d066c..af43bd6e5 100644 --- a/ateam_spatial/src/layers/distance_from_field_edge.cu +++ b/ateam_spatial/src/layers/distance_from_field_edge.cu @@ -7,8 +7,8 @@ namespace ateam_spatial::layers CUDA_HOSTDEV float DistanceFromFieldEdge(const float x, const float y, const FieldDimensions & field_dims, const SpatialSettings & settings) { const auto x_real = x; const auto y_real = y; - const auto half_world_width = WorldWidthReal(field_dims) / 2.0; - const auto half_world_height = WorldHeightReal(field_dims) / 2.0; + const auto half_world_width = WorldWidthReal(field_dims) / 2.0f; + const auto half_world_height = WorldHeightReal(field_dims) / 2.0f; const auto half_world_diff = half_world_width - half_world_height; if(fabs(x_real) < half_world_diff || (fabs(x_real) - half_world_diff) < fabs(y_real)) { diff --git a/ateam_spatial/src/layers/in_field.cu b/ateam_spatial/src/layers/in_field.cu index 61a200ea7..890c82069 100644 --- a/ateam_spatial/src/layers/in_field.cu +++ b/ateam_spatial/src/layers/in_field.cu @@ -5,10 +5,10 @@ namespace ateam_spatial::layers { CUDA_HOSTDEV float InField(const float x, const float y, const FieldDimensions & field_dims, const SpatialSettings & settings) { - const auto pos_x_thresh = field_dims.field_length / 2.0; - const auto neg_x_thresh = -field_dims.field_length / 2.0; - const auto pos_y_thresh = field_dims.field_width / 2.0; - const auto neg_y_thresh = -field_dims.field_width / 2.0; + const auto pos_x_thresh = field_dims.field_length / 2.0f; + const auto neg_x_thresh = -field_dims.field_length / 2.0f; + const auto pos_y_thresh = field_dims.field_width / 2.0f; + const auto neg_y_thresh = -field_dims.field_width / 2.0f; return (x <= pos_x_thresh) && (x >= neg_x_thresh) && (y <= pos_y_thresh) && (y >= neg_y_thresh); } diff --git a/ateam_spatial/src/layers/line_of_sight.cu b/ateam_spatial/src/layers/line_of_sight.cu index 795dffaad..e3b8d7dda 100644 --- a/ateam_spatial/src/layers/line_of_sight.cu +++ b/ateam_spatial/src/layers/line_of_sight.cu @@ -11,18 +11,18 @@ CUDA_HOSTDEV float PointDistance(const float x_1, const float y_1, const float x CUDA_HOSTDEV float DistanceToSegment(const float line_1_x, const float line_1_y, const float line_2_x, const float line_2_y, const float point_x, const float point_y) { const auto l2 = PointDistance(line_1_x, line_1_y, line_2_x, line_2_y); - if (fabsf(l2) < 1e-8) { + if (fabsf(l2) < 1e-8f) { return PointDistance(line_1_x, line_1_y, point_x, point_y); } auto t = (((point_x - line_1_x) * (line_2_x - line_1_x)) + ((point_y - line_1_y)*(line_2_y - line_1_y))) / (l2*l2); - t = max(0.0, min(1.0, t)); + t = max(0.0f, min(1.0f, t)); const auto projected_x = line_1_x + (t * (line_2_x - line_1_x)); const auto projected_y = line_1_y + (t * (line_2_y - line_1_y)); return PointDistance(point_x, point_y, projected_x, projected_y); } CUDA_HOSTDEV float LineOfSight(const float src_x, const float src_y, const float dst_x, const float dst_y, const Robot * their_bots, const FieldDimensions & field_dims, const SpatialSettings & settings) { - const auto robot_radius = 0.09; + const auto robot_radius = 0.09f; bool result = 1; for(auto i = 0; i < 16; ++i) { diff --git a/ateam_spatial/src/layers/outside_their_defense_area.cu b/ateam_spatial/src/layers/outside_their_defense_area.cu index a33de9022..8cbb90c27 100644 --- a/ateam_spatial/src/layers/outside_their_defense_area.cu +++ b/ateam_spatial/src/layers/outside_their_defense_area.cu @@ -5,10 +5,11 @@ namespace ateam_spatial::layers { CUDA_HOSTDEV float OutsideTheirDefenseArea(const float x, const float y, const FieldDimensions & field_dims, const SpatialSettings & settings) { - const auto pos_x_thresh = field_dims.field_length / 2.0; - const auto neg_x_thresh = (field_dims.field_length / 2.0) - field_dims.defense_area_depth; - const auto pos_y_thresh = field_dims.defense_area_width / 2.0; - const auto neg_y_thresh = -field_dims.defense_area_width / 2.0; + const auto robot_diameter = 0.180f; + const auto pos_x_thresh = field_dims.field_length / 2.0f; + const auto neg_x_thresh = (field_dims.field_length / 2.0f) - (field_dims.defense_area_depth + (robot_diameter * 3.0f)); + const auto pos_y_thresh = (field_dims.defense_area_width / 2.0f) + (robot_diameter * 3.0f); + const auto neg_y_thresh = -pos_y_thresh; return !((x <= pos_x_thresh) && (x >= neg_x_thresh) && (y <= pos_y_thresh) && (y >= neg_y_thresh)); } diff --git a/ateam_spatial/src/layers/width_of_shot_on_goal.cu b/ateam_spatial/src/layers/width_of_shot_on_goal.cu index b430740d5..34faafc81 100644 --- a/ateam_spatial/src/layers/width_of_shot_on_goal.cu +++ b/ateam_spatial/src/layers/width_of_shot_on_goal.cu @@ -11,8 +11,8 @@ CUDA_HOSTDEV float WidthOfShotOnGoal(const float x, const float y, const Robot * const auto real_x = x; const auto real_y = y; - const auto goal_x = field_dims.field_length / 2.0; - const auto goal_y_start = -field_dims.goal_width / 2.0; + const auto goal_x = field_dims.field_length / 2.0f; + const auto goal_y_start = -field_dims.goal_width / 2.0f; const auto step_size = field_dims.goal_width / kNumSteps; @@ -30,6 +30,12 @@ CUDA_HOSTDEV float WidthOfShotOnGoal(const float x, const float y, const Robot * } result = max(result, counter); + // Avoid shots at shallow angles + const auto angle_to_goal = atan2(fabs(goal_x - real_x), fabs(real_y)); + if (angle_to_goal < 0.7f) { + return 0.0f; + } + return result * step_size; } diff --git a/ateam_spatial/src/maps/receiver_position_quality.cu b/ateam_spatial/src/maps/receiver_position_quality.cu index bd7f7d67c..dc31b8661 100644 --- a/ateam_spatial/src/maps/receiver_position_quality.cu +++ b/ateam_spatial/src/maps/receiver_position_quality.cu @@ -15,10 +15,10 @@ CUDA_HOSTDEV float ReceiverPositionQuality(const int spatial_x, const int spatia { const auto x = SpatialToRealX(spatial_x, field_dims, settings); const auto y = SpatialToRealY(spatial_y, field_dims, settings); - const auto max_edge_dist = 0.75; + const auto max_edge_dist = 1.25f; const auto edge_dist_multiplier = min(layers::DistanceFromFieldEdge(x, y, field_dims, settings), max_edge_dist) / max_edge_dist; const auto shot_width_multiplier = layers::WidthOfShotOnGoal(x, y, their_bots, field_dims, settings) / field_dims.goal_width; - const auto robot_distance_multiplier = min(layers::DistanceToTheirBots(x, y, their_bots, field_dims, settings), 1.0); + const auto robot_distance_multiplier = min(layers::DistanceToTheirBots(x, y, their_bots, field_dims, settings), 1.0f); return layers::InField(x, y, field_dims, settings) * layers::OutsideTheirDefenseArea(x, y, field_dims, settings) * layers::LineOfSightBall(x, y, ball, their_bots, field_dims, settings) diff --git a/ateam_spatial/src/spatial_evaluator.cu b/ateam_spatial/src/spatial_evaluator.cu index ea9997b24..78e500e37 100644 --- a/ateam_spatial/src/spatial_evaluator.cu +++ b/ateam_spatial/src/spatial_evaluator.cu @@ -33,7 +33,7 @@ namespace ateam_spatial SpatialEvaluator::SpatialEvaluator() { - settings_.resolution = 1e-2; // TODO(barulicm): parameterize this + settings_.resolution = 1e-1; // TODO(barulicm): parameterize this cuda_device_available_ = IsCudaDeviceAvailable(); if(!cuda_device_available_) { std::cerr << "Could not detect CUDA device. GPU-accelerated spatial code is disabled." << std::endl; diff --git a/ateam_spatial/test/CMakeLists.txt b/ateam_spatial/test/CMakeLists.txt index f2e9c93cf..367eb1113 100644 --- a/ateam_spatial/test/CMakeLists.txt +++ b/ateam_spatial/test/CMakeLists.txt @@ -19,3 +19,9 @@ target_include_directories(ateam_spatial_tests PUBLIC target_link_libraries(ateam_spatial_tests ${PROJECT_NAME} ) + +set_target_properties(ateam_spatial_tests PROPERTIES + CUDA_SEPARABLE_COMPILATION ON + CUDA_STANDARD 17 + CUDA_ARCHITECTURES native +) diff --git a/ateam_spatial/test/layers_tests.cpp b/ateam_spatial/test/layers_tests.cpp index 7e3ebd35d..123c93607 100644 --- a/ateam_spatial/test/layers_tests.cpp +++ b/ateam_spatial/test/layers_tests.cpp @@ -199,7 +199,7 @@ TEST(LayersTests, OutsideTheirDefenseArea) EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(-8.3, -4.8, field, settings), 1.0); EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(7.199, 0.0, field, settings), 0.0); EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(7.199, -3.8, field, settings), 1.0); - EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(7.199, 1.2, field, settings), 1.0); + EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(7.199, 1.6, field, settings), 1.0); EXPECT_FLOAT_EQ(OutsideTheirDefenseArea(-7.8, 0.0, field, settings), 1.0); } @@ -241,7 +241,7 @@ TEST(LayersTests, WidthOfShotOnGoal) EXPECT_NEAR(WidthOfShotOnGoal(0.0, 0.0, their_robots.data(), field, settings), 0.5, kAcceptableError); their_robots[0].x = 0.2; their_robots[0].y = -0.09; - EXPECT_NEAR(WidthOfShotOnGoal(0.0, 0.0, their_robots.data(), field, settings), 0.5, kAcceptableError); + EXPECT_NEAR(WidthOfShotOnGoal(0.0, 0.0, their_robots.data(), field, settings), 0.49, kAcceptableError); their_robots[0].x = 4.5; their_robots[0].y = 0.18; EXPECT_NEAR(WidthOfShotOnGoal(0.0, 0.0, their_robots.data(), field, settings), 0.65, kAcceptableError); diff --git a/ateam_spatial/test/maps_tests.cpp b/ateam_spatial/test/maps_tests.cpp index b660b7398..8d55bdd38 100644 --- a/ateam_spatial/test/maps_tests.cpp +++ b/ateam_spatial/test/maps_tests.cpp @@ -57,7 +57,7 @@ TEST(MapsTests, ReceiverPositionQuality) const auto kAcceptableError = 1e-4; EXPECT_FLOAT_EQ(ReceiverPositionQuality(0, 4800, ball, their_robots.data(), field, settings), 0.0); - EXPECT_NEAR(ReceiverPositionQuality(600, 4800, ball, their_robots.data(), field, settings), 0.48, kAcceptableError); + EXPECT_NEAR(ReceiverPositionQuality(600, 4800, ball, their_robots.data(), field, settings), 0.288, kAcceptableError); EXPECT_NEAR(ReceiverPositionQuality(8300, 4800, ball, their_robots.data(), field, settings), 8.3, kAcceptableError); EXPECT_NEAR(ReceiverPositionQuality(14500, 4800, ball, their_robots.data(), field, settings), 14.5, kAcceptableError); EXPECT_FLOAT_EQ(ReceiverPositionQuality(16600, 4800, ball, their_robots.data(), field, settings), 0.0); diff --git a/ateam_spatial/test/min_max_loc_kernel_tests.cu b/ateam_spatial/test/min_max_loc_kernel_tests.cu index 6e618e83a..c8dbf4b71 100644 --- a/ateam_spatial/test/min_max_loc_kernel_tests.cu +++ b/ateam_spatial/test/min_max_loc_kernel_tests.cu @@ -34,10 +34,12 @@ TEST(MinMaxLocKernelTests, MinLoc) const dim3 grid_size(std::ceil(static_cast(data.size()) / block_size.x)); ateam_spatial::GpuObject gpu_min_index; ateam_spatial::min_loc_kernel<<>>(gpu_data.Get(), gpu_data.Size(), gpu_min_index.Get()); - if(const auto ret = cudaDeviceSynchronize();ret != cudaSuccess) { + if (const auto ret = cudaDeviceSynchronize(); ret != cudaSuccess) + { FAIL() << "Failed to launch min_loc_kernel: " << cudaGetErrorString(ret); } - if(const auto ret = cudaGetLastError(); ret != cudaSuccess) { + if (const auto ret = cudaGetLastError(); ret != cudaSuccess) + { FAIL() << "Failed to run min_loc_kernel: " << cudaGetErrorString(ret); } uint32_t min_index = 0; @@ -55,10 +57,12 @@ TEST(MinMaxLocKernelTests, MaxLoc) const dim3 grid_size(std::ceil(static_cast(data.size()) / block_size.x)); ateam_spatial::GpuObject gpu_max_index; ateam_spatial::max_loc_kernel<<>>(gpu_data.Get(), gpu_data.Size(), gpu_max_index.Get()); - if(const auto ret = cudaDeviceSynchronize();ret != cudaSuccess) { + if (const auto ret = cudaDeviceSynchronize(); ret != cudaSuccess) + { FAIL() << "Failed to launch max_loc_kernel: " << cudaGetErrorString(ret); } - if(const auto ret = cudaGetLastError(); ret != cudaSuccess) { + if (const auto ret = cudaGetLastError(); ret != cudaSuccess) + { FAIL() << "Failed to run max_loc_kernel: " << cudaGetErrorString(ret); } uint32_t max_index = 0; @@ -72,9 +76,8 @@ TEST(MinMaxLocKernelTests, MaxLoc_MultipleBlocks) std::random_device rdev; std::default_random_engine randeng(rdev()); std::uniform_real_distribution randist(0.0, 200.0); - std::generate(data.begin(), data.end(), [&](){ - return randist(randeng); - }); + std::generate(data.begin(), data.end(), [&]() + { return randist(randeng); }); // Fill the first half with zeros to ensure the max is not in the first block std::fill_n(data.begin(), data.size() / 2, 0.f); ateam_spatial::GpuArray gpu_data; @@ -84,10 +87,46 @@ TEST(MinMaxLocKernelTests, MaxLoc_MultipleBlocks) const dim3 grid_size(std::ceil(static_cast(data.size()) / block_size.x)); ateam_spatial::GpuObject gpu_max_index; ateam_spatial::max_loc_kernel<<>>(gpu_data.Get(), gpu_data.Size(), gpu_max_index.Get()); - if(const auto ret = cudaDeviceSynchronize();ret != cudaSuccess) { + if (const auto ret = cudaDeviceSynchronize(); ret != cudaSuccess) + { FAIL() << "Failed to launch max_loc_kernel: " << cudaGetErrorString(ret); } - if(const auto ret = cudaGetLastError(); ret != cudaSuccess) { + if (const auto ret = cudaGetLastError(); ret != cudaSuccess) + { + FAIL() << "Failed to run max_loc_kernel: " << cudaGetErrorString(ret); + } + + const auto max_iter = std::max_element(data.begin(), data.end()); + + ASSERT_NE(max_iter, data.end()) << "Failed to find a max value."; + const auto expected_max_index = std::distance(data.begin(), max_iter); + + uint32_t max_index = 0; + gpu_max_index.CopyFromGpu(max_index); + EXPECT_EQ(max_index, expected_max_index); +} + +TEST(MinMaxLocKernelTests, MaxLoc_UnalignedDataSize) +{ + std::array data{}; + std::random_device rdev; + std::default_random_engine randeng(rdev()); + std::uniform_real_distribution randist(0.0, 200.0); + std::generate(data.begin(), data.end(), [&]() + { return randist(randeng); }); + ateam_spatial::GpuArray gpu_data; + gpu_data.CopyToGpu(data); + constexpr int threads_per_block = 64; + const dim3 block_size(threads_per_block); + const dim3 grid_size(std::ceil(static_cast(data.size()) / block_size.x)); + ateam_spatial::GpuObject gpu_max_index; + ateam_spatial::max_loc_kernel<<>>(gpu_data.Get(), gpu_data.Size(), gpu_max_index.Get()); + if (const auto ret = cudaDeviceSynchronize(); ret != cudaSuccess) + { + FAIL() << "Failed to launch max_loc_kernel: " << cudaGetErrorString(ret); + } + if (const auto ret = cudaGetLastError(); ret != cudaSuccess) + { FAIL() << "Failed to run max_loc_kernel: " << cudaGetErrorString(ret); } diff --git a/ateam_spatial/test/spatial_evaluator_tests.cpp b/ateam_spatial/test/spatial_evaluator_tests.cpp index f70f70ea5..322c3c40f 100644 --- a/ateam_spatial/test/spatial_evaluator_tests.cpp +++ b/ateam_spatial/test/spatial_evaluator_tests.cpp @@ -50,10 +50,15 @@ TEST(SpatialEvaluatorTests, Basic) eval.CopyMapBuffer(MapId::ReceiverPositionQuality, buffer_out); - EXPECT_EQ(buffer_out.size(), 1'593'600); + const auto resolution = eval.GetSettings().resolution; + const auto expected_width = (field.field_width + (2 * field.boundary_width)) / resolution; + const auto expected_length = (field.field_length + (2 * field.boundary_width)) / resolution; + const auto expected_size = expected_length * expected_width; + + EXPECT_EQ(buffer_out.size(), expected_size); std::vector rendered_buffer; eval.RenderMapBuffer(MapId::ReceiverPositionQuality, rendered_buffer); - EXPECT_EQ(rendered_buffer.size(), 1'593'600); + EXPECT_EQ(rendered_buffer.size(), expected_size); }