Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ if(BUILD_TESTS)
add_executable(test_bicgstab_avx2 tests/math/test_bicgstab_avx2.c)
add_executable(test_bicgstab_neon tests/math/test_bicgstab_neon.c)
add_executable(test_omp_consistency tests/math/test_omp_consistency.c)
add_executable(test_optimal_omega tests/math/test_optimal_omega.c)
add_executable(test_solver_breakdown tests/math/test_solver_breakdown.c)
add_executable(test_solver_robustness tests/math/test_solver_robustness.c)
add_executable(test_convergence_order tests/math/test_convergence_order.c)
Expand Down Expand Up @@ -407,6 +408,7 @@ if(BUILD_TESTS)
target_link_libraries(test_bicgstab_avx2 PRIVATE CFD::Library unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
target_link_libraries(test_bicgstab_neon PRIVATE CFD::Library unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
target_link_libraries(test_omp_consistency PRIVATE CFD::Library unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
target_link_libraries(test_optimal_omega PRIVATE CFD::Library unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
target_link_libraries(test_solver_breakdown PRIVATE CFD::Library unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
target_link_libraries(test_solver_robustness PRIVATE CFD::Library unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
target_link_libraries(test_convergence_order PRIVATE CFD::Library unity $<$<NOT:$<PLATFORM_ID:Windows>>:m>)
Expand Down Expand Up @@ -526,6 +528,7 @@ if(BUILD_TESTS)
add_test(NAME BiCGSTAB_NEON_Consistency COMMAND test_bicgstab_neon)
set_tests_properties(BiCGSTAB_NEON_Consistency PROPERTIES LABELS "cross-arch")
add_test(NAME OMPConsistencyTest COMMAND test_omp_consistency)
add_test(NAME OptimalOmegaTest COMMAND test_optimal_omega)
add_test(NAME SolverBreakdownTest COMMAND test_solver_breakdown)
add_test(NAME SolverRobustnessTest COMMAND test_solver_robustness)
add_test(NAME ConvergenceOrderTest COMMAND test_convergence_order)
Expand Down
21 changes: 8 additions & 13 deletions ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,24 +51,19 @@ Each algorithm should have scalar (CPU) + SIMD + OMP variants. Track gaps here.

#### Active Bugs

**OMP Red-Black SOR Poisson Solver Convergence (P1)**
**OMP Red-Black SOR Poisson Solver Convergence (P1)** ✅ RESOLVED

Status: Workaround implemented (switched OMP projection to CG_OMP)
Root cause: Hard-coded omega=1.5 was suboptimal for larger grids. For a 33×33 grid, the optimal SOR omega is ~1.83. With omega=1.5, the spectral radius was too high, causing convergence to exceed max iterations.

The OMP Red-Black SOR Poisson solver fails to converge on certain problem configurations (e.g., 33×33 grids with dt=5e-4), hitting max iterations (1000) without reaching tolerance (1e-6).

Impact:

- OMP projection solver switched to CG_OMP as workaround
- Red-Black SOR remains available but unreliable for production use with OMP backend
Fix: All SOR and Red-Black SOR solvers now auto-compute optimal omega from grid dimensions using the Jacobi spectral radius formula: ω_opt = 2/(1+√(1-ρ_J²)). Users can override by setting `params.omega > 0`. Projection backends remain on CG (grid-size-independent).

Action items:

- [ ] Profile OMP Red-Black SOR to identify convergence bottleneck
- [ ] Compare OMP vs AVX2 Red-Black implementations for differences
- [ ] Test omega parameter sweep (1.0 to 1.9) for optimal convergence
- [ ] Add convergence diagnostics (residual history logging)
- [ ] Consider switch to Chebyshev acceleration or SSOR
- [x] Profile OMP Red-Black SOR to identify convergence bottleneck — suboptimal omega
- [x] Compare OMP vs AVX2 Red-Black implementations for differences — identical algorithm
- [x] Test omega parameter sweep (1.0 to 1.9) for optimal convergence — auto-computed
- [x] Verify convergence via iteration-count tests — test_optimal_omega.c
- [x] Consider switch to Chebyshev acceleration or SSOR — not needed, optimal omega suffices

**Grid Convergence Non-Monotonic Behavior (P1)** ✅ RESOLVED

Expand Down
6 changes: 3 additions & 3 deletions lib/include/cfd/solvers/poisson_solver.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ typedef struct {
double tolerance; /**< Relative convergence tolerance (default: 1e-6) */
double absolute_tolerance; /**< Absolute tolerance (default: 1e-10) */
int max_iterations; /**< Maximum iterations (default: 1000) */
double omega; /**< SOR relaxation parameter (default: 1.5, range: 1.0-2.0) */
double omega; /**< SOR relaxation parameter (default: 0 = auto-optimal; set > 0 to override) */
int check_interval; /**< Check convergence every N iterations (default: 1) */
bool verbose; /**< Print iteration progress (default: false) */
poisson_precond_type_t preconditioner; /**< Preconditioner type (default: NONE) */
Expand All @@ -124,8 +124,8 @@ typedef struct {
* Default values:
* - tolerance: 1e-6
* - absolute_tolerance: 1e-10
* - max_iterations: 1000
* - omega: 1.5
* - max_iterations: 5000
* - omega: 0.0 (auto-compute optimal for grid dimensions)
* - check_interval: 1
* - verbose: false
*/
Expand Down
3 changes: 2 additions & 1 deletion lib/src/solvers/linear/avx2/linear_solver_redblack_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,8 @@ static cfd_status_t redblack_avx2_init(
poisson_solver_compute_3d_bounds(nz, nx, ny, &ctx->stride_z, &ctx->k_start, &ctx->k_end);
double factor = 2.0 * (1.0 / ctx->dx2 + 1.0 / ctx->dy2 + ctx->inv_dz2);
ctx->inv_factor = 1.0 / factor;
ctx->omega = params ? params->omega : 1.5;
ctx->omega = poisson_solver_resolve_omega(
params ? params->omega : 0.0, nx, ny, nz, dx, dy, dz);

/* Pre-compute SIMD vectors */
ctx->dx2_inv_vec = _mm256_set1_pd(1.0 / ctx->dx2);
Expand Down
3 changes: 2 additions & 1 deletion lib/src/solvers/linear/avx2/linear_solver_sor_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ static cfd_status_t sor_avx2_init(
double factor = 2.0 * (1.0 / ctx->dx2 + 1.0 / ctx->dy2 + ctx->inv_dz2);
ctx->inv_factor = 1.0 / factor;

ctx->omega = (params && params->omega > 0.0) ? params->omega : 1.5;
ctx->omega = poisson_solver_resolve_omega(
params ? params->omega : 0.0, nx, ny, nz, dx, dy, dz);

/* Pre-compute SIMD vectors */
ctx->dx2_inv_vec = _mm256_set1_pd(1.0 / ctx->dx2);
Expand Down
3 changes: 2 additions & 1 deletion lib/src/solvers/linear/cpu/linear_solver_redblack.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ static cfd_status_t redblack_scalar_init(

double factor = 2.0 * (1.0 / ctx->dx2 + 1.0 / ctx->dy2 + ctx->inv_dz2);
ctx->inv_factor = 1.0 / factor;
ctx->omega = params ? params->omega : 1.5;
ctx->omega = poisson_solver_resolve_omega(
params ? params->omega : 0.0, nx, ny, nz, dx, dy, dz);
ctx->initialized = 1;

solver->context = ctx;
Expand Down
3 changes: 2 additions & 1 deletion lib/src/solvers/linear/cpu/linear_solver_sor.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ static cfd_status_t sor_scalar_init(

double factor = 2.0 * (1.0 / ctx->dx2 + 1.0 / ctx->dy2 + ctx->inv_dz2);
ctx->inv_factor = 1.0 / factor;
ctx->omega = params ? params->omega : 1.5;
ctx->omega = poisson_solver_resolve_omega(
params ? params->omega : 0.0, nx, ny, nz, dx, dy, dz);
ctx->initialized = 1;

solver->context = ctx;
Expand Down
2 changes: 1 addition & 1 deletion lib/src/solvers/linear/linear_solver.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ poisson_solver_params_t poisson_solver_params_default(void) {
params.tolerance = 1e-6;
params.absolute_tolerance = 1e-10;
params.max_iterations = 5000; /* Increased from 1000 for CG on fine grids */
params.omega = 1.5;
params.omega = 0.0; /* Auto-compute optimal omega for grid dimensions */
params.check_interval = 1;
params.verbose = false;
params.preconditioner = POISSON_PRECOND_NONE;
Expand Down
61 changes: 61 additions & 0 deletions lib/src/solvers/linear/linear_solver_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,14 @@
#define CFD_LINEAR_SOLVER_INTERNAL_H

#include "cfd/solvers/poisson_solver.h"
#include <math.h>
#include <stdbool.h>
#include <limits.h>

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

#ifdef __cplusplus
extern "C" {
#endif
Expand Down Expand Up @@ -150,6 +155,62 @@ static inline double poisson_solver_compute_inv_dz2(double dz) {
return (dz > 0.0) ? (1.0 / (dz * dz)) : 0.0;
}

/* ============================================================================
* OPTIMAL SOR OMEGA COMPUTATION
* ============================================================================ */

/**
* Compute optimal SOR relaxation parameter for a 2D/3D Laplacian.
*
* Uses the Jacobi spectral radius for a rectangular grid:
* 2D: rho_J = [cos(pi/(nx-1))/dx^2 + cos(pi/(ny-1))/dy^2]
* / [1/dx^2 + 1/dy^2]
* 3D: rho_J = [cos(pi/(nx-1))/dx^2 + cos(pi/(ny-1))/dy^2
* + cos(pi/(nz-1))/dz^2]
* / [1/dx^2 + 1/dy^2 + 1/dz^2]
* omega_opt = 2 / (1 + sqrt(1 - rho_J^2))
*
* When nz <= 1 or dz <= 0, the z-component is ignored and the 2D formula
* is used.
*/
static inline double poisson_solver_compute_optimal_omega(
size_t nx, size_t ny, size_t nz,
double dx, double dy, double dz)
{
double inv_dx2 = 1.0 / (dx * dx);
double inv_dy2 = 1.0 / (dy * dy);
double inv_dz2 = poisson_solver_compute_inv_dz2(dz);

double num = cos(M_PI / (double)(nx - 1)) * inv_dx2
+ cos(M_PI / (double)(ny - 1)) * inv_dy2;
double denom = inv_dx2 + inv_dy2;

if (nz > 1 && inv_dz2 > 0.0) {
num += cos(M_PI / (double)(nz - 1)) * inv_dz2;
denom += inv_dz2;
}

double rho_j = num / denom;
return 2.0 / (1.0 + sqrt(1.0 - (rho_j * rho_j)));
}

/**
* Resolve omega: if omega <= 0.0 (auto sentinel), compute optimal value.
* Otherwise return the user-specified omega as-is.
*
* Supports both 2D (nz <= 1 or dz <= 0) and 3D (nz > 1, dz > 0) problems.
*/
static inline double poisson_solver_resolve_omega(
double omega,
size_t nx, size_t ny, size_t nz,
double dx, double dy, double dz)
{
if (omega <= 0.0) {
return poisson_solver_compute_optimal_omega(nx, ny, nz, dx, dy, dz);
}
return omega;
}

/* ============================================================================
* INTERNAL HELPER FUNCTIONS
* ============================================================================ */
Expand Down
3 changes: 2 additions & 1 deletion lib/src/solvers/linear/neon/linear_solver_redblack_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,8 @@ static cfd_status_t redblack_neon_init(
poisson_solver_compute_3d_bounds(nz, nx, ny, &ctx->stride_z, &ctx->k_start, &ctx->k_end);
double factor = 2.0 * (1.0 / ctx->dx2 + 1.0 / ctx->dy2 + ctx->inv_dz2);
ctx->inv_factor = 1.0 / factor;
ctx->omega = params ? params->omega : 1.5;
ctx->omega = poisson_solver_resolve_omega(
params ? params->omega : 0.0, nx, ny, nz, dx, dy, dz);

/* Pre-compute SIMD vectors */
ctx->dx2_inv_vec = vdupq_n_f64(1.0 / ctx->dx2);
Expand Down
3 changes: 2 additions & 1 deletion lib/src/solvers/linear/neon/linear_solver_sor_neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,8 @@ static cfd_status_t sor_neon_init(

double factor = 2.0 * (1.0 / ctx->dx2 + 1.0 / ctx->dy2 + ctx->inv_dz2);
ctx->inv_factor = 1.0 / factor;
ctx->omega = params ? params->omega : 1.5;
ctx->omega = poisson_solver_resolve_omega(
params ? params->omega : 0.0, nx, ny, nz, dx, dy, dz);

/* Pre-compute SIMD vectors */
ctx->dx2_inv_vec = vdupq_n_f64(1.0 / ctx->dx2);
Expand Down
3 changes: 2 additions & 1 deletion lib/src/solvers/linear/omp/linear_solver_redblack_omp.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ static cfd_status_t redblack_omp_init(
poisson_solver_compute_3d_bounds(nz, nx, ny, &ctx->stride_z, &ctx->k_start, &ctx->k_end);
double factor = 2.0 * (1.0 / ctx->dx2 + 1.0 / ctx->dy2 + ctx->inv_dz2);
ctx->inv_factor = 1.0 / factor;
ctx->omega = params ? params->omega : 1.5;
ctx->omega = poisson_solver_resolve_omega(
params ? params->omega : 0.0, nx, ny, nz, dx, dy, dz);
ctx->initialized = 1;

solver->context = ctx;
Expand Down
12 changes: 5 additions & 7 deletions tests/math/test_omp_consistency.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* consistent results with their scalar reference implementations:
* - CG OMP vs CG Scalar: L2 difference < 1e-9 (iterative math is identical)
* - Red-Black SOR OMP vs Scalar: L2 difference < 1e-6 (parallel ordering
* causes minor rounding differences; OMP RB-SOR is known-fragile per CLAUDE.md)
* causes minor rounding differences)
*
* Both tests skip gracefully when OpenMP is not enabled or unavailable.
*/
Expand Down Expand Up @@ -111,7 +111,7 @@ void test_cg_omp_vs_scalar(void) {
/* Initialize and solve with scalar solver */
poisson_solver_params_t params = poisson_solver_params_default();
params.tolerance = TOLERANCE;
params.max_iterations = 5000;
params.max_iterations = 1000;

cfd_status_t status = poisson_solver_init(solver_scalar, NX, NY, 1, dx, dy, 0.0, &params);
TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
Expand Down Expand Up @@ -193,10 +193,8 @@ void test_cg_omp_vs_scalar(void) {
* Solves the same Poisson problem with both scalar and OMP Red-Black SOR
* solvers, then verifies the solutions agree within a relaxed tolerance.
*
* Note: OMP Red-Black SOR is known-fragile (see CLAUDE.md Known Issues).
* Parallel sweep ordering differs from scalar, producing larger rounding
* differences than CG. Tolerances and iteration allowances are relaxed
* accordingly.
* differences than CG. Tolerances are relaxed accordingly.
*/
void test_redblack_omp_vs_scalar(void) {
double dx = (XMAX - XMIN) / (NX - 1);
Expand Down Expand Up @@ -224,7 +222,7 @@ void test_redblack_omp_vs_scalar(void) {
/* Initialize and solve with scalar solver */
poisson_solver_params_t params = poisson_solver_params_default();
params.tolerance = TOLERANCE;
params.max_iterations = 5000;
params.max_iterations = 1000;

cfd_status_t status = poisson_solver_init(solver_scalar, NX, NY, 1, dx, dy, 0.0, &params);
TEST_ASSERT_EQUAL(CFD_SUCCESS, status);
Expand All @@ -246,7 +244,7 @@ void test_redblack_omp_vs_scalar(void) {
}

/* Create OMP Red-Black SOR solver.
* Known-fragile: solver_create may return NULL if the OMP RB-SOR factory
* solver_create may return NULL if the OMP RB-SOR factory
* is not registered. Skip gracefully rather than failing. */
poisson_solver_t* solver_omp = poisson_solver_create(
POISSON_METHOD_REDBLACK_SOR, POISSON_BACKEND_OMP);
Expand Down
Loading
Loading