Skip to content

Commit 201ef45

Browse files
committed
fix the initialize problem
1 parent 8be7509 commit 201ef45

File tree

1 file changed

+18
-6
lines changed

1 file changed

+18
-6
lines changed

workloads/gromacs/mpi_cxl_shim.c

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -722,15 +722,27 @@ static void init_cxl_memory(void) {
722722
uint32_t expected = 0;
723723
if (atomic_compare_exchange_strong(&g_cxl.header->init_state, &expected, 1)) {
724724
need_init = true; // We won the race, do the init
725-
} else {
726-
// Another process is initializing (state=1) or finished (state=2)
727-
// Spin until init is complete
725+
} else if (expected == 1) {
726+
// Another process is mid-init (state=1). Wait with a timeout
727+
// in case the initializer crashed and left init_state stuck at 1.
728728
LOG_INFO("Waiting for another process to complete CXL init...\n");
729-
while (atomic_load(&g_cxl.header->init_state) != 2) {
730-
__asm__ volatile("pause" ::: "memory");
729+
int wait_us = 0;
730+
while (atomic_load(&g_cxl.header->init_state) != 2 && wait_us < 2000000) {
731+
usleep(1000); // 1ms
732+
wait_us += 1000;
733+
}
734+
if (atomic_load(&g_cxl.header->init_state) != 2) {
735+
// Timed out — previous initializer likely crashed.
736+
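// Force re-init. NOTE: the CAS 1 -> 1 below leaves init_state unchanged and its result is ignored, so it does not elect a single owner; every waiter that times out will set need_init.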
737+
LOG_WARN("CXL init timed out (stale init_state=1), forcing re-init\n");
738+
expected = 1;
739+
atomic_compare_exchange_strong(&g_cxl.header->init_state, &expected, 1);
740+
need_init = true;
741+
} else {
742+
__atomic_thread_fence(__ATOMIC_ACQUIRE);
731743
}
732-
__atomic_thread_fence(__ATOMIC_ACQUIRE);
733744
}
745+
// else expected == 2: already done, fall through
734746
}
735747

736748
if (need_init) {

0 commit comments

Comments
 (0)