aliceb-nv · aliceb-nv · Nov 8, 2025 · Nov 8, 2025 · Nov 8, 2025 · Nov 9, 2025
diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp
@@ -12,6 +12,7 @@
 #include <cuopt/linear_programming/mip/solver_solution.hpp>
 #include <cuopt/linear_programming/optimization_problem_interface.hpp>
 #include <cuopt/linear_programming/solve.hpp>
+#include <cuopt/linear_programming/utilities/internals.hpp>
 #include <mps_parser/parser.hpp>
 #include <utilities/logger.hpp>
 
@@ -117,7 +118,7 @@ void read_single_solution_from_path(const std::string& path,
   }
 }
 
-// reads a solution from an input file. The input file needs to be csv formatted
+// Reads a solution from an input file. The input file needs to be csv formatted
 // var_name,val
 std::vector<std::vector<double>> read_solution_from_dir(const std::string file_path,
                                                         const std::string& mps_file_name,
@@ -137,6 +138,58 @@ std::vector<std::vector<double>> read_solution_from_dir(const std::string file_p
   return initial_solutions;
 }
 
+struct incumbent_record_t {
+  double objective;
+  double work_timestamp;
+  double wall_time;
+  cuopt::internals::mip_solution_origin_t origin;
+};
+
+class incumbent_tracker_t : public cuopt::internals::get_solution_callback_ext_t {
+ public:
+  incumbent_tracker_t(std::chrono::high_resolution_clock::time_point start_time)
+    : start_time_(start_time)
+  {
+  }
+
+  void get_solution(void* data,
+                    void* cost,
+                    void* solution_bound,
+                    const cuopt::internals::mip_solution_callback_info_t* info,
+                    void* user_data) override
+  {
+    double obj    = *static_cast<double*>(cost);
+    double wt     = (info != nullptr) ? info->work_timestamp : -1.0;
+    auto origin   = (info != nullptr) ? (cuopt::internals::mip_solution_origin_t)info->origin
+                                      : cuopt::internals::mip_solution_origin_t::UNKNOWN;
+    auto now      = std::chrono::high_resolution_clock::now();
+    double wall_s = std::chrono::duration<double>(now - start_time_).count();
+    records_.push_back({obj, wt, wall_s, (cuopt::internals::mip_solution_origin_t)origin});
+  }
+
+  void write_csv(const std::string& path) const
+  {
+    std::ofstream f(path);
+    if (!f.is_open()) {
+      fprintf(stderr, "Failed to open incumbent CSV: %s\n", path.c_str());
+      return;
+    }
+    f << "index,objective,work_timestamp,wall_time_s,origin\n";
+    for (size_t i = 0; i < records_.size(); ++i) {
+      auto& r = records_[i];
+      f << i << "," << std::setprecision(15) << r.objective << "," << r.work_timestamp << ","
+        << std::setprecision(6) << r.wall_time << ","
+        << cuopt::internals::mip_solution_origin_to_string(r.origin) << "\n";
+    }
+  }
+
+  size_t size() const { return records_.size(); }
+
+ private:
+  std::chrono::high_resolution_clock::time_point start_time_;
+  std::vector<incumbent_record_t> records_;
+};
+
 int run_single_file(std::string file_path,
                     int device,
                     int batch_id,
@@ -203,21 +256,40 @@ int run_single_file(std::string file_path,
       }
     }
   }
-  settings.time_limit       = time_limit;
-  settings.work_limit       = work_limit;
-  settings.heuristics_only  = heuristics_only;
-  settings.num_cpu_threads  = num_cpu_threads;
-  settings.log_to_console   = log_to_console;
-  settings.determinism_mode = deterministic ? CUOPT_MODE_DETERMINISTIC : CUOPT_MODE_OPPORTUNISTIC;
+  settings.time_limit      = time_limit;
+  settings.work_limit      = work_limit;
+  settings.heuristics_only = heuristics_only;
+  settings.num_cpu_threads = num_cpu_threads;
+  settings.log_to_console  = log_to_console;
+  if (deterministic) {
+    settings.determinism_mode =
+      heuristics_only ? CUOPT_MODE_DETERMINISTIC_GPU_HEURISTICS : CUOPT_MODE_DETERMINISTIC;
+  } else {
+    settings.determinism_mode = CUOPT_MODE_OPPORTUNISTIC;
+  }
+  CUOPT_LOG_INFO(
+    "run_mip settings: heuristics_only=%d deterministic=%d determinism_mode=%d "
+    "time_limit=%.6f work_limit=%.6f",
+    (int)heuristics_only,
+    (int)deterministic,
+    settings.determinism_mode,
+    settings.time_limit,
+    settings.work_limit);
   settings.tolerances.relative_tolerance = 1e-12;
   settings.tolerances.absolute_tolerance = 1e-6;
   settings.presolver                     = cuopt::linear_programming::presolver_t::Default;
   settings.reliability_branching         = reliability_branching;
   settings.clique_cuts                   = -1;
   settings.seed                          = 42;
+  settings.bb_work_unit_scale            = 1.0;
+  settings.gpu_heur_work_unit_scale      = 1.0;
+  settings.mip_scaling                   = false;
+  settings.gpu_heur_wait_for_exploration = false;
   cuopt::linear_programming::benchmark_info_t benchmark_info;
   settings.benchmark_info_ptr = &benchmark_info;
   auto start_run_solver       = std::chrono::high_resolution_clock::now();
+  incumbent_tracker_t incumbent_tracker(start_run_solver);
+  settings.set_mip_callback(&incumbent_tracker);
   auto solution = cuopt::linear_programming::solve_mip(&handle_, mps_data_model, settings);
   CUOPT_LOG_INFO(
     "first obj: %f last improvement of best feasible: %f last improvement after recombination: %f",
@@ -253,7 +325,13 @@ int run_single_file(std::string file_path,
      << benchmark_info.last_improvement_after_recombination << "," << mip_gap << "," << is_optimal
      << "\n";
   write_to_output_file(out_dir, base_filename, device, n_gpus, batch_id, ss.str());
-  CUOPT_LOG_INFO("Results written to the file %s", base_filename.c_str());
+  if (!out_dir.empty()) {
+    std::string mps_stem = base_filename.substr(0, base_filename.find(".mps"));
+    std::string csv_path = out_dir + "/" + mps_stem + "_incumbents.csv";
+    incumbent_tracker.write_csv(csv_path);
+    CUOPT_LOG_INFO(
+      "Incumbent trace (%zu entries) written to %s", incumbent_tracker.size(), csv_path.c_str());
+  }
   return sol_found;
 }
 

diff --git a/ci/compute-sanitizer-suppressions.xml b/ci/compute-sanitizer-suppressions.xml
@@ -0,0 +1,249 @@
+<?xml version="1.0" encoding="utf-8"?>
+<ComputeSanitizerOutput>
+  <record>
+    <kind>Initcheck</kind>
+    <what>
+      <text>Uninitialized __global__ memory read of size 4 bytes</text>
+      <size>4</size>
+    </what>
+    <where>
+      <func>.*</func>
+    </where>
+    <hostStack>
+      <frame>
+        <module>.*libcuda.so.*</module>
+      </frame>
+      <frame>
+        <func>cusparseCsr2cscEx2</func>
+        <module>.*libcusparse.so.*</module>
+      </frame>
+    </hostStack>
+  </record>
+  <record>
+    <kind>Initcheck</kind>
+    <what>
+      <text>Uninitialized __global__ memory read of size 4 bytes</text>
+      <size>4</size>
+    </what>
+    <where>
+      <func>ThreadLoad</func>
+    </where>
+    <hostStack>
+      <frame>
+        <module>.*libcuda.so.*</module>
+      </frame>
+      <frame>
+        <module>libcudart.*</module>
+      </frame>
+      <frame>
+        <func>cudaLaunchKernel</func>
+      </frame>
+      <frame>
+        <func>.*cub::.*::Device(Segmented)?(Reduce|Scan)(SingleTile)?Kernel.*</func>
+      </frame>
+    </hostStack>
+  </record>
+  <record>
+    <kind>Initcheck</kind>
+    <what>
+      <text>Uninitialized __global__ memory read of size 2 bytes</text>
+      <size>2</size>
+    </what>
+    <where>
+      <func>ThreadLoad</func>
+    </where>
+    <hostStack>
+      <frame>
+        <module>.*libcuda.so.*</module>
+      </frame>
+      <frame>
+        <module>libcudart.*</module>
+      </frame>
+      <frame>
+        <func>cudaLaunchKernel</func>
+      </frame>
+      <frame>
+        <func>.*cub::.*::Device(Segmented)?(Reduce|Scan)(SingleTile)?Kernel.*</func>
+      </frame>
+    </hostStack>
+  </record>
+  <record>
+    <kind>Initcheck</kind>
+    <what>
+      <text>Uninitialized __global__ memory read of size 8 bytes</text>
+      <size>8</size>
+    </what>
+    <where>
+      <func>DeviceSegmentedReduceKernel</func>
+    </where>
+  </record>
+  <record>
+    <kind>Initcheck</kind>
+    <what>
+      <text>Uninitialized __global__ memory read of size 4 bytes</text>
+      <size>4</size>
+    </what>
+    <where>
+      <func>ThreadLoad</func>
+    </where>
+    <hostStack>
+      <frame>
+        <module>.*libcuda.so.*</module>
+      </frame>
+      <frame>
+        <module>libcudart.*</module>
+      </frame>
+      <frame>
+        <module>libcudart.*</module>
+      </frame>
+      <frame>
+        <module>.*libcuopt.*</module>
+      </frame>
+      <frame>
+        <func>.*Device(Reduce|Scan)Kernel.*</func>
+      </frame>
+    </hostStack>
+  </record>
+  <!-- Rule matching cccl's pattern of copying tuples back to host after reduce_by_keys, which contain uninitialized padding  -->
+  <!-- Because of aggressive inlining, thrust calls are elided out of the host stack, which prevents a more finely grained rule. In practice this is good enough -->
+  <record>
+    <kind>InitcheckApiError</kind>
+    <level>Error</level>
+    <what>
+      <text>Host API uninitialized memory access</text>
+      <accessSize>16</accessSize>
+    </what>
+    <hostStack>
+      <frame>
+        <func>cuMemcpyDtoHAsync.*</func>
+        <module>.*libcuda.so.*</module>
+      </frame>
+    </hostStack>
+  </record>
+  <!-- Suppress uninit copies on rmm::device_vector copy constructor - often vector members are allocated but not filled -->
+  <record>
+    <kind>InitcheckApiError</kind>
+    <level>Error</level>
+    <what>
+      <text>Host API uninitialized memory access</text>
+    </what>
+    <hostStack>
+      <frame>
+        <func>cuMemcpyAsync</func>
+        <module>.*libcuda.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <module>.*librmm.so.*</module>
+      </frame>
+      <frame>
+        <func>rmm::device_buffer::device_buffer</func>
+        <module>.*librmm.so.*</module>
+      </frame>
+    </hostStack>
+  </record>
+  <!-- Suppress likely harmless Thrust/CUB tuple-buffer initcheck reads during sort_by_key
+       in trivial_presolve's COO->CSC reorder. Source and destination arrays are validated
+       immediately before the sort; the warning appears to come from internal tuple staging. -->
+  <record>
+    <kind>Initcheck</kind>
+    <what>
+      <text>Uninitialized __global__ memory read</text>
+    </what>
+    <where>
+      <func>transform_kernel</func>
+    </where>
+    <hostStack>
+      <frame>
+        <func>cuLaunchKernel_ptsz</func>
+        <module>.*libcuda.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <func>cudaLaunchKernel_ptsz</func>
+      </frame>
+    </hostStack>
+  </record>
+  <record>
+    <kind>InitcheckApiError</kind>
+    <level>Error</level>
+    <what>
+      <text>Host API uninitialized memory access</text>
+    </what>
+    <hostStack>
+      <frame>
+        <func>cuMemcpyAsync</func>
+        <module>.*libcuda.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <module>.*librmm.so.*</module>
+      </frame>
+      <frame>
+        <module>.*librmm.so.*</module>
+      </frame>
+      <frame>
+        <func>rmm::device_uvector.*::device_uvector</func>
+        <module>.*libcuopt.so.*</module>
+      </frame>
+    </hostStack>
+  </record>
+  <!-- Uninitialized device-to-device copies are usually harmless - if actualy bogus, errors may be caught later on -->
+  <record>
+    <kind>InitcheckApiError</kind>
+    <level>Error</level>
+    <what>
+      <text>Host API uninitialized memory access</text>
+    </what>
+    <hostStack>
+      <frame>
+        <func>cuMemcpyDtoDAsync.*</func>
+        <module>.*libcuda.so.*</module>
+      </frame>
+    </hostStack>
+  </record>
+  <record>
+    <kind>InitcheckApiError</kind>
+    <level>Error</level>
+    <what>
+      <text>Host API uninitialized memory access</text>
+    </what>
+    <hostStack>
+      <frame>
+        <func>cuMemcpyAsync</func>
+        <module>.*libcuda.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <module>.*libcudart.so.*</module>
+      </frame>
+      <frame>
+        <func>cudaMemcpyAsync</func>
+      </frame>
+      <frame>
+        <func>rmm::device_buffer::resize</func>
+        <module>.*librmm.so.*</module>
+      </frame>
+    </hostStack>
+  </record>
+</ComputeSanitizerOutput>