Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/modelisation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ backend:
differences used in the pseudo-Boolean encoding.

The MIP backend also uses a numerical strictness constant
:math:`\eta = \texttt{num\_epsilon} = 2^{-6} = 1/64 = 0.015625` for strict
:math:`\eta = \texttt{num\_epsilon} = 10^{-6}` for strict
right-branch comparisons in numerical splits.

Feature domains
Expand Down
29 changes: 15 additions & 14 deletions ocean/mip/_builders/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ def _cset(
# :: flow[node.right] = 0.0
# Otherwise,
# the path in the tree should go to the left of the node
# if the value of the feature is less than the threshold.
# :: mu[j-1] <= 1 - epsilon * flow[node.left],
# if the value of the feature is less than or equal to the
# threshold.
# :: mu[j-1] >= flow[node.right],
# the path in the tree should go to the right of the node
# if the value of the feature is greater than the threshold.
Expand All @@ -157,7 +157,6 @@ def _cset(
raise ValueError(msg)

mu = var.mget(j - 1)
model.addConstr(mu <= 1 - epsilon * tree[node.left.node_id])
model.addConstr(mu >= tree[node.right.node_id])

mu = var.mget(j)
Expand Down Expand Up @@ -232,29 +231,31 @@ def _find_best_epsilon(
var: FeatureVar,
epsilon: float,
) -> float:
# Find the best epsilon value for the given feature variable.
# This is done by finding the minimum difference between
# the split levels and the tolerance of the solver.
# Keep the requested split epsilon and tighten solver tolerances when
# possible instead of inflating the effective margin around thresholds.
tol: float = model.getParamInfo("FeasibilityTol")[2]
int_tol: float = model.getParamInfo("IntFeasTol")[2]
min_tol: float = 1e-9
delta: float = min(*np.diff(var.levels))
eps: float = 1.0 - min_tol
if delta <= 2 * min_tol:
msg = "The difference between the split levels"
msg += f" is too small (={delta}) compared to"
msg += f" the tolerance minimum of Gurobi ({min_tol})."
msg += " There could be some precision errors."
msg += " Consider not scaling the data or using bigger intervals."
warnings.warn(msg, category=UserWarning, stacklevel=2)
if min_tol != tol:
if min_tol < tol:
model.setParam("FeasibilityTol", min_tol)
return eps
if delta * epsilon > tol:
if min_tol < int_tol:
model.setParam("IntFeasTol", min_tol)
return epsilon
while 2 * tol / delta >= 1.0:
tol /= 2
model.setParam("FeasibilityTol", max(tol, min_tol))
return 2 * tol / delta

target_tol = max(min_tol, delta * epsilon / 2.0)
if target_tol < tol:
model.setParam("FeasibilityTol", target_tol)
if target_tol < int_tol:
model.setParam("IntFeasTol", target_tol)
return epsilon


class ModelBuilderFactory:
Expand Down
25 changes: 25 additions & 0 deletions ocean/mip/_explainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import cast

import gurobipy as gp
import numpy as np
from sklearn.ensemble import AdaBoostClassifier, IsolationForest

from ..abc import Mapper
Expand All @@ -23,6 +24,8 @@
class Explainer(Model, BaseExplainer):
"""Mixed-integer programming explainer for tree ensemble classifiers."""

_output_values: Array1D | None

def __init__(
self,
ensemble: BaseExplainableEnsemble,
Expand Down Expand Up @@ -56,8 +59,15 @@ def __init__(
model_type=model_type,
flow_type=flow_type,
)
self._output_values = None
self.build()

def vget(self, i: int) -> gp.Var:
var = super().vget(i)
if self._output_values is None:
return var
return cast("gp.Var", _ValueProxy(var, float(self._output_values[i])))

def get_objective_value(self) -> float:
"""
Return the solver objective value of the last optimization run.
Expand Down Expand Up @@ -212,6 +222,7 @@ def explain(
by the explainer.

"""
self._output_values = None
self.setParam("LogToConsole", int(verbose))
self.setParam("TimeLimit", max_time)
self.setParam("Seed", random_seed)
Expand Down Expand Up @@ -260,6 +271,7 @@ def explain(
raise RuntimeError(msg)
self.explanation.query = x
self._distance_norm = norm
self._output_values = np.asarray(self.explanation.x, dtype=np.float64)
if clean_up:
self.cleanup()
return self.explanation
Expand Down Expand Up @@ -288,3 +300,16 @@ def __call__(self, model: gp.Model, where: int) -> None:
"objective_value": best_objective,
"time": time.time() - self.starttime,
})


class _ValueProxy:
def __init__(self, var: gp.Var, value: float) -> None:
self._var = var
self._value = value

@property
def X(self) -> float:
return self._value

def __getattr__(self, name: str) -> object:
return getattr(self._var, name)
19 changes: 15 additions & 4 deletions ocean/mip/_explanation.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,22 @@ def format_discrete_value(
return float(val)
return float(query_arr[f])

@staticmethod
def _continuous_index(feature: FeatureVar) -> int:
def _continuous_index(self, feature: FeatureVar) -> int:
levels = np.asarray(feature.levels, dtype=float)
n_intervals = len(levels) - 1
x = float(feature.xget().X)
idx = int(np.searchsorted(feature.levels, x, side="left")) - 1
return max(0, min(idx, len(feature.levels) - 2))
idx = int(np.searchsorted(levels, x, side="left")) - 1

close = np.flatnonzero(np.isclose(levels, x, rtol=0.0, atol=self._atol))
if close.size > 0:
k = int(close[0])
if k <= 0:
return 0
if k >= n_intervals:
return n_intervals - 1
return k if self._atol < feature.mget(k).X else k - 1

return max(0, min(idx, n_intervals - 1))

@property
def value(self) -> Mapping[Key, Key | Number]:
Expand Down
31 changes: 17 additions & 14 deletions ocean/mip/_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Model(BaseModel, FeatureManager, TreeManager, GarbageManager):
"""

DEFAULT_EPSILON: Unit = 1.0 / (2.0**16)
DEFAULT_NUM_EPSILON: Unit = 1.0 / (2.0**6)
DEFAULT_NUM_EPSILON: Unit = 1e-6  # round decimal near 2.0**-20 (~9.54e-7)
MIN_NUMERIC_TOL: float = 1e-9

class Type(Enum):
Expand Down Expand Up @@ -162,6 +162,7 @@ def add_objective(

"""
objective = self._add_objective(x=x, norm=norm)
self.explanation.query = np.asarray(x, dtype=np.float64).ravel().copy()
self.setObjective(objective, sense=sense)

@validate_call
Expand Down Expand Up @@ -263,20 +264,18 @@ def _set_isolation(self) -> None:

def _stabilize_tolerances(self) -> None:
"""
Tighten solver tolerances when the class margin is very small.

The MIP uses a tiny score margin ``self._epsilon`` to enforce the
target class. If integer-valued branching variables are accepted with
a larger tolerance than that margin, Gurobi can report solutions whose
near-integral leaf selections satisfy the model scores but decode to an
invalid counterfactual under exact tree traversal.
Tighten solver tolerances to preserve exact split semantics.

The formulation relies on strict score margins and split implications.
With Gurobi's default feasibility tolerances, a continuous feature can
end up slightly on the wrong side of a split threshold while the path
variables still select the opposite branch, which then disagrees with
exact sklearn tree traversal. Using the minimum supported tolerance
and asking the MIP solver to prioritize integral solutions reduces
those numerically loose incumbents without an explicit re-optimization
pass.
"""
total_weight = float(np.sum(self.weights, dtype=np.float64))
if total_weight <= 0.0:
return

safe_tol = self._epsilon / (2.0 * total_weight)
safe_tol = max(self.MIN_NUMERIC_TOL, safe_tol)
safe_tol = self.MIN_NUMERIC_TOL

feasibility_tol = float(self.getParamInfo("FeasibilityTol")[2])
if safe_tol < feasibility_tol:
Expand All @@ -286,6 +285,10 @@ def _stabilize_tolerances(self) -> None:
if safe_tol < int_feas_tol:
self.setParam("IntFeasTol", safe_tol)

integrality_focus = int(self.getParamInfo("IntegralityFocus")[2])
if integrality_focus < 1:
self.setParam("IntegralityFocus", 1)

def _add_objective(self, x: Array1D, norm: int) -> Objective:
r"""
Build the symbolic objective expression for :math:`d(x, \hat{x})`.
Expand Down
39 changes: 13 additions & 26 deletions tests/mip/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from ocean.typing import Key

PATH_ATOL = 1e-9


def check_solution(x: Array1D, explanation: Explanation) -> None:
n = explanation.n_columns
Expand Down Expand Up @@ -60,33 +62,15 @@ def check_node(tree: TreeVar, node: Node, explanation: Explanation) -> None:

left = node.left
right = node.right
x = explanation.x
left_value = tree[left.node_id].X
right_value = tree[right.node_id].X
assert np.isclose(
left_value + right_value, tree[node.node_id].X, rtol=0.0, atol=PATH_ATOL
)
next_node = (
left
if np.isclose(tree[left.node_id].X, 1.0, rtol=0.0, atol=1e-10)
else right
left if np.isclose(left_value, 1.0, rtol=0.0, atol=PATH_ATOL) else right
)
assert np.isclose(tree[next_node.node_id].X, 1.0, rtol=0.0, atol=1e-10)

name = node.feature
if explanation[name].is_one_hot_encoded:
code = node.code
i = explanation.idx.get(name, code)
value = x[i]
else:
i = explanation.idx.get(name)
value = x[i]

if explanation[name].is_numeric:
threshold = node.threshold
if value <= threshold:
assert np.isclose(tree[left.node_id].X, 1.0, rtol=0.0, atol=1e-10)
else:
assert np.isclose(tree[right.node_id].X, 1.0, rtol=0.0, atol=1e-10)
elif np.isclose(value, 0.0, rtol=0.0, atol=1e-10):
assert np.isclose(tree[left.node_id].X, 1.0, rtol=0.0, atol=1e-10)
else:
assert np.isclose(tree[right.node_id].X, 1.0, rtol=0.0, atol=1e-10)
assert np.isclose(tree[next_node.node_id].X, 1.0, rtol=0.0, atol=PATH_ATOL)

check_node(tree, next_node, explanation=explanation)

Expand Down Expand Up @@ -117,7 +101,10 @@ def validate_sklearn_paths(
node = (
node.left
if np.isclose(
tree[node.left.node_id].X, 1.0, rtol=0.0, atol=1e-10
tree[node.left.node_id].X,
1.0,
rtol=0.0,
atol=PATH_ATOL,
)
else node.right
)
Expand Down
Loading