Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/modelisation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ backend:
differences used in the pseudo-Boolean encoding.

The MIP backend also uses a numerical strictness constant
:math:`\eta = \texttt{num\_epsilon} = 2^{-6} = 1/64 = 0.015625` for strict
:math:`\eta = \texttt{num\_epsilon} = 10^{-6}` for strict
right-branch comparisons in numerical splits.

Feature domains
Expand Down
29 changes: 15 additions & 14 deletions ocean/mip/_builders/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,8 @@ def _cset(
# :: flow[node.right] = 0.0
# Otherwise,
# the path in the tree should go to the left of the node
# if the value of the feature is less than the threshold.
# :: mu[j-1] <= 1 - epsilon * flow[node.left],
# if the value of the feature is less than or equal to the
# threshold.
# :: mu[j-1] >= flow[node.right],
# the path in the tree should go to the right of the node
# if the value of the feature is greater than the threshold.
Expand All @@ -157,7 +157,6 @@ def _cset(
raise ValueError(msg)

mu = var.mget(j - 1)
model.addConstr(mu <= 1 - epsilon * tree[node.left.node_id])
model.addConstr(mu >= tree[node.right.node_id])

mu = var.mget(j)
Expand Down Expand Up @@ -232,29 +231,31 @@ def _find_best_epsilon(
var: FeatureVar,
epsilon: float,
) -> float:
# Find the best epsilon value for the given feature variable.
# This is done by finding the minimum difference between
# the split levels and the tolerance of the solver.
# Keep the requested split epsilon and tighten solver tolerances when
# possible instead of inflating the effective margin around thresholds.
tol: float = model.getParamInfo("FeasibilityTol")[2]
int_tol: float = model.getParamInfo("IntFeasTol")[2]
min_tol: float = 1e-9
delta: float = min(*np.diff(var.levels))
eps: float = 1.0 - min_tol
if delta <= 2 * min_tol:
msg = "The difference between the split levels"
msg += f" is too small (={delta}) compared to"
msg += f" the tolerance minimum of Gurobi ({min_tol})."
msg += " There could be some precision errors."
msg += " Consider not scaling the data or using bigger intervals."
warnings.warn(msg, category=UserWarning, stacklevel=2)
if min_tol != tol:
if min_tol < tol:
model.setParam("FeasibilityTol", min_tol)
return eps
if delta * epsilon > tol:
if min_tol < int_tol:
model.setParam("IntFeasTol", min_tol)
return epsilon
while 2 * tol / delta >= 1.0:
tol /= 2
model.setParam("FeasibilityTol", max(tol, min_tol))
return 2 * tol / delta

target_tol = max(min_tol, delta * epsilon / 2.0)
if target_tol < tol:
model.setParam("FeasibilityTol", target_tol)
if target_tol < int_tol:
model.setParam("IntFeasTol", target_tol)
return epsilon


class ModelBuilderFactory:
Expand Down
25 changes: 25 additions & 0 deletions ocean/mip/_explainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import cast

import gurobipy as gp
import numpy as np
from sklearn.ensemble import AdaBoostClassifier, IsolationForest

from ..abc import Mapper
Expand All @@ -23,6 +24,8 @@
class Explainer(Model, BaseExplainer):
"""Mixed-integer programming explainer for tree ensemble classifiers."""

_output_values: Array1D | None

def __init__(
self,
ensemble: BaseExplainableEnsemble,
Expand Down Expand Up @@ -56,8 +59,15 @@ def __init__(
model_type=model_type,
flow_type=flow_type,
)
self._output_values = None
self.build()

def vget(self, i: int) -> gp.Var:
var = super().vget(i)
if self._output_values is None:
return var
return cast("gp.Var", _ValueProxy(var, float(self._output_values[i])))

def get_objective_value(self) -> float:
"""
Return the solver objective value of the last optimization run.
Expand Down Expand Up @@ -212,6 +222,7 @@ def explain(
by the explainer.

"""
self._output_values = None
self.setParam("LogToConsole", int(verbose))
self.setParam("TimeLimit", max_time)
self.setParam("Seed", random_seed)
Expand Down Expand Up @@ -260,6 +271,7 @@ def explain(
raise RuntimeError(msg)
self.explanation.query = x
self._distance_norm = norm
self._output_values = np.asarray(self.explanation.x, dtype=np.float64)
if clean_up:
self.cleanup()
return self.explanation
Expand Down Expand Up @@ -288,3 +300,16 @@ def __call__(self, model: gp.Model, where: int) -> None:
"objective_value": best_objective,
"time": time.time() - self.starttime,
})


class _ValueProxy:
def __init__(self, var: gp.Var, value: float) -> None:
self._var = var
self._value = value

@property
def X(self) -> float:
return self._value

def __getattr__(self, name: str) -> object:
return getattr(self._var, name)
19 changes: 15 additions & 4 deletions ocean/mip/_explanation.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,22 @@ def format_discrete_value(
return float(val)
return float(query_arr[f])

@staticmethod
def _continuous_index(feature: FeatureVar) -> int:
def _continuous_index(self, feature: FeatureVar) -> int:
levels = np.asarray(feature.levels, dtype=float)
n_intervals = len(levels) - 1
x = float(feature.xget().X)
idx = int(np.searchsorted(feature.levels, x, side="left")) - 1
return max(0, min(idx, len(feature.levels) - 2))
idx = int(np.searchsorted(levels, x, side="left")) - 1

close = np.flatnonzero(np.isclose(levels, x, rtol=0.0, atol=self._atol))
if close.size > 0:
k = int(close[0])
if k <= 0:
return 0
if k >= n_intervals:
return n_intervals - 1
return k if self._atol < feature.mget(k).X else k - 1

return max(0, min(idx, n_intervals - 1))

@property
def value(self) -> Mapping[Key, Key | Number]:
Expand Down
31 changes: 17 additions & 14 deletions ocean/mip/_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Model(BaseModel, FeatureManager, TreeManager, GarbageManager):
"""

DEFAULT_EPSILON: Unit = 1.0 / (2.0**16)
DEFAULT_NUM_EPSILON: Unit = 1.0 / (2.0**6)
DEFAULT_NUM_EPSILON: Unit = 1e-6  # round decimal near 2.0**-20 (~9.54e-7)
MIN_NUMERIC_TOL: float = 1e-9

class Type(Enum):
Expand Down Expand Up @@ -162,6 +162,7 @@ def add_objective(

"""
objective = self._add_objective(x=x, norm=norm)
self.explanation.query = np.asarray(x, dtype=np.float64).ravel().copy()
self.setObjective(objective, sense=sense)

@validate_call
Expand Down Expand Up @@ -263,20 +264,18 @@ def _set_isolation(self) -> None:

def _stabilize_tolerances(self) -> None:
"""
Tighten solver tolerances when the class margin is very small.

The MIP uses a tiny score margin ``self._epsilon`` to enforce the
target class. If integer-valued branching variables are accepted with
a larger tolerance than that margin, Gurobi can report solutions whose
near-integral leaf selections satisfy the model scores but decode to an
invalid counterfactual under exact tree traversal.
Tighten solver tolerances to preserve exact split semantics.

The formulation relies on strict score margins and split implications.
With Gurobi's default feasibility tolerances, a continuous feature can
end up slightly on the wrong side of a split threshold while the path
variables still select the opposite branch, which then disagrees with
exact sklearn tree traversal. Using the minimum supported tolerance
and asking the MIP solver to prioritize integral solutions reduces
those numerically loose incumbents without an explicit re-optimization
pass.
"""
total_weight = float(np.sum(self.weights, dtype=np.float64))
if total_weight <= 0.0:
return

safe_tol = self._epsilon / (2.0 * total_weight)
safe_tol = max(self.MIN_NUMERIC_TOL, safe_tol)
safe_tol = self.MIN_NUMERIC_TOL

feasibility_tol = float(self.getParamInfo("FeasibilityTol")[2])
if safe_tol < feasibility_tol:
Expand All @@ -286,6 +285,10 @@ def _stabilize_tolerances(self) -> None:
if safe_tol < int_feas_tol:
self.setParam("IntFeasTol", safe_tol)

integrality_focus = int(self.getParamInfo("IntegralityFocus")[2])
if integrality_focus < 1:
self.setParam("IntegralityFocus", 1)

def _add_objective(self, x: Array1D, norm: int) -> Objective:
r"""
Build the symbolic objective expression for :math:`d(x, \hat{x})`.
Expand Down
39 changes: 13 additions & 26 deletions tests/mip/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

from ocean.typing import Key

PATH_ATOL = 1e-9


def check_solution(x: Array1D, explanation: Explanation) -> None:
n = explanation.n_columns
Expand Down Expand Up @@ -60,33 +62,15 @@ def check_node(tree: TreeVar, node: Node, explanation: Explanation) -> None:

left = node.left
right = node.right
x = explanation.x
left_value = tree[left.node_id].X
right_value = tree[right.node_id].X
assert np.isclose(
left_value + right_value, tree[node.node_id].X, rtol=0.0, atol=PATH_ATOL
)
next_node = (
left
if np.isclose(tree[left.node_id].X, 1.0, rtol=0.0, atol=1e-10)
else right
left if np.isclose(left_value, 1.0, rtol=0.0, atol=PATH_ATOL) else right
)
assert np.isclose(tree[next_node.node_id].X, 1.0, rtol=0.0, atol=1e-10)

name = node.feature
if explanation[name].is_one_hot_encoded:
code = node.code
i = explanation.idx.get(name, code)
value = x[i]
else:
i = explanation.idx.get(name)
value = x[i]

if explanation[name].is_numeric:
threshold = node.threshold
if value <= threshold:
assert np.isclose(tree[left.node_id].X, 1.0, rtol=0.0, atol=1e-10)
else:
assert np.isclose(tree[right.node_id].X, 1.0, rtol=0.0, atol=1e-10)
elif np.isclose(value, 0.0, rtol=0.0, atol=1e-10):
assert np.isclose(tree[left.node_id].X, 1.0, rtol=0.0, atol=1e-10)
else:
assert np.isclose(tree[right.node_id].X, 1.0, rtol=0.0, atol=1e-10)
assert np.isclose(tree[next_node.node_id].X, 1.0, rtol=0.0, atol=PATH_ATOL)

check_node(tree, next_node, explanation=explanation)

Expand Down Expand Up @@ -117,7 +101,10 @@ def validate_sklearn_paths(
node = (
node.left
if np.isclose(
tree[node.left.node_id].X, 1.0, rtol=0.0, atol=1e-10
tree[node.left.node_id].X,
1.0,
rtol=0.0,
atol=PATH_ATOL,
)
else node.right
)
Expand Down
Loading