diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 0000000000..c1a01b7d42 --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,120 @@ +{ + "name": "nvidia-cuopt-skills", + "owner": { + "name": "NVIDIA" + }, + "metadata": { + "description": "Agent skills for NVIDIA cuOpt: routing (VRP, TSP, PDP), LP/MILP/QP, installation (Python/C/developer), and REST server.", + "version": "26.04.00" + }, + "plugins": [ + { + "name": "cuopt-user-rules", + "source": "./skills/cuopt-user-rules", + "skills": "./", + "description": "Base behavior rules for using NVIDIA cuOpt. Read first when helping users with cuOpt (routing, LP/MILP, QP, installation, server)." + }, + { + "name": "cuopt-developer", + "source": "./skills/cuopt-developer", + "skills": "./", + "description": "Contribute to NVIDIA cuOpt codebase including C++/CUDA, Python, server, docs, and CI. Use when the user wants to modify solver internals, add features, submit PRs, or understand the codebase architecture." + }, + { + "name": "cuopt-installation-common", + "source": "./skills/cuopt-installation-common", + "skills": "./", + "description": "Install cuOpt — system and environment requirements only. Domain concepts; no install commands or interface guidance." + }, + { + "name": "cuopt-installation-api-python", + "source": "./skills/cuopt-installation-api-python", + "skills": "./", + "description": "Install cuOpt for Python — pip, conda, Docker, verification. Use when the user is installing or verifying the Python API." + }, + { + "name": "cuopt-installation-api-c", + "source": "./skills/cuopt-installation-api-c", + "skills": "./", + "description": "Install cuOpt for C — conda, locate lib/headers, verification. Use when the user is installing or verifying the C API." + }, + { + "name": "cuopt-installation-developer", + "source": "./skills/cuopt-installation-developer", + "skills": "./", + "description": "Developer installation — build cuOpt from source, run tests. Use when the user wants to set up a dev environment to contribute or modify cuOpt." + }, + { + "name": "lp-milp-formulation", + "source": "./skills/lp-milp-formulation", + "skills": "./", + "description": "LP/MILP concepts and going from problem text to formulation. What LP/MILP are, required formulation questions, typical modeling elements, and how to parse problem statements." + }, + { + "name": "cuopt-lp-milp-api-python", + "source": "./skills/cuopt-lp-milp-api-python", + "skills": "./", + "description": "Solve LP and MILP with the Python API. Use when the user asks about optimization with linear constraints, integer variables, scheduling, resource allocation, facility location, or production planning." + }, + { + "name": "cuopt-lp-milp-api-c", + "source": "./skills/cuopt-lp-milp-api-c", + "skills": "./", + "description": "LP and MILP with cuOpt — C API only. Use when the user is embedding LP/MILP in C/C++." + }, + { + "name": "cuopt-lp-milp-api-cli", + "source": "./skills/cuopt-lp-milp-api-cli", + "skills": "./", + "description": "LP and MILP with cuOpt — CLI only (MPS files, cuopt_cli). Use when the user is solving from MPS via command line." + }, + { + "name": "routing-formulation", + "source": "./skills/routing-formulation", + "skills": "./", + "description": "Vehicle routing (VRP, TSP, PDP) — problem types and data requirements. Domain concepts; no API or interface." + }, + { + "name": "cuopt-routing-api-python", + "source": "./skills/cuopt-routing-api-python", + "skills": "./", + "description": "Vehicle routing (VRP, TSP, PDP) with cuOpt — Python API only. Use when the user is building or solving routing in Python." + }, + { + "name": "qp-formulation", + "source": "./skills/qp-formulation", + "skills": "./", + "description": "Quadratic Programming (QP) — problem form and constraints. Domain concepts; no API or interface. QP is beta." + }, + { + "name": "cuopt-qp-api-python", + "source": "./skills/cuopt-qp-api-python", + "skills": "./", + "description": "Quadratic Programming (QP) with cuOpt — Python API only (beta). Use when the user is building or solving QP in Python." + }, + { + "name": "cuopt-qp-api-c", + "source": "./skills/cuopt-qp-api-c", + "skills": "./", + "description": "Quadratic Programming (QP) with cuOpt — C API. Use when the user is embedding QP in C/C++." + }, + { + "name": "cuopt-qp-api-cli", + "source": "./skills/cuopt-qp-api-cli", + "skills": "./", + "description": "QP with cuOpt — CLI (e.g. cuopt_cli with QP-capable input). Use when the user is solving QP from the command line." + }, + { + "name": "cuopt-server-common", + "source": "./skills/cuopt-server-common", + "skills": "./", + "description": "cuOpt REST server — what it does and how requests flow. Domain concepts; no deploy or client code." + }, + { + "name": "cuopt-server-api-python", + "source": "./skills/cuopt-server-api-python", + "skills": "./", + "description": "cuOpt REST server — start server, endpoints, Python/curl client examples. Use when the user is deploying or calling the REST API." + } + ] +} diff --git a/.claude/AGENTS.md b/.claude/AGENTS.md new file mode 120000 index 0000000000..be77ac83a1 --- /dev/null +++ b/.claude/AGENTS.md @@ -0,0 +1 @@ +../AGENTS.md \ No newline at end of file diff --git a/.claude/CLAUDE.md b/.claude/CLAUDE.md new file mode 120000 index 0000000000..f5f4bd7b93 --- /dev/null +++ b/.claude/CLAUDE.md @@ -0,0 +1 @@ +../.github/AGENTS.md \ No newline at end of file diff --git a/.claude/skills b/.claude/skills new file mode 120000 index 0000000000..42c5394a18 --- /dev/null +++ b/.claude/skills @@ -0,0 +1 @@ +../skills \ No newline at end of file diff --git a/.cursor-plugin/plugin.json b/.cursor-plugin/plugin.json new file mode 100644 index 0000000000..5f34873671 --- /dev/null +++ b/.cursor-plugin/plugin.json @@ -0,0 +1,22 @@ +{ + "name": "nvidia-cuopt-skills", + "description": "Agent skills for NVIDIA cuOpt: routing (VRP, TSP, PDP), LP/MILP/QP, installation (Python/C/developer), and REST server. Use when building or solving optimization with cuOpt.", + "version": "26.04.00", + "author": { + "name": "NVIDIA" + }, + "homepage": "https://github.com/NVIDIA/cuopt", + "repository": "https://github.com/NVIDIA/cuopt", + "license": "Apache-2.0", + "skills": "skills", + "keywords": [ + "nvidia", + "cuopt", + "optimization", + "routing", + "vrp", + "lp", + "milp", + "qp" + ] +} diff --git a/.cursor/AGENTS.md b/.cursor/AGENTS.md new file mode 120000 index 0000000000..be77ac83a1 --- /dev/null +++ b/.cursor/AGENTS.md @@ -0,0 +1 @@ +../AGENTS.md \ No newline at end of file diff --git a/.cursor/skills b/.cursor/skills new file mode 120000 index 0000000000..42c5394a18 --- /dev/null +++ b/.cursor/skills @@ -0,0 +1 @@ +../skills \ No newline at end of file diff --git a/.github/AGENTS.md b/.github/AGENTS.md deleted file mode 100644 index 7854e6599f..0000000000 --- a/.github/AGENTS.md +++ /dev/null @@ -1,32 +0,0 @@ -# AGENTS.md - cuOpt AI Agent Entry Point - -AI agent skills for NVIDIA cuOpt optimization engine. - -## Quick Start - -| Task | Read These Skills | -|------|-------------------| -| **Using cuOpt** (routing, LP, etc.) | `skills/cuopt-user-rules/` → then domain skill | -| **Developing cuOpt** (contributing) | `skills/cuopt-developer/` | - -## Skills Directory - -See `skills/README.md` for the full index. - -### User Skills (read cuopt-user-rules first) -- `skills/cuopt-routing/` — VRP, TSP, PDP -- `skills/cuopt-lp-milp/` — Linear programming, integer variables -- `skills/cuopt-qp/` — Quadratic programming -- `skills/cuopt-debugging/` — Troubleshooting -- `skills/cuopt-installation/` — Setup & requirements -- `skills/cuopt-server/` — REST API deployment - -### Developer Skill (has its own rules) -- `skills/cuopt-developer/` — Contributing code - -## Resources - -- [cuOpt Documentation](https://docs.nvidia.com/cuopt/user-guide/latest/) -- [cuopt-examples repo](https://github.com/NVIDIA/cuopt-examples) -- [GitHub Issues](https://github.com/NVIDIA/cuopt/issues) -- [Developer Forums](https://forums.developer.nvidia.com/c/ai-data-science/nvidia-cuopt/514) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index cf3a570486..7958eac440 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -22,7 +22,7 @@ container-builder/ @nvidia/cuopt-infra-codeowners /.github/.coderabbit_review_guide.md @nvidia/cuopt-infra-codeowners /.github/ISSUE_TEMPLATE/ @nvidia/cuopt-infra-codeowners /.github/PULL_REQUEST_TEMPLATE.md @nvidia/cuopt-infra-codeowners -/.github/skills/ @nvidia/cuopt-infra-codeowners +/skills/ @nvidia/cuopt-infra-codeowners /.github/agents-legacy/ @nvidia/cuopt-infra-codeowners #packaging code owners diff --git a/.github/skills/README.md b/.github/skills/README.md deleted file mode 100644 index 6ffdb6c5e7..0000000000 --- a/.github/skills/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# cuOpt Skills - -This directory contains AI agent skills for NVIDIA cuOpt. - -## For Agents - -1. **First**: Read `cuopt-user-rules/SKILL.md` for user tasks -2. **Then**: Read the relevant domain skill - -For development tasks, read `cuopt-developer/SKILL.md` (has its own rules). - -## Skills Index - -### Rules -| Skill | Description | -|-------|-------------| -| `cuopt-user-rules/` | Behavior rules for user tasks (read first) | - -### Problem-Solving -| Skill | Description | -|-------|-------------| -| `cuopt-routing/` | VRP, TSP, PDP, fleet optimization | -| `cuopt-lp-milp/` | Linear & mixed-integer programming | -| `cuopt-qp/` | Quadratic programming (beta) | - -### Workflow -| Skill | Description | -|-------|-------------| -| `cuopt-debugging/` | Troubleshooting, errors, diagnostics | -| `cuopt-installation/` | Setup, pip, conda, Docker, GPU | - -### Integration -| Skill | Description | -|-------|-------------| -| `cuopt-server/` | REST API deployment | - -### Development -| Skill | Description | -|-------|-------------| -| `cuopt-developer/` | Contributing to codebase (own rules) | diff --git a/.github/skills/cuopt-debugging/SKILL.md b/.github/skills/cuopt-debugging/SKILL.md deleted file mode 100644 index 649b06387d..0000000000 --- a/.github/skills/cuopt-debugging/SKILL.md +++ /dev/null @@ -1,267 +0,0 @@ ---- -name: cuopt-debugging -description: Troubleshoot cuOpt problems including errors, wrong results, infeasible solutions, performance issues, and status codes. Use when the user says something isn't working, gets unexpected results, or needs help diagnosing issues. ---- - -# cuOpt Debugging Skill - -> **Prerequisites**: Read `cuopt-user-rules/SKILL.md` first for behavior rules. - -Diagnose and fix issues with cuOpt solutions, errors, and performance. - -## Before You Start: Required Questions - -**Ask these to understand the problem:** - -1. **What's the symptom?** - - Error message? - - Wrong/unexpected results? - - Empty solution? - - Performance too slow? - -2. **What's the status?** - - For LP/MILP: `problem.Status.name` - - For Routing: `solution.get_status()` - - For Server: HTTP response code - -3. **Can you share?** - - The error message (exact text) - - The code that produces it - - Problem size (variables, constraints, locations) - - Share important log messages or status of on-going run. - -## Quick Diagnosis by Symptom - -### "Solution is empty/None but status looks OK" - -**Most common cause: Wrong status string case** - -```python -# ❌ WRONG - "OPTIMAL" never matches, silently fails -if problem.Status.name == "OPTIMAL": - print(problem.ObjValue) # Never runs! - -# ✅ CORRECT - use PascalCase -if problem.Status.name in ["Optimal", "FeasibleFound"]: - print(problem.ObjValue) -``` - -**Diagnostic code:** -```python -print(f"Actual status: '{problem.Status.name}'") -print(f"Matches 'Optimal': {problem.Status.name == 'Optimal'}") -print(f"Matches 'OPTIMAL': {problem.Status.name == 'OPTIMAL'}") -``` - -### "Objective value is wrong/zero" - -**Check if variables are actually used:** -```python -for var in [x, y, z]: - print(f"{var.name}: {var.getValue()}") -print(f"Objective: {problem.ObjValue}") -``` - -**Common causes:** -- Constraints too restrictive (all zeros is feasible) -- Objective coefficients have wrong sign -- Wrong variable in objective - -### "Infeasible" status - -**For LP/MILP:** -```python -if problem.Status.name == "Infeasible": - print("Problem has no feasible solution") - # Check constraints manually - for name in constraint_names: - c = problem.getConstraint(name) - print(f"{name}: {c}") -``` - -**Common causes:** -- Conflicting constraints (x <= 5 AND x >= 10) -- Bounds too tight -- Missing a "slack" variable for soft constraints - -**For Routing:** -```python -if solution.get_status() != 0: - print(f"Error: {solution.get_error_message()}") - infeasible = solution.get_infeasible_orders() - print(f"Infeasible orders: {infeasible.to_list()}") -``` - -**Common routing infeasibility causes:** -- Time windows too tight (earliest > vehicle latest) -- Total demand > total capacity -- Order location unreachable in time - -### "Integer variable has fractional value" - -```python -# Check how variable was defined -int_var = problem.addVariable( - lb=0, ub=10, - vtype=INTEGER, # Must be INTEGER, not CONTINUOUS - name="count" -) - -# Also check if status is actually optimal -if problem.Status.name == "FeasibleFound": - print("Warning: not fully optimal, may have fractional intermediate values") -``` - -### Server returns 422 Validation Error - -**Check payload against OpenAPI spec:** - -Common field name mistakes: -``` -❌ transit_time_matrix_data → ✅ travel_time_matrix_data -❌ vehicle_capacities → ✅ capacities -❌ locations → ✅ task_locations -``` - -**Capacity format:** -```json -// ❌ WRONG -"capacities": [[50], [50]] - -// ✅ CORRECT -"capacities": [[50, 50]] -``` - -### OutOfMemoryError - -**Check problem size:** -```python -print(f"Variables: {problem.num_variables}") -print(f"Constraints: {problem.num_constraints}") - -# For routing -print(f"Locations: {n_locations}") -print(f"Orders: {n_orders}") -print(f"Fleet: {n_fleet}") -``` - -**Mitigations:** -- Reduce problem size -- Use sparse constraint matrix -- For routing: reduce time limit, simplify constraints - -### cudf Type Errors - -**Always use explicit dtypes:** -```python -cost_matrix = cost_matrix.astype("float32") -demand = cudf.Series([...], dtype="int32") -order_locations = cudf.Series([...], dtype="int32") -time_windows = cudf.Series([...], dtype="int32") -``` - -### MPS Parsing Fails - -**Check MPS format:** -```bash -head -30 problem.mps -``` - -**Required sections in order:** -1. NAME -2. ROWS -3. COLUMNS -4. RHS -5. (optional) BOUNDS -6. ENDATA - -**Common issues:** -- Missing ENDATA -- Integer markers malformed: `'MARKER'`, `'INTORG'`, `'INTEND'` -- Invalid characters or encoding - -## Status Code Reference - -### LP Status Values -| Status | Meaning | -|--------|---------| -| `Optimal` | Found optimal solution | -| `PrimalFeasible` | Found feasible but may not be optimal | -| `PrimalInfeasible` | No feasible solution exists | -| `DualInfeasible` | Problem is unbounded | -| `TimeLimit` | Stopped due to time limit | -| `IterationLimit` | Stopped due to iteration limit | -| `NumericalError` | Numerical issues encountered | -| `NoTermination` | Solver didn't converge | - -### MILP Status Values -| Status | Meaning | -|--------|---------| -| `Optimal` | Found optimal solution | -| `FeasibleFound` | Found feasible, within gap tolerance | -| `Infeasible` | No feasible solution exists | -| `Unbounded` | Problem is unbounded | -| `TimeLimit` | Stopped due to time limit | -| `NoTermination` | No solution found yet | - -### Routing Status Values -| Code | Meaning | -|------|---------| -| 0 | SUCCESS | -| 1 | FAIL | -| 2 | TIMEOUT | -| 3 | EMPTY | - -## Performance Debugging - -### Slow LP/MILP Solve - -```python -settings = SolverSettings() -settings.set_parameter("log_to_console", 1) # See progress -settings.set_parameter("time_limit", 60) # Don't wait forever - -# For MILP, accept good-enough solution -settings.set_parameter("mip_relative_gap", 0.05) # 5% gap -``` - -### Slow Routing Solve - -```python -ss = routing.SolverSettings() -ss.set_time_limit(60) # Increase time for better solutions -ss.set_verbose_mode(True) # See progress during solve -``` - -## Diagnostic Checklist - -``` -□ Status checked with correct case (PascalCase)? -□ All variables have correct vtype (INTEGER vs CONTINUOUS)? -□ Constraint directions correct (<= vs >= vs ==)? -□ Objective sense correct (MINIMIZE vs MAXIMIZE)? -□ For QP: using MINIMIZE (not MAXIMIZE)? -□ Data types explicit (float32, int32)? -□ Matrix dimensions match n_locations? -□ Time windows have transit_time_matrix? -``` - -## Diagnostic Code Snippets - -See [resources/diagnostic_snippets.md](resources/diagnostic_snippets.md) for copy-paste diagnostic code: -- Status checking -- Variable inspection -- Constraint analysis -- Routing infeasibility diagnosis -- Server response debugging -- Memory and performance checks - -## When to Escalate - -Switch to **cuopt-developer** if: -- Bug appears to be in cuOpt itself -- Need to examine solver internals - -File a GitHub issue if: -- Reproducible bug with minimal example -- Include: cuOpt version, CUDA version, error message, minimal repro code diff --git a/.github/skills/cuopt-debugging/resources/diagnostic_snippets.md b/.github/skills/cuopt-debugging/resources/diagnostic_snippets.md deleted file mode 100644 index 61f5c19bfd..0000000000 --- a/.github/skills/cuopt-debugging/resources/diagnostic_snippets.md +++ /dev/null @@ -1,219 +0,0 @@ -# Debugging: Diagnostic Snippets - -## LP/MILP Diagnostics - -### Check Status Properly - -```python -# Print actual status value -print(f"Status: '{problem.Status.name}'") -print(f"Status type: {type(problem.Status.name)}") - -# Common mistake: wrong case -print(f"== 'Optimal': {problem.Status.name == 'Optimal'}") # ✅ -print(f"== 'OPTIMAL': {problem.Status.name == 'OPTIMAL'}") # ❌ Always False -``` - -### Inspect Variables - -```python -# Check all variable values -for var in [x, y, z]: - print(f"{var.name}: lb={var.lb}, ub={var.ub}, value={var.getValue()}") - -# Check if integer variables are actually integer -for var in integer_vars: - val = var.getValue() - is_int = abs(val - round(val)) < 1e-6 - print(f"{var.name}: {val} (is_integer: {is_int})") -``` - -### Inspect Constraints - -```python -# Check constraint values -for name in ["constraint1", "constraint2"]: - c = problem.getConstraint(name) - print(f"{name}: dual={c.DualValue}") -``` - -### Check Problem Size - -```python -print(f"Variables: {problem.num_variables}") -print(f"Constraints: {problem.num_constraints}") -``` - -## Routing Diagnostics - -### Check Solution Status - -```python -status = solution.get_status() -print(f"Status code: {status}") -# 0 = SUCCESS -# 1 = FAIL -# 2 = TIMEOUT -# 3 = EMPTY - -if status != 0: - print(f"Message: {solution.get_message()}") - print(f"Error: {solution.get_error_message()}") -``` - -### Find Infeasible Orders - -```python -infeasible = solution.get_infeasible_orders() -if len(infeasible) > 0: - print(f"Infeasible orders: {infeasible.to_list()}") - - # Check why each is infeasible - for order_idx in infeasible.to_list(): - print(f"\nOrder {order_idx}:") - print(f" Location: {order_locations[order_idx]}") - print(f" Time window: [{order_earliest[order_idx]}, {order_latest[order_idx]}]") - print(f" Demand: {demand[order_idx]}") -``` - -### Verify Data Dimensions - -```python -print(f"Cost matrix shape: {cost_matrix.shape}") -print(f"n_locations declared: {dm.n_locations}") -print(f"n_orders: {len(order_locations)}") -print(f"n_fleet: {dm.n_fleet}") - -# Check consistency -assert cost_matrix.shape[0] == cost_matrix.shape[1], "Matrix not square" -assert cost_matrix.shape[0] == dm.n_locations, "Matrix size != n_locations" -``` - -### Check Data Types - -```python -# For numpy arrays, use .dtype directly -# For pandas/cudf DataFrames, use .values.dtype or .to_numpy().dtype -print(f"cost_matrix dtype: {cost_matrix.values.dtype}") # float32 -print(f"order_locations dtype: {order_locations.values.dtype}") # int32 -print(f"demand dtype: {demand.values.dtype}") # int32 -``` - -### Verify Time Windows Feasibility - -```python -# Check for impossible time windows -for i in range(len(order_earliest)): - if order_earliest[i] > order_latest[i]: - print(f"Order {i}: earliest ({order_earliest[i]}) > latest ({order_latest[i]})") - -# Check if orders are reachable from depot in time -depot = 0 -for i in range(len(order_locations)): - loc = order_locations[i] - travel_time = transit_time_matrix.iloc[depot, loc] - if travel_time > order_latest[i]: - print(f"Order {i}: unreachable (travel={travel_time}, latest={order_latest[i]})") -``` - -### Check Capacity Feasibility - -```python -total_demand = demand.sum() -total_capacity = vehicle_capacity.sum() -print(f"Total demand: {total_demand}") -print(f"Total capacity: {total_capacity}") -if total_demand > total_capacity: - print("WARNING: Total demand exceeds total capacity!") -``` - -## Server Diagnostics - -### Check Response Structure - -```python -import json - -response = requests.get(f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS) -print(f"Status code: {response.status_code}") -print(f"Response: {json.dumps(response.json(), indent=2)}") -``` - -### Validate Payload Against Schema - -```bash -# Get OpenAPI spec -curl -s http://localhost:8000/cuopt.yaml > cuopt_spec.yaml - -# Check payload structure manually -``` - -### Common 422 Error Fixes - -```python -# ❌ Wrong field name -payload = {"transit_time_matrix_data": {...}} - -# ✅ Correct field name -payload = {"travel_time_matrix_data": {...}} - -# ❌ Wrong capacity format -"capacities": [[50], [50]] - -# ✅ Correct capacity format -"capacities": [[50, 50]] -``` - -## Memory Diagnostics - -### Check GPU Memory - -```python -import subprocess -result = subprocess.run(['nvidia-smi'], capture_output=True, text=True) -print(result.stdout) -``` - -### Estimate Problem Memory - -```python -# Rough estimate for routing -n_locations = 1000 -n_fleet = 50 -n_orders = 500 - -# Cost matrix: n_locations² * 4 bytes (float32) -matrix_size = n_locations * n_locations * 4 / 1e9 # GB -print(f"Cost matrix: ~{matrix_size:.2f} GB") -``` - -## Performance Diagnostics - -### Time the Solve - -```python -import time - -start = time.time() -problem.solve(settings) -elapsed = time.time() - start -print(f"Solve time: {elapsed:.2f}s") -``` - -### Enable Solver Logging - -```python -settings = SolverSettings() -settings.set_parameter("log_to_console", 1) -``` - ---- - -## Additional References - -| Topic | Resource | -|-------|----------| -| Python API docstrings | `python/cuopt/cuopt/routing/vehicle_routing.py` | -| LP/MILP Problem class | `python/cuopt/cuopt/linear_programming/problem.py` | -| Server API spec | `docs/cuopt/source/cuopt_spec.yaml` | -| Troubleshooting guide | [NVIDIA cuOpt Docs](https://docs.nvidia.com/cuopt/user-guide/latest/troubleshooting.html) | diff --git a/.github/skills/cuopt-installation/SKILL.md b/.github/skills/cuopt-installation/SKILL.md deleted file mode 100644 index 981063f477..0000000000 --- a/.github/skills/cuopt-installation/SKILL.md +++ /dev/null @@ -1,295 +0,0 @@ ---- -name: cuopt-installation -description: Install and set up NVIDIA cuOpt including pip, conda, Docker, and GPU requirements. Use when the user asks about installation, setup, environments, CUDA versions, GPU requirements, or getting started. ---- - -# cuOpt Installation Skill - -> **Prerequisites**: Read `cuopt-user-rules/SKILL.md` first for behavior rules. - -Set up NVIDIA cuOpt for GPU-accelerated optimization. - -## Before You Start: Required Questions - -**Ask these if not already clear:** - -1. **What's your environment?** - - Local machine with NVIDIA GPU? - - Cloud instance (AWS, GCP, Azure)? - - Docker/Kubernetes? - - No GPU (need cloud solution)? - -2. **What's your CUDA version?** - ```bash - nvcc --version - # or - nvidia-smi - ``` - -3. **What do you need?** - - Python API only? - - REST Server for production? - - C API for embedding? - -4. **Package manager preference?** - - pip - - conda - - Docker - -## System Requirements - -### GPU Requirements -- NVIDIA GPU with Compute Capability >= 7.0 (Volta or newer) -- Supported: V100, A100, H100, RTX 20xx/30xx/40xx, etc. -- NOT supported: GTX 10xx series (Pascal) - -### CUDA Requirements -- CUDA 12.x or CUDA 13.x (match package suffix) -- Compatible NVIDIA driver - -### Check Your System - -```bash -# Check GPU -nvidia-smi - -# Check CUDA version -nvcc --version - -# Check compute capability -nvidia-smi --query-gpu=compute_cap --format=csv -``` - -## Installation Methods - -### pip (Recommended for Python) - -```bash -# For CUDA 13 -pip install --extra-index-url=https://pypi.nvidia.com cuopt-cu13 - -# For CUDA 12 -pip install --extra-index-url=https://pypi.nvidia.com cuopt-cu12 - -# With version pinning (recommended for reproducibility) -pip install --extra-index-url=https://pypi.nvidia.com 'cuopt-cu12==26.2.*' -``` - -### pip: Server + Client - -```bash -# CUDA 12 example -pip install --extra-index-url=https://pypi.nvidia.com \ - cuopt-server-cu12 cuopt-sh-client - -# With version pinning -pip install --extra-index-url=https://pypi.nvidia.com \ - cuopt-server-cu12==26.02.* cuopt-sh-client==26.02.* -``` - -### conda - -```bash -# Python API -conda install -c rapidsai -c conda-forge -c nvidia cuopt - -# Server + client -conda install -c rapidsai -c conda-forge -c nvidia cuopt-server cuopt-sh-client - -# With version pinning -conda install -c rapidsai -c conda-forge -c nvidia cuopt=26.02.* -``` - -### Docker (Recommended for Server) - -```bash -# Pull image -docker pull nvidia/cuopt:latest-cuda12.9-py3.13 - -# Run server -docker run --gpus all -it --rm \ - -p 8000:8000 \ - -e CUOPT_SERVER_PORT=8000 \ - nvidia/cuopt:latest-cuda12.9-py3.13 - -# Verify -curl http://localhost:8000/cuopt/health -``` - -### Docker: Interactive Python - -```bash -docker run --gpus all -it --rm nvidia/cuopt:latest-cuda12.9-py3.13 python -``` - -## Verification - -### Verify Python Installation - -```python -# Test import -import cuopt -print(f"cuOpt version: {cuopt.__version__}") - -# Test GPU access -from cuopt import routing -dm = routing.DataModel(n_locations=3, n_fleet=1, n_orders=2) -print("GPU access OK") -``` - -### Verify Server Installation - -```bash -# Start server -python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000 & - -# Wait and test -sleep 5 -curl -s http://localhost:8000/cuopt/health | jq . -``` - -### Verify C API Installation - -```bash -# Find header -find $CONDA_PREFIX -name "cuopt_c.h" - -# Find library -find $CONDA_PREFIX -name "libcuopt.so" -``` - -## Common Installation Issues - -### "No module named 'cuopt'" - -```bash -# Check if installed -pip list | grep cuopt - -# Check Python environment -which python -echo $CONDA_PREFIX - -# Reinstall -pip uninstall cuopt-cu12 cuopt-cu13 -pip install --extra-index-url=https://pypi.nvidia.com cuopt-cu12 -``` - -### "CUDA not available" / GPU not detected - -```bash -# Check NVIDIA driver -nvidia-smi - -# Check CUDA toolkit -nvcc --version - -# In Python -import torch # if using PyTorch -print(torch.cuda.is_available()) -``` - -### Version mismatch (CUDA 12 vs 13) - -```bash -# Check installed CUDA -nvcc --version - -# Install matching package -# For CUDA 12.x -pip install cuopt-cu12 - -# For CUDA 13.x -pip install cuopt-cu13 -``` - -### Docker: "could not select device driver" - -```bash -# Install NVIDIA Container Toolkit -# Ubuntu: -distribution=$(. /etc/os-release;echo $ID$VERSION_ID) -curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | sudo apt-key add - -curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.list | \ - sudo tee /etc/apt/sources.list.d/nvidia-docker.list -sudo apt-get update -sudo apt-get install -y nvidia-container-toolkit -sudo systemctl restart docker -``` - -## Environment Setup - -### Create Clean Environment (conda) - -```bash -conda create -n cuopt-env python=3.11 -conda activate cuopt-env -conda install -c rapidsai -c conda-forge -c nvidia cuopt -``` - -### Create Clean Environment (pip/venv) - -```bash -python -m venv cuopt-env -source cuopt-env/bin/activate # Linux/Mac -pip install --extra-index-url=https://pypi.nvidia.com cuopt-cu12 -``` - -## Cloud Deployment - -### AWS - -- Use p4d.24xlarge (A100) or p3.2xlarge (V100) -- Deep Learning AMI has CUDA pre-installed -- Or use provided Docker image - -### GCP - -- Use a2-highgpu-1g (A100) or n1-standard with T4 -- Deep Learning VM has CUDA pre-installed - -### Azure - -- Use NC-series (T4, A100) -- Data Science VM has CUDA pre-installed - -## Offline Installation - -```bash -# Download wheels on connected machine -pip download --extra-index-url=https://pypi.nvidia.com cuopt-cu12 -d ./wheels - -# Transfer wheels directory to offline machine - -# Install from local wheels -pip install --no-index --find-links=./wheels cuopt-cu12 -``` - -## Upgrade - -```bash -# pip -pip install --upgrade --extra-index-url=https://pypi.nvidia.com cuopt-cu12 - -# conda -conda update -c rapidsai -c conda-forge -c nvidia cuopt - -# Docker -docker pull nvidia/cuopt:latest-cuda12.9-py3.13 -``` - -## Verification Examples - -See [resources/verification_examples.md](resources/verification_examples.md) for: -- Python installation verification -- LP/MILP verification -- Server verification -- C API verification -- System requirements check -- Docker verification - -## Additional Resources - -- Full installation docs: `docs/cuopt/source/cuopt-python/quick-start.rst` -- Server setup: `docs/cuopt/source/cuopt-server/quick-start.rst` -- [NVIDIA cuOpt Documentation](https://docs.nvidia.com/cuopt/user-guide/latest/) diff --git a/.github/skills/cuopt-lp-milp/SKILL.md b/.github/skills/cuopt-lp-milp/SKILL.md deleted file mode 100644 index 58dfb4f09a..0000000000 --- a/.github/skills/cuopt-lp-milp/SKILL.md +++ /dev/null @@ -1,240 +0,0 @@ ---- -name: cuopt-lp-milp -description: Solve Linear Programming (LP) and Mixed-Integer Linear Programming (MILP) with NVIDIA cuOpt. Use when the user asks about optimization with linear constraints, integer variables, scheduling, resource allocation, facility location, or production planning. ---- - -# cuOpt LP/MILP Skill - -> **Prerequisites**: Read `cuopt-user-rules/SKILL.md` first for behavior rules. - -Model and solve linear and mixed-integer linear programs using NVIDIA cuOpt's GPU-accelerated solver. - -## Before You Start: Required Questions - -**Ask these if not already clear:** - -1. **Problem formulation?** - - What are the decision variables? - - What is the objective (minimize/maximize what)? - - What are the constraints? - -2. **Variable types?** - - All continuous (LP)? - - Some integer/binary (MILP)? - -3. **Interface preference?** - - Python API (recommended for modeling) - - C API (native embedding) - - CLI (quick solve from MPS file) - - REST Server (production deployment) - -4. **Do you have an MPS file?** - - If yes, CLI or C API may be simpler - - If no, Python API is best for building the model - -## Interface Support - -| Interface | LP | MILP | -|-----------|:--:|:----:| -| Python | ✓ | ✓ | -| C API | ✓ | ✓ | -| CLI | ✓ | ✓ | -| REST | ✓ | ✓ | - -## Quick Reference: Python API - -### LP Example - -```python -from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings - -# Create problem -problem = Problem("MyLP") - -# Decision variables -x = problem.addVariable(lb=0, vtype=CONTINUOUS, name="x") -y = problem.addVariable(lb=0, vtype=CONTINUOUS, name="y") - -# Constraints -problem.addConstraint(2*x + 3*y <= 120, name="resource_a") -problem.addConstraint(4*x + 2*y <= 100, name="resource_b") - -# Objective -problem.setObjective(40*x + 30*y, sense=MAXIMIZE) - -# Solve -settings = SolverSettings() -settings.set_parameter("time_limit", 60) -problem.solve(settings) - -# Check status (CRITICAL: use PascalCase!) -if problem.Status.name in ["Optimal", "PrimalFeasible"]: - print(f"Objective: {problem.ObjValue}") - print(f"x = {x.getValue()}") - print(f"y = {y.getValue()}") -``` - -### MILP Example (with integer variables) - -```python -from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE - -problem = Problem("FacilityLocation") - -# Binary variable (integer with bounds 0-1) -open_facility = problem.addVariable(lb=0, ub=1, vtype=INTEGER, name="open") - -# Continuous variable -production = problem.addVariable(lb=0, vtype=CONTINUOUS, name="production") - -# Linking constraint: can only produce if facility is open -problem.addConstraint(production <= 1000 * open_facility, name="link") - -# Objective: fixed cost + variable cost -problem.setObjective(500*open_facility + 2*production, sense=MINIMIZE) - -# MILP-specific settings -settings = SolverSettings() -settings.set_parameter("time_limit", 120) -settings.set_parameter("mip_relative_gap", 0.01) # 1% optimality gap - -problem.solve(settings) - -# Check status -if problem.Status.name in ["Optimal", "FeasibleFound"]: - print(f"Open facility: {open_facility.getValue() > 0.5}") - print(f"Production: {production.getValue()}") -``` - -## CRITICAL: Status Checking - -**Status values use PascalCase, NOT ALL_CAPS:** - -```python -# ✅ CORRECT -if problem.Status.name in ["Optimal", "FeasibleFound"]: - print(problem.ObjValue) - -# ❌ WRONG - will silently fail! -if problem.Status.name == "OPTIMAL": # Never matches! - print(problem.ObjValue) -``` - -**LP Status Values:** `Optimal`, `NoTermination`, `NumericalError`, `PrimalInfeasible`, `DualInfeasible`, `IterationLimit`, `TimeLimit`, `PrimalFeasible` - -**MILP Status Values:** `Optimal`, `FeasibleFound`, `Infeasible`, `Unbounded`, `TimeLimit`, `NoTermination` - -## Quick Reference: C API - -```c -#include - -// CSR format for constraints -cuopt_int_t row_offsets[] = {0, 2, 4}; -cuopt_int_t col_indices[] = {0, 1, 0, 1}; -cuopt_float_t values[] = {2.0, 3.0, 4.0, 2.0}; - -// Variable types -char var_types[] = {CUOPT_CONTINUOUS, CUOPT_INTEGER}; - -cuOptCreateRangedProblem( - num_constraints, num_variables, CUOPT_MINIMIZE, - 0.0, // objective offset - objective_coefficients, - row_offsets, col_indices, values, - constraint_lower, constraint_upper, - var_lower, var_upper, - var_types, - &problem -); - -cuOptSolve(problem, settings, &solution); -cuOptGetObjectiveValue(solution, &obj_value); -``` - -## Quick Reference: CLI - -```bash -# Solve LP from MPS file -cuopt_cli problem.mps - -# With options -cuopt_cli problem.mps --time-limit 120 --mip-relative-tolerance 0.01 -``` - -## Common Modeling Patterns - -### Binary Selection -```python -# Select exactly k items from n -items = [problem.addVariable(lb=0, ub=1, vtype=INTEGER) for _ in range(n)] -problem.addConstraint(sum(items) == k) -``` - -### Big-M Linking -```python -# If y=1, then x <= 100; if y=0, x can be anything up to M -M = 10000 -problem.addConstraint(x <= 100 + M*(1 - y)) -``` - -### Piecewise Linear (SOS2) -```python -# Approximate nonlinear function with breakpoints -# Use lambda variables that sum to 1, at most 2 adjacent non-zero -``` - -## Solver Settings - -```python -settings = SolverSettings() - -# Time limit -settings.set_parameter("time_limit", 60) - -# MILP gap tolerance (stop when within X% of optimal) -settings.set_parameter("mip_relative_gap", 0.01) - -# Logging -settings.set_parameter("log_to_console", 1) -``` - -## Common Issues - -| Problem | Likely Cause | Fix | -|---------|--------------|-----| -| Status never "OPTIMAL" | Using wrong case | Use `"Optimal"` not `"OPTIMAL"` | -| Integer var has fractional value | Defined as CONTINUOUS | Use `vtype=INTEGER` | -| Infeasible | Conflicting constraints | Check constraint logic | -| Unbounded | Missing bounds | Add variable bounds | -| Slow solve | Large problem | Set time limit, increase gap tolerance | - -## Getting Dual Values (LP only) - -```python -if problem.Status.name == "Optimal": - constraint = problem.getConstraint("resource_a") - shadow_price = constraint.DualValue - print(f"Shadow price: {shadow_price}") -``` - -## Examples - -See `resources/` for complete examples: -- [Python API](resources/python_examples.md) — LP, MILP, knapsack, transportation -- [C API](resources/c_api_examples.md) — LP/MILP with build instructions -- [CLI](resources/cli_examples.md) — MPS file format and commands -- [REST Server](resources/server_examples.md) — curl and Python requests - -## When to Escalate - -Switch to **cuopt-qp** if: -- Objective has quadratic terms (x², x*y) - -Switch to **cuopt-debugging** if: -- Infeasible and can't determine why -- Numerical issues - -Switch to **cuopt-developer** if: -- User wants to modify solver internals diff --git a/.github/skills/cuopt-lp-milp/resources/cli_examples.md b/.github/skills/cuopt-lp-milp/resources/cli_examples.md deleted file mode 100644 index fe1d286780..0000000000 --- a/.github/skills/cuopt-lp-milp/resources/cli_examples.md +++ /dev/null @@ -1,166 +0,0 @@ -# LP/MILP: CLI Examples - -## LP from MPS File - -```bash -# Create sample LP in MPS format -cat > production.mps << 'EOF' -* Production Planning: maximize 40*chairs + 30*tables -* s.t. 2*chairs + 3*tables <= 240 (wood) -* 4*chairs + 2*tables <= 200 (labor) -NAME PRODUCTION -ROWS - N PROFIT - L WOOD - L LABOR -COLUMNS - CHAIRS PROFIT -40.0 - CHAIRS WOOD 2.0 - CHAIRS LABOR 4.0 - TABLES PROFIT -30.0 - TABLES WOOD 3.0 - TABLES LABOR 2.0 -RHS - RHS1 WOOD 240.0 - RHS1 LABOR 200.0 -ENDATA -EOF - -# Solve -cuopt_cli production.mps - -# With time limit -cuopt_cli production.mps --time-limit 30 -``` - -## MILP from MPS File - -```bash -# Create MILP with integer variables -cat > facility.mps << 'EOF' -* Facility location with binary variables -NAME FACILITY -ROWS - N COST - G DEMAND1 - L CAP1 - L CAP2 -COLUMNS - MARKER 'MARKER' 'INTORG' - OPEN1 COST 100.0 - OPEN1 CAP1 50.0 - OPEN2 COST 150.0 - OPEN2 CAP2 70.0 - MARKER 'MARKER' 'INTEND' - SHIP11 COST 5.0 - SHIP11 DEMAND1 1.0 - SHIP11 CAP1 -1.0 - SHIP21 COST 7.0 - SHIP21 DEMAND1 1.0 - SHIP21 CAP2 -1.0 -RHS - RHS1 DEMAND1 30.0 -BOUNDS - BV BND1 OPEN1 - BV BND1 OPEN2 - LO BND1 SHIP11 0.0 - LO BND1 SHIP21 0.0 -ENDATA -EOF - -# Solve MILP -cuopt_cli facility.mps --time-limit 60 --mip-relative-tolerance 0.01 -``` - -## Common CLI Options - -```bash -# Show all options -cuopt_cli --help - -# Time limit (seconds) -cuopt_cli problem.mps --time-limit 120 - -# MIP gap tolerance (stop when within X% of optimal) -cuopt_cli problem.mps --mip-relative-tolerance 0.001 - -# MIP absolute tolerance -cuopt_cli problem.mps --mip-absolute-tolerance 0.0001 - -# Enable presolve -cuopt_cli problem.mps --presolve - -# Iteration limit -cuopt_cli problem.mps --iteration-limit 10000 - -# Solver method (0=auto, 1=pdlp, 2=dual_simplex, 3=barrier) -cuopt_cli problem.mps --method 1 -``` - -## MPS Format Reference - -### Required Sections (in order) - -``` -NAME problem_name -ROWS - N objective_row (N = free/objective) - L constraint1 (L = <=) - G constraint2 (G = >=) - E constraint3 (E = ==) -COLUMNS - var1 row1 coefficient - var1 row2 coefficient -RHS - rhs1 row1 value -ENDATA -``` - -### Optional: BOUNDS Section - -``` -BOUNDS - LO bnd1 var1 0.0 (lower bound) - UP bnd1 var1 100.0 (upper bound) - FX bnd1 var2 50.0 (fixed value) - FR bnd1 var3 (free variable) - BV bnd1 var4 (binary 0/1) - LI bnd1 var5 0 (integer, lower bound) - UI bnd1 var5 10 (integer, upper bound) -``` - -### Integer Markers - -``` -COLUMNS - MARKER 'MARKER' 'INTORG' - int_var1 OBJ 1.0 - int_var2 OBJ 2.0 - MARKER 'MARKER' 'INTEND' - cont_var OBJ 3.0 -``` - -## Troubleshooting - -**"Failed to parse MPS file"** -- Check for missing ENDATA -- Verify section order: NAME, ROWS, COLUMNS, RHS, [BOUNDS], ENDATA -- Check integer markers format - -**"Problem is infeasible"** -- Check constraint directions (L/G/E) -- Verify RHS values are consistent - ---- - -## Additional References (tested in CI) - -For more complete examples, read these files: - -| Example | File | Description | -|---------|------|-------------| -| Basic LP | `docs/cuopt/source/cuopt-cli/examples/lp/examples/basic_lp_example.sh` | Simple LP via CLI | -| Basic MILP | `docs/cuopt/source/cuopt-cli/examples/milp/examples/basic_milp_example.sh` | MILP with integers | -| Solver Parameters | `docs/cuopt/source/cuopt-cli/examples/lp/examples/solver_parameters_example.sh` | CLI options | - -These examples are tested by CI and represent canonical usage. diff --git a/.github/skills/cuopt-lp-milp/resources/python_examples.md b/.github/skills/cuopt-lp-milp/resources/python_examples.md deleted file mode 100644 index eaaee59d48..0000000000 --- a/.github/skills/cuopt-lp-milp/resources/python_examples.md +++ /dev/null @@ -1,257 +0,0 @@ -# LP/MILP: Python API Examples - -## Linear Programming (LP) - -```python -""" -Production Planning LP: - maximize 40*chairs + 30*tables (profit) - subject to 2*chairs + 3*tables <= 240 (wood constraint) - 4*chairs + 2*tables <= 200 (labor constraint) - chairs, tables >= 0 -""" -from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings - -# Create problem -problem = Problem("ProductionPlanning") - -# Decision variables (continuous, non-negative) -chairs = problem.addVariable(lb=0, vtype=CONTINUOUS, name="chairs") -tables = problem.addVariable(lb=0, vtype=CONTINUOUS, name="tables") - -# Constraints -problem.addConstraint(2 * chairs + 3 * tables <= 240, name="wood") -problem.addConstraint(4 * chairs + 2 * tables <= 200, name="labor") - -# Objective: maximize profit -problem.setObjective(40 * chairs + 30 * tables, sense=MAXIMIZE) - -# Solver settings -settings = SolverSettings() -settings.set_parameter("time_limit", 60) -settings.set_parameter("log_to_console", 1) - -# Solve -problem.solve(settings) - -# Check status and extract results -status = problem.Status.name -print(f"Status: {status}") - -if status in ["Optimal", "PrimalFeasible"]: - print(f"Optimal profit: ${problem.ObjValue:.2f}") - print(f"Chairs to produce: {chairs.getValue():.1f}") - print(f"Tables to produce: {tables.getValue():.1f}") - - # Get dual values (shadow prices) - wood_constraint = problem.getConstraint("wood") - labor_constraint = problem.getConstraint("labor") - print(f"\nShadow price (wood): ${wood_constraint.DualValue:.2f} per unit") - print(f"Shadow price (labor): ${labor_constraint.DualValue:.2f} per unit") -else: - print(f"No optimal solution found. Status: {status}") -``` - -## Mixed-Integer Linear Programming (MILP) - -```python -""" -Facility Location MILP: -- Decide which warehouses to open (binary) -- Assign customers to open warehouses -- Minimize fixed costs + transportation costs -""" -from cuopt.linear_programming.problem import ( - Problem, CONTINUOUS, INTEGER, MINIMIZE -) -from cuopt.linear_programming.solver_settings import SolverSettings - -# Problem data -warehouses = ["W1", "W2", "W3"] -customers = ["C1", "C2", "C3", "C4"] - -fixed_costs = {"W1": 100, "W2": 150, "W3": 120} -capacities = {"W1": 50, "W2": 70, "W3": 60} -demands = {"C1": 20, "C2": 25, "C3": 15, "C4": 30} - -transport_cost = { - ("W1", "C1"): 5, ("W1", "C2"): 8, ("W1", "C3"): 6, ("W1", "C4"): 10, - ("W2", "C1"): 7, ("W2", "C2"): 4, ("W2", "C3"): 9, ("W2", "C4"): 5, - ("W3", "C1"): 6, ("W3", "C2"): 7, ("W3", "C3"): 4, ("W3", "C4"): 8, -} - -# Create problem -problem = Problem("FacilityLocation") - -# Binary variables: y[w] = 1 if warehouse w is open -y = {w: problem.addVariable(lb=0, ub=1, vtype=INTEGER, name=f"open_{w}") - for w in warehouses} - -# Continuous variables: x[w,c] = units shipped from w to c -x = {(w, c): problem.addVariable(lb=0, vtype=CONTINUOUS, name=f"ship_{w}_{c}") - for w in warehouses for c in customers} - -# Objective: minimize fixed + transportation costs -problem.setObjective( - sum(fixed_costs[w] * y[w] for w in warehouses) + - sum(transport_cost[w, c] * x[w, c] for w in warehouses for c in customers), - sense=MINIMIZE -) - -# Constraints: meet customer demand -for c in customers: - problem.addConstraint( - sum(x[w, c] for w in warehouses) == demands[c], - name=f"demand_{c}" - ) - -# Constraints: respect warehouse capacity (only if open) -for w in warehouses: - problem.addConstraint( - sum(x[w, c] for c in customers) <= capacities[w] * y[w], - name=f"capacity_{w}" - ) - -# Solver settings -settings = SolverSettings() -settings.set_parameter("time_limit", 120) -settings.set_parameter("mip_relative_gap", 0.01) - -# Solve -problem.solve(settings) - -# Results -status = problem.Status.name -print(f"Status: {status}") - -if status in ["Optimal", "FeasibleFound"]: - print(f"Total cost: ${problem.ObjValue:.2f}") - print("\nOpen warehouses:") - for w in warehouses: - if y[w].getValue() > 0.5: - print(f" {w} (fixed cost: ${fixed_costs[w]})") - - print("\nShipments:") - for w in warehouses: - for c in customers: - shipped = x[w, c].getValue() - if shipped > 0.01: - print(f" {w} -> {c}: {shipped:.1f} units") -``` - -## Knapsack Problem (MILP) - -```python -""" -0/1 Knapsack: select items to maximize value within weight limit -""" -from cuopt.linear_programming.problem import Problem, INTEGER, MAXIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings - -items = ["laptop", "camera", "phone", "tablet", "headphones"] -values = [1000, 500, 300, 600, 150] -weights = [3, 1, 0.5, 1.5, 0.3] -max_weight = 5 - -problem = Problem("Knapsack") - -# Binary variables: x[i] = 1 if item i is selected -x = [problem.addVariable(lb=0, ub=1, vtype=INTEGER, name=items[i]) - for i in range(len(items))] - -# Objective: maximize total value -problem.setObjective(sum(values[i] * x[i] for i in range(len(items))), sense=MAXIMIZE) - -# Constraint: weight limit -problem.addConstraint(sum(weights[i] * x[i] for i in range(len(items))) <= max_weight) - -problem.solve(SolverSettings()) - -if problem.Status.name in ["Optimal", "FeasibleFound"]: - print(f"Total value: ${problem.ObjValue:.0f}") - print("Selected items:") - for i, item in enumerate(items): - if x[i].getValue() > 0.5: - print(f" {item}: value=${values[i]}, weight={weights[i]}") -``` - -## Transportation Problem (LP) - -```python -""" -Minimize shipping cost from suppliers to customers -""" -from cuopt.linear_programming.problem import Problem, CONTINUOUS, MINIMIZE - -suppliers = ["S1", "S2"] -customers = ["C1", "C2", "C3"] -supply = {"S1": 100, "S2": 150} -demand = {"C1": 80, "C2": 70, "C3": 100} -cost = { - ("S1", "C1"): 4, ("S1", "C2"): 6, ("S1", "C3"): 8, - ("S2", "C1"): 5, ("S2", "C2"): 3, ("S2", "C3"): 7, -} - -problem = Problem("Transportation") - -x = {(s, c): problem.addVariable(lb=0, vtype=CONTINUOUS, name=f"x_{s}_{c}") - for s in suppliers for c in customers} - -# Minimize total shipping cost -problem.setObjective(sum(cost[s,c] * x[s,c] for s in suppliers for c in customers), - sense=MINIMIZE) - -# Supply constraints -for s in suppliers: - problem.addConstraint(sum(x[s,c] for c in customers) <= supply[s]) - -# Demand constraints -for c in customers: - problem.addConstraint(sum(x[s,c] for s in suppliers) >= demand[c]) - -problem.solve() - -if problem.Status.name in ("Optimal", "PrimalFeasible"): - print(f"Total cost: ${problem.ObjValue:.2f}") - for s in suppliers: - for c in customers: - val = x[s,c].getValue() - if val > 0.01: - print(f" {s} -> {c}: {val:.0f} units") -``` - -## Status Checking (Critical) - -```python -# ✅ CORRECT - use PascalCase -if problem.Status.name in ["Optimal", "FeasibleFound"]: - print(problem.ObjValue) - -# ❌ WRONG - will silently fail! -if problem.Status.name == "OPTIMAL": # Never matches! - print(problem.ObjValue) - -# LP status values: Optimal, PrimalFeasible, PrimalInfeasible, -# DualInfeasible, TimeLimit, NumericalError -# MILP status values: Optimal, FeasibleFound, Infeasible, -# Unbounded, TimeLimit, NoTermination -``` - ---- - -## Additional References (tested in CI) - -For more complete examples, read these files: - -| Example | File | Description | -|---------|------|-------------| -| Simple LP | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_lp_example.py` | Basic LP setup | -| Simple MILP | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_milp_example.py` | Integer variables | -| Production Planning | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/production_planning_example.py` | Real-world LP | -| Expressions | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/expressions_constraints_example.py` | Advanced constraint syntax | -| Incumbent Solutions | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/incumbent_solutions_example.py` | Tracking MIP progress | -| Warmstart | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/pdlp_warmstart_example.py` | Warm starting LP | -| Solution Handling | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/solution_example.py` | Working with results | - -These examples are tested by CI (`ci/test_doc_examples.sh`) and represent canonical usage. diff --git a/.github/skills/cuopt-lp-milp/resources/server_examples.md b/.github/skills/cuopt-lp-milp/resources/server_examples.md deleted file mode 100644 index 521d8a6ead..0000000000 --- a/.github/skills/cuopt-lp-milp/resources/server_examples.md +++ /dev/null @@ -1,208 +0,0 @@ -# LP/MILP: REST Server Examples - -## LP Request (curl) - -```bash -# Production Planning LP via REST -# maximize 40*chairs + 30*tables -# s.t. 2*chairs + 3*tables <= 240 -# 4*chairs + 2*tables <= 200 - -REQID=$(curl -s -X POST "http://localhost:8000/cuopt/request" \ - -H "Content-Type: application/json" \ - -H "CLIENT-VERSION: custom" \ - -d '{ - "csr_constraint_matrix": { - "offsets": [0, 2, 4], - "indices": [0, 1, 0, 1], - "values": [2.0, 3.0, 4.0, 2.0] - }, - "constraint_bounds": { - "upper_bounds": [240.0, 200.0], - "lower_bounds": ["ninf", "ninf"] - }, - "objective_data": { - "coefficients": [40.0, 30.0], - "scalability_factor": 1.0, - "offset": 0.0 - }, - "variable_bounds": { - "upper_bounds": ["inf", "inf"], - "lower_bounds": [0.0, 0.0] - }, - "maximize": true, - "solver_config": { - "tolerances": {"optimality": 0.0001}, - "time_limit": 60 - } - }' | jq -r '.reqId') - -echo "Request ID: $REQID" - -# Get solution -sleep 2 -curl -s "http://localhost:8000/cuopt/solution/$REQID" \ - -H "CLIENT-VERSION: custom" | jq . -``` - -## MILP Request (curl) - -```bash -# Add integer variable types -REQID=$(curl -s -X POST "http://localhost:8000/cuopt/request" \ - -H "Content-Type: application/json" \ - -H "CLIENT-VERSION: custom" \ - -d '{ - "csr_constraint_matrix": { - "offsets": [0, 2, 4], - "indices": [0, 1, 0, 1], - "values": [2.0, 3.0, 4.0, 2.0] - }, - "constraint_bounds": { - "upper_bounds": [240.0, 200.0], - "lower_bounds": ["ninf", "ninf"] - }, - "objective_data": { - "coefficients": [40.0, 30.0] - }, - "variable_bounds": { - "upper_bounds": ["inf", "inf"], - "lower_bounds": [0.0, 0.0] - }, - "variable_types": ["integer", "continuous"], - "maximize": true, - "solver_config": { - "time_limit": 120, - "tolerances": { - "mip_relative_gap": 0.01 - } - } - }' | jq -r '.reqId') - -echo "Request ID: $REQID" - -# Poll for solution (MILP may take longer than LP) -while true; do - RESULT=$(curl -s "http://localhost:8000/cuopt/solution/$REQID" \ - -H "CLIENT-VERSION: custom") - STATUS=$(echo "$RESULT" | jq -r '.response.status // empty') - if [ -n "$STATUS" ]; then - echo "$RESULT" | jq . - break - fi - sleep 2 -done -``` - -## LP Request (Python) - -```python -import requests -import time - -SERVER = "http://localhost:8000" -HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} - -payload = { - "csr_constraint_matrix": { - "offsets": [0, 2, 4], - "indices": [0, 1, 0, 1], - "values": [2.0, 3.0, 4.0, 2.0] - }, - "constraint_bounds": { - "upper_bounds": [240.0, 200.0], - "lower_bounds": ["ninf", "ninf"] - }, - "objective_data": { - "coefficients": [40.0, 30.0], - "scalability_factor": 1.0, - "offset": 0.0 - }, - "variable_bounds": { - "upper_bounds": ["inf", "inf"], - "lower_bounds": [0.0, 0.0] - }, - "maximize": True, - "solver_config": { - "time_limit": 60 - } -} - -# Submit -response = requests.post(f"{SERVER}/cuopt/request", json=payload, headers=HEADERS) -req_id = response.json()["reqId"] -print(f"Submitted: {req_id}") - -# Poll for solution -for _ in range(30): - response = requests.get(f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS) - result = response.json() - - if "response" in result: - print(f"Status: {result['response'].get('status')}") - print(f"Objective: {result['response'].get('objective_value')}") - print(f"Solution: {result['response'].get('primal_solution')}") - break - time.sleep(1) -``` - -## CSR Matrix Format - -The constraint matrix uses Compressed Sparse Row (CSR) format: - -``` -Matrix: [2, 3] (row 0: 2*x0 + 3*x1) - [4, 2] (row 1: 4*x0 + 2*x1) - -CSR format: - offsets: [0, 2, 4] # Row pointers - indices: [0, 1, 0, 1] # Column indices - values: [2.0, 3.0, 4.0, 2.0] # Non-zero values -``` - -## Special Values - -```json -{ - "constraint_bounds": { - "lower_bounds": ["ninf", "ninf"], - "upper_bounds": [100.0, "inf"] - } -} -``` - -- `"ninf"` — negative infinity (-∞) -- `"inf"` — positive infinity (+∞) - -## Variable Types - -```json -{ - "variable_types": ["continuous", "integer", "binary"] -} -``` - -- `"continuous"` - real-valued -- `"integer"` - integer-valued -- `"binary"` - 0 or 1 only - ---- - -## Additional References (tested in CI) - -For more complete examples, read these files: - -| Example | File | Description | -|---------|------|-------------| -| Basic LP (Python) | `docs/cuopt/source/cuopt-server/examples/lp/examples/basic_lp_example.py` | LP via REST | -| Basic LP (curl) | `docs/cuopt/source/cuopt-server/examples/lp/examples/basic_lp_example.sh` | LP shell script | -| MPS Input | `docs/cuopt/source/cuopt-server/examples/lp/examples/mps_file_example.py` | MPS file format | -| MPS DataModel | `docs/cuopt/source/cuopt-server/examples/lp/examples/mps_datamodel_example.py` | MPS in payload | -| Warmstart | `docs/cuopt/source/cuopt-server/examples/lp/examples/warmstart_example.py` | Warm starting | -| Basic MILP (Python) | `docs/cuopt/source/cuopt-server/examples/milp/examples/basic_milp_example.py` | MILP via REST | -| Basic MILP (curl) | `docs/cuopt/source/cuopt-server/examples/milp/examples/basic_milp_example.sh` | MILP shell script | -| Incumbent Callback | `docs/cuopt/source/cuopt-server/examples/milp/examples/incumbent_callback_example.py` | MIP progress tracking | -| Abort Job | `docs/cuopt/source/cuopt-server/examples/milp/examples/abort_job_example.py` | Canceling requests | -| Batch Mode | `docs/cuopt/source/cuopt-server/examples/lp/examples/batch_mode_example.sh` | Multiple problems | - -These examples are tested by CI (`ci/test_doc_examples.sh`) and represent canonical usage. diff --git a/.github/skills/cuopt-qp/SKILL.md b/.github/skills/cuopt-qp/SKILL.md deleted file mode 100644 index 5fd7d83af6..0000000000 --- a/.github/skills/cuopt-qp/SKILL.md +++ /dev/null @@ -1,195 +0,0 @@ ---- -name: cuopt-qp -description: Solve Quadratic Programming (QP) with NVIDIA cuOpt. Use when the user asks about quadratic objectives, portfolio optimization, variance minimization, or least squares problems. Note that QP support is currently in beta. ---- - -# cuOpt QP Skill - -> **Prerequisites**: Read `cuopt-user-rules/SKILL.md` first for behavior rules. - -Model and solve quadratic programs using NVIDIA cuOpt. **QP support is currently in beta.** - -## Before You Start: Required Questions - -**Ask these if not already clear:** - -1. **Is this actually QP?** - - Does the objective have x² or x*y terms? - - Are constraints still linear? - - (If constraints are also quadratic, cuOpt doesn't support that) - -2. **Minimize or maximize?** - - ⚠️ **QP only supports MINIMIZE** - - For maximization, negate the objective - -3. **Is the quadratic form convex?** - - Q matrix should be positive semi-definite for minimization - - Non-convex QP may not solve correctly - -## Critical Constraints - -### ⚠️ MINIMIZE ONLY - -**QP objectives MUST be minimization.** The solver rejects maximize for QP. - -```python -# ❌ WRONG - will fail -problem.setObjective(x*x + y*y, sense=MAXIMIZE) - -# ✅ CORRECT - minimize instead -# To maximize f(x), minimize -f(x) -problem.setObjective(-(x*x + y*y), sense=MINIMIZE) -``` - -### Interface Support - -| Interface | QP Support | -|-----------|:----------:| -| Python | ✓ (beta) | -| C API | ✓ | -| REST | ✗ | -| CLI | ✗ | - -## Quick Reference: Python API - -### Portfolio Optimization Example - -```python -""" -Minimize portfolio variance (risk): - minimize x^T * Q * x - subject to sum(x) = 1 (fully invested) - r^T * x >= target (minimum return) - x >= 0 (no short selling) -""" -from cuopt.linear_programming.problem import Problem, CONTINUOUS, MINIMIZE -from cuopt.linear_programming.solver_settings import SolverSettings - -problem = Problem("Portfolio") - -# Portfolio weights (decision variables) -x1 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_a") -x2 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_b") -x3 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_c") - -# Expected returns -r1, r2, r3 = 0.12, 0.08, 0.05 - -# Quadratic objective: variance = x^T * Q * x -# Q = [[0.04, 0.01, 0.005], -# [0.01, 0.02, 0.008], -# [0.005, 0.008, 0.01]] -# Expanded: 0.04*x1² + 0.02*x2² + 0.01*x3² + 2*0.01*x1*x2 + ... -problem.setObjective( - 0.04*x1*x1 + 0.02*x2*x2 + 0.01*x3*x3 + - 0.02*x1*x2 + 0.01*x1*x3 + 0.016*x2*x3, - sense=MINIMIZE # MUST be minimize for QP! -) - -# Linear constraints -problem.addConstraint(x1 + x2 + x3 == 1, name="budget") -problem.addConstraint(r1*x1 + r2*x2 + r3*x3 >= 0.08, name="min_return") - -# Solve -settings = SolverSettings() -settings.set_parameter("time_limit", 60) -problem.solve(settings) - -# Check results -if problem.Status.name in ["Optimal", "PrimalFeasible"]: - print(f"Variance: {problem.ObjValue:.6f}") - print(f"Std Dev: {problem.ObjValue**0.5:.4f}") - print(f"Allocation: A={x1.getValue():.2%}, B={x2.getValue():.2%}, C={x3.getValue():.2%}") -``` - -### Least Squares Example - -```python -""" -Minimize ||Ax - b||² = x^T*A^T*A*x - 2*b^T*A*x + b^T*b -""" -problem = Problem("LeastSquares") - -x = problem.addVariable(lb=-100, ub=100, vtype=CONTINUOUS, name="x") -y = problem.addVariable(lb=-100, ub=100, vtype=CONTINUOUS, name="y") - -# Quadratic objective: (x-3)² + (y-4)² = x² + y² - 6x - 8y + 25 -problem.setObjective( - x*x + y*y - 6*x - 8*y + 25, - sense=MINIMIZE -) - -problem.solve(SolverSettings()) - -print(f"x = {x.getValue()}") # Should be ~3 -print(f"y = {y.getValue()}") # Should be ~4 -``` - -## Formulating Quadratic Objectives - -### From Covariance Matrix - -Given covariance matrix Q and weights x: -``` -variance = x^T * Q * x = Σᵢ Σⱼ Qᵢⱼ * xᵢ * xⱼ -``` - -Expand manually: -```python -# Q = [[a, b], [b, c]] -# x^T * Q * x = a*x1² + 2b*x1*x2 + c*x2² -objective = a*x1*x1 + 2*b*x1*x2 + c*x2*x2 -``` - -### Maximization Workaround - -To maximize `f(x) = -x² + 4x`: -```python -# maximize -x² + 4x -# = minimize -(-x² + 4x) -# = minimize x² - 4x -problem.setObjective(x*x - 4*x, sense=MINIMIZE) -# Then negate the objective value for the true maximum -true_max = -problem.ObjValue -``` - -## Status Checking - -Same as LP/MILP - use PascalCase: - -```python -if problem.Status.name in ["Optimal", "PrimalFeasible"]: - print(f"Optimal variance: {problem.ObjValue}") -``` - -## Common Issues - -| Problem | Likely Cause | Fix | -|---------|--------------|-----| -| "Quadratic problems must be minimized" | Using MAXIMIZE | Use MINIMIZE, negate objective | -| Poor convergence | Non-convex Q | Ensure Q is positive semi-definite | -| NumericalError | Ill-conditioned Q | Scale variables, regularize | -| Slow solve | Large dense Q | Check if problem can be simplified | - -## Solver Notes - -- QP uses **Barrier method** internally (different from LP/MILP defaults) -- May be more sensitive to numerical issues than LP -- Beta status means API may change in future versions - -## Examples - -See `resources/` for complete examples: -- [Python API](resources/python_examples.md) — portfolio, least squares, maximization workaround - -## When to Escalate - -Switch to **cuopt-lp-milp** if: -- Objective is actually linear (no x² or x*y terms) - -Switch to **cuopt-debugging** if: -- Numerical errors -- Unexpected results - -Switch to **cuopt-developer** if: -- Need features not in beta QP diff --git a/.github/skills/cuopt-routing/SKILL.md b/.github/skills/cuopt-routing/SKILL.md deleted file mode 100644 index e634da7dba..0000000000 --- a/.github/skills/cuopt-routing/SKILL.md +++ /dev/null @@ -1,298 +0,0 @@ ---- -name: cuopt-routing -description: Solve vehicle routing problems (VRP, TSP, PDP) with NVIDIA cuOpt. Use when the user asks about delivery optimization, fleet routing, time windows, capacities, pickup-delivery pairs, or traveling salesman problems. ---- - -# cuOpt Routing Skill - -> **Prerequisites**: Read `cuopt-user-rules/SKILL.md` first for behavior rules. - -Model and solve vehicle routing problems using NVIDIA cuOpt's GPU-accelerated solver. - -## Before You Start: Required Questions - -**Ask these if not already clear:** - -1. **Problem type?** - - TSP (single vehicle, visit all locations) - - VRP (multiple vehicles, capacity constraints) - - PDP (pickup and delivery pairs) - -2. **What constraints?** - - Time windows (earliest/latest arrival)? - - Vehicle capacities? - - Service times at locations? - - Multiple depots? - - Vehicle-specific start/end locations? - -3. **What data do you have?** - - Cost/distance matrix or coordinates? - - Demand per location? - - Fleet size fixed or to optimize? - -4. **Interface preference?** - - Python API (in-process) - - REST Server (production/async) - -## Interface Support - -| Interface | Routing Support | -|-----------|:---------------:| -| Python | ✓ | -| REST | ✓ | -| C API | ✗ | -| CLI | ✗ | - -## Quick Reference: Python API - -### Minimal VRP Example - -```python -import cudf -from cuopt import routing - -# Cost matrix (n_locations x n_locations) -cost_matrix = cudf.DataFrame([ - [0, 10, 15, 20], - [10, 0, 12, 18], - [15, 12, 0, 10], - [20, 18, 10, 0], -], dtype="float32") - -# Build data model -dm = routing.DataModel( - n_locations=4, # Total locations including depot - n_fleet=2, # Number of vehicles - n_orders=3 # Orders to fulfill (locations 1,2,3) -) - -# Required: cost matrix -dm.add_cost_matrix(cost_matrix) - -# Required: order locations (which location each order is at) -dm.set_order_locations(cudf.Series([1, 2, 3])) - -# Solve -solution = routing.Solve(dm, routing.SolverSettings()) - -# Check result -if solution.get_status() == 0: # SUCCESS - solution.display_routes() -``` - -### Adding Constraints - -```python -# Time windows (need transit time matrix) -dm.add_transit_time_matrix(transit_time_matrix) -dm.set_order_time_windows( - cudf.Series([0, 10, 20]), # earliest - cudf.Series([50, 60, 70]) # latest -) - -# Capacities -dm.add_capacity_dimension( - "weight", - cudf.Series([20, 30, 25]), # demand per order - cudf.Series([100, 100]) # capacity per vehicle -) - -# Service times -dm.set_order_service_times(cudf.Series([5, 5, 5])) - -# Vehicle locations (start/end) -dm.set_vehicle_locations( - cudf.Series([0, 0]), # start at depot - cudf.Series([0, 0]) # return to depot -) - -# Vehicle time windows -dm.set_vehicle_time_windows( - cudf.Series([0, 0]), # earliest start - cudf.Series([200, 200]) # latest return -) -``` - -### Pickup and Delivery (PDP) - -```python -# Demand: positive=pickup, negative=delivery (must sum to 0 per pair) -demand = cudf.Series([10, -10, 15, -15]) - -# Pair indices: order 0 pairs with 1, order 2 pairs with 3 -dm.set_pickup_delivery_pairs( - cudf.Series([0, 2]), # pickup order indices - cudf.Series([1, 3]) # delivery order indices -) -``` - -### Precedence Constraints - -Use `add_order_precedence()` to require certain orders to be visited before others. - -**Important:** This is a per-node API — call it once for each order that has predecessors. - -```python -import numpy as np - -# Order 2 must come after orders 0 and 1 -dm.add_order_precedence( - node_id=2, # this order - preceding_nodes=np.array([0, 1]) # must come after these -) - -# Order 3 must come after order 2 -dm.add_order_precedence( - node_id=3, - preceding_nodes=np.array([2]) -) -``` - -**Rules:** -- Call once per order that has predecessors -- `preceding_nodes` is a numpy array of order indices -- Circular dependencies are NOT allowed (A before B before A) -- Orders without precedence constraints don't need a call - -**Example: Assembly sequence** -```python -# Task B requires Task A to be done first -# Task C requires Tasks A and B to be done first -dm.add_order_precedence(1, np.array([0])) # B after A -dm.add_order_precedence(2, np.array([0, 1])) # C after A and B -``` - -## Quick Reference: REST Server - -### Terminology Difference - -| Concept | Python API | REST Server | -|---------|------------|-------------| -| Jobs | `order_locations` | `task_locations` | -| Time windows | `set_order_time_windows()` | `task_time_windows` | -| Service times | `set_order_service_times()` | `service_times` | - -### Minimal REST Payload - -```json -{ - "cost_matrix_data": { - "data": {"0": [[0,10,15],[10,0,12],[15,12,0]]} - }, - "travel_time_matrix_data": { - "data": {"0": [[0,10,15],[10,0,12],[15,12,0]]} - }, - "task_data": { - "task_locations": [1, 2] - }, - "fleet_data": { - "vehicle_locations": [[0, 0]], - "capacities": [[100]] - }, - "solver_config": { - "time_limit": 10 - } -} -``` - -## Solution Checking - -```python -status = solution.get_status() -# 0 = SUCCESS -# 1 = FAIL -# 2 = TIMEOUT -# 3 = EMPTY - -if status == 0: - solution.display_routes() - route_df = solution.get_route() - total_cost = solution.get_total_objective() -else: - print(f"Error: {solution.get_error_message()}") - infeasible = solution.get_infeasible_orders() - if len(infeasible) > 0: - print(f"Infeasible orders: {infeasible.to_list()}") -``` - -## Solution DataFrame Schema - -`solution.get_route()` returns a `cudf.DataFrame` with these columns: - -| Column | Type | Description | -|--------|------|-------------| -| `route` | int | Order/task index in the route sequence | -| `truck_id` | int | Vehicle ID assigned to this stop | -| `location` | int | Location index (0 = depot typically) | -| `arrival_stamp` | float | Arrival time at this location | - -**Example output:** -``` - route arrival_stamp truck_id location -0 0 0.0 1 0 # Vehicle 1 starts at depot -1 3 2.0 1 3 # Vehicle 1 visits location 3 -2 2 4.0 1 2 # Vehicle 1 visits location 2 -3 0 5.0 1 0 # Vehicle 1 returns to depot -4 0 0.0 0 0 # Vehicle 0 starts at depot -5 1 1.0 0 1 # Vehicle 0 visits location 1 -6 0 3.0 0 0 # Vehicle 0 returns to depot -``` - -**Working with results:** -```python -route_df = solution.get_route() - -# Routes per vehicle -for vid in route_df["truck_id"].unique().to_arrow().tolist(): - vehicle_route = route_df[route_df["truck_id"] == vid] - locations = vehicle_route["location"].to_arrow().tolist() - print(f"Vehicle {vid}: {locations}") - -# Total travel time -max_arrival = route_df["arrival_stamp"].max() -``` - -## Common Issues - -| Problem | Likely Cause | Fix | -|---------|--------------|-----| -| Empty solution | Time windows too tight | Widen windows or check travel times | -| Infeasible orders | Demand > capacity | Increase fleet or capacity | -| Status != 0 | Missing transit time matrix | Add `add_transit_time_matrix()` when using time windows | -| Wrong route cost | Matrix not symmetric | Check cost_matrix values | - -## Data Type Requirements - -```python -# Always use explicit dtypes -cost_matrix = cost_matrix.astype("float32") -order_locations = cudf.Series([...], dtype="int32") -demand = cudf.Series([...], dtype="int32") -vehicle_capacity = cudf.Series([...], dtype="int32") -time_windows = cudf.Series([...], dtype="int32") -``` - -## Solver Settings - -```python -ss = routing.SolverSettings() -ss.set_time_limit(30) # seconds -ss.set_verbose_mode(True) # enable progress output -ss.set_error_logging_mode(True) # log constraint errors if infeasible -``` - -## Examples - -See `resources/` for complete examples: -- [Python API](resources/python_examples.md) — VRP, PDP, multi-depot -- [REST Server](resources/server_examples.md) — curl and Python requests - -## When to Escalate - -Switch to **cuopt-debugging** if: -- Solution is infeasible and you can't determine why -- Performance is unexpectedly slow - -Switch to **cuopt-developer** if: -- User wants to modify solver behavior -- User wants to add new constraint types diff --git a/.github/skills/cuopt-server/SKILL.md b/.github/skills/cuopt-server/SKILL.md deleted file mode 100644 index e118a0ba43..0000000000 --- a/.github/skills/cuopt-server/SKILL.md +++ /dev/null @@ -1,356 +0,0 @@ ---- -name: cuopt-server -description: Deploy and integrate cuOpt REST server for production use. Use when the user asks about REST API, HTTP endpoints, deployment, curl requests, microservices, async solving, or server payloads. ---- - -# cuOpt Server Skill - -> **Prerequisites**: Read `cuopt-user-rules/SKILL.md` first for behavior rules. - -Deploy and use the cuOpt REST server for production optimization workloads. - -## Before You Start: Required Questions - -**Ask these if not already clear:** - -1. **Problem type?** - - Routing (VRP/TSP/PDP)? - - LP/MILP? - - (Note: QP not supported via REST) - -2. **Deployment target?** - - Local development? - - Docker/Kubernetes? - - Cloud service? - -3. **Client preference?** - - curl (quick testing) - - Python requests - - cuopt-sh-client library - -## Server Capabilities - -| Problem Type | REST Support | -|--------------|:------------:| -| Routing | ✓ | -| LP | ✓ | -| MILP | ✓ | -| QP | ✗ | - -## Starting the Server - -### Direct (Development) - -```bash -python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000 -``` - -### Docker (Production) - -```bash -docker run --gpus all -d \ - -p 8000:8000 \ - -e CUOPT_SERVER_PORT=8000 \ - --name cuopt-server \ - nvidia/cuopt:latest-cuda12.9-py3.13 -``` - -### Verify Running - -```bash -curl http://localhost:8000/cuopt/health -# Expected: {"status": "healthy"} -``` - -## API Endpoints - -| Endpoint | Method | Purpose | -|----------|--------|---------| -| `/cuopt/health` | GET | Health check | -| `/cuopt/request` | POST | Submit optimization request | -| `/cuopt/solution/{reqId}` | GET | Get solution by request ID | -| `/cuopt.yaml` | GET | OpenAPI specification | -| `/cuopt/docs` | GET | Swagger UI | - -## Workflow - -1. **POST** problem to `/cuopt/request` → get `reqId` -2. **Poll** `/cuopt/solution/{reqId}` until solution ready -3. **Parse** response - -## Routing Request Example - -### curl - -```bash -REQID=$(curl -s -X POST "http://localhost:8000/cuopt/request" \ - -H "Content-Type: application/json" \ - -H "CLIENT-VERSION: custom" \ - -d '{ - "cost_matrix_data": { - "data": {"0": [[0,10,15],[10,0,12],[15,12,0]]} - }, - "travel_time_matrix_data": { - "data": {"0": [[0,10,15],[10,0,12],[15,12,0]]} - }, - "task_data": { - "task_locations": [1, 2], - "demand": [[10, 20]], - "task_time_windows": [[0, 100], [0, 100]], - "service_times": [5, 5] - }, - "fleet_data": { - "vehicle_locations": [[0, 0]], - "capacities": [[50]], - "vehicle_time_windows": [[0, 200]] - }, - "solver_config": {"time_limit": 5} - }' | jq -r '.reqId') - -echo "Request ID: $REQID" - -# Poll for solution -sleep 2 -curl -s "http://localhost:8000/cuopt/solution/$REQID" \ - -H "CLIENT-VERSION: custom" | jq . -``` - -### Python - -```python -import requests -import time - -SERVER = "http://localhost:8000" -HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} - -payload = { - "cost_matrix_data": { - "data": {"0": [[0,10,15],[10,0,12],[15,12,0]]} - }, - "travel_time_matrix_data": { - "data": {"0": [[0,10,15],[10,0,12],[15,12,0]]} - }, - "task_data": { - "task_locations": [1, 2], - "demand": [[10, 20]], - "task_time_windows": [[0, 100], [0, 100]], # optional - "service_times": [5, 5] # optional - }, - "fleet_data": { - "vehicle_locations": [[0, 0]], - "capacities": [[50]], - "vehicle_time_windows": [[0, 200]] # optional - }, - "solver_config": {"time_limit": 5} -} - -# Submit -resp = requests.post(f"{SERVER}/cuopt/request", json=payload, headers=HEADERS) -req_id = resp.json()["reqId"] - -# Poll -for _ in range(30): - resp = requests.get(f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS) - result = resp.json() - if "response" in result: - print(result["response"]["solver_response"]) - break - time.sleep(1) -``` - -## LP/MILP Request Example - -```bash -curl -s -X POST "http://localhost:8000/cuopt/request" \ - -H "Content-Type: application/json" \ - -H "CLIENT-VERSION: custom" \ - -d '{ - "csr_constraint_matrix": { - "offsets": [0, 2, 4], - "indices": [0, 1, 0, 1], - "values": [2.0, 3.0, 4.0, 2.0] - }, - "constraint_bounds": { - "upper_bounds": [240.0, 200.0], - "lower_bounds": ["ninf", "ninf"] - }, - "objective_data": { - "coefficients": [40.0, 30.0], - "scalability_factor": 1.0, - "offset": 0.0 - }, - "variable_bounds": { - "upper_bounds": ["inf", "inf"], - "lower_bounds": [0.0, 0.0] - }, - "maximize": true, - "solver_config": {"time_limit": 60} - }' -``` - -## Terminology: REST vs Python API - -**CRITICAL:** REST API uses different terminology than Python API. - -| Concept | Python API | REST API | -|---------|------------|----------| -| Orders/Jobs | `order_locations` | `task_locations` | -| Time windows | `set_order_time_windows()` | `task_time_windows` | -| Service times | `set_order_service_times()` | `service_times` | -| Transit matrix | `add_transit_time_matrix()` | `travel_time_matrix_data` | - -## Common Payload Mistakes - -### Wrong field names - -```json -// ❌ WRONG -"transit_time_matrix_data": {...} - -// ✅ CORRECT -"travel_time_matrix_data": {...} -``` - -### Wrong capacity format - -```json -// ❌ WRONG - per vehicle -"capacities": [[50], [50]] - -// ✅ CORRECT - per dimension across all vehicles -"capacities": [[50, 50]] -``` - -### Missing required fields - -Routing requires at minimum: -- `cost_matrix_data` -- `task_data.task_locations` -- `fleet_data.vehicle_locations` -- `fleet_data.capacities` - -## Response Structure - -### Routing Success - -```json -{ - "reqId": "abc123", - "response": { - "solver_response": { - "status": 0, - "solution_cost": 45.0, - "vehicle_data": { - "0": {"route": [0, 1, 2, 0], "arrival_times": [...]} - } - } - } -} -``` - -### LP/MILP Success - -```json -{ - "reqId": "abc123", - "response": { - "status": "Optimal", - "objective_value": 1600.0, - "primal_solution": [30.0, 60.0] - } -} -``` - -## Error Handling - -### 422 Validation Error - -Check the error message for field issues: -```bash -curl ... | jq '.error' -``` - -Compare against OpenAPI spec at `/cuopt.yaml` - -### 500 Server Error - -- Check server logs -- Capture `reqId` for debugging -- Try with smaller problem - -### Polling Returns Empty - -- Solution still computing - keep polling -- Check `solver_config.time_limit` - -## Server Configuration - -### Environment Variables - -```bash -CUOPT_SERVER_PORT=8000 -CUOPT_SERVER_HOST=0.0.0.0 -``` - -### Command Line Options - -```bash -python -m cuopt_server.cuopt_service \ - --ip 0.0.0.0 \ - --port 8000 \ - --workers 4 -``` - -## Production Considerations - -### Health Checks - -```bash -# Kubernetes liveness probe -curl -f http://localhost:8000/cuopt/health - -# Readiness check -curl -f http://localhost:8000/cuopt/health -``` - -### Resource Limits - -```yaml -# Kubernetes example -resources: - limits: - nvidia.com/gpu: 1 - memory: "32Gi" - requests: - memory: "16Gi" -``` - -### Scaling - -- GPU is the bottleneck - one server per GPU -- Use load balancer for multiple GPUs -- Queue requests to avoid overwhelming - -## OpenAPI Specification - -Full API spec available at: -- Runtime: `http://localhost:8000/cuopt.yaml` -- Source: `docs/cuopt/source/cuopt_spec.yaml` -- Swagger UI: `http://localhost:8000/cuopt/docs` - -## Examples - -See `resources/` for complete examples: -- [Routing examples](resources/routing_examples.md) — VRP, PDP via REST -- [LP/MILP examples](resources/lp_milp_examples.md) — Linear programming via REST - -## When to Escalate - -Switch to **cuopt-debugging** if: -- Consistent 5xx errors -- Unexpected solution results - -Switch to **cuopt-developer** if: -- Need to modify server behavior -- Need new endpoints diff --git a/.github/skills/cuopt-server/resources/lp_milp_examples.md b/.github/skills/cuopt-server/resources/lp_milp_examples.md deleted file mode 100644 index 107bb2490b..0000000000 --- a/.github/skills/cuopt-server/resources/lp_milp_examples.md +++ /dev/null @@ -1,176 +0,0 @@ -# Server: LP/MILP Examples - -## LP Request (curl) - -```bash -# maximize 40*x + 30*y -# s.t. 2x + 3y <= 240 -# 4x + 2y <= 200 - -REQID=$(curl -s -X POST "http://localhost:8000/cuopt/request" \ - -H "Content-Type: application/json" \ - -H "CLIENT-VERSION: custom" \ - -d '{ - "csr_constraint_matrix": { - "offsets": [0, 2, 4], - "indices": [0, 1, 0, 1], - "values": [2.0, 3.0, 4.0, 2.0] - }, - "constraint_bounds": { - "upper_bounds": [240.0, 200.0], - "lower_bounds": ["ninf", "ninf"] - }, - "objective_data": { - "coefficients": [40.0, 30.0], - "scalability_factor": 1.0, - "offset": 0.0 - }, - "variable_bounds": { - "upper_bounds": ["inf", "inf"], - "lower_bounds": [0.0, 0.0] - }, - "maximize": true, - "solver_config": { - "time_limit": 60 - } - }' | jq -r '.reqId') - -sleep 2 -curl -s "http://localhost:8000/cuopt/solution/$REQID" -H "CLIENT-VERSION: custom" | jq . -``` - -## MILP Request (curl) - -```bash -# Submit MILP request and capture reqId -REQID=$(curl -s -X POST "http://localhost:8000/cuopt/request" \ - -H "Content-Type: application/json" \ - -H "CLIENT-VERSION: custom" \ - -d '{ - "csr_constraint_matrix": { - "offsets": [0, 2, 4], - "indices": [0, 1, 0, 1], - "values": [2.0, 3.0, 4.0, 2.0] - }, - "constraint_bounds": { - "upper_bounds": [240.0, 200.0], - "lower_bounds": ["ninf", "ninf"] - }, - "objective_data": { - "coefficients": [40.0, 30.0] - }, - "variable_bounds": { - "upper_bounds": ["inf", "inf"], - "lower_bounds": [0.0, 0.0] - }, - "variable_types": ["integer", "continuous"], - "maximize": true, - "solver_config": { - "time_limit": 120, - "tolerances": { - "mip_relative_gap": 0.01 - } - } - }' | jq -r '.reqId') -# Note: objective_data also supports optional "scalability_factor" and "offset" fields - -# Poll for solution (MILP may take longer than LP) -sleep 3 -curl -s "http://localhost:8000/cuopt/solution/$REQID" -H "CLIENT-VERSION: custom" | jq . -``` - -## LP Request (Python) - -```python -import requests -import time - -SERVER = "http://localhost:8000" -HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} - -payload = { - "csr_constraint_matrix": { - "offsets": [0, 2, 4], - "indices": [0, 1, 0, 1], - "values": [2.0, 3.0, 4.0, 2.0] - }, - "constraint_bounds": { - "upper_bounds": [240.0, 200.0], - "lower_bounds": ["ninf", "ninf"] - }, - "objective_data": { - "coefficients": [40.0, 30.0] - }, - "variable_bounds": { - "upper_bounds": ["inf", "inf"], - "lower_bounds": [0.0, 0.0] - }, - "maximize": True, - "solver_config": { - "time_limit": 60 - } -} - -# Submit -response = requests.post(f"{SERVER}/cuopt/request", json=payload, headers=HEADERS) -req_id = response.json()["reqId"] -print(f"Submitted: {req_id}") - -# Poll for solution -for _ in range(30): - response = requests.get(f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS) - result = response.json() - - if "response" in result: - print(f"Status: {result['response'].get('status')}") - print(f"Objective: {result['response'].get('objective_value')}") - print(f"Solution: {result['response'].get('primal_solution')}") - break - time.sleep(1) -``` - -## CSR Matrix Format - -``` -Matrix: [2, 3] (row 0: 2*x0 + 3*x1) - [4, 2] (row 1: 4*x0 + 2*x1) - -CSR format: - offsets: [0, 2, 4] # Row pointers (n_rows + 1) - indices: [0, 1, 0, 1] # Column indices - values: [2.0, 3.0, 4.0, 2.0] # Non-zero values -``` - -## Special Values - -```json -{ - "constraint_bounds": { - "lower_bounds": ["ninf", "ninf"], - "upper_bounds": [100.0, "inf"] - } -} -``` - -## Variable Types - -- `"continuous"` - real-valued -- `"integer"` - integer-valued -- `"binary"` - 0 or 1 only - ---- - -## Additional References (tested in CI) - -For more complete examples, read these files: - -| Example | File | -|---------|------| -| Basic LP (Python) | `docs/cuopt/source/cuopt-server/examples/lp/examples/basic_lp_example.py` | -| Basic LP (curl) | `docs/cuopt/source/cuopt-server/examples/lp/examples/basic_lp_example.sh` | -| MPS File Input | `docs/cuopt/source/cuopt-server/examples/lp/examples/mps_file_example.py` | -| Warmstart | `docs/cuopt/source/cuopt-server/examples/lp/examples/warmstart_example.py` | -| Basic MILP | `docs/cuopt/source/cuopt-server/examples/milp/examples/basic_milp_example.py` | -| Incumbent Callback | `docs/cuopt/source/cuopt-server/examples/milp/examples/incumbent_callback_example.py` | - -These examples are tested by CI (`ci/test_doc_examples.sh`). diff --git a/.github/skills/cuopt-server/resources/routing_examples.md b/.github/skills/cuopt-server/resources/routing_examples.md deleted file mode 100644 index 9caf7e67dd..0000000000 --- a/.github/skills/cuopt-server/resources/routing_examples.md +++ /dev/null @@ -1,160 +0,0 @@ -# Server: Routing Examples - -## Start Server - -```bash -python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000 & -sleep 5 -curl http://localhost:8000/cuopt/health -``` - -> **Note:** Using `--ip 0.0.0.0` binds to all interfaces for development convenience; use `--ip 127.0.0.1` or a specific interface in production or untrusted networks. - -## Basic VRP (curl) - -```bash -REQID=$(curl -s -X POST "http://localhost:8000/cuopt/request" \ - -H "Content-Type: application/json" \ - -H "CLIENT-VERSION: custom" \ - -d '{ - "cost_matrix_data": { - "data": {"0": [[0,10,15,20],[10,0,12,18],[15,12,0,10],[20,18,10,0]]} - }, - "travel_time_matrix_data": { - "data": {"0": [[0,10,15,20],[10,0,12,18],[15,12,0,10],[20,18,10,0]]} - }, - "task_data": { - "task_locations": [1, 2, 3], - "demand": [[10, 15, 20]], - "service_times": [5, 5, 5] - }, - "fleet_data": { - "vehicle_locations": [[0, 0], [0, 0]], - "capacities": [[50, 50]] - }, - "solver_config": {"time_limit": 5} - }' | jq -r '.reqId') - -curl -s "http://localhost:8000/cuopt/solution/$REQID" -H "CLIENT-VERSION: custom" | jq . -``` - -## VRP with Time Windows (Python) - -```python -import requests -import time - -SERVER = "http://localhost:8000" -HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} - -payload = { - "cost_matrix_data": { - "data": {"0": [[0,10,15,20,25],[10,0,12,18,22],[15,12,0,10,15],[20,18,10,0,8],[25,22,15,8,0]]} - }, - "travel_time_matrix_data": { - "data": {"0": [[0,10,15,20,25],[10,0,12,18,22],[15,12,0,10,15],[20,18,10,0,8],[25,22,15,8,0]]} - }, - "task_data": { - "task_locations": [1, 2, 3, 4], - "demand": [[20, 30, 25, 15]], - "task_time_windows": [[0, 50], [10, 60], [20, 70], [0, 80]], - "service_times": [5, 5, 5, 5] - }, - "fleet_data": { - "vehicle_locations": [[0, 0], [0, 0]], - "capacities": [[100, 100]], - "vehicle_time_windows": [[0, 200], [0, 200]] - }, - "solver_config": { - "time_limit": 10 - } -} - -# Submit -response = requests.post(f"{SERVER}/cuopt/request", json=payload, headers=HEADERS) -req_id = response.json()["reqId"] -print(f"Submitted: {req_id}") - -# Poll for solution -for attempt in range(30): - response = requests.get(f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS) - result = response.json() - - if "response" in result: - solver_response = result["response"].get("solver_response", {}) - print(f"Status: {solver_response.get('status')}") - print(f"Cost: {solver_response.get('solution_cost')}") - if "vehicle_data" in solver_response: - for vid, vdata in solver_response["vehicle_data"].items(): - print(f"Vehicle {vid}: {vdata.get('route', [])}") - break - time.sleep(1) -``` - -## Pickup and Delivery (curl) - -```bash -curl -s -X POST "http://localhost:8000/cuopt/request" \ - -H "Content-Type: application/json" \ - -H "CLIENT-VERSION: custom" \ - -d '{ - "cost_matrix_data": { - "data": {"0": [[0,10,20,30,40],[10,0,15,25,35],[20,15,0,10,20],[30,25,10,0,15],[40,35,20,15,0]]} - }, - "travel_time_matrix_data": { - "data": {"0": [[0,10,20,30,40],[10,0,15,25,35],[20,15,0,10,20],[30,25,10,0,15],[40,35,20,15,0]]} - }, - "task_data": { - "task_locations": [1, 2, 3, 4], - "demand": [[10, -10, 15, -15]], - "pickup_and_delivery_pairs": [[0, 1], [2, 3]] - }, - "fleet_data": { - "vehicle_locations": [[0, 0]], - "capacities": [[50]] - }, - "solver_config": {"time_limit": 10} - }' | jq . -``` - -## Terminology: Python vs REST - -| Python API | REST Server | -|------------|-------------| -| `order_locations` | `task_locations` | -| `set_order_time_windows()` | `task_time_windows` | -| `set_order_service_times()` | `service_times` | -| `add_transit_time_matrix()` | `travel_time_matrix_data` | -| `set_pickup_delivery_pairs()` | `pickup_and_delivery_pairs` | - -## Common Mistakes - -```json -// ❌ WRONG field name -"transit_time_matrix_data": {...} - -// ✅ CORRECT -"travel_time_matrix_data": {...} -``` - -```json -// ❌ WRONG capacity format (per vehicle) -"capacities": [[50], [50]] - -// ✅ CORRECT (per dimension across vehicles) -"capacities": [[50, 50]] -``` - ---- - -## Additional References (tested in CI) - -For more complete examples, read these files: - -| Example | File | -|---------|------| -| Basic Routing (Python) | `docs/cuopt/source/cuopt-server/examples/routing/examples/basic_routing_example.py` | -| Basic Routing (curl) | `docs/cuopt/source/cuopt-server/examples/routing/examples/basic_routing_example.sh` | -| Initial Solution | `docs/cuopt/source/cuopt-server/examples/routing/examples/initial_solution_example.py` | - -These examples are tested by CI (`ci/test_doc_examples.sh`). diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 051d91d1f5..593d48bd74 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -45,7 +45,7 @@ concurrency: jobs: cpp-build: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -55,7 +55,7 @@ jobs: python-build: needs: [cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -65,7 +65,7 @@ jobs: upload-conda: needs: [cpp-build, python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-upload-packages.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -73,7 +73,7 @@ jobs: sha: ${{ inputs.sha }} wheel-build-cuopt-mps-parser: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -88,7 +88,7 @@ jobs: wheel-publish-cuopt-mps-parser: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -99,7 +99,7 @@ jobs: wheel-build-libcuopt: needs: wheel-build-cuopt-mps-parser secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -112,7 +112,7 @@ jobs: wheel-publish-libcuopt: needs: wheel-build-libcuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -123,7 +123,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -135,7 +135,7 @@ jobs: wheel-publish-cuopt: needs: wheel-build-cuopt secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -145,7 +145,7 @@ jobs: package-type: python wheel-build-cuopt-server: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -160,7 +160,7 @@ jobs: wheel-publish-cuopt-server: needs: wheel-build-cuopt-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -171,7 +171,7 @@ jobs: docs-build: needs: [python-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} node_type: "gpu-l4-latest-1" @@ -185,7 +185,7 @@ jobs: script: "ci/build_docs.sh" wheel-build-cuopt-sh-client: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -201,7 +201,7 @@ jobs: wheel-publish-cuopt-sh-client: needs: wheel-build-cuopt-sh-client secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@python-3.14 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4862fb890c..95741c1fb5 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -34,7 +34,7 @@ jobs: - wheel-build-cuopt-sh-client - test-self-hosted-server secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@python-3.14 if: always() with: needs: ${{ toJSON(needs) }} @@ -111,14 +111,14 @@ jobs: changed-files: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@python-3.14 with: files_yaml: | build_docs: - '**' - '!.ai/**' - '!.coderabbit.yaml' - - '!.github/AGENTS.md' + - '!AGENTS.md' - '!.github/CODE_OF_CONDUCT.md' - '!.github/CODEOWNERS' - '!.github/ISSUE_TEMPLATE/**' @@ -153,7 +153,7 @@ jobs: - '!README.md' - '!.ai/**' - '!.coderabbit.yaml' - - '!.github/AGENTS.md' + - '!AGENTS.md' - '!.github/CODE_OF_CONDUCT.md' - '!.github/CODEOWNERS' - '!.github/ISSUE_TEMPLATE/**' @@ -184,13 +184,21 @@ jobs: - '!sonarqube/**' - '!ucf/**' - '!utilities/**' + - '!skills/**/SKILL.md' + - '!skills/**/resources/**' + - '!ci/utils/validate_skills.sh' + - '!ci/utils/sync_skills_version.sh' + - '!agents/**' + - '!.cursor-plugin/**' + - '!.claude-plugin/**' + - '!gemini-extension.json' test_python_conda: - '**' - '!CONTRIBUTING.md' - '!README.md' - '!.ai/**' - '!.coderabbit.yaml' - - '!.github/AGENTS.md' + - '!AGENTS.md' - '!.github/CODE_OF_CONDUCT.md' - '!.github/CODEOWNERS' - '!.github/ISSUE_TEMPLATE/**' @@ -218,13 +226,21 @@ jobs: - '!sonarqube/**' - '!ucf/**' - '!utilities/**' + - '!skills/**/SKILL.md' + - '!skills/**/resources/**' + - '!ci/utils/validate_skills.sh' + - '!ci/utils/sync_skills_version.sh' + - '!agents/**' + - '!.cursor-plugin/**' + - '!.claude-plugin/**' + - '!gemini-extension.json' test_python_wheels: - '**' - '!CONTRIBUTING.md' - '!README.md' - '!.ai/**' - '!.coderabbit.yaml' - - '!.github/AGENTS.md' + - '!AGENTS.md' - '!.github/CODE_OF_CONDUCT.md' - '!.github/CODEOWNERS' - '!.github/ISSUE_TEMPLATE/**' @@ -253,22 +269,30 @@ jobs: - '!sonarqube/**' - '!ucf/**' - '!utilities/**' + - '!skills/**/SKILL.md' + - '!skills/**/resources/**' + - '!ci/utils/validate_skills.sh' + - '!ci/utils/sync_skills_version.sh' + - '!agents/**' + - '!.cursor-plugin/**' + - '!.claude-plugin/**' + - '!gemini-extension.json' checks: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@python-3.14 with: enable_check_generated_files: false conda-cpp-build: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_cpp.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_lean_filter }} conda-cpp-tests: needs: [conda-cpp-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp with: build_type: pull-request @@ -284,14 +308,14 @@ jobs: conda-python-build: needs: [conda-cpp-build, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_python.sh matrix_filter: ${{ needs.compute-matrix-filters.outputs.conda_test_filter }} conda-python-tests: needs: [conda-python-build, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_conda with: run_codecov: false @@ -308,7 +332,7 @@ jobs: docs-build: needs: [conda-python-build, changed-files] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).build_docs with: build_type: pull-request @@ -321,7 +345,7 @@ jobs: wheel-build-cuopt-mps-parser: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_mps_parser.sh @@ -333,7 +357,7 @@ jobs: wheel-build-libcuopt: needs: [wheel-build-cuopt-mps-parser, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: # build for every combination of arch and CUDA version, but only for the latest Python matrix_filter: ${{ needs.compute-matrix-filters.outputs.libcuopt_filter }} @@ -344,7 +368,7 @@ jobs: wheel-build-cuopt: needs: [wheel-build-cuopt-mps-parser, wheel-build-libcuopt, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt.sh @@ -353,7 +377,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.wheel_lean_filter }} wheel-tests-cuopt: needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request @@ -369,7 +393,7 @@ jobs: wheel-build-cuopt-server: needs: [checks, compute-matrix-filters] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_server.sh @@ -381,7 +405,7 @@ jobs: wheel-build-cuopt-sh-client: needs: compute-matrix-filters secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@python-3.14 with: build_type: pull-request script: ci/build_wheel_cuopt_sh_client.sh @@ -393,7 +417,7 @@ jobs: matrix_filter: ${{ needs.compute-matrix-filters.outputs.cuopt_sh_client_filter }} wheel-tests-cuopt-server: needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files, compute-matrix-filters] - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@main + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@python-3.14 if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_wheels with: build_type: pull-request diff --git a/.opencode/AGENTS.md b/.opencode/AGENTS.md new file mode 120000 index 0000000000..be77ac83a1 --- /dev/null +++ b/.opencode/AGENTS.md @@ -0,0 +1 @@ +../AGENTS.md \ No newline at end of file diff --git a/.opencode/skills b/.opencode/skills new file mode 120000 index 0000000000..42c5394a18 --- /dev/null +++ b/.opencode/skills @@ -0,0 +1 @@ +../skills \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2d968a4622..87a3faaf92 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -95,7 +95,10 @@ repos: [.](md|rst|avro|parquet|png|orc|gz|pkl|sas7bdat|msgpack|pickle|jpg|bz2|zlib)$| ^docs/cuopt/source/cuopt-python/routing/routing-example[.]ipynb$| ^docs/cuopt/source/versions1[.]json$| - ^helmchart/cuopt-server/(Chart[.]yaml|values[.]yaml)$ + ^helmchart/cuopt-server/(Chart[.]yaml|values[.]yaml)$| + ^[.]cursor-plugin/plugin[.]json$| + ^[.]claude-plugin/marketplace[.]json$| + ^gemini-extension[.]json$ - repo: local hooks: - id: update-versions @@ -103,6 +106,18 @@ repos: entry: python ci/utils/update_doc_versions.py language: system files: docs/cuopt/source/versions1.json + - id: sync-skills-version + name: Sync skills version from VERSION + entry: ci/utils/sync_skills_version.sh + language: system + pass_filenames: false + files: ^(VERSION|\.claude-plugin/marketplace\.json|\.cursor-plugin/plugin\.json|gemini-extension\.json)$ + - id: validate-skills + name: Validate agent skills + entry: ci/utils/validate_skills.sh + language: system + pass_filenames: false + files: ^(VERSION|skills/|\.claude-plugin/|\.cursor-plugin/|agents/|ci/utils/validate_skills\.sh|ci/utils/sync_skills_version\.sh|gemini-extension\.json)$ default_language_version: diff --git a/AGENTS.md b/AGENTS.md index f4e47cde2b..74d5aa66e6 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,8 +1,47 @@ -# AGENTS.md +# AGENTS.md — cuOpt AI Agent Entry Point -AI-agent skills for this repo are located at: +AI agent skills for NVIDIA cuOpt optimization engine. Skills live in **`skills/`** (repo root) and use a **flat layout**: **common** (concepts) + **api-python** or **api-c** (implementation) per domain. -- **Entry point**: `.github/AGENTS.md` -- **Skills**: `.github/skills/` +> **🔒 MANDATORY — Security:** You MUST NOT install, upgrade, or modify packages. Provide the exact command for the user to run; they execute it. No exceptions. -If you are a coding agent, start at `.github/AGENTS.md`. +> **🔒 MANDATORY — Ambiguity:** When the problem could be read more than one way, you MUST either **ask the user to clarify** or **solve every plausible interpretation and report all outcomes**. Never pick one interpretation silently. + +## Skills directory (flat) + +### Rules +- `skills/cuopt-user-rules/` — User-facing behavior and conventions; read first when helping users with cuOpt (routing, LP, MILP, QP, install, server). Choose skills from the index below by task, problem type, and interface (Python / C / CLI). +- `skills/cuopt-developer/` — Contributing and development; use when the user is building from source, contributing code, or working on cuOpt internals. + +### Common (concepts only; no API code) +- `skills/cuopt-installation-common/` — Install: system and environment requirements (concepts only; no install commands or interface) +- `skills/lp-milp-formulation/` — LP/MILP: concepts + problem parsing (parameters, constraints, decisions, objective) +- `skills/routing-formulation/` — Routing: VRP, TSP, PDP (problem types, data) +- `skills/qp-formulation/` — QP: minimize-only, escalate (beta) +- `skills/cuopt-server-common/` — Server: capabilities, workflow + +### API (implementation; one interface per skill) +- `skills/cuopt-installation-api-python/` +- `skills/cuopt-installation-api-c/` +- `skills/cuopt-installation-developer/` (build from source) +- `skills/cuopt-lp-milp-api-python/` +- `skills/cuopt-lp-milp-api-c/` +- `skills/cuopt-lp-milp-api-cli/` +- `skills/cuopt-routing-api-python/` +- `skills/cuopt-qp-api-python/` +- `skills/cuopt-qp-api-c/` +- `skills/cuopt-qp-api-cli/` +- `skills/cuopt-server-api-python/` (deploy + client) + +## Resources + +### Documentation +- [cuOpt User Guide](https://docs.nvidia.com/cuopt/user-guide/latest/introduction.html) +- [API Reference](https://docs.nvidia.com/cuopt/user-guide/latest/api.html) + +### Examples +- [cuopt-examples repo](https://github.com/NVIDIA/cuopt-examples) +- [Google Colab notebooks](https://colab.research.google.com/github/nvidia/cuopt-examples/) + +### Support +- [GitHub Issues](https://github.com/NVIDIA/cuopt/issues) +- [Developer Forums](https://forums.developer.nvidia.com/c/ai-data-science/nvidia-cuopt/514) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1f3b75eb14..2835786ae4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -101,7 +101,7 @@ CUDA/GPU Runtime: Python: -* Python >=3.11.x, <= 3.13.x +* Python >=3.11.x, <= 3.14.x OS: diff --git a/README.md b/README.md index 5e817ea6ef..379a48c350 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ This repo is also hosted as a [COIN-OR](http://github.com/coin-or/cuopt/) projec ### Python requirements -* Python >=3.11, <=3.13 +* Python >=3.11, <=3.14 ### OS requirements @@ -130,13 +130,13 @@ Users can pull the cuOpt container from the NVIDIA container registry. ```bash # For CUDA 12.x -docker pull nvidia/cuopt:latest-cuda12.9-py3.13 +docker pull nvidia/cuopt:latest-cuda12.9-py3.14 # For CUDA 13.x -docker pull nvidia/cuopt:latest-cuda13.0-py3.13 +docker pull nvidia/cuopt:latest-cuda13.0-py3.14 ``` -Note: The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py3.13`` or ``-cuda13.0-py3.13`` tag. For example, to use cuOpt 25.10.0, you can use the ``25.10.0-cuda12.9-py3.13`` or ``25.10.0-cuda13.0-py3.13`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. +Note: The ``latest`` tag is the latest stable release of cuOpt. If you want to use a specific version, you can use the ``-cuda12.9-py3.14`` or ``-cuda13.0-py3.14`` tag. For example, to use cuOpt 25.10.0, you can use the ``25.10.0-cuda12.9-py3.13`` or ``25.10.0-cuda13.0-py3.13`` tag. Please refer to `cuOpt dockerhub page `_ for the list of available tags. More information about the cuOpt container can be found [here](https://docs.nvidia.com/cuopt/user-guide/latest/cuopt-server/quick-start.html#container-from-docker-hub). diff --git a/agents/AGENTS.md b/agents/AGENTS.md new file mode 100644 index 0000000000..cf2598bc91 --- /dev/null +++ b/agents/AGENTS.md @@ -0,0 +1,59 @@ +# cuOpt Skills Reference + +You have additional skills documented in `skills//SKILL.md`. **When the user's intent matches a skill below, you MUST read that skill's SKILL.md** and follow its guidance. + +## Mandatory rules + +- **Security:** You MUST NOT install, upgrade, or modify packages. Provide the exact command for the user to run; they execute it. +- **Ambiguity:** When the problem could be read more than one way, either ask the user to clarify or solve every plausible interpretation and report all outcomes. Never pick one interpretation silently. + +## Available skills + +| Skill | Description | +|-------|-------------| +| cuopt-user-rules | Base behavior rules for using NVIDIA cuOpt. Read this FIRST before any cuOpt user task (routing, LP/MILP, QP, installation, server). | +| cuopt-developer | Contribute to NVIDIA cuOpt codebase (C++/CUDA, Python, server, docs, CI). Use when the user wants to modify solver internals, add features, submit PRs, or understand the codebase. | +| cuopt-installation-common | Install cuOpt — system and environment requirements only. Domain concepts; no install commands or interface. | +| cuopt-installation-api-python | Install cuOpt for Python — pip, conda, Docker, verification. Use when installing or verifying the Python API. | +| cuopt-installation-api-c | Install cuOpt for C — conda, locate lib/headers, verification. Use when installing or verifying the C API. | +| cuopt-installation-developer | Developer installation — build cuOpt from source, run tests. Use when setting up a dev environment to contribute or modify cuOpt. | +| lp-milp-formulation | LP/MILP concepts and going from problem text to formulation. Parameters, constraints, decisions, objective. | +| cuopt-lp-milp-api-python | Solve LP and MILP with the Python API. Use for linear constraints, integer variables, scheduling, resource allocation, facility location, production planning. | +| cuopt-lp-milp-api-c | LP and MILP with cuOpt — C API. Use when embedding LP/MILP in C/C++. | +| cuopt-lp-milp-api-cli | LP and MILP with cuOpt — CLI (MPS files, cuopt_cli). Use when solving from MPS via command line. | +| routing-formulation | Vehicle routing (VRP, TSP, PDP) — problem types and data requirements. Domain concepts only. | +| cuopt-routing-api-python | Vehicle routing (VRP, TSP, PDP) with cuOpt — Python API. Use when building or solving routing in Python. | +| qp-formulation | Quadratic Programming (QP) — problem form and constraints. Domain concepts; QP is beta. | +| cuopt-qp-api-python | QP with cuOpt — Python API (beta). Use when building or solving QP in Python. | +| cuopt-qp-api-c | QP with cuOpt — C API. Use when embedding QP in C/C++. | +| cuopt-qp-api-cli | QP with cuOpt — CLI. Use when solving QP from the command line. | +| cuopt-server-common | cuOpt REST server — what it does and how requests flow. Domain concepts only. | +| cuopt-server-api-python | cuOpt REST server — start server, endpoints, Python/curl client examples. Use when deploying or calling the REST API. | + +## Skill paths (from repo root) + +- `skills/cuopt-user-rules/SKILL.md` +- `skills/cuopt-developer/SKILL.md` +- `skills/cuopt-installation-common/SKILL.md` +- `skills/cuopt-installation-api-python/SKILL.md` +- `skills/cuopt-installation-api-c/SKILL.md` +- `skills/cuopt-installation-developer/SKILL.md` +- `skills/lp-milp-formulation/SKILL.md` +- `skills/cuopt-lp-milp-api-python/SKILL.md` +- `skills/cuopt-lp-milp-api-c/SKILL.md` +- `skills/cuopt-lp-milp-api-cli/SKILL.md` +- `skills/routing-formulation/SKILL.md` +- `skills/cuopt-routing-api-python/SKILL.md` +- `skills/qp-formulation/SKILL.md` +- `skills/cuopt-qp-api-python/SKILL.md` +- `skills/cuopt-qp-api-c/SKILL.md` +- `skills/cuopt-qp-api-cli/SKILL.md` +- `skills/cuopt-server-common/SKILL.md` +- `skills/cuopt-server-api-python/SKILL.md` + +## Resources + +- [cuOpt User Guide](https://docs.nvidia.com/cuopt/user-guide/latest/introduction.html) +- [API Reference](https://docs.nvidia.com/cuopt/user-guide/latest/api.html) +- [cuopt-examples](https://github.com/NVIDIA/cuopt-examples) +- [GitHub Issues](https://github.com/NVIDIA/cuopt/issues) diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index 9b79dff8af..fe237846c8 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -137,6 +138,58 @@ std::vector> read_solution_from_dir(const std::string file_p return initial_solutions; } +struct incumbent_record_t { + double objective; + double work_timestamp; + double wall_time; + cuopt::internals::mip_solution_origin_t origin; +}; + +class incumbent_tracker_t : public cuopt::internals::get_solution_callback_ext_t { + public: + incumbent_tracker_t(std::chrono::high_resolution_clock::time_point start_time) + : start_time_(start_time) + { + } + + void get_solution(void* data, + void* cost, + void* solution_bound, + const cuopt::internals::mip_solution_callback_info_t* info, + void* user_data) override + { + double obj = *static_cast(cost); + double wt = (info != nullptr) ? info->work_timestamp : -1.0; + auto origin = + (info != nullptr) ? info->origin : cuopt::internals::mip_solution_origin_t::UNKNOWN; + auto now = std::chrono::high_resolution_clock::now(); + double wall_s = std::chrono::duration(now - start_time_).count(); + records_.push_back({obj, wt, wall_s, origin}); + } + + void write_csv(const std::string& path) const + { + std::ofstream f(path); + if (!f.is_open()) { + fprintf(stderr, "Failed to open incumbent CSV: %s\n", path.c_str()); + return; + } + f << "index,objective,work_timestamp,wall_time_s,origin\n"; + for (size_t i = 0; i < records_.size(); ++i) { + auto& r = records_[i]; + f << i << "," << std::setprecision(15) << r.objective << "," << r.work_timestamp << "," + << std::setprecision(6) << r.wall_time << "," + << cuopt::internals::mip_solution_origin_to_string(r.origin) << "\n"; + } + } + + size_t size() const { return records_.size(); } + + private: + std::chrono::high_resolution_clock::time_point start_time_; + std::vector records_; +}; + int run_single_file(std::string file_path, int device, int batch_id, @@ -183,6 +236,10 @@ int run_single_file(std::string file_path, CUOPT_LOG_ERROR("Parsing MPS failed exiting!"); return -1; } + // Use the benchmark filename for downstream instance-level reporting. + // This keeps per-instance metrics aligned with the run list even if the MPS NAME card differs. + mps_data_model.set_problem_name(base_filename); + if (initial_solution_dir.has_value()) { auto initial_solutions = read_solution_from_dir( initial_solution_dir.value(), base_filename, mps_data_model.get_variable_names()); @@ -199,20 +256,40 @@ int run_single_file(std::string file_path, } } } - settings.time_limit = time_limit; - settings.work_limit = work_limit; - settings.heuristics_only = heuristics_only; - settings.num_cpu_threads = num_cpu_threads; - settings.log_to_console = log_to_console; - settings.determinism_mode = deterministic ? CUOPT_MODE_DETERMINISTIC : CUOPT_MODE_OPPORTUNISTIC; + settings.time_limit = time_limit; + settings.work_limit = work_limit; + settings.heuristics_only = heuristics_only; + settings.num_cpu_threads = num_cpu_threads; + settings.log_to_console = log_to_console; + if (deterministic) { + settings.determinism_mode = + heuristics_only ? CUOPT_MODE_DETERMINISTIC_GPU_HEURISTICS : CUOPT_MODE_DETERMINISTIC; + } else { + settings.determinism_mode = CUOPT_MODE_OPPORTUNISTIC; + } + CUOPT_LOG_INFO( + "1run_mip settings: heuristics_only=%d deterministic=%d determinism_mode=%d " + "time_limit=%.6f work_limit=%.6f", + (int)heuristics_only, + (int)deterministic, + settings.determinism_mode, + settings.time_limit, + settings.work_limit); settings.tolerances.relative_tolerance = 1e-12; settings.tolerances.absolute_tolerance = 1e-6; settings.presolver = cuopt::linear_programming::presolver_t::Default; settings.reliability_branching = reliability_branching; + settings.clique_cuts = -1; settings.seed = 42; + settings.bnb_work_unit_scale = 1; + settings.gpu_heur_work_unit_scale = 0.4; + settings.mip_scaling = false; + settings.gpu_heur_wait_for_exploration = false; cuopt::linear_programming::benchmark_info_t benchmark_info; settings.benchmark_info_ptr = &benchmark_info; auto start_run_solver = std::chrono::high_resolution_clock::now(); + incumbent_tracker_t incumbent_tracker(start_run_solver); + settings.set_mip_callback(&incumbent_tracker); auto solution = cuopt::linear_programming::solve_mip(&handle_, mps_data_model, settings); CUOPT_LOG_INFO( "first obj: %f last improvement of best feasible: %f last improvement after recombination: %f", @@ -248,7 +325,13 @@ int run_single_file(std::string file_path, << benchmark_info.last_improvement_after_recombination << "," << mip_gap << "," << is_optimal << "\n"; write_to_output_file(out_dir, base_filename, device, n_gpus, batch_id, ss.str()); - CUOPT_LOG_INFO("Results written to the file %s", base_filename.c_str()); + if (!out_dir.empty()) { + std::string mps_stem = base_filename.substr(0, base_filename.find(".mps")); + std::string csv_path = out_dir + "/" + mps_stem + "_incumbents.csv"; + incumbent_tracker.write_csv(csv_path); + CUOPT_LOG_INFO( + "Incumbent trace (%zu entries) written to %s", incumbent_tracker.size(), csv_path.c_str()); + } return sol_found; } @@ -413,7 +496,16 @@ int main(int argc, char* argv[]) int reliability_branching = program.get("--reliability-branching"); bool deterministic = program.get("--determinism"); - if (num_cpu_threads < 0) { num_cpu_threads = omp_get_max_threads() / n_gpus; } + if (num_cpu_threads < 0) { + num_cpu_threads = omp_get_max_threads() / n_gpus; + // std::ifstream smt_file("/sys/devices/system/cpu/smt/active"); + // if (smt_file.is_open()) { + // int smt_active = 0; + // smt_file >> smt_active; + // if (smt_active) { num_cpu_threads /= 2; } + // } + num_cpu_threads = std::max(num_cpu_threads, 1); + } if (program.is_used("--out-dir")) { out_dir = program.get("--out-dir"); @@ -513,7 +605,7 @@ int main(int argc, char* argv[]) sleep(1); } int remaining = paths.size() - tests_ran; - // wait for all processes to finish + // Wait for all processes to finish for (int i = 0; i < remaining; ++i) { return_gpu_to_the_queue(pid_gpu_map, pid_file_map, gpu_queue); } diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index 18a473d64e..a7838d773e 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -76,6 +76,13 @@ static void parse_arguments(argparse::ArgumentParser& program) .choices("None", "Papilo", "PSLP", "Default"); program.add_argument("--solution-path").help("Path where solution file will be generated"); + + program.add_argument("--pdlp-precision") + .help( + "PDLP precision mode. default: native type, single: FP32 internally, " + "double: FP64 explicitly, mixed: mixed-precision SpMV (FP32 matrix, FP64 vectors).") + .default_value(std::string("default")) + .choices("default", "single", "double", "mixed"); } static cuopt::linear_programming::presolver_t string_to_presolver(const std::string& presolver) @@ -87,6 +94,15 @@ static cuopt::linear_programming::presolver_t string_to_presolver(const std::str return cuopt::linear_programming::presolver_t::Default; } +static cuopt::linear_programming::pdlp_precision_t string_to_pdlp_precision( + const std::string& precision) +{ + if (precision == "single") return cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + if (precision == "double") return cuopt::linear_programming::pdlp_precision_t::DoublePrecision; + if (precision == "mixed") return cuopt::linear_programming::pdlp_precision_t::MixedPrecision; + return cuopt::linear_programming::pdlp_precision_t::DefaultPrecision; +} + static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( const std::string& mode) { @@ -105,8 +121,7 @@ static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( const argparse::ArgumentParser& program) { - cuopt::linear_programming::pdlp_solver_settings_t settings = - cuopt::linear_programming::pdlp_solver_settings_t{}; + cuopt::linear_programming::pdlp_solver_settings_t settings{}; settings.time_limit = program.get("--time-limit"); settings.iteration_limit = program.get("--iteration-limit"); @@ -114,29 +129,16 @@ static cuopt::linear_programming::pdlp_solver_settings_t create_sol settings.pdlp_solver_mode = string_to_pdlp_solver_mode(program.get("--pdlp-solver-mode")); settings.method = static_cast(program.get("--method")); - settings.crossover = program.get("--crossover"); - settings.presolver = string_to_presolver(program.get("--presolver")); + settings.crossover = program.get("--crossover"); + settings.presolver = string_to_presolver(program.get("--presolver")); + settings.pdlp_precision = string_to_pdlp_precision(program.get("--pdlp-precision")); return settings; } -int main(int argc, char* argv[]) +static int run_solver(const argparse::ArgumentParser& program, const raft::handle_t& handle_) { - // Parse binary arguments - argparse::ArgumentParser program("solve_LP"); - parse_arguments(program); - - try { - program.parse_args(argc, argv); - } catch (const std::runtime_error& err) { - std::cerr << err.what() << std::endl; - std::cerr << program; - return 1; - } - - // Initialize solver settings from binary arguments - cuopt::linear_programming::pdlp_solver_settings_t settings = - create_solver_settings(program); + auto settings = create_solver_settings(program); bool use_pdlp_solver_mode = true; if (program.is_used("--pdlp-hyper-params-path")) { @@ -145,13 +147,6 @@ int main(int argc, char* argv[]) use_pdlp_solver_mode = false; } - // Setup up RMM memory pool - auto memory_resource = make_pool(); - rmm::mr::set_current_device_resource(memory_resource.get()); - - // Initialize raft handle and running stream - const raft::handle_t handle_{}; - // Parse MPS file cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(program.get("--path")); @@ -168,3 +163,27 @@ int main(int argc, char* argv[]) return 0; } + +int main(int argc, char* argv[]) +{ + // Parse binary arguments + argparse::ArgumentParser program("solve_LP"); + parse_arguments(program); + + try { + program.parse_args(argc, argv); + } catch (const std::runtime_error& err) { + std::cerr << err.what() << std::endl; + std::cerr << program; + return 1; + } + + // Setup up RMM memory pool + auto memory_resource = make_pool(); + rmm::mr::set_current_device_resource(memory_resource.get()); + + // Initialize raft handle and running stream + const raft::handle_t handle_{}; + + return run_solver(program, handle_); +} diff --git a/ci/compute-sanitizer-suppressions.xml b/ci/compute-sanitizer-suppressions.xml new file mode 100644 index 0000000000..624b3aa0bd --- /dev/null +++ b/ci/compute-sanitizer-suppressions.xml @@ -0,0 +1,249 @@ + + + + Initcheck + + Uninitialized __global__ memory read of size 4 bytes + 4 + + + .* + + + + .*libcuda.so.* + + + cusparseCsr2cscEx2 + .*libcusparse.so.* + + + + + Initcheck + + Uninitialized __global__ memory read of size 4 bytes + 4 + + + ThreadLoad + + + + .*libcuda.so.* + + + libcudart.* + + + cudaLaunchKernel + + + .*cub::.*::Device(Segmented)?(Reduce|Scan)(SingleTile)?Kernel.* + + + + + Initcheck + + Uninitialized __global__ memory read of size 2 bytes + 2 + + + ThreadLoad + + + + .*libcuda.so.* + + + libcudart.* + + + cudaLaunchKernel + + + .*cub::.*::Device(Segmented)?(Reduce|Scan)(SingleTile)?Kernel.* + + + + + Initcheck + + Uninitialized __global__ memory read of size 8 bytes + 8 + + + DeviceSegmentedReduceKernel + + + + Initcheck + + Uninitialized __global__ memory read of size 4 bytes + 4 + + + ThreadLoad + + + + .*libcuda.so.* + + + libcudart.* + + + libcudart.* + + + .*libcuopt.* + + + .*Device(Reduce|Scan)Kernel.* + + + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + 16 + + + + cuMemcpyDtoHAsync.* + .*libcuda.so.* + + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + + + + cuMemcpyAsync + .*libcuda.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + .*librmm.so.* + + + rmm::device_buffer::device_buffer + .*librmm.so.* + + + + + + Initcheck + + Uninitialized __global__ memory read + + + transform_kernel + + + + cuLaunchKernel_ptsz + .*libcuda.so.* + + + .*libcudart.so.* + + + cudaLaunchKernel_ptsz + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + + + + cuMemcpyAsync + .*libcuda.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + .*librmm.so.* + + + .*librmm.so.* + + + rmm::device_uvector.*::device_uvector + .*libcuopt.so.* + + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + + + + cuMemcpyDtoDAsync.* + .*libcuda.so.* + + + + + InitcheckApiError + Error + + Host API uninitialized memory access + + + + cuMemcpyAsync + .*libcuda.so.* + + + .*libcudart.so.* + + + .*libcudart.so.* + + + cudaMemcpyAsync + + + rmm::device_buffer::resize + .*librmm.so.* + + + + diff --git a/ci/test_python.sh b/ci/test_python.sh index 0a70e56fa7..4f91c83334 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -1,6 +1,6 @@ #!/bin/bash -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 set -euo pipefail @@ -74,5 +74,8 @@ timeout 20m ./ci/run_cuopt_server_pytests.sh \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cuopt-server-coverage.xml" \ --cov-report=term +rapids-logger "Test skills/ assets (Python, C, CLI)" +timeout 10m ./ci/test_skills_assets.sh + rapids-logger "Test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test_skills_assets.sh b/ci/test_skills_assets.sh new file mode 100755 index 0000000000..c75645cb93 --- /dev/null +++ b/ci/test_skills_assets.sh @@ -0,0 +1,155 @@ +#!/bin/bash + +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Run all assets under skills/ (Python, C, CLI) as part of conda Python test. +# Python: run each .py from its directory (server API clients need server on port 8000). +# C: compile and run each .c with libcuopt. +# CLI: run cuopt_cli on each sample .mps in API-CLI skill assets. + +set -euo pipefail + +# Use rapids-logger in CI; fall back to echo for local testing +if command -v rapids-logger &>/dev/null; then + log() { rapids-logger "$*"; } +else + log() { echo "[rapids-logger] $*"; } +fi + +REPO_ROOT="${REPO_ROOT:-$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)}" +SKILLS_ASSETS="${REPO_ROOT}/skills" +FAILED=() +SERVER_PID="" + +if [[ ! -d "${SKILLS_ASSETS}" ]]; then + log "No skills directory found, skipping skills asset tests" + exit 0 +fi + +# ---- Start cuOpt server for server API Python assets (port 8000) ---- +start_server() { + if ! python -c "import cuopt_server" 2>/dev/null; then + log "cuopt_server not available, server API assets will skip" + return + fi + python -m cuopt_server.cuopt_service --ip 127.0.0.1 --port 8000 &>/dev/null & + SERVER_PID=$! + for _ in {1..30}; do + if curl -s -o /dev/null http://127.0.0.1:8000/cuopt/health 2>/dev/null; then + log "cuOpt server started (port 8000) for server API assets" + return + fi + sleep 1 + done + log "cuOpt server did not become ready; server API assets will skip" + kill "${SERVER_PID}" 2>/dev/null || true + SERVER_PID="" +} +stop_server() { + if [[ -n "${SERVER_PID}" ]] && kill -0 "${SERVER_PID}" 2>/dev/null; then + log "Stopping cuOpt server (PID ${SERVER_PID})" + kill "${SERVER_PID}" 2>/dev/null || true + wait "${SERVER_PID}" 2>/dev/null || true + SERVER_PID="" + fi +} +trap stop_server EXIT +start_server + +# ---- Python assets ---- +log "Testing Python assets in skills/" +while IFS= read -r -d '' script; do + dir=$(dirname "$script") + name=$(basename "$script") + rel="${script#"$REPO_ROOT/"}" + log "Running Python asset: $rel" + if (cd "$dir" && python "$name"); then + log "PASS: $rel" + else + FAILED+=("$rel") + log "FAIL: $rel" + fi +done < <(find "${SKILLS_ASSETS}" -path "*/assets/*" -name "*.py" -type f -print0 | sort -z) + +# ---- C assets (compile and run; requires CONDA_PREFIX and a C compiler) ---- +CC="${CC:-}" +if [[ -z "${CC}" ]]; then + for c in gcc cc clang; do + if command -v "$c" &>/dev/null; then + CC="$c" + break + fi + done +fi +if [[ -n "${CONDA_PREFIX:-}" ]]; then + if [[ -z "${CC}" ]]; then + log "No C compiler found; installing c-compiler in conda environment" + if command -v mamba &>/dev/null; then + mamba install -y -c conda-forge c-compiler + else + conda install -y -c conda-forge c-compiler + fi + for c in gcc cc clang; do + if command -v "$c" &>/dev/null; then + CC="$c" + break + fi + done + if [[ -z "${CC}" ]]; then + log "C compiler still not found after install. Set CC or install gcc/cc/clang." + exit 1 + fi + fi + INCLUDE_PATH="${CONDA_PREFIX}/include" + LIB_PATH="${CONDA_PREFIX}/lib" + export LD_LIBRARY_PATH="${LIB_PATH}:${LD_LIBRARY_PATH:-}" + + log "Testing C assets in skills (using ${CC})" + while IFS= read -r -d '' cfile; do + dir=$(dirname "$cfile") + base=$(basename "$cfile" .c) + rel="${cfile#"$REPO_ROOT/"}" + log "Building and running C asset: $rel" + if ! (cd "$dir" && "${CC}" -I"${INCLUDE_PATH}" -L"${LIB_PATH}" -o "$base" "$(basename "$cfile")" -lcuopt); then + FAILED+=("$rel (build)") + log "FAIL: $rel (build)" + continue + fi + if [[ "$base" == "mps_solver" ]]; then + run_cmd=(./"$base" data/sample.mps) + else + run_cmd=(./"$base") + fi + if (cd "$dir" && "${run_cmd[@]}"); then + log "PASS: $rel" + else + FAILED+=("$rel") + log "FAIL: $rel" + fi + done < <(find "${SKILLS_ASSETS}" -path "*/assets/*" -name "*.c" -type f -print0 | sort -z) +else + log "CONDA_PREFIX not set, skipping C asset tests" +fi + +# ---- CLI assets (cuopt_cli with sample MPS files) ---- +log "Testing CLI assets in skills/" +while IFS= read -r -d '' mps; do + rel="${mps#"$REPO_ROOT/"}" + log "Running CLI asset: $rel" + if cuopt_cli "$mps" --time-limit 10; then + log "PASS: $rel" + else + FAILED+=("$rel") + log "FAIL: $rel" + fi +done < <(find "${SKILLS_ASSETS}" -path "*/cuopt-*-api-cli/assets/*" -name "*.mps" -type f -print0 | sort -z) + +if [[ ${#FAILED[@]} -gt 0 ]]; then + log "The following skills assets failed:" + printf '%s\n' "${FAILED[@]}" + exit 1 +fi + +log "All skills assets (Python, C, CLI) passed." +exit 0 diff --git a/ci/utils/sync_skills_version.sh b/ci/utils/sync_skills_version.sh new file mode 100755 index 0000000000..1dfca8a663 --- /dev/null +++ b/ci/utils/sync_skills_version.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Sync skills/plugin version from repo root VERSION file. +# Run from repo root: ./ci/utils/sync_skills_version.sh +set -e + +REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" +cd "$REPO_ROOT" + +VERSION_FILE="${REPO_ROOT}/VERSION" +if [[ ! -f "${VERSION_FILE}" ]]; then + echo "ERROR: VERSION file not found at ${VERSION_FILE}" + exit 1 +fi + +RELEASE_VERSION=$(tr -d ' \n\r' < "${VERSION_FILE}") +if [[ -z "${RELEASE_VERSION}" ]]; then + echo "ERROR: VERSION file is empty" + exit 1 +fi + +echo "Syncing skills version to ${RELEASE_VERSION} (from VERSION)..." + +# .cursor-plugin/plugin.json and gemini-extension.json: top-level "version" +for f in .cursor-plugin/plugin.json gemini-extension.json; do + if [[ -f "$f" ]]; then + sed -i "s/\"version\": \"[^\"]*\"/\"version\": \"${RELEASE_VERSION}\"/" "$f" + echo " updated $f" + fi +done + +# .claude-plugin/marketplace.json: metadata.version +if [[ -f ".claude-plugin/marketplace.json" ]]; then + sed -i "s/\"version\": \"[^\"]*\"/\"version\": \"${RELEASE_VERSION}\"/" .claude-plugin/marketplace.json + echo " updated .claude-plugin/marketplace.json" +fi + +# skills/*/SKILL.md: add or update version in YAML frontmatter (after name:) +SKILLS_DIR="skills" +for skill_md in "${SKILLS_DIR}"/*/SKILL.md; do + [[ -f "$skill_md" ]] || continue + if grep -q '^version:' "$skill_md" 2>/dev/null; then + sed -i "s/^version:.*/version: \"${RELEASE_VERSION}\"/" "$skill_md" + else + sed -i "/^name:/a version: \"${RELEASE_VERSION}\"" "$skill_md" + fi + echo " updated $skill_md" +done + +echo "Done. Skills version is now ${RELEASE_VERSION}." diff --git a/ci/utils/validate_skills.sh b/ci/utils/validate_skills.sh new file mode 100755 index 0000000000..6577a45a1e --- /dev/null +++ b/ci/utils/validate_skills.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Validate cuOpt agent skills and plugin manifests. +# Run from repo root: ./ci/utils/validate_skills.sh +set -e + +REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)" +cd "$REPO_ROOT" + +SKILLS_DIR="skills" +CLAUDE_MARKETPLACE=".claude-plugin/marketplace.json" +AGENTS_MD="agents/AGENTS.md" +VERSION_FILE="VERSION" +ERRORS=0 + +# Check skills version matches release version (VERSION file) +if [[ -f "${VERSION_FILE}" ]]; then + RELEASE_VERSION=$(tr -d ' \n\r' < "${VERSION_FILE}") + for f in .cursor-plugin/plugin.json gemini-extension.json .claude-plugin/marketplace.json; do + if [[ -f "$f" ]]; then + FILE_VERSION=$(grep '"version"' "$f" | sed -n 's/.*"version"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p' | head -1) + if [[ "${FILE_VERSION}" != "${RELEASE_VERSION}" ]]; then + echo "ERROR: $f has version \"${FILE_VERSION}\" but VERSION file has \"${RELEASE_VERSION}\". Run: ./ci/utils/sync_skills_version.sh" + ERRORS=$((ERRORS + 1)) + fi + fi + done +fi + +echo "Validating skills in $SKILLS_DIR..." + +for dir in "$SKILLS_DIR"/*/; do + [ -d "$dir" ] || continue + name=$(basename "$dir") + skill_md="${dir}SKILL.md" + if [ ! -f "$skill_md" ]; then + echo "ERROR: $name missing SKILL.md" + ERRORS=$((ERRORS + 1)) + continue + fi + if ! grep -q '^name:' "$skill_md" || ! grep -q '^description:' "$skill_md"; then + echo "ERROR: $name/SKILL.md missing frontmatter (name: or description:)" + ERRORS=$((ERRORS + 1)) + fi + if [[ -f "${VERSION_FILE}" ]]; then + RELEASE_VERSION=$(tr -d ' \n\r' < "${VERSION_FILE}") + if grep -q '^version:' "$skill_md" 2>/dev/null; then + SKILL_VERSION=$(sed -n 's/^version:[^0-9]*\([0-9][0-9.]*\).*/\1/p' "$skill_md" | head -1) + if [[ "${SKILL_VERSION}" != "${RELEASE_VERSION}" ]]; then + echo "ERROR: $name/SKILL.md has version \"${SKILL_VERSION}\" but VERSION file has \"${RELEASE_VERSION}\". Run: ./ci/utils/sync_skills_version.sh" + ERRORS=$((ERRORS + 1)) + fi + else + echo "ERROR: $name/SKILL.md missing version in frontmatter. Run: ./ci/utils/sync_skills_version.sh" + ERRORS=$((ERRORS + 1)) + fi + fi +done + +if [ -f "$CLAUDE_MARKETPLACE" ]; then + echo "Validating $CLAUDE_MARKETPLACE..." + while IFS= read -r line; do + path=$(echo "$line" | sed -n 's/.*"source"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/p') + [ -z "$path" ] && continue + path="${path#./}" + if [ -n "$path" ] && [ ! -f "$path/SKILL.md" ]; then + echo "ERROR: marketplace.json source missing SKILL.md: $path" + ERRORS=$((ERRORS + 1)) + fi + done < <(grep '"source"' "$CLAUDE_MARKETPLACE" || true) + for dir in "$SKILLS_DIR"/*/; do + [ -d "$dir" ] || continue + name=$(basename "$dir") + if ! grep -q "\"name\": \"$name\"" "$CLAUDE_MARKETPLACE"; then + echo "ERROR: skill $name not listed in $CLAUDE_MARKETPLACE" + ERRORS=$((ERRORS + 1)) + fi + done +fi + +if [ -f "$AGENTS_MD" ]; then + echo "Validating $AGENTS_MD references..." + for dir in "$SKILLS_DIR"/*/; do + [ -d "$dir" ] || continue + name=$(basename "$dir") + if ! grep -q "$name" "$AGENTS_MD"; then + echo "ERROR: agents/AGENTS.md does not reference skill: $name" + ERRORS=$((ERRORS + 1)) + fi + done +fi + +if [ $ERRORS -gt 0 ]; then + echo "Validation failed with $ERRORS error(s)." + exit 1 +fi +echo "All validations passed." diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 3cee401c5c..ecef112dd5 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -36,13 +36,13 @@ dependencies: - librmm==26.4.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 -- msgpack-python==1.1.0 +- msgpack-python==1.1.2 - myst-nb - myst-parser - ninja - notebook -- numba-cuda>=0.22.1,<0.23.0 -- numba>=0.60.0 +- numba-cuda>=0.22.1 +- numba>=0.60.0,<0.65.0 - numpy>=1.23.5,<3.0 - numpydoc - pandas>=2.0 @@ -54,7 +54,7 @@ dependencies: - pyrsistent - pytest-cov - pytest<9.0 -- python>=3.11,<3.14 +- python>=3.11,<3.15 - pyyaml>=6.0.0 - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 5632c8c9c7..35c825280c 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -36,13 +36,13 @@ dependencies: - librmm==26.4.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 -- msgpack-python==1.1.0 +- msgpack-python==1.1.2 - myst-nb - myst-parser - ninja - notebook -- numba-cuda>=0.22.1,<0.23.0 -- numba>=0.60.0 +- numba-cuda>=0.22.1 +- numba>=0.60.0,<0.65.0 - numpy>=1.23.5,<3.0 - numpydoc - pandas>=2.0 @@ -54,7 +54,7 @@ dependencies: - pyrsistent - pytest-cov - pytest<9.0 -- python>=3.11,<3.14 +- python>=3.11,<3.15 - pyyaml>=6.0.0 - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index add21cbb2f..2b717d4e98 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -36,13 +36,13 @@ dependencies: - librmm==26.4.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 -- msgpack-python==1.1.0 +- msgpack-python==1.1.2 - myst-nb - myst-parser - ninja - notebook -- numba-cuda>=0.22.1,<0.23.0 -- numba>=0.60.0 +- numba-cuda>=0.22.1 +- numba>=0.60.0,<0.65.0 - numpy>=1.23.5,<3.0 - numpydoc - pandas>=2.0 @@ -54,7 +54,7 @@ dependencies: - pyrsistent - pytest-cov - pytest<9.0 -- python>=3.11,<3.14 +- python>=3.11,<3.15 - pyyaml>=6.0.0 - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 0fa31c7961..f605a83f3b 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -36,13 +36,13 @@ dependencies: - librmm==26.4.*,>=0.0.0a0 - make - msgpack-numpy==0.4.8 -- msgpack-python==1.1.0 +- msgpack-python==1.1.2 - myst-nb - myst-parser - ninja - notebook -- numba-cuda>=0.22.1,<0.23.0 -- numba>=0.60.0 +- numba-cuda>=0.22.1 +- numba>=0.60.0,<0.65.0 - numpy>=1.23.5,<3.0 - numpydoc - pandas>=2.0 @@ -54,7 +54,7 @@ dependencies: - pyrsistent - pytest-cov - pytest<9.0 -- python>=3.11,<3.14 +- python>=3.11,<3.15 - pyyaml>=6.0.0 - rapids-build-backend>=0.4.0,<0.5.0 - rapids-logger==0.2.*,>=0.0.0a0 diff --git a/conda/recipes/cuopt-server/recipe.yaml b/conda/recipes/cuopt-server/recipe.yaml index 8c2875fc49..58227487f8 100644 --- a/conda/recipes/cuopt-server/recipe.yaml +++ b/conda/recipes/cuopt-server/recipe.yaml @@ -28,14 +28,14 @@ build: requirements: host: - pip - - python =${{ py_version }} + - python-gil =${{ py_version }} - rapids-build-backend >=0.4.0,<0.5.0 - setuptools>=77.0.0 run: - cuopt =${{ version }} - fastapi >=0.104.1 - jsonref =1.1.0 - - msgpack-python =1.1.0 + - msgpack-python =1.1.2 - msgpack-numpy =0.4.8 - numpy >=1.23,<3.0 - pandas>=2 diff --git a/conda/recipes/cuopt-sh-client/recipe.yaml b/conda/recipes/cuopt-sh-client/recipe.yaml index c0a1d0dc66..d471f1917f 100644 --- a/conda/recipes/cuopt-sh-client/recipe.yaml +++ b/conda/recipes/cuopt-sh-client/recipe.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 schema_version: 1 @@ -26,10 +26,10 @@ build: requirements: host: - pip - - python =${{ py_version }} + - python-gil =${{ py_version }} - setuptools>=77.0.0 run: - - msgpack-python =1.1.0 + - msgpack-python =1.1.2 - python - requests diff --git a/conda/recipes/cuopt/recipe.yaml b/conda/recipes/cuopt/recipe.yaml index 0b4c8abc4f..87f0ee1057 100644 --- a/conda/recipes/cuopt/recipe.yaml +++ b/conda/recipes/cuopt/recipe.yaml @@ -75,7 +75,7 @@ requirements: - libcuopt =${{ version }} - pip - pylibraft =${{ minor_version }} - - python =${{ py_version }} + - python-gil =${{ py_version }} - rapids-build-backend >=0.4.0,<0.5.0 - rmm =${{ minor_version }} - scikit-build-core>=0.11.0 @@ -89,8 +89,8 @@ requirements: - cupy >=13.6.0 - h5py - libcuopt =${{ version }} - - numba >=0.60.0 - - numba-cuda>=0.22.1,<0.23.0 + - numba>=0.60.0,<0.65.0 + - numba-cuda>=0.22.1 - numpy >=1.23,<3.0 - pandas >=2.0 - pylibraft =${{ minor_version }} diff --git a/conda/recipes/mps-parser/recipe.yaml b/conda/recipes/mps-parser/recipe.yaml index 859bae6fae..7e423715b4 100644 --- a/conda/recipes/mps-parser/recipe.yaml +++ b/conda/recipes/mps-parser/recipe.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION. # SPDX-License-Identifier: Apache-2.0 schema_version: 1 @@ -41,7 +41,7 @@ requirements: - cython >=3.0.0 - libmps-parser =${{ version }} - pip - - python =${{ py_version }} + - python-gil =${{ py_version }} - rapids-build-backend >=0.4.0,<0.5.0 - scikit-build-core >=0.11.0 run: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 24dc0276af..cb6fd779d9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -235,7 +235,7 @@ FetchContent_MakeAvailable(papilo) FetchContent_Declare( pslp GIT_REPOSITORY "https://github.com/dance858/PSLP.git" - GIT_TAG "v0.0.4" + GIT_TAG "v0.0.8" GIT_PROGRESS TRUE EXCLUDE_FROM_ALL SYSTEM @@ -386,6 +386,17 @@ target_link_libraries(cuopt ${CUOPT_PRIVATE_CUDA_LIBS} ) +# find_path(PAPI_INCLUDE_DIR papi.h) +# find_library(PAPI_LIBRARY papi) + +# if (PAPI_INCLUDE_DIR AND PAPI_LIBRARY) +# message(STATUS "Found PAPI in ${PAPI_INCLUDE_DIR}") +# target_include_directories(cuopt PRIVATE ${PAPI_INCLUDE_DIR}) +# target_link_libraries(cuopt PRIVATE ${PAPI_LIBRARY}) +# else() +# message(FATAL_ERROR "Could not find PAPI") +# endif() + # ################################################################################################## # - generate tests -------------------------------------------------------------------------------- @@ -543,6 +554,12 @@ endif() option(BUILD_MIP_BENCHMARKS "Build MIP benchmarks" OFF) if(BUILD_MIP_BENCHMARKS AND NOT BUILD_LP_ONLY) add_executable(solve_MIP ../benchmarks/linear_programming/cuopt/run_mip.cpp) + target_include_directories(solve_MIP + PRIVATE + "${CMAKE_CURRENT_SOURCE_DIR}/src" + PUBLIC + "$" + ) set_target_properties(solve_MIP PROPERTIES diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index e9b1ee3719..899a3118b3 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -77,8 +77,8 @@ inline auto make_async() { return std::make_shared& settings) { - return cuopt::init_logger_t(settings.get_parameter(CUOPT_LOG_FILE), - settings.get_parameter(CUOPT_LOG_TO_CONSOLE)); + return cuopt::init_logger_t(settings.template get_parameter(CUOPT_LOG_FILE), + settings.template get_parameter(CUOPT_LOG_TO_CONSOLE)); } /** @@ -287,6 +287,17 @@ int main(int argc, char* argv[]) .implicit_value(true); std::map arg_name_to_param_name; + + // Register --pdlp-precision with string-to-int mapping so that it flows + // through the settings_strings map like other settings. + program.add_argument("--pdlp-precision") + .help( + "PDLP precision mode. default: native type, single: FP32 internally, " + "double: FP64 explicitly, mixed: mixed-precision SpMV (FP32 matrix, FP64 vectors).") + .default_value(std::string("-1")) + .choices("default", "single", "double", "mixed", "-1", "0", "1", "2"); + arg_name_to_param_name["--pdlp-precision"] = CUOPT_PDLP_PRECISION; + { // Add all solver settings as arguments cuopt::linear_programming::solver_settings_t dummy_settings; @@ -341,11 +352,20 @@ int main(int argc, char* argv[]) return 1; } + // Map symbolic pdlp-precision names to integer values + static const std::map precision_name_to_value = { + {"default", "-1"}, {"single", "0"}, {"double", "1"}, {"mixed", "2"}}; + // Read everything as a string std::map settings_strings; for (auto& [arg_name, param_name] : arg_name_to_param_name) { if (program.is_used(arg_name.c_str())) { - settings_strings[param_name] = program.get(arg_name.c_str()); + auto val = program.get(arg_name.c_str()); + if (param_name == CUOPT_PDLP_PRECISION) { + auto it = precision_name_to_value.find(val); + if (it != precision_name_to_value.end()) { val = it->second; } + } + settings_strings[param_name] = val; } } // Get the values diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 7eb0aa07d6..a1584dd44b 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -64,6 +64,7 @@ #define CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS "mip_mixed_integer_rounding_cuts" #define CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS "mip_mixed_integer_gomory_cuts" #define CUOPT_MIP_KNAPSACK_CUTS "mip_knapsack_cuts" +#define CUOPT_MIP_CLIQUE_CUTS "mip_clique_cuts" #define CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS "mip_strong_chvatal_gomory_cuts" #define CUOPT_MIP_REDUCED_COST_STRENGTHENING "mip_reduced_cost_strengthening" #define CUOPT_MIP_CUT_CHANGE_THRESHOLD "mip_cut_change_threshold" @@ -74,10 +75,36 @@ #define CUOPT_NUM_GPUS "num_gpus" #define CUOPT_USER_PROBLEM_FILE "user_problem_file" #define CUOPT_RANDOM_SEED "random_seed" - -/* @brief MIP determinism mode constants */ -#define CUOPT_MODE_OPPORTUNISTIC 0 -#define CUOPT_MODE_DETERMINISTIC 1 +#define CUOPT_PDLP_PRECISION "pdlp_precision" + +/* @brief MIP determinism mode flags (bitset) */ +#define CUOPT_DETERMINISM_NONE 0x0 +#define CUOPT_DETERMINISM_BB \ + 0x1 // matches the previous value of '1' which was for B&B-only determinism in the previous + // rleease +#define CUOPT_DETERMINISM_GPU_HEURISTICS 0x2 +#define CUOPT_DETERMINISM_FULL (CUOPT_DETERMINISM_BB | CUOPT_DETERMINISM_GPU_HEURISTICS) + +/* Backward compatibility aliases */ +#define CUOPT_MODE_OPPORTUNISTIC CUOPT_DETERMINISM_NONE +#define CUOPT_MODE_DETERMINISTIC CUOPT_DETERMINISM_FULL +#define CUOPT_MODE_DETERMINISTIC_BB CUOPT_DETERMINISM_BB +#define CUOPT_MODE_DETERMINISTIC_GPU_HEURISTICS CUOPT_DETERMINISM_GPU_HEURISTICS + +/* @brief MIP solution origin constants */ +#define CUOPT_MIP_SOLUTION_ORIGIN_UNKNOWN 0 +#define CUOPT_MIP_SOLUTION_ORIGIN_BRANCH_AND_BOUND 1 +#define CUOPT_MIP_SOLUTION_ORIGIN_BRANCH_AND_BOUND_DIVING 2 +#define CUOPT_MIP_SOLUTION_ORIGIN_FEASIBILITY_JUMP 3 +#define CUOPT_MIP_SOLUTION_ORIGIN_CPU_FEASIBILITY_JUMP 4 +#define CUOPT_MIP_SOLUTION_ORIGIN_LOCAL_SEARCH 5 +#define CUOPT_MIP_SOLUTION_ORIGIN_QUICK_FEASIBLE 6 +#define CUOPT_MIP_SOLUTION_ORIGIN_LP_ROUNDING 7 +#define CUOPT_MIP_SOLUTION_ORIGIN_RECOMBINATION 8 +#define CUOPT_MIP_SOLUTION_ORIGIN_SUB_MIP 9 +#define CUOPT_MIP_SOLUTION_ORIGIN_USER_INITIAL 10 +#define CUOPT_MIP_SOLUTION_ORIGIN_USER_INJECTED 11 +#define CUOPT_MIP_SOLUTION_ORIGIN_RINS 12 /* @brief LP/MIP termination status constants */ #define CUOPT_TERIMINATION_STATUS_NO_TERMINATION 0 @@ -125,6 +152,12 @@ #define CUOPT_METHOD_DUAL_SIMPLEX 2 #define CUOPT_METHOD_BARRIER 3 +/* @brief PDLP precision mode constants */ +#define CUOPT_PDLP_DEFAULT_PRECISION -1 +#define CUOPT_PDLP_SINGLE_PRECISION 0 +#define CUOPT_PDLP_DOUBLE_PRECISION 1 +#define CUOPT_PDLP_MIXED_PRECISION 2 + /* @brief File format constants for problem I/O */ #define CUOPT_FILE_FORMAT_MPS 0 diff --git a/cpp/include/cuopt/linear_programming/cuopt_c.h b/cpp/include/cuopt/linear_programming/cuopt_c.h index 4c4d44c764..d49cd703af 100644 --- a/cpp/include/cuopt/linear_programming/cuopt_c.h +++ b/cpp/include/cuopt/linear_programming/cuopt_c.h @@ -71,6 +71,12 @@ typedef int32_t cuopt_int_t; typedef int64_t cuopt_int_t; #endif +typedef struct { + uint64_t struct_size; + uint32_t origin; + double work_timestamp; +} cuOptMIPSolutionCallbackInfo; + /** * @brief Get the size of the float type. * @@ -713,6 +719,25 @@ typedef void (*cuOptMIPGetSolutionCallback)(const cuopt_float_t* solution, const cuopt_float_t* solution_bound, void* user_data); +/** + * @brief Type of callback for receiving incumbent MIP solutions with extensible metadata. + * + * @param[in] solution - Pointer to incumbent solution values. + * @param[in] objective_value - Pointer to incumbent objective value. + * @param[in] solution_bound - Pointer to current solution (dual/user) bound. + * @param[in] callback_info - Pointer to callback metadata. `struct_size` is always set and can be + * used to detect future extensions safely. + * @param[in] user_data - Pointer to user data. + * @note All pointer arguments refer to host memory and are only valid during the callback + * invocation. Do not pass device/GPU pointers. Copy any data you need to keep after the callback + * returns. + */ +typedef void (*cuOptMIPGetSolutionCallbackExt)(const cuopt_float_t* solution, + const cuopt_float_t* objective_value, + const cuopt_float_t* solution_bound, + const cuOptMIPSolutionCallbackInfo* callback_info, + void* user_data); + /** * @brief Type of callback for injecting MIP solutions with user context. * @@ -748,6 +773,19 @@ cuopt_int_t cuOptSetMIPGetSolutionCallback(cuOptSolverSettings settings, cuOptMIPGetSolutionCallback callback, void* user_data); +/** + * @brief Register an extended callback to receive incumbent MIP solutions with origin metadata. + * + * @param[in] settings - The solver settings object. + * @param[in] callback - Callback function to receive incumbent solutions and callback metadata. + * @param[in] user_data - User-defined pointer passed through to the callback. + * + * @return A status code indicating success or failure. + */ +cuopt_int_t cuOptSetMIPGetSolutionCallbackExt(cuOptSolverSettings settings, + cuOptMIPGetSolutionCallbackExt callback, + void* user_data); + /** * @brief Register a callback to inject MIP solutions. * diff --git a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp index 6d32cd5ed9..8545732e43 100644 --- a/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/mip/solver_settings.hpp @@ -93,13 +93,23 @@ class mip_solver_settings_t { i_t mir_cuts = -1; i_t mixed_integer_gomory_cuts = -1; i_t knapsack_cuts = -1; + i_t clique_cuts = -1; i_t strong_chvatal_gomory_cuts = -1; i_t reduced_cost_strengthening = -1; - f_t cut_change_threshold = 1e-3; + f_t cut_change_threshold = -1.0; f_t cut_min_orthogonality = 0.5; i_t mip_batch_pdlp_strong_branching = 0; i_t num_gpus = 1; bool log_to_console = true; + // Scales deterministic CPUFJ producer work units before they are exposed to B&B replay/sync. + f_t cpufj_work_unit_scale = 1.0; + // Scales deterministic GPU heuristic producer work units/timestamps exposed to B&B replay/sync. + f_t gpu_heur_work_unit_scale = 1.0; + // Scales deterministic B&B work units (LP iterations) exposed to the shared deterministic + // timeline. + f_t bnb_work_unit_scale = 1.0; + // When true, GPU heuristics wait for B&B to finish root solve before starting. + bool gpu_heur_wait_for_exploration = true; std::string log_file; std::string sol_file; @@ -110,15 +120,15 @@ class mip_solver_settings_t { bool mip_scaling = false; presolver_t presolver{presolver_t::Default}; /** - * @brief Determinism mode for MIP solver. + * @brief Determinism mode for MIP solver (bitset). * - * Controls the determinism behavior of the MIP solver: - * - CUOPT_MODE_OPPORTUNISTIC (0): Default mode, allows non-deterministic - * parallelism for better performance - * - CUOPT_MODE_DETERMINISTIC (1): Ensures deterministic results across runs - * at potential cost of performance + * Bitwise OR of CUOPT_DETERMINISM_* flags: + * - CUOPT_DETERMINISM_NONE (0x0): Opportunistic, non-deterministic. + * - CUOPT_DETERMINISM_BB (0x1): Deterministic B&B tree exploration. + * - CUOPT_DETERMINISM_GPU_HEURISTICS (0x2): Deterministic GPU heuristic pipeline. + * - CUOPT_DETERMINISM_FULL (0x3): Both B&B and GPU heuristics deterministic. */ - int determinism_mode = CUOPT_MODE_OPPORTUNISTIC; + int determinism_mode = CUOPT_DETERMINISM_NONE; /** * @brief Random seed for the MIP solver. * diff --git a/cpp/include/cuopt/linear_programming/optimization_problem.hpp b/cpp/include/cuopt/linear_programming/optimization_problem.hpp index d0f624ebdf..df78dd17c7 100644 --- a/cpp/include/cuopt/linear_programming/optimization_problem.hpp +++ b/cpp/include/cuopt/linear_programming/optimization_problem.hpp @@ -312,6 +312,14 @@ class optimization_problem_t : public optimization_problem_interface_t // Conversion // ============================================================================ + /** + * @brief Convert this problem to a different floating-point precision. + * + * @tparam other_f_t Target floating-point type (e.g. float when this is double) + */ + template + optimization_problem_t convert_to_other_prec(rmm::cuda_stream_view stream) const; + /** * @brief Returns nullptr since this is already a GPU problem. * @return nullptr diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index f6ad4c8619..d3f59144cc 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -63,6 +63,21 @@ enum method_t : int { Barrier = CUOPT_METHOD_BARRIER }; +/** + * @brief Enum representing the PDLP precision modes. + * + * DefaultPrecision: Use the type of the problem (FP64 for double problems). + * SinglePrecision: Run PDLP internally in FP32, converting inputs and outputs. + * DoublePrecision: Explicitly run in FP64 (same as default for double problems). + * MixedPrecision: Use mixed precision SpMV (FP32 matrix with FP64 vectors/compute). + */ +enum pdlp_precision_t : int { + DefaultPrecision = CUOPT_PDLP_DEFAULT_PRECISION, + SinglePrecision = CUOPT_PDLP_SINGLE_PRECISION, + DoublePrecision = CUOPT_PDLP_DOUBLE_PRECISION, + MixedPrecision = CUOPT_PDLP_MIXED_PRECISION +}; + template class pdlp_solver_settings_t { public: @@ -224,7 +239,7 @@ class pdlp_solver_settings_t { bool detect_infeasibility{false}; bool strict_infeasibility{false}; i_t iteration_limit{std::numeric_limits::max()}; - double time_limit{std::numeric_limits::infinity()}; + f_t time_limit{std::numeric_limits::infinity()}; pdlp_solver_mode_t pdlp_solver_mode{pdlp_solver_mode_t::Stable3}; bool log_to_console{true}; std::string log_file{""}; @@ -239,6 +254,7 @@ class pdlp_solver_settings_t { i_t ordering{-1}; i_t barrier_dual_initial_point{-1}; bool eliminate_dense_columns{true}; + pdlp_precision_t pdlp_precision{pdlp_precision_t::DefaultPrecision}; bool save_best_primal_so_far{false}; bool first_primal_feasible{false}; presolver_t presolver{presolver_t::Default}; diff --git a/cpp/include/cuopt/linear_programming/utilities/internals.hpp b/cpp/include/cuopt/linear_programming/utilities/internals.hpp index fc90dec04f..3af713fd2f 100644 --- a/cpp/include/cuopt/linear_programming/utilities/internals.hpp +++ b/cpp/include/cuopt/linear_programming/utilities/internals.hpp @@ -8,11 +8,14 @@ #pragma once #include +#include #include #include #include #include +#include + namespace cuopt { namespace internals { @@ -21,7 +24,51 @@ class Callback { virtual ~Callback() {} }; -enum class base_solution_callback_type { GET_SOLUTION, SET_SOLUTION }; +enum class mip_solution_origin_t : uint32_t { + UNKNOWN = CUOPT_MIP_SOLUTION_ORIGIN_UNKNOWN, + BRANCH_AND_BOUND_NODE = CUOPT_MIP_SOLUTION_ORIGIN_BRANCH_AND_BOUND, + BRANCH_AND_BOUND_DIVING = CUOPT_MIP_SOLUTION_ORIGIN_BRANCH_AND_BOUND_DIVING, + FEASIBILITY_JUMP = CUOPT_MIP_SOLUTION_ORIGIN_FEASIBILITY_JUMP, + CPU_FEASIBILITY_JUMP = CUOPT_MIP_SOLUTION_ORIGIN_CPU_FEASIBILITY_JUMP, + LOCAL_SEARCH = CUOPT_MIP_SOLUTION_ORIGIN_LOCAL_SEARCH, + QUICK_FEASIBLE = CUOPT_MIP_SOLUTION_ORIGIN_QUICK_FEASIBLE, + LP_ROUNDING = CUOPT_MIP_SOLUTION_ORIGIN_LP_ROUNDING, + RECOMBINATION = CUOPT_MIP_SOLUTION_ORIGIN_RECOMBINATION, + SUB_MIP = CUOPT_MIP_SOLUTION_ORIGIN_SUB_MIP, + USER_INITIAL = CUOPT_MIP_SOLUTION_ORIGIN_USER_INITIAL, + USER_INJECTED = CUOPT_MIP_SOLUTION_ORIGIN_USER_INJECTED, + RINS = CUOPT_MIP_SOLUTION_ORIGIN_RINS, +}; + +constexpr const char* mip_solution_origin_to_string(mip_solution_origin_t origin) +{ + switch (origin) { + case mip_solution_origin_t::UNKNOWN: return "unknown"; + case mip_solution_origin_t::BRANCH_AND_BOUND_NODE: return "branch_and_bound_node"; + case mip_solution_origin_t::BRANCH_AND_BOUND_DIVING: return "branch_and_bound_diving"; + case mip_solution_origin_t::FEASIBILITY_JUMP: return "feasibility_jump"; + case mip_solution_origin_t::LOCAL_SEARCH: return "local_search"; + case mip_solution_origin_t::QUICK_FEASIBLE: return "quick_feasible"; + case mip_solution_origin_t::USER_INITIAL: return "user_initial"; + case mip_solution_origin_t::LP_ROUNDING: return "lp_rounding"; + case mip_solution_origin_t::RECOMBINATION: return "recombination"; + case mip_solution_origin_t::SUB_MIP: return "sub_mip"; + case mip_solution_origin_t::CPU_FEASIBILITY_JUMP: return "cpu_feasibility_jump"; + case mip_solution_origin_t::USER_INJECTED: return "user_injected"; + case mip_solution_origin_t::RINS: return "rins"; + default: return "unknown"; + } +} + +struct mip_solution_callback_info_t { + uint64_t struct_size{sizeof(mip_solution_callback_info_t)}; + mip_solution_origin_t origin{mip_solution_origin_t::UNKNOWN}; + double work_timestamp{-1.0}; +}; + +// get_solution_ext was added to support passing additional information to the get_solution callback +// without inducing a breaking ABI change +enum class base_solution_callback_type { GET_SOLUTION, GET_SOLUTION_EXT, SET_SOLUTION }; class base_solution_callback_t : public Callback { public: @@ -55,6 +102,19 @@ class get_solution_callback_t : public base_solution_callback_t { } }; +class get_solution_callback_ext_t : public base_solution_callback_t { + public: + virtual void get_solution(void* data, + void* objective_value, + void* solution_bound, + const mip_solution_callback_info_t* callback_info, + void* user_data) = 0; + base_solution_callback_type get_type() const override + { + return base_solution_callback_type::GET_SOLUTION_EXT; + } +}; + class set_solution_callback_t : public base_solution_callback_t { public: virtual void set_solution(void* data, diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index c99210bf34..411db36289 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -7,6 +7,9 @@ set(UTIL_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/utilities/seed_generator.cu ${CMAKE_CURRENT_SOURCE_DIR}/utilities/logger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/utilities/version_info.cpp ${CMAKE_CURRENT_SOURCE_DIR}/utilities/timestamp_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/models/fj_predictor/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/models/fj_predictor/quantize.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/utilities/work_unit_predictor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/utilities/work_unit_scheduler.cpp) add_subdirectory(pdlp) diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index 6ce9a4f4d0..5c2d62289d 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -5,11 +5,14 @@ */ /* clang-format on */ +#include + #include #include #include #include +#include #include #include @@ -24,6 +27,7 @@ #include #include +#include #include @@ -41,6 +45,13 @@ #include #include +// uncomment to enable detailed detemrinism logs +#undef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(logger, ...) \ + do { \ + logger.printf(__VA_ARGS__); \ + } while (0) + namespace cuopt::linear_programming::dual_simplex { namespace { @@ -241,9 +252,11 @@ template branch_and_bound_t::branch_and_bound_t( const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings, - f_t start_time) + f_t start_time, + std::shared_ptr> clique_table) : original_problem_(user_problem), settings_(solver_settings), + clique_table_(std::move(clique_table)), original_lp_(user_problem.handle_ptr, 1, 1, 1), Arow_(1, 1, 0), incumbent_(1), @@ -261,6 +274,23 @@ branch_and_bound_t::branch_and_bound_t( dualize_info_t dualize_info; convert_user_problem(original_problem_, settings_, original_lp_, new_slacks_, dualize_info); full_variable_types(original_problem_, original_lp_, var_types_); + assert(new_slacks_.size() == static_cast(original_lp_.num_rows)); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic LP init state: rows=%d cols=%d nnz=%zu slacks=%zu slack_hash=0x%x " + "rhs_hash=0x%x lower_hash=0x%x upper_hash=0x%x Acol_hash=0x%x Arow_hash=0x%x " + "Aval_hash=0x%x\n", + original_lp_.num_rows, + original_lp_.num_cols, + original_lp_.A.x.size(), + new_slacks_.size(), + detail::compute_hash(new_slacks_), + detail::compute_hash(original_lp_.rhs), + detail::compute_hash(original_lp_.lower), + detail::compute_hash(original_lp_.upper), + detail::compute_hash(original_lp_.A.col_start), + detail::compute_hash(original_lp_.A.i), + detail::compute_hash(original_lp_.A.x)); // Check slack #ifdef CHECK_SLACKS @@ -282,8 +312,9 @@ branch_and_bound_t::branch_and_bound_t( } #endif - upper_bound_ = inf; - root_objective_ = std::numeric_limits::quiet_NaN(); + upper_bound_ = inf; + root_objective_ = std::numeric_limits::quiet_NaN(); + root_lp_current_lower_bound_ = -inf; } template @@ -304,23 +335,45 @@ f_t branch_and_bound_t::get_lower_bound() } template -void branch_and_bound_t::report_heuristic(f_t obj) +void branch_and_bound_t::report_heuristic(f_t obj, double work_time) { if (is_running_) { f_t user_obj = compute_user_objective(original_lp_, obj); f_t user_lower = compute_user_objective(original_lp_, get_lower_bound()); std::string user_gap = user_mip_gap(user_obj, user_lower); - - settings_.log.printf( - "H %+13.6e %+10.6e %s %9.2f\n", - user_obj, - user_lower, - user_gap.c_str(), - toc(exploration_stats_.start_time)); + if (settings_.deterministic) { + const double reported_work = work_time >= 0.0 ? work_time : work_unit_context_.current_work(); + settings_.log.printf( + "H %+13.6e %+10.6e %s " + "%9.2f %9.2f\n", + user_obj, + user_lower, + user_gap.c_str(), + reported_work, + toc(exploration_stats_.start_time)); + } else { + settings_.log.printf( + "H %+13.6e %+10.6e %s %9.2f\n", + user_obj, + user_lower, + user_gap.c_str(), + toc(exploration_stats_.start_time)); + } } else { - settings_.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n", - compute_user_objective(original_lp_, obj), - toc(exploration_stats_.start_time)); + if (solving_root_relaxation_.load()) { + f_t user_obj = compute_user_objective(original_lp_, obj); + f_t user_lower = root_lp_current_lower_bound_.load(); + std::string user_gap = user_mip_gap(user_obj, user_lower); + settings_.log.printf( + "New solution from primal heuristics. Objective %+.6e. Gap %s. Time %.2f\n", + user_obj, + user_gap.c_str(), + toc(exploration_stats_.start_time)); + } else { + settings_.log.printf("New solution from primal heuristics. Objective %+.6e. Time %.2f\n", + compute_user_objective(original_lp_, obj), + toc(exploration_stats_.start_time)); + } } } @@ -433,6 +486,39 @@ void branch_and_bound_t::update_user_bound(f_t lower_bound) user_bound_callback_(user_lower); } +template +void branch_and_bound_t::emit_solution_callback( + std::vector& original_x, + f_t objective, + cuopt::internals::mip_solution_origin_t origin, + double work_timestamp) +{ + cuopt_assert(work_timestamp >= 0.0, "work_timestamp must not be negative"); + if (settings_.new_incumbent_callback != nullptr) { + settings_.log.debug("Publishing incumbent: obj=%g wut=%.6f origin=%s\n", + compute_user_objective(original_lp_, objective), + work_timestamp, + cuopt::internals::mip_solution_origin_to_string(origin)); + cuopt::internals::mip_solution_callback_info_t callback_info{}; + callback_info.origin = origin; + callback_info.work_timestamp = work_timestamp; + settings_.new_incumbent_callback(original_x, objective, callback_info, work_timestamp); + } +} + +template +void branch_and_bound_t::emit_solution_callback_from_crushed( + const std::vector& crushed_solution, + f_t objective, + cuopt::internals::mip_solution_origin_t origin, + double work_timestamp) +{ + if (settings_.new_incumbent_callback == nullptr) { return; } + std::vector original_x; + uncrush_primal_solution(original_problem_, original_lp_, crushed_solution, original_x); + emit_solution_callback(original_x, objective, origin, work_timestamp); +} + template void branch_and_bound_t::set_new_solution(const std::vector& solution) { @@ -444,7 +530,9 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu std::vector crushed_solution; crush_primal_solution( original_problem_, original_lp_, solution, new_slacks_, crushed_solution); - f_t obj = compute_objective(original_lp_, crushed_solution); + f_t obj = compute_objective(original_lp_, crushed_solution); + const uint32_t host_hash = detail::compute_hash(solution); + const uint32_t crushed_hash = detail::compute_hash(crushed_solution); mutex_original_lp_.unlock(); bool is_feasible = false; bool attempt_repair = false; @@ -467,8 +555,17 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu mutex_original_lp_.unlock(); mutex_upper_.lock(); if (is_feasible && obj < upper_bound_) { - upper_bound_ = obj; + const f_t previous_upper = upper_bound_; + upper_bound_ = obj; incumbent_.set_incumbent_solution(obj, crushed_solution); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic B&B incumbent update: source=external_direct prev_upper=%.16e " + "new_upper=%.16e obj=%.16e hash=0x%x\n", + previous_upper, + upper_bound_.load(), + obj, + detail::compute_hash(crushed_solution)); } else { attempt_repair = true; constexpr bool verbose = false; @@ -489,52 +586,80 @@ void branch_and_bound_t::set_new_solution(const std::vector& solu if (is_feasible) { report_heuristic(obj); } if (attempt_repair) { mutex_repair_.lock(); - repair_queue_.push_back(solution); + repair_queue_.push_back({solution, cuopt::internals::mip_solution_origin_t::UNKNOWN}); mutex_repair_.unlock(); } } template void branch_and_bound_t::queue_external_solution_deterministic( - const std::vector& solution, double work_unit_ts) + const std::vector& solution, + f_t user_objective, + double work_unit_ts, + cuopt::internals::mip_solution_origin_t origin) { - // In deterministic mode, queue the solution to be processed at the correct work unit timestamp - // This ensures deterministic ordering of solution events + // In deterministic mode, external solutions remain raw until their retirement + // horizon so that feasibility and repair use the retirement LP state. if (solution.size() != original_problem_.num_cols) { settings_.log.printf( "Solution size mismatch %ld %d\n", solution.size(), original_problem_.num_cols); return; } + const double bnb_work_total = work_unit_context_.current_work(); + const uint32_t host_hash = detail::compute_hash(solution); + settings_.log.printf( + "Queueing deterministic external incumbent: obj=%g heur_wut=%.3f bnb_wut=%.3f origin=%s " + "hash=0x%x\n", + user_objective, + work_unit_ts, + bnb_work_total, + cuopt::internals::mip_solution_origin_to_string(origin), + host_hash); mutex_original_lp_.lock(); - std::vector crushed_solution; - crush_primal_solution( - original_problem_, original_lp_, solution, new_slacks_, crushed_solution); - f_t obj = compute_objective(original_lp_, crushed_solution); - - // Validate solution before queueing - f_t primal_err; - f_t bound_err; - i_t num_fractional; - bool is_feasible = check_guess( - original_lp_, settings_, var_types_, crushed_solution, primal_err, bound_err, num_fractional); + const size_t lp_nnz = original_lp_.A.x.size(); + const i_t active_cut_rows = std::max((i_t)0, original_lp_.num_rows - original_problem_.num_rows); + const uint32_t new_slacks_hash = detail::compute_hash(new_slacks_); + const uint32_t rhs_hash = detail::compute_hash(original_lp_.rhs); + const uint32_t lower_hash = detail::compute_hash(original_lp_.lower); + const uint32_t upper_hash = detail::compute_hash(original_lp_.upper); + const uint32_t a_col_hash = detail::compute_hash(original_lp_.A.col_start); + const uint32_t a_row_hash = detail::compute_hash(original_lp_.A.i); + const uint32_t a_val_hash = detail::compute_hash(original_lp_.A.x); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic external crush ctx: wut=%.6f lp_rows=%d lp_cols=%d lp_nnz=%zu " + "active_cut_rows=%d " + "slacks=%zu slack_hash=0x%x rhs_hash=0x%x lower_hash=0x%x upper_hash=0x%x " + "Acol_hash=0x%x Arow_hash=0x%x Aval_hash=0x%x\n", + work_unit_ts, + original_lp_.num_rows, + original_lp_.num_cols, + lp_nnz, + active_cut_rows, + new_slacks_.size(), + new_slacks_hash, + rhs_hash, + lower_hash, + upper_hash, + a_col_hash, + a_row_hash, + a_val_hash); mutex_original_lp_.unlock(); - if (!is_feasible) { - // Queue the uncrushed solution for repair; it will be crushed at - // consumption time so that the crush reflects the current LP state - // (which may have gained slack columns from cuts added after this point). - mutex_repair_.lock(); - repair_queue_.push_back(solution); - mutex_repair_.unlock(); - return; - } - - // Queue the solution with its work unit timestamp mutex_heuristic_queue_.lock(); - heuristic_solution_queue_.push_back({obj, std::move(crushed_solution), 0, -1, 0, work_unit_ts}); + heuristic_solution_queue_.push_back({solution, user_objective, work_unit_ts, origin}); + const size_t heuristic_queue_size = heuristic_solution_queue_.size(); mutex_heuristic_queue_.unlock(); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic external queued_for_retirement: wut=%.6f user_obj=%.16e host_hash=0x%x " + "heur_q=%zu\n", + work_unit_ts, + user_objective, + host_hash, + heuristic_queue_size); } template @@ -595,6 +720,14 @@ bool branch_and_bound_t::repair_solution(const std::vector& edge_ num_fractional, repaired_obj); } + } else { + settings_.log.printf( + "Repair LP failed: status=%s iters=%d time=%.3fs time_limit=%.3f cut_off=%e\n", + dual::status_to_string(lp_status).c_str(), + iter, + toc(lp_start_time), + lp_settings.time_limit, + lp_settings.cut_off); } return feasible; @@ -605,7 +738,7 @@ void branch_and_bound_t::repair_heuristic_solutions() { raft::common::nvtx::range scope("BB::repair_heuristics"); // Check if there are any solutions to repair - std::vector> to_repair; + std::vector to_repair; mutex_repair_.lock(); if (repair_queue_.size() > 0) { to_repair = repair_queue_; @@ -615,7 +748,8 @@ void branch_and_bound_t::repair_heuristic_solutions() if (to_repair.size() > 0) { settings_.log.debug("Attempting to repair %ld injected solutions\n", to_repair.size()); - for (const std::vector& uncrushed_solution : to_repair) { + for (const auto& queued_solution : to_repair) { + const std::vector& uncrushed_solution = queued_solution.solution; std::vector crushed_solution; crush_primal_solution( original_problem_, original_lp_, uncrushed_solution, new_slacks_, crushed_solution); @@ -627,15 +761,23 @@ void branch_and_bound_t::repair_heuristic_solutions() mutex_upper_.lock(); if (repaired_obj < upper_bound_) { - upper_bound_ = repaired_obj; + const f_t previous_upper = upper_bound_; + upper_bound_ = repaired_obj; incumbent_.set_incumbent_solution(repaired_obj, repaired_solution); - report_heuristic(repaired_obj); - - if (settings_.solution_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem_, original_lp_, repaired_solution, original_x); - settings_.solution_callback(original_x, repaired_obj); - } + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic B&B incumbent update: source=repair_queue prev_upper=%.16e " + "new_upper=%.16e obj=%.16e hash=0x%x\n", + previous_upper, + upper_bound_.load(), + repaired_obj, + detail::compute_hash(repaired_solution)); + report_heuristic(repaired_obj, queued_solution.work_timestamp); + + emit_solution_callback_from_crushed(repaired_solution, + repaired_obj, + queued_solution.origin, + queued_solution.work_timestamp); } mutex_upper_.unlock(); @@ -665,14 +807,49 @@ void branch_and_bound_t::set_solution_at_root(mip_solution_t compute_user_objective(original_lp_, root_objective_), toc(exploration_stats_.start_time)); - if (settings_.solution_callback != nullptr) { - settings_.solution_callback(solution.x, solution.objective); - } + emit_solution_callback(solution.x, + solution.objective, + cuopt::internals::mip_solution_origin_t::BRANCH_AND_BOUND_NODE, + work_unit_context_.current_work()); if (settings_.heuristic_preemption_callback != nullptr) { settings_.heuristic_preemption_callback(); } } +template +bool branch_and_bound_t::retire_queued_solution( + const queued_external_solution_t& queued_solution, f_t& out_obj, std::vector& out_crushed) +{ + f_t primal_err; + f_t bound_err; + i_t num_fractional; + + mutex_original_lp_.lock(); + crush_primal_solution( + original_problem_, original_lp_, queued_solution.solution, new_slacks_, out_crushed); + out_obj = compute_objective(original_lp_, out_crushed); + bool is_feasible = check_guess( + original_lp_, settings_, var_types_, out_crushed, primal_err, bound_err, num_fractional); + mutex_original_lp_.unlock(); + + if (is_feasible) { return true; } + + std::vector repaired_solution; + f_t repaired_obj; + bool repaired = repair_solution(edge_norms_, out_crushed, repaired_obj, repaired_solution); + if (repaired) { + out_crushed = std::move(repaired_solution); + out_obj = repaired_obj; + return true; + } + + CUOPT_DETERMINISM_LOG(settings_.log, + "Deterministic repair FAILED: wut=%.3f origin=%s\n", + queued_solution.work_timestamp, + cuopt::internals::mip_solution_origin_to_string(queued_solution.origin)); + return false; +} + template void branch_and_bound_t::set_final_solution(mip_solution_t& solution, f_t lower_bound) @@ -709,6 +886,12 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& obj, is_maximization ? "Upper" : "Lower", user_bound); + { + const f_t root_lp_obj = root_lp_current_lower_bound_.load(); + if (std::isfinite(root_lp_obj)) { + settings_.log.printf("Root LP dual objective (last): %.16e\n", root_lp_obj); + } + } if (gap <= settings_.absolute_mip_gap_tol || gap_rel <= settings_.relative_mip_gap_tol) { solver_status_ = mip_status_t::OPTIMAL; @@ -740,6 +923,35 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& } } + // Drain any pending heuristic solutions that B&B never got to retire during exploration + // (e.g., root solve consumed the entire budget, or exploration ended between sync horizons). + if (settings_.deterministic) { + const double current_work = work_unit_context_.current_work(); + mutex_heuristic_queue_.lock(); + std::vector pending; + pending.swap(heuristic_solution_queue_); + mutex_heuristic_queue_.unlock(); + + for (const auto& queued_solution : pending) { + if (queued_solution.work_timestamp > current_work) { continue; } + std::vector crushed_solution; + f_t obj; + bool is_feasible = retire_queued_solution(queued_solution, obj, crushed_solution); + + if (is_feasible && obj < upper_bound_) { + upper_bound_ = obj; + incumbent_.set_incumbent_solution(obj, crushed_solution); + settings_.log.printf( + "Late-retired heuristic incumbent: obj=%.6e wut=%.3f origin=%s\n", + compute_user_objective(original_lp_, obj), + queued_solution.work_timestamp, + cuopt::internals::mip_solution_origin_to_string(queued_solution.origin)); + emit_solution_callback_from_crushed( + crushed_solution, obj, queued_solution.origin, queued_solution.work_timestamp); + } + } + } + if (upper_bound_ != inf) { assert(incumbent_.has_incumbent); uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, solution.x); @@ -748,6 +960,17 @@ void branch_and_bound_t::set_final_solution(mip_solution_t& solution.lower_bound = lower_bound; solution.nodes_explored = exploration_stats_.nodes_explored; solution.simplex_iterations = exploration_stats_.total_lp_iters; + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic B&B final package: status=%d incumbent_obj=%.16e lower_bound=%.16e " + "incumbent_hash=0x%x final_hash=0x%x nodes=%d simplex_iterations=%d\n", + (int)solver_status_.load(), + solution.objective, + solution.lower_bound, + detail::compute_hash(incumbent_.x), + detail::compute_hash(solution.x), + solution.nodes_explored, + solution.simplex_iterations); } template @@ -764,16 +987,29 @@ void branch_and_bound_t::add_feasible_solution(f_t leaf_objective, mutex_upper_.lock(); if (leaf_objective < upper_bound_) { + const f_t previous_upper = upper_bound_; incumbent_.set_incumbent_solution(leaf_objective, leaf_solution); upper_bound_ = leaf_objective; + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic B&B incumbent update: source=leaf prev_upper=%.16e new_upper=%.16e " + "obj=%.16e hash=0x%x depth=%d worker_type=%d\n", + previous_upper, + upper_bound_.load(), + leaf_objective, + detail::compute_hash(leaf_solution), + leaf_depth, + (int)thread_type); report(feasible_solution_symbol(thread_type), leaf_objective, get_lower_bound(), leaf_depth, 0); send_solution = true; } - if (send_solution && settings_.solution_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem_, original_lp_, incumbent_.x, original_x); - settings_.solution_callback(original_x, upper_bound_); + if (send_solution) { + emit_solution_callback_from_crushed( + incumbent_.x, + upper_bound_, + cuopt::internals::mip_solution_origin_t::BRANCH_AND_BOUND_NODE, + work_unit_context_.current_work()); } mutex_upper_.unlock(); } @@ -909,6 +1145,23 @@ struct nondeterministic_policy_t : tree_update_policy_t { f_t obj, const std::vector& x) override { + f_t primal_err; + f_t bound_err; + i_t num_fractional; + bool cg = check_guess( + bnb.original_lp_, bnb.settings_, bnb.var_types_, x, primal_err, bound_err, num_fractional); + if (!cg) { + bnb.settings_.log.printf( + "Rejecting infeasible integer solution: node=%d depth=%d " + "obj=%.6e primal_err=%.6e bound_err=%.6e fractional=%d\n", + node->node_id, + node->depth, + obj, + primal_err, + bound_err, + num_fractional); + return; + } bnb.add_feasible_solution(obj, x, node->depth, worker->search_strategy); } @@ -1002,17 +1255,98 @@ struct deterministic_bfs_policy_t const std::vector& x) override { if (obj < this->worker.local_upper_bound) { + f_t primal_err; + f_t bound_err; + i_t num_fractional; + bool cg = check_guess(this->bnb.original_lp_, + this->bnb.settings_, + this->bnb.var_types_, + x, + primal_err, + bound_err, + num_fractional); + if (!cg) { + this->bnb.settings_.log.printf( + "Rejecting infeasible integer solution: worker=%d node=%d depth=%d " + "obj=%.6e primal_err=%.6e bound_err=%.6e fractional=%d\n", + this->worker.worker_id, + node->creation_seq, + node->depth, + obj, + primal_err, + bound_err, + num_fractional); + return; + } + const f_t previous_local_upper = this->worker.local_upper_bound; + const int previous_seq = this->worker.next_solution_seq; this->worker.local_upper_bound = obj; this->worker.integer_solutions.push_back( - {obj, x, node->depth, this->worker.worker_id, this->worker.next_solution_seq++}); + {obj, + x, + node->depth, + this->worker.worker_id, + this->worker.next_solution_seq++, + this->worker.clock, + cuopt::internals::mip_solution_origin_t::BRANCH_AND_BOUND_NODE}); + if (this->bnb.deterministic_current_horizon_ <= + this->bnb.deterministic_horizon_step_ + 1e-9) { + CUOPT_DETERMINISM_LOG( + this->bnb.settings_.log, + "Deterministic BFS local integer queue: horizon=%.6f worker=%d node_id=%d packed=0x%llx " + "path_hash=0x%x depth=%d obj=%.16e sol_hash=0x%x local_upper_before=%.16e " + "local_upper_after=%.16e queue_seq=%d clock=%.6f\n", + this->bnb.deterministic_current_horizon_, + this->worker.worker_id, + node->creation_seq, + (unsigned long long)node->get_id_packed(), + node->compute_path_hash(), + node->depth, + obj, + detail::compute_hash(x), + previous_local_upper, + this->worker.local_upper_bound, + previous_seq, + this->worker.clock); + } } } - branch_variable_t select_branch_variable(mip_node_t*, + branch_variable_t select_branch_variable(mip_node_t* node, const std::vector& fractional, const std::vector& x) override { - i_t var = this->worker.pc_snapshot.variable_selection(fractional, x); + i_t var; + if (this->bnb.settings_.reliability_branching != 0 && + this->worker.nodes_explored_snapshot > 0) { + var = reliable_variable_selection_core(node, + fractional, + x, + this->bnb.settings_, + this->bnb.var_types_, + this->worker.leaf_problem, + this->worker.leaf_edge_norms, + this->worker.basis_factors, + this->worker.basic_list, + this->worker.nonbasic_list, + this->worker.pc_snapshot.sum_down_.data(), + this->worker.pc_snapshot.sum_up_.data(), + this->worker.pc_snapshot.num_down_.data(), + this->worker.pc_snapshot.num_up_.data(), + this->worker.pc_snapshot.n_vars(), + this->worker.pc_snapshot.strong_branching_lp_iter_, + this->worker.local_upper_bound, + (int64_t)this->worker.total_lp_iters_snapshot, + (int64_t)this->worker.nodes_explored_snapshot, + this->bnb.exploration_stats_.start_time, + this->bnb.pc_.reliability_branching_settings, + 1, + nullptr, + nullptr, + nullptr); + } else { + var = this->worker.pc_snapshot.variable_selection(fractional, x); + } auto dir = martin_criteria(x[var], this->bnb.root_relax_soln_.x[var]); return {var, dir}; } @@ -1043,11 +1377,37 @@ struct deterministic_bfs_policy_t node->fractional_val); this->bnb.exploration_stats_.nodes_unexplored += 2; this->worker.enqueue_children_for_plunge(node->get_down_child(), node->get_up_child(), dir); + if (this->bnb.deterministic_current_horizon_ <= + this->bnb.deterministic_horizon_step_ + 1e-9) { + CUOPT_DETERMINISM_LOG( + this->bnb.settings_.log, + "Deterministic BFS branch create: horizon=%.6f worker=%d parent_packed=0x%llx " + "parent_path_hash=0x%x depth=%d branch_var=%d dir=%d frac=%.16e " + "down_packed=0x%llx down_path_hash=0x%x up_packed=0x%llx up_path_hash=0x%x " + "queue_size=%zu local_upper=%.16e\n", + this->bnb.deterministic_current_horizon_, + this->worker.worker_id, + (unsigned long long)node->get_id_packed(), + node->compute_path_hash(), + node->depth, + node->branch_var, + (int)dir, + node->fractional_val, + (unsigned long long)node->get_down_child()->get_id_packed(), + node->get_down_child()->compute_path_hash(), + (unsigned long long)node->get_up_child()->get_id_packed(), + node->get_up_child()->compute_path_hash(), + this->worker.queue_size(), + this->worker.local_upper_bound); + } break; case node_status_t::NUMERICAL: this->worker.record_numerical(node); break; + case node_status_t::PENDING: this->worker.plunge_stack.push_back(node); break; default: break; } - if (status != node_status_t::HAS_CHILDREN) { this->worker.recompute_bounds_and_basis = true; } + if (status != node_status_t::HAS_CHILDREN && status != node_status_t::PENDING) { + this->worker.recompute_bounds_and_basis = true; + } } void on_numerical_issue(mip_node_t* node) override @@ -1078,8 +1438,55 @@ struct deterministic_diving_policy_t const std::vector& x) override { if (obj < this->worker.local_upper_bound) { + f_t primal_err; + f_t bound_err; + i_t num_fractional; + bool cg = check_guess(this->bnb.original_lp_, + this->bnb.settings_, + this->bnb.var_types_, + x, + primal_err, + bound_err, + num_fractional); + if (!cg) { + this->bnb.settings_.log.printf( + "Rejecting infeasible diving integer solution: worker=%d node=%d depth=%d " + "obj=%.6e primal_err=%.6e bound_err=%.6e fractional=%d\n", + this->worker.worker_id, + node->creation_seq, + node->depth, + obj, + primal_err, + bound_err, + num_fractional); + return; + } + const f_t previous_local_upper = this->worker.local_upper_bound; + const int previous_seq = this->worker.next_solution_seq; this->worker.local_upper_bound = obj; this->worker.queue_integer_solution(obj, x, node->depth); + if (this->bnb.deterministic_current_horizon_ <= + this->bnb.deterministic_horizon_step_ + 1e-9) { + CUOPT_DETERMINISM_LOG( + this->bnb.settings_.log, + "Deterministic diving local integer queue: horizon=%.6f worker=%d node_id=%d " + "packed=0x%llx " + "path_hash=0x%x depth=%d obj=%.16e sol_hash=0x%x local_upper_before=%.16e " + "local_upper_after=%.16e queue_seq=%d clock=%.6f type=%d\n", + this->bnb.deterministic_current_horizon_, + this->worker.worker_id, + node->creation_seq, + (unsigned long long)node->get_id_packed(), + node->compute_path_hash(), + node->depth, + obj, + detail::compute_hash(x), + previous_local_upper, + this->worker.local_upper_bound, + previous_seq, + this->worker.clock, + (int)this->worker.diving_type); + } } } @@ -1929,6 +2336,15 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( settings_.log.printf("\n"); is_root_solution_set = true; + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic root flag set: root_status=%d root_obj=%.16e recomputed_root_obj=%.16e " + "callback_flag=%d x_hash=0x%x\n", + (int)root_status, + root_objective_, + compute_objective(original_lp_, root_relax_soln_.x), + (int)root_crossover_solution_set_.load(std::memory_order_acquire), + detail::compute_hash(root_relax_soln_.x)); return root_status; } @@ -1937,16 +2353,55 @@ template mip_status_t branch_and_bound_t::solve(mip_solution_t& solution) { raft::common::nvtx::range scope("BB::solve"); + auto exploration_signal_guard = cuopt::scope_guard([this]() { + if (!exploration_started_.load()) { + std::lock_guard lock(exploration_started_mutex_); + exploration_started_ = true; + exploration_started_cv_.notify_all(); + } + }); + auto heuristic_preemption_guard = cuopt::scope_guard([this]() { + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } + }); logger_t log; log.log = false; log.log_prefix = settings_.log.log_prefix; solver_status_ = mip_status_t::UNSET; is_running_ = false; + root_lp_current_lower_bound_ = -inf; exploration_stats_.nodes_unexplored = 0; exploration_stats_.nodes_explored = 0; original_lp_.A.to_compressed_row(Arow_); + work_unit_scheduler_t* saved_scheduler = work_unit_context_.scheduler; + if (settings_.deterministic) { + work_unit_context_.deterministic = true; + cuopt_assert(settings_.bnb_work_unit_scale > 0.0, "B&B work-unit scale must be positive"); + if (settings_.gpu_heur_wait_for_exploration) { + // Scale=0 during pre-exploration: root LP/cuts/SB don't advance the deterministic timeline. + // GPU heuristics start after exploration, so both timelines begin at 0 together. + work_unit_context_.work_unit_scale = 0.0; + } else { + // GPU heuristics race with B&B pre-exploration, so B&B work must advance normally. + work_unit_context_.work_unit_scale = settings_.bnb_work_unit_scale; + } + + // Detach the scheduler during the serial root/cuts/SB phase. + // record_work_sync_on_horizon still accumulates global_work_units_elapsed, + // but avoids scheduler->on_work_recorded whose OMP directives + // perturb FP state in a single-thread context. + work_unit_context_.scheduler = nullptr; + } + + settings_.log.printf("Reduced cost strengthening enabled: %d\n", + settings_.reduced_cost_strengthening); + + variable_bounds_t variable_bounds( + original_lp_, settings_, var_types_, Arow_, new_slacks_); + if (guess_.size() != 0) { raft::common::nvtx::range scope_guess("BB::check_initial_guess"); std::vector crushed_guess; @@ -1959,23 +2414,62 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (feasible) { const f_t computed_obj = compute_objective(original_lp_, crushed_guess); mutex_upper_.lock(); + const f_t previous_upper = upper_bound_; incumbent_.set_incumbent_solution(computed_obj, crushed_guess); upper_bound_ = computed_obj; + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic B&B incumbent update: source=initial_guess prev_upper=%.16e " + "new_upper=%.16e obj=%.16e hash=0x%x\n", + previous_upper, + upper_bound_.load(), + computed_obj, + detail::compute_hash(crushed_guess)); mutex_upper_.unlock(); } } root_relax_soln_.resize(original_lp_.num_rows, original_lp_.num_cols); - i_t original_rows = original_lp_.num_rows; - simplex_solver_settings_t lp_settings = settings_; - lp_settings.inside_mip = 1; - lp_settings.scale_columns = false; - lp_settings.concurrent_halt = get_root_concurrent_halt(); + // TODO: ensure clique tables work well w/ determinism + if (settings_.clique_cuts != 0 && clique_table_ == nullptr && !settings_.deterministic) { + signal_extend_cliques_.store(false, std::memory_order_release); + typename ::cuopt::linear_programming::mip_solver_settings_t::tolerances_t + tolerances_for_clique{}; + tolerances_for_clique.presolve_absolute_tolerance = settings_.primal_tol; + tolerances_for_clique.absolute_tolerance = settings_.primal_tol; + tolerances_for_clique.relative_tolerance = settings_.zero_tol; + tolerances_for_clique.integrality_tolerance = settings_.integer_tol; + tolerances_for_clique.absolute_mip_gap = settings_.absolute_mip_gap_tol; + tolerances_for_clique.relative_mip_gap = settings_.relative_mip_gap_tol; + auto* signal_ptr = &signal_extend_cliques_; + clique_table_future_ = + std::async(std::launch::async, + [this, + tolerances_for_clique, + signal_ptr]() -> std::shared_ptr> { + user_problem_t problem_copy = original_problem_; + cuopt::timer_t timer(std::numeric_limits::infinity()); + std::shared_ptr> table; + detail::find_initial_cliques( + problem_copy, tolerances_for_clique, &table, timer, false, signal_ptr); + return table; + }); + } + + i_t original_rows = original_lp_.num_rows; + simplex_solver_settings_t lp_settings = settings_; + lp_settings.inside_mip = 1; + lp_settings.scale_columns = false; + lp_settings.concurrent_halt = get_root_concurrent_halt(); + lp_settings.dual_simplex_objective_callback = [this](f_t user_obj) { + root_lp_current_lower_bound_.store(user_obj); + }; std::vector basic_list(original_lp_.num_rows); std::vector nonbasic_list; basis_update_mpf_t basis_update(original_lp_.num_rows, settings_.refactor_frequency); lp_status_t root_status; + solving_root_relaxation_ = true; if (!enable_concurrent_lp_root_solve()) { // RINS/SUBMIP path @@ -1988,7 +2482,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basic_list, nonbasic_list, root_vstatus_, - edge_norms_); + edge_norms_, + &work_unit_context_); } else { settings_.log.printf("\nSolving LP root relaxation in concurrent mode\n"); root_status = solve_root_relaxation(lp_settings, @@ -1999,17 +2494,24 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut nonbasic_list, edge_norms_); } + solving_root_relaxation_ = false; exploration_stats_.total_lp_iters = root_relax_soln_.iterations; exploration_stats_.total_lp_solve_time = toc(exploration_stats_.start_time); + CUOPT_DETERMINISM_LOG(settings_.log, + "Post-root-LP work: %.16e iters=%d\n", + work_unit_context_.current_work(), + root_relax_soln_.iterations); + + auto finish_clique_thread = [this]() { + if (clique_table_future_.valid()) { + signal_extend_cliques_.store(true, std::memory_order_release); + clique_table_ = clique_table_future_.get(); + } + }; if (root_status == lp_status_t::INFEASIBLE) { settings_.log.printf("MIP Infeasible\n"); - // FIXME: rarely dual simplex detects infeasible whereas it is feasible. - // to add a small safety net, check if there is a primal solution already. - // Uncomment this if the issue with cost266-UUE is resolved - // if (settings.heuristic_preemption_callback != nullptr) { - // settings.heuristic_preemption_callback(); - // } + finish_clique_thread(); return mip_status_t::INFEASIBLE; } if (root_status == lp_status_t::UNBOUNDED) { @@ -2017,30 +2519,45 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (settings_.heuristic_preemption_callback != nullptr) { settings_.heuristic_preemption_callback(); } + finish_clique_thread(); return mip_status_t::UNBOUNDED; } if (root_status == lp_status_t::TIME_LIMIT) { solver_status_ = mip_status_t::TIME_LIMIT; set_final_solution(solution, -inf); + finish_clique_thread(); return solver_status_; } if (root_status == lp_status_t::WORK_LIMIT) { solver_status_ = mip_status_t::WORK_LIMIT; set_final_solution(solution, -inf); + finish_clique_thread(); return solver_status_; } if (root_status == lp_status_t::NUMERICAL_ISSUES) { solver_status_ = mip_status_t::NUMERICAL; set_final_solution(solution, -inf); + finish_clique_thread(); return solver_status_; } assert(root_vstatus_.size() == original_lp_.num_cols); set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms_); - root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + { + const f_t previous_root_objective = root_objective_; + root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic root objective assign: source=post_root_solve old=%.16e new=%.16e " + "x_hash=0x%x obj_hash=0x%x\n", + previous_root_objective, + root_objective_, + detail::compute_hash(root_relax_soln_.x), + detail::compute_hash(original_lp_.objective)); + } if (settings_.set_simplex_solution_callback != nullptr) { std::vector original_x; @@ -2064,6 +2581,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut if (num_fractional == 0) { set_solution_at_root(solution, cut_info); + finish_clique_thread(); return mip_status_t::OPTIMAL; } @@ -2078,8 +2596,16 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } cut_pool_t cut_pool(original_lp_.num_cols, settings_); - cut_generation_t cut_generation( - cut_pool, original_lp_, settings_, Arow_, new_slacks_, var_types_); + cut_generation_t cut_generation(cut_pool, + original_lp_, + settings_, + Arow_, + new_slacks_, + var_types_, + original_problem_, + clique_table_, + &clique_table_future_, + &signal_extend_cliques_); std::vector saved_solution; #ifdef CHECK_CUTS_AGAINST_SAVED_SOLUTION @@ -2090,7 +2616,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut f_t last_objective = root_objective_; f_t root_relax_objective = root_objective_; - i_t cut_pool_size = 0; + f_t cut_generation_start_time = tic(); + i_t cut_pool_size = 0; for (i_t cut_pass = 0; cut_pass < settings_.max_cut_passes; cut_pass++) { if (num_fractional == 0) { set_solution_at_root(solution, cut_info); @@ -2108,17 +2635,34 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } #endif + if (toc(exploration_stats_.start_time) > settings_.time_limit) { + solver_status_ = mip_status_t::TIME_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; + } + // Generate cuts and add them to the cut pool - f_t cut_start_time = tic(); - cut_generation.generate_cuts(original_lp_, - settings_, - Arow_, - new_slacks_, - var_types_, - basis_update, - root_relax_soln_.x, - basic_list, - nonbasic_list); + f_t cut_start_time = tic(); + bool problem_feasible = cut_generation.generate_cuts(original_lp_, + settings_, + Arow_, + new_slacks_, + var_types_, + basis_update, + root_relax_soln_.x, + root_relax_soln_.y, + root_relax_soln_.z, + basic_list, + nonbasic_list, + variable_bounds, + exploration_stats_.start_time); + if (!problem_feasible) { + if (settings_.heuristic_preemption_callback != nullptr) { + settings_.heuristic_preemption_callback(); + } + finish_clique_thread(); + return mip_status_t::INFEASIBLE; + } f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings_.log.debug("Cut generation time %.2f seconds\n", cut_generation_time); @@ -2157,7 +2701,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut cut_pool_size = cut_pool.pool_size(); // Resolve the LP with the new cuts - settings_.log.debug( + settings_.log.printf( "Solving LP with %d cuts (%d cut nonzeros). Cuts in pool %d. Total constraints %d\n", num_cuts, cuts_to_add.row_start[cuts_to_add.m], @@ -2179,6 +2723,7 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_vstatus_, edge_norms_); var_types_.resize(original_lp_.num_cols, variable_type_t::CONTINUOUS); + variable_bounds.resize(original_lp_.num_cols); mutex_original_lp_.unlock(); f_t add_cuts_time = toc(add_cuts_start_time); if (add_cuts_time > 1.0) { @@ -2227,14 +2772,39 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } if (!feasible) { settings_.log.printf("Bound strengthening detected infeasibility\n"); +#ifdef WRITE_BOUND_STRENGTHENING_INFEASIBLE_MPS + original_lp_.write_mps("bound_strengthening_infeasible.mps"); +#endif return mip_status_t::INFEASIBLE; } i_t iter = 0; bool initialize_basis = false; lp_settings.concurrent_halt = NULL; - f_t dual_phase2_start_time = tic(); - dual::status_t cut_status = dual_phase2_with_advanced_basis(2, + CUOPT_DETERMINISM_LOG(settings_.log, + "Cut loop LP warm-start: pass=%d rows=%d cols=%d " + "lower_hash=0x%x upper_hash=0x%x " + "x_hash=0x%x y_hash=0x%x z_hash=0x%x " + "basic_hash=0x%x nonbasic_hash=0x%x " + "vstatus_hash=0x%x edge_norms_hash=0x%x " + "cut_off=%.16e work_limit=%.16e time_limit=%.16e\n", + cut_pass, + original_lp_.num_rows, + original_lp_.num_cols, + detail::compute_hash(original_lp_.lower), + detail::compute_hash(original_lp_.upper), + detail::compute_hash(root_relax_soln_.x), + detail::compute_hash(root_relax_soln_.y), + detail::compute_hash(root_relax_soln_.z), + detail::compute_hash(basic_list), + detail::compute_hash(nonbasic_list), + detail::compute_hash(root_vstatus_), + detail::compute_hash(edge_norms_), + lp_settings.cut_off, + lp_settings.work_limit, + lp_settings.time_limit); + f_t dual_phase2_start_time = tic(); + dual::status_t cut_status = dual_phase2_with_advanced_basis(2, 0, initialize_basis, exploration_stats_.start_time, @@ -2246,9 +2816,9 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut nonbasic_list, root_relax_soln_, iter, - edge_norms_); + edge_norms_, + &work_unit_context_); exploration_stats_.total_lp_iters += iter; - root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); f_t dual_phase2_time = toc(dual_phase2_start_time); if (dual_phase2_time > 1.0) { settings_.log.debug("Dual phase2 time %.2f seconds\n", dual_phase2_time); @@ -2259,6 +2829,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut return solver_status_; } + if (cut_status == dual::status_t::WORK_LIMIT) { + solver_status_ = mip_status_t::WORK_LIMIT; + set_final_solution(solution, root_objective_); + return solver_status_; + } + if (cut_status != dual::status_t::OPTIMAL) { settings_.log.printf("Numerical issue at root node. Resolving from scratch\n"); lp_status_t scratch_status = @@ -2270,20 +2846,69 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basic_list, nonbasic_list, root_vstatus_, - edge_norms_); + edge_norms_, + &work_unit_context_); if (scratch_status == lp_status_t::OPTIMAL) { // We recovered cut_status = convert_lp_status_to_dual_status(scratch_status); exploration_stats_.total_lp_iters += root_relax_soln_.iterations; - root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + { + const f_t previous_root_objective = root_objective_; + root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic root objective assign: source=cut_lp_scratch old=%.16e new=%.16e " + "pass=%d x_hash=0x%x obj_hash=0x%x\n", + previous_root_objective, + root_objective_, + cut_pass, + detail::compute_hash(root_relax_soln_.x), + detail::compute_hash(original_lp_.objective)); + } } else { settings_.log.printf("Cut status %s\n", dual::status_to_string(cut_status).c_str()); +#ifdef WRITE_CUT_INFEASIBLE_MPS + original_lp_.write_mps("cut_infeasible.mps"); +#endif return mip_status_t::NUMERICAL; } } + root_objective_ = compute_objective(original_lp_, root_relax_soln_.x); f_t remove_cuts_start_time = tic(); mutex_original_lp_.lock(); + assert(new_slacks_.size() == static_cast(original_lp_.num_rows)); + const f_t root_objective_before_remove = root_objective_; + const f_t root_objective_before_remove_recomputed = + compute_objective(original_lp_, root_relax_soln_.x); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic root LP before remove_cuts: pass=%d fractional=%d rows=%d cols=%d " + "nnz=%zu original_rows=%d active_cut_rows=%d slacks=%zu slack_hash=0x%x rhs_hash=0x%x " + "lower_hash=0x%x upper_hash=0x%x obj_hash=0x%x Acol_hash=0x%x Arow_hash=0x%x " + "Aval_hash=0x%x root_obj_before_remove=%.16e root_obj_before_remove_recomputed=%.16e " + "root_obj_before_remove_delta=%.16e callback_flag=%d root_flag=%d\n", + cut_pass, + num_fractional, + original_lp_.num_rows, + original_lp_.num_cols, + original_lp_.A.x.size(), + original_rows, + std::max((i_t)0, original_lp_.num_rows - original_rows), + new_slacks_.size(), + detail::compute_hash(new_slacks_), + detail::compute_hash(original_lp_.rhs), + detail::compute_hash(original_lp_.lower), + detail::compute_hash(original_lp_.upper), + detail::compute_hash(original_lp_.objective), + detail::compute_hash(original_lp_.A.col_start), + detail::compute_hash(original_lp_.A.i), + detail::compute_hash(original_lp_.A.x), + root_objective_before_remove, + root_objective_before_remove_recomputed, + root_objective_before_remove_recomputed - root_objective_before_remove, + (int)root_crossover_solution_set_.load(std::memory_order_acquire), + (int)is_root_solution_set); remove_cuts(original_lp_, settings_, exploration_stats_.start_time, @@ -2299,6 +2924,35 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut basic_list, nonbasic_list, basis_update); + variable_bounds.resize(original_lp_.num_cols); + assert(new_slacks_.size() == static_cast(original_lp_.num_rows)); + const f_t root_objective_after_remove = compute_objective(original_lp_, root_relax_soln_.x); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic root LP after remove_cuts: pass=%d fractional=%d rows=%d cols=%d " + "nnz=%zu original_rows=%d active_cut_rows=%d slacks=%zu slack_hash=0x%x rhs_hash=0x%x " + "lower_hash=0x%x upper_hash=0x%x obj_hash=0x%x Acol_hash=0x%x Arow_hash=0x%x " + "Aval_hash=0x%x root_obj_before_remove=%.16e root_obj_after_remove=%.16e " + "root_obj_remove_delta=%.16e\n", + cut_pass, + num_fractional, + original_lp_.num_rows, + original_lp_.num_cols, + original_lp_.A.x.size(), + original_rows, + std::max((i_t)0, original_lp_.num_rows - original_rows), + new_slacks_.size(), + detail::compute_hash(new_slacks_), + detail::compute_hash(original_lp_.rhs), + detail::compute_hash(original_lp_.lower), + detail::compute_hash(original_lp_.upper), + detail::compute_hash(original_lp_.objective), + detail::compute_hash(original_lp_.A.col_start), + detail::compute_hash(original_lp_.A.i), + detail::compute_hash(original_lp_.A.x), + root_objective_before_remove, + root_objective_after_remove, + root_objective_after_remove - root_objective_before_remove); mutex_original_lp_.unlock(); f_t remove_cuts_time = toc(remove_cuts_start_time); if (remove_cuts_time > 1.0) { @@ -2306,11 +2960,50 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut } fractional.clear(); num_fractional = fractional_variables(settings_, root_relax_soln_.x, var_types_, fractional); + assert(root_relax_soln_.x.size() == static_cast(original_lp_.num_cols)); + assert(root_relax_soln_.y.size() == static_cast(original_lp_.num_rows)); + assert(root_relax_soln_.z.size() == static_cast(original_lp_.num_cols)); + assert(basic_list.size() == static_cast(original_lp_.num_rows)); + assert(nonbasic_list.size() == + static_cast(original_lp_.num_cols - original_lp_.num_rows)); + assert(root_vstatus_.size() == static_cast(original_lp_.num_cols)); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic root pass state: pass=%d root_obj=%.16e num_fractional=%d rel_gap=%.16e " + "abs_gap=%.16e x_hash=0x%x y_hash=0x%x z_hash=0x%x basic_hash=0x%x nonbasic_hash=0x%x " + "vstatus_hash=0x%x edge_norms_hash=0x%x root_obj_after_remove=%.16e " + "root_obj_remove_delta=%.16e root_obj_member_delta=%.16e callback_flag=%d root_flag=%d\n", + cut_pass, + root_objective_, + num_fractional, + user_relative_gap(original_lp_, upper_bound_.load(), root_objective_), + upper_bound_.load() - root_objective_, + detail::compute_hash(root_relax_soln_.x), + detail::compute_hash(root_relax_soln_.y), + detail::compute_hash(root_relax_soln_.z), + detail::compute_hash(basic_list), + detail::compute_hash(nonbasic_list), + detail::compute_hash(root_vstatus_), + detail::compute_hash(edge_norms_), + root_objective_after_remove, + root_objective_after_remove - root_objective_, + root_objective_ - root_objective_after_remove, + (int)root_crossover_solution_set_.load(std::memory_order_acquire), + (int)is_root_solution_set); if (num_fractional == 0) { - upper_bound_ = root_objective_; + const f_t previous_upper = upper_bound_; + upper_bound_ = root_objective_; mutex_upper_.lock(); incumbent_.set_incumbent_solution(root_objective_, root_relax_soln_.x); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic B&B incumbent update: source=root_integral_pass prev_upper=%.16e " + "new_upper=%.16e obj=%.16e hash=0x%x\n", + previous_upper, + upper_bound_.load(), + root_objective_, + detail::compute_hash(root_relax_soln_.x)); mutex_upper_.unlock(); } f_t obj = upper_bound_.load(); @@ -2327,20 +3020,32 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut f_t change_in_objective = root_objective_ - last_objective; const f_t factor = settings_.cut_change_threshold; const f_t min_objective = 1e-3; - if (change_in_objective <= factor * std::max(min_objective, std::abs(root_relax_objective))) { - settings_.log.debug( + if (factor > 0.0 && + change_in_objective <= factor * std::max(min_objective, std::abs(root_relax_objective))) { + settings_.log.printf( "Change in objective %.16e is less than 1e-3 of root relax objective %.16e\n", change_in_objective, root_relax_objective); break; } + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic root pass continue: pass=%d change=%.16e threshold=%.16e last_obj=%.16e " + "root_relax_obj=%.16e root_obj=%.16e\n", + cut_pass, + change_in_objective, + factor * std::max(min_objective, std::abs(root_relax_objective)), + last_objective, + root_relax_objective, + root_objective_); last_objective = root_objective_; } } print_cut_info(settings_, cut_info); - + f_t cut_generation_time = toc(cut_generation_start_time); if (cut_info.has_cuts()) { + settings_.log.printf("Cut generation time: %.2f seconds\n", cut_generation_time); settings_.log.printf("Cut pool size : %d\n", cut_pool_size); settings_.log.printf("Size with cuts : %d constraints, %d variables, %d nonzeros\n", original_lp_.num_rows, @@ -2363,7 +3068,8 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut root_objective_, root_vstatus_, edge_norms_, - pc_); + pc_, + &work_unit_context_); } if (toc(exploration_stats_.start_time) > settings_.time_limit) { @@ -2444,6 +3150,15 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut calculate_variable_locks(original_lp_, var_up_locks_, var_down_locks_); } if (settings_.deterministic) { + pre_exploration_work_ = work_unit_context_.current_work(); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Pre-exploration work breakdown: total=%.16e scale=%.6f deterministic=%d\n", + pre_exploration_work_, + work_unit_context_.work_unit_scale, + (int)work_unit_context_.deterministic); + work_unit_context_.scheduler = saved_scheduler; + work_unit_context_.work_unit_scale = settings_.bnb_work_unit_scale; settings_.log.printf( " | Explored | Unexplored | Objective | Bound | IntInf | Depth | Iter/Node " "| Gap | Work | Time |\n"); @@ -2453,6 +3168,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut "| Gap | Time |\n"); } + { + std::lock_guard lock(exploration_started_mutex_); + exploration_started_ = true; + } + exploration_started_cv_.notify_all(); + if (settings_.deterministic) { run_deterministic_coordinator(Arow_); } else if (settings_.num_threads > 1) { @@ -2625,8 +3346,7 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri deterministic_horizon_step_ = 0.50; - // Compute worker counts using the same formula as reliability-branching scheduler - const i_t num_workers = 2 * settings_.num_threads; + const i_t num_workers = settings_.num_threads; std::vector search_strategies = get_search_strategies(settings_.diving_settings); std::array max_num_workers = @@ -2639,7 +3359,7 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri } deterministic_mode_enabled_ = true; - deterministic_current_horizon_ = deterministic_horizon_step_; + deterministic_current_horizon_ = pre_exploration_work_ + deterministic_horizon_step_; deterministic_horizon_number_ = 0; deterministic_global_termination_status_ = mip_status_t::UNSET; @@ -2671,10 +3391,12 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri scoped_context_registrations_t context_registrations(*deterministic_scheduler_); for (auto& worker : *deterministic_workers_) { + worker.clock = pre_exploration_work_; context_registrations.add(worker.work_context); } if (deterministic_diving_workers_) { for (auto& worker : *deterministic_diving_workers_) { + worker.clock = pre_exploration_work_; context_registrations.add(worker.work_context); } } @@ -2682,8 +3404,9 @@ void branch_and_bound_t::run_deterministic_coordinator(const csr_matri int actual_diving_workers = deterministic_diving_workers_ ? (int)deterministic_diving_workers_->size() : 0; settings_.log.printf( - "Deterministic Mode: %d BFS workers + %d diving workers, horizon step = %.2f work " - "units\n", + "Deterministic Mode: %d total threads split as %d BFS workers + %d diving workers, " + "horizon step = %.2f work units\n", + num_workers, num_bfs_workers, actual_diving_workers, deterministic_horizon_step_); @@ -2816,11 +3539,16 @@ void branch_and_bound_t::run_deterministic_bfs_loop( bool is_child = (node->parent == worker.last_solved_node); worker.recompute_bounds_and_basis = !is_child; - node_status_t status = solve_node_deterministic(worker, node, search_tree); - worker.last_solved_node = node; + node_status_t status = solve_node_deterministic(worker, node, search_tree); + worker.current_node = nullptr; - worker.current_node = nullptr; - continue; + if (status == node_status_t::PENDING) { + // LP didn't finish (TIME_LIMIT/WORK_LIMIT). Node was re-enqueued by on_node_completed. + // Fall through to sync barrier instead of immediately retrying. + } else { + worker.last_solved_node = node; + continue; + } } // No work - advance to sync point to participate in barrier @@ -2844,26 +3572,57 @@ void branch_and_bound_t::deterministic_sync_callback() total_producer_wait_time_ += wait_time; max_producer_wait_time_ = std::max(max_producer_wait_time_, wait_time); ++producer_wait_count_; + if (wait_time > 0.01) { + settings_.log.printf( + "Producer sync wait: %.3fs at horizon %.2f (cumulative: %.3fs, count: %d)\n", + wait_time, + horizon_end, + total_producer_wait_time_, + producer_wait_count_); + } - work_unit_context_.global_work_units_elapsed = horizon_end; - - bb_event_batch_t all_events = deterministic_workers_->collect_and_sort_events(); + work_unit_context_.set_current_work(horizon_end, false); - deterministic_sort_replay_events(all_events); + { + std::string worker_clocks_str; + for (const auto& w : *deterministic_workers_) { + worker_clocks_str += std::to_string(w.worker_id) + ":" + std::to_string(w.clock) + "/" + + std::to_string(w.integer_solutions.size()) + " "; + } + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic sync #%d: horizon=%.6f pre_expl=%.6f heur_q=%zu workers=[%s]\n", + deterministic_horizon_number_, + deterministic_current_horizon_, + pre_exploration_work_, + heuristic_solution_queue_.size(), + worker_clocks_str.c_str()); + } - // deterministic_prune_worker_nodes_vs_incumbent(); + bb_event_batch_t all_events = deterministic_workers_->collect_and_sort_events(); - deterministic_collect_diving_solutions_and_update_pseudocosts(); + std::vector::deterministic_replay_solution_t> + replay_solutions; + deterministic_collect_worker_solutions( + *deterministic_workers_, + [](const deterministic_bfs_worker_pool_t&, int) { + return search_strategy_t::BEST_FIRST; + }, + replay_solutions); + deterministic_collect_diving_solutions_and_update_pseudocosts(replay_solutions); - for (auto& worker : *deterministic_workers_) { - worker.integer_solutions.clear(); - } if (deterministic_diving_workers_) { for (auto& worker : *deterministic_diving_workers_) { - worker.integer_solutions.clear(); + i_t delta = worker.total_nodes_explored - worker.nodes_explored_last_sync; + worker.nodes_explored_last_sync = worker.total_nodes_explored; + exploration_stats_.nodes_explored += delta; } } + deterministic_sort_replay_events(all_events, replay_solutions); + + // deterministic_prune_worker_nodes_vs_incumbent(); + deterministic_populate_diving_heap(); deterministic_assign_diving_nodes(); @@ -3007,9 +3766,15 @@ node_status_t branch_and_bound_t::solve_node_deterministic( simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); - lp_settings.cut_off = worker.local_upper_bound + settings_.dual_tol; + if (original_lp_.objective_is_integral) { + lp_settings.cut_off = + std::ceil(worker.local_upper_bound - settings_.integer_tol) + settings_.dual_tol; + } else { + lp_settings.cut_off = worker.local_upper_bound + settings_.dual_tol; + } lp_settings.inside_mip = 2; lp_settings.time_limit = remaining_time; + lp_settings.work_limit = std::numeric_limits::infinity(); lp_settings.scale_columns = false; bool feasible = true; @@ -3039,7 +3804,7 @@ node_status_t branch_and_bound_t::solve_node_deterministic( std::vector& leaf_vstatus = node_ptr->vstatus; i_t node_iter = 0; f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; + worker.leaf_edge_norms = edge_norms_; dual::status_t lp_status = dual_phase2_with_advanced_basis(2, 0, @@ -3053,7 +3818,7 @@ node_status_t branch_and_bound_t::solve_node_deterministic( worker.nonbasic_list, worker.leaf_solution, node_iter, - leaf_edge_norms, + worker.leaf_edge_norms, &worker.work_context); if (lp_status == dual::status_t::NUMERICAL) { @@ -3066,18 +3831,24 @@ node_status_t branch_and_bound_t::solve_node_deterministic( worker.basic_list, worker.nonbasic_list, leaf_vstatus, - leaf_edge_norms, + worker.leaf_edge_norms, &worker.work_context); lp_status = convert_lp_status_to_dual_status(second_status); } + double clock_before = worker.clock; double work_performed = worker.work_context.global_work_units_elapsed - work_units_at_start; worker.clock += work_performed; exploration_stats_.total_lp_solve_time += toc(lp_start_time); exploration_stats_.total_lp_iters += node_iter; - ++exploration_stats_.nodes_explored; - --exploration_stats_.nodes_unexplored; + + bool lp_conclusive = + (lp_status != dual::status_t::TIME_LIMIT && lp_status != dual::status_t::WORK_LIMIT); + if (lp_conclusive) { + ++exploration_stats_.nodes_explored; + --exploration_stats_.nodes_unexplored; + } deterministic_bfs_policy_t policy{*this, worker}; auto [status, round_dir] = update_tree_impl(node_ptr, search_tree, &worker, lp_status, policy); @@ -3087,58 +3858,17 @@ node_status_t branch_and_bound_t::solve_node_deterministic( template template -void branch_and_bound_t::deterministic_process_worker_solutions( - PoolT& pool, WorkerTypeGetter get_worker_type) +void branch_and_bound_t::deterministic_collect_worker_solutions( + PoolT& pool, + WorkerTypeGetter get_worker_type, + std::vector::deterministic_replay_solution_t>& + replay_solutions) { - std::vector*> all_solutions; for (auto& worker : pool) { for (auto& sol : worker.integer_solutions) { - all_solutions.push_back(&sol); + const search_strategy_t strategy = get_worker_type(pool, sol.worker_id); + replay_solutions.push_back({std::move(sol), strategy}); } - } - - // relies on queued_integer_solution_t's operator< - // sorts based on objective first, then the tuple - std::sort(all_solutions.begin(), - all_solutions.end(), - [](const queued_integer_solution_t* a, - const queued_integer_solution_t* b) { return *a < *b; }); - - f_t deterministic_lower = deterministic_compute_lower_bound(); - f_t current_upper = upper_bound_.load(); - - for (const auto* sol : all_solutions) { - if (sol->objective < current_upper) { - f_t user_obj = compute_user_objective(original_lp_, sol->objective); - f_t user_lower = compute_user_objective(original_lp_, deterministic_lower); - i_t nodes_explored = exploration_stats_.nodes_explored.load(); - i_t nodes_unexplored = exploration_stats_.nodes_unexplored.load(); - - search_strategy_t worker_type = get_worker_type(pool, sol->worker_id); - report(feasible_solution_symbol(worker_type), - sol->objective, - deterministic_lower, - sol->depth, - 0, - deterministic_current_horizon_); - - bool improved = false; - if (sol->objective < upper_bound_) { - upper_bound_ = sol->objective; - incumbent_.set_incumbent_solution(sol->objective, sol->solution); - current_upper = sol->objective; - improved = true; - } - - if (improved && settings_.solution_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem_, original_lp_, sol->solution, original_x); - settings_.solution_callback(original_x, sol->objective); - } - } - } - - for (auto& worker : pool) { worker.integer_solutions.clear(); } } @@ -3148,12 +3878,17 @@ template void branch_and_bound_t::deterministic_merge_pseudo_cost_updates(PoolT& pool) { std::vector> all_pc_updates; + int64_t sb_iter_delta = 0; for (auto& worker : pool) { auto updates = worker.pc_snapshot.take_updates(); all_pc_updates.insert(all_pc_updates.end(), updates.begin(), updates.end()); + int64_t snapshot_sb = worker.pc_snapshot.strong_branching_lp_iter_; + int64_t base_sb = pc_.strong_branching_lp_iter.load(); + if (snapshot_sb > base_sb) { sb_iter_delta += snapshot_sb - base_sb; } } std::sort(all_pc_updates.begin(), all_pc_updates.end()); pc_.merge_updates(all_pc_updates); + if (sb_iter_delta > 0) { pc_.strong_branching_lp_iter += sb_iter_delta; } } template @@ -3164,6 +3899,7 @@ void branch_and_bound_t::deterministic_broadcast_snapshots( deterministic_snapshot_t snap; snap.upper_bound = upper_bound_.load(); snap.total_lp_iters = exploration_stats_.total_lp_iters.load(); + snap.nodes_explored = exploration_stats_.nodes_explored.load(); snap.incumbent = incumbent_snapshot; snap.pc_snapshot = pc_.create_snapshot(); @@ -3174,91 +3910,153 @@ void branch_and_bound_t::deterministic_broadcast_snapshots( template void branch_and_bound_t::deterministic_sort_replay_events( - const bb_event_batch_t& events) + const bb_event_batch_t& events, + std::vector::deterministic_replay_solution_t>& + replay_solutions) { - // Infeasible solutions from GPU heuristics are queued for repair; process them now + // Retire external solutions that have reached the current horizon. Feasibility + // classification and repair happen only here in deterministic mode. { - std::vector> to_repair; - // TODO: support repair queue in deterministic mode - // mutex_repair_.lock(); - // if (repair_queue_.size() > 0) { - // to_repair = repair_queue_; - // repair_queue_.clear(); - // } - // mutex_repair_.unlock(); - - std::sort(to_repair.begin(), - to_repair.end(), - [](const std::vector& a, const std::vector& b) { return a < b; }); - - if (to_repair.size() > 0) { - settings_.log.debug("Deterministic sync: Attempting to repair %ld injected solutions\n", - to_repair.size()); - for (const std::vector& uncrushed_solution : to_repair) { + std::vector due_solutions; + mutex_heuristic_queue_.lock(); + { + std::vector future_solutions; + for (auto& sol : heuristic_solution_queue_) { + if (sol.work_timestamp < deterministic_current_horizon_) { + due_solutions.push_back(std::move(sol)); + } else { + future_solutions.push_back(std::move(sol)); + } + } + heuristic_solution_queue_ = std::move(future_solutions); + } + mutex_heuristic_queue_.unlock(); + + std::sort(due_solutions.begin(), + due_solutions.end(), + [](const queued_external_solution_t& a, const queued_external_solution_t& b) { + if (a.work_timestamp != b.work_timestamp) { + return a.work_timestamp < b.work_timestamp; + } + if (a.user_objective != b.user_objective) { + return a.user_objective < b.user_objective; + } + if (a.origin != b.origin) { return a.origin < b.origin; } + return a.solution < b.solution; + }); + + if (!due_solutions.empty() || !heuristic_solution_queue_.empty()) { + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic sync retire: horizon=%.6f due=%zu future=%zu pre_expl=%.6f\n", + deterministic_current_horizon_, + due_solutions.size(), + heuristic_solution_queue_.size(), + pre_exploration_work_); + for (size_t i = 0; i < due_solutions.size(); ++i) { + CUOPT_DETERMINISM_LOG( + settings_.log, + " due[%zu]: wut=%.6f obj=%g origin=%s\n", + i, + due_solutions[i].work_timestamp, + due_solutions[i].user_objective, + cuopt::internals::mip_solution_origin_to_string(due_solutions[i].origin)); + } + } + if (!due_solutions.empty()) { + CUOPT_DETERMINISM_LOG(settings_.log, + "Deterministic sync: retiring %ld external solutions\n", + due_solutions.size()); + for (const auto& queued_solution : due_solutions) { std::vector crushed_solution; - crush_primal_solution( - original_problem_, original_lp_, uncrushed_solution, new_slacks_, crushed_solution); - std::vector repaired_solution; - f_t repaired_obj; - bool success = - repair_solution(edge_norms_, crushed_solution, repaired_obj, repaired_solution); - if (success) { - // Queue repaired solution with work unit timestamp (...workstamp?) - mutex_heuristic_queue_.lock(); - heuristic_solution_queue_.push_back( - {repaired_obj, std::move(repaired_solution), 0, -1, 0, deterministic_current_horizon_}); - mutex_heuristic_queue_.unlock(); + f_t obj; + bool is_feasible = retire_queued_solution(queued_solution, obj, crushed_solution); + if (is_feasible) { + replay_solutions.push_back({{obj, + std::move(crushed_solution), + 0, + -1, + 0, + queued_solution.work_timestamp, + queued_solution.origin}, + search_strategy_t::BEST_FIRST}); } } } } + if (!replay_solutions.empty() || !heuristic_solution_queue_.empty()) { + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic replay extract: horizon=%.6f now=%zu future=%zu upper=%.16e\n", + deterministic_current_horizon_, + replay_solutions.size(), + heuristic_solution_queue_.size(), + upper_bound_.load()); + } - // Extract heuristic solutions, keeping future solutions for next horizon - // Use deterministic_current_horizon_ as the upper bound (horizon_end) - std::vector> heuristic_solutions; - mutex_heuristic_queue_.lock(); - { - std::vector> future_solutions; - for (auto& sol : heuristic_solution_queue_) { - if (sol.work_timestamp < deterministic_current_horizon_) { - heuristic_solutions.push_back(std::move(sol)); - } else { - future_solutions.push_back(std::move(sol)); - } + // Sort the full replay stream by work unit timestamp, with stable deterministic tie-breakers. + std::sort(replay_solutions.begin(), replay_solutions.end(), [](const auto& a, const auto& b) { + if (a.solution.work_timestamp != b.solution.work_timestamp) { + return a.solution.work_timestamp < b.solution.work_timestamp; } - heuristic_solution_queue_ = std::move(future_solutions); - } - mutex_heuristic_queue_.unlock(); + if (a.solution.objective != b.solution.objective) { + return a.solution.objective < b.solution.objective; + } + if (a.solution.origin != b.solution.origin) { return a.solution.origin < b.solution.origin; } + if (a.solution.worker_id != b.solution.worker_id) { + return a.solution.worker_id < b.solution.worker_id; + } + if (a.solution.sequence_id != b.solution.sequence_id) { + return a.solution.sequence_id < b.solution.sequence_id; + } + return a.solution.solution < b.solution.solution; + }); - // sort by work unit timestamp, with objective and solution values as tie-breakers - std::sort( - heuristic_solutions.begin(), - heuristic_solutions.end(), - [](const queued_integer_solution_t& a, const queued_integer_solution_t& b) { - if (a.work_timestamp != b.work_timestamp) { return a.work_timestamp < b.work_timestamp; } - if (a.objective != b.objective) { return a.objective < b.objective; } - return a.solution < b.solution; // edge-case - lexicographical comparison - }); + f_t deterministic_lower = deterministic_compute_lower_bound(); + f_t current_upper = upper_bound_.load(); + if (deterministic_current_horizon_ <= deterministic_horizon_step_ + 1e-9) { + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic solution replay: candidates=%zu lower=%.16e upper_before=%.16e\n", + replay_solutions.size(), + deterministic_lower, + current_upper); + for (size_t i = 0; i < replay_solutions.size(); ++i) { + const auto& replay = replay_solutions[i]; + const auto& sol = replay.solution; + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic replay solution[%zu]: wut=%.6f obj=%.16e origin=%s worker=%d seq=%d " + "depth=%d sol_hash=0x%x\n", + i, + sol.work_timestamp, + sol.objective, + cuopt::internals::mip_solution_origin_to_string(sol.origin), + sol.worker_id, + sol.sequence_id, + sol.depth, + detail::compute_hash(sol.solution)); + } + } - // Merge B&B events and heuristic solutions for unified timeline replay - size_t event_idx = 0; - size_t heuristic_idx = 0; + // Merge B&B events and all incumbent-producing solutions for unified timeline replay. + size_t event_idx = 0; + size_t solution_idx = 0; - while (event_idx < events.events.size() || heuristic_idx < heuristic_solutions.size()) { - bool process_event = false; - bool process_heuristic = false; + while (event_idx < events.events.size() || solution_idx < replay_solutions.size()) { + bool process_event = false; + bool process_solution = false; if (event_idx >= events.events.size()) { - process_heuristic = true; - } else if (heuristic_idx >= heuristic_solutions.size()) { + process_solution = true; + } else if (solution_idx >= replay_solutions.size()) { process_event = true; } else { - // Both have items - pick the one with smaller WUT if (events.events[event_idx].work_timestamp <= - heuristic_solutions[heuristic_idx].work_timestamp) { + replay_solutions[solution_idx].solution.work_timestamp) { process_event = true; } else { - process_heuristic = true; + process_solution = true; } } @@ -3273,43 +4071,72 @@ void branch_and_bound_t::deterministic_sort_replay_events( } } - if (process_heuristic) { - const auto& hsol = heuristic_solutions[heuristic_idx++]; - - CUOPT_LOG_TRACE( - "Deterministic sync: Heuristic solution received at WUT %f with objective %g, current " - "horizon %f", - hsol.work_timestamp, - hsol.objective, - deterministic_current_horizon_); + if (process_solution) { + const auto& replay = replay_solutions[solution_idx++]; + const auto& sol = replay.solution; + bool improved = false; - // Process heuristic solution at its correct work unit timestamp position - f_t new_upper = std::numeric_limits::infinity(); - - if (hsol.objective < upper_bound_) { - upper_bound_ = hsol.objective; - incumbent_.set_incumbent_solution(hsol.objective, hsol.solution); - new_upper = hsol.objective; + if (sol.objective < upper_bound_) { + const f_t previous_upper = upper_bound_; + upper_bound_ = sol.objective; + incumbent_.set_incumbent_solution(sol.objective, sol.solution); + current_upper = sol.objective; + improved = true; + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic B&B incumbent update: source=det_replay prev_upper=%.16e " + "new_upper=%.16e obj=%.16e hash=0x%x worker=%d seq=%d wut=%.6f horizon=%.6f\n", + previous_upper, + upper_bound_.load(), + sol.objective, + detail::compute_hash(sol.solution), + sol.worker_id, + sol.sequence_id, + sol.work_timestamp, + deterministic_current_horizon_); } - - if (new_upper < std::numeric_limits::infinity()) { - report_heuristic(new_upper); - - if (settings_.solution_callback != nullptr) { - std::vector original_x; - uncrush_primal_solution(original_problem_, original_lp_, hsol.solution, original_x); - settings_.solution_callback(original_x, hsol.objective); + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic replay: horizon=%.6f wut=%.6f obj=%.16e origin=%s accepted=%d " + "upper_now=%.16e worker=%d seq=%d sol_hash=0x%x\n", + deterministic_current_horizon_, + sol.work_timestamp, + sol.objective, + cuopt::internals::mip_solution_origin_to_string(sol.origin), + (int)improved, + current_upper, + sol.worker_id, + sol.sequence_id, + detail::compute_hash(sol.solution)); + + if (improved) { + CUOPT_DETERMINISM_LOG( + settings_.log, + "Deterministic replay PUBLISH: horizon=%.6f wut=%.6f obj=%g origin=%s worker=%d " + "upper_after=%.16e\n", + deterministic_current_horizon_, + sol.work_timestamp, + compute_user_objective(original_lp_, sol.objective), + cuopt::internals::mip_solution_origin_to_string(sol.origin), + sol.worker_id, + current_upper); + if (sol.origin == cuopt::internals::mip_solution_origin_t::BRANCH_AND_BOUND_NODE || + sol.origin == cuopt::internals::mip_solution_origin_t::BRANCH_AND_BOUND_DIVING) { + report(feasible_solution_symbol(replay.strategy), + sol.objective, + deterministic_lower, + sol.depth, + 0, + deterministic_current_horizon_); + } else { + report_heuristic(sol.objective, sol.work_timestamp); } + emit_solution_callback_from_crushed( + sol.solution, sol.objective, sol.origin, sol.work_timestamp); } } } - // Merge integer solutions from BFS workers and update global incumbent - deterministic_process_worker_solutions(*deterministic_workers_, - [](const deterministic_bfs_worker_pool_t&, int) { - return search_strategy_t::BEST_FIRST; - }); - // Merge and apply pseudo-cost updates from BFS workers deterministic_merge_pseudo_cost_updates(*deterministic_workers_); @@ -3365,52 +4192,44 @@ void branch_and_bound_t::deterministic_balance_worker_loads() constexpr bool force_rebalance_every_sync = false; - // Count work for each worker: current_node (if any) + plunge_stack + backlog - std::vector work_counts(num_workers); - size_t total_work = 0; - size_t max_work = 0; - size_t min_work = std::numeric_limits::max(); + std::vector backlog_counts(num_workers); + size_t total_backlog = 0; + size_t max_backlog = 0; + size_t min_backlog = std::numeric_limits::max(); for (size_t w = 0; w < num_workers; ++w) { - auto& worker = (*deterministic_workers_)[w]; - work_counts[w] = worker.queue_size(); - total_work += work_counts[w]; - max_work = std::max(max_work, work_counts[w]); - min_work = std::min(min_work, work_counts[w]); + auto& worker = (*deterministic_workers_)[w]; + backlog_counts[w] = worker.backlog.size(); + total_backlog += backlog_counts[w]; + max_backlog = std::max(max_backlog, backlog_counts[w]); + min_backlog = std::min(min_backlog, backlog_counts[w]); } - if (total_work == 0) return; + if (total_backlog == 0) return; bool needs_balance; if (force_rebalance_every_sync) { - needs_balance = (total_work > 1); + needs_balance = (total_backlog > 1); } else { - needs_balance = (min_work == 0 && max_work >= 2) || (min_work > 0 && max_work > 4 * min_work); + needs_balance = + (min_backlog == 0 && max_backlog >= 2) || (min_backlog > 0 && max_backlog > 4 * min_backlog); } if (!needs_balance) return; - std::vector*> all_nodes; + std::vector*> all_backlog_nodes; for (auto& worker : *deterministic_workers_) { for (auto* node : worker.backlog.data()) { - all_nodes.push_back(node); + all_backlog_nodes.push_back(node); } worker.backlog.clear(); } - if (all_nodes.empty()) return; - - auto deterministic_less = [](const mip_node_t* a, const mip_node_t* b) { - if (a->origin_worker_id != b->origin_worker_id) { - return a->origin_worker_id < b->origin_worker_id; - } - return a->creation_seq < b->creation_seq; - }; - std::sort(all_nodes.begin(), all_nodes.end(), deterministic_less); + if (all_backlog_nodes.empty()) return; - // Distribute nodes - for (size_t i = 0; i < all_nodes.size(); ++i) { + // Round-robin distribute into backlogs; priority queue handles ordering internally + for (size_t i = 0; i < all_backlog_nodes.size(); ++i) { size_t worker_idx = i % num_workers; - (*deterministic_workers_)[worker_idx].enqueue_node(all_nodes[i]); + (*deterministic_workers_)[worker_idx].backlog.push(all_backlog_nodes[i]); } } @@ -3530,16 +4349,18 @@ void branch_and_bound_t::deterministic_assign_diving_nodes() } template -void branch_and_bound_t::deterministic_collect_diving_solutions_and_update_pseudocosts() +void branch_and_bound_t::deterministic_collect_diving_solutions_and_update_pseudocosts( + std::vector::deterministic_replay_solution_t>& + replay_solutions) { if (!deterministic_diving_workers_) return; - // Collect integer solutions from diving workers and update global incumbent - deterministic_process_worker_solutions( + deterministic_collect_worker_solutions( *deterministic_diving_workers_, [](const deterministic_diving_worker_pool_t& pool, int worker_id) { return pool[worker_id].diving_type; - }); + }, + replay_solutions); // Merge pseudo-cost updates from diving workers deterministic_merge_pseudo_cost_updates(*deterministic_diving_workers_); @@ -3618,9 +4439,15 @@ void branch_and_bound_t::deterministic_dive( // Setup LP settings simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); - lp_settings.cut_off = worker.local_upper_bound + settings_.dual_tol; + if (original_lp_.objective_is_integral) { + lp_settings.cut_off = + std::ceil(worker.local_upper_bound - settings_.integer_tol) + settings_.dual_tol; + } else { + lp_settings.cut_off = worker.local_upper_bound + settings_.dual_tol; + } lp_settings.inside_mip = 2; lp_settings.time_limit = remaining_time; + lp_settings.work_limit = std::numeric_limits::infinity(); lp_settings.scale_columns = false; #ifndef DETERMINISM_DISABLE_BOUNDS_STRENGTHENING @@ -3628,7 +4455,6 @@ void branch_and_bound_t::deterministic_dive( lp_settings, worker.bounds_changed, worker.leaf_problem.lower, worker.leaf_problem.upper); if (settings_.deterministic) { - // TEMP APPROXIMATION; worker.work_context.record_work_sync_on_horizon(worker.node_presolver.last_nnz_processed / 1e8); } @@ -3682,17 +4508,17 @@ void branch_and_bound_t::deterministic_dive( lp_status = convert_lp_status_to_dual_status(second_status); } - ++nodes_this_dive; - ++worker.total_nodes_explored; worker.lp_iters_this_dive += node_iter; - - worker.clock = worker.work_context.global_work_units_elapsed; + worker.clock = pre_exploration_work_ + worker.work_context.global_work_units_elapsed; if (lp_status == dual::status_t::TIME_LIMIT || lp_status == dual::status_t::WORK_LIMIT || lp_status == dual::status_t::ITERATION_LIMIT) { break; } + ++nodes_this_dive; + ++worker.total_nodes_explored; + deterministic_diving_policy_t policy{*this, worker, stack, max_backtrack_depth}; update_tree_impl(node_ptr, dive_tree, &worker, lp_status, policy); } diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index a13d5cedcf..ad8a0a88bd 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -32,9 +33,19 @@ #include +#include +#include #include +#include +#include +#include #include +namespace cuopt::linear_programming::detail { +template +struct clique_table_t; +} + namespace cuopt::linear_programming::dual_simplex { enum class mip_status_t { @@ -68,7 +79,8 @@ class branch_and_bound_t { public: branch_and_bound_t(const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings, - f_t start_time); + f_t start_time, + std::shared_ptr> clique_table = nullptr); // Set an initial guess based on the user_problem. This should be called before solve. void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } @@ -97,7 +109,11 @@ class branch_and_bound_t { void set_new_solution(const std::vector& solution); // This queues the solution to be processed at the correct work unit timestamp - void queue_external_solution_deterministic(const std::vector& solution, double work_unit_ts); + void queue_external_solution_deterministic(const std::vector& solution, + f_t user_objective, + double work_unit_ts, + cuopt::internals::mip_solution_origin_t origin = + cuopt::internals::mip_solution_origin_t::UNKNOWN); void set_user_bound_callback(std::function callback) { @@ -106,8 +122,6 @@ class branch_and_bound_t { void set_concurrent_lp_root_solve(bool enable) { enable_concurrent_lp_root_solve_ = enable; } - bool stop_for_time_limit(mip_solution_t& solution); - // Repair a low-quality solution from the heuristics. bool repair_solution(const std::vector& leaf_edge_norms, const std::vector& potential_solution, @@ -115,6 +129,7 @@ class branch_and_bound_t { std::vector& repaired_solution) const; f_t get_lower_bound(); + f_t get_upper_bound() const { return upper_bound_; } bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } @@ -138,11 +153,24 @@ class branch_and_bound_t { // Get producer sync for external heuristics (e.g., CPUFJ) to register producer_sync_t& get_producer_sync() { return producer_sync_; } + void wait_for_exploration_start() + { + std::unique_lock lock(exploration_started_mutex_); + exploration_started_cv_.wait(lock, [this] { return exploration_started_.load(); }); + } + private: const user_problem_t& original_problem_; const simplex_solver_settings_t settings_; + std::shared_ptr> clique_table_; + std::future>> clique_table_future_; + std::atomic signal_extend_cliques_{false}; work_limit_context_t work_unit_context_{"B&B"}; + double pre_exploration_work_{0.0}; + std::atomic exploration_started_{false}; + std::mutex exploration_started_mutex_; + std::condition_variable exploration_started_cv_; // Initial guess. std::vector guess_; @@ -180,7 +208,13 @@ class branch_and_bound_t { // Mutex for repair omp_mutex_t mutex_repair_; - std::vector> repair_queue_; + struct queued_repair_solution_t { + std::vector solution; + cuopt::internals::mip_solution_origin_t origin{ + cuopt::internals::mip_solution_origin_t::UNKNOWN}; + double work_timestamp{-1.0}; + }; + std::vector repair_queue_; // Variables for the root node in the search tree. std::vector root_vstatus_; @@ -190,6 +224,8 @@ class branch_and_bound_t { lp_solution_t root_crossover_soln_; std::vector edge_norms_; std::atomic root_crossover_solution_set_{false}; + omp_atomic_t root_lp_current_lower_bound_; + omp_atomic_t solving_root_relaxation_{false}; bool enable_concurrent_lp_root_solve_{false}; std::atomic root_concurrent_halt_{0}; bool is_root_solution_set{false}; @@ -224,13 +260,21 @@ class branch_and_bound_t { omp_atomic_t lower_bound_ceiling_; std::function user_bound_callback_; - void report_heuristic(f_t obj); + void report_heuristic(f_t obj, double work_time = -1.0); void report(char symbol, f_t obj, f_t lower_bound, i_t node_depth, i_t node_int_infeas, double work_time = -1); + void emit_solution_callback(std::vector& original_x, + f_t objective, + cuopt::internals::mip_solution_origin_t origin, + double work_timestamp); + void emit_solution_callback_from_crushed(const std::vector& crushed_solution, + f_t objective, + cuopt::internals::mip_solution_origin_t origin, + double work_timestamp); // Set the solution when found at the root node void set_solution_at_root(mip_solution_t& solution, @@ -303,7 +347,14 @@ class branch_and_bound_t { void run_deterministic_coordinator(const csr_matrix_t& Arow); // Gather all events generated, sort by WU timestamp, apply - void deterministic_sort_replay_events(const bb_event_batch_t& events); + struct deterministic_replay_solution_t { + queued_integer_solution_t solution; + search_strategy_t strategy{search_strategy_t::BEST_FIRST}; + }; + + void deterministic_sort_replay_events( + const bb_event_batch_t& events, + std::vector& replay_solutions); // Prune nodes held by workers based on new incumbent void deterministic_prune_worker_nodes_vs_incumbent(); @@ -336,10 +387,14 @@ class branch_and_bound_t { void deterministic_assign_diving_nodes(); // Collect and merge diving solutions at sync - void deterministic_collect_diving_solutions_and_update_pseudocosts(); + void deterministic_collect_diving_solutions_and_update_pseudocosts( + std::vector& replay_solutions); template - void deterministic_process_worker_solutions(PoolT& pool, WorkerTypeGetter get_worker_type); + void deterministic_collect_worker_solutions( + PoolT& pool, + WorkerTypeGetter get_worker_type, + std::vector& replay_solutions); template void deterministic_merge_pseudo_cost_updates(PoolT& pool); @@ -370,10 +425,23 @@ class branch_and_bound_t { double max_producer_wait_time_{0.0}; i_t producer_wait_count_{0}; - // Determinism heuristic solution queue - solutions received from GPU heuristics - // Stored with work unit timestamp for deterministic ordering + struct queued_external_solution_t { + std::vector solution; + f_t user_objective{std::numeric_limits::infinity()}; + double work_timestamp{0.0}; + cuopt::internals::mip_solution_origin_t origin{ + cuopt::internals::mip_solution_origin_t::UNKNOWN}; + }; + + bool retire_queued_solution(const queued_external_solution_t& queued_solution, + f_t& out_obj, + std::vector& out_crushed); + + // Deterministic pending external solution queue. + // External solutions stay raw until their retirement horizon, where they are + // crushed, checked, and repaired immediately if needed. omp_mutex_t mutex_heuristic_queue_; - std::vector> heuristic_solution_queue_; + std::vector heuristic_solution_queue_; // ============================================================================ // Determinism Diving state diff --git a/cpp/src/branch_and_bound/deterministic_workers.hpp b/cpp/src/branch_and_bound/deterministic_workers.hpp index 7a074051c6..f6f9a1bd51 100644 --- a/cpp/src/branch_and_bound/deterministic_workers.hpp +++ b/cpp/src/branch_and_bound/deterministic_workers.hpp @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -44,6 +45,8 @@ struct queued_integer_solution_t { int worker_id{-1}; int sequence_id{0}; double work_timestamp{0.0}; + cuopt::internals::mip_solution_origin_t origin{ + cuopt::internals::mip_solution_origin_t::BRANCH_AND_BOUND_NODE}; bool operator<(const queued_integer_solution_t& other) const { @@ -59,6 +62,7 @@ struct deterministic_snapshot_t { pseudo_cost_snapshot_t pc_snapshot; std::vector incumbent; i_t total_lp_iters; + i_t nodes_explored; }; template @@ -75,6 +79,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { // Diving-specific snapshots (ignored by BFS workers) std::vector incumbent_snapshot; i_t total_lp_iters_snapshot{0}; + i_t nodes_explored_snapshot{0}; std::vector> integer_solutions; int next_solution_seq{0}; @@ -101,6 +106,7 @@ class deterministic_worker_base_t : public branch_and_bound_worker_t { pc_snapshot = snap.pc_snapshot; incumbent_snapshot = snap.incumbent; total_lp_iters_snapshot = snap.total_lp_iters; + nodes_explored_snapshot = snap.nodes_explored; } bool has_work() const { return static_cast(this)->has_work_impl(); } @@ -158,11 +164,6 @@ class deterministic_bfs_worker_t mip_node_t* up_child, rounding_direction_t preferred_direction) { - if (!plunge_stack.empty()) { - backlog.push(plunge_stack.back()); - plunge_stack.pop_back(); - } - down_child->origin_worker_id = this->worker_id; down_child->creation_seq = next_creation_seq++; up_child->origin_worker_id = this->worker_id; @@ -170,11 +171,11 @@ class deterministic_bfs_worker_t mip_node_t* first_child; if (preferred_direction == rounding_direction_t::UP) { - plunge_stack.push_front(down_child); + backlog.push(down_child); plunge_stack.push_front(up_child); first_child = up_child; } else { - plunge_stack.push_front(up_child); + backlog.push(up_child); plunge_stack.push_front(down_child); first_child = down_child; } @@ -288,6 +289,7 @@ class deterministic_diving_worker_t // Diving statistics i_t total_nodes_explored{0}; + i_t nodes_explored_last_sync{0}; i_t total_dives{0}; i_t lp_iters_this_dive{0}; @@ -339,7 +341,13 @@ class deterministic_diving_worker_t void queue_integer_solution(f_t objective, const std::vector& solution, i_t depth) { this->integer_solutions.push_back( - {objective, solution, depth, this->worker_id, this->next_solution_seq++}); + {objective, + solution, + depth, + this->worker_id, + this->next_solution_seq++, + this->clock, + cuopt::internals::mip_solution_origin_t::BRANCH_AND_BOUND_DIVING}); ++this->total_integer_solutions; } diff --git a/cpp/src/branch_and_bound/pseudo_costs.cpp b/cpp/src/branch_and_bound/pseudo_costs.cpp index ee7e2f7803..7be411ac7b 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.cpp +++ b/cpp/src/branch_and_bound/pseudo_costs.cpp @@ -34,7 +34,8 @@ void strong_branch_helper(i_t start, const std::vector& root_soln, const std::vector& root_vstatus, const std::vector& edge_norms, - pseudo_costs_t& pc) + pseudo_costs_t& pc, + cuopt::work_limit_context_t* work_unit_context) { raft::common::nvtx::range scope("BB::strong_branch_helper"); lp_problem_t child_problem = original_lp; @@ -74,7 +75,8 @@ void strong_branch_helper(i_t start, vstatus, solution, iter, - child_edge_norms); + child_edge_norms, + work_unit_context); f_t obj = std::numeric_limits::quiet_NaN(); if (status == dual::status_t::DUAL_UNBOUNDED) { @@ -216,8 +218,276 @@ f_t trial_branching(const lp_problem_t& original_lp, } } +template +f_t trial_branching_generic(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper, + f_t upper_bound, + i_t bnb_lp_iter_per_node, + f_t start_time, + i_t upper_max_lp_iter, + i_t lower_max_lp_iter, + omp_atomic_t& total_lp_iter) +{ + return trial_branching(original_lp, + settings, + var_types, + vstatus, + edge_norms, + basis_factors, + basic_list, + nonbasic_list, + branch_var, + branch_var_lower, + branch_var_upper, + upper_bound, + bnb_lp_iter_per_node, + start_time, + upper_max_lp_iter, + lower_max_lp_iter, + total_lp_iter); +} + +template +f_t trial_branching_generic(const lp_problem_t& original_lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& vstatus, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + i_t branch_var, + f_t branch_var_lower, + f_t branch_var_upper, + f_t upper_bound, + i_t bnb_lp_iter_per_node, + f_t start_time, + i_t upper_max_lp_iter, + i_t lower_max_lp_iter, + int64_t& total_lp_iter) +{ + omp_atomic_t atomic_iter{0}; + f_t result = trial_branching(original_lp, + settings, + var_types, + vstatus, + edge_norms, + basis_factors, + basic_list, + nonbasic_list, + branch_var, + branch_var_lower, + branch_var_upper, + upper_bound, + bnb_lp_iter_per_node, + start_time, + upper_max_lp_iter, + lower_max_lp_iter, + atomic_iter); + total_lp_iter += atomic_iter.load(); + return result; +} + } // namespace +template +i_t reliable_variable_selection_core(mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const lp_problem_t& leaf_problem, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + SumT* sum_down, + SumT* sum_up, + CountT* num_down, + CountT* num_up, + i_t n_vars, + SBIterT& strong_branching_lp_iter, + f_t upper_bound, + int64_t bnb_lp_iters, + int64_t bnb_nodes_explored, + f_t start_time, + const reliability_branching_settings_t& rb_settings, + int num_tasks, + omp_mutex_t* var_mutex_down, + omp_mutex_t* var_mutex_up, + pcgenerator_t* rng) +{ + constexpr f_t eps = 1e-6; + i_t branch_var = fractional[0]; + f_t max_score = -1; + + auto avgs = compute_pseudo_cost_averages(sum_down, sum_up, num_down, num_up, (size_t)n_vars); + f_t pseudo_cost_down_avg = avgs.down_avg; + f_t pseudo_cost_up_avg = avgs.up_avg; + + const i_t bnb_lp_iter_per_node = + bnb_nodes_explored > 0 ? (i_t)(bnb_lp_iters / bnb_nodes_explored) : 0; + + i_t reliable_threshold = settings.reliability_branching; + if (reliable_threshold < 0) { + const int64_t alpha = (int64_t)(rb_settings.bnb_lp_factor * bnb_lp_iters); + const int64_t max_reliability_iter = alpha + rb_settings.bnb_lp_offset; + + f_t iter_fraction = + (max_reliability_iter - strong_branching_lp_iter) / (strong_branching_lp_iter + 1.0); + iter_fraction = std::min(1.0, iter_fraction); + iter_fraction = std::max((alpha - strong_branching_lp_iter) / (strong_branching_lp_iter + 1.0), + iter_fraction); + reliable_threshold = (int)((1 - iter_fraction) * rb_settings.min_reliable_threshold + + iter_fraction * rb_settings.max_reliable_threshold); + reliable_threshold = strong_branching_lp_iter < max_reliability_iter ? reliable_threshold : 0; + } + + std::vector unreliable_list; + + for (i_t j : fractional) { + if (num_down[j] < reliable_threshold || num_up[j] < reliable_threshold) { + unreliable_list.push_back(j); + continue; + } + f_t pc_down = num_down[j] > 0 ? sum_down[j] / num_down[j] : pseudo_cost_down_avg; + f_t pc_up = num_up[j] > 0 ? sum_up[j] / num_up[j] : pseudo_cost_up_avg; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + if (score > max_score) { + max_score = score; + branch_var = j; + } + } + + if (unreliable_list.empty()) { return branch_var; } + + const i_t max_num_candidates = rb_settings.max_num_candidates; + const i_t num_candidates = std::min(unreliable_list.size(), max_num_candidates); + + settings.log.debug( + "Reliability branching: node=%d depth=%d fractional=%zu unreliable=%zu candidates=%d " + "threshold=%d sb_iters=%lld bnb_iters=%lld explored=%lld tasks=%d\n", + node_ptr->node_id, + node_ptr->depth, + fractional.size(), + unreliable_list.size(), + num_candidates, + reliable_threshold, + (long long)strong_branching_lp_iter, + (long long)bnb_lp_iters, + (long long)bnb_nodes_explored, + num_tasks); + + if (rng != nullptr && unreliable_list.size() > (size_t)max_num_candidates) { + rng->shuffle(unreliable_list); + } + + if (toc(start_time) > settings.time_limit) { return branch_var; } + + omp_mutex_t score_mutex; + const int task_priority = rb_settings.task_priority; + +#pragma omp taskloop if (num_tasks > 1) priority(task_priority) num_tasks(num_tasks) \ + shared(score_mutex, strong_branching_lp_iter) + for (i_t i = 0; i < num_candidates; ++i) { + const i_t j = unreliable_list[i]; + + if (toc(start_time) > settings.time_limit) { continue; } + + if (var_mutex_down) { var_mutex_down[j].lock(); } + if (num_down[j] < reliable_threshold) { + f_t obj = trial_branching_generic(leaf_problem, + settings, + var_types, + node_ptr->vstatus, + edge_norms, + basis_factors, + basic_list, + nonbasic_list, + j, + leaf_problem.lower[j], + std::floor(solution[j]), + upper_bound, + bnb_lp_iter_per_node, + start_time, + rb_settings.upper_max_lp_iter, + rb_settings.lower_max_lp_iter, + strong_branching_lp_iter); + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = solution[j] - std::floor(solution[j]); + sum_down[j] += change_in_obj / change_in_x; + num_down[j]++; + } + } + if (var_mutex_down) { var_mutex_down[j].unlock(); } + + if (toc(start_time) > settings.time_limit) { continue; } + + if (var_mutex_up) { var_mutex_up[j].lock(); } + if (num_up[j] < reliable_threshold) { + f_t obj = trial_branching_generic(leaf_problem, + settings, + var_types, + node_ptr->vstatus, + edge_norms, + basis_factors, + basic_list, + nonbasic_list, + j, + std::ceil(solution[j]), + leaf_problem.upper[j], + upper_bound, + bnb_lp_iter_per_node, + start_time, + rb_settings.upper_max_lp_iter, + rb_settings.lower_max_lp_iter, + strong_branching_lp_iter); + if (!std::isnan(obj)) { + f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); + f_t change_in_x = std::ceil(solution[j]) - solution[j]; + sum_up[j] += change_in_obj / change_in_x; + num_up[j]++; + } + } + if (var_mutex_up) { var_mutex_up[j].unlock(); } + + if (toc(start_time) > settings.time_limit) { continue; } + + f_t pc_down = num_down[j] > 0 ? sum_down[j] / num_down[j] : pseudo_cost_down_avg; + f_t pc_up = num_up[j] > 0 ? sum_up[j] / num_up[j] : pseudo_cost_up_avg; + f_t f_down = solution[j] - std::floor(solution[j]); + f_t f_up = std::ceil(solution[j]) - solution[j]; + f_t score = std::max(f_down * pc_down, eps) * std::max(f_up * pc_up, eps); + + score_mutex.lock(); + if (score > max_score) { + max_score = score; + branch_var = j; + } + score_mutex.unlock(); + } + + settings.log.debug("Reliability branching result: node=%d branch_var=%d value=%e score=%e\n", + node_ptr->node_id, + branch_var, + solution[branch_var], + max_score); + + return branch_var; +} + template static cuopt::mps_parser::mps_data_model_t simplex_problem_to_mps_data_model( const dual_simplex::user_problem_t& user_problem) @@ -307,7 +577,8 @@ void strong_branching(const user_problem_t& original_problem, f_t root_obj, const std::vector& root_vstatus, const std::vector& edge_norms, - pseudo_costs_t& pc) + pseudo_costs_t& pc, + cuopt::work_limit_context_t* work_unit_context) { pc.resize(original_lp.num_cols); pc.strong_branch_down.assign(fractional.size(), 0); @@ -400,16 +671,25 @@ void strong_branching(const user_problem_t& original_problem, fractional.size()); f_t strong_branching_start_time = tic(); + const bool use_work_accounting = work_unit_context && work_unit_context->deterministic; + // More tasks than threads in order to allow for dynamic load balancing through OpenMP. + // work context accounting needs to be one on a task basis to avoid + // nondeterminism, because openmp is free to schedule threads as it likes. + const i_t n_tasks = std::min(4 * settings.num_threads, fractional.size()); + std::vector task_work_contexts; + if (use_work_accounting) { + for (i_t k = 0; k < n_tasks; ++k) { + task_work_contexts.emplace_back("sb_task_" + std::to_string(k)); + task_work_contexts.back().deterministic = true; + } + } + #pragma omp parallel num_threads(settings.num_threads) { - i_t n = std::min(4 * settings.num_threads, fractional.size()); - - // Here we are creating more tasks than the number of threads - // such that they can be scheduled dynamically to the threads. #pragma omp for schedule(dynamic, 1) - for (i_t k = 0; k < n; k++) { - i_t start = std::floor(k * fractional.size() / n); - i_t end = std::floor((k + 1) * fractional.size() / n); + for (i_t k = 0; k < n_tasks; k++) { + i_t start = std::floor(k * fractional.size() / n_tasks); + i_t end = std::floor((k + 1) * fractional.size() / n_tasks); constexpr bool verbose = false; if (verbose) { @@ -421,6 +701,9 @@ void strong_branching(const user_problem_t& original_problem, end - start); } + cuopt::work_limit_context_t* task_ctx = + use_work_accounting ? &task_work_contexts[k] : nullptr; + strong_branch_helper(start, end, start_time, @@ -432,9 +715,21 @@ void strong_branching(const user_problem_t& original_problem, root_soln, root_vstatus, edge_norms, - pc); + pc, + task_ctx); } } + + // record pre-exploration work by taking the max work performed by any task + // important to aggregate by task and not thread, as openmp uses dynamic scheduling here + if (use_work_accounting) { + double max_work = 0.0; + for (auto& ctx : task_work_contexts) { + max_work = std::max(max_work, ctx.current_work()); + } + work_unit_context->record_work_sync_on_horizon(max_work); + } + settings.log.printf("Strong branching completed in %.2fs\n", toc(strong_branching_start_time)); } @@ -540,181 +835,31 @@ i_t pseudo_costs_t::reliable_variable_selection( int max_num_tasks, logger_t& log) { - constexpr f_t eps = 1e-6; - f_t start_time = bnb_stats.start_time; - i_t branch_var = fractional[0]; - f_t max_score = -1; - i_t num_initialized_down; - i_t num_initialized_up; - f_t pseudo_cost_down_avg; - f_t pseudo_cost_up_avg; - - initialized(num_initialized_down, num_initialized_up, pseudo_cost_down_avg, pseudo_cost_up_avg); - - log.printf("PC: num initialized down %d up %d avg down %e up %e\n", - num_initialized_down, - num_initialized_up, - pseudo_cost_down_avg, - pseudo_cost_up_avg); - - const int64_t branch_and_bound_lp_iters = bnb_stats.total_lp_iters; - const int64_t branch_and_bound_explored = bnb_stats.nodes_explored; - const i_t branch_and_bound_lp_iter_per_node = - branch_and_bound_lp_iters / bnb_stats.nodes_explored; - - i_t reliable_threshold = settings.reliability_branching; - if (reliable_threshold < 0) { - const i_t max_threshold = reliability_branching_settings.max_reliable_threshold; - const i_t min_threshold = reliability_branching_settings.min_reliable_threshold; - const f_t iter_factor = reliability_branching_settings.bnb_lp_factor; - const i_t iter_offset = reliability_branching_settings.bnb_lp_offset; - const int64_t alpha = iter_factor * branch_and_bound_lp_iters; - const int64_t max_reliability_iter = alpha + reliability_branching_settings.bnb_lp_offset; - - f_t iter_fraction = - (max_reliability_iter - strong_branching_lp_iter) / (strong_branching_lp_iter + 1.0); - iter_fraction = std::min(1.0, iter_fraction); - iter_fraction = std::max((alpha - strong_branching_lp_iter) / (strong_branching_lp_iter + 1.0), - iter_fraction); - reliable_threshold = (1 - iter_fraction) * min_threshold + iter_fraction * max_threshold; - reliable_threshold = strong_branching_lp_iter < max_reliability_iter ? reliable_threshold : 0; - } - - std::vector unreliable_list; - omp_mutex_t score_mutex; - - for (i_t j : fractional) { - if (pseudo_cost_num_down[j] < reliable_threshold || - pseudo_cost_num_up[j] < reliable_threshold) { - unreliable_list.push_back(j); - continue; - } - - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); - - if (score > max_score) { - max_score = score; - branch_var = j; - } - } - - if (unreliable_list.empty()) { - log.printf( - "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], max_score); - - return branch_var; - } - - const int num_tasks = std::max(max_num_tasks, 1); - const int task_priority = reliability_branching_settings.task_priority; - const i_t max_num_candidates = reliability_branching_settings.max_num_candidates; - const i_t num_candidates = std::min(unreliable_list.size(), max_num_candidates); - - assert(task_priority > 0); - assert(max_num_candidates > 0); - assert(num_candidates > 0); - assert(num_tasks > 0); - - log.printf( - "RB iters = %d, B&B iters = %d, unreliable = %d, num_tasks = %d, reliable_threshold = %d\n", - strong_branching_lp_iter.load(), - branch_and_bound_lp_iters, - unreliable_list.size(), - num_tasks, - reliable_threshold); - - // Shuffle the unreliable list so every variable has the same chance to be selected. - if (unreliable_list.size() > max_num_candidates) { worker->rng.shuffle(unreliable_list); } - - if (toc(start_time) > settings.time_limit) { - log.printf("Time limit reached"); - return branch_var; - } - -#pragma omp taskloop if (num_tasks > 1) priority(task_priority) num_tasks(num_tasks) \ - shared(score_mutex) - for (i_t i = 0; i < num_candidates; ++i) { - const i_t j = unreliable_list[i]; - - if (toc(start_time) > settings.time_limit) { continue; } - - pseudo_cost_mutex_down[j].lock(); - if (pseudo_cost_num_down[j] < reliable_threshold) { - // Do trial branching on the down branch - f_t obj = trial_branching(worker->leaf_problem, - settings, - var_types, - node_ptr->vstatus, - worker->leaf_edge_norms, - worker->basis_factors, - worker->basic_list, - worker->nonbasic_list, - j, - worker->leaf_problem.lower[j], - std::floor(solution[j]), - upper_bound, - branch_and_bound_lp_iter_per_node, - start_time, - reliability_branching_settings.upper_max_lp_iter, - reliability_branching_settings.lower_max_lp_iter, - strong_branching_lp_iter); - - if (!std::isnan(obj)) { - f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); - f_t change_in_x = solution[j] - std::floor(solution[j]); - pseudo_cost_sum_down[j] += change_in_obj / change_in_x; - pseudo_cost_num_down[j]++; - } - } - pseudo_cost_mutex_down[j].unlock(); - - if (toc(start_time) > settings.time_limit) { continue; } - - pseudo_cost_mutex_up[j].lock(); - if (pseudo_cost_num_up[j] < reliable_threshold) { - f_t obj = trial_branching(worker->leaf_problem, - settings, - var_types, - node_ptr->vstatus, - worker->leaf_edge_norms, - worker->basis_factors, - worker->basic_list, - worker->nonbasic_list, - j, - std::ceil(solution[j]), - worker->leaf_problem.upper[j], - upper_bound, - branch_and_bound_lp_iter_per_node, - start_time, - reliability_branching_settings.upper_max_lp_iter, - reliability_branching_settings.lower_max_lp_iter, - strong_branching_lp_iter); - - if (!std::isnan(obj)) { - f_t change_in_obj = std::max(obj - node_ptr->lower_bound, eps); - f_t change_in_x = std::ceil(solution[j]) - solution[j]; - pseudo_cost_sum_up[j] += change_in_obj / change_in_x; - pseudo_cost_num_up[j]++; - } - } - pseudo_cost_mutex_up[j].unlock(); - - if (toc(start_time) > settings.time_limit) { continue; } - - f_t score = calculate_pseudocost_score(j, solution, pseudo_cost_up_avg, pseudo_cost_down_avg); - - score_mutex.lock(); - if (score > max_score) { - max_score = score; - branch_var = j; - } - score_mutex.unlock(); - } - - log.printf( - "pc branching on %d. Value %e. Score %e\n", branch_var, solution[branch_var], max_score); - - return branch_var; + return reliable_variable_selection_core(node_ptr, + fractional, + solution, + settings, + var_types, + worker->leaf_problem, + worker->leaf_edge_norms, + worker->basis_factors, + worker->basic_list, + worker->nonbasic_list, + pseudo_cost_sum_down.data(), + pseudo_cost_sum_up.data(), + pseudo_cost_num_down.data(), + pseudo_cost_num_up.data(), + (i_t)pseudo_cost_sum_down.size(), + strong_branching_lp_iter, + upper_bound, + bnb_stats.total_lp_iters, + bnb_stats.nodes_explored, + bnb_stats.start_time, + reliability_branching_settings, + std::max(max_num_tasks, 1), + pseudo_cost_mutex_down.data(), + pseudo_cost_mutex_up.data(), + &worker->rng); } template @@ -776,6 +921,66 @@ void pseudo_costs_t::update_pseudo_costs_from_strong_branching( template class pseudo_costs_t; +// Opportunistic: omp_atomic_t arrays, omp_atomic_t sb_iter +template int reliable_variable_selection_core, + omp_atomic_t, + omp_atomic_t>( + mip_node_t*, + const std::vector&, + const std::vector&, + const simplex_solver_settings_t&, + const std::vector&, + const lp_problem_t&, + const std::vector&, + const basis_update_mpf_t&, + const std::vector&, + const std::vector&, + omp_atomic_t*, + omp_atomic_t*, + omp_atomic_t*, + omp_atomic_t*, + int, + omp_atomic_t&, + double, + int64_t, + int64_t, + double, + const reliability_branching_settings_t&, + int, + omp_mutex_t*, + omp_mutex_t*, + pcgenerator_t*); + +// Deterministic: plain arrays, plain int64_t sb_iter +template int reliable_variable_selection_core( + mip_node_t*, + const std::vector&, + const std::vector&, + const simplex_solver_settings_t&, + const std::vector&, + const lp_problem_t&, + const std::vector&, + const basis_update_mpf_t&, + const std::vector&, + const std::vector&, + double*, + double*, + int*, + int*, + int, + int64_t&, + double, + int64_t, + int64_t, + double, + const reliability_branching_settings_t&, + int, + omp_mutex_t*, + omp_mutex_t*, + pcgenerator_t*); + template void strong_branching(const user_problem_t& original_problem, const lp_problem_t& original_lp, const simplex_solver_settings_t& settings, @@ -786,7 +991,8 @@ template void strong_branching(const user_problem_t& o double root_obj, const std::vector& root_vstatus, const std::vector& edge_norms, - pseudo_costs_t& pc); + pseudo_costs_t& pc, + cuopt::work_limit_context_t* work_unit_context); #endif diff --git a/cpp/src/branch_and_bound/pseudo_costs.hpp b/cpp/src/branch_and_bound/pseudo_costs.hpp index 6b6c6917b6..11c377febd 100644 --- a/cpp/src/branch_and_bound/pseudo_costs.hpp +++ b/cpp/src/branch_and_bound/pseudo_costs.hpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -367,6 +368,7 @@ class pseudo_cost_snapshot_t { std::vector sum_up_; std::vector num_down_; std::vector num_up_; + int64_t strong_branching_lp_iter_{0}; private: std::vector> updates_; @@ -431,8 +433,10 @@ class pseudo_costs_t { nd[j] = pseudo_cost_num_down[j]; nu[j] = pseudo_cost_num_up[j]; } - return pseudo_cost_snapshot_t( - std::move(sd), std::move(su), std::move(nd), std::move(nu)); + auto snap = + pseudo_cost_snapshot_t(std::move(sd), std::move(su), std::move(nd), std::move(nu)); + snap.strong_branching_lp_iter_ = strong_branching_lp_iter.load(); + return snap; } void merge_updates(const std::vector>& updates) @@ -516,6 +520,37 @@ class pseudo_costs_t { omp_atomic_t strong_branching_lp_iter = 0; }; +// Core reliability branching loop usable by both opportunistic and deterministic paths. +// When num_tasks == 1, runs serially with no locking (deterministic). +// When num_tasks > 1 with mutexes/rng, uses OMP taskloop (opportunistic). +// SumT/CountT can be f_t/i_t (deterministic snapshot) or omp_atomic_t/omp_atomic_t. +template +i_t reliable_variable_selection_core(mip_node_t* node_ptr, + const std::vector& fractional, + const std::vector& solution, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const lp_problem_t& leaf_problem, + const std::vector& edge_norms, + const basis_update_mpf_t& basis_factors, + const std::vector& basic_list, + const std::vector& nonbasic_list, + SumT* sum_down, + SumT* sum_up, + CountT* num_down, + CountT* num_up, + i_t n_vars, + SBIterT& strong_branching_lp_iter, + f_t upper_bound, + int64_t bnb_lp_iters, + int64_t bnb_nodes_explored, + f_t start_time, + const reliability_branching_settings_t& rb_settings, + int num_tasks, + omp_mutex_t* var_mutex_down, + omp_mutex_t* var_mutex_up, + pcgenerator_t* rng); + template void strong_branching(const user_problem_t& original_problem, const lp_problem_t& original_lp, @@ -527,6 +562,7 @@ void strong_branching(const user_problem_t& original_problem, f_t root_obj, const std::vector& root_vstatus, const std::vector& edge_norms, - pseudo_costs_t& pc); + pseudo_costs_t& pc, + cuopt::work_limit_context_t* work_unit_context = nullptr); } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/cuts/cuts.cpp b/cpp/src/cuts/cuts.cpp index cc2555611a..c74a12b36b 100644 --- a/cpp/src/cuts/cuts.cpp +++ b/cpp/src/cuts/cuts.cpp @@ -9,20 +9,496 @@ #include #include +#include +#include + +#include +#include +#include +#include #include +#include +#include + namespace cuopt::linear_programming::dual_simplex { +namespace { + +#define DEBUG_CLIQUE_CUTS 0 +#define CHECK_WORKSPACE 0 + +enum class clique_cut_build_status_t : int8_t { NO_CUT = 0, CUT_ADDED = 1, INFEASIBLE = 2 }; + +#if DEBUG_CLIQUE_CUTS +#define CLIQUE_CUTS_DEBUG(...) \ + do { \ + std::fprintf(stderr, "[DEBUG_CLIQUE_CUTS] "); \ + std::fprintf(stderr, __VA_ARGS__); \ + std::fprintf(stderr, "\n"); \ + } while (0) +#else +#define CLIQUE_CUTS_DEBUG(...) \ + do { \ + } while (0) +#endif + +template +clique_cut_build_status_t build_clique_cut(const std::vector& clique_vertices, + i_t num_vars, + const std::vector& var_types, + const std::vector& lower_bounds, + const std::vector& upper_bounds, + const std::vector& xstar, + f_t bound_tol, + f_t min_violation, + sparse_vector_t& cut, + f_t& cut_rhs, + f_t* work_estimate, + f_t max_work_estimate) +{ + if (clique_vertices.size() < 2) { return clique_cut_build_status_t::NO_CUT; } + const f_t clique_size = static_cast(clique_vertices.size()); + CLIQUE_CUTS_DEBUG("build_clique_cut start clique_size=%lld", + static_cast(clique_vertices.size())); + const f_t sort_work = clique_size > 0.0 ? 2.0 * clique_size * std::log2(clique_size + 1.0) : 0.0; + const f_t dot_work = 2.0 * clique_size; + const f_t estimated_work = 9.0 * clique_size + sort_work + dot_work; + if (add_work_estimate(estimated_work, work_estimate, max_work_estimate)) { + CLIQUE_CUTS_DEBUG("build_clique_cut skip work_limit clique_size=%lld work=%g limit=%g", + static_cast(clique_vertices.size()), + work_estimate == nullptr ? -1.0 : static_cast(*work_estimate), + static_cast(max_work_estimate)); + return clique_cut_build_status_t::NO_CUT; + } + + cuopt_assert(num_vars > 0, "Clique cut num_vars must be positive"); + cuopt_assert(static_cast(num_vars) <= lower_bounds.size(), + "Clique cut lower bounds size mismatch"); + cuopt_assert(static_cast(num_vars) <= xstar.size(), "Clique cut xstar size mismatch"); + + cut.i.clear(); + cut.x.clear(); + i_t num_complements = 0; + std::unordered_set seen_original; + std::unordered_set seen_complement; + seen_original.reserve(clique_vertices.size()); + seen_complement.reserve(clique_vertices.size()); + for (const auto vertex_idx : clique_vertices) { + cuopt_assert(vertex_idx >= 0 && vertex_idx < 2 * num_vars, "Clique vertex out of range"); + const i_t var_idx = vertex_idx % num_vars; + const bool complement = vertex_idx >= num_vars; + const f_t lower_bound = lower_bounds[var_idx]; + const f_t upper_bound = upper_bounds[var_idx]; + + cuopt_assert(var_types[var_idx] != variable_type_t::CONTINUOUS, + "Clique contains continuous variable"); + cuopt_assert(lower_bound >= -bound_tol, "Clique variable lower bound below zero"); + cuopt_assert(upper_bound <= 1 + bound_tol, "Clique variable upper bound above one"); + + // we store the cut in the form of >= 1, for easy violation check with dot product + // that's why compelements have 1 as coeff and normal vars have -1 + if (complement) { + if (seen_original.count(var_idx) > 0) { + // FIXME: this is temporary, fix all the vars of all other vars in the clique + return clique_cut_build_status_t::NO_CUT; + CLIQUE_CUTS_DEBUG("build_clique_cut infeasible var=%lld appears as variable and complement", + static_cast(var_idx)); + return clique_cut_build_status_t::INFEASIBLE; + } + cuopt_assert(seen_complement.count(var_idx) == 0, "Duplicate complement in clique"); + seen_complement.insert(var_idx); + num_complements++; + cut.i.push_back(var_idx); + cut.x.push_back(1.0); + } else { + if (seen_complement.count(var_idx) > 0) { + // FIXME: this is temporary, fix all the vars of all other vars in the clique + return clique_cut_build_status_t::NO_CUT; + CLIQUE_CUTS_DEBUG("build_clique_cut infeasible var=%lld appears as variable and complement", + static_cast(var_idx)); + return clique_cut_build_status_t::INFEASIBLE; + } + cuopt_assert(seen_original.count(var_idx) == 0, "Duplicate variable in clique"); + seen_original.insert(var_idx); + cut.i.push_back(var_idx); + cut.x.push_back(-1.0); + } + } + + if (cut.i.empty()) { + CLIQUE_CUTS_DEBUG("build_clique_cut no_cut empty support"); + return clique_cut_build_status_t::NO_CUT; + } + + cut_rhs = static_cast(num_complements - 1); + cut.sort(); + + const f_t dot = cut.dot(xstar); + const f_t violation = cut_rhs - dot; + if (violation > min_violation) { + CLIQUE_CUTS_DEBUG( + "build_clique_cut accepted nz=%lld rhs=%g dot=%g violation=%g threshold=%g complements=%lld", + static_cast(cut.i.size()), + static_cast(cut_rhs), + static_cast(dot), + static_cast(violation), + static_cast(min_violation), + static_cast(num_complements)); + return clique_cut_build_status_t::CUT_ADDED; + } + CLIQUE_CUTS_DEBUG( + "build_clique_cut rejected nz=%lld rhs=%g dot=%g violation=%g threshold=%g complements=%lld", + static_cast(cut.i.size()), + static_cast(cut_rhs), + static_cast(dot), + static_cast(violation), + static_cast(min_violation), + static_cast(num_complements)); + return clique_cut_build_status_t::NO_CUT; +} + +template +struct bk_bitset_context_t { + const std::vector>& adj; + const std::vector& weights; + f_t min_weight; + i_t max_calls; + f_t start_time; + f_t time_limit; + size_t words; + f_t* work_estimate; + f_t max_work_estimate; + i_t num_calls{0}; + bool work_limit_reached{false}; + bool call_limit_reached{false}; + std::vector> cliques; + + bool add_work(f_t accesses) + { + return add_work_estimate(accesses, work_estimate, max_work_estimate, &work_limit_reached); + } + + bool over_work_limit() const + { + if (work_limit_reached) { return true; } + if (work_estimate == nullptr) { return false; } + return *work_estimate > max_work_estimate; + } + + bool over_call_limit() const { return call_limit_reached || num_calls >= max_calls; } +}; + +inline size_t bitset_words(size_t n) { return (n + 63) / 64; } + +inline bool bitset_any(const std::vector& bs) +{ + for (auto word : bs) { + if (word != 0) { return true; } + } + return false; +} + +inline void bitset_set(std::vector& bs, size_t idx) +{ + bs[idx >> 6] |= (uint64_t(1) << (idx & 63)); +} + +inline void bitset_clear(std::vector& bs, size_t idx) +{ + bs[idx >> 6] &= ~(uint64_t(1) << (idx & 63)); +} + +template +f_t sum_weights_bitset(const std::vector& bs, const std::vector& weights) +{ + f_t sum = 0.0; + for (size_t w = 0; w < bs.size(); ++w) { + uint64_t word = bs[w]; + while (word) { + const int bit = __builtin_ctzll(word); + const size_t idx = w * 64 + static_cast(bit); + sum += weights[idx]; + word &= (word - 1); + } + } + return sum; +} + +template +void bron_kerbosch(bk_bitset_context_t& ctx, + std::vector& R, // current clique + std::vector& P, // potential candidates + std::vector& X, // already in the clique + f_t weight_R) +{ + if (ctx.over_work_limit() || ctx.over_call_limit()) { return; } + if (toc(ctx.start_time) >= ctx.time_limit) { return; } + ctx.num_calls++; + // stop the recursion, for perf reasons + if (ctx.num_calls > ctx.max_calls) { + ctx.call_limit_reached = true; + return; + } + if (ctx.add_work(static_cast(4 * ctx.words))) { return; } + + // if P and X are empty, we are at maximal clique + if (!bitset_any(P) && !bitset_any(X)) { + // if the weight is enough, add and exit + if (weight_R >= ctx.min_weight) { + ctx.add_work(static_cast(R.size())); + ctx.cliques.push_back(R); + } + return; + } + + const f_t sumP = sum_weights_bitset(P, ctx.weights); + // check if all P is added to clique, would we exceed the weight? + if (weight_R + sumP < ctx.min_weight) { return; } + + i_t pivot = -1; + i_t max_deg = -1; + i_t pivot_vertices_examined = 0; + // pivoting rule according to the highest degree vertex + // TODO try other pivoting strategies, we can also implement some online learning like MAB + for (size_t w = 0; w < ctx.words; ++w) { + // union of P and X + uint64_t word = P[w] | X[w]; + while (word) { + pivot_vertices_examined++; + // least significant set bit idnex + const int bit = __builtin_ctzll(word); + // overall vertex index + const i_t v = static_cast(w * 64 + static_cast(bit)); + // clear the least significant set bit (v) + word &= (word - 1); + i_t count = 0; + // count the number of neighbors of v in P + for (size_t k = 0; k < ctx.words; ++k) { + count += __builtin_popcountll(P[k] & ctx.adj[v][k]); + } + // chose the highest degree v as the pivot + // we choose the highest degree as the pivot to reduce the recursion size + // later in this function we recurse on the candidate P / N(v) + // so it is good to maximize P n N(v) + if (count > max_deg) { + max_deg = count; + pivot = v; + } + } + } + ctx.add_work(static_cast(2 * ctx.words) + + static_cast(pivot_vertices_examined) * static_cast(2 * ctx.words)); + + std::vector candidates; + candidates.reserve(ctx.weights.size()); + cuopt_assert(pivot >= 0, "Pivot must be valid when P or X is non-empty"); + for (size_t w = 0; w < ctx.words; ++w) { + // P / N(pivot) + uint64_t word = P[w] & ~ctx.adj[pivot][w]; + while (word) { + const int bit = __builtin_ctzll(word); + const i_t v = static_cast(w * 64 + static_cast(bit)); + word &= (word - 1); + candidates.push_back(v); + } + } + const i_t num_candidates = static_cast(candidates.size()); + ctx.add_work(static_cast(2 * ctx.words + num_candidates)); + ctx.add_work(static_cast(num_candidates) * static_cast(7 * ctx.words + 6)); + // note that candidates will include pivot if it is in P + for (auto v : candidates) { + if (ctx.over_call_limit()) { + ctx.call_limit_reached = true; + return; + } + if (toc(ctx.start_time) >= ctx.time_limit) { return; } + + R.push_back(v); + std::vector P_next(ctx.words, 0); + std::vector X_next(ctx.words, 0); + for (size_t k = 0; k < ctx.words; ++k) { + P_next[k] = P[k] & ctx.adj[v][k]; + X_next[k] = X[k] & ctx.adj[v][k]; + } + + bron_kerbosch(ctx, R, P_next, X_next, weight_R + ctx.weights[v]); + if (ctx.over_work_limit()) { return; } + if (ctx.over_call_limit()) { + ctx.call_limit_reached = true; + return; + } + R.pop_back(); + bitset_clear(P, static_cast(v)); + bitset_set(X, static_cast(v)); + } +} + +template +void extend_clique_vertices(std::vector& clique_vertices, + detail::clique_table_t& graph, + const std::vector& xstar, + const std::vector& reduced_costs, + i_t num_vars, + f_t integer_tol, + f_t start_time, + f_t time_limit, + f_t* work_estimate, + f_t max_work_estimate) +{ + if (toc(start_time) >= time_limit) { return; } + if (clique_vertices.empty()) { return; } +#if DEBUG_CLIQUE_CUTS + const size_t initial_clique_vertices = clique_vertices.size(); +#endif + CLIQUE_CUTS_DEBUG("extend_clique_vertices start size=%lld", + static_cast(clique_vertices.size())); + const f_t initial_clique_size = static_cast(clique_vertices.size()); + + i_t smallest_degree = std::numeric_limits::max(); + i_t smallest_degree_var = -1; + for (auto v : clique_vertices) { + if (toc(start_time) >= time_limit) { return; } + i_t degree = graph.get_degree_of_var(v); + if (degree < smallest_degree) { + smallest_degree = degree; + smallest_degree_var = v; + } + } + + auto adj_set = graph.get_adj_set_of_var(smallest_degree_var); + std::unordered_set clique_members(clique_vertices.begin(), clique_vertices.end()); + std::vector candidates; + candidates.reserve(adj_set.size()); + // the candidate list if only the integer valued vertices + for (const auto& candidate : adj_set) { + if (toc(start_time) >= time_limit) { return; } + if (clique_members.count(candidate) != 0) { continue; } + i_t var_idx = candidate % num_vars; + f_t value = candidate >= num_vars ? (1.0 - xstar[var_idx]) : xstar[var_idx]; + if (std::abs(value - std::round(value)) <= integer_tol) { candidates.push_back(candidate); } + } + CLIQUE_CUTS_DEBUG( + "extend_clique_vertices anchor=%lld degree=%lld adj_size=%lld integer_candidates=%lld", + static_cast(smallest_degree_var), + static_cast(smallest_degree), + static_cast(adj_set.size()), + static_cast(candidates.size())); + const f_t candidate_size = static_cast(candidates.size()); + const f_t sort_work = + candidate_size > 0.0 ? 2.0 * candidate_size * std::log2(candidate_size + 1.0) : 0.0; + const f_t adj_set_build_cost = 2.0 * static_cast(adj_set.size()); + const f_t adj_check_cost = 5.0; + const f_t estimated_preloop_work = 2.0 * initial_clique_size + adj_set_build_cost + + 3.0 * static_cast(adj_set.size()) + sort_work + + 2.0 * candidate_size; + if (add_work_estimate(estimated_preloop_work, work_estimate, max_work_estimate)) { + CLIQUE_CUTS_DEBUG("extend_clique_vertices skip work_limit work=%g limit=%g", + work_estimate == nullptr ? -1.0 : static_cast(*work_estimate), + static_cast(max_work_estimate)); + return; + } + + // sort the candidates by reduced cost. + // smaller reduce cost disturbs dual simplex less + // less refactors and less iterations after resolve. + // it also increases the cut's effectiveness by keeping xstar not disturbed much + // if it is disturbed too much, the cut might become non-binding + auto reduced_cost = [&](i_t vertex_idx) -> f_t { + i_t var_idx = vertex_idx % num_vars; + cuopt_assert(var_idx >= 0 && var_idx < static_cast(reduced_costs.size()), + "Variable index out of range"); + f_t rc = reduced_costs[var_idx]; + if (!std::isfinite(rc)) { rc = 0.0; } + return vertex_idx >= num_vars ? -rc : rc; + }; + + std::sort(candidates.begin(), candidates.end(), [&](i_t a, i_t b) { + return reduced_cost(a) < reduced_cost(b); + }); + + for (const auto candidate : candidates) { + bool add = true; + i_t checks = 0; + for (const auto v : clique_vertices) { + checks++; + if (!graph.check_adjacency(candidate, v)) { + add = false; + break; + } + } + if (add_work_estimate( + adj_check_cost * static_cast(checks), work_estimate, max_work_estimate)) { + break; + } + if (add) { + clique_vertices.push_back(candidate); + clique_members.insert(candidate); + } + } + CLIQUE_CUTS_DEBUG("extend_clique_vertices done start=%lld final=%lld added=%lld", + static_cast(initial_clique_vertices), + static_cast(clique_vertices.size()), + static_cast(clique_vertices.size() - initial_clique_vertices)); +} + +} // namespace + +// This function is only used in tests +std::vector> find_maximal_cliques_for_test( + const std::vector>& adjacency_list, + const std::vector& weights, + double min_weight, + int max_calls, + double time_limit) +{ + const size_t n_vertices = adjacency_list.size(); + if (n_vertices == 0) { return {}; } + cuopt_assert(weights.size() == n_vertices, "Weights size mismatch in clique test helper"); + cuopt_assert(max_calls > 0, "max_calls must be positive in clique test helper"); + + const size_t words = bitset_words(n_vertices); + std::vector> adj_bitset(n_vertices, std::vector(words, 0)); + for (size_t v = 0; v < n_vertices; ++v) { + for (const auto& nbr : adjacency_list[v]) { + cuopt_assert(nbr >= 0 && static_cast(nbr) < n_vertices, + "Neighbor index out of range in clique test helper"); + bitset_set(adj_bitset[v], static_cast(nbr)); + } + } + + double work_estimate = 0.0; + const double max_work_estimate = std::numeric_limits::infinity(); + const double start_time = tic(); + + bk_bitset_context_t ctx{adj_bitset, + weights, + min_weight, + max_calls, + start_time, + time_limit, + words, + &work_estimate, + max_work_estimate}; + + std::vector R; + std::vector P(words, 0); + std::vector X(words, 0); + for (size_t idx = 0; idx < n_vertices; ++idx) { + bitset_set(P, idx); + } + bron_kerbosch(ctx, R, P, X, 0.0); + return ctx.cliques; +} + template -void cut_pool_t::add_cut(cut_type_t cut_type, - const sparse_vector_t& cut, - f_t rhs) +void cut_pool_t::add_cut(cut_type_t cut_type, const inequality_t& cut) { // TODO: Need to deduplicate cuts and only add if the cut is not already in the pool - for (i_t p = 0; p < cut.i.size(); p++) { - const i_t j = cut.i[p]; + for (i_t p = 0; p < cut.size(); p++) { + const i_t j = cut.index(p); if (j >= original_vars_) { settings_.log.printf( "Cut has variable %d that is greater than original_vars_ %d\n", j, original_vars_); @@ -30,14 +506,14 @@ void cut_pool_t::add_cut(cut_type_t cut_type, } } - sparse_vector_t cut_squeezed; + inequality_t cut_squeezed; cut.squeeze(cut_squeezed); - if (cut_squeezed.i.size() == 0) { + if (cut_squeezed.size() == 0) { settings_.log.printf("Cut has no coefficients\n"); return; } - cut_storage_.append_row(cut_squeezed); - rhs_storage_.push_back(rhs); + cut_storage_.append_row(cut_squeezed.vector); + rhs_storage_.push_back(cut_squeezed.rhs); cut_type_.push_back(cut_type); cut_age_.push_back(0); } @@ -99,7 +575,6 @@ f_t cut_pool_t::cut_orthogonality(i_t i, i_t j) template void cut_pool_t::score_cuts(std::vector& x_relax) { - const f_t min_cut_distance = 1e-4; cut_distances_.resize(cut_storage_.m, 0.0); cut_norms_.resize(cut_storage_.m, 0.0); @@ -107,7 +582,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) for (i_t i = 0; i < cut_storage_.m; i++) { f_t violation; f_t cut_dist = cut_distance(i, x_relax, violation, cut_norms_[i]); - cut_distances_[i] = cut_dist <= min_cut_distance ? 0.0 : cut_dist; + cut_distances_[i] = cut_dist <= min_cut_distance_ ? 0.0 : cut_dist; if (verbose) { settings_.log.printf("Cut %d type %d distance %+e violation %+e cut_norm %e\n", i, @@ -138,7 +613,7 @@ void cut_pool_t::score_cuts(std::vector& x_relax) const i_t i = sorted_indices.back(); sorted_indices.pop_back(); - if (cut_distances_[i] <= min_cut_distance) { break; } + if (cut_distances_[i] <= min_cut_distance_) { break; } f_t cut_ortho = 1.0; const i_t best_cuts_size = best_cuts_.size(); @@ -171,6 +646,7 @@ i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, best_cut_types.reserve(scored_cuts_); for (i_t i : best_cuts_) { + if (cut_distances_[i] <= min_cut_distance_) { continue; } sparse_vector_t cut(cut_storage_, i); cut.negate(); best_cuts.append_row(cut); @@ -180,7 +656,7 @@ i_t cut_pool_t::get_best_cuts(csr_matrix_t& best_cuts, age_cuts(); - return static_cast(best_cuts_.size()); + return static_cast(best_rhs.size()); } template @@ -274,8 +750,7 @@ i_t knapsack_generation_t::generate_knapsack_cuts( const std::vector& var_types, const std::vector& xstar, i_t knapsack_row, - sparse_vector_t& cut, - f_t& cut_rhs) + inequality_t& cut) { const bool verbose = false; // Get the row associated with the knapsack constraint @@ -346,44 +821,31 @@ i_t knapsack_generation_t::generate_knapsack_cuts( if (solution[k] == 0.0) { cover_size++; } } - cut.i.clear(); - cut.x.clear(); - cut.i.reserve(cover_size); - cut.x.reserve(cover_size); + cut.reserve(cover_size); + cut.clear(); h = 0; for (i_t k = 0; k < knapsack_inequality.i.size(); k++) { const i_t j = knapsack_inequality.i[k]; if (!is_slack_[j]) { - if (solution[h] == 0.0) { - cut.i.push_back(j); - cut.x.push_back(-1.0); - } + if (solution[h] == 0.0) { cut.push_back(j, -1.0); } h++; } } - cut_rhs = -cover_size + 1; + cut.rhs = -cover_size + 1; cut.sort(); // The cut is in the form: - sum_{j in cover} x_j >= -cover_size + 1 // Which is equivalent to: sum_{j in cover} x_j <= cover_size - 1 // Verify the cut is violated - f_t dot = cut.dot(xstar); - f_t violation = dot - cut_rhs; + f_t dot = cut.vector.dot(xstar); + f_t violation = dot - cut.rhs; if (verbose) { settings.log.printf("Knapsack cut %d violation %e < 0\n", knapsack_row, violation); } if (violation >= -tol) { return -1; } - -#ifdef PRINT_KNAPSACK_CUT - settings.log.printf("knapsack cut (cover %d): \n", cover_size); - for (i_t k = 0; k < cut.i.size(); k++) { - settings.log.printf("x%d coeff %g value %g\n", cut.i[k], -cut.x[k], xstar[cut.i[k]]); - } - settings.log.printf("cut_rhs %g\n", -cut_rhs); -#endif return 0; } @@ -553,15 +1015,19 @@ f_t knapsack_generation_t::solve_knapsack_problem(const std::vector -void cut_generation_t::generate_cuts(const lp_problem_t& lp, +bool cut_generation_t::generate_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, basis_update_mpf_t& basis_update, const std::vector& xstar, + const std::vector& ystar, + const std::vector& zstar, const std::vector& basic_list, - const std::vector& nonbasic_list) + const std::vector& nonbasic_list, + variable_bounds_t& variable_bounds, + f_t start_time) { // Generate Gomory and CG Cuts if (settings.mixed_integer_gomory_cuts != 0 || settings.strong_chvatal_gomory_cuts != 0) { @@ -587,12 +1053,27 @@ void cut_generation_t::generate_cuts(const lp_problem_t& lp, // Generate MIR and CG cuts if (settings.mir_cuts != 0 || settings.strong_chvatal_gomory_cuts != 0) { f_t cut_start_time = tic(); - generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar); + generate_mir_cuts(lp, settings, Arow, new_slacks, var_types, xstar, ystar, variable_bounds); f_t cut_generation_time = toc(cut_start_time); if (cut_generation_time > 1.0) { settings.log.debug("MIR and CG cut generation time %.2f seconds\n", cut_generation_time); } } + + // Generate Clique cuts (last to give background clique table generation maximum time) + if (settings.clique_cuts != 0) { + f_t cut_start_time = tic(); + bool feasible = generate_clique_cuts(lp, settings, var_types, xstar, zstar, start_time); + if (!feasible) { + settings.log.printf("Clique cuts proved infeasible\n"); + return false; + } + f_t cut_generation_time = toc(cut_start_time); + if (cut_generation_time > 1.0) { + settings.log.debug("Clique cut generation time %.2f seconds\n", cut_generation_time); + } + } + return true; } template @@ -606,80 +1087,314 @@ void cut_generation_t::generate_knapsack_cuts( { if (knapsack_generation_.num_knapsack_constraints() > 0) { for (i_t knapsack_row : knapsack_generation_.get_knapsack_constraints()) { - sparse_vector_t cut(lp.num_cols, 0); - f_t cut_rhs; + inequality_t cut(lp.num_cols); i_t knapsack_status = knapsack_generation_.generate_knapsack_cuts( - lp, settings, Arow, new_slacks, var_types, xstar, knapsack_row, cut, cut_rhs); - if (knapsack_status == 0) { cut_pool_.add_cut(cut_type_t::KNAPSACK, cut, cut_rhs); } + lp, settings, Arow, new_slacks, var_types, xstar, knapsack_row, cut); + if (knapsack_status == 0) { cut_pool_.add_cut(cut_type_t::KNAPSACK, cut); } } } } template -void cut_generation_t::generate_mir_cuts( +bool cut_generation_t::generate_clique_cuts( const lp_problem_t& lp, const simplex_solver_settings_t& settings, - csr_matrix_t& Arow, - const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar) + const std::vector& xstar, + const std::vector& reduced_costs, + f_t start_time) { - f_t mir_start_time = tic(); - mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); - strong_cg_cut_t cg(lp, var_types, xstar); - - std::vector slack_map(lp.num_rows, -1); - for (i_t slack : new_slacks) { - const i_t col_start = lp.A.col_start[slack]; - const i_t col_end = lp.A.col_start[slack + 1]; - const i_t col_len = col_end - col_start; - assert(col_len == 1); - const i_t i = lp.A.i[col_start]; - slack_map[i] = slack; - } + if (settings.clique_cuts == 0) { return true; } + if (toc(start_time) >= settings.time_limit) { return true; } - // Compute initial scores for all rows - std::vector score(lp.num_rows, 0.0); - for (i_t i = 0; i < lp.num_rows; i++) { - const i_t row_start = Arow.row_start[i]; - const i_t row_end = Arow.row_start[i + 1]; + const i_t num_vars = user_problem_.num_cols; + CLIQUE_CUTS_DEBUG("generate_clique_cuts start num_vars=%lld time_limit=%g elapsed=%g", + static_cast(num_vars), + static_cast(settings.time_limit), + static_cast(toc(start_time))); - const i_t row_nz = row_end - row_start; - i_t num_integer_in_row = 0; - i_t num_continuous_in_row = 0; - for (i_t p = row_start; p < row_end; p++) { - const i_t j = Arow.j[p]; - if (var_types[j] == variable_type_t::INTEGER) { - num_integer_in_row++; - } else { - num_continuous_in_row++; - } + if (clique_table_ == nullptr && clique_table_future_ != nullptr && + clique_table_future_->valid()) { + CLIQUE_CUTS_DEBUG("generate_clique_cuts signaling background thread and waiting"); + if (signal_extend_) { signal_extend_->store(true, std::memory_order_release); } + clique_table_ = clique_table_future_->get(); + clique_table_future_ = nullptr; + if (clique_table_) { + CLIQUE_CUTS_DEBUG("generate_clique_cuts received clique table first=%lld addtl=%lld", + static_cast(clique_table_->first.size()), + static_cast(clique_table_->addtl_cliques.size())); } + } - if (num_integer_in_row == 0) { - score[i] = 0.0; + if (clique_table_ == nullptr) { + CLIQUE_CUTS_DEBUG("generate_clique_cuts no clique table available, skipping"); + return true; + } + CLIQUE_CUTS_DEBUG("generate_clique_cuts using clique table first=%lld addtl=%lld", + static_cast(clique_table_->first.size()), + static_cast(clique_table_->addtl_cliques.size())); - } else { - f_t nz_score = lp.num_cols - row_nz; + if (clique_table_->first.empty() && clique_table_->addtl_cliques.empty()) { + CLIQUE_CUTS_DEBUG("generate_clique_cuts empty clique table, nothing to separate"); + return true; + } - const i_t slack = slack_map[i]; - assert(slack >= 0); - const f_t slack_value = xstar[slack]; + cuopt_assert(clique_table_->n_variables == num_vars, "Clique table variable count mismatch"); + cuopt_assert(static_cast(num_vars) <= xstar.size(), "Clique cut xstar size mismatch"); - f_t slack_score = -std::log10(1e-16 + std::abs(slack_value)); + const f_t min_violation = std::max(settings.primal_tol, static_cast(1e-6)); + const f_t bound_tol = settings.primal_tol; + const f_t min_weight = 1.0 + min_violation; + // TODO this can be problem dependent + const i_t max_calls = 100000; + f_t work_estimate = 0.0; + const f_t max_work_estimate = 1e8; - const f_t nz_weight = 1.0; - const f_t slack_weight = 1.0; - const f_t integer_weight = 1.0; + cuopt_assert(user_problem_.var_types.size() == static_cast(num_vars), + "User problem var_types size mismatch"); - score[i] = - nz_weight * nz_score + slack_weight * slack_score + integer_weight * num_integer_in_row; + std::vector vertices; + std::vector weights; + vertices.reserve(num_vars * 2); + weights.reserve(num_vars * 2); + + // create the sub graph induced by fractional binary variables + for (i_t j = 0; j < num_vars; ++j) { + if (user_problem_.var_types[j] == variable_type_t::CONTINUOUS) { continue; } + const f_t lower_bound = user_problem_.lower[j]; + const f_t upper_bound = user_problem_.upper[j]; + if (lower_bound < -bound_tol || upper_bound > 1 + bound_tol) { continue; } + const f_t xj = xstar[j]; + if (std::abs(xj - std::round(xj)) <= settings.integer_tol) { continue; } + vertices.push_back(j); + weights.push_back(xj); + vertices.push_back(j + num_vars); + weights.push_back(1.0 - xj); + } + // Coarse loop estimate: variable scans + selected vertex/weight writes + work_estimate += 4.0 * static_cast(num_vars) + 2.0 * static_cast(vertices.size()); + if (work_estimate > max_work_estimate) { return true; } + + if (vertices.empty()) { + CLIQUE_CUTS_DEBUG("generate_clique_cuts no fractional binary vertices"); + return true; + } + CLIQUE_CUTS_DEBUG("generate_clique_cuts fractional subgraph vertices=%lld (literals=%lld)", + static_cast(vertices.size() / 2), + static_cast(vertices.size())); + + std::vector vertex_to_local(2 * num_vars, -1); + std::vector in_subgraph(2 * num_vars, 0); + for (size_t idx = 0; idx < vertices.size(); ++idx) { + if (toc(start_time) >= settings.time_limit) { return true; } + const i_t vertex_idx = vertices[idx]; + vertex_to_local[vertex_idx] = static_cast(idx); + in_subgraph[vertex_idx] = 1; + } + work_estimate += 3.0 * static_cast(vertices.size()); + if (work_estimate > max_work_estimate) { return true; } + + std::vector> adj_local(vertices.size()); + size_t total_adj_entries = 0; + size_t kept_adj_entries = 0; + for (size_t idx = 0; idx < vertices.size(); ++idx) { + if (toc(start_time) >= settings.time_limit) { return true; } + i_t vertex_idx = vertices[idx]; + // returns the complement as well + auto adj_set = clique_table_->get_adj_set_of_var(vertex_idx); + total_adj_entries += adj_set.size(); + auto& adj = adj_local[idx]; + adj.reserve(adj_set.size()); + for (const auto neighbor : adj_set) { + if (toc(start_time) >= settings.time_limit) { return true; } + cuopt_assert(neighbor >= 0 && neighbor < 2 * num_vars, "Neighbor out of range"); + if (!in_subgraph[neighbor]) { continue; } + i_t local_neighbor = vertex_to_local[neighbor]; + cuopt_assert(local_neighbor >= 0, "Local neighbor out of range"); + adj.push_back(local_neighbor); + } + kept_adj_entries += adj.size(); +#ifdef ASSERT_MODE + { + std::unordered_set adj_global; + adj_global.reserve(adj.size()); + for (const auto neighbor : adj) { + i_t v = vertices[neighbor]; + cuopt_assert(adj_global.insert(v).second, "Duplicate neighbor in adjacency list"); + i_t complement = (v >= num_vars) ? (v - num_vars) : (v + num_vars); + cuopt_assert(adj_global.find(complement) == adj_global.end(), + "Adjacency list contains complementing variable"); + } + } +#endif + } + work_estimate += static_cast(vertices.size()) + static_cast(total_adj_entries) + + 2.0 * static_cast(kept_adj_entries); + if (work_estimate > max_work_estimate) { return true; } + CLIQUE_CUTS_DEBUG("generate_clique_cuts adjacency raw_entries=%lld kept_entries=%lld", + static_cast(total_adj_entries), + static_cast(kept_adj_entries)); + + const size_t words = bitset_words(vertices.size()); + std::vector> adj_bitset(vertices.size(), std::vector(words, 0)); + size_t local_adj_entries = 0; + for (size_t v = 0; v < adj_local.size(); ++v) { + local_adj_entries += adj_local[v].size(); + for (const auto neighbor : adj_local[v]) { + bitset_set(adj_bitset[v], static_cast(neighbor)); + } + } + work_estimate += static_cast(adj_local.size()) + 3.0 * static_cast(local_adj_entries); + if (work_estimate > max_work_estimate) { return true; } + CLIQUE_CUTS_DEBUG("generate_clique_cuts bitset graph words=%lld local_entries=%lld", + static_cast(words), + static_cast(local_adj_entries)); + + bk_bitset_context_t ctx{adj_bitset, + weights, + min_weight, + max_calls, + start_time, + settings.time_limit, + words, + &work_estimate, + max_work_estimate}; + std::vector R; + std::vector P(words, 0); + std::vector X(words, 0); + for (size_t idx = 0; idx < vertices.size(); ++idx) { + bitset_set(P, idx); + } + work_estimate += 2.0 * static_cast(vertices.size()); + if (work_estimate > max_work_estimate) { return true; } + bron_kerbosch(ctx, R, P, X, 0.0); + CLIQUE_CUTS_DEBUG( + "generate_clique_cuts maximal cliques found=%lld bk_calls=%lld work=%g work_limit=%d " + "call_limit=%d", + static_cast(ctx.cliques.size()), + static_cast(ctx.num_calls), + static_cast(work_estimate), + ctx.over_work_limit() ? 1 : 0, + ctx.over_call_limit() ? 1 : 0); + if (ctx.over_call_limit()) { return true; } + if (ctx.over_work_limit()) { return true; } + if (toc(start_time) >= settings.time_limit) { return true; } + if (work_estimate > max_work_estimate) { return true; } + + sparse_vector_t cut(lp.num_cols, 0); + f_t cut_rhs = 0.0; +#if DEBUG_CLIQUE_CUTS + size_t candidate_cliques = 0; + size_t added_cuts = 0; + size_t rejected_cliques = 0; + size_t extension_gain = 0; +#endif + for (auto& clique_local : ctx.cliques) { + if (toc(start_time) >= settings.time_limit) { return true; } +#if DEBUG_CLIQUE_CUTS + candidate_cliques++; +#endif + std::vector clique_vertices; + clique_vertices.reserve(clique_local.size()); + for (auto local_idx : clique_local) { + clique_vertices.push_back(vertices[local_idx]); + } + work_estimate += 3.0 * static_cast(clique_local.size()); + if (work_estimate > max_work_estimate) { return true; } +#if DEBUG_CLIQUE_CUTS + const size_t size_before_extension = clique_vertices.size(); +#endif + extend_clique_vertices(clique_vertices, + *clique_table_, + xstar, + reduced_costs, + num_vars, + settings.integer_tol, + start_time, + settings.time_limit, + &work_estimate, + max_work_estimate); +#if DEBUG_CLIQUE_CUTS + extension_gain += clique_vertices.size() - size_before_extension; +#endif + if (work_estimate > max_work_estimate) { return true; } + if (toc(start_time) >= settings.time_limit) { return true; } + const auto build_status = build_clique_cut(clique_vertices, + num_vars, + var_types, + user_problem_.lower, + user_problem_.upper, + xstar, + bound_tol, + min_violation, + cut, + cut_rhs, + &work_estimate, + max_work_estimate); + if (work_estimate > max_work_estimate) { return true; } + if (build_status == clique_cut_build_status_t::INFEASIBLE) { + settings.log.debug("Detected contradictory variable/complement clique\n"); + CLIQUE_CUTS_DEBUG( + "generate_clique_cuts infeasible clique detected after processing=%lld cliques", + static_cast(candidate_cliques)); + return false; + } + if (build_status == clique_cut_build_status_t::CUT_ADDED) { + inequality_t cut_inequality; + cut_inequality.vector = cut; + cut_inequality.rhs = cut_rhs; + cut_pool_.add_cut(cut_type_t::CLIQUE, cut_inequality); +#if DEBUG_CLIQUE_CUTS + added_cuts++; + CLIQUE_CUTS_DEBUG("generate_clique_cuts added cut nz=%lld rhs=%g clique_size=%lld", + static_cast(cut.i.size()), + static_cast(cut_rhs), + static_cast(clique_vertices.size())); +#endif + } +#if DEBUG_CLIQUE_CUTS + else { + rejected_cliques++; } +#endif } +#if DEBUG_CLIQUE_CUTS + CLIQUE_CUTS_DEBUG( + "generate_clique_cuts done candidate_cliques=%lld added=%lld rejected=%lld extension_gain=%lld " + "final_work=%g", + static_cast(candidate_cliques), + static_cast(added_cuts), + static_cast(rejected_cliques), + static_cast(extension_gain), + static_cast(work_estimate)); +#endif + return true; +} - // Sort the rows by score - std::vector sorted_indices; - best_score_last_permutation(score, sorted_indices); +template +void cut_generation_t::generate_mir_cuts( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csr_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types, + const std::vector& xstar, + const std::vector& ystar, + variable_bounds_t& variable_bounds) +{ + f_t mir_start_time = tic(); + constexpr bool verbose = false; + complemented_mixed_integer_rounding_cut_t complemented_mir(lp, settings, new_slacks); + strong_cg_cut_t cg(lp, var_types, xstar); + + std::vector scores; + complemented_mir.compute_initial_scores_for_rows(lp, settings, Arow, xstar, ystar, scores); + + // Push all the scores onto the priority queue + std::priority_queue> score_queue; + for (i_t i = 0; i < lp.num_rows; i++) { + score_queue.push(std::make_pair(scores[i], i)); + } // These data structures are used to track the rows that have been aggregated // The invariant is that aggregated_rows is empty and aggregated_mark is all zeros @@ -687,62 +1402,72 @@ void cut_generation_t::generate_mir_cuts( std::vector aggregated_rows; std::vector aggregated_mark(lp.num_rows, 0); - const i_t max_cuts = std::min(lp.num_rows, 1000); - f_t work_estimate = 0.0; - for (i_t h = 0; h < max_cuts; h++) { - // Get the row with the highest score - const i_t i = sorted_indices.back(); - sorted_indices.pop_back(); - const f_t max_score = score[i]; + // Transform the relaxation solution + std::vector transformed_xstar; + complemented_mir.bound_substitution(lp, variable_bounds, var_types, xstar, transformed_xstar); - const i_t row_nz = Arow.row_start[i + 1] - Arow.row_start[i]; - const i_t slack = slack_map[i]; + const i_t max_cuts = std::min(lp.num_rows, 100000); + f_t work_estimate = 0.0; + i_t num_cuts = 0; + while (num_cuts < max_cuts && !score_queue.empty()) { + // Get the row with the highest score from the queue + auto [max_score, i] = score_queue.top(); + score_queue.pop(); + // skip stale score entries + if (max_score != scores[i]) { continue; } + + // Add the current row to the aggregated set + aggregated_mark[i] = 1; + aggregated_rows.push_back(i); + + const i_t row_nz = Arow.row_length(i); + const i_t slack = complemented_mir.slack_cols(i); const f_t slack_value = xstar[slack]; if (max_score <= 0.0) { break; } if (work_estimate > 2e9) { break; } - sparse_vector_t inequality(Arow, i); - work_estimate += inequality.i.size(); - f_t inequality_rhs = lp.rhs[i]; + inequality_t inequality(Arow, i, lp.rhs[i]); + work_estimate += inequality.size(); + const bool generate_cg_cut = settings.strong_chvatal_gomory_cuts != 0; - f_t fractional_part_rhs = fractional_part(inequality_rhs); + f_t fractional_part_rhs = fractional_part(inequality.rhs); if (generate_cg_cut && fractional_part_rhs > 1e-6 && fractional_part_rhs < (1 - 1e-6)) { // Try to generate a CG cut - sparse_vector_t cg_inequality = inequality; - f_t cg_inequality_rhs = inequality_rhs; - if (fractional_part(inequality_rhs) < 0.5) { + + inequality_t cg_inequality = inequality; + if (fractional_part(inequality.rhs) < 0.5) { // Multiply by -1 to force the fractional part to be greater than 0.5 - cg_inequality_rhs *= -1; cg_inequality.negate(); } - sparse_vector_t cg_cut(lp.num_cols, 0); - f_t cg_cut_rhs; - i_t cg_status = cg.generate_strong_cg_cut( - lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs); - if (cg_status == 0) { cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); } + inequality_t cg_cut; + i_t cg_status = + cg.generate_strong_cg_cut(lp, settings, var_types, cg_inequality, xstar, cg_cut); + if (cg_status == 0) { cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut); } } + if (settings.mir_cuts == 0) { continue; } + // Remove the slack from the equality to get an inequality - work_estimate += inequality.i.size(); + work_estimate += inequality.size(); i_t negate_inequality = 1; - for (i_t k = 0; k < inequality.i.size(); k++) { - const i_t j = inequality.i[k]; + for (i_t k = 0; k < inequality.size(); k++) { + const i_t j = inequality.index(k); if (j == slack) { - if (inequality.x[k] != 1.0) { - if (inequality.x[k] == -1.0 && lp.lower[j] >= 0.0) { + if (inequality.coeff(k) != 1.0) { + if (inequality.coeff(k) == -1.0 && lp.lower[j] >= 0.0) { negate_inequality = 0; } else { settings.log.debug("Bad slack %d in inequality: aj %e lo %e up %e\n", j, - inequality.x[k], + inequality.coeff(k), lp.lower[j], lp.upper[j]); negate_inequality = -1; break; } } - inequality.x[k] = 0.0; + inequality.vector.x[k] = 0.0; } } @@ -751,112 +1476,56 @@ void cut_generation_t::generate_mir_cuts( if (negate_inequality) { // inequaility'*x <= inequality_rhs // But for MIR we need: inequality'*x >= inequality_rhs - inequality_rhs *= -1; inequality.negate(); - work_estimate += inequality.i.size(); + work_estimate += inequality.size(); } // We should now have: inequality'*x >= inequality_rhs - // Transform the relaxation solution - std::vector transformed_xstar; - mir.relaxation_to_nonnegative(lp, xstar, transformed_xstar); - work_estimate += transformed_xstar.size(); + for (i_t k = 0; k < inequality.size(); k++) { + const i_t j = inequality.index(k); + if (var_types[j] == variable_type_t::INTEGER) { + if (transformed_xstar[j] > complemented_mir.new_upper(j) / 2.0) { + settings.log.printf("!!!!!! j %d transformed x_j %e new_upper_j/2.0 %e\n", + j, + transformed_xstar[j], + complemented_mir.new_upper(j) / 2.0); + } + } + } - sparse_vector_t cut(lp.num_cols, 0); - f_t cut_rhs; bool add_cut = false; i_t num_aggregated = 0; const i_t max_aggregated = 6; + f_t min_abs_multiplier = 1.0; + f_t max_abs_multiplier = 1.0; work_estimate += lp.num_cols; while (!add_cut && num_aggregated < max_aggregated) { - sparse_vector_t transformed_inequality; + inequality_t transformed_inequality; inequality.squeeze(transformed_inequality); - f_t transformed_rhs = inequality_rhs; - work_estimate += transformed_inequality.i.size(); - - mir.to_nonnegative(lp, transformed_inequality, transformed_rhs); - work_estimate += transformed_inequality.i.size(); - std::vector> transformed_cuts; - std::vector transformed_cut_rhs; - std::vector transformed_violations; - - // Generate cut for delta = 1 - { - sparse_vector_t cut_1(lp.num_cols, 0); - f_t cut_1_rhs; - mir.generate_cut_nonnegative( - transformed_inequality, transformed_rhs, var_types, cut_1, cut_1_rhs); - f_t cut_1_violation = mir.compute_violation(cut_1, cut_1_rhs, transformed_xstar); - if (cut_1_violation > 1e-6) { - transformed_cuts.push_back(cut_1); - transformed_cut_rhs.push_back(cut_1_rhs); - transformed_violations.push_back(cut_1_violation); - } - work_estimate += transformed_inequality.i.size(); - } - - // Generate a cut for delta = max { |a_j|, j in I} - { - f_t max_coeff = 0.0; - for (i_t k = 0; k < transformed_inequality.i.size(); k++) { - const i_t j = transformed_inequality.i[k]; - if (var_types[j] == variable_type_t::INTEGER) { - const f_t abs_aj = std::abs(transformed_inequality.x[k]); - if (abs_aj > max_coeff) { max_coeff = abs_aj; } - } - } - work_estimate += transformed_inequality.i.size(); - - if (max_coeff > 1e-6 && max_coeff != 1.0) { - sparse_vector_t scaled_inequality = transformed_inequality; - const i_t nz = transformed_inequality.i.size(); - for (i_t k = 0; k < nz; k++) { - scaled_inequality.x[k] /= max_coeff; - } - const f_t scaled_rhs = transformed_rhs / max_coeff; - sparse_vector_t cut_2(lp.num_cols, 0); - f_t cut_2_rhs; - mir.generate_cut_nonnegative(scaled_inequality, scaled_rhs, var_types, cut_2, cut_2_rhs); - f_t cut_2_violation = mir.compute_violation(cut_2, cut_2_rhs, transformed_xstar); - if (cut_2_violation > 1e-6) { - transformed_cuts.push_back(cut_2); - transformed_cut_rhs.push_back(cut_2_rhs); - transformed_violations.push_back(cut_2_violation); - } - work_estimate += 5 * transformed_inequality.i.size(); - } - } - - if (!transformed_violations.empty()) { - std::vector permuted(transformed_violations.size()); - std::iota(permuted.begin(), permuted.end(), 0); - std::sort(permuted.begin(), permuted.end(), [&](i_t i, i_t j) { - return transformed_violations[i] > transformed_violations[j]; - }); - work_estimate += transformed_violations.size() * std::log2(transformed_violations.size()); - // Get the biggest violation - const i_t best_index = permuted[0]; - f_t max_viol = transformed_violations[best_index]; - cut = transformed_cuts[best_index]; - cut_rhs = transformed_cut_rhs[best_index]; - - if (max_viol > 1e-6) { - // TODO: Divide by 1/2*violation, 1/4*violation, 1/8*violation - // Transform back to the original variables - mir.to_original(lp, cut, cut_rhs); - mir.remove_small_coefficients(lp.lower, lp.upper, cut, cut_rhs); - mir.substitute_slacks(lp, Arow, cut, cut_rhs); - f_t viol = mir.compute_violation(cut, cut_rhs, xstar); - work_estimate += 10 * cut.i.size(); - add_cut = true; - } + work_estimate += transformed_inequality.size(); + + complemented_mir.transform_inequality(variable_bounds, var_types, transformed_inequality); + work_estimate += transformed_inequality.size(); + + inequality_t cut; + bool cut_found = complemented_mir.cut_generation_heuristic( + transformed_inequality, var_types, transformed_xstar, cut, work_estimate); + // Note cut is in the transformed variables + + if (cut_found) { + // Transform back to the original variables + complemented_mir.untransform_inequality(variable_bounds, var_types, cut); + complemented_mir.remove_small_coefficients(lp.lower, lp.upper, cut); + complemented_mir.substitute_slacks(lp, Arow, cut); + complemented_mir.remove_small_coefficients(lp.lower, lp.upper, cut); + f_t viol = complemented_mir.compute_violation(cut, xstar); + work_estimate += 10 * cut.size(); + if (viol > 1e-6) { add_cut = true; } } if (add_cut) { - if (settings.mir_cuts != 0) { - cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut, cut_rhs); - } + if (settings.mir_cuts != 0) { cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_ROUNDING, cut); } break; } else { // Perform aggregation to try and find a cut @@ -865,24 +1534,26 @@ void cut_generation_t::generate_mir_cuts( i_t num_continuous = 0; f_t max_off_bound = 0.0; i_t max_off_bound_var = -1; - for (i_t p = 0; p < inequality.i.size(); p++) { - const i_t j = inequality.i[p]; + for (i_t p = 0; p < inequality.size(); p++) { + const i_t j = inequality.index(p); + const f_t aj = inequality.coeff(p); + if (aj == 0.0) { continue; } if (var_types[j] == variable_type_t::CONTINUOUS) { num_continuous++; - const f_t off_lower = lp.lower[j] > -inf ? xstar[j] - lp.lower[j] : std::abs(xstar[j]); - const f_t off_upper = lp.upper[j] < inf ? lp.upper[j] - xstar[j] : std::abs(xstar[j]); - const f_t off_bound = std::max(off_lower, off_upper); - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - const i_t col_len = col_end - col_start; + const f_t lb_star_j = complemented_mir.get_lb_star(j); + const f_t ub_star_j = complemented_mir.get_ub_star(j); + const f_t off_lower = lb_star_j > -inf ? xstar[j] - lb_star_j : std::abs(xstar[j]); + const f_t off_upper = ub_star_j < inf ? ub_star_j - xstar[j] : std::abs(xstar[j]); + const f_t off_bound = std::min(off_lower, off_upper); + const i_t col_len = lp.A.col_length(j); if (off_bound > max_off_bound && col_len > 1) { max_off_bound = off_bound; max_off_bound_var = j; } } } - work_estimate += 10 * inequality.i.size(); + work_estimate += 10 * inequality.size(); if (num_continuous == 0 || max_off_bound < 1e-6) { break; } @@ -890,8 +1561,8 @@ void cut_generation_t::generate_mir_cuts( if (max_off_bound_var >= 0) { const i_t col_start = lp.A.col_start[max_off_bound_var]; const i_t col_end = lp.A.col_start[max_off_bound_var + 1]; - const i_t col_len = col_end - col_start; - const i_t max_potential_rows = 10; + const i_t col_len = lp.A.col_length(max_off_bound_var); + const i_t max_potential_rows = col_len; if (col_len > 1) { std::vector potential_rows; potential_rows.reserve(col_len); @@ -901,35 +1572,44 @@ void cut_generation_t::generate_mir_cuts( const i_t i = lp.A.i[q]; const f_t val = lp.A.x[q]; // Can't use rows that have already been aggregated - if (std::abs(val) > threshold && aggregated_mark[i] == 0) { - potential_rows.push_back(i); - } + if (std::abs(val) > threshold && !aggregated_mark[i]) { potential_rows.push_back(i); } if (potential_rows.size() >= max_potential_rows) { break; } } work_estimate += 5 * (col_end - col_start); - if (!potential_rows.empty()) { - std::sort(potential_rows.begin(), potential_rows.end(), [&](i_t a, i_t b) { - return score[a] > score[b]; - }); - work_estimate += 10 * std::log2(10); - - const i_t pivot_row = potential_rows[0]; - - sparse_vector_t pivot_row_inequality(Arow, pivot_row); - f_t pivot_row_rhs = lp.rhs[pivot_row]; - work_estimate += pivot_row_inequality.i.size(); - mir.combine_rows(lp, - Arow, - max_off_bound_var, - pivot_row_inequality, - pivot_row_rhs, - inequality, - inequality_rhs); + bool did_aggregate = false; + while (!potential_rows.empty()) { + const i_t pivot_row = + *std::max_element(potential_rows.begin(), potential_rows.end(), [&](i_t a, i_t b) { + return scores[a] < scores[b]; + }); + work_estimate += potential_rows.size(); + + inequality_t pivot_row_inequality(Arow, pivot_row, lp.rhs[pivot_row]); + work_estimate += pivot_row_inequality.size(); + // Save inequality before combine_rows mutates it, so we can restore on rejection + inequality_t saved_inequality = inequality; + f_t multiplier = complemented_mir.combine_rows( + lp, Arow, max_off_bound_var, pivot_row_inequality, inequality); + if (max_abs_multiplier / std::abs(multiplier) > 10000 || + std::abs(multiplier) / min_abs_multiplier > 10000) { + inequality = saved_inequality; + // Erase the pivot row from the potential rows + potential_rows.erase( + std::remove(potential_rows.begin(), potential_rows.end(), pivot_row), + potential_rows.end()); + continue; + } + max_abs_multiplier = std::max(max_abs_multiplier, std::abs(multiplier)); + min_abs_multiplier = std::min(min_abs_multiplier, std::abs(multiplier)); aggregated_rows.push_back(pivot_row); aggregated_mark[pivot_row] = 1; - work_estimate += inequality.i.size() + pivot_row_inequality.i.size(); - } else { + work_estimate += inequality.size() + pivot_row_inequality.size(); + did_aggregate = true; + break; + } + + if (!did_aggregate) { // No potential rows to aggregate break; } @@ -942,32 +1622,26 @@ void cut_generation_t::generate_mir_cuts( if (add_cut) { // We were successful in generating a cut. - // Set the score of the aggregated rows to zero + // Set the score of the aggregated rows to a lower value for (i_t row : aggregated_rows) { - score[row] = 0.0; + scores[row] = 0.99 * scores[row]; + score_queue.push(std::make_pair(scores[row], row)); } + work_estimate += aggregated_rows.size() * std::log2(score_queue.size()); } // Clear the aggregated mark + work_estimate += 2 * aggregated_rows.size(); for (i_t row : aggregated_rows) { aggregated_mark[row] = 0; } - work_estimate += 2 * aggregated_rows.size(); // Clear the aggregated rows aggregated_rows.clear(); // Set the score of the current row to zero - score[i] = 0.0; - - // Re-sort the rows by score - // It's possible this could be made more efficient by storing the rows in a data structure - // that allows us to: - // 1. Get the row with the best score - // 2. Get the row with a nonzero in column j that has the best score - // 3. Remove the rows that have been aggregated - // 4. Remove the current row - best_score_last_permutation(score, sorted_indices); - work_estimate += score.size() * std::log2(score.size()); + scores[i] = 0.0; + score_queue.push(std::make_pair(scores[i], i)); + work_estimate += std::log2(std::max(1, static_cast(score_queue.size()))); } } @@ -984,100 +1658,118 @@ void cut_generation_t::generate_gomory_cuts( const std::vector& nonbasic_list) { tableau_equality_t tableau(lp, basis_update, nonbasic_list); - mixed_integer_rounding_cut_t mir(lp, settings, new_slacks, xstar); + mixed_integer_gomory_cut_t gomory_cut; + complemented_mixed_integer_rounding_cut_t complemented_mir(lp, settings, new_slacks); + simplex_solver_settings_t variable_settings = settings; + variable_settings.sub_mip = 1; + variable_bounds_t variable_bounds(lp, variable_settings, var_types, Arow, new_slacks); strong_cg_cut_t cg(lp, var_types, xstar); + std::vector transformed_xstar; + complemented_mir.bound_substitution(lp, variable_bounds, var_types, xstar, transformed_xstar); for (i_t i = 0; i < lp.num_rows; i++) { - sparse_vector_t inequality(lp.num_cols, 0); - f_t inequality_rhs; + inequality_t inequality(lp.num_cols); const i_t j = basic_list[i]; if (var_types[j] != variable_type_t::INTEGER) { continue; } const f_t x_j = xstar[j]; - if (std::abs(x_j - std::round(x_j)) < settings.integer_tol) { continue; } - i_t tableau_status = tableau.generate_base_equality(lp, - settings, - Arow, - var_types, - basis_update, - xstar, - basic_list, - nonbasic_list, - i, - inequality, - inequality_rhs); + if (fractional_part(x_j) < 0.05 || fractional_part(x_j) > 0.95) { continue; } + + i_t tableau_status = tableau.generate_base_equality( + lp, settings, Arow, var_types, basis_update, xstar, basic_list, nonbasic_list, i, inequality); if (tableau_status == 0) { // Generate a CG cut const bool generate_cg_cut = settings.strong_chvatal_gomory_cuts != 0; if (generate_cg_cut) { // Try to generate a CG cut - sparse_vector_t cg_inequality = inequality; - f_t cg_inequality_rhs = inequality_rhs; - if (fractional_part(inequality_rhs) < 0.5) { + inequality_t cg_inequality = inequality; + if (fractional_part(inequality.rhs) < 0.5) { // Multiply by -1 to force the fractional part to be greater than 0.5 - cg_inequality_rhs *= -1; cg_inequality.negate(); } - sparse_vector_t cg_cut(lp.num_cols, 0); - f_t cg_cut_rhs; - i_t cg_status = cg.generate_strong_cg_cut( - lp, settings, var_types, cg_inequality, cg_inequality_rhs, xstar, cg_cut, cg_cut_rhs); - if (cg_status == 0) { cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut, cg_cut_rhs); } + inequality_t cg_cut(lp.num_cols); + i_t cg_status = + cg.generate_strong_cg_cut(lp, settings, var_types, cg_inequality, xstar, cg_cut); + if (cg_status == 0) { cut_pool_.add_cut(cut_type_t::CHVATAL_GOMORY, cg_cut); } } if (settings.mixed_integer_gomory_cuts == 0) { continue; } - // Given the base inequality, generate a MIR cut - sparse_vector_t cut_A(lp.num_cols, 0); - f_t cut_A_rhs; - i_t mir_status = mir.generate_cut( - inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_A, cut_A_rhs); - bool A_valid = false; - f_t cut_A_distance = 0.0; - if (mir_status == 0) { - if (cut_A.i.size() == 0) { continue; } - mir.substitute_slacks(lp, Arow, cut_A, cut_A_rhs); - if (cut_A.i.size() == 0) { + // Transform the inequality + inequality_t transformed_inequality = inequality; + complemented_mir.transform_inequality(variable_bounds, var_types, transformed_inequality); + + // Generate a MIR cut from the transformed inequality + inequality_t cut_A_float(lp.num_cols); + bool cut_ok = complemented_mir.generate_cut_nonnegative_maintain_indicies( + transformed_inequality, var_types, cut_A_float); + + // Transform the cut back to the original variables + complemented_mir.untransform_inequality(variable_bounds, var_types, cut_A_float); + complemented_mir.remove_small_coefficients(lp.lower, lp.upper, cut_A_float); + + inequality_t cut_A(lp.num_cols); + if (cut_ok) { cut_ok = gomory_cut.rational_coefficients(var_types, cut_A_float, cut_A); } + + // See if the inequality is violated by the original relaxation solution + f_t cut_A_violation = complemented_mir.compute_violation(cut_A, xstar); + bool A_valid = false; + f_t cut_A_distance = 0.0; + if (cut_ok && cut_A_violation > 1e-6) { + if (cut_A.size() == 0) { continue; } + complemented_mir.substitute_slacks(lp, Arow, cut_A); + complemented_mir.remove_small_coefficients(lp.lower, lp.upper, cut_A); + if (cut_A.size() == 0) { A_valid = false; } else { // Check that the cut is violated - f_t dot = cut_A.dot(xstar); - f_t cut_norm = cut_A.norm2_squared(); - if (dot >= cut_A_rhs) { continue; } - cut_A_distance = (cut_A_rhs - dot) / std::sqrt(cut_norm); + f_t dot = cut_A.vector.dot(xstar); + f_t cut_norm = cut_A.vector.norm2_squared(); + if (dot >= cut_A.rhs) { continue; } + cut_A_distance = (cut_A.rhs - dot) / std::sqrt(cut_norm); A_valid = true; } } // Negate the base inequality inequality.negate(); - inequality_rhs *= -1; - - sparse_vector_t cut_B(lp.num_cols, 0); - f_t cut_B_rhs; - - mir_status = mir.generate_cut( - inequality, inequality_rhs, lp.upper, lp.lower, var_types, cut_B, cut_B_rhs); - bool B_valid = false; - f_t cut_B_distance = 0.0; - if (mir_status == 0) { - if (cut_B.i.size() == 0) { continue; } - mir.substitute_slacks(lp, Arow, cut_B, cut_B_rhs); - if (cut_B.i.size() == 0) { + + inequality_t cut_B_float(lp.num_cols); + + transformed_inequality = inequality; + complemented_mir.transform_inequality(variable_bounds, var_types, transformed_inequality); + + cut_ok = complemented_mir.generate_cut_nonnegative_maintain_indicies( + transformed_inequality, var_types, cut_B_float); + // Transform the cut back to the original variables + complemented_mir.untransform_inequality(variable_bounds, var_types, cut_B_float); + complemented_mir.remove_small_coefficients(lp.lower, lp.upper, cut_B_float); + + inequality_t cut_B(lp.num_cols); + if (cut_ok) { cut_ok = gomory_cut.rational_coefficients(var_types, cut_B_float, cut_B); } + + bool B_valid = false; + f_t cut_B_distance = 0.0; + f_t cut_B_violation = complemented_mir.compute_violation(cut_B, xstar); + if (cut_ok && cut_B_violation > 1e-6) { + if (cut_B.size() == 0) { continue; } + complemented_mir.substitute_slacks(lp, Arow, cut_B); + complemented_mir.remove_small_coefficients(lp.lower, lp.upper, cut_B); + if (cut_B.size() == 0) { B_valid = false; } else { // Check that the cut is violated - f_t dot = cut_B.dot(xstar); - f_t cut_norm = cut_B.norm2_squared(); - if (dot >= cut_B_rhs) { continue; } - cut_B_distance = (cut_B_rhs - dot) / std::sqrt(cut_norm); + f_t dot = cut_B.vector.dot(xstar); + f_t cut_norm = cut_B.vector.norm2_squared(); + if (dot >= cut_B.rhs) { continue; } + cut_B_distance = (cut_B.rhs - dot) / std::sqrt(cut_norm); B_valid = true; } } if ((cut_A_distance > cut_B_distance) && A_valid) { - cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A, cut_A_rhs); + cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_A); } else if (B_valid) { - cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B, cut_B_rhs); + cut_pool_.add_cut(cut_type_t::MIXED_INTEGER_GOMORY, cut_B); } } } @@ -1094,8 +1786,7 @@ i_t tableau_equality_t::generate_base_equality( const std::vector& basic_list, const std::vector& nonbasic_list, i_t i, - sparse_vector_t& inequality, - f_t& inequality_rhs) + inequality_t& inequality) { // Let's look for Gomory cuts const i_t j = basic_list[i]; @@ -1233,504 +1924,1155 @@ i_t tableau_equality_t::generate_base_equality( settings_.log.printf("b_bar[%d] = %e\n", i, b_bar[i]); #endif - inequality = a_bar; - inequality_rhs = b_bar_[i]; + inequality.vector = a_bar; + inequality.rhs = b_bar_[i]; return 0; } template -mixed_integer_rounding_cut_t::mixed_integer_rounding_cut_t( - const lp_problem_t& lp, - const simplex_solver_settings_t& settings, - const std::vector& new_slacks, - const std::vector& xstar) - : num_vars_(lp.num_cols), - settings_(settings), - x_workspace_(num_vars_, 0.0), - x_mark_(num_vars_, 0), - has_lower_(num_vars_, 0), - has_upper_(num_vars_, 0), - is_slack_(num_vars_, 0), - slack_rows_(num_vars_, 0), - bound_info_(num_vars_, 0) +bool mixed_integer_gomory_cut_t::rational_approximation(f_t x, + int64_t max_denominator, + int64_t& numerator, + int64_t& denominator) { - for (i_t j : new_slacks) { - is_slack_[j] = 1; - const i_t col_start = lp.A.col_start[j]; - const i_t i = lp.A.i[col_start]; - slack_rows_[j] = i; - assert(std::abs(lp.A.x[col_start]) == 1.0); + int64_t a, p0 = 0, q0 = 1, p1 = 1, q1 = 0; + f_t val = x; + bool negative = false; + + if (x < 0) { + negative = true; + val = -val; } - needs_complement_ = false; - for (i_t j = 0; j < num_vars_; j++) { - if (lp.lower[j] < 0) { - settings_.log.debug("Variable %d has negative lower bound %e\n", j, lp.lower[j]); - } - const f_t uj = lp.upper[j]; - const f_t lj = lp.lower[j]; - const f_t xstar_j = xstar[j]; - if (uj < inf) { - if (uj - xstar_j <= xstar_j - lj) { - has_upper_[j] = 1; - bound_info_[j] = 1; - needs_complement_ = true; - } else if (lj != 0.0) { - has_lower_[j] = 1; - bound_info_[j] = -1; - needs_complement_ = true; - } - continue; - } + while (1) { + a = (int64_t)std::floor(val); + if (a < 0 || a > INT64_MAX) { return false; } // Protect against overflow + int64_t p2 = a * p1 + p0; + int64_t q2 = a * q1 + q0; + if (q2 > max_denominator) { break; } + p0 = p1; + q0 = q1; + p1 = p2; + q1 = q2; - if (lj > -inf && lj != 0.0) { - has_lower_[j] = 1; - bound_info_[j] = -1; - needs_complement_ = true; - } + f_t rem = val - a; + if (rem < 1e-14) { break; } + val = 1.0 / rem; } + + numerator = negative ? -p1 : p1; + denominator = q1; + + f_t approx = static_cast(numerator) / static_cast(denominator); + f_t err = std::abs(approx - x); + return err <= 1e-14; } template -void mixed_integer_rounding_cut_t::to_nonnegative(const lp_problem_t& lp, - sparse_vector_t& inequality, - f_t& rhs) +bool mixed_integer_gomory_cut_t::rational_coefficients( + const std::vector& var_types, + const inequality_t& input_inequality, + inequality_t& rational_inequality) { - const i_t nz = inequality.i.size(); - for (i_t k = 0; k < nz; k++) { - const i_t j = inequality.i[k]; - const f_t aj = inequality.x[k]; - if (bound_info_[j] == -1) { - // v_j = x_j - l_j, v_j >= 0 - // x_j = v_j + l_j - // sum_{k != j} a_k x_j + a_j x_j <= beta - // sum_{k != j} a_k x_j + a_j (v_j + l_j) <= beta - // sum_{k != j} a_k x_j + a_j v_j <= beta - a_j l_j - const f_t lj = lp.lower[j]; - rhs -= aj * lj; - } else if (bound_info_[j] == 1) { - // w_j = u_j - x_j, w_j >= 0 - // x_j = u_j - w_j - // sum_{k != j} a_k x_k + a_j x_j <= beta - // sum_{k != j} a_k x_k + a_j (u_j - w_j) <= beta - // sum_{k != j} a_k x_k - a_j w_j <= beta - a_j u_j - const f_t uj = lp.upper[j]; - inequality.x[k] *= -1.0; - rhs -= aj * uj; + rational_inequality = input_inequality; + + std::vector numerators; + std::vector denominators; + std::vector indices; + for (i_t k = 0; k < input_inequality.size(); k++) { + const i_t j = rational_inequality.index(k); + const f_t x = rational_inequality.coeff(k); + if (var_types[j] == variable_type_t::INTEGER) { + int64_t numerator, denominator; + if (!rational_approximation(x, static_cast(1000), numerator, denominator)) { + return false; + } + numerators.push_back(numerator); + denominators.push_back(denominator); + indices.push_back(k); + rational_inequality.vector.x[k] = static_cast(numerator) / static_cast(denominator); } } + + int64_t gcd_numerators = gcd(numerators); + int64_t lcm_denominators = lcm(denominators); + + f_t scalar = static_cast(lcm_denominators) / static_cast(gcd_numerators); + if (scalar < 0) { return false; } + if (std::abs(scalar) > 1000) { return false; } + + rational_inequality.scale(scalar); + + return true; } template -void mixed_integer_rounding_cut_t::relaxation_to_nonnegative( - const lp_problem_t& lp, - const std::vector& xstar, - std::vector& xstar_nonnegative) +int64_t mixed_integer_gomory_cut_t::gcd(const std::vector& integers) { - xstar_nonnegative = xstar; - const i_t n = lp.num_cols; - for (i_t j = 0; j < n; ++j) { - if (bound_info_[j] == -1) { - // v_j = x_j - l_j - const f_t lj = lp.lower[j]; - xstar_nonnegative[j] -= lj; - } else if (bound_info_[j] == 1) { - // w_j = u_j - x_j - const f_t uj = lp.upper[j]; - xstar_nonnegative[j] = uj - xstar_nonnegative[j]; - } + if (integers.empty()) { return 0; } + + int64_t result = integers[0]; + for (size_t i = 1; i < integers.size(); ++i) { + result = std::gcd(result, integers[i]); } + return result; } template -void mixed_integer_rounding_cut_t::to_original(const lp_problem_t& lp, - sparse_vector_t& inequality, - f_t& rhs) +int64_t mixed_integer_gomory_cut_t::lcm(const std::vector& integers) { - const i_t nz = inequality.i.size(); - for (i_t k = 0; k < nz; k++) { - const i_t j = inequality.i[k]; - const f_t dj = inequality.x[k]; - if (bound_info_[j] == -1) { - // v_j = x_j - l_j, v_j >= 0 - // sum_{k != j} d_k x_k + d_j v_j >= beta - // sum_{k != j} d_k x_k + d_j (x_j - l_j) >= beta - // sum_{k != j} d_k x_k + d_j x_j >= beta + d_j l_j - const f_t lj = lp.lower[j]; - rhs += dj * lj; - } else if (bound_info_[j] == 1) { - // w_j = u_j - x_j, w_j >= 0 - // sum_{k != j} d_k x_k + d_j w_j >= beta - // sum_{k != j} d_k x_k + d_j (u_j - x_j) >= beta - // sum_{k != j} d_k x_k - d_j x_j >= beta - d_j u_j - const f_t uj = lp.upper[j]; - inequality.x[k] *= -1.0; - rhs -= dj * uj; - } + if (integers.empty()) { return 0; } + int64_t result = + std::reduce(std::next(integers.begin()), integers.end(), integers[0], [](int64_t a, int64_t b) { + return std::lcm(a, b); + }); + return result; +} + +template +variable_bounds_t::variable_bounds_t(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const csr_matrix_t& Arow, + const std::vector& new_slacks) + : upper_offsets(lp.num_cols + 1, 0), + lower_offsets(lp.num_cols + 1, 0), + upper_activities_(lp.num_rows, 0.0), + lower_activities_(lp.num_rows, 0.0), + num_pos_inf_(lp.num_rows, 0), + num_neg_inf_(lp.num_rows, 0) +{ + if (settings.sub_mip) { + return; // Don't compute the variable upper/lower bounds inside sub-MIP + } + f_t start_time = tic(); + + // Construct the slack map + slack_map_.resize(lp.num_rows, -1); + std::vector slack_coeff(lp.num_rows, 0.0); + for (i_t j : new_slacks) { + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + const i_t col_len = col_end - col_start; + assert(col_len == 1); + const i_t i = lp.A.i[col_start]; + slack_map_[i] = j; + slack_coeff[i] = lp.A.x[col_start]; + } + + // The constraints are in the form: + // sum_j a_j x_j + sigma * slack = beta + + std::vector num_integer_in_row(lp.num_rows, 0); + // Compute the upper activities of the constraints + for (i_t i = 0; i < lp.num_rows; i++) { + const i_t row_start = Arow.row_start[i]; + const i_t row_end = Arow.row_start[i + 1]; + const i_t slack_index = slack_map_[i]; + f_t activity = 0.0; + for (i_t p = row_start; p < row_end; p++) { + const i_t j = Arow.j[p]; + if (j == slack_index) { continue; } + const f_t aj = Arow.x[p]; + const f_t uj = lp.upper[j]; + const f_t lj = lp.lower[j]; + + if (aj > 0.0) { + if (uj < inf) { + activity += aj * uj; + } else { + num_pos_inf_[i]++; + } + } else { // a_j < 0.0 + if (lj > -inf) { + activity += aj * lj; + } else { + num_pos_inf_[i]++; + } + } + + if (var_types[j] == variable_type_t::INTEGER) { num_integer_in_row[i]++; } + } + upper_activities_[i] = activity; + } + + // Compute the lower activities of the constraints + for (i_t i = 0; i < lp.num_rows; i++) { + const i_t row_start = Arow.row_start[i]; + const i_t row_end = Arow.row_start[i + 1]; + const i_t slack_index = slack_map_[i]; + f_t activity = 0.0; + for (i_t p = row_start; p < row_end; p++) { + const i_t j = Arow.j[p]; + if (j == slack_index) { continue; } + const f_t aj = Arow.x[p]; + const f_t uj = lp.upper[j]; + const f_t lj = lp.lower[j]; + if (aj > 0.0) { + if (lj > -inf) { + activity += aj * lj; + } else { + num_neg_inf_[i]++; + } + } else { // a_j < 0.0 + if (uj < inf) { + activity += aj * uj; + } else { + num_neg_inf_[i]++; + } + } + } + lower_activities_[i] = activity; + } + + // Now go through all continuous variables and use the activiites to get upper variable bounds + i_t upper_edges = 0; + for (i_t j = 0; j < lp.num_cols; j++) { + upper_offsets[j] = upper_edges; + if (var_types[j] != variable_type_t::CONTINUOUS) { continue; } + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; p++) { + const i_t i = lp.A.i[p]; + if (num_integer_in_row[i] < 1) { continue; } + if (num_neg_inf_[i] > 2 && num_pos_inf_[i] > 2) { continue; } + const i_t row_start = Arow.row_start[i]; + const i_t row_end = Arow.row_start[i + 1]; + const i_t row_len = row_end - row_start; + if (row_len < 2) { continue; } + const f_t a_ij = lp.A.x[p]; + const f_t slack_lower = lp.lower[slack_map_[i]]; + const f_t slack_upper = lp.upper[slack_map_[i]]; + const f_t slack_coeff_i = slack_coeff[i]; + const f_t sigma_slack_lower = slack_coeff_i == 1.0 ? slack_lower : -slack_upper; + const f_t sigma_slack_upper = slack_coeff_i == 1.0 ? slack_upper : -slack_lower; + + if (sigma_slack_lower > -inf) { + const f_t beta = lp.rhs[i] - sigma_slack_lower; + // sum_k a_ik x_k <= beta + + // If we have too many variables in the row that would cause the activity to be infinite, + // we cannot derive an variable bound + if (a_ij > 0.0 && num_neg_inf_[i] <= 2) { + const f_t lower_activity_j = lower_activity(lp.lower[j], lp.upper[j], a_ij); + + // This is inefficient if num_neg_inf_[i] > 0 + // If num_neg_inf_[i] == 1 and var_types[s] != INTEGER, we can't derive a bound + // If num_neg_inf_[i] == 2 and var_types[s ^ j] != INTEGER, we can't derive a bound + // If num_neg_inf_[i] == 2 and var_types[s ^ j] == INTEGER, and lower_activity_j != -inf, + // we can't derive a bound + for (i_t q = row_start; q < row_end; q++) { + const i_t l = Arow.j[q]; + if (var_types[l] == variable_type_t::CONTINUOUS) { continue; } + // sum_{k != l, k != j} a_ik x_k + a_ij x_j + a_il x_l <= beta + // a_ij x_j <= -a_il x_l + beta - sum_{k != l, k != j} a_ik x_k + const f_t a_il = Arow.x[q]; + const f_t lower_activity_l = lower_activity(lp.lower[l], lp.upper[l], a_il); + const f_t sum = adjusted_lower_activity( + lower_activities_[i], num_neg_inf_[i], lower_activity_j, lower_activity_l); + if (sum > -inf) { + // We have a valid variable upper bound + // x_j <= -a_il/a_ij * x_l + beta/a_ij - 1/a_ij * sum_{k != l, k != j} a_ik * + // bound(x_k) + upper_variables.push_back(l); + upper_weights.push_back(-a_il / a_ij); + upper_biases.push_back(beta / a_ij - (1.0 / a_ij) * sum); + upper_edges++; + } + } + } + } + + if (sigma_slack_upper < inf) { + const f_t beta = lp.rhs[i] - sigma_slack_upper; + // sum_k a_ik x_k >= beta + + // If we have too many variables in the row that would cause the activity to be infinite, + // we cannot derive an variable bound + if (a_ij < 0.0 && num_pos_inf_[i] <= 2) { + const f_t upper_activity_j = upper_activity(lp.lower[j], lp.upper[j], a_ij); + + for (i_t q = row_start; q < row_end; q++) { + const i_t l = Arow.j[q]; + if (var_types[l] == variable_type_t::CONTINUOUS) { continue; } + // sum_{k != l, k != j} a_ik x_k + a_ij x_j + a_il x_l >= beta + // a_ij x_j >= -a_il x_l + beta - sum_{k != l, k != j} a_ik x_k + const f_t a_il = Arow.x[q]; + const f_t upper_activity_l = upper_activity(lp.lower[l], lp.upper[l], a_il); + const f_t sum = adjusted_upper_activity( + upper_activities_[i], num_pos_inf_[i], upper_activity_j, upper_activity_l); + if (sum < inf) { + // We have a valid variable upper bound + // x_j <= -a_il/a_ij * x_l + beta/a_ij - 1/a_ij * sum_{k != l, k != j} a_ik * + // bound(x_k) + upper_variables.push_back(l); + upper_weights.push_back(-a_il / a_ij); + upper_biases.push_back(beta / a_ij - (1.0 / a_ij) * sum); + upper_edges++; + } + } + } + } + } + } + upper_offsets[lp.num_cols] = upper_edges; + settings.log.printf("%d variable upper bounds in %.2f seconds\n", upper_edges, toc(start_time)); + + // Now go through all continuous variables and use the activiites to get lower variable bounds + i_t lower_edges = 0; + for (i_t j = 0; j < lp.num_cols; j++) { + lower_offsets[j] = lower_edges; + if (var_types[j] != variable_type_t::CONTINUOUS) { continue; } + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; p++) { + const i_t i = lp.A.i[p]; + if (num_integer_in_row[i] < 1) { continue; } + const i_t row_start = Arow.row_start[i]; + const i_t row_end = Arow.row_start[i + 1]; + const i_t row_len = row_end - row_start; + if (row_len < 2) { continue; } + const f_t a_ij = lp.A.x[p]; + const f_t slack_lower = lp.lower[slack_map_[i]]; + const f_t slack_upper = lp.upper[slack_map_[i]]; + const f_t slack_coeff_i = slack_coeff[i]; + const f_t sigma_slack_lower = slack_coeff_i == 1.0 ? slack_lower : -slack_upper; + const f_t sigma_slack_upper = slack_coeff_i == 1.0 ? slack_upper : -slack_lower; + + if (sigma_slack_lower > -inf) { + const f_t beta = lp.rhs[i] - sigma_slack_lower; + // sum_k a_ik x_k <= beta + + // If we have too many variables in the row that would cause the activity to be infinite, + // we cannot derive a variable bound + if (a_ij < 0.0 && num_neg_inf_[i] <= 2) { + const f_t lower_activity_j = lower_activity(lp.lower[j], lp.upper[j], a_ij); + + for (i_t q = row_start; q < row_end; q++) { + const i_t l = Arow.j[q]; + if (var_types[l] == variable_type_t::CONTINUOUS) { continue; } + // sum_{k != l, k != j} a_ik x_k + a_ij x_j + a_il x_l <= beta + // a_ij x_j <= -a_il x_l + beta - sum_{k != l, k != j} a_ik x_k + // x_j >= -a_il/a_ij * x_l + beta/a_ij - 1/a_ij * sum_{k != l, k != j} a_ik * bound(x_k) + const f_t a_il = Arow.x[q]; + const f_t lower_activity_l = lower_activity(lp.lower[l], lp.upper[l], a_il); + const f_t sum = adjusted_lower_activity( + lower_activities_[i], num_neg_inf_[i], lower_activity_j, lower_activity_l); + if (sum > -inf) { + // We have a valid variable lower bound + // x_j >= -a_il/a_ij * x_l + beta/a_ij - 1/a_ij * sum_{k != l, k != j} a_ik * + // bound(x_k) + lower_variables.push_back(l); + lower_weights.push_back(-a_il / a_ij); + lower_biases.push_back(beta / a_ij - (1.0 / a_ij) * sum); + lower_edges++; + } + } + } + } + + if (sigma_slack_upper < inf) { + const f_t beta = lp.rhs[i] - sigma_slack_upper; + // sum_k a_ik x_k >= beta + + // If we have too many variables in the row that would cause the activity to be infinite, + // we cannot derive a variable bound + if (a_ij > 0.0 && num_pos_inf_[i] <= 2) { + const f_t upper_activity_j = upper_activity(lp.lower[j], lp.upper[j], a_ij); + + for (i_t q = row_start; q < row_end; q++) { + const i_t l = Arow.j[q]; + if (var_types[l] == variable_type_t::CONTINUOUS) { continue; } + // sum_{k != l, k != j} a_ik x_k + a_ij x_j + a_il x_l >= beta + // a_ij x_j >= -a_il x_l + beta - sum_{k != l, k != j} a_ik x_k + const f_t a_il = Arow.x[q]; + const f_t upper_activity_l = upper_activity(lp.lower[l], lp.upper[l], a_il); + const f_t sum = adjusted_upper_activity( + upper_activities_[i], num_pos_inf_[i], upper_activity_j, upper_activity_l); + if (sum < inf) { + // We have a valid variable lower bound + // x_j >= -a_il/a_ij * x_l + beta/a_ij - 1/a_ij * sum_{k != l, k != j} a_ik * + // bound(x_k) + lower_variables.push_back(l); + lower_weights.push_back(-a_il / a_ij); + lower_biases.push_back(beta / a_ij - (1.0 / a_ij) * sum); + lower_edges++; + } + } + } + } + } + } + lower_offsets[lp.num_cols] = lower_edges; + settings.log.printf("%d variable lower bounds in %.2f seconds\n", lower_edges, toc(start_time)); +} + +template +complemented_mixed_integer_rounding_cut_t::complemented_mixed_integer_rounding_cut_t( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& new_slacks) + : is_slack_(lp.num_cols, 0), + slack_rows_(lp.num_cols, -1), + slack_cols_(lp.num_rows, -1), + lb_variable_(lp.num_cols, -1), + lb_star_(lp.num_cols, 0.0), + ub_variable_(lp.num_cols, -1), + ub_star_(lp.num_cols, 0.0), + transformed_upper_(lp.num_cols, inf), + bound_changed_(lp.num_cols, 0), + scratch_pad_(lp.num_cols) +{ + for (i_t j : new_slacks) { + is_slack_[j] = 1; + const i_t col_start = lp.A.col_start[j]; + const i_t i = lp.A.i[col_start]; + slack_rows_[j] = i; + slack_cols_[i] = j; + assert(std::abs(lp.A.x[col_start]) == 1.0); } } template -void mixed_integer_rounding_cut_t::remove_small_coefficients( +void complemented_mixed_integer_rounding_cut_t::compute_initial_scores_for_rows( + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const csr_matrix_t& Arow, + const std::vector& xstar, + const std::vector& ystar, + std::vector& scores) +{ + const bool verbose = false; + const i_t n = lp.num_cols; + const f_t obj_norm = vector_norm2(lp.objective); + const f_t obj_denom = std::max(1.0, obj_norm); + + // Compute initial scores for all rows + scores.resize(lp.num_rows, 0.0); + for (i_t i = 0; i < lp.num_rows; i++) { + const i_t row_start = Arow.row_start[i]; + const i_t row_end = Arow.row_start[i + 1]; + + const i_t row_nz = row_end - row_start; + f_t row_norm = 0.0; + for (i_t p = row_start; p < row_end; p++) { + const f_t a_j = Arow.x[p]; + row_norm += a_j * a_j; + } + row_norm = std::sqrt(row_norm); + + const f_t density = static_cast(row_nz) / static_cast(n); + const f_t dual = std::abs(ystar[i]); + + const i_t slack = slack_cols_[i]; + assert(slack >= 0); + const f_t slack_value = std::max(xstar[slack], 0.0); + const f_t slack_denom = std::max(0.1, std::sqrt(row_norm)); + + const f_t nz_weight = 0.0001; + const f_t dual_weight = 1.0; + const f_t slack_weight = 0.001; + + scores[i] = nz_weight * (1.0 - density) + dual_weight * std::max(dual / obj_denom, 0.0001) + + slack_weight * (1.0 - slack_value / slack_denom); + + if (verbose) { + settings.log.printf("Scores[%d] = %e density %.2f dual %e slack %e\n", + i, + scores[i], + density, + dual, + slack_value); + } + } +} + +template +bool complemented_mixed_integer_rounding_cut_t::cut_generation_heuristic( + const inequality_t& transformed_inequality, + const std::vector& var_types, + const std::vector& transformed_xstar, + inequality_t& transformed_cut, + f_t& work_estimate) +{ + std::vector deltas_to_try; + deltas_to_try.reserve(transformed_inequality.size()); + deltas_to_try.push_back(1.0); + work_estimate += transformed_inequality.size(); + i_t num_integers = 0; + f_t max_coeff = 0.0; + for (i_t k = 0; k < transformed_inequality.size(); k++) { + const i_t j = transformed_inequality.index(k); + const f_t abs_aj = std::abs(transformed_inequality.coeff(k)); + if (var_types[j] == variable_type_t::INTEGER) { + num_integers++; + max_coeff = std::max(max_coeff, abs_aj); + const f_t x_j = transformed_xstar[j]; + const f_t new_upper_j = new_upper(j); + const f_t dist_upper = new_upper_j - x_j; + const f_t dist_lower = x_j; + const bool between_bounds = x_j > 1e-6 && (new_upper_j == inf || dist_upper > 0.0); + if (between_bounds) { deltas_to_try.push_back(abs_aj); } + } + } + if (max_coeff > 1e-6 && max_coeff != 1.0) { + deltas_to_try.push_back(max_coeff); + deltas_to_try.push_back(max_coeff + 1.0); + } + + std::vector complemented_indices; + complemented_indices.reserve(num_integers); + std::vector distance_from_midpoint; + distance_from_midpoint.reserve(num_integers); + std::vector integer_indices; + integer_indices.reserve(num_integers); + for (i_t k = 0; k < transformed_inequality.size(); k++) { + const i_t j = transformed_inequality.index(k); + if (var_types[j] == variable_type_t::INTEGER && new_upper(j) < inf) { + const f_t x_j = transformed_xstar[j]; + const f_t new_upper_j = new_upper(j); + if (x_j > 1e-6 && new_upper_j < inf) { + const f_t midpoint_j = new_upper_j / 2.0; + distance_from_midpoint.push_back(x_j - midpoint_j); + integer_indices.push_back(k); + } + } + } + + std::vector perm(integer_indices.size()); + best_score_first_permutation(distance_from_midpoint, perm); + work_estimate += + integer_indices.size() > 0 ? integer_indices.size() * std::log2(integer_indices.size()) : 0; + + bool cut_found = false; + + inequality_t complemented_inequality = transformed_inequality; + work_estimate += 4 * transformed_inequality.size(); + + f_t delta = 0.0; + f_t best_violation = 0.0; + + // First try without any complementation + for (const f_t tmp_delta : deltas_to_try) { + bool cut_ok = scale_uncomplement_and_generate_cut(var_types, + transformed_xstar, + complemented_indices, + complemented_inequality, + tmp_delta, + transformed_cut, + work_estimate); + if (!cut_ok) { continue; } + // Check if the cut is violated + best_violation = compute_violation(transformed_cut, transformed_xstar); + work_estimate += 4 * transformed_cut.size(); + if (best_violation > 1e-6) { + cut_found = true; + delta = tmp_delta; + break; + } + } + + if (!cut_found) { + // Complement an integer variable + for (const i_t idx : perm) { + const i_t l = integer_indices[idx]; + const i_t j = complemented_inequality.index(l); + // We have an integer variable x_j <= b_j + // We create a new variable xbar_j such that + // x_j + xbar_j = b_j + // x_j = b_j - xbar_j, xbar_j = b_j - x_j + // + // The inequality + // sum_{k != j} a_k x_k + a_j x_j >= beta + // becomes + // sum_{k != j} a_k x_k + a_j (b_j - xbar_j) >= beta + // sum_{k != j} a_k x_k - a_j xbar_j >= beta - a_j b_j + const f_t b_j = new_upper(j); + const f_t a_j = complemented_inequality.coeff(l); + + complemented_inequality.vector.x[l] = -a_j; + complemented_inequality.rhs -= a_j * b_j; + complemented_indices.push_back(l); + + for (const f_t tmp_delta : deltas_to_try) { + bool cut_ok = scale_uncomplement_and_generate_cut(var_types, + transformed_xstar, + complemented_indices, + complemented_inequality, + tmp_delta, + transformed_cut, + work_estimate); + if (!cut_ok) { continue; } + // Check if the cut is violated + best_violation = compute_violation(transformed_cut, transformed_xstar); + work_estimate += 4 * transformed_cut.size(); + if (best_violation > 1e-6) { + cut_found = true; + delta = tmp_delta; + break; + } + } + if (cut_found) { break; } + } + } + + if (!cut_found) { return false; } + + // We have found a cut. Now try to improve the violation by scaling the cut by 1/2, 1/4, 1/8, etc. + std::vector scaled_deltas_to_try = {delta / 2.0, delta / 4.0, delta / 8.0}; + for (const f_t tmp_delta : scaled_deltas_to_try) { + inequality_t tmp_cut_delta; + bool cut_ok = scale_uncomplement_and_generate_cut(var_types, + transformed_xstar, + complemented_indices, + complemented_inequality, + tmp_delta, + tmp_cut_delta, + work_estimate); + if (!cut_ok) { continue; } + + // Check if the cut is violated + f_t violation = compute_violation(tmp_cut_delta, transformed_xstar); + work_estimate += 4 * tmp_cut_delta.size(); + if (violation > best_violation) { + best_violation = violation; + transformed_cut = tmp_cut_delta; + delta = tmp_delta; + } + } + + std::vector best_complemented_indices = complemented_indices; + work_estimate += 2 * best_complemented_indices.size(); + + // Try to improve the violation by complementing integer variables + complemented_inequality = transformed_inequality; + work_estimate += 4 * transformed_inequality.size(); + complemented_indices.clear(); + for (const i_t idx : perm) { + const i_t l = integer_indices[idx]; + const i_t j = complemented_inequality.index(l); + // We have an integer variable x_j <= b_j + // We create a new variable xbar_j such that + // x_j + xbar_j = b_j + // x_j = b_j - xbar_j, xbar_j = b_j - x_j + // + // The inequality + // sum_{k != j} a_k x_k + a_j x_j >= beta + // becomes + // sum_{k != j} a_k x_k + a_j (b_j - xbar_j) >= beta + // sum_{k != j} a_k x_k - a_j xbar_j >= beta - a_j b_j + const f_t b_j = new_upper(j); + const f_t a_j = complemented_inequality.coeff(l); + + complemented_inequality.vector.x[l] = -a_j; + complemented_inequality.rhs -= a_j * b_j; + complemented_indices.push_back(l); + + inequality_t tmp_cut_delta; + + bool cut_ok = scale_uncomplement_and_generate_cut(var_types, + transformed_xstar, + complemented_indices, + complemented_inequality, + delta, + tmp_cut_delta, + work_estimate); + if (!cut_ok) { continue; } + // Check if the cut is violated + f_t violation = compute_violation(tmp_cut_delta, transformed_xstar); + work_estimate += 4 * tmp_cut_delta.size(); + if (violation > best_violation) { + best_violation = violation; + best_complemented_indices = complemented_indices; + transformed_cut = tmp_cut_delta; + } + } + + return best_violation > 1e-6; +} + +template +bool complemented_mixed_integer_rounding_cut_t::scale_uncomplement_and_generate_cut( + const std::vector& var_types, + const std::vector& transformed_xstar, + const std::vector& complemented_indices, + const inequality_t& complemented_inequality, + f_t delta, + inequality_t& cut_delta, + f_t& work_estimate) +{ + inequality_t scaled_inequality = complemented_inequality; + if (delta != 1.0) { scaled_inequality.scale(1.0 / delta); } + bool cut_ok = generate_cut_nonnegative_maintain_indicies(scaled_inequality, var_types, cut_delta); + if (!cut_ok) { return false; } + work_estimate += 4 * scaled_inequality.size(); + + // Now we need to transform the complemented variables back + for (i_t h = 0; h < complemented_indices.size(); h++) { + const i_t l = complemented_indices[h]; + const i_t j = complemented_inequality.index(l); + // Our cut is of the form + // sum_{k != j} d_k x_k + d_j xbar_j >= alpha + // we have that xbar_j = b_j - x_j + // So + // sum_{k != j} d_k x_k + d_j (b_j - x_j) >= alpha + // Or + // sum_{k != j} d_k x_k - d_j x_j >= alpha - d_j b_j + + const f_t b_j = new_upper(j); + const f_t d_j = cut_delta.coeff(l); + cut_delta.vector.x[l] = -d_j; + cut_delta.rhs -= d_j * b_j; + } + work_estimate += 5 * complemented_indices.size(); + return true; +} + +template +void complemented_mixed_integer_rounding_cut_t::remove_small_coefficients( const std::vector& lower_bounds, const std::vector& upper_bounds, - sparse_vector_t& cut, - f_t& cut_rhs) + inequality_t& cut) { - const i_t nz = cut.i.size(); + const i_t nz = cut.size(); i_t removed = 0; - for (i_t k = 0; k < cut.i.size(); k++) { - const i_t j = cut.i[k]; + for (i_t k = 0; k < cut.size(); k++) { + const i_t j = cut.index(k); // Check for small coefficients - const f_t aj = cut.x[k]; + const f_t aj = cut.coeff(k); if (std::abs(aj) < 1e-6) { if (aj >= 0.0 && upper_bounds[j] < inf) { // Move this to the right-hand side - cut_rhs -= aj * upper_bounds[j]; - cut.x[k] = 0.0; + cut.rhs -= aj * upper_bounds[j]; + cut.vector.x[k] = 0.0; removed++; } else if (aj <= 0.0 && lower_bounds[j] > -inf) { - cut_rhs += aj * lower_bounds[j]; - cut.x[k] = 0.0; + cut.rhs -= aj * lower_bounds[j]; + cut.vector.x[k] = 0.0; removed++; continue; } else { + // We need to keep the coefficient } } } if (removed > 0) { - sparse_vector_t new_cut(cut.n, 0); + inequality_t new_cut(cut.vector.n); cut.squeeze(new_cut); cut = new_cut; } } template -i_t mixed_integer_rounding_cut_t::generate_cut_nonnegative( - const sparse_vector_t& a, - f_t beta, +void complemented_mixed_integer_rounding_cut_t::bound_substitution( + const lp_problem_t& lp, + const variable_bounds_t& variable_bounds, const std::vector& var_types, - sparse_vector_t& cut, - f_t& cut_rhs) + const std::vector& xstar, + std::vector& transformed_xstar) { - auto f = [](f_t q_1, f_t q_2) -> f_t { - f_t q_1_hat = q_1 - std::floor(q_1); - f_t q_2_hat = q_2 - std::floor(q_2); - return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1); - }; - - auto h = [](f_t q) -> f_t { return std::max(q, 0.0); }; + transformed_xstar.resize(lp.num_cols); + // Perform bound substitution for continuous variables + for (i_t j = 0; j < lp.num_cols; j++) { + if (var_types[j] != variable_type_t::CONTINUOUS) { continue; } + // Step 1: Decide whether to use variable or simple bounds + const f_t uj = lp.upper[j]; + const f_t lj = lp.lower[j]; + const f_t xstar_j = xstar[j]; - std::vector cut_indices; - cut_indices.reserve(a.i.size()); - f_t R = (beta - std::floor(beta)) * std::ceil(beta); - - for (i_t k = 0; k < a.i.size(); k++) { - const i_t jj = a.i[k]; - f_t aj = a.x[k]; - if (var_types[jj] == variable_type_t::INTEGER) { - x_workspace_[jj] += f(aj, beta); - if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { - x_mark_[jj] = 1; - cut_indices.push_back(jj); - } - } else { - x_workspace_[jj] += h(aj); - if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { - x_mark_[jj] = 1; - cut_indices.push_back(jj); + // Set lb and lb_star to the simple lower bound + lb_variable_[j] = -1; + lb_star_[j] = lj; + + // Set ub and ub_star to the simple upper bound + ub_variable_[j] = -1; + ub_star_[j] = uj; + + // Check the variable lower bound and update lb and lb_star + // if these yield a tighter bound + const i_t lower_variable_start = variable_bounds.lower_offsets[j]; + const i_t lower_variable_end = variable_bounds.lower_offsets[j + 1]; + for (i_t p = lower_variable_start; p < lower_variable_end; p++) { + const i_t i = variable_bounds.lower_variables[p]; + const f_t gamma = variable_bounds.lower_weights[p]; + const f_t alpha = variable_bounds.lower_biases[p]; + // x_j >= gamma * x_i + alpha + + const f_t xstar_i = xstar[i]; + const f_t val = gamma * xstar_i + alpha; + if (val > lb_star_[j]) { + lb_variable_[j] = p; + lb_star_[j] = val; } } - } - cut.i.reserve(cut_indices.size()); - cut.x.reserve(cut_indices.size()); - cut.i.clear(); - cut.x.clear(); - for (i_t k = 0; k < cut_indices.size(); k++) { - const i_t j = cut_indices[k]; - cut.i.push_back(j); - cut.x.push_back(x_workspace_[j]); - } - - // Clear the workspace - for (i_t jj : cut_indices) { - x_workspace_[jj] = 0.0; - x_mark_[jj] = 0; - } - -#ifdef CHECK_WORKSPACE - for (i_t j = 0; j < x_workspace_.size(); j++) { - if (x_workspace_[j] != 0.0) { - printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - assert(x_workspace_[j] == 0.0); + // Check the variable upper bound and update ub and ub_star + // if these yield a tighter bound + const i_t upper_variable_start = variable_bounds.upper_offsets[j]; + const i_t upper_variable_end = variable_bounds.upper_offsets[j + 1]; + for (i_t p = upper_variable_start; p < upper_variable_end; p++) { + const i_t i = variable_bounds.upper_variables[p]; + const f_t gamma = variable_bounds.upper_weights[p]; + const f_t alpha = variable_bounds.upper_biases[p]; + // x_j <= gamma * x_i + alpha + + const f_t xstar_i = xstar[i]; + const f_t val = gamma * xstar_i + alpha; + if (val < ub_star_[j]) { + ub_variable_[j] = p; + ub_star_[j] = val; + } } - if (x_mark_[j] != 0) { - printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - assert(x_mark_[j] == 0); + + // Step 2: Decide to use the lower or upper bound + const bool has_finite_lower_bound = lb_star_[j] > -inf; + const bool has_finite_upper_bound = ub_star_[j] < inf; + if (!has_finite_lower_bound && !has_finite_upper_bound) { + transformed_xstar[j] = xstar_j; + transformed_upper_[j] = inf; + bound_changed_[j] = 0; + continue; } - } -#endif + if (has_finite_lower_bound && + (!has_finite_upper_bound || (xstar_j - lb_star_[j] <= ub_star_[j] - xstar_j))) { + // Use the lower bound + // lb_star_j <= x_j <= ub_star_j + // v_j = x_j - lb_star_j, + // 0 <= v_j <= ub_star_j - lb_star_j + transformed_upper_[j] = ub_star_[j] - lb_star_[j]; + transformed_xstar[j] = xstar_j - lb_star_[j]; + bound_changed_[j] = (lb_star_[j] == 0.0) ? 0 : -1; + } else if (has_finite_upper_bound) { + // Use the upper bound + // lb_star_j <= x_j <= ub_star_j + // x_j + w_j = ub_star_j, + // w_j = ub_star_j - x_j, + // x_j = ub_star_j - w_j + // 0 <= w_j <= ub_star_j - lb_star_j + transformed_upper_[j] = ub_star_[j] - lb_star_[j]; + transformed_xstar[j] = ub_star_[j] - xstar_j; + bound_changed_[j] = 1; + } + } + + // Perform bound substitution for the integer variables + for (i_t j = 0; j < lp.num_cols; j++) { + if (var_types[j] != variable_type_t::INTEGER) { continue; } + const f_t uj = lp.upper[j]; + const f_t lj = lp.lower[j]; + const f_t xstar_j = xstar[j]; - // The new cut is: g'*x >= R - // But we want to have it in the form h'*x <= b - cut.sort(); + lb_star_[j] = lj; + ub_star_[j] = uj; + transformed_xstar[j] = xstar_j; + transformed_upper_[j] = uj; + bound_changed_[j] = 0; - cut_rhs = R; + if (uj < inf) { + if (uj - xstar_j <= xstar_j - lj) { + // Use the upper bound + // lj <= x_j <= uj + // x_j + w_j = uj, + // w_j = uj - x_j, + // x_j = uj - w_j + // 0 <= w_j <= uj - lj + transformed_upper_[j] = uj - lj; + transformed_xstar[j] = uj - xstar_j; + bound_changed_[j] = 1; + } else if (lj != 0.0) { + // Use the lower bound + // lj <= x_j <= uj + // v_j = x_j - lj, + // 0 <= v_j <= uj - lj + transformed_upper_[j] = uj - lj; + transformed_xstar[j] = xstar_j - lj; + bound_changed_[j] = -1; + } + continue; + } -#ifdef CHECK_REPEATED_INDICES - // Check for repeated indicies - std::vector check(num_vars_, 0); - for (i_t p = 0; p < cut.i.size(); p++) { - if (check[cut.i[p]] != 0) { - printf("repeated index in generated cut\n"); - assert(check[cut.i[p]] == 0); + if (lj > -inf && lj != 0.0) { + // Use the lower bound + // lj <= x_j <= uj + // v_j = x_j - lj, + // 0 <= v_j <= uj - lj + transformed_upper_[j] = uj - lj; + transformed_xstar[j] = xstar_j - lj; + bound_changed_[j] = -1; } - check[cut.i[p]] = 1; } -#endif - - if (cut.i.size() == 0) { return -1; } - - return 0; } template -i_t mixed_integer_rounding_cut_t::generate_cut( - const sparse_vector_t& a, - f_t beta, - const std::vector& upper_bounds, - const std::vector& lower_bounds, - const std::vector& var_types, - sparse_vector_t& cut, - f_t& cut_rhs) +void complemented_mixed_integer_rounding_cut_t::transform_inequality( + const variable_bounds_t& variable_bounds, + const std::vector& var_type, + inequality_t& inequality) { -#ifdef CHECK_WORKSPACE - for (i_t j = 0; j < x_workspace_.size(); j++) { - if (x_workspace_[j] != 0.0) { - printf("Before generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - printf("num_vars_ %d\n", num_vars_); - printf("x_workspace_.size() %ld\n", x_workspace_.size()); - assert(x_workspace_[j] == 0.0); - } - if (x_mark_[j] != 0) { - printf("Before generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - assert(x_mark_[j] == 0); + const i_t nz = inequality.size(); + for (i_t k = 0; k < nz; k++) { + const i_t j = inequality.index(k); + const f_t aj = inequality.coeff(k); + if (var_type[j] != variable_type_t::CONTINUOUS) { + scratch_pad_.add_to_pad(j, aj); + continue; } - } -#endif - - auto f = [](f_t q_1, f_t q_2) -> f_t { - f_t q_1_hat = q_1 - std::floor(q_1); - f_t q_2_hat = q_2 - std::floor(q_2); - return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1); - }; - - auto h = [](f_t q) -> f_t { return std::max(q, 0.0); }; - - std::vector cut_indices; - cut_indices.reserve(a.i.size()); - f_t R; - if (!needs_complement_) { - R = (beta - std::floor(beta)) * std::ceil(beta); - - for (i_t k = 0; k < a.i.size(); k++) { - const i_t jj = a.i[k]; - f_t aj = a.x[k]; - if (var_types[jj] == variable_type_t::INTEGER) { - x_workspace_[jj] += f(aj, beta); - if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { - x_mark_[jj] = 1; - cut_indices.push_back(jj); - } + if (bound_changed_[j] == -1) { + if (lb_variable_[j] == -1) { + // v_j = x_j - l_j, v_j >= 0 + // x_j = v_j + l_j + // sum_{k != j} a_k x_k + a_j x_j >= beta + // sum_{k != j} a_k x_k + a_j (v_j + l_j) >= beta + // sum_{k != j} a_k x_k + a_j v_j >= beta - a_j l_j + const f_t lj = lb_star_[j]; + inequality.rhs -= aj * lj; + scratch_pad_.add_to_pad(j, aj); } else { - x_workspace_[jj] += h(aj); - if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { - x_mark_[jj] = 1; - cut_indices.push_back(jj); - } - } - } - } else { - // Compute r - f_t r = beta; - for (i_t k = 0; k < a.i.size(); k++) { - const i_t jj = a.i[k]; - if (has_upper_[jj]) { - const f_t uj = upper_bounds[jj]; - r -= uj * a.x[k]; - continue; + // v_j = x_j - lb*_j, v_j >= 0 + // x_j = v_j + lb*_j + // lb*_j = gamma * x_i + alpha + // x_j = v_j + gamma * x_i + alpha + // sum_{k != j} a_k x_k + a_j x_j >= beta + // sum_{k != j} a_k x_k + a_j (v_j + gamma * x_i + alpha) >= beta + // sum_{k != j} a_k x_k + a_j v_j + a_j * gamma * x_i >= beta - a_j alpha + const i_t p = lb_variable_[j]; + const f_t alpha = variable_bounds.lower_biases[p]; + const f_t gamma = variable_bounds.lower_weights[p]; + const i_t i = variable_bounds.lower_variables[p]; + inequality.rhs -= aj * alpha; + scratch_pad_.add_to_pad(j, aj); + scratch_pad_.add_to_pad(i, aj * gamma); } - if (has_lower_[jj]) { - const f_t lj = lower_bounds[jj]; - r -= lj * a.x[k]; - } - } - - // Compute R - R = std::ceil(r) * (r - std::floor(r)); - for (i_t k = 0; k < a.i.size(); k++) { - const i_t jj = a.i[k]; - const f_t aj = a.x[k]; - if (has_upper_[jj]) { - const f_t uj = upper_bounds[jj]; - if (var_types[jj] == variable_type_t::INTEGER) { - R -= f(-aj, r) * uj; - } else { - R -= h(-aj) * uj; - } - } else if (has_lower_[jj]) { - const f_t lj = lower_bounds[jj]; - if (var_types[jj] == variable_type_t::INTEGER) { - R += f(aj, r) * lj; - } else { - R += h(aj) * lj; - } - } - } - - // Compute the cut coefficients - for (i_t k = 0; k < a.i.size(); k++) { - const i_t jj = a.i[k]; - const f_t aj = a.x[k]; - if (has_upper_[jj]) { - if (var_types[jj] == variable_type_t::INTEGER) { - // Upper intersect I - x_workspace_[jj] -= f(-aj, r); - if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { - x_mark_[jj] = 1; - cut_indices.push_back(jj); - } - } else { - // Upper intersect C - f_t h_j = h(-aj); - if (h_j != 0.0) { - x_workspace_[jj] -= h_j; - if (!x_mark_[jj]) { - x_mark_[jj] = 1; - cut_indices.push_back(jj); - } - } - } - } else if (var_types[jj] == variable_type_t::INTEGER) { - // I \ Upper - x_workspace_[jj] += f(aj, r); - if (!x_mark_[jj] && x_workspace_[jj] != 0.0) { - x_mark_[jj] = 1; - cut_indices.push_back(jj); - } + } else if (bound_changed_[j] == 1) { + if (ub_variable_[j] == -1) { + // w_j = u_j - x_j, w_j >= 0 + // x_j = u_j - w_j + // sum_{k != j} a_k x_k + a_j x_j >= beta + // sum_{k != j} a_k x_k + a_j (u_j - w_j) >= beta + // sum_{k != j} a_k x_k - a_j w_j >= beta - a_j u_j + const f_t uj = ub_star_[j]; + inequality.rhs -= aj * uj; + scratch_pad_.add_to_pad(j, -aj); } else { - // C \ Upper - f_t h_j = h(aj); - if (h_j != 0.0) { - x_workspace_[jj] += h_j; - if (!x_mark_[jj]) { - x_mark_[jj] = 1; - cut_indices.push_back(jj); - } - } + // w_j = ub*_j - x_j, w_j >= 0 + // x_j = ub*_j - w_j + // ub*_j = gamma * x_i + alpha + // x_j = gamma * x_i + alpha - w_j + // sum_{k != j} a_k x_k + a_j x_j >= beta + // sum_{k != j} a_k x_k + a_j (ub*_j - w_j) >= beta + // sum_{k != j} a_k x_k + a_j (gamma * x_i + alpha) - a_j w_j >= beta + // sum_{k != j} a_k x_k + a_j gamma * x_i - a_j w_j >= beta - a_j alpha + const i_t p = ub_variable_[j]; + const f_t alpha = variable_bounds.upper_biases[p]; + const f_t gamma = variable_bounds.upper_weights[p]; + const i_t i = variable_bounds.upper_variables[p]; + inequality.rhs -= aj * alpha; + scratch_pad_.add_to_pad(j, -aj); + scratch_pad_.add_to_pad(i, aj * gamma); } + } else if (bound_changed_[j] == 0) { + scratch_pad_.add_to_pad(j, aj); } } + scratch_pad_.get_pad(inequality.vector.i, inequality.vector.x); + // At this point we have converted all the continuous variables to be nonnegative + // Note that since continuous variables had VUB or VLB, they modified + // the integer variables. - cut.i.reserve(cut_indices.size()); - cut.x.reserve(cut_indices.size()); - cut.i.clear(); - cut.x.clear(); - for (i_t k = 0; k < cut_indices.size(); k++) { - const i_t jj = cut_indices[k]; + // We clear the scratch pad. As it is no longer needed. + scratch_pad_.clear_pad(); - // Check for small coefficients - const f_t aj = x_workspace_[jj]; - if (std::abs(aj) < 1e-6) { - if (aj >= 0.0 && upper_bounds[jj] < inf) { - // Move this to the right-hand side - R -= aj * upper_bounds[jj]; - continue; - } else if (aj <= 0.0 && lower_bounds[jj] > -inf) { - R += aj * lower_bounds[jj]; - continue; - } else { - } + // We now convert all the integer variables to be nonnegative + const i_t nz_after = inequality.size(); + for (i_t k = 0; k < nz_after; k++) { + const i_t j = inequality.index(k); + if (var_type[j] != variable_type_t::INTEGER) { continue; } + const f_t aj = inequality.coeff(k); + if (bound_changed_[j] == -1) { + // v_j = x_j - l_j, v_j >= 0 + // x_j = v_j + l_j + // sum_{k != j} a_k x_k + a_j x_j >= beta + // sum_{k != j} a_k x_k + a_j (v_j + l_j) >= beta + // sum_{k != j} a_k x_k + a_j v_j >= beta - a_j l_j + const f_t lj = lb_star_[j]; + inequality.rhs -= aj * lj; + } else if (bound_changed_[j] == 1) { + // w_j = u_j - x_j, w_j >= 0 + // x_j = u_j - w_j + // sum_{k != j} a_k x_j + a_j x_j >= beta + // sum_{k != j} a_k x_j + a_j (u_j - w_j) >= beta + // sum_{k != j} a_k x_j - a_j w_j >= beta - a_j u_j + const f_t uj = ub_star_[j]; + inequality.rhs -= aj * uj; + inequality.vector.x[k] *= -1.0; } - cut.i.push_back(jj); - cut.x.push_back(x_workspace_[jj]); } +} - // Clear the workspace - for (i_t jj : cut_indices) { - x_workspace_[jj] = 0.0; - x_mark_[jj] = 0; +template +void complemented_mixed_integer_rounding_cut_t::untransform_inequality( + const variable_bounds_t& variable_bounds, + const std::vector& var_type, + inequality_t& inequality) +{ + // First convert all the integers variables back to their original form: l_j <= x_j <= u_j + const i_t nz = inequality.size(); + for (i_t k = 0; k < nz; k++) { + const i_t j = inequality.index(k); + if (var_type[j] != variable_type_t::INTEGER) { continue; } + const f_t dj = inequality.coeff(k); + if (bound_changed_[j] == -1) { + // v_j = x_j - l_j, v_j >= 0 + // sum_{k != j} d_k x_k + d_j v_j >= beta + // sum_{k != j} d_k x_k + d_j (x_j - l_j) >= beta + // sum_{k != j} d_k x_k + d_j x_j >= beta + d_j l_j + const f_t lj = lb_star_[j]; + inequality.rhs += dj * lj; + } else if (bound_changed_[j] == 1) { + // w_j = u_j - x_j, w_j >= 0 + // sum_{k != j} d_k x_k + d_j w_j >= beta + // sum_{k != j} d_k x_k + d_j (u_j - x_j) >= beta + // sum_{k != j} d_k x_k - d_j x_j >= beta - d_j u_j + const f_t uj = ub_star_[j]; + inequality.rhs -= dj * uj; + inequality.vector.x[k] *= -1.0; + } } - -#ifdef CHECK_WORKSPACE - for (i_t j = 0; j < x_workspace_.size(); j++) { - if (x_workspace_[j] != 0.0) { - printf("After generate_cut: Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - assert(x_workspace_[j] == 0.0); + // Then undo the VUB/VLB substitions and bring continuous variables back to their original form + for (i_t k = 0; k < nz; k++) { + const i_t j = inequality.index(k); + const f_t dj = inequality.coeff(k); + if (var_type[j] != variable_type_t::CONTINUOUS) { + scratch_pad_.add_to_pad(j, dj); + continue; } - if (x_mark_[j] != 0) { - printf("After generate_cut: Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - assert(x_mark_[j] == 0); + if (bound_changed_[j] == -1) { + if (lb_variable_[j] == -1) { + // v_j = x_j - l_j, v_j >= 0 + // sum_{k != j} d_k x_k + d_j v_j >= beta + // sum_{k != j} d_k x_k + d_j (x_j - l_j) >= beta + // sum_{k != j} d_k x_k + d_j x_j >= beta + d_j l_j + const f_t lj = lb_star_[j]; + inequality.rhs += dj * lj; + scratch_pad_.add_to_pad(j, dj); + } else { + // v_j = x_j - lb*_j, v_j >= 0 + // lb*_j = gamma * x_i + alpha + // v_j = x_j - gamma * x_i - alpha + // sum_{k != j} d_k x_k + d_j v_j >= beta + // sum_{k != j} d_k x_k + d_j (x_j - gamma * x_i - alpha) >= beta + // sum_{k != j} d_k x_k + d_j x_j - d_j * gamma * x_i >= beta + d_j alpha + const i_t p = lb_variable_[j]; + const f_t alpha = variable_bounds.lower_biases[p]; + const f_t gamma = variable_bounds.lower_weights[p]; + const i_t i = variable_bounds.lower_variables[p]; + inequality.rhs += dj * alpha; + scratch_pad_.add_to_pad(j, dj); + scratch_pad_.add_to_pad(i, -dj * gamma); + } + } else if (bound_changed_[j] == 1) { + if (ub_variable_[j] == -1) { + // w_j = u_j - x_j, w_j >= 0 + // sum_{k != j} d_k x_k + d_j w_j >= beta + // sum_{k != j} d_k x_k + d_j (u_j - x_j) >= beta + // sum_{k != j} d_k x_k - d_j x_j >= beta - d_j u_j + const f_t uj = ub_star_[j]; + inequality.rhs -= dj * uj; + scratch_pad_.add_to_pad(j, -dj); + } else { + // w_j = ub*_j - x_j, w_j >= 0 + // ub*_j = gamma * x_i + alpha + // w_j = gamma * x_i + alpha - x_j + // sum_{k != j} d_k x_k + d_j w_j >= beta + // sum_{k != j} d_k x_k + d_j (gamma * x_i + alpha - x_j) >= beta + // sum_{k != j} d_k x_k + d_j gamma * x_i - d_j x_j >= beta - d_j alpha + const i_t p = ub_variable_[j]; + const f_t alpha = variable_bounds.upper_biases[p]; + const f_t gamma = variable_bounds.upper_weights[p]; + const i_t i = variable_bounds.upper_variables[p]; + inequality.rhs -= dj * alpha; + scratch_pad_.add_to_pad(j, -dj); + scratch_pad_.add_to_pad(i, dj * gamma); + } + } else { + scratch_pad_.add_to_pad(j, dj); } } -#endif - // The new cut is: g'*x >= R - // But we want to have it in the form h'*x <= b - cut.sort(); + scratch_pad_.get_pad(inequality.vector.i, inequality.vector.x); + scratch_pad_.clear_pad(); +} + +template +bool complemented_mixed_integer_rounding_cut_t:: + generate_cut_nonnegative_maintain_indicies(const inequality_t& inequality, + const std::vector& var_types, + inequality_t& cut) +{ + auto f = [](f_t q_1, f_t q_2) -> f_t { + f_t q_1_hat = q_1 - std::floor(q_1); + f_t q_2_hat = q_2 - std::floor(q_2); + return std::min(q_1_hat, q_2_hat) + q_2_hat * std::floor(q_1); + }; - cut_rhs = R; + auto h = [](f_t q) -> f_t { return std::max(q, 0.0); }; + + cut.vector = inequality.vector; + const f_t beta = inequality.rhs; + const f_t f_beta = fractional_part(beta); + cut.rhs = f_beta * std::ceil(beta); + if (f_beta < 0.05 || f_beta > 0.95) { return false; } -#ifdef CHECK_REPEATED_INDICES - // Check for repeated indicies - std::vector check(num_vars_, 0); - for (i_t p = 0; p < cut.i.size(); p++) { - if (check[cut.i[p]] != 0) { - printf("repeated index in generated cut\n"); - assert(check[cut.i[p]] == 0); + for (i_t k = 0; k < inequality.size(); k++) { + const i_t j = inequality.index(k); + f_t aj = inequality.coeff(k); + if (var_types[j] == variable_type_t::INTEGER) { + cut.vector.x[k] = f(aj, beta); + } else { + cut.vector.x[k] = h(aj); + } + if (cut.vector.x[k] != cut.vector.x[k]) { + printf("cut.x[%d] %e != cut.x[%d] %e. aj %e beta %e var type %d\n", + k, + cut.vector.x[k], + k, + cut.vector.x[k], + aj, + beta, + static_cast(var_types[j])); + exit(1); } - check[cut.i[p]] = 1; } -#endif - if (cut.i.size() == 0) { return -1; } + return true; +} - return 0; +template +f_t complemented_mixed_integer_rounding_cut_t::compute_violation( + const inequality_t& cut, const std::vector& xstar) +{ + f_t dot = cut.vector.dot(xstar); + f_t cut_violation = cut.rhs - dot; + return cut_violation; } template -void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_t& lp, - csr_matrix_t& Arow, - sparse_vector_t& cut, - f_t& cut_rhs) +void complemented_mixed_integer_rounding_cut_t::substitute_slacks( + const lp_problem_t& lp, csr_matrix_t& Arow, inequality_t& cut) { // Remove slacks from the cut // So that the cut is only over the original variables bool found_slack = false; i_t cut_nz = 0; std::vector cut_indices; - cut_indices.reserve(cut.i.size()); - -#ifdef CHECK_WORKSPACE - for (i_t j = 0; j < x_workspace_.size(); j++) { - if (x_workspace_[j] != 0.0) { - printf("Begin Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - assert(x_workspace_[j] == 0.0); - } - if (x_mark_[j] != 0) { - printf("Begin Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - assert(x_mark_[j] == 0); - } - } -#endif + cut_indices.reserve(cut.size()); - for (i_t k = 0; k < cut.i.size(); k++) { - const i_t j = cut.i[k]; - const f_t cj = cut.x[k]; + for (i_t k = 0; k < cut.size(); k++) { + const i_t j = cut.index(k); + const f_t cj = cut.coeff(k); if (is_slack_[j]) { found_slack = true; const i_t slack_start = lp.A.col_start[j]; @@ -1766,198 +3108,97 @@ void mixed_integer_rounding_cut_t::substitute_slacks(const lp_problem_ // sum_{k != j} C(k) * x_k + sum_{h != j} -C(j)/alpha * A(i, h) * x_h >= cut_rhs - C(j)/alpha // * rhs_i const i_t i = slack_rows_[j]; - cut_rhs -= cj * lp.rhs[i] / alpha; + cut.rhs -= cj * lp.rhs[i] / alpha; const i_t row_start = Arow.row_start[i]; const i_t row_end = Arow.row_start[i + 1]; for (i_t q = row_start; q < row_end; q++) { const i_t h = Arow.j[q]; if (h != j) { const f_t aih = Arow.x[q]; - x_workspace_[h] -= cj * aih / alpha; - if (!x_mark_[h]) { - x_mark_[h] = 1; - cut_indices.push_back(h); - cut_nz++; - } + scratch_pad_.add_to_pad(h, -cj * aih / alpha); } else { const f_t aij = Arow.x[q]; if (std::abs(aij) != 1.0) { - settings_.log.printf( - "Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j); + printf("Slack row %d has non-unit coefficient %e for variable %d\n", i, aij, j); assert(std::abs(aij) == 1.0); } } } } else { - x_workspace_[j] += cj; - if (!x_mark_[j]) { - x_mark_[j] = 1; - cut_indices.push_back(j); - cut_nz++; - } + scratch_pad_.add_to_pad(j, cj); } } if (found_slack) { - cut.i.reserve(cut_nz); - cut.x.reserve(cut_nz); - cut.i.clear(); - cut.x.clear(); - - for (i_t k = 0; k < cut_nz; k++) { - const i_t j = cut_indices[k]; - - // Check for small coefficients - const f_t aj = x_workspace_[j]; - if (std::abs(aj) < 1e-6) { - if (aj >= 0.0 && lp.upper[j] < inf) { - // Move this to the right-hand side - cut_rhs -= aj * lp.upper[j]; - continue; - } else if (aj <= 0.0 && lp.lower[j] > -inf) { - cut_rhs += aj * lp.lower[j]; - continue; - } else { - } - } - - cut.i.push_back(j); - cut.x.push_back(x_workspace_[j]); - } + scratch_pad_.get_pad(cut.vector.i, cut.vector.x); // Sort the cut cut.sort(); } // Clear the workspace - for (i_t jj : cut_indices) { - x_workspace_[jj] = 0.0; - x_mark_[jj] = 0; - } - -#ifdef CHECK_WORKSPACE - for (i_t j = 0; j < x_workspace_.size(); j++) { - if (x_workspace_[j] != 0.0) { - printf("End Dirty x_workspace_[%d] = %e\n", j, x_workspace_[j]); - assert(x_workspace_[j] == 0.0); - } - if (x_mark_[j] != 0) { - printf("End Dirty x_mark_[%d] = %d\n", j, x_mark_[j]); - assert(x_mark_[j] == 0); - } - } -#endif -} - -template -f_t mixed_integer_rounding_cut_t::compute_violation(const sparse_vector_t& cut, - f_t cut_rhs, - const std::vector& xstar) -{ - f_t dot = cut.dot(xstar); - f_t cut_violation = cut_rhs - dot; - return cut_violation; + scratch_pad_.clear_pad(); } template -void mixed_integer_rounding_cut_t::combine_rows( +f_t complemented_mixed_integer_rounding_cut_t::combine_rows( const lp_problem_t& lp, csr_matrix_t& Arow, i_t xj, - const sparse_vector_t& pivot_row, - f_t pivot_row_rhs, - sparse_vector_t& inequality, - f_t& inequality_rhs) + const inequality_t& pivot_row, + inequality_t& inequality) { -#ifdef CHECK_WORKSPACE - for (i_t k = 0; k < x_workspace_.size(); k++) { - if (x_workspace_[k] != 0.0) { - printf("Dirty x_workspace_[%d] = %e\n", k, x_workspace_[k]); - assert(x_workspace_[k] == 0.0); - } - if (x_mark_[k] != 0) { - printf("Dirty x_mark_[%d] = %d\n", k, x_mark_[k]); - assert(x_mark_[k] == 0); - } - } -#endif - - indices_.clear(); - indices_.reserve(pivot_row.i.size() + inequality.i.size()); - // Find the coefficient associated with variable xj in the pivot row f_t a_l_j = 0.0; - for (i_t k = 0; k < pivot_row.i.size(); k++) { - const i_t j = pivot_row.i[k]; + for (i_t k = 0; k < pivot_row.size(); k++) { + const i_t j = pivot_row.index(k); if (j == xj) { - a_l_j = pivot_row.x[k]; + a_l_j = pivot_row.coeff(k); break; } } - if (a_l_j == 0) { return; } + if (a_l_j == 0) { + printf("Pivot row has no coefficient for variable %d\n", xj); + return 0.0; + } f_t a_i_j = 0.0; - i_t nz = 0; // Store the inequality in the workspace // and save the coefficient associated with variable xj - for (i_t k = 0; k < inequality.i.size(); k++) { - const i_t j = inequality.i[k]; + for (i_t k = 0; k < inequality.size(); k++) { + const i_t j = inequality.index(k); if (j != xj) { - x_workspace_[j] = inequality.x[k]; - x_mark_[j] = 1; - indices_.push_back(j); - nz++; + scratch_pad_.add_to_pad(j, inequality.coeff(k)); } else { - a_i_j = inequality.x[k]; + a_i_j = inequality.coeff(k); } } + if (a_i_j == 0.0) { + printf("Inequality has zero coefficient for variable %d\n", xj); + scratch_pad_.clear_pad(); + return 0.0; + } f_t pivot_value = a_i_j / a_l_j; // Adjust the rhs of the inequality - inequality_rhs -= pivot_value * pivot_row_rhs; + inequality.rhs -= pivot_value * pivot_row.rhs; // Adjust the coefficients of the inequality // based on the nonzeros in the pivot row - for (i_t k = 0; k < pivot_row.i.size(); k++) { - const i_t j = pivot_row.i[k]; - if (j != xj) { - x_workspace_[j] -= pivot_value * pivot_row.x[k]; - if (!x_mark_[j]) { - x_mark_[j] = 1; - indices_.push_back(j); - nz++; - } - } + for (i_t k = 0; k < pivot_row.size(); k++) { + const i_t j = pivot_row.index(k); + if (j != xj) { scratch_pad_.add_to_pad(j, -pivot_value * pivot_row.coeff(k)); } } // Store the new inequality - inequality.i.resize(nz); - inequality.x.resize(nz); - for (i_t k = 0; k < nz; k++) { - inequality.i[k] = indices_[k]; - inequality.x[k] = x_workspace_[indices_[k]]; - } - -#ifdef CHECK_REPEATED_INDICES - // Check for repeated indices - std::vector check(num_vars_, 0); - for (i_t k = 0; k < inequality.i.size(); k++) { - if (check[inequality.i[k]] == 1) { - printf("repeated index\n"); - assert(check[inequality.i[k]] == 0); - } - check[inequality.i[k]] = 1; - } -#endif + scratch_pad_.get_pad(inequality.vector.i, inequality.vector.x); // Clear the workspace - for (i_t j : indices_) { - x_workspace_[j] = 0.0; - x_mark_[j] = 0; - } - indices_.clear(); + scratch_pad_.clear_pad(); + + return -pivot_value; } template @@ -1993,15 +3234,14 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( const lp_problem_t& lp, const simplex_solver_settings_t& settings, const std::vector& var_types, - sparse_vector_t& inequality, - f_t& inequality_rhs) + inequality_t& inequality) { const bool verbose = false; // Count the number of continuous variables in the inequality i_t num_continuous = 0; - const i_t nz = inequality.i.size(); + const i_t nz = inequality.size(); for (i_t k = 0; k < nz; k++) { - const i_t j = inequality.i[k]; + const i_t j = inequality.index(k); if (var_types[j] == variable_type_t::CONTINUOUS) { num_continuous++; } } @@ -2009,10 +3249,10 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( // We assume the inequality is of the form sum_j a_j x_j <= rhs for (i_t k = 0; k < nz; k++) { - const i_t j = inequality.i[k]; + const i_t j = inequality.index(k); const f_t l_j = lp.lower[j]; const f_t u_j = lp.upper[j]; - const f_t a_j = inequality.x[k]; + const f_t a_j = inequality.coeff(k); if (var_types[j] == variable_type_t::CONTINUOUS) { if (a_j == 0.0) { continue; } @@ -2022,13 +3262,13 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( // sum_{k != j} a_k x_k + a_j x_j <= rhs // sum_{k != j} a_k x_k + a_j (v_j + l_j) <= rhs // sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j - inequality_rhs -= a_j * l_j; + inequality.rhs -= a_j * l_j; transformed_variables_[j] = -1; // We now have a_j * v_j with a_j, v_j >= 0 // So we have sum_{k != j} a_k x_k <= sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j // So we can now drop the continuous variable v_j - inequality.x[k] = 0.0; + inequality.vector.x[k] = 0.0; } else if (a_j < 0.0 && u_j < inf) { // w_j = u_j - x_j >= 0 @@ -2036,13 +3276,13 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( // sum_{k != j} a_k x_k + a_j x_j <= rhs // sum_{k != j} a_k x_k + a_j (u_j - w_j) <= rhs // sum_{k != j} a_k x_k - a_j w_j <= rhs - a_j u_j - inequality_rhs -= a_j * u_j; + inequality.rhs -= a_j * u_j; transformed_variables_[j] = 1; // We now have a_j * w_j with a_j, w_j >= 0 // So we have sum_{k != j} a_k x_k <= sum_{k != j} a_k x_k + a_j w_j <= rhs - a_j u_j // So we can now drop the continuous variable w_j - inequality.x[k] = 0.0; + inequality.vector.x[k] = 0.0; } else { // We can't keep the coefficient of the continuous variable positive // This means we can't eliminate the continuous variable @@ -2058,7 +3298,7 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( // sum_{k != j} a_k x_k + a_j x_j <= rhs // sum_{k != j} a_k x_k + a_j (v_j + l_j) <= rhs // sum_{k != j} a_k x_k + a_j v_j <= rhs - a_j l_j - inequality_rhs -= a_j * l_j; + inequality.rhs -= a_j * l_j; } else if (transformed_variables_[j] == 1) { // We are closer to the finite upper bound // w_j = u_j - x_j >= 0 @@ -2066,44 +3306,43 @@ i_t strong_cg_cut_t::remove_continuous_variables_integers_nonnegative( // sum_{k != j} a_k x_k + a_j x_j <= rhs // sum_{k != j} a_k x_k + a_j (u_j - w_j) <= rhs // sum_{k != j} a_k x_k - a_j w_j <= rhs - a_j u_j - inequality_rhs -= a_j * u_j; - inequality.x[k] *= -1.0; + inequality.rhs -= a_j * u_j; + inequality.vector.x[k] *= -1.0; } } } // Squeeze out the zero coefficents - sparse_vector_t new_inequality(inequality.n, 0); - inequality.squeeze(new_inequality); - inequality = new_inequality; + sparse_vector_t new_inequality_vector(inequality.vector.n, 0); + inequality.vector.squeeze(new_inequality_vector); + inequality.vector = new_inequality_vector; return 0; } template void strong_cg_cut_t::to_original_integer_variables(const lp_problem_t& lp, - sparse_vector_t& cut, - f_t& cut_rhs) + inequality_t& cut) { // We expect a cut of the form sum_j a_j y_j <= rhs // where y_j >= 0 is a transformed variable // We need to convert it back into a cut on the original variables - for (i_t k = 0; k < cut.i.size(); k++) { - const i_t j = cut.i[k]; - const f_t a_j = cut.x[k]; + for (i_t k = 0; k < cut.size(); k++) { + const i_t j = cut.index(k); + const f_t a_j = cut.coeff(k); if (transformed_variables_[j] == -1) { // sum_{k != j} a_k x_k + a_j v_j <= rhs // v_j = x_j - l_j >= 0, // sum_{k != j} a_k x_k + a_j (x_j - l_j) <= rhs // sum_{k != j} a_k x_k + a_j x_j <= rhs + a_j l_j - cut_rhs += a_j * lp.lower[j]; + cut.rhs += a_j * lp.lower[j]; } else if (transformed_variables_[j] == 1) { // sum_{k != j} a_k x_k + a_j w_j <= rhs // w_j = u_j - x_j >= 0 // sum_{k != j} a_k x_k + a_j (u_j - x_j) <= rhs // sum_{k != j} a_k x_k - a_j x_j <= rhs - a_j u_j - cut_rhs -= a_j * lp.upper[j]; - cut.x[k] *= -1.0; + cut.rhs -= a_j * lp.upper[j]; + cut.vector.x[k] *= -1.0; } } } @@ -2112,44 +3351,37 @@ template i_t strong_cg_cut_t::generate_strong_cg_cut_integer_only( const simplex_solver_settings_t& settings, const std::vector& var_types, - const sparse_vector_t& inequality, - f_t inequality_rhs, - sparse_vector_t& cut, - f_t& cut_rhs) + const inequality_t& inequality, + inequality_t& cut) { // We expect an inequality of the form sum_j a_j x_j <= rhs // where all the variables x_j are integer and nonnegative // We then apply the CG cut: // sum_j floor(a_j) x_j <= floor(rhs) - cut.i.reserve(inequality.i.size()); - cut.x.reserve(inequality.i.size()); - cut.i.clear(); - cut.x.clear(); + cut.reserve(inequality.size()); + cut.clear(); - f_t a_0 = inequality_rhs; + f_t a_0 = inequality.rhs; f_t f_a_0 = fractional_part(a_0); if (f_a_0 == 0.0) { // f(a_0) == 0.0 so we do a weak CG cut - cut.i.reserve(inequality.i.size()); - cut.x.reserve(inequality.i.size()); - cut.i.clear(); - cut.x.clear(); - for (i_t k = 0; k < inequality.i.size(); k++) { - const i_t j = inequality.i[k]; - const f_t a_j = inequality.x[k]; + cut.reserve(inequality.size()); + cut.clear(); + for (i_t k = 0; k < inequality.size(); k++) { + const i_t j = inequality.index(k); + const f_t a_j = inequality.coeff(k); if (var_types[j] == variable_type_t::INTEGER) { - cut.i.push_back(j); - cut.x.push_back(std::floor(a_j)); + cut.push_back(j, std::floor(a_j)); } else { return -1; } } - cut_rhs = std::floor(inequality_rhs); + cut.rhs = std::floor(inequality.rhs); } else { return generate_strong_cg_cut_helper( - inequality.i, inequality.x, inequality_rhs, var_types, cut, cut_rhs); + inequality.vector.i, inequality.vector.x, inequality.rhs, var_types, cut); } return 0; } @@ -2160,8 +3392,7 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( const std::vector& coefficients, f_t rhs, const std::vector& var_types, - sparse_vector_t& cut, - f_t& cut_rhs) + inequality_t& cut) { const bool verbose = false; const i_t nz = indicies.size(); @@ -2182,10 +3413,8 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( f_t upper = 1.0 / static_cast(k); if (verbose) { printf("f_a_0 %e lower %e upper %e alpha %e\n", f_a_0, lower, upper, alpha); } if (f_a_0 >= lower && f_a_0 < upper) { - cut.i.reserve(nz); - cut.x.reserve(nz); - cut.i.clear(); - cut.x.clear(); + cut.reserve(nz); + cut.clear(); for (i_t q = 0; q < nz; q++) { const i_t j = indicies[q]; const f_t a_j = coefficients[q]; @@ -2193,8 +3422,7 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( const f_t f_a_j = fractional_part(a_j); const f_t tol = 1e-4; if (f_a_j <= f_a_0 + tol) { - cut.i.push_back(j); - cut.x.push_back((k + 1.0) * std::floor(a_j)); + cut.push_back(j, (k + 1.0) * std::floor(a_j)); if (verbose) { printf("j %d a_j %e f_a_j %e k %d\n", j, a_j, f_a_j, k); } } else { // Find p such that p <= k * f(a_j) < p + 1 @@ -2203,11 +3431,9 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( const f_t rhs_j = f_a_0 + static_cast(p) / static_cast(k) * alpha; const i_t coeff = (k + 1) * static_cast(std::floor(a_j)) + p; if (f_a_j > rhs_j + tol) { - cut.i.push_back(j); - cut.x.push_back(static_cast(coeff + 1)); + cut.push_back(j, static_cast(coeff + 1)); } else { - cut.i.push_back(j); - cut.x.push_back(static_cast(coeff)); + cut.push_back(j, static_cast(coeff)); } } } else { @@ -2218,11 +3444,11 @@ i_t strong_cg_cut_t::generate_strong_cg_cut_helper( if (verbose) { printf("Error: k %d lower %e f(a_0) %e upper %e\n", k, lower, f_a_0, upper); } return -1; } - cut_rhs = (k + 1.0) * std::floor(rhs); + cut.rhs = (k + 1.0) * std::floor(rhs); if (verbose) { - printf("Generated strong CG cut: k %d f_a_0 %e cut_rhs %e\n", k, f_a_0, cut_rhs); - for (i_t q = 0; q < cut.i.size(); q++) { - if (cut.x[q] != 0.0) { printf("%.16e x%d ", cut.x[q], cut.i[q]); } + printf("Generated strong CG cut: k %d f_a_0 %e cut_rhs %e\n", k, f_a_0, cut.rhs); + for (i_t q = 0; q < cut.size(); q++) { + if (cut.vector.x[q] != 0.0) { printf("%.16e x%d ", cut.vector.x[q], cut.vector.i[q]); } } printf("\n"); printf("Original inequality rhs %e nz %ld\n", rhs, coefficients.size()); @@ -2239,11 +3465,9 @@ i_t strong_cg_cut_t::generate_strong_cg_cut( const lp_problem_t& lp, const simplex_solver_settings_t& settings, const std::vector& var_types, - const sparse_vector_t& inequality, - const f_t inequality_rhs, + const inequality_t& inequality, const std::vector& xstar, - sparse_vector_t& cut, - f_t& cut_rhs) + inequality_t& cut) { #ifdef PRINT_INEQUALITY_INFO for (i_t k = 0; k < inequality.i.size(); k++) { @@ -2258,36 +3482,33 @@ i_t strong_cg_cut_t::generate_strong_cg_cut( // and transform integer variables to be nonnegative // Copy the inequality since remove continuous variables will modify it - sparse_vector_t cg_inequality = inequality; - f_t cg_inequality_rhs = inequality_rhs; - i_t status = remove_continuous_variables_integers_nonnegative( - lp, settings, var_types, cg_inequality, cg_inequality_rhs); + inequality_t cg_inequality = inequality; + i_t status = + remove_continuous_variables_integers_nonnegative(lp, settings, var_types, cg_inequality); if (status != 0) { // Try negating the equality and see if that helps cg_inequality = inequality; cg_inequality.negate(); - cg_inequality_rhs = -inequality_rhs; - status = remove_continuous_variables_integers_nonnegative( - lp, settings, var_types, cg_inequality, cg_inequality_rhs); + status = + remove_continuous_variables_integers_nonnegative(lp, settings, var_types, cg_inequality); } if (status == 0) { // We have an inequality with no continuous variables // Generate a CG cut - status = generate_strong_cg_cut_integer_only( - settings, var_types, cg_inequality, cg_inequality_rhs, cut, cut_rhs); + status = generate_strong_cg_cut_integer_only(settings, var_types, cg_inequality, cut); if (status != 0) { return -1; } // Convert the CG cut back to the original variables - to_original_integer_variables(lp, cut, cut_rhs); + to_original_integer_variables(lp, cut); // Check for violation - f_t dot = cut.dot(xstar); + f_t dot = cut.vector.dot(xstar); // If the cut is violated we will have: sum_j a_j xstar_j > rhs - f_t violation = dot - cut_rhs; + f_t violation = dot - cut.rhs; const f_t min_violation_threshold = 1e-6; if (violation > min_violation_threshold) { // Note that no slacks are currently present. Since slacks are currently treated as @@ -2296,7 +3517,6 @@ i_t strong_cg_cut_t::generate_strong_cg_cut( // The CG cut is in the form: sum_j a_j x_j <= rhs // The cut pool wants the cut in the form: sum_j a_j x_j >= rhs cut.negate(); - cut_rhs *= -1.0; return 0; } } @@ -2784,7 +4004,8 @@ template class cut_pool_t; template class cut_generation_t; template class knapsack_generation_t; template class tableau_equality_t; -template class mixed_integer_rounding_cut_t; +template class complemented_mixed_integer_rounding_cut_t; +template class variable_bounds_t; template int add_cuts(const simplex_solver_settings_t& settings, const csr_matrix_t& cuts, diff --git a/cpp/src/cuts/cuts.hpp b/cpp/src/cuts/cuts.hpp index 4f55e96e4d..91806d81aa 100644 --- a/cpp/src/cuts/cuts.hpp +++ b/cpp/src/cuts/cuts.hpp @@ -15,6 +15,9 @@ #include #include +#include +#include +#include #include #include #include @@ -22,6 +25,11 @@ #include #include +namespace cuopt::linear_programming::detail { +template +struct clique_table_t; +} + namespace cuopt::linear_programming::dual_simplex { enum cut_type_t : int8_t { @@ -29,7 +37,82 @@ enum cut_type_t : int8_t { MIXED_INTEGER_ROUNDING = 1, KNAPSACK = 2, CHVATAL_GOMORY = 3, - MAX_CUT_TYPE = 4 + CLIQUE = 4, + MAX_CUT_TYPE = 5 +}; + +template +struct cut_gap_closure_t { + f_t initial_gap{0.0}; + f_t final_gap{0.0}; + f_t gap_closed{0.0}; + f_t gap_closed_ratio{0.0}; +}; + +template +cut_gap_closure_t compute_cut_gap_closure(f_t objective_reference, + f_t objective_before_cuts, + f_t objective_after_cuts) +{ + const f_t initial_gap = std::abs(objective_reference - objective_before_cuts); + const f_t final_gap = std::abs(objective_reference - objective_after_cuts); + const f_t gap_closed = initial_gap - final_gap; + constexpr f_t eps = static_cast(1e-12); + const f_t gap_closed_ratio = initial_gap > eps ? gap_closed / initial_gap : static_cast(0.0); + return {initial_gap, final_gap, gap_closed, gap_closed_ratio}; +} + +template +struct inequality_t { + inequality_t() : vector(), rhs(0.0) {} + inequality_t(i_t num_cols) : vector(num_cols, 0), rhs(0.0) {} + inequality_t(csr_matrix_t& A, i_t row, f_t rhs_value) : vector(A, row), rhs(rhs_value) + { + } + sparse_vector_t vector; + f_t rhs; + + void push_back(i_t j, f_t x) + { + vector.i.push_back(j); + vector.x.push_back(x); + } + void clear() + { + vector.i.clear(); + vector.x.clear(); + } + void reserve(size_t n) + { + vector.i.reserve(n); + vector.x.reserve(n); + } + size_t size() const { return vector.i.size(); } + i_t index(i_t k) const { return vector.i[k]; } + f_t coeff(i_t k) const { return vector.x[k]; } + void negate() + { + vector.negate(); + rhs *= -1.0; + } + void sort() { vector.sort(); } + void squeeze(inequality_t& out) const + { + vector.squeeze(out.vector); + out.rhs = rhs; + } + void scale(f_t factor) + { + vector.scale(factor); + rhs *= factor; + } + void print() const + { + for (i_t k = 0; k < size(); k++) { + printf("%g x%d ", coeff(k), index(k)); + } + printf("\nrhs %g\n", rhs); + } }; template @@ -48,8 +131,9 @@ struct cut_info_t { num_cuts[static_cast(cut_type)]++; } } - const char* cut_type_names[MAX_CUT_TYPE] = {"Gomory ", "MIR ", "Knapsack ", "Strong CG"}; - std::array num_cuts = {0}; + const char* cut_type_names[MAX_CUT_TYPE] = { + "Gomory ", "MIR ", "Knapsack ", "Strong CG", "Clique "}; + std::array num_cuts = {0}; }; template @@ -72,10 +156,8 @@ void print_cut_types(const std::string& prefix, cut_info.record_cut_types(cut_types); settings.log.printf("%s: ", prefix.c_str()); for (i_t i = 0; i < MAX_CUT_TYPE; i++) { - settings.log.printf("%s cuts: %d ", cut_info.cut_type_names[i], cut_info.num_cuts[i]); - if (i < MAX_CUT_TYPE - 1) { settings.log.printf(", "); } + settings.log.printf("%s cuts: %d\n", cut_info.cut_type_names[i], cut_info.num_cuts[i]); } - settings.log.printf("\n"); } template @@ -84,6 +166,19 @@ f_t fractional_part(f_t a) return a - std::floor(a); } +template +bool add_work_estimate(f_t accesses, + f_t* work_estimate, + f_t max_work_estimate, + bool* work_limit_reached = nullptr) +{ + if (work_estimate == nullptr) { return false; } + *work_estimate += accesses; + const bool over_work_limit = *work_estimate > max_work_estimate; + if (over_work_limit && work_limit_reached != nullptr) { *work_limit_reached = true; } + return over_work_limit; +} + // Computes a permutation of a score vector that puts the highest scores first template void best_score_first_permutation(std::vector& scores, std::vector& permutation) @@ -119,6 +214,15 @@ void verify_cuts_against_saved_solution(const csr_matrix_t& cuts, const std::vector& cut_rhs, const std::vector& saved_solution); +// Test-only helper to run the production maximal-clique algorithm used by clique cuts. +// adjacency_list must contain local vertex indices in [0, n_vertices). +std::vector> find_maximal_cliques_for_test( + const std::vector>& adjacency_list, + const std::vector& weights, + double min_weight, + int max_calls, + double time_limit); + template class cut_pool_t { public: @@ -136,7 +240,7 @@ class cut_pool_t { // Add a cut in the form: cut'*x >= rhs. // We expect that the cut is violated by the current relaxation xstar // cut'*xstart < rhs - void add_cut(cut_type_t cut_type, const sparse_vector_t& cut, f_t rhs); + void add_cut(cut_type_t cut_type, const inequality_t& cut); void score_cuts(std::vector& x_relax); @@ -172,6 +276,7 @@ class cut_pool_t { std::vector cut_orthogonality_; std::vector cut_scores_; std::vector best_cuts_; + const f_t min_cut_distance_{1e-4}; }; template @@ -190,8 +295,7 @@ class knapsack_generation_t { const std::vector& var_types, const std::vector& xstar, i_t knapsack_row, - sparse_vector_t& cut, - f_t& cut_rhs); + inequality_t& cut); i_t num_knapsack_constraints() const { return knapsack_constraints_.size(); } const std::vector& get_knapsack_constraints() const { return knapsack_constraints_; } @@ -214,32 +318,49 @@ class knapsack_generation_t { const simplex_solver_settings_t& settings_; }; -// Forward declaration +// Forward declarations template class mixed_integer_rounding_cut_t; +template +class variable_bounds_t; + template class cut_generation_t { public: - cut_generation_t(cut_pool_t& cut_pool, - const lp_problem_t& lp, - const simplex_solver_settings_t& settings, - csr_matrix_t& Arow, - const std::vector& new_slacks, - const std::vector& var_types) - : cut_pool_(cut_pool), knapsack_generation_(lp, settings, Arow, new_slacks, var_types) + cut_generation_t( + cut_pool_t& cut_pool, + const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + csr_matrix_t& Arow, + const std::vector& new_slacks, + const std::vector& var_types, + const user_problem_t& user_problem, + std::shared_ptr> clique_table = nullptr, + std::future>>* clique_table_future = nullptr, + std::atomic* signal_extend = nullptr) + : cut_pool_(cut_pool), + knapsack_generation_(lp, settings, Arow, new_slacks, var_types), + user_problem_(user_problem), + clique_table_(std::move(clique_table)), + clique_table_future_(clique_table_future), + signal_extend_(signal_extend) { } - void generate_cuts(const lp_problem_t& lp, + bool generate_cuts(const lp_problem_t& lp, const simplex_solver_settings_t& settings, csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, basis_update_mpf_t& basis_update, const std::vector& xstar, + const std::vector& ystar, + const std::vector& zstar, const std::vector& basic_list, - const std::vector& nonbasic_list); + const std::vector& nonbasic_list, + variable_bounds_t& variable_bounds, + f_t start_time); private: // Generate all mixed integer gomory cuts @@ -259,7 +380,9 @@ class cut_generation_t { csr_matrix_t& Arow, const std::vector& new_slacks, const std::vector& var_types, - const std::vector& xstar); + const std::vector& xstar, + const std::vector& ystar, + variable_bounds_t& variable_bounds); // Generate all knapsack cuts void generate_knapsack_cuts(const lp_problem_t& lp, @@ -269,8 +392,91 @@ class cut_generation_t { const std::vector& var_types, const std::vector& xstar); + // Generate clique cuts from conflict graph cliques + bool generate_clique_cuts(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const std::vector& xstar, + const std::vector& reduced_costs, + f_t start_time); + cut_pool_t& cut_pool_; knapsack_generation_t knapsack_generation_; + const user_problem_t& user_problem_; + std::shared_ptr> clique_table_; + std::future>>* clique_table_future_{nullptr}; + std::atomic* signal_extend_{nullptr}; +}; + +template +class scratch_pad_t { + public: + scratch_pad_t(i_t num_vars) : workspace_(num_vars, 0.0), mark_(num_vars, 0) + { + indices_.reserve(num_vars); + } + + // O(1) to add a value to the pad + void add_to_pad(i_t j, f_t value) + { + workspace_[j] += value; + if (!mark_[j]) { + mark_[j] = 1; + indices_.push_back(j); + } + } + + // O(nz) to clear the pad + void clear_pad() + { + for (i_t j : indices_) { + workspace_[j] = 0.0; + mark_[j] = 0; + } + indices_.clear(); + } + + // O(nz) to get the pad + void get_pad(std::vector& indices, std::vector& values) + { + indices.reserve(indices_.size()); + values.reserve(indices_.size()); + indices.clear(); + values.clear(); + const i_t nz = indices_.size(); + for (i_t k = 0; k < nz; k++) { + const i_t j = indices_[k]; + const f_t val = workspace_[j]; + if (val != 0.0) { + indices.push_back(j); + values.push_back(val); + } + } + } + + private: + std::vector workspace_; + std::vector mark_; + std::vector indices_; +}; + +template +class mixed_integer_gomory_cut_t { + public: + mixed_integer_gomory_cut_t() {} + + bool rational_coefficients(const std::vector& var_types, + const inequality_t& input_inequality, + inequality_t& rational_inequality); + + private: + bool rational_approximation(f_t x, + int64_t max_denominator, + int64_t& numerator, + int64_t& denominator); + + int64_t gcd(const std::vector& integers); + int64_t lcm(const std::vector& integers); }; template @@ -301,8 +507,7 @@ class tableau_equality_t { const std::vector& basic_list, const std::vector& nonbasic_list, i_t i, - sparse_vector_t& inequality, - f_t& inequality_rhs); + inequality_t& inequality); private: std::vector b_bar_; @@ -313,93 +518,227 @@ class tableau_equality_t { }; template -class mixed_integer_rounding_cut_t { +class variable_bounds_t { public: - mixed_integer_rounding_cut_t(const lp_problem_t& lp, - const simplex_solver_settings_t& settings, - const std::vector& new_slacks, - const std::vector& xstar); + variable_bounds_t(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& var_types, + const csr_matrix_t& Arow, + const std::vector& new_slacks); + + std::vector upper_offsets; + std::vector upper_variables; + std::vector upper_weights; + std::vector upper_biases; + + std::vector lower_offsets; + std::vector lower_variables; + std::vector lower_weights; + std::vector lower_biases; + + void resize(i_t new_num_cols) + { + const i_t current_upper_nz = upper_offsets.back(); + upper_offsets.resize(new_num_cols + 1, current_upper_nz); + const i_t current_lower_nz = lower_offsets.back(); + lower_offsets.resize(new_num_cols + 1, current_lower_nz); + } + + private: + f_t lower_activity(f_t lower_bound, f_t upper_bound, f_t coefficient) + { + return (coefficient > 0.0 ? lower_bound : upper_bound) * coefficient; + } - // Convert an inequality of the form: sum_j a_j x_j >= beta + f_t upper_activity(f_t lower_bound, f_t upper_bound, f_t coefficient) + { + return (coefficient > 0.0 ? upper_bound : lower_bound) * coefficient; + } + + // Returns the lower activity adjusted for the number of lower inf variables + // adjusted_lower_activity = { activity - lower_activity_i - lower_activity_j, if num_lower_inf = + // 0 + // { activity - lower_activity_i , if num_lower_inf = + // 1, lower_activity_j = -inf { activity - lower_activity_j , if + // num_lower_inf = 1, lower_activity_i != -inf { activity , if + // num_lower_inf = 2, lower_activity_i = lower_activity_j = -inf { -inf + // , if num_lower_inf > 2 + f_t adjusted_lower_activity(f_t activity, + i_t num_lower_inf, + f_t lower_activity_i, + f_t lower_activity_j) + { + if (num_lower_inf == 0) { + return activity - lower_activity_i - lower_activity_j; + } else if (num_lower_inf == 1 && lower_activity_j == -inf) { + return activity - lower_activity_i; + } else if (num_lower_inf == 1 && lower_activity_i == -inf) { + return activity - lower_activity_j; + } else if (num_lower_inf == 2 && lower_activity_i == -inf && lower_activity_j == -inf) { + return activity; + } else { + return -inf; + } + } + + // Returns the upper activity adjusted for the number of upper inf variables + // adjusted_upper_activity = { activity - upper_activity_i - upper_activity_j, if num_upper_inf = + // 0 + // { activity - upper_activity_i , if num_upper_inf = + // 1, upper_activity_j = inf { activity - upper_activity_j , if + // num_upper_inf = 1, upper_activity_i != inf { activity , if + // num_upper_inf = 2, upper_activity_i = upper_activity_j = inf { inf , + // if num_upper_inf > 2 + f_t adjusted_upper_activity(f_t activity, + i_t num_upper_inf, + f_t upper_activity_i, + f_t upper_activity_j) + { + if (num_upper_inf == 0) { + return activity - upper_activity_i - upper_activity_j; + } else if (num_upper_inf == 1 && upper_activity_j == inf) { + return activity - upper_activity_i; + } else if (num_upper_inf == 1 && upper_activity_i == inf) { + return activity - upper_activity_j; + } else if (num_upper_inf == 2 && upper_activity_i == inf && upper_activity_j == inf) { + return activity; + } else { + return inf; + } + } + + std::vector upper_activities_; + std::vector num_pos_inf_; + std::vector lower_activities_; + std::vector num_neg_inf_; + + std::vector slack_map_; +}; + +template +class complemented_mixed_integer_rounding_cut_t { + public: + complemented_mixed_integer_rounding_cut_t(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& new_slacks); + + void compute_initial_scores_for_rows(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const csr_matrix_t& Arow, + const std::vector& xstar, + const std::vector& ystar, + std::vector& score); + + // Perform bound substitution for the continuous variables using simple bounds + // and variable bounds. And bound substitution for the integer variables + // using simple bounds. + void bound_substitution(const lp_problem_t& lp, + const variable_bounds_t& variable_bounds, + const std::vector& var_types, + const std::vector& xstar, + std::vector& transformed_xstar); + + // Converts an inequality of the form: sum_j a_j x_j >= beta // with l_j <= x_j <= u_j into the form: // sum_{j not in L union U} d_j x_j + sum_{j in L} d_j v_j // + sum_{j in U} d_j w_j >= delta, // where v_j = x_j - l_j for j in L - // and w_j = u_j - x_j for j in Us - void to_nonnegative(const lp_problem_t& lp, - sparse_vector_t& inequality, - f_t& rhs); - - void relaxation_to_nonnegative(const lp_problem_t& lp, - const std::vector& xstar, - std::vector& xstar_nonnegative); + // and w_j = u_j - x_j for j in U + void transform_inequality(const variable_bounds_t& variable_bounds, + const std::vector& var_type, + inequality_t& inequality); - // Convert an inequality of the form: + // Converts an inequality of the form: // sum_{j not in L union U} d_j x_j + sum_{j in L} d_j v_j - // + sum_{j in U} d_j w_j >= delta + // + sum_{j in U} d_j w_j >= delta, // where v_j = x_j - l_j for j in L // and w_j = u_j - x_j for j in U - // back to an inequality on the original variables - // sum_j a_j x_j >= beta - void to_original(const lp_problem_t& lp, - sparse_vector_t& inequality, - f_t& rhs); + // back to the form: sum_j a_j x_j >= beta + // with l_j <= x_j <= u_j + void untransform_inequality(const variable_bounds_t& variable_bounds, + const std::vector& var_type, + inequality_t& inequality); + + bool cut_generation_heuristic(const inequality_t& transformed_inequality, + const std::vector& var_types, + const std::vector& transformed_xstar, + inequality_t& transformed_cut, + f_t& work_estimate); + + bool scale_uncomplement_and_generate_cut(const std::vector& var_types, + const std::vector& transformed_xstar, + const std::vector& complemented_indices, + const inequality_t& complemented_inequality, + f_t delta, + inequality_t& cut_delta, + f_t& work_estimate); + + // This routine takes an inequality and generates the MIR cut + bool generate_cut_nonnegative_maintain_indicies(const inequality_t& inequality, + const std::vector& var_types, + inequality_t& cut); + + f_t compute_violation(const inequality_t& cut, const std::vector& xstar); + + f_t new_upper(i_t j) const { return transformed_upper_[j]; } // Given a cut of the form sum_j d_j x_j >= beta // with l_j <= x_j <= u_j, try to remove coefficients d_j // with | d_j | < epsilon void remove_small_coefficients(const std::vector& lower_bounds, const std::vector& upper_bounds, - sparse_vector_t& cut, - f_t& cut_rhs); - - // Given an inequality sum_j a_j x_j >= beta, x_j >= 0, x_j in Z, j in I - // generate an MIR cut of the form sum_j d_j x_j >= delta - i_t generate_cut_nonnegative(const sparse_vector_t& a, - f_t beta, - const std::vector& var_types, - sparse_vector_t& cut, - f_t& cut_rhs); - - f_t compute_violation(const sparse_vector_t& cut, - f_t cut_rhs, - const std::vector& xstar); - - i_t generate_cut(const sparse_vector_t& a, - f_t beta, - const std::vector& upper_bounds, - const std::vector& lower_bounds, - const std::vector& var_types, - sparse_vector_t& cut, - f_t& cut_rhs); + inequality_t& cut); void substitute_slacks(const lp_problem_t& lp, csr_matrix_t& Arow, - sparse_vector_t& cut, - f_t& cut_rhs); + inequality_t& cut); // Combine the pivot row with the inequality to eliminate the variable j // The new inequality is returned in inequality and inequality_rhs - void combine_rows(const lp_problem_t& lp, - csr_matrix_t& Arow, - i_t j, - const sparse_vector_t& pivot_row, - f_t pivot_row_rhs, - sparse_vector_t& inequality, - f_t& inequality_rhs); + // The multiplier for the pivot row is returned + f_t combine_rows(const lp_problem_t& lp, + csr_matrix_t& Arow, + i_t j, + const inequality_t& pivot_row, + inequality_t& inequality); + + const f_t get_lb_star(i_t j) const { return lb_star_[j]; } + const f_t get_ub_star(i_t j) const { return ub_star_[j]; } + + const i_t slack_rows(i_t j) const { return slack_rows_[j]; } + const i_t slack_cols(i_t i) const { return slack_cols_[i]; } + + bool scale_and_generate_mir_cut(const std::vector& var_types, + const std::vector& transformed_xstar, + const inequality_t& inequality, + f_t divisor, + std::vector>& cuts, + std::vector& violations, + std::vector& deltas); + + bool check_violation_and_add_cut(const inequality_t& inequality, + const std::vector& xstar, + f_t divisor, + std::vector>& cuts, + std::vector& violations, + std::vector& deltas); private: - i_t num_vars_; - const simplex_solver_settings_t& settings_; - std::vector x_workspace_; - std::vector x_mark_; - std::vector has_lower_; - std::vector has_upper_; std::vector is_slack_; - std::vector slack_rows_; - std::vector indices_; - std::vector bound_info_; - bool needs_complement_; + std::vector + slack_rows_; // slack_rows_[j] = i, if variable j is slack for row i, -1 is sentinal value + std::vector + slack_cols_; // slack_cols_[i] = j, if variable j is slack for row i -1 is sentinal value + + std::vector lb_variable_; + std::vector lb_star_; + std::vector ub_variable_; + std::vector ub_star_; + + std::vector bound_changed_; + std::vector transformed_upper_; + + scratch_pad_t scratch_pad_; }; template @@ -412,37 +751,29 @@ class strong_cg_cut_t { i_t generate_strong_cg_cut(const lp_problem_t& lp, const simplex_solver_settings_t& settings, const std::vector& var_types, - const sparse_vector_t& inequality, - const f_t inequality_rhs, + const inequality_t& inequality, const std::vector& xstar, - sparse_vector_t& cut, - f_t& cut_rhs); + inequality_t& cut); i_t remove_continuous_variables_integers_nonnegative( const lp_problem_t& lp, const simplex_solver_settings_t& settings, const std::vector& var_types, - sparse_vector_t& inequality, - f_t& inequality_rhs); + inequality_t& inequality); - void to_original_integer_variables(const lp_problem_t& lp, - sparse_vector_t& cut, - f_t& cut_rhs); + void to_original_integer_variables(const lp_problem_t& lp, inequality_t& cut); i_t generate_strong_cg_cut_integer_only(const simplex_solver_settings_t& settings, const std::vector& var_types, - const sparse_vector_t& inequality, - f_t inequality_rhs, - sparse_vector_t& cut, - f_t& cut_rhs); + const inequality_t& inequality, + inequality_t& cut); private: i_t generate_strong_cg_cut_helper(const std::vector& indicies, const std::vector& coefficients, f_t rhs, const std::vector& var_types, - sparse_vector_t& cut, - f_t& cut_rhs); + inequality_t& cut); std::vector transformed_variables_; }; diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 9c56ada50e..28a3845378 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -2202,7 +2202,7 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde // Ensure the workspace is sorted. Otherwise, the sparse dot will be incorrect. std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); - work_estimate_ += (m + nz) * std::log2(m + nz); + if ((m + nz) > 1) { work_estimate_ += (m + nz) * std::log2((f_t)(m + nz)); } // Gather the workspace into a column of S i_t S_start; @@ -2214,7 +2214,7 @@ i_t basis_update_mpf_t::update(const sparse_vector_t& utilde // Gather etilde into a column of S etilde.sort(); // Needs to be sorted for the sparse dot. TODO(CMM): Is etilde sorted on input? - work_estimate_ += etilde.i.size() * std::log2(etilde.i.size()); + if (etilde.i.size() > 1) { work_estimate_ += etilde.i.size() * std::log2((f_t)etilde.i.size()); } S_.append_column(etilde); work_estimate_ += 4 * etilde.i.size(); diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp index e30b067398..d9abc26fe1 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -235,7 +235,7 @@ void bound_flipping_ratio_test_t::heap_passes(const std::vector& // Remove minimum ratio from the heap and rebalance i_t heap_index = bare_idx.front(); std::pop_heap(bare_idx.begin(), bare_idx.end(), compare); - work_estimate_ += 2 * std::log2(bare_idx.size()); + if (bare_idx.size() > 1) { work_estimate_ += 2 * std::log2((f_t)bare_idx.size()); } bare_idx.pop_back(); nonbasic_entering = current_indicies[heap_index]; diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp index 244ff334df..4b62c66771 100644 --- a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp @@ -100,7 +100,7 @@ class bound_flipping_ratio_test_t { i_t n_; i_t m_; - f_t work_estimate_; + f_t work_estimate_{0.0}; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 426d9a7535..50f13f0e04 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -412,19 +412,23 @@ void compute_delta_z(const csc_matrix_t& A_transpose, delta_z[leaving_index] = direction; #ifdef CHECK_CHANGE_IN_REDUCED_COST - delta_y_sparse.to_dense(delta_y); + const i_t m = A_transpose.n; + const i_t n = A_transpose.m; + std::vector delta_y_dense(m); + delta_y.to_dense(delta_y_dense); std::vector delta_z_check(n); std::vector delta_z_mark_check(n, 0); std::vector delta_z_indices_check; phase2::compute_reduced_cost_update(lp, basic_list, nonbasic_list, - delta_y, + delta_y_dense, leaving_index, direction, delta_z_mark_check, delta_z_indices_check, - delta_z_check); + delta_z_check, + work_estimate); f_t error_check = 0.0; for (i_t k = 0; k < n; ++k) { const f_t diff = std::abs(delta_z[k] - delta_z_check[k]); @@ -1726,6 +1730,7 @@ i_t compute_delta_x(const lp_problem_t& lp, const std::vector& basic_list, const std::vector& delta_x_flip, const sparse_vector_t& rhs_sparse, + const std::vector& delta_z, const std::vector& x, sparse_vector_t& utilde_sparse, sparse_vector_t& scaled_delta_xB_sparse, @@ -1782,6 +1787,23 @@ i_t compute_delta_x(const lp_problem_t& lp, scaled_delta_xB_sparse.negate(); work_estimate += 2 * scaled_delta_xB_sparse.i.size() + scaled_delta_xB.size(); scale = -scaled_delta_xB[basic_leaving_index]; + } else if (delta_z[entering_index] != 0.0) { + scale = -delta_z[entering_index]; + // The sparse solve did not produce a coefficient for basic_leaving_index. + // Add it so update_primal_variables / update_primal_infeasibilities process + // the leaving variable (they iterate over scaled_delta_xB_sparse.i). + bool found_leaving = false; + for (i_t k = 0; k < static_cast(scaled_delta_xB_sparse.i.size()); ++k) { + if (scaled_delta_xB_sparse.i[k] == basic_leaving_index) { + scaled_delta_xB_sparse.x[k] = scale; + found_leaving = true; + break; + } + } + if (!found_leaving) { + scaled_delta_xB_sparse.i.push_back(basic_leaving_index); + scaled_delta_xB_sparse.x.push_back(scale); + } } else { return -1; } @@ -2029,8 +2051,8 @@ void check_primal_infeasibilities(const lp_problem_t& lp, const simplex_solver_settings_t& settings, const std::vector& basic_list, const std::vector& x, - const ins_vector& squared_infeasibilities, - const ins_vector& infeasibility_indices) + const std::vector& squared_infeasibilities, + const std::vector& infeasibility_indices) { const i_t m = basic_list.size(); for (i_t k = 0; k < m; ++k) { @@ -2054,14 +2076,30 @@ void check_primal_infeasibilities(const lp_problem_t& lp, } } if (!found) { settings.log.printf("Infeasibility index not found %d\n", j); } + } else { + bool found = false; + i_t h; + for (h = 0; h < infeasibility_indices.size(); ++h) { + if (infeasibility_indices[h] == j) { + found = true; + break; + } + } + if (found) { + settings.log.printf("Incorrect infeasible index %d/%d infeas %e sq %e\n", + j, + h, + infeas, + squared_infeasibilities[j]); + } } } } template void check_basic_infeasibilities(const std::vector& basic_list, - const ins_vector& basic_mark, - const ins_vector& infeasibility_indices, + const std::vector& basic_mark, + const std::vector& infeasibility_indices, i_t info) { for (i_t k = 0; k < infeasibility_indices.size(); ++k) { @@ -2104,8 +2142,8 @@ template void check_basis_mark(const simplex_solver_settings_t& settings, const std::vector& basic_list, const std::vector& nonbasic_list, - const ins_vector& basic_mark, - const ins_vector& nonbasic_mark) + const std::vector& basic_mark, + const std::vector& nonbasic_mark) { const i_t m = basic_list.size(); const i_t n = basic_mark.size(); @@ -2925,7 +2963,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, #ifdef COMPUTE_DUAL_RESIDUAL std::vector dual_residual; std::vector zeros(n, 0.0); - phase2::compute_dual_residual(lp.A, zeros, delta_y, delta_z, dual_residual); + std::vector delta_y_dense(m); + delta_y_sparse.to_dense(delta_y_dense); + phase2::compute_dual_residual(lp.A, zeros, delta_y_dense, delta_z, dual_residual); // || A'*delta_y + delta_z ||_inf f_t dual_residual_norm = vector_norm_inf(dual_residual); settings.log.printf( @@ -3182,6 +3222,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, timers.vector_time += timers.stop_timer(); #ifdef COMPUTE_DUAL_RESIDUAL + std::vector dual_res1; phase2::compute_dual_residual(lp.A, objective, y, z, dual_res1); f_t dual_res_norm = vector_norm_inf(dual_res1); if (dual_res_norm > settings.dual_tol) { @@ -3241,6 +3282,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, basic_list, delta_x_flip, rhs_sparse, + delta_z, x, utilde_sparse, scaled_delta_xB_sparse, @@ -3406,7 +3448,7 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, if (should_refactor) { PHASE2_NVTX_RANGE("DualSimplex::refactorization"); num_refactors++; - bool should_recompute_x = false; + bool should_recompute_x = true; // Need for numerically difficult problems like cbs-cta i_t refactor_status = ft.refactor_basis( lp.A, settings, lp.lower, lp.upper, start_time, basic_list, nonbasic_list, vstatus); if (refactor_status == CONCURRENT_HALT_RETURN) { return dual::status_t::CONCURRENT_LIMIT; } @@ -3502,7 +3544,9 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, phase2_work_estimate += ft.work_estimate(); ft.clear_work_estimate(); - work_unit_context->record_work_sync_on_horizon(phase2_work_estimate / 1e8); + if (work_unit_context) { + work_unit_context->record_work_sync_on_horizon(phase2_work_estimate / 1e8); + } phase2_work_estimate = 0.0; last_feature_log_iter = iter; @@ -3510,16 +3554,20 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, if ((iter - start_iter) < settings.first_iteration_log || (iter % settings.iteration_log_frequency) == 0) { + const f_t user_obj = compute_user_objective(lp, obj); if (phase == 1 && iter == 1) { settings.log.printf(" Iter Objective Num Inf. Sum Inf. Perturb Time\n"); } settings.log.printf("%5d %+.16e %7d %.8e %.2e %.2f\n", iter, - compute_user_objective(lp, obj), + user_obj, infeasibility_indices.size(), primal_infeasibility_squared, sum_perturb, now); + if (phase == 2 && settings.inside_mip == 1 && settings.dual_simplex_objective_callback) { + settings.dual_simplex_objective_callback(user_obj); + } } if (obj >= settings.cut_off) { diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index 17e6176e3b..a068ed04ab 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -13,6 +13,13 @@ #include #include +#include +#include +#include +#include +#include +#include + namespace cuopt::linear_programming::dual_simplex { template @@ -42,6 +49,113 @@ struct lp_problem_t { f_t obj_constant; f_t obj_scale; // 1.0 for min, -1.0 for max bool objective_is_integral{false}; + + void write_problem(const std::string& path) const + { + FILE* fid = fopen(path.c_str(), "w"); + if (fid) { + fwrite(&num_rows, sizeof(i_t), 1, fid); + fwrite(&num_cols, sizeof(i_t), 1, fid); + fwrite(&obj_constant, sizeof(f_t), 1, fid); + fwrite(&obj_scale, sizeof(f_t), 1, fid); + i_t is_integral = objective_is_integral ? 1 : 0; + fwrite(&is_integral, sizeof(i_t), 1, fid); + fwrite(objective.data(), sizeof(f_t), num_cols, fid); + fwrite(rhs.data(), sizeof(f_t), num_rows, fid); + fwrite(lower.data(), sizeof(f_t), num_cols, fid); + fwrite(upper.data(), sizeof(f_t), num_cols, fid); + fwrite(A.col_start.data(), sizeof(i_t), A.col_start.size(), fid); + fwrite(A.i.data(), sizeof(i_t), A.i.size(), fid); + fwrite(A.x.data(), sizeof(f_t), A.x.size(), fid); + fclose(fid); + } + } + + void read_problem(const std::string& path) + { + FILE* fid = fopen(path.c_str(), "r"); + if (fid) { + fread(&num_rows, sizeof(i_t), 1, fid); + fread(&num_cols, sizeof(i_t), 1, fid); + fread(&obj_constant, sizeof(f_t), 1, fid); + fread(&obj_scale, sizeof(f_t), 1, fid); + i_t is_integral; + fread(&is_integral, sizeof(i_t), 1, fid); + objective_is_integral = is_integral == 1; + objective.resize(num_cols); + fread(objective.data(), sizeof(f_t), num_cols, fid); + rhs.resize(num_rows); + fread(rhs.data(), sizeof(f_t), num_rows, fid); + lower.resize(num_cols); + fread(lower.data(), sizeof(f_t), num_cols, fid); + upper.resize(num_cols); + fread(upper.data(), sizeof(f_t), num_cols, fid); + A.n = num_cols; + A.m = num_rows; + A.col_start.resize(num_cols + 1); + fread(A.col_start.data(), sizeof(i_t), num_cols + 1, fid); + A.i.resize(A.col_start[num_cols]); + fread(A.i.data(), sizeof(i_t), A.i.size(), fid); + A.x.resize(A.i.size()); + fread(A.x.data(), sizeof(f_t), A.x.size(), fid); + fclose(fid); + } + } + + void write_mps(const std::string& path) const + { + std::ofstream mps_file(path); + if (!mps_file.is_open()) { + printf("Failed to open file %s\n", path.c_str()); + return; + } + mps_file << std::setprecision(std::numeric_limits::max_digits10); + mps_file << "NAME " << "cuopt_lp_problem_t" << "\n"; + mps_file << "ROWS\n"; + mps_file << " N OBJ\n"; + for (i_t i = 0; i < num_rows; i++) { + mps_file << " E R" << i << "\n"; + } + mps_file << "COLUMNS\n"; + for (i_t j = 0; j < num_cols; j++) { + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; + mps_file << " " << "C" << j << " OBJ " << objective[j] << "\n"; + for (i_t k = col_start; k < col_end; k++) { + const i_t i = A.i[k]; + const f_t x = A.x[k]; + std::string col_name = "C" + std::to_string(j); + std::string row_name = "R" + std::to_string(i); + mps_file << " " << col_name << " " << row_name << " " << x << "\n"; + } + } + mps_file << "RHS\n"; + for (i_t i = 0; i < num_rows; i++) { + mps_file << " RHS1 R" << i << " " << rhs[i] << "\n"; + } + + mps_file << "BOUNDS\n"; + for (i_t j = 0; j < num_cols; j++) { + const f_t lb = lower[j]; + const f_t ub = upper[j]; + std::string col_name = "C" + std::to_string(j); + if (lb == -std::numeric_limits::infinity() && + ub == std::numeric_limits::infinity()) { + mps_file << " FR BOUND1 " << col_name << "\n"; + } else { + if (lb == -std::numeric_limits::infinity()) { + mps_file << " MI BOUND1 " << col_name << "\n"; + } else { + mps_file << " LO BOUND1 " << col_name << " " << lb << "\n"; + } + if (ub != std::numeric_limits::infinity()) { + mps_file << " UP BOUND1 " << col_name << " " << ub << "\n"; + } + } + } + mps_file << "ENDATA\n"; + mps_file.close(); + } }; template diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index 815e229232..57514a7488 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -7,6 +7,7 @@ #pragma once +#include #include #include @@ -100,6 +101,7 @@ struct simplex_solver_settings_t { mir_cuts(-1), mixed_integer_gomory_cuts(-1), knapsack_cuts(-1), + clique_cuts(-1), strong_chvatal_gomory_cuts(-1), reduced_cost_strengthening(-1), cut_change_threshold(1e-3), @@ -108,8 +110,9 @@ struct simplex_solver_settings_t { reliability_branching(-1), inside_mip(0), sub_mip(0), - solution_callback(nullptr), + new_incumbent_callback(nullptr), heuristic_preemption_callback(nullptr), + dual_simplex_objective_callback(nullptr), concurrent_halt(nullptr) { } @@ -178,6 +181,7 @@ struct simplex_solver_settings_t { i_t mixed_integer_gomory_cuts; // -1 automatic, 0 to disable, >0 to enable mixed integer Gomory // cuts i_t knapsack_cuts; // -1 automatic, 0 to disable, >0 to enable knapsack cuts + i_t clique_cuts; // -1 automatic, 0 to disable, >0 to enable clique cuts i_t strong_chvatal_gomory_cuts; // -1 automatic, 0 to disable, >0 to enable strong Chvatal Gomory // cuts i_t reduced_cost_strengthening; // -1 automatic, 0 to disable, >0 to enable reduced cost @@ -186,6 +190,8 @@ struct simplex_solver_settings_t { f_t cut_min_orthogonality; // minimum orthogonality for cuts i_t mip_batch_pdlp_strong_branching{0}; // 0 if not using batch PDLP for strong branching, 1 if // using batch PDLP for strong branching + f_t bnb_work_unit_scale{1.0}; + bool gpu_heur_wait_for_exploration{true}; diving_heuristics_settings_t diving_settings; // Settings for the diving heuristics @@ -198,10 +204,13 @@ struct simplex_solver_settings_t { i_t inside_mip; // 0 if outside MIP, 1 if inside MIP at root node, 2 if inside MIP at leaf node i_t sub_mip; // 0 if in regular MIP solve, 1 if in sub-MIP solve - std::function&, f_t)> solution_callback; + std::function&, f_t, const cuopt::internals::mip_solution_callback_info_t&, double)> + new_incumbent_callback; std::function&, f_t)> node_processed_callback; std::function heuristic_preemption_callback; std::function&, std::vector&, f_t)> set_simplex_solution_callback; + std::function dual_simplex_objective_callback; // Called with current dual obj mutable logger_t log; std::atomic* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should // continue, 1 if solver should halt diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index ec8aee09c4..63004be72b 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -10,6 +10,7 @@ #include #include +#include // #include // #include @@ -657,6 +658,12 @@ i_t csr_matrix_t::check_matrix(std::string matrix_name) const return 0; } +template +std::pair csr_matrix_t::get_constraint_range(i_t cstr_idx) const +{ + return std::make_pair(this->row_start[cstr_idx], this->row_start[cstr_idx + 1]); +} + // x <- x + alpha * A(:, j) template void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x) @@ -932,6 +939,12 @@ f_t sparse_dot(const std::vector& xind, return dot; } +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT +// Minimal float instantiation for LP usage +template class csc_matrix_t; +template class csr_matrix_t; +#endif + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class csc_matrix_t; diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 56e3ca82c6..09b580cad1 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -48,7 +48,12 @@ class csc_matrix_t { // Adjust to i and x vectors for a new number of nonzeros void reallocate(i_t new_nz); + // Get the number of nonzeros in the matrix i_t nnz() const { return col_start[n]; } + + // Get the number of nonzeros in column j + i_t col_length(i_t j) const { return col_start[j + 1] - col_start[j]; } + // Convert the CSC matrix to a CSR matrix i_t to_compressed_row( cuopt::linear_programming::dual_simplex::csr_matrix_t& Arow) const; @@ -145,6 +150,9 @@ class csr_matrix_t { { } + // Get the number of nonzeros in row i + i_t row_length(i_t i) const { return row_start[i + 1] - row_start[i]; } + // Convert the CSR matrix to CSC i_t to_compressed_col(csc_matrix_t& Acol) const; @@ -173,6 +181,8 @@ class csr_matrix_t { return true; } + // get constraint range + std::pair get_constraint_range(i_t cstr_idx) const; i_t nz_max; // maximum number of nonzero entries i_t m; // number of rows i_t n; // number of cols diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp index 0d34d4c390..1f3798e7dd 100644 --- a/cpp/src/dual_simplex/sparse_vector.cpp +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -240,6 +240,15 @@ void sparse_vector_t::negate() } } +template +void sparse_vector_t::scale(f_t factor) +{ + const i_t nz = x.size(); + for (i_t k = 0; k < nz; ++k) { + x[k] *= factor; + } +} + template f_t sparse_vector_t::find_coefficient(i_t index) const { diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp index 6ea642ee07..d9ca540d18 100644 --- a/cpp/src/dual_simplex/sparse_vector.hpp +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -48,7 +48,11 @@ class sparse_vector_t { void sort(); // compute the squared 2-norm of the sparse vector f_t norm2_squared() const; + // negate the coefficients in the sparse vector void negate(); + // scale the coefficients in the sparse vector by a factor + void scale(f_t factor); + // find the coefficient of a given index f_t find_coefficient(i_t index) const; void clear() diff --git a/cpp/src/math_optimization/solution_writer.cu b/cpp/src/math_optimization/solution_writer.cu index 273b8e989c..880127546d 100644 --- a/cpp/src/math_optimization/solution_writer.cu +++ b/cpp/src/math_optimization/solution_writer.cu @@ -9,15 +9,18 @@ #include #include "solution_writer.hpp" +#include + #include namespace cuopt::linear_programming { +template void solution_writer_t::write_solution_to_sol_file(const std::string& filename, const std::string& status, - const double objective_value, + const f_t objective_value, const std::vector& variable_names, - const std::vector& variable_values) + const std::vector& variable_values) { raft::common::nvtx::range fun_scope("write final solution to .sol file"); std::ofstream file(filename.data()); @@ -27,7 +30,7 @@ void solution_writer_t::write_solution_to_sol_file(const std::string& filename, return; } - file.precision(std::numeric_limits::max_digits10 + 1); + file.precision(std::numeric_limits::max_digits10 + 1); file << "# Status: " << status << std::endl; @@ -39,4 +42,22 @@ void solution_writer_t::write_solution_to_sol_file(const std::string& filename, } } +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT +template void solution_writer_t::write_solution_to_sol_file( + const std::string& filename, + const std::string& status, + const float objective_value, + const std::vector& variable_names, + const std::vector& variable_values); +#endif + +#if MIP_INSTANTIATE_DOUBLE +template void solution_writer_t::write_solution_to_sol_file( + const std::string& filename, + const std::string& status, + const double objective_value, + const std::vector& variable_names, + const std::vector& variable_values); +#endif + } // namespace cuopt::linear_programming diff --git a/cpp/src/math_optimization/solution_writer.hpp b/cpp/src/math_optimization/solution_writer.hpp index 0890bf260b..0ac1b64464 100644 --- a/cpp/src/math_optimization/solution_writer.hpp +++ b/cpp/src/math_optimization/solution_writer.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -23,10 +23,11 @@ namespace cuopt::linear_programming { */ class solution_writer_t { public: + template static void write_solution_to_sol_file(const std::string& sol_file_path, const std::string& status, - const double objective_value, + const f_t objective_value, const std::vector& variable_names, - const std::vector& variable_values); + const std::vector& variable_values); }; } // namespace cuopt::linear_programming diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index f1350ca432..586866fe00 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -58,24 +58,24 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings // clang-format off // Float parameters float_parameters = { - {CUOPT_TIME_LIMIT, &mip_settings.time_limit, 0.0, std::numeric_limits::infinity(), std::numeric_limits::infinity()}, - {CUOPT_TIME_LIMIT, &pdlp_settings.time_limit, 0.0, std::numeric_limits::infinity(), std::numeric_limits::infinity()}, - {CUOPT_WORK_LIMIT, &mip_settings.work_limit, 0.0, std::numeric_limits::infinity(), std::numeric_limits::infinity()}, - {CUOPT_ABSOLUTE_DUAL_TOLERANCE, &pdlp_settings.tolerances.absolute_dual_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_DUAL_TOLERANCE, &pdlp_settings.tolerances.relative_dual_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.absolute_primal_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.relative_primal_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_ABSOLUTE_GAP_TOLERANCE, &pdlp_settings.tolerances.absolute_gap_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_RELATIVE_GAP_TOLERANCE, &pdlp_settings.tolerances.relative_gap_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_ABSOLUTE_TOLERANCE, &mip_settings.tolerances.absolute_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_RELATIVE_TOLERANCE, &mip_settings.tolerances.relative_tolerance, 0.0, 1e-1, 1e-4}, - {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, 0.0, 1e-1, 1e-5}, - {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, 0.0, CUOPT_INFINITY, 1e-10}, - {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, 0.0, 1e-1, 1e-4}, - {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, 0.0, 1e-1, 1e-10}, - {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, 0.0, 1e-1, 1e-10}, - {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, 0.0, std::numeric_limits::infinity(), 1e-3}, - {CUOPT_MIP_CUT_MIN_ORTHOGONALITY, &mip_settings.cut_min_orthogonality, 0.0, 1.0, 0.5} + {CUOPT_TIME_LIMIT, &mip_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_TIME_LIMIT, &pdlp_settings.time_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_WORK_LIMIT, &mip_settings.work_limit, f_t(0.0), std::numeric_limits::infinity(), std::numeric_limits::infinity()}, + {CUOPT_ABSOLUTE_DUAL_TOLERANCE, &pdlp_settings.tolerances.absolute_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_DUAL_TOLERANCE, &pdlp_settings.tolerances.relative_dual_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.absolute_primal_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_PRIMAL_TOLERANCE, &pdlp_settings.tolerances.relative_primal_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_ABSOLUTE_GAP_TOLERANCE, &pdlp_settings.tolerances.absolute_gap_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_RELATIVE_GAP_TOLERANCE, &pdlp_settings.tolerances.relative_gap_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_MIP_ABSOLUTE_TOLERANCE, &mip_settings.tolerances.absolute_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_MIP_RELATIVE_TOLERANCE, &mip_settings.tolerances.relative_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_MIP_INTEGRALITY_TOLERANCE, &mip_settings.tolerances.integrality_tolerance, f_t(0.0), f_t(1e-1), f_t(1e-5)}, + {CUOPT_MIP_ABSOLUTE_GAP, &mip_settings.tolerances.absolute_mip_gap, f_t(0.0), std::numeric_limits::infinity(), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, + {CUOPT_MIP_RELATIVE_GAP, &mip_settings.tolerances.relative_mip_gap, f_t(0.0), f_t(1e-1), f_t(1e-4)}, + {CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.primal_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, + {CUOPT_DUAL_INFEASIBLE_TOLERANCE, &pdlp_settings.tolerances.dual_infeasible_tolerance, f_t(0.0), f_t(1e-1), std::max(f_t(1e-10), std::numeric_limits::epsilon())}, + {CUOPT_MIP_CUT_CHANGE_THRESHOLD, &mip_settings.cut_change_threshold, f_t(-1.0), std::numeric_limits::infinity(), f_t(-1.0)}, + {CUOPT_MIP_CUT_MIN_ORTHOGONALITY, &mip_settings.cut_min_orthogonality, f_t(0.0), f_t(1.0), f_t(0.5)} }; // Int parameters @@ -94,6 +94,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_MIXED_INTEGER_ROUNDING_CUTS, &mip_settings.mir_cuts, -1, 1, -1}, {CUOPT_MIP_MIXED_INTEGER_GOMORY_CUTS, &mip_settings.mixed_integer_gomory_cuts, -1, 1, -1}, {CUOPT_MIP_KNAPSACK_CUTS, &mip_settings.knapsack_cuts, -1, 1, -1}, + {CUOPT_MIP_CLIQUE_CUTS, &mip_settings.clique_cuts, -1, 1, -1}, {CUOPT_MIP_STRONG_CHVATAL_GOMORY_CUTS, &mip_settings.strong_chvatal_gomory_cuts, -1, 1, -1}, {CUOPT_MIP_REDUCED_COST_STRENGTHENING, &mip_settings.reduced_cost_strengthening, -1, std::numeric_limits::max(), -1}, {CUOPT_NUM_GPUS, &pdlp_settings.num_gpus, 1, 2, 1}, @@ -101,9 +102,10 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_MIP_BATCH_PDLP_STRONG_BRANCHING, &mip_settings.mip_batch_pdlp_strong_branching, 0, 1, 0}, {CUOPT_PRESOLVE, reinterpret_cast(&pdlp_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, {CUOPT_PRESOLVE, reinterpret_cast(&mip_settings.presolver), CUOPT_PRESOLVE_DEFAULT, CUOPT_PRESOLVE_PSLP, CUOPT_PRESOLVE_DEFAULT}, - {CUOPT_MIP_DETERMINISM_MODE, &mip_settings.determinism_mode, CUOPT_MODE_OPPORTUNISTIC, CUOPT_MODE_DETERMINISTIC, CUOPT_MODE_OPPORTUNISTIC}, + {CUOPT_MIP_DETERMINISM_MODE, &mip_settings.determinism_mode, CUOPT_DETERMINISM_NONE, CUOPT_DETERMINISM_FULL, CUOPT_DETERMINISM_NONE}, {CUOPT_RANDOM_SEED, &mip_settings.seed, -1, std::numeric_limits::max(), -1}, - {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1} + {CUOPT_MIP_RELIABILITY_BRANCHING, &mip_settings.reliability_branching, -1, std::numeric_limits::max(), -1}, + {CUOPT_PDLP_PRECISION, reinterpret_cast(&pdlp_settings.pdlp_precision), CUOPT_PDLP_DEFAULT_PRECISION, CUOPT_PDLP_MIXED_PRECISION, CUOPT_PDLP_DEFAULT_PRECISION} }; // Bool parameters @@ -120,7 +122,7 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings {CUOPT_CROSSOVER, &pdlp_settings.crossover, false}, {CUOPT_ELIMINATE_DENSE_COLUMNS, &pdlp_settings.eliminate_dense_columns, true}, {CUOPT_CUDSS_DETERMINISTIC, &pdlp_settings.cudss_deterministic, false}, - {CUOPT_DUAL_POSTSOLVE, &pdlp_settings.dual_postsolve, true} + {CUOPT_DUAL_POSTSOLVE, &pdlp_settings.dual_postsolve, true}, }; // String parameters string_parameters = { diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index 538e3c49ac..a200d4265b 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -38,6 +38,7 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/multi_probe.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/probing_cache.cu ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu + ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu) diff --git a/cpp/src/mip_heuristics/diversity/diversity_config.hpp b/cpp/src/mip_heuristics/diversity/diversity_config.hpp index de14260794..b608839539 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_config.hpp +++ b/cpp/src/mip_heuristics/diversity/diversity_config.hpp @@ -30,6 +30,10 @@ struct diversity_config_t { double lp_run_time_if_feasible = 2.; double lp_run_time_if_infeasible = 1.; bool halve_population = false; + bool fj_only_run = false; + bool dry_run = false; + bool initial_solution_only = false; + int n_fp_iterations = 1000000; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 4fa0d3f4ab..2ee17090c0 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -5,16 +5,26 @@ */ /* clang-format on */ -#include "cuda_profiler_api.h" +// uncomment to enable detailed detemrinism logs +#undef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(...) \ + do { \ + CUOPT_LOG_INFO(__VA_ARGS__); \ + } while (0) + #include "diversity_manager.cuh" #include + +#include #include #include #include +#include #include +#include #include constexpr bool fj_only_run = false; @@ -51,7 +61,7 @@ diversity_manager_t::diversity_manager_t(mip_solver_context_thandle_ptr->get_stream()), ls(context, lp_optimal_solution), rins(context, *this), - timer(diversity_config.default_time_limit), + timer(0.0, cuopt::termination_checker_t::root_tag_t{}), bound_prop_recombiner(context, context.problem_ptr->n_variables, ls.constraint_prop, @@ -75,9 +85,32 @@ diversity_manager_t::diversity_manager_t(mip_solver_context_t::n_of_arms, cuopt::seed_generator::get_seed(), ls_alpha, "ls"), ls_hash_map(*context.problem_ptr) { - // Read configuration ID from environment variable - int max_config = -1; - // Read max configuration value from environment variable + fp_recombiner_config_t::max_n_of_vars_from_other = + fp_recombiner_config_t::initial_n_of_vars_from_other; + ls_recombiner_config_t::max_n_of_vars_from_other = + ls_recombiner_config_t::initial_n_of_vars_from_other; + bp_recombiner_config_t::max_n_of_vars_from_other = + bp_recombiner_config_t::initial_n_of_vars_from_other; + sub_mip_recombiner_config_t::max_n_of_vars_from_other = + sub_mip_recombiner_config_t::initial_n_of_vars_from_other; + mab_ls_config_t::last_lm_config = 0; + mab_ls_config_t::last_ls_mab_option = 0; + + CUOPT_DETERMINISM_LOG( + "Deterministic solve start diversity state: seed_state=%lld fp_max=%zu " + "ls_max=%zu bp_max=%zu sub_mip_max=%zu last_lm=%d last_ls=%d " + "enabled_recombiners=%zu", + (long long)cuopt::seed_generator::peek_seed(), + fp_recombiner_config_t::max_n_of_vars_from_other, + ls_recombiner_config_t::max_n_of_vars_from_other, + bp_recombiner_config_t::max_n_of_vars_from_other, + sub_mip_recombiner_config_t::max_n_of_vars_from_other, + (int)mab_ls_config_t::last_lm_config, + (int)mab_ls_config_t::last_ls_mab_option, + recombiner_t::enabled_recombiners.size()); + + int max_config = -1; + int env_config_id = -1; const char* env_max_config = std::getenv("CUOPT_MAX_CONFIG"); if (env_max_config != nullptr) { try { @@ -87,25 +120,32 @@ diversity_manager_t::diversity_manager_t(mip_solver_context_t 1) { - [[maybe_unused]] int config_id = -1; // Default value - const char* env_config_id = std::getenv("CUOPT_CONFIG_ID"); - if (env_config_id != nullptr) { - try { - config_id = std::stoi(env_config_id); - CUOPT_LOG_INFO("Using configuration ID from environment: %d", config_id); - } catch (const std::exception& e) { - CUOPT_LOG_WARN("Failed to parse CUOPT_CONFIG_ID environment variable: %s", e.what()); - } - } + + const char* env_config_id_raw = std::getenv("CUOPT_CONFIG_ID"); + if (env_config_id_raw == nullptr) { return; } + + try { + env_config_id = std::stoi(env_config_id_raw); + } catch (const std::exception& e) { + CUOPT_LOG_WARN("Failed to parse CUOPT_CONFIG_ID environment variable: %s", e.what()); + return; } + + if (max_config > 0 && env_config_id >= max_config) { + CUOPT_LOG_WARN( + "CUOPT_CONFIG_ID=%d is outside [0, %d). Ignoring cut override.", env_config_id, max_config); + return; + } + + context.gpu_heur_loop.deterministic = + (context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS); } // this function is to specialize the local search with config from diversity manager template bool diversity_manager_t::run_local_search(solution_t& solution, const weight_t& weights, - timer_t& timer, + work_limit_timer_t& timer, ls_config_t& ls_config) { raft::common::nvtx::range fun_scope("run_local_search"); @@ -126,7 +166,7 @@ void diversity_manager_t::generate_solution(f_t time_limit, bool rando sol.compute_feasibility(); // if a feasible is found, it is added to the population ls.generate_solution(sol, random_start, &population, time_limit); - population.add_solution(std::move(sol)); + population.add_solution(std::move(sol), internals::mip_solution_origin_t::LOCAL_SEARCH); } template @@ -139,7 +179,12 @@ void diversity_manager_t::add_user_given_solutions( rmm::device_uvector init_sol_assignment(*init_sol, sol.handle_ptr->get_stream()); if (problem_ptr->pre_process_assignment(init_sol_assignment)) { relaxed_lp_settings_t lp_settings; - lp_settings.time_limit = std::min(60., timer.remaining_time() / 2); + lp_settings.time_limit = std::min(60., timer.remaining_time() / 2); + if (timer.deterministic) { + lp_settings.work_limit = lp_settings.time_limit; + lp_settings.work_context = timer.work_context; + cuopt_assert(lp_settings.work_context != nullptr, "Missing deterministic work context"); + } lp_settings.tolerance = problem_ptr->tolerances.absolute_tolerance; lp_settings.save_state = false; lp_settings.return_first_feasible = true; @@ -158,7 +203,6 @@ void diversity_manager_t::add_user_given_solutions( is_feasible, sol.get_user_objective(), sol.get_total_excess()); - population.run_solution_callbacks(sol); initial_sol_vector.emplace_back(std::move(sol)); } else { CUOPT_LOG_ERROR( @@ -172,11 +216,13 @@ void diversity_manager_t::add_user_given_solutions( } template -bool diversity_manager_t::run_presolve(f_t time_limit) +bool diversity_manager_t::run_presolve(f_t time_limit, + cuopt::termination_checker_t& global_timer) { raft::common::nvtx::range fun_scope("run_presolve"); CUOPT_LOG_INFO("Running presolve!"); - timer_t presolve_timer(time_limit); + CUOPT_LOG_INFO("Problem fingerprint before DM presolve: 0x%x", problem_ptr->get_fingerprint()); + work_limit_timer_t presolve_timer(context.gpu_heur_loop, time_limit, *context.termination); auto term_crit = ls.constraint_prop.bounds_update.solve(*problem_ptr); if (ls.constraint_prop.bounds_update.infeas_constraints_count > 0) { stats.presolve_time = timer.elapsed_time(); @@ -185,40 +231,64 @@ bool diversity_manager_t::run_presolve(f_t time_limit) if (termination_criterion_t::NO_UPDATE != term_crit) { ls.constraint_prop.bounds_update.set_updated_bounds(*problem_ptr); } + bool run_probing_cache = !fj_only_run; - // Don't run probing cache in deterministic mode yet as neither B&B nor CPUFJ need it - // and it doesn't make use of work units yet - if (context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC) { run_probing_cache = false; } if (run_probing_cache) { // Run probing cache before trivial presolve to discover variable implications - const f_t time_ratio_of_probing_cache = diversity_config.time_ratio_of_probing_cache; - const f_t max_time_on_probing = diversity_config.max_time_on_probing; - f_t time_for_probing_cache = - std::min(max_time_on_probing, time_limit * time_ratio_of_probing_cache); - timer_t probing_timer{time_for_probing_cache}; + const f_t max_time_on_probing = diversity_config.max_time_on_probing; + f_t time_for_probing_cache = std::min(max_time_on_probing, time_limit); + work_limit_timer_t probing_timer( + context.gpu_heur_loop, time_for_probing_cache, *context.termination); // this function computes probing cache, finds singletons, substitutions and changes the problem bool problem_is_infeasible = compute_probing_cache(ls.constraint_prop.bounds_update, *problem_ptr, probing_timer); if (problem_is_infeasible) { return false; } } - if (!presolve_timer.check_time_limit()) { - const bool remap_cache_ids = true; - trivial_presolve(*problem_ptr, remap_cache_ids); - if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } - // May overconstrain if Papilo presolve has been run before - if (context.settings.presolver == presolver_t::None) { - if (!problem_ptr->empty) { - // do the resizing no-matter what, bounds presolve might not change the bounds but initial - // trivial presolve might have - ls.constraint_prop.bounds_update.resize(*problem_ptr); - ls.constraint_prop.conditional_bounds_update.update_constraint_bounds( - *problem_ptr, ls.constraint_prop.bounds_update); - if (!check_bounds_sanity(*problem_ptr)) { return false; } - } + const bool remap_cache_ids = true; + if (!global_timer.check_time_limit()) { trivial_presolve(*problem_ptr, remap_cache_ids); } + if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } + const bool run_clique_table = + !presolve_timer.check_time_limit() && !context.settings.heuristics_only && + !problem_ptr->empty && !(context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS); + // if (run_clique_table) { + // f_t time_limit_for_clique_table = std::min(3., presolve_timer.remaining_time() / 5); + // timer_t clique_timer(time_limit_for_clique_table); + // dual_simplex::user_problem_t host_problem(problem_ptr->handle_ptr); + // problem_ptr->get_host_user_problem(host_problem); + // std::shared_ptr> clique_table; + // constexpr bool modify_problem_with_cliques = false; + // find_initial_cliques( + // host_problem, context.settings.tolerances, clique_timer, modify_problem_with_cliques); + // if (modify_problem_with_cliques) { + // problem_ptr->set_constraints_from_host_user_problem(host_problem); + // cuopt_assert(host_problem.lower.size() == static_cast(problem_ptr->n_variables), + // "host lower bound size mismatch"); + // cuopt_assert(host_problem.upper.size() == static_cast(problem_ptr->n_variables), + // "host upper bound size mismatch"); + // std::vector all_var_indices(problem_ptr->n_variables); + // std::iota(all_var_indices.begin(), all_var_indices.end(), 0); + // problem_ptr->update_variable_bounds(all_var_indices, host_problem.lower, + // host_problem.upper); trivial_presolve(*problem_ptr, remap_cache_ids); + // } + // } + // May overconstrain if Papilo presolve has been run before + if (context.settings.presolver == presolver_t::None) { + if (!problem_ptr->empty) { + // do the resizing no-matter what, bounds presolve might not change the bounds but initial + // trivial presolve might have + ls.constraint_prop.bounds_update.resize(*problem_ptr); + ls.constraint_prop.bounds_update.upd.init_changed_constraints(problem_ptr->handle_ptr); + ls.constraint_prop.conditional_bounds_update.update_constraint_bounds( + *problem_ptr, ls.constraint_prop.bounds_update); } + if (!check_bounds_sanity(*problem_ptr)) { return false; } } stats.presolve_time = presolve_timer.elapsed_time(); lp_optimal_solution.resize(problem_ptr->n_variables, problem_ptr->handle_ptr->get_stream()); + thrust::fill(problem_ptr->handle_ptr->get_thrust_policy(), + lp_optimal_solution.begin(), + lp_optimal_solution.end(), + f_t(0)); lp_dual_optimal_solution.resize(problem_ptr->n_constraints, problem_ptr->handle_ptr->get_stream()); problem_ptr->handle_ptr->sync_stream(); @@ -226,7 +296,9 @@ bool diversity_manager_t::run_presolve(f_t time_limit) problem_ptr->n_constraints, problem_ptr->n_variables, problem_ptr->presolve_data.objective_offset); - CUOPT_LOG_INFO("cuOpt presolve time: %.2f", stats.presolve_time); + CUOPT_LOG_INFO("cuOpt presolve time: %.2f, fingerprint: 0x%x", + stats.presolve_time, + problem_ptr->get_fingerprint()); return true; } @@ -238,24 +310,21 @@ void diversity_manager_t::generate_quick_feasible_solution() // min 1 second, max 10 seconds const f_t generate_fast_solution_time = std::min(diversity_config.max_fast_sol_time, std::max(1., timer.remaining_time() / 20.)); - timer_t sol_timer(generate_fast_solution_time); + work_limit_timer_t sol_timer( + context.gpu_heur_loop, generate_fast_solution_time, *context.termination); // do very short LP run to get somewhere close to the optimal point ls.generate_fast_solution(solution, sol_timer); if (solution.get_feasible()) { - population.run_solution_callbacks(solution); initial_sol_vector.emplace_back(std::move(solution)); problem_ptr->handle_ptr->sync_stream(); solution_t searched_sol(initial_sol_vector.back()); ls_config_t ls_config; run_local_search(searched_sol, population.weights, sol_timer, ls_config); - population.run_solution_callbacks(searched_sol); initial_sol_vector.emplace_back(std::move(searched_sol)); auto& feas_sol = initial_sol_vector.back().get_feasible() ? initial_sol_vector.back() : initial_sol_vector[initial_sol_vector.size() - 2]; - CUOPT_LOG_INFO("Generated fast solution in %f seconds with objective %f", - timer.elapsed_time(), - feas_sol.get_user_objective()); + CUOPT_LOG_INFO("Generated fast solution with objective %f", feas_sol.get_user_objective()); } problem_ptr->handle_ptr->sync_stream(); } @@ -284,6 +353,10 @@ void diversity_manager_t::run_fj_alone(solution_t& solution) ls.fj.settings.feasibility_run = false; ls.fj.settings.time_limit = timer.remaining_time(); ls.fj.solve(solution); + if (solution.get_feasible()) { + population.add_solution(std::move(solution), + internals::mip_solution_origin_t::FEASIBILITY_JUMP); + } CUOPT_LOG_INFO("FJ alone finished!"); } @@ -291,10 +364,34 @@ void diversity_manager_t::run_fj_alone(solution_t& solution) template void diversity_manager_t::run_fp_alone() { - CUOPT_LOG_DEBUG("Running FP alone!"); + CUOPT_DETERMINISM_LOG("Deterministic FP alone enter"); solution_t sol(population.best_feasible()); - ls.run_fp(sol, timer, &population); - CUOPT_LOG_DEBUG("FP alone finished!"); + sol.handle_ptr->sync_stream(); + CUOPT_DETERMINISM_LOG( + "Deterministic FP alone input: hash=0x%x feasible=%d obj=%.16e excess=%.16e", + sol.get_hash(), + (int)sol.get_feasible(), + sol.get_user_objective(), + sol.get_total_excess()); + ls.run_fp(sol, timer, &population, diversity_config.n_fp_iterations); + sol.handle_ptr->sync_stream(); + CUOPT_DETERMINISM_LOG( + "Deterministic FP alone output: hash=0x%x feasible=%d obj=%.16e excess=%.16e", + sol.get_hash(), + (int)sol.get_feasible(), + sol.get_user_objective(), + sol.get_total_excess()); + if (sol.get_feasible()) { + population.add_solution(std::move(sol), internals::mip_solution_origin_t::LOCAL_SEARCH); + } + auto& best_sol = population.best_feasible(); + best_sol.handle_ptr->sync_stream(); + CUOPT_DETERMINISM_LOG( + "Deterministic FP alone population best after: hash=0x%x feasible=%d obj=%.16e excess=%.16e", + best_sol.get_hash(), + (int)best_sol.get_feasible(), + best_sol.get_user_objective(), + best_sol.get_total_excess()); } template @@ -311,49 +408,77 @@ solution_t diversity_manager_t::run_solver() raft::common::nvtx::range fun_scope("run_solver"); CUOPT_LOG_DEBUG("Determinism mode: %s", - context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC ? "deterministic" - : "opportunistic"); + (context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS) + ? "deterministic" + : "opportunistic"); // to automatically compute the solving time on scope exit auto timer_raii_guard = cuopt::scope_guard([&]() { stats.total_solve_time = timer.elapsed_time(); }); + auto log_return_solution = [&](const char* reason, solution_t& sol) { + sol.handle_ptr->sync_stream(); + CUOPT_DETERMINISM_LOG( + "Deterministic run_solver return: reason=%s hash=0x%x feasible=%d " + "obj=%.16e excess=%.16e", + reason, + sol.get_hash(), + (int)sol.get_feasible(), + sol.get_user_objective(), + sol.get_total_excess()); + }; - // Debug: Allow disabling GPU heuristics to test B&B tree determinism in isolation + const bool deterministic_bb_without_deterministic_heuristics = + (context.settings.determinism_mode & CUOPT_DETERMINISM_BB) && + !(context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS); const char* disable_heuristics_env = std::getenv("CUOPT_DISABLE_GPU_HEURISTICS"); - if (context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC) { - CUOPT_LOG_INFO("Running deterministic mode with CPUFJ heuristic"); + if (deterministic_bb_without_deterministic_heuristics || + (disable_heuristics_env != nullptr && std::string(disable_heuristics_env) == "1")) { + CUOPT_LOG_INFO("GPU heuristics disabled (det_bb_only=%d env=%s)", + (int)deterministic_bb_without_deterministic_heuristics, + disable_heuristics_env ? disable_heuristics_env : "unset"); + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB) && + context.branch_and_bound_ptr != nullptr) { + auto& producer_sync = context.branch_and_bound_ptr->get_producer_sync(); + producer_sync.registration_complete(); + } population.initialize_population(); population.allocate_solutions(); - // Start CPUFJ in deterministic mode with B&B integration - if (context.branch_and_bound_ptr != nullptr) { - ls.start_cpufj_deterministic(*context.branch_and_bound_ptr); - } - while (!check_b_b_preemption()) { - if (timer.check_time_limit()) break; std::this_thread::sleep_for(std::chrono::milliseconds(100)); } - - // Stop CPUFJ when B&B is done - ls.stop_cpufj_deterministic(); - - population.add_external_solutions_to_population(); - return population.best_feasible(); + auto& best_sol = population.best_feasible(); + log_return_solution("heuristics_disabled", best_sol); + return best_sol; } - if (disable_heuristics_env != nullptr && std::string(disable_heuristics_env) == "1") { - CUOPT_LOG_INFO("GPU heuristics disabled via CUOPT_DISABLE_GPU_HEURISTICS=1"); - population.initialize_population(); - population.allocate_solutions(); - while (!check_b_b_preemption()) { - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + bool gpu_heuristic_producer_registered = false; + auto gpu_heuristic_producer_guard = cuopt::scope_guard([&]() { + if (!gpu_heuristic_producer_registered || context.branch_and_bound_ptr == nullptr) { return; } + auto& producer_sync = context.branch_and_bound_ptr->get_producer_sync(); + producer_sync.deregister_producer(context.gpu_heur_loop.producer_progress_ptr()); + context.gpu_heur_loop.detach_producer_sync(); + }); + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB) && + context.branch_and_bound_ptr != nullptr) { + if (context.settings.gpu_heur_wait_for_exploration) { + CUOPT_LOG_INFO("GPU heuristics waiting for B&B tree exploration to start..."); + auto wait_start = std::chrono::high_resolution_clock::now(); + context.branch_and_bound_ptr->wait_for_exploration_start(); + double wait_elapsed = + std::chrono::duration(std::chrono::high_resolution_clock::now() - wait_start) + .count(); + CUOPT_LOG_INFO("GPU heuristics resumed after %.2fs (B&B exploration started)", wait_elapsed); } - return population.best_feasible(); + auto& producer_sync = context.branch_and_bound_ptr->get_producer_sync(); + context.gpu_heur_loop.attach_producer_sync(&producer_sync); + producer_sync.register_producer(context.gpu_heur_loop.producer_progress_ptr()); + producer_sync.registration_complete(); + gpu_heuristic_producer_registered = true; } population.timer = timer; - const f_t time_limit = timer.remaining_time(); + const f_t time_limit = timer.deterministic ? timer.get_time_limit() : timer.remaining_time(); const f_t lp_time_limit = std::min(diversity_config.max_time_on_lp, time_limit * diversity_config.time_ratio_on_init_lp); // after every change to the problem, we should resize all the relevant vars @@ -364,7 +489,7 @@ solution_t diversity_manager_t::run_solver() problem_ptr->check_problem_representation(true); // have the structure ready for reusing later problem_ptr->compute_integer_fixed_problem(); - recombiner_t::init_enabled_recombiners(*problem_ptr); + recombiner_t::init_enabled_recombiners(context, *problem_ptr); mab_recombiner.resize_mab_arm_stats(recombiner_t::enabled_recombiners.size()); // test problem is not ii cuopt_func_call( @@ -374,20 +499,34 @@ solution_t diversity_manager_t::run_solver() "The problem must not be ii"); population.initialize_population(); population.allocate_solutions(); - if (check_b_b_preemption()) { return population.best_feasible(); } + if (check_b_b_preemption()) { + auto& best_sol = population.best_feasible(); + log_return_solution("preempted_after_population_init", best_sol); + return best_sol; + } add_user_given_solutions(initial_sol_vector); + CUOPT_LOG_DEBUG("DM bootstrap: initial_sol_vector size after user solutions = %lu", + initial_sol_vector.size()); // Run CPUFJ early to find quick initial solutions ls_cpufj_raii_guard_t ls_cpufj_raii_guard(ls); // RAII to stop cpufj threads on solve stop - ls.start_cpufj_scratch_threads(population); - if (check_b_b_preemption()) { return population.best_feasible(); } + if (!diversity_config.dry_run && + !(context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS)) { + ls.start_cpufj_scratch_threads(population); + } + + if (check_b_b_preemption()) { + auto& best_sol = population.best_feasible(); + log_return_solution("preempted_before_lp", best_sol); + return best_sol; + } lp_state_t& lp_state = problem_ptr->lp_state; // resize because some constructor might be called before the presolve lp_state.resize(*problem_ptr, problem_ptr->handle_ptr->get_stream()); bool bb_thread_solution_exists = simplex_solution_exists.load(); if (bb_thread_solution_exists) { ls.lp_optimal_exists = true; - } else if (!fj_only_run) { + } else if (!diversity_config.fj_only_run) { convert_greater_to_less(*problem_ptr); f_t tolerance_divisor = @@ -395,20 +534,49 @@ solution_t diversity_manager_t::run_solver() if (tolerance_divisor == 0) { tolerance_divisor = 1; } f_t absolute_tolerance = context.settings.tolerances.absolute_tolerance; - pdlp_solver_settings_t pdlp_settings{}; - pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.tolerances.relative_dual_tolerance = absolute_tolerance / tolerance_divisor; - pdlp_settings.time_limit = lp_time_limit; - pdlp_settings.first_primal_feasible = false; - pdlp_settings.concurrent_halt = &global_concurrent_halt; - pdlp_settings.method = method_t::Concurrent; - pdlp_settings.inside_mip = true; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; - pdlp_settings.num_gpus = context.settings.num_gpus; - pdlp_settings.presolver = presolver_t::None; - - timer_t lp_timer(lp_time_limit); - auto lp_result = solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); + auto lp_result = [&]() { + if (timer.deterministic) { + relaxed_lp_settings_t lp_settings{}; + lp_settings.time_limit = lp_time_limit; + lp_settings.work_limit = lp_time_limit; + lp_settings.tolerance = absolute_tolerance; + lp_settings.check_infeasibility = true; + lp_settings.return_first_feasible = false; + lp_settings.save_state = true; + lp_settings.per_constraint_residual = true; + lp_settings.has_initial_primal = false; + lp_settings.concurrent_halt = &global_concurrent_halt; + lp_settings.work_context = &context.gpu_heur_loop; + cuopt_assert(lp_settings.work_context != nullptr, "Missing deterministic work context"); + CUOPT_DETERMINISM_LOG( + "DM root LP config: dry_run=%d deterministic=%d work_limit=%.6f time_limit=%.6f", + (int)diversity_config.dry_run, + (int)timer.deterministic, + lp_settings.work_limit, + lp_settings.time_limit); + return get_relaxed_lp_solution( + *problem_ptr, lp_optimal_solution, lp_state, lp_settings); + } + pdlp_solver_settings_t pdlp_settings{}; + pdlp_settings.tolerances.relative_primal_tolerance = absolute_tolerance / tolerance_divisor; + pdlp_settings.tolerances.relative_dual_tolerance = absolute_tolerance / tolerance_divisor; + pdlp_settings.time_limit = lp_time_limit; + pdlp_settings.first_primal_feasible = false; + pdlp_settings.concurrent_halt = &global_concurrent_halt; + pdlp_settings.method = method_t::Concurrent; + pdlp_settings.inside_mip = true; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + pdlp_settings.num_gpus = context.settings.num_gpus; + pdlp_settings.presolver = presolver_t::None; + timer_t lp_timer(lp_time_limit); + return solve_lp_with_method(*problem_ptr, pdlp_settings, lp_timer); + }(); + CUOPT_DETERMINISM_LOG( + "DM root LP result: status=%d iters=%d user_obj=%.12f primal_hash=0x%x", + (int)lp_result.get_termination_status(), + lp_result.get_additional_termination_information().number_of_steps_taken, + lp_result.get_objective_value(), + detail::compute_hash(lp_result.get_primal_solution(), problem_ptr->handle_ptr->get_stream())); { std::lock_guard guard(relaxed_solution_mutex); @@ -452,9 +620,10 @@ solution_t diversity_manager_t::run_solver() } else if (lp_result.get_termination_status() == pdlp_termination_status_t::DualInfeasible) { CUOPT_LOG_ERROR("PDLP detected dual infeasibility, continuing anyway!"); ls.lp_optimal_exists = false; - } else if (lp_result.get_termination_status() == pdlp_termination_status_t::TimeLimit) { + } else if (lp_result.get_termination_status() == pdlp_termination_status_t::TimeLimit || + lp_result.get_termination_status() == pdlp_termination_status_t::IterationLimit) { CUOPT_LOG_DEBUG( - "Initial LP run exceeded time limit, continuing solver with partial LP result!"); + "Initial LP run exceeded time/iteration limit, continuing solver with partial LP result!"); // note to developer, in debug mode the LP run might be too slow and it might cause PDLP not // to bring variables within the bounds } @@ -493,43 +662,89 @@ solution_t diversity_manager_t::run_solver() // in case the pdlp returned var boudns that are out of bounds clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); + CUOPT_DETERMINISM_LOG( + "DM root LP post-clamp: lp_optimal_solution hash=0x%x", + detail::compute_hash(lp_optimal_solution, problem_ptr->handle_ptr->get_stream())); } if (ls.lp_optimal_exists) { solution_t lp_rounded_sol(*problem_ptr); lp_rounded_sol.copy_new_assignment(lp_optimal_solution); + CUOPT_LOG_DEBUG("DM bootstrap candidate (LP raw): hash=0x%x feas=%d obj=%.12f", + lp_rounded_sol.get_hash(), + (int)lp_rounded_sol.get_feasible(), + lp_rounded_sol.get_user_objective()); lp_rounded_sol.round_nearest(); lp_rounded_sol.compute_feasibility(); - population.add_solution(std::move(lp_rounded_sol)); - ls.start_cpufj_lptopt_scratch_threads(population); + CUOPT_LOG_DEBUG("DM bootstrap candidate (LP rounded): hash=0x%x feas=%d obj=%.12f", + lp_rounded_sol.get_hash(), + (int)lp_rounded_sol.get_feasible(), + lp_rounded_sol.get_user_objective()); + population.add_solution(std::move(lp_rounded_sol), + internals::mip_solution_origin_t::LP_ROUNDING); + if (!diversity_config.dry_run && + !(context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS)) { + ls.start_cpufj_lptopt_scratch_threads(population); + } } - population.add_solutions_from_vec(std::move(initial_sol_vector)); + for (size_t i = 0; i < initial_sol_vector.size(); ++i) { + CUOPT_LOG_DEBUG("DM bootstrap candidate (initial_sol_vector[%lu]): hash=0x%x feas=%d obj=%.12f", + i, + initial_sol_vector[i].get_hash(), + (int)initial_sol_vector[i].get_feasible(), + initial_sol_vector[i].get_user_objective()); + } + population.add_solutions_from_vec(std::move(initial_sol_vector), + internals::mip_solution_origin_t::USER_INITIAL); - if (check_b_b_preemption()) { return population.best_feasible(); } + if (check_b_b_preemption()) { + auto& best_sol = population.best_feasible(); + log_return_solution("preempted_after_initial_population", best_sol); + return best_sol; + } if (context.settings.benchmark_info_ptr != nullptr) { context.settings.benchmark_info_ptr->objective_of_initial_population = population.best_feasible().get_user_objective(); } - if (fj_only_run) { + if (diversity_config.dry_run) { + auto& best_sol = population.best_feasible(); + log_return_solution("dry_run", best_sol); + return best_sol; + } + if (diversity_config.fj_only_run) { solution_t sol(*problem_ptr); run_fj_alone(sol); + log_return_solution("fj_only_run", sol); return sol; } - rins.enable(); + if (!(context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS)) { rins.enable(); } generate_solution(timer.remaining_time(), false); - if (timer.check_time_limit()) { + if (diversity_config.initial_solution_only) { + auto& best_sol = population.best_feasible(); + log_return_solution("initial_solution_only", best_sol); + return best_sol; + } + if (work_limit_reached()) { population.add_external_solutions_to_population(); - return population.best_feasible(); + auto& best_sol = population.best_feasible(); + log_return_solution("work_limit_reached", best_sol); + return best_sol; + } + if (check_b_b_preemption()) { + auto& best_sol = population.best_feasible(); + log_return_solution("preempted_before_fp", best_sol); + return best_sol; } - if (check_b_b_preemption()) { return population.best_feasible(); } run_fp_alone(); population.add_external_solutions_to_population(); - return population.best_feasible(); + auto& best_sol = population.best_feasible(); + log_return_solution("post_fp_alone", best_sol); + return best_sol; }; template @@ -549,13 +764,15 @@ void diversity_manager_t::diversity_step(i_t max_iterations_without_im CUOPT_LOG_DEBUG("Population degenerated in diversity step"); return; } - if (timer.check_time_limit()) return; + if (work_limit_reached()) return; constexpr bool tournament = true; auto [sol1, sol2] = population.get_two_random(tournament); cuopt_assert(population.test_invariant(), ""); auto [lp_offspring, offspring] = recombine_and_local_search(sol1, sol2); - auto [inserted_pos_1, best_updated_1] = population.add_solution(std::move(lp_offspring)); - auto [inserted_pos_2, best_updated_2] = population.add_solution(std::move(offspring)); + auto [inserted_pos_1, best_updated_1] = population.add_solution( + std::move(lp_offspring), internals::mip_solution_origin_t::RECOMBINATION); + auto [inserted_pos_2, best_updated_2] = population.add_solution( + std::move(offspring), internals::mip_solution_origin_t::RECOMBINATION); if (best_updated_1 || best_updated_2) { recombine_stats.add_best_updated(); } cuopt_assert(population.test_invariant(), ""); if ((inserted_pos_1 != -1 && inserted_pos_1 <= 2) || @@ -597,12 +814,14 @@ void diversity_manager_t::recombine_and_ls_with_all(solution_t::recombine_and_ls_with_all(solution_t void diversity_manager_t::recombine_and_ls_with_all( - std::vector>& solutions, bool add_only_feasible) + std::vector::drained_external_solution_t>& solutions, + bool add_only_feasible) { raft::common::nvtx::range fun_scope("recombine_and_ls_with_all"); if (solutions.size() > 0) { CUOPT_LOG_DEBUG("Running recombiners on B&B solutions with size %lu", solutions.size()); // add all solutions because time limit might have been consumed and we might have exited before - for (auto& sol : solutions) { + for (auto& drained_sol : solutions) { + auto& sol = drained_sol.solution; cuopt_func_call(sol.test_feasibility(true)); - population.add_solution(std::move(solution_t(sol))); + population.add_solution(std::move(solution_t(sol)), drained_sol.origin); } - for (auto& sol : solutions) { - if (timer.check_time_limit()) { return; } + for (auto& drained_sol : solutions) { + auto& sol = drained_sol.solution; + if (work_limit_reached()) { return; } solution_t ls_solution(sol); ls_config_t ls_config; run_local_search(ls_solution, population.weights, timer, ls_config); - if (timer.check_time_limit()) { return; } + if (work_limit_reached()) { return; } // TODO try if running LP with integers fixed makes it feasible if (ls_solution.get_feasible()) { CUOPT_LOG_DEBUG("LS searched solution feasible, running recombiners!"); @@ -672,6 +894,7 @@ diversity_manager_t::recombine_and_local_search(solution_t& sol1.get_feasible(), sol2.get_quality(population.weights), sol2.get_feasible()); + bool deterministic = (context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS); double best_objective_of_parents = std::min(sol1.get_objective(), sol2.get_objective()); bool at_least_one_parent_feasible = sol1.get_feasible() || sol2.get_feasible(); // randomly choose among 3 recombiners @@ -682,7 +905,7 @@ diversity_manager_t::recombine_and_local_search(solution_t& std::numeric_limits::lowest(), std::numeric_limits::lowest(), std::numeric_limits::max(), - recombiner_work_normalized_reward_t(0.0)); + recombiner_work_normalized_reward_t(deterministic, 0.0)); return std::make_pair(solution_t(sol1), solution_t(sol2)); } cuopt_assert(population.test_invariant(), ""); @@ -702,7 +925,7 @@ diversity_manager_t::recombine_and_local_search(solution_t& std::numeric_limits::lowest(), std::numeric_limits::lowest(), std::numeric_limits::max(), - recombiner_work_normalized_reward_t(0.0)); + recombiner_work_normalized_reward_t(deterministic, 0.0)); return std::make_pair(solution_t(sol1), solution_t(sol2)); } cuopt_assert(offspring.test_number_all_integer(), "All must be integers after LS"); @@ -720,7 +943,12 @@ diversity_manager_t::recombine_and_local_search(solution_t& : diversity_config.lp_run_time_if_infeasible; lp_run_time = std::min(lp_run_time, timer.remaining_time()); relaxed_lp_settings_t lp_settings; - lp_settings.time_limit = lp_run_time; + lp_settings.time_limit = lp_run_time; + if (timer.deterministic) { + lp_settings.work_limit = lp_settings.time_limit; + lp_settings.work_context = timer.work_context; + cuopt_assert(lp_settings.work_context != nullptr, "Missing deterministic work context"); + } lp_settings.tolerance = context.settings.tolerances.absolute_tolerance; lp_settings.return_first_feasible = false; lp_settings.save_state = true; @@ -741,12 +969,15 @@ diversity_manager_t::recombine_and_local_search(solution_t& offspring_qual, sol1.get_quality(population.weights), sol2.get_quality(population.weights)); f_t best_quality_of_parents = std::min(sol1.get_quality(population.weights), sol2.get_quality(population.weights)); - mab_recombiner.add_mab_reward( - mab_recombiner.last_chosen_option, - best_quality_of_parents, - population.best().get_quality(population.weights), - offspring_qual, - recombiner_work_normalized_reward_t(recombine_stats.get_last_recombiner_time())); + mab_recombiner.add_mab_reward(mab_recombiner.last_chosen_option, + best_quality_of_parents, + population.best().get_quality(population.weights), + offspring_qual, + !deterministic + ? recombiner_work_normalized_reward_t( + deterministic, recombine_stats.get_last_recombiner_time()) + : recombiner_work_normalized_reward_t( + deterministic, recombine_stats.get_last_recombiner_work())); mab_ls.add_mab_reward(mab_ls_config_t::last_ls_mab_option, best_quality_of_parents, population.best_feasible().get_quality(population.weights), @@ -791,31 +1022,49 @@ std::pair, bool> diversity_manager_t::recombine( } } } + CUOPT_DETERMINISM_LOG( + "Deterministic recombiner selection: requested=%s selected_index=%d chosen=%s " + "enabled_size=%zu last_choice_before=%d", + recombiner_t::recombiner_name(recombiner_type), + (int)selected_index, + recombiner_t::recombiner_name(recombiner), + recombiner_t::enabled_recombiners.size(), + mab_recombiner.last_chosen_option); mab_recombiner.set_last_chosen_option(selected_index); recombine_stats.add_attempt((recombiner_enum_t)recombiner); recombine_stats.start_recombiner_time(); + CUOPT_LOG_TRACE("Recombining sol %x and %x with recombiner %d, weights %x", + a.get_hash(), + b.get_hash(), + recombiner, + population.weights.get_hash()); + // Refactored code using a switch statement switch (recombiner) { case recombiner_enum_t::BOUND_PROP: { - auto [sol, success] = bound_prop_recombiner.recombine(a, b, population.weights); + auto [sol, success, work] = bound_prop_recombiner.recombine(a, b, population.weights); + recombine_stats.set_recombiner_work(work); recombine_stats.stop_recombiner_time(); if (success) { recombine_stats.add_success(); } return std::make_pair(sol, success); } case recombiner_enum_t::FP: { - auto [sol, success] = fp_recombiner.recombine(a, b, population.weights); + auto [sol, success, work] = fp_recombiner.recombine(a, b, population.weights); + recombine_stats.set_recombiner_work(work); recombine_stats.stop_recombiner_time(); if (success) { recombine_stats.add_success(); } return std::make_pair(sol, success); } case recombiner_enum_t::LINE_SEGMENT: { - auto [sol, success] = line_segment_recombiner.recombine(a, b, population.weights); + auto [sol, success, work] = line_segment_recombiner.recombine(a, b, population.weights); + recombine_stats.set_recombiner_work(work); recombine_stats.stop_recombiner_time(); if (success) { recombine_stats.add_success(); } return std::make_pair(sol, success); } case recombiner_enum_t::SUB_MIP: { - auto [sol, success] = sub_mip_recombiner.recombine(a, b, population.weights); + auto [sol, success, work] = sub_mip_recombiner.recombine(a, b, population.weights); + recombine_stats.set_recombiner_work(work); recombine_stats.stop_recombiner_time(); if (success) { recombine_stats.add_success(); } return std::make_pair(sol, success); @@ -861,6 +1110,12 @@ void diversity_manager_t::set_simplex_solution(const std::vector& context.handle_ptr->sync_stream(); } +template +bool diversity_manager_t::work_limit_reached() +{ + return timer.check_time_limit(); +} + #if MIP_INSTANTIATE_FLOAT template class diversity_manager_t; #endif diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh index 91fc4049a6..7f6e2fc741 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh @@ -26,6 +26,9 @@ #include #include #include +#include + +#include namespace cuopt::linear_programming::detail { @@ -33,7 +36,7 @@ template class diversity_manager_t { public: diversity_manager_t(mip_solver_context_t& context); - bool run_presolve(f_t time_limit); + bool run_presolve(f_t time_limit, cuopt::termination_checker_t& global_timer); solution_t run_solver(); void generate_solution(f_t time_limit, bool random_start = true); void run_fj_alone(solution_t& solution); @@ -48,8 +51,9 @@ class diversity_manager_t { void diversity_step(i_t max_iterations_without_improvement); void add_user_given_solutions(std::vector>& initial_sol_vector); population_t* get_population_pointer() { return &population; } - void recombine_and_ls_with_all(std::vector>& solutions, - bool add_only_feasible = false); + void recombine_and_ls_with_all( + std::vector::drained_external_solution_t>& solutions, + bool add_only_feasible = false); void recombine_and_ls_with_all(solution_t& solution, bool add_only_feasible = false); std::pair, solution_t> recombine_and_local_search( solution_t& a, @@ -63,13 +67,13 @@ class diversity_manager_t { solution_t& sol2); bool run_local_search(solution_t& solution, const weight_t& weights, - timer_t& timer, + work_limit_timer_t& timer, ls_config_t& ls_config); + bool work_limit_reached(); void set_simplex_solution(const std::vector& solution, const std::vector& dual_solution, f_t objective); - mip_solver_context_t& context; dual_simplex::branch_and_bound_t* branch_and_bound_ptr; problem_t* problem_ptr; @@ -79,7 +83,7 @@ class diversity_manager_t { rmm::device_uvector lp_dual_optimal_solution; std::atomic simplex_solution_exists{false}; local_search_t ls; - cuopt::timer_t timer; + cuopt::work_limit_timer_t timer; bound_prop_recombiner_t bound_prop_recombiner; fp_recombiner_t fp_recombiner; line_segment_recombiner_t line_segment_recombiner; diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index 7fd8533f82..6c94c159f2 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -186,7 +186,7 @@ void rins_t::run_rins() total_calls++; node_count_at_last_rins = node_count.load(); - time_limit = std::min(time_limit, dm.timer.remaining_time()); + time_limit = std::min(time_limit, static_cast(dm.timer.remaining_time())); CUOPT_LOG_DEBUG("Running RINS on solution with objective %g, fixing %d/%d", best_sol.get_user_objective(), vars_to_fix.size(), @@ -266,13 +266,15 @@ void rins_t::run_rins() branch_and_bound_settings.num_threads = 1; branch_and_bound_settings.reliability_branching = 0; branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.clique_cuts = 0; branch_and_bound_settings.sub_mip = 1; branch_and_bound_settings.log.log = false; branch_and_bound_settings.log.log_prefix = "[RINS] "; - branch_and_bound_settings.solution_callback = [&rins_solution_queue](std::vector& solution, - f_t objective) { - rins_solution_queue.push_back(solution); - }; + branch_and_bound_settings.new_incumbent_callback = + [&rins_solution_queue](std::vector& solution, + f_t objective, + const cuopt::internals::mip_solution_callback_info_t&, + double) { rins_solution_queue.push_back(solution); }; dual_simplex::branch_and_bound_t branch_and_bound( branch_and_bound_problem, branch_and_bound_settings, dual_simplex::tic()); branch_and_bound.set_initial_guess(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); @@ -288,22 +290,22 @@ void rins_t::run_rins() if (branch_and_bound_status == dual_simplex::mip_status_t::OPTIMAL) { CUOPT_LOG_DEBUG("RINS submip optimal"); // do goldilocks update - fixrate = std::max(fixrate - 0.05, settings.min_fixrate); - time_limit = std::max(time_limit - 2, settings.min_time_limit); + fixrate = std::max(fixrate - f_t(0.05), static_cast(settings.min_fixrate)); + time_limit = std::max(time_limit - f_t(2), static_cast(settings.min_time_limit)); } else if (branch_and_bound_status == dual_simplex::mip_status_t::TIME_LIMIT) { CUOPT_LOG_DEBUG("RINS submip time limit"); // do goldilocks update - fixrate = std::min(fixrate + 0.05, settings.max_fixrate); - time_limit = std::min(time_limit + 2, settings.max_time_limit); + fixrate = std::min(fixrate + f_t(0.05), static_cast(settings.max_fixrate)); + time_limit = std::min(time_limit + f_t(2), static_cast(settings.max_time_limit)); } else if (branch_and_bound_status == dual_simplex::mip_status_t::INFEASIBLE) { CUOPT_LOG_DEBUG("RINS submip infeasible"); // do goldilocks update, decreasing fixrate - fixrate = std::max(fixrate - 0.05, settings.min_fixrate); + fixrate = std::max(fixrate - f_t(0.05), static_cast(settings.min_fixrate)); } else { CUOPT_LOG_DEBUG("RINS solution not found"); // do goldilocks update - fixrate = std::min(fixrate + 0.05, settings.max_fixrate); - time_limit = std::min(time_limit + 2, settings.max_time_limit); + fixrate = std::min(fixrate + f_t(0.05), static_cast(settings.max_fixrate)); + time_limit = std::min(time_limit + f_t(2), static_cast(settings.max_time_limit)); } cpu_fj_thread.stop_cpu_solver(); @@ -342,8 +344,9 @@ void rins_t::run_rins() cuopt_assert(best_sol.assignment.size() == sol_size_before_rins, "Assignment size mismatch"); cuopt_assert(best_sol.assignment.size() == problem_copy->n_variables, "Assignment size mismatch"); - dm.population.add_external_solution( - best_sol.get_host_assignment(), best_sol.get_objective(), solution_origin_t::RINS); + dm.population.add_external_solution(best_sol.get_host_assignment(), + best_sol.get_objective(), + internals::mip_solution_origin_t::RINS); } } diff --git a/cpp/src/mip_heuristics/diversity/multi_armed_bandit.cuh b/cpp/src/mip_heuristics/diversity/multi_armed_bandit.cuh index 4571d0d57f..b9219b8dcb 100644 --- a/cpp/src/mip_heuristics/diversity/multi_armed_bandit.cuh +++ b/cpp/src/mip_heuristics/diversity/multi_armed_bandit.cuh @@ -45,16 +45,22 @@ struct ls_work_normalized_reward_t { }; struct recombiner_work_normalized_reward_t { - double time_in_miliseconds; - recombiner_work_normalized_reward_t(double time_in_miliseconds) - : time_in_miliseconds(time_in_miliseconds) + bool deterministic; + double work; + recombiner_work_normalized_reward_t(bool deterministic, double work) + : deterministic(deterministic), work(work) { } double operator()(double factor) const { // normal recombiners take 2000 ms - return factor * (std::max(0.1, 4.0 - (time_in_miliseconds / 2000))); + if (!deterministic) { + double time_in_miliseconds = work; + return factor * (std::max(0.1, 4.0 - (time_in_miliseconds / 2000))); + } else { + return factor * (std::max(0.1, 4.0 - (work / 200))); + } } }; diff --git a/cpp/src/mip_heuristics/diversity/population.cu b/cpp/src/mip_heuristics/diversity/population.cu index bca87223d9..3b19949d05 100644 --- a/cpp/src/mip_heuristics/diversity/population.cu +++ b/cpp/src/mip_heuristics/diversity/population.cu @@ -8,11 +8,14 @@ #include "diversity_manager.cuh" #include "population.cuh" +#include + #include #include #include #include #include +#include #include #include @@ -44,7 +47,7 @@ population_t::population_t(std::string const& name_, rng(cuopt::seed_generator::get_seed()), early_exit_primal_generation(false), population_hash_map(*problem_ptr), - timer(0) + timer(0.0, cuopt::termination_checker_t::root_tag_t{}) { best_feasible_objective = std::numeric_limits::max(); } @@ -125,11 +128,12 @@ std::pair, solution_t> population_t::ge } template -void population_t::add_solutions_from_vec(std::vector>&& solutions) +void population_t::add_solutions_from_vec( + std::vector>&& solutions, internals::mip_solution_origin_t callback_origin) { raft::common::nvtx::range fun_scope("add_solution_from_vec"); for (auto&& sol : solutions) { - add_solution(std::move(sol)); + add_solution(std::move(sol), callback_origin); } } @@ -143,11 +147,11 @@ size_t population_t::get_external_solution_size() template void population_t::add_external_solution(const std::vector& solution, f_t objective, - solution_origin_t origin) + internals::mip_solution_origin_t origin) { std::lock_guard lock(solution_mutex); - if (origin == solution_origin_t::CPUFJ) { + if (origin == internals::mip_solution_origin_t::CPU_FEASIBILITY_JUMP) { external_solution_queue_cpufj.emplace_back(solution, objective, origin); } else { external_solution_queue.emplace_back(solution, objective, origin); @@ -165,7 +169,7 @@ void population_t::add_external_solution(const std::vector& solut } CUOPT_LOG_DEBUG("%s added a solution to population, solution queue size %lu with objective %g", - solution_origin_to_string(origin), + internals::mip_solution_origin_to_string(origin), external_solution_queue.size(), problem_ptr->get_user_obj_from_solver_obj(objective)); if (objective < best_feasible_objective) { @@ -179,9 +183,12 @@ void population_t::add_external_solution(const std::vector& solut template void population_t::add_external_solutions_to_population() { + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB)) { return; } // don't do early exit checks here. mutex needs to be acquired to prevent race conditions auto new_sol_vector = get_external_solutions(); - add_solutions_from_vec(std::move(new_sol_vector)); + for (auto& drained_sol : new_sol_vector) { + add_solution(std::move(drained_sol.solution), drained_sol.origin); + } } // normally we would need a lock here but these are boolean types and race conditions are not @@ -194,10 +201,11 @@ void population_t::preempt_heuristic_solver() } template -std::vector> population_t::get_external_solutions() +std::vector::drained_external_solution_t> +population_t::get_external_solutions() { std::lock_guard lock(solution_mutex); - std::vector> return_vector; + std::vector return_vector; i_t counter = 0; f_t new_best_feasible_objective = best_feasible_objective; f_t longest_wait_time = 0; @@ -205,10 +213,10 @@ std::vector> population_t::get_external_solutions for (auto& h_entry : queue) { // ignore CPUFJ solutions if they're not better than the best feasible. // It seems they worsen results on some instances despite the potential for improved diversity - if (h_entry.origin == solution_origin_t::CPUFJ && + if (h_entry.origin == internals::mip_solution_origin_t::CPU_FEASIBILITY_JUMP && h_entry.objective > new_best_feasible_objective) { continue; - } else if (h_entry.origin != solution_origin_t::CPUFJ && + } else if (h_entry.origin != internals::mip_solution_origin_t::CPU_FEASIBILITY_JUMP && h_entry.objective > new_best_feasible_objective) { new_best_feasible_objective = h_entry.objective; } @@ -233,7 +241,7 @@ std::vector> population_t::get_external_solutions problem_ptr->n_integer_vars); } sol.handle_ptr->sync_stream(); - return_vector.emplace_back(std::move(sol)); + return_vector.emplace_back(std::move(sol), h_entry.origin); counter++; } } @@ -253,126 +261,52 @@ std::vector> population_t::get_external_solutions template bool population_t::is_better_than_best_feasible(solution_t& sol) { - bool obj_better = sol.get_objective() < best_feasible_objective; - return obj_better && sol.get_feasible(); -} - -template -void population_t::invoke_get_solution_callback( - solution_t& sol, internals::get_solution_callback_t* callback) -{ - f_t user_objective = sol.get_user_objective(); - f_t user_bound = context.stats.get_solution_bound(); - solution_t temp_sol(sol); - problem_ptr->post_process_assignment(temp_sol.assignment); - if (context.settings.mip_scaling) { - rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); - context.scaling.unscale_solutions(temp_sol.assignment, dummy); - } - if (problem_ptr->has_papilo_presolve_data()) { - problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); + if (!sol.get_feasible()) { return false; } + f_t threshold = best_feasible_objective; + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB) && + context.branch_and_bound_ptr != nullptr) { + threshold = context.branch_and_bound_ptr->get_upper_bound(); } - - std::vector user_objective_vec(1); - std::vector user_bound_vec(1); - std::vector user_assignment_vec(temp_sol.assignment.size()); - user_objective_vec[0] = user_objective; - user_bound_vec[0] = user_bound; - raft::copy(user_assignment_vec.data(), - temp_sol.assignment.data(), - temp_sol.assignment.size(), - temp_sol.handle_ptr->get_stream()); - temp_sol.handle_ptr->sync_stream(); - callback->get_solution(user_assignment_vec.data(), - user_objective_vec.data(), - user_bound_vec.data(), - callback->get_user_data()); + return sol.get_objective() < threshold; } template -void population_t::run_solution_callbacks(solution_t& sol) +void population_t::run_solution_callbacks( + solution_t& sol, internals::mip_solution_origin_t callback_origin) { - bool better_solution_found = is_better_than_best_feasible(sol); - auto user_callbacks = context.settings.get_mip_callbacks(); - if (better_solution_found) { - if (context.settings.benchmark_info_ptr != nullptr) { - context.settings.benchmark_info_ptr->last_improvement_of_best_feasible = timer.elapsed_time(); - } - CUOPT_LOG_DEBUG("Population: Found new best solution %g", sol.get_user_objective()); - if (problem_ptr->branch_and_bound_callback != nullptr) { - problem_ptr->branch_and_bound_callback(sol.get_host_assignment()); - } - for (auto callback : user_callbacks) { - if (callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { - auto get_sol_callback = static_cast(callback); - invoke_get_solution_callback(sol, get_sol_callback); + if (is_better_than_best_feasible(sol)) { + const bool deterministic_bb = (context.settings.determinism_mode & CUOPT_DETERMINISM_BB) && + context.branch_and_bound_ptr != nullptr; + + if (deterministic_bb) { + const double work_timestamp = context.gpu_heur_loop.current_producer_work(); + cuopt_assert(std::isfinite(work_timestamp), + "Deterministic heuristic work timestamp must be finite"); + context.branch_and_bound_ptr->queue_external_solution_deterministic( + sol.get_host_assignment(), sol.get_user_objective(), work_timestamp, callback_origin); + } else { + const double work_timestamp = context.gpu_heur_loop.current_work(); + const auto payload = context.solution_publication.build_payload( + context.problem_ptr, context.scaling, sol, callback_origin, work_timestamp); + context.solution_publication.publish_new_best_feasible(payload, timer.elapsed_time()); + + if (context.branch_and_bound_ptr != nullptr && + context.problem_ptr->branch_and_bound_callback != nullptr) { + context.problem_ptr->branch_and_bound_callback(sol.get_host_assignment()); } } - // save the best objective here, because we might not have been able to return the solution to - // the user because of the unscaling that causes infeasibility. - // This prevents an issue of repaired, or a fully feasible solution being reported in the call - // back in next run. + best_feasible_objective = sol.get_objective(); } - for (auto callback : user_callbacks) { - if (callback->get_type() == internals::base_solution_callback_type::SET_SOLUTION) { - auto set_sol_callback = static_cast(callback); - f_t user_bound = context.stats.get_solution_bound(); - auto callback_num_variables = problem_ptr->original_problem_ptr->get_n_variables(); - rmm::device_uvector incumbent_assignment(callback_num_variables, - sol.handle_ptr->get_stream()); - solution_t outside_sol(sol); - rmm::device_scalar d_outside_sol_objective(sol.handle_ptr->get_stream()); - auto inf = std::numeric_limits::infinity(); - d_outside_sol_objective.set_value_async(inf, sol.handle_ptr->get_stream()); - sol.handle_ptr->sync_stream(); - std::vector h_incumbent_assignment(incumbent_assignment.size()); - std::vector h_outside_sol_objective(1, inf); - std::vector h_user_bound(1, user_bound); - set_sol_callback->set_solution(h_incumbent_assignment.data(), - h_outside_sol_objective.data(), - h_user_bound.data(), - set_sol_callback->get_user_data()); - f_t outside_sol_objective = h_outside_sol_objective[0]; - // The callback might be called without setting any valid solution or objective which triggers - // asserts - if (outside_sol_objective == inf) { return; } - d_outside_sol_objective.set_value_async(outside_sol_objective, sol.handle_ptr->get_stream()); - raft::copy(incumbent_assignment.data(), - h_incumbent_assignment.data(), - incumbent_assignment.size(), - sol.handle_ptr->get_stream()); - - if (context.settings.mip_scaling) { context.scaling.scale_solutions(incumbent_assignment); } - bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); - if (!is_valid) { return; } - cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), - "Incumbent assignment size mismatch"); - raft::copy(outside_sol.assignment.data(), - incumbent_assignment.data(), - incumbent_assignment.size(), - sol.handle_ptr->get_stream()); - outside_sol.compute_feasibility(); - - CUOPT_LOG_DEBUG("Injected solution feasibility = %d objective = %g excess = %g", - outside_sol.get_feasible(), - outside_sol.get_user_objective(), - outside_sol.get_total_excess()); - if (std::abs(outside_sol.get_user_objective() - outside_sol_objective) > 1e-6) { - cuopt_func_call( - CUOPT_LOG_DEBUG("External solution objective mismatch: outside_sol.get_user_objective() " - "= %g, outside_sol_objective = %g", - outside_sol.get_user_objective(), - outside_sol_objective)); - } - cuopt_assert(std::abs(outside_sol.get_user_objective() - outside_sol_objective) <= 1e-6, - "External solution objective mismatch"); - auto h_outside_sol = outside_sol.get_host_assignment(); - add_external_solution( - h_outside_sol, outside_sol.get_objective(), solution_origin_t::EXTERNAL); - } - } + context.solution_injection.invoke_set_solution_callbacks( + problem_ptr, + context.scaling, + sol, + [this]( + const std::vector& assignment, f_t objective, internals::mip_solution_origin_t origin) { + add_external_solution(assignment, objective, origin); + }); } template @@ -408,7 +342,8 @@ void population_t::adjust_weights_according_to_best_feasible() } template -std::pair population_t::add_solution(solution_t&& sol) +std::pair population_t::add_solution( + solution_t&& sol, internals::mip_solution_origin_t callback_origin) { std::lock_guard lock(write_mutex); raft::common::nvtx::range fun_scope("add_solution"); @@ -418,16 +353,18 @@ std::pair population_t::add_solution(solution_t&& // for hash computation, quality calculation, and similarity comparisons. sol.handle_ptr->sync_stream(); population_hash_map.insert(sol); - double sol_cost = sol.get_quality(weights); - bool best_updated = false; - CUOPT_LOG_DEBUG("Adding solution with quality %f and objective %f n_integers %d!", + double sol_cost = sol.get_quality(weights); + bool best_updated = false; + const uint32_t candidate_hash = sol.get_hash(); + CUOPT_LOG_DEBUG("Adding solution with quality %f and objective %f n_integers %d, hash %x!", sol_cost, sol.get_user_objective(), - sol.n_assigned_integers); + sol.n_assigned_integers, + candidate_hash); // We store the best feasible found so far at index 0. if (sol.get_feasible() && (solutions[0].first == false || sol_cost + OBJECTIVE_EPSILON < indices[0].second)) { - run_solution_callbacks(sol); + run_solution_callbacks(sol, callback_origin); solutions[0].first = true; // we only have move assignment operator solution_t temp_sol(sol); @@ -751,7 +688,7 @@ void population_t::start_threshold_adjustment() } template -void population_t::adjust_threshold(cuopt::timer_t timer) +void population_t::adjust_threshold(cuopt::work_limit_timer_t timer) { double time_ratio = (timer.elapsed_time() - population_start_time) / (timer.get_time_limit() - population_start_time); @@ -840,23 +777,29 @@ bool population_t::test_invariant() template void population_t::print() { + std::vector hashes; + for (auto& index : indices) + hashes.push_back(solutions[index.first].second.get_hash()); + uint32_t final_hash = compute_hash(hashes); CUOPT_LOG_DEBUG(" -------------- "); - CUOPT_LOG_DEBUG("%s infeas weight %f threshold %d/%d:", + CUOPT_LOG_DEBUG("%s infeas weight %f threshold %d/%d (hash %x):", name.c_str(), infeasibility_importance, var_threshold, - problem_ptr->n_integer_vars); + problem_ptr->n_integer_vars, + final_hash); i_t i = 0; for (auto& index : indices) { if (index.first == 0 && solutions[0].first) { CUOPT_LOG_DEBUG(" Best feasible: %f", solutions[index.first].second.get_user_objective()); } - CUOPT_LOG_DEBUG("%d : %f\t%f\t%f\t%d", + CUOPT_LOG_DEBUG("%d : %f\t%f\t%f\t%d (hash %x)", i, index.second, solutions[index.first].second.get_total_excess(), solutions[index.first].second.get_user_objective(), - solutions[index.first].second.get_feasible()); + solutions[index.first].second.get_feasible(), + solutions[index.first].second.get_hash()); i++; } CUOPT_LOG_DEBUG(" -------------- "); @@ -865,8 +808,8 @@ void population_t::print() template void population_t::run_all_recombiners(solution_t& sol) { - std::vector> sol_vec; - sol_vec.emplace_back(std::move(solution_t(sol))); + std::vector::drained_external_solution_t> sol_vec; + sol_vec.emplace_back(solution_t(sol), internals::mip_solution_origin_t::LOCAL_SEARCH); dm.recombine_and_ls_with_all(sol_vec, true); } diff --git a/cpp/src/mip_heuristics/diversity/population.cuh b/cpp/src/mip_heuristics/diversity/population.cuh index 2509ae17df..a19f4ff2df 100644 --- a/cpp/src/mip_heuristics/diversity/population.cuh +++ b/cpp/src/mip_heuristics/diversity/population.cuh @@ -25,22 +25,20 @@ namespace cuopt::linear_programming::detail { template class diversity_manager_t; -enum class solution_origin_t { BRANCH_AND_BOUND, CPUFJ, RINS, EXTERNAL }; - -constexpr const char* solution_origin_to_string(solution_origin_t origin) -{ - switch (origin) { - case solution_origin_t::BRANCH_AND_BOUND: return "B&B"; - case solution_origin_t::CPUFJ: return "CPUFJ"; - case solution_origin_t::RINS: return "RINS"; - case solution_origin_t::EXTERNAL: return "injected"; - default: return "unknown"; - } -} - template class population_t { public: + struct drained_external_solution_t { + drained_external_solution_t(solution_t&& solution_, + internals::mip_solution_origin_t origin_) + : solution(std::move(solution_)), origin(origin_) + { + } + + solution_t solution; + internals::mip_solution_origin_t origin; + }; + population_t(std::string const& name, mip_solver_context_t& context, diversity_manager_t& dm, @@ -83,6 +81,7 @@ class population_t { a.first = false; indices[0].second = std::numeric_limits::max(); indices.erase(indices.begin() + 1, indices.end()); + best_feasible_objective = std::numeric_limits::max(); } void clear_except_best_feasible() @@ -92,6 +91,7 @@ class population_t { } solutions[indices[0].first].first = true; indices.erase(indices.begin() + 1, indices.end()); + best_feasible_objective = solutions[indices[0].first].second.get_objective(); } // ------------------- @@ -103,16 +103,20 @@ class population_t { /*! \brief { Add a solution to population. Similar solutions may be ejected from the pool. } * \return { -1 = not inserted , others = inserted index} */ - std::pair add_solution(solution_t&& sol); + std::pair add_solution( + solution_t&& sol, + internals::mip_solution_origin_t callback_origin = internals::mip_solution_origin_t::UNKNOWN); void add_external_solution(const std::vector& solution, f_t objective, - solution_origin_t origin); - std::vector> get_external_solutions(); + internals::mip_solution_origin_t origin); + std::vector get_external_solutions(); void add_external_solutions_to_population(); size_t get_external_solution_size(); void preempt_heuristic_solver(); - void add_solutions_from_vec(std::vector>&& solutions); + void add_solutions_from_vec( + std::vector>&& solutions, + internals::mip_solution_origin_t callback_origin = internals::mip_solution_origin_t::UNKNOWN); // Updates the cstr weights according to the best solutions feasibility void compute_new_weights(); @@ -122,7 +126,7 @@ class population_t { // updates qualities of each solution void update_qualities(); // adjusts the threshold of the population - void adjust_threshold(cuopt::timer_t timer); + void adjust_threshold(cuopt::work_limit_timer_t timer); /*! \param sol { Input solution } * \return { Index of the best solution similar to sol. If no similar is found we return * max_solutions. }*/ @@ -153,7 +157,9 @@ class population_t { std::vector> population_to_vector(); void halve_the_population(); - void run_solution_callbacks(solution_t& sol); + void run_solution_callbacks( + solution_t& sol, + internals::mip_solution_origin_t callback_origin = internals::mip_solution_origin_t::UNKNOWN); void adjust_weights_according_to_best_feasible(); @@ -161,9 +167,6 @@ class population_t { void diversity_step(i_t max_iterations_without_improvement); - void invoke_get_solution_callback(solution_t& sol, - internals::get_solution_callback_t* callback); - // does some consistency tests bool test_invariant(); @@ -186,7 +189,9 @@ class population_t { struct external_solution_t { external_solution_t() = default; - external_solution_t(const std::vector& solution, f_t objective, solution_origin_t origin) + external_solution_t(const std::vector& solution, + f_t objective, + internals::mip_solution_origin_t origin) : solution(solution), objective(objective), origin(origin), @@ -195,7 +200,7 @@ class population_t { } std::vector solution; f_t objective; - solution_origin_t origin; + internals::mip_solution_origin_t origin; timer_t timer; // debug timer to track how long a solution has lingered in the queue }; @@ -209,7 +214,7 @@ class population_t { std::atomic solutions_in_external_queue_ = false; f_t best_feasible_objective = std::numeric_limits::max(); assignment_hash_map_t population_hash_map; - cuopt::timer_t timer; + cuopt::work_limit_timer_t timer; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/diversity/recombiners/bound_prop_recombiner.cuh b/cpp/src/mip_heuristics/diversity/recombiners/bound_prop_recombiner.cuh index 9d6bb3902c..80f8383fab 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/bound_prop_recombiner.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/bound_prop_recombiner.cuh @@ -29,6 +29,7 @@ class bound_prop_recombiner_t : public recombiner_t { rng(cuopt::seed_generator::get_seed()), vars_to_fix(n_vars, handle_ptr->get_stream()) { + thrust::fill(handle_ptr->get_thrust_policy(), vars_to_fix.begin(), vars_to_fix.end(), -1); } void get_probing_values_for_infeasible( @@ -131,9 +132,9 @@ class bound_prop_recombiner_t : public recombiner_t { }); } - std::pair, bool> recombine(solution_t& a, - solution_t& b, - const weight_t& weights) + std::tuple, bool, double> recombine(solution_t& a, + solution_t& b, + const weight_t& weights) { raft::common::nvtx::range fun_scope("bound_prop_recombiner"); auto& guiding_solution = a.get_feasible() ? a : b; @@ -148,10 +149,11 @@ class bound_prop_recombiner_t : public recombiner_t { i_t n_vars_from_other = n_different_vars; i_t fixed_from_guiding = 0; i_t fixed_from_other = 0; + i_t seed = cuopt::seed_generator::get_seed(); if (n_different_vars > (i_t)bp_recombiner_config_t::max_n_of_vars_from_other) { fixed_from_guiding = n_vars_from_other - bp_recombiner_config_t::max_n_of_vars_from_other; n_vars_from_other = bp_recombiner_config_t::max_n_of_vars_from_other; - thrust::default_random_engine g{(unsigned int)cuopt::seed_generator::get_seed()}; + thrust::default_random_engine g{(unsigned int)seed}; thrust::shuffle(a.handle_ptr->get_thrust_policy(), this->remaining_indices.data(), this->remaining_indices.data() + n_different_vars, @@ -160,12 +162,35 @@ class bound_prop_recombiner_t : public recombiner_t { i_t n_vars_from_guiding = a.problem_ptr->n_integer_vars - n_vars_from_other; CUOPT_LOG_DEBUG( "n_vars_from_guiding %d n_vars_from_other %d", n_vars_from_guiding, n_vars_from_other); + + // DETERMINISM DEBUG: Log everything that could affect divergence + CUOPT_DETERMINISM_LOG("BP_DET: sol_a_hash=0x%x sol_b_hash=0x%x offspring_hash=0x%x, seed %x", + a.get_hash(), + b.get_hash(), + offspring.get_hash(), + seed); + CUOPT_DETERMINISM_LOG("BP_DET: n_different_vars=%d n_vars_from_other=%d n_vars_from_guiding=%d", + n_different_vars, + n_vars_from_other, + n_vars_from_guiding); + CUOPT_DETERMINISM_LOG( + "BP_DET: remaining_indices_hash=0x%x (first %d elements)", + detail::compute_hash(make_span(this->remaining_indices), a.handle_ptr->get_stream()), + std::min((i_t)10, n_vars_from_other)); + CUOPT_DETERMINISM_LOG("BP_DET: guiding_feasible=%d other_feasible=%d expensive_to_fix=%d", + guiding_solution.get_feasible(), + other_solution.get_feasible(), + a.problem_ptr->expensive_to_fix_vars); + CUOPT_DETERMINISM_LOG( + "BP_DET: fixed_from_guiding=%d fixed_from_other=%d", fixed_from_guiding, fixed_from_other); + // if either all integers are from A(meaning all are common) or all integers are from B(meaning // all are different), return if (n_vars_from_guiding == 0 || n_vars_from_other == 0) { CUOPT_LOG_DEBUG("Returning false because all vars are common or different"); - return std::make_pair(offspring, false); + return std::make_tuple(offspring, false, 0.0); } + double work = static_cast(n_vars_from_other); cuopt_assert(a.problem_ptr == b.problem_ptr, "The two solutions should not refer to different problems"); @@ -175,9 +200,16 @@ class bound_prop_recombiner_t : public recombiner_t { a.handle_ptr->get_stream()); probing_config_t probing_config(a.problem_ptr->n_variables, a.handle_ptr); if (guiding_solution.get_feasible() && !a.problem_ptr->expensive_to_fix_vars) { + CUOPT_DETERMINISM_LOG("BP_DET: Taking FEASIBLE path (with variable fixing)"); this->compute_vars_to_fix(offspring, vars_to_fix, n_vars_from_other, n_vars_from_guiding); + CUOPT_DETERMINISM_LOG("BP_DET: vars_to_fix_size=%lu", vars_to_fix.size()); auto [fixed_problem, fixed_assignment, variable_map] = offspring.fix_variables(vars_to_fix); - timer_t timer(bp_recombiner_config_t::bounds_prop_time_limit); + CUOPT_DETERMINISM_LOG("BP_DET: fixed_problem_fingerprint=0x%x variable_map_size=%lu", + fixed_problem.get_fingerprint(), + variable_map.size()); + work_limit_timer_t timer(this->context.gpu_heur_loop, + bp_recombiner_config_t::bounds_prop_time_limit, + *this->context.termination); rmm::device_uvector old_assignment(offspring.assignment, offspring.handle_ptr->get_stream()); offspring.handle_ptr->sync_stream(); @@ -197,26 +229,44 @@ class bound_prop_recombiner_t : public recombiner_t { constraint_prop.single_rounding_only = true; constraint_prop.apply_round(offspring, lp_run_time_after_feasible, timer, probing_config); constraint_prop.single_rounding_only = false; - cuopt_func_call(bool feasible_after_bounds_prop = offspring.get_feasible()); + offspring.compute_feasibility(); + bool feasible_after_bounds_prop = offspring.get_feasible(); offspring.handle_ptr->sync_stream(); offspring.problem_ptr = a.problem_ptr; fixed_assignment = std::move(offspring.assignment); offspring.assignment = std::move(old_assignment); offspring.handle_ptr->sync_stream(); offspring.unfix_variables(fixed_assignment, variable_map); - cuopt_func_call(bool feasible_after_unfix = offspring.get_feasible()); - // May be triggered due to numerical issues - // TODO: investigate further - // cuopt_assert(feasible_after_unfix == feasible_after_bounds_prop, - // "Feasible after unfix should be same as feasible after bounds prop!"); + offspring.compute_feasibility(); + bool feasible_after_unfix = offspring.get_feasible(); + cuopt_func_call(f_t excess_after_unfix = offspring.get_total_excess()); + if (feasible_after_unfix != feasible_after_bounds_prop) { + CUOPT_LOG_WARN("Numerical issue in bounds prop, infeasibility after unfix"); + // might become infeasible after unfixing due to numerical issues. Check that the excess + // remains consistent + // CUOPT_LOG_ERROR("Excess: %g, %g, %g, %g, feas %d", offspring.get_total_excess(), + // offspring.compute_max_constraint_violation(), offspring.compute_max_int_violation(), + // offspring.compute_max_variable_violation(), feasible_after_unfix); + // cuopt_assert(fabs(excess_after_unfix - excess_before) < 1e-6, + // "Excess after unfix should be same as before unfix!"); + } a.handle_ptr->sync_stream(); } else { - timer_t timer(bp_recombiner_config_t::bounds_prop_time_limit); + CUOPT_DETERMINISM_LOG("BP_DET: Taking INFEASIBLE path (no variable fixing)"); + work_limit_timer_t timer(this->context.gpu_heur_loop, + bp_recombiner_config_t::bounds_prop_time_limit, + *this->context.termination); get_probing_values_for_infeasible( guiding_solution, other_solution, offspring, probing_values, n_vars_from_other); probing_config.probing_values = host_copy(probing_values, offspring.handle_ptr->get_stream()); + CUOPT_DETERMINISM_LOG( + "BP_DET: probing_values_hash=0x%x", + detail::compute_hash(make_span(probing_values), a.handle_ptr->get_stream())); constraint_prop.apply_round(offspring, lp_run_time_after_feasible, timer, probing_config); } + CUOPT_DETERMINISM_LOG("BP_DET: After apply_round: offspring_hash=0x%x feasible=%d", + offspring.get_hash(), + offspring.get_feasible()); constraint_prop.max_n_failed_repair_iterations = 1; cuopt_func_call(offspring.test_number_all_integer()); bool better_cost_than_parents = @@ -236,11 +286,17 @@ class bound_prop_recombiner_t : public recombiner_t { bp_recombiner_config_t::decrease_max_n_of_vars_from_other(); } } + CUOPT_DETERMINISM_LOG( + "BP_DET: Final offspring_hash=0x%x same_as_parents=%d better_cost=%d better_feas=%d", + offspring.get_hash(), + same_as_parents, + better_cost_than_parents, + better_feasibility_than_parents); if (better_cost_than_parents || better_feasibility_than_parents) { CUOPT_LOG_DEBUG("Offspring is feasible or better than both parents"); - return std::make_pair(offspring, true); + return std::make_tuple(offspring, true, work); } - return std::make_pair(offspring, !same_as_parents); + return std::make_tuple(offspring, !same_as_parents, work); } rmm::device_uvector vars_to_fix; diff --git a/cpp/src/mip_heuristics/diversity/recombiners/fp_recombiner.cuh b/cpp/src/mip_heuristics/diversity/recombiners/fp_recombiner.cuh index 1cca1ba371..786a3e8798 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/fp_recombiner.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/fp_recombiner.cuh @@ -35,9 +35,9 @@ class fp_recombiner_t : public recombiner_t { { } - std::pair, bool> recombine(solution_t& a, - solution_t& b, - const weight_t& weights) + std::tuple, bool, double> recombine(solution_t& a, + solution_t& b, + const weight_t& weights) { raft::common::nvtx::range fun_scope("FP recombiner"); auto& guiding_solution = a.get_feasible() ? a : b; @@ -50,6 +50,7 @@ class fp_recombiner_t : public recombiner_t { CUOPT_LOG_DEBUG("FP rec: Number of different variables %d MAX_VARS %d", n_different_vars, fp_recombiner_config_t::max_n_of_vars_from_other); + CUOPT_DETERMINISM_LOG("FP rec: offspring hash 0x%x", offspring.get_hash()); i_t n_vars_from_other = n_different_vars; if (n_vars_from_other > (i_t)fp_recombiner_config_t::max_n_of_vars_from_other) { n_vars_from_other = fp_recombiner_config_t::max_n_of_vars_from_other; @@ -62,17 +63,39 @@ class fp_recombiner_t : public recombiner_t { i_t n_vars_from_guiding = a.problem_ptr->n_integer_vars - n_vars_from_other; if (n_vars_from_other == 0 || n_vars_from_guiding == 0) { CUOPT_LOG_DEBUG("Returning false because all vars are common or different"); - return std::make_pair(offspring, false); + return std::make_tuple(offspring, false, 0.0); } + // TODO: CHANGE + double work = static_cast(n_vars_from_other); CUOPT_LOG_DEBUG( "n_vars_from_guiding %d n_vars_from_other %d", n_vars_from_guiding, n_vars_from_other); + CUOPT_DETERMINISM_LOG( + "FP rec: offspring hash 0x%x, vars to fix 0x%x", + offspring.get_hash(), + detail::compute_hash(make_span(vars_to_fix), offspring.handle_ptr->get_stream())); this->compute_vars_to_fix(offspring, vars_to_fix, n_vars_from_other, n_vars_from_guiding); + CUOPT_DETERMINISM_LOG( + "FP rec post computevarstofix: offspring hash 0x%x, vars to fix 0x%x", + offspring.get_hash(), + detail::compute_hash(make_span(vars_to_fix), offspring.handle_ptr->get_stream())); auto [fixed_problem, fixed_assignment, variable_map] = offspring.fix_variables(vars_to_fix); + CUOPT_DETERMINISM_LOG( + "FP rec: fixed_problem hash 0x%x assigned hash 0x%x", + fixed_problem.get_fingerprint(), + detail::compute_hash(make_span(fixed_assignment), offspring.handle_ptr->get_stream())); fixed_problem.check_problem_representation(true); if (!guiding_solution.get_feasible() && !other_solution.get_feasible()) { + CUOPT_DETERMINISM_LOG("FP rec: running LP with infeasibility detection"); relaxed_lp_settings_t lp_settings; lp_settings.time_limit = fp_recombiner_config_t::infeasibility_detection_time_limit; - lp_settings.tolerance = fixed_problem.tolerances.absolute_tolerance; + if (this->context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS) { + lp_settings.time_limit = + std::numeric_limits::max(); // TODO should be global time limit + lp_settings.work_limit = fp_recombiner_config_t::infeasibility_detection_time_limit; + lp_settings.work_context = &this->context.gpu_heur_loop; + cuopt_assert(lp_settings.work_context != nullptr, "Missing deterministic work context"); + } + lp_settings.tolerance = fixed_problem.tolerances.absolute_tolerance; lp_settings.return_first_feasible = true; lp_settings.save_state = true; lp_settings.check_infeasibility = true; @@ -83,7 +106,7 @@ class fp_recombiner_t : public recombiner_t { lp_response.get_termination_status() == pdlp_termination_status_t::DualInfeasible || lp_response.get_termination_status() == pdlp_termination_status_t::TimeLimit) { CUOPT_LOG_DEBUG("FP recombiner failed because LP found infeasible!"); - return std::make_pair(offspring, false); + return std::make_tuple(offspring, false, 0.0); } } // brute force rounding threshold is 8 @@ -96,7 +119,16 @@ class fp_recombiner_t : public recombiner_t { offspring.handle_ptr->sync_stream(); offspring.assignment = std::move(fixed_assignment); cuopt_func_call(offspring.test_variable_bounds(false)); - timer_t timer(fp_recombiner_config_t::fp_time_limit); + CUOPT_DETERMINISM_LOG( + "FP rec pre-descent: offspring_hash=0x%x fixed_assignment_hash=0x%x " + "problem_fingerprint=0x%x fixed_n_integer_vars=%d", + offspring.get_hash(), + detail::compute_hash(offspring.assignment, offspring.handle_ptr->get_stream()), + fixed_problem.get_fingerprint(), + fixed_problem.n_integer_vars); + work_limit_timer_t timer(this->context.gpu_heur_loop, + fp_recombiner_config_t::fp_time_limit, + *this->context.termination); fp.timer = timer; fp.cycle_queue.reset(offspring); fp.reset(); @@ -134,9 +166,9 @@ class fp_recombiner_t : public recombiner_t { !guiding_solution.get_feasible(); if (better_cost_than_parents || better_feasibility_than_parents) { CUOPT_LOG_DEBUG("Offspring is feasible or better than both parents"); - return std::make_pair(offspring, true); + return std::make_tuple(offspring, true, work); } - return std::make_pair(offspring, !same_as_parents); + return std::make_tuple(offspring, !same_as_parents, work); } rmm::device_uvector vars_to_fix; // keep a copy of FP to prevent interference with generation FP diff --git a/cpp/src/mip_heuristics/diversity/recombiners/line_segment_recombiner.cuh b/cpp/src/mip_heuristics/diversity/recombiners/line_segment_recombiner.cuh index d413af86cd..ab0ba3d21c 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/line_segment_recombiner.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/line_segment_recombiner.cuh @@ -66,22 +66,26 @@ class line_segment_recombiner_t : public recombiner_t { return delta_vector; } - std::pair, bool> recombine(solution_t& a, - solution_t& b, - const weight_t& weights) + std::tuple, bool, double> recombine(solution_t& a, + solution_t& b, + const weight_t& weights) { raft::common::nvtx::range fun_scope("line_segment_recombiner"); + CUOPT_DETERMINISM_LOG("LS rec: a %d b %d", a.get_hash(), b.get_hash()); auto& guiding_solution = a.get_feasible() ? a : b; auto& other_solution = a.get_feasible() ? b : a; // copy the solution from A solution_t offspring(guiding_solution); - timer_t line_segment_timer{ls_recombiner_config_t::time_limit}; + work_limit_timer_t line_segment_timer{ + this->context.gpu_heur_loop, ls_recombiner_config_t::time_limit, *this->context.termination}; // TODO after we have the conic combination, detect the lambda change // (i.e. the integral variables flip on line segment) i_t n_points_to_search = ls_recombiner_config_t::n_points_to_search; const bool is_feasibility_run = false; i_t n_different_vars = this->assign_same_integer_values(guiding_solution, other_solution, offspring); + // TODO: CHANGE + double work = static_cast(n_different_vars); rmm::device_uvector delta_vector = generate_delta_vector( guiding_solution, other_solution, offspring, n_points_to_search, n_different_vars); line_segment_search.fj.copy_weights(weights, offspring.handle_ptr); @@ -117,9 +121,9 @@ class line_segment_recombiner_t : public recombiner_t { } if (better_cost_than_parents || better_feasibility_than_parents) { CUOPT_LOG_DEBUG("Offspring is feasible or better than both parents"); - return std::make_pair(offspring, true); + return std::make_tuple(offspring, true, work); } - return std::make_pair(offspring, !same_as_parents); + return std::make_tuple(offspring, !same_as_parents, work); } line_segment_search_t& line_segment_search; diff --git a/cpp/src/mip_heuristics/diversity/recombiners/recombiner.cuh b/cpp/src/mip_heuristics/diversity/recombiners/recombiner.cuh index 89a5e86c17..e43a1d1efd 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/recombiner.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/recombiner.cuh @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -63,6 +64,18 @@ __global__ void assign_same_variables_kernel(typename solution_t::view template class recombiner_t { public: + static const char* recombiner_name(recombiner_enum_t recombiner) + { + switch (recombiner) { + case recombiner_enum_t::BOUND_PROP: return "BOUND_PROP"; + case recombiner_enum_t::FP: return "FP"; + case recombiner_enum_t::LINE_SEGMENT: return "LINE_SEGMENT"; + case recombiner_enum_t::SUB_MIP: return "SUB_MIP"; + case recombiner_enum_t::SIZE: return "SIZE"; + } + return "UNKNOWN"; + } + recombiner_t(mip_solver_context_t& context_, i_t n_integer_vars, const raft::handle_t* handle_ptr) @@ -92,6 +105,15 @@ class recombiner_t { cuopt::make_span(remaining_indices), n_remaining.data()); i_t remaining_variables = this->n_remaining.value(a.handle_ptr->get_stream()); + // Sort the indices to resolve nondeterministic order due to atomicAdd + thrust::sort(a.handle_ptr->get_thrust_policy(), + this->remaining_indices.data(), + this->remaining_indices.data() + remaining_variables); + + CUOPT_DETERMINISM_LOG( + "remaining indices hash 0x%x, size %d", + detail::compute_hash(make_span(this->remaining_indices), a.handle_ptr->get_stream()), + remaining_variables); auto vec_remaining_indices = host_copy(this->remaining_indices.data(), remaining_variables, a.handle_ptr->get_stream()); @@ -173,6 +195,12 @@ class recombiner_t { i_t n_vars_from_guiding) { vars_to_fix.resize(n_vars_from_guiding, offspring.handle_ptr->get_stream()); + CUOPT_DETERMINISM_LOG( + "remaining indices hash 0x%x", + detail::compute_hash(make_span(this->remaining_indices), offspring.handle_ptr->get_stream())); + CUOPT_DETERMINISM_LOG("integer_indices hash 0x%x", + detail::compute_hash(make_span(offspring.problem_ptr->integer_indices), + offspring.handle_ptr->get_stream())); // set difference needs two sorted arrays thrust::sort(offspring.handle_ptr->get_thrust_policy(), this->remaining_indices.data(), @@ -195,23 +223,53 @@ class recombiner_t { "vars_to_fix should be sorted!"); } - static void init_enabled_recombiners(const problem_t& problem) + static void init_enabled_recombiners(mip_solver_context_t& context, + const problem_t& problem) { std::unordered_set enabled_recombiners; + const bool disable_fp_and_submip_for_expensive_fix = problem.expensive_to_fix_vars; + const i_t n_continuous_vars = problem.n_variables - problem.n_integer_vars; + const bool disable_submip_for_continuous_limit = + n_continuous_vars > (i_t)sub_mip_recombiner_config_t::max_continuous_vars; + const bool disable_submip_for_determinism = + (context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS) != 0; for (auto recombiner : recombiner_types) { enabled_recombiners.insert(recombiner); } - if (problem.expensive_to_fix_vars) { + if (disable_fp_and_submip_for_expensive_fix) { enabled_recombiners.erase(recombiner_enum_t::FP); enabled_recombiners.erase(recombiner_enum_t::SUB_MIP); } // check the size of the continous vars - if (problem.n_variables - problem.n_integer_vars > - (i_t)sub_mip_recombiner_config_t::max_continuous_vars) { + if (disable_submip_for_continuous_limit) { + enabled_recombiners.erase(recombiner_enum_t::SUB_MIP); + } + // submip not supported in deterministic mode yet + if (disable_submip_for_determinism) { + // temp, added for debugging enabled_recombiners.erase(recombiner_enum_t::SUB_MIP); } recombiner_t::enabled_recombiners = std::vector(enabled_recombiners.begin(), enabled_recombiners.end()); + cuopt_assert(!recombiner_t::enabled_recombiners.empty(), "No recombiners enabled after init"); + std::string order_str; + for (size_t i = 0; i < recombiner_t::enabled_recombiners.size(); ++i) { + if (i > 0) { order_str += ','; } + order_str += recombiner_name(recombiner_t::enabled_recombiners[i]); + } + CUOPT_DETERMINISM_LOG( + "Deterministic recombiner init: expensive_to_fix=%d n_continuous=%d " + "max_continuous=%zu disable_fp_submip_expensive=%d " + "disable_submip_continuous=%d disable_submip_deterministic=%d size=%zu " + "order=[%s]", + (int)problem.expensive_to_fix_vars, + (int)n_continuous_vars, + sub_mip_recombiner_config_t::max_continuous_vars, + (int)disable_fp_and_submip_for_expensive_fix, + (int)disable_submip_for_continuous_limit, + (int)disable_submip_for_determinism, + recombiner_t::enabled_recombiners.size(), + order_str.c_str()); } mip_solver_context_t& context; diff --git a/cpp/src/mip_heuristics/diversity/recombiners/recombiner_stats.hpp b/cpp/src/mip_heuristics/diversity/recombiners/recombiner_stats.hpp index 044e313284..9b0c4c8d69 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/recombiner_stats.hpp +++ b/cpp/src/mip_heuristics/diversity/recombiners/recombiner_stats.hpp @@ -77,6 +77,11 @@ struct all_recombine_stats { std::optional last_attempt; double last_recombiner_time; std::chrono::high_resolution_clock::time_point last_recombiner_start_time; + double last_recombiner_work; + + void set_recombiner_work(double work) { last_recombiner_work = work; } + + double get_last_recombiner_work() { return last_recombiner_work; } void start_recombiner_time() { diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index b2f7f80066..1b4e3f562b 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -37,9 +37,9 @@ class sub_mip_recombiner_t : public recombiner_t { solution_vector.push_back(solution); } - std::pair, bool> recombine(solution_t& a, - solution_t& b, - const weight_t& weights) + std::tuple, bool, double> recombine(solution_t& a, + solution_t& b, + const weight_t& weights) { raft::common::nvtx::range fun_scope("Sub-MIP recombiner"); solution_vector.clear(); @@ -65,8 +65,10 @@ class sub_mip_recombiner_t : public recombiner_t { i_t n_vars_from_guiding = a.problem_ptr->n_integer_vars - n_vars_from_other; if (n_vars_from_other == 0 || n_vars_from_guiding == 0) { CUOPT_LOG_DEBUG("Returning false because all vars are common or different"); - return std::make_pair(offspring, false); + return std::make_tuple(offspring, false, 0.0); } + // TODO: CHANGE + double work = static_cast(n_vars_from_other); CUOPT_LOG_DEBUG( "n_vars_from_guiding %d n_vars_from_other %d", n_vars_from_guiding, n_vars_from_other); this->compute_vars_to_fix(offspring, vars_to_fix, n_vars_from_other, n_vars_from_guiding); @@ -108,11 +110,13 @@ class sub_mip_recombiner_t : public recombiner_t { branch_and_bound_settings.num_threads = 1; branch_and_bound_settings.reliability_branching = 0; branch_and_bound_settings.max_cut_passes = 0; + branch_and_bound_settings.clique_cuts = 0; branch_and_bound_settings.sub_mip = 1; - branch_and_bound_settings.solution_callback = [this](std::vector& solution, - f_t objective) { - this->solution_callback(solution, objective); - }; + branch_and_bound_settings.new_incumbent_callback = + [this](std::vector& solution, + f_t objective, + const cuopt::internals::mip_solution_callback_info_t&, + double) { this->solution_callback(solution, objective); }; // disable B&B logs, so that it is not interfering with the main B&B thread branch_and_bound_settings.log.log = false; @@ -180,7 +184,7 @@ class sub_mip_recombiner_t : public recombiner_t { sol.clamp_within_bounds(); // Scaling might bring some very slight variable bound violations sol.compute_feasibility(); cuopt_func_call(sol.test_variable_bounds()); - population.add_solution(std::move(sol)); + population.add_solution(std::move(sol), internals::mip_solution_origin_t::SUB_MIP); } bool better_cost_than_parents = offspring.get_quality(weights) < @@ -190,9 +194,9 @@ class sub_mip_recombiner_t : public recombiner_t { !guiding_solution.get_feasible(); if (better_cost_than_parents || better_feasibility_than_parents) { CUOPT_LOG_DEBUG("Offspring is feasible or better than both parents"); - return std::make_pair(offspring, true); + return std::make_tuple(offspring, true, work); } - return std::make_pair(offspring, !std::isnan(branch_and_bound_solution.objective)); + return std::make_tuple(offspring, !std::isnan(branch_and_bound_solution.objective), work); } rmm::device_uvector vars_to_fix; mip_solver_context_t& context; diff --git a/cpp/src/mip_heuristics/diversity/weights.cuh b/cpp/src/mip_heuristics/diversity/weights.cuh index 7502ae9210..fbe72aba8e 100644 --- a/cpp/src/mip_heuristics/diversity/weights.cuh +++ b/cpp/src/mip_heuristics/diversity/weights.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -12,6 +12,8 @@ #include #include +#include + namespace cuopt::linear_programming::detail { template @@ -25,6 +27,11 @@ struct weight_t { objective_weight.set_value_async(one, handle_ptr->get_stream()); } + uint32_t get_hash(rmm::cuda_stream_view stream = rmm::cuda_stream_default) const + { + return compute_hash(cstr_weights, stream) ^ compute_hash(objective_weight.value(stream)); + } + rmm::device_uvector cstr_weights; rmm::device_scalar objective_weight; }; diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu index e9cf0760de..33ff207260 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cu @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -30,6 +31,9 @@ #include +#include +#include + #define FJ_LOG_PREFIX "FJ: " namespace cuopt::linear_programming::detail { @@ -432,9 +436,11 @@ void fj_t::climber_init(i_t climber_idx, const rmm::cuda_stream_view& f_t inf = std::numeric_limits::infinity(); climber->best_objective.set_value_async(inf, climber_stream); climber->saved_solution_objective.set_value_async(inf, climber_stream); - climber->violation_score.set_value_to_zero_async(climber_stream); - climber->weighted_violation_score.set_value_to_zero_async(climber_stream); - init_lhs_and_violation<<<256, 256, 0, climber_stream.value()>>>(view); + refresh_lhs_and_violation(climber_stream); + + // printf("init: Violated constraints hash: %x\n", compute_hash( + // make_span(climber->violated_constraints.contents, 0, + // climber->violated_constraints.set_size.value(climber_stream)), climber_stream)); // initialize the best_objective values according to the initial assignment f_t best_obj = compute_objective_from_vec( @@ -650,10 +656,10 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream auto [grid_setval, blocks_setval] = setval_launch_dims; auto [grid_update_changed_constraints, blocks_update_changed_constraints] = update_changed_constraints_launch_dims; - auto [grid_resetmoves, blocks_resetmoves] = resetmoves_launch_dims; - auto [grid_resetmoves_bin, blocks_resetmoves_bin] = resetmoves_bin_launch_dims; - auto [grid_update_weights, blocks_update_weights] = update_weights_launch_dims; - auto [grid_lift_move, blocks_lift_move] = lift_move_launch_dims; + auto [grid_resetmoves, blocks_resetmoves] = resetmoves_launch_dims; + auto [grid_resetmoves_bin, blocks_resetmoves_bin] = resetmoves_bin_launch_dims; + [[maybe_unused]] auto [grid_update_weights, blocks_update_weights] = update_weights_launch_dims; + [[maybe_unused]] auto [grid_lift_move, blocks_lift_move] = lift_move_launch_dims; auto& data = *climbers[climber_idx]; auto v = data.view(); @@ -841,16 +847,228 @@ void fj_t::refresh_lhs_and_violation(const rmm::cuda_stream_view& stre auto v = data.view(); data.violated_constraints.clear(stream); - data.violation_score.set_value_to_zero_async(stream); - data.weighted_violation_score.set_value_to_zero_async(stream); init_lhs_and_violation<<<4096, 256, 0, stream>>>(v); + // both transformreduce could be fused; but oh well hardly a bottleneck + auto violation = + thrust::transform_reduce(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(pb_ptr->n_constraints), + cuda::proclaim_return_type([v] __device__(i_t cstr_idx) { + return v.excess_score(cstr_idx, v.incumbent_lhs[cstr_idx]); + }), + (f_t)0, + thrust::plus()); + auto weighted_violation = thrust::transform_reduce( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(pb_ptr->n_constraints), + cuda::proclaim_return_type([v] __device__(i_t cstr_idx) { + return v.excess_score(cstr_idx, v.incumbent_lhs[cstr_idx]) * v.cstr_weights[cstr_idx]; + }), + (f_t)0, + thrust::plus()); + data.violation_score.set_value_async(violation, stream); + data.weighted_violation_score.set_value_async(weighted_violation, stream); + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS)) { + data.violated_constraints.sort(stream); + } +#if FJ_SINGLE_STEP + CUOPT_LOG_DEBUG("hash assignment %x, hash lhs %x, hash lhscomp %x", + detail::compute_hash(data.incumbent_assignment, stream), + detail::compute_hash(data.incumbent_lhs, stream), + detail::compute_hash(data.incumbent_lhs_sumcomp, stream)); + CUOPT_LOG_DEBUG("Violated constraints hash post sort: %x, index map %x", + detail::compute_hash(data.violated_constraints.contents, stream), + detail::compute_hash(data.violated_constraints.index_map, stream)); +#endif +} + +template +std::map fj_t::get_feature_vector(i_t climber_idx) const +{ + auto& data = *climbers[climber_idx]; + auto climber_stream = data.stream.view(); + if (climber_idx == 0) climber_stream = handle_ptr->get_stream(); + + std::map features; + + // Basic problem dimensions + features["n_variables"] = (float)pb_ptr->n_variables; + features["n_constraints"] = (float)pb_ptr->n_constraints; + features["nnz"] = (float)pb_ptr->coefficients.size(); + + // Matrix sparsity metrics (already computed) + features["sparsity"] = (float)pb_ptr->sparsity; + features["nnz_stddev"] = (float)pb_ptr->nnz_stddev; + features["unbalancedness"] = (float)pb_ptr->unbalancedness; + + // Algorithm settings + features["time"] = (float)settings.work_limit; + features["n_of_minimums_for_exit"] = (float)settings.n_of_minimums_for_exit; + features["feasibility_run"] = (float)settings.feasibility_run; + + // Variable type metrics + features["n_integer_vars"] = (float)pb_ptr->n_integer_vars; + features["n_binary_vars"] = (float)pb_ptr->n_binary_vars; + features["integer_ratio"] = + pb_ptr->n_variables > 0 ? (float)pb_ptr->n_integer_vars / pb_ptr->n_variables : 0.0f; + features["binary_ratio"] = + pb_ptr->n_variables > 0 ? (float)pb_ptr->n_binary_vars / pb_ptr->n_variables : 0.0f; + + // Initial violation metrics (from current state) + features["initial_violation_count"] = + (float)data.violated_constraints.set_size.value(climber_stream); + // features["initial_violation_score"] = (float)data.violation_score.value(climber_stream); + // features["initial_weighted_violation"] = + // (float)data.weighted_violation_score.value(climber_stream); + + // Load balancing decision + bool use_load_balancing = false; + if (settings.load_balancing_mode == fj_load_balancing_mode_t::ALWAYS_OFF) { + use_load_balancing = false; + } else if (settings.load_balancing_mode == fj_load_balancing_mode_t::ALWAYS_ON) { + use_load_balancing = true; + } else if (settings.load_balancing_mode == fj_load_balancing_mode_t::AUTO) { + use_load_balancing = + pb_ptr->n_variables > settings.parameters.load_balancing_codepath_min_varcount; + } + if (settings.mode == fj_mode_t::ROUNDING) { use_load_balancing = false; } + features["uses_load_balancing"] = (float)use_load_balancing; + + // Related variables metrics (if available) + if (pb_ptr->related_variables_offsets.size() > 0) { + auto h_offsets = cuopt::host_copy(pb_ptr->related_variables_offsets, handle_ptr->get_stream()); + i_t total_related = 0; + i_t max_related = 0; + for (i_t i = 0; i < pb_ptr->n_variables; ++i) { + i_t count = h_offsets[i + 1] - h_offsets[i]; + total_related += count; + max_related = std::max(max_related, count); + } + features["avg_related_vars_per_var"] = + pb_ptr->n_variables > 0 ? (float)total_related / pb_ptr->n_variables : 0.0f; + // features["max_related_vars"] = (float)max_related; + } else { + features["avg_related_vars_per_var"] = 0.0f; + // features["max_related_vars"] = 0.0f; + } + + // Constraint characteristics + auto h_lower = cuopt::host_copy(pb_ptr->constraint_lower_bounds, handle_ptr->get_stream()); + auto h_upper = cuopt::host_copy(pb_ptr->constraint_upper_bounds, handle_ptr->get_stream()); + i_t n_equality = 0; + i_t n_tight = 0; + f_t total_range = 0.0; + i_t n_range_constraints = 0; + + for (i_t i = 0; i < pb_ptr->n_constraints; ++i) { + if (pb_ptr->integer_equal(h_lower[i], h_upper[i])) { + n_equality++; + } else { + f_t range = h_upper[i] - h_lower[i]; + if (std::isfinite(range)) { + total_range += range; + n_range_constraints++; + if (range < 1.0) n_tight++; + } + } + } + features["equality_ratio"] = + pb_ptr->n_constraints > 0 ? (float)n_equality / pb_ptr->n_constraints : 0.0f; + features["avg_constraint_range"] = + n_range_constraints > 0 ? (float)(total_range / n_range_constraints) : 0.0f; + features["tight_constraint_ratio"] = + pb_ptr->n_constraints > 0 ? (float)n_tight / pb_ptr->n_constraints : 0.0f; + + // Variable bound characteristics + auto h_var_bounds = cuopt::host_copy(pb_ptr->variable_bounds, handle_ptr->get_stream()); + i_t n_unbounded = 0; + i_t n_fixed = 0; + f_t total_var_range = 0.0; + i_t n_bounded_vars = 0; + + for (i_t i = 0; i < pb_ptr->n_variables; ++i) { + f_t lower = get_lower(h_var_bounds[i]); + f_t upper = get_upper(h_var_bounds[i]); + + if (!std::isfinite(lower) || !std::isfinite(upper)) { + n_unbounded++; + } else if (pb_ptr->integer_equal(lower, upper)) { + n_fixed++; + } else { + f_t range = upper - lower; + total_var_range += range; + n_bounded_vars++; + } + } + features["unbounded_var_ratio"] = + pb_ptr->n_variables > 0 ? (float)n_unbounded / pb_ptr->n_variables : 0.0f; + features["fixed_var_ratio"] = + pb_ptr->n_variables > 0 ? (float)n_fixed / pb_ptr->n_variables : 0.0f; + features["avg_variable_range"] = + n_bounded_vars > 0 ? (float)(total_var_range / n_bounded_vars) : 0.0f; + + // Objective characteristics + auto h_obj_coeffs = cuopt::host_copy(pb_ptr->objective_coefficients, handle_ptr->get_stream()); + i_t n_obj_vars = 0; + f_t total_obj_magnitude = 0.0; + for (i_t i = 0; i < pb_ptr->n_variables; ++i) { + if (h_obj_coeffs[i] != 0.0) { + n_obj_vars++; + total_obj_magnitude += std::abs(h_obj_coeffs[i]); + } + } + features["obj_var_ratio"] = + pb_ptr->n_variables > 0 ? (float)n_obj_vars / pb_ptr->n_variables : 0.0f; + features["avg_obj_coeff_magnitude"] = + n_obj_vars > 0 ? (float)(total_obj_magnitude / n_obj_vars) : 0.0f; + + // Matrix density patterns + auto h_offsets = cuopt::host_copy(pb_ptr->offsets, handle_ptr->get_stream()); + i_t max_nnz_per_row = 0; + i_t min_nnz_per_row = pb_ptr->n_variables; + f_t sum_sq_dev = 0.0; + f_t mean_nnz = + pb_ptr->n_constraints > 0 ? (f_t)pb_ptr->coefficients.size() / pb_ptr->n_constraints : 0.0f; + + for (i_t i = 0; i < pb_ptr->n_constraints; ++i) { + i_t nnz_row = h_offsets[i + 1] - h_offsets[i]; + max_nnz_per_row = std::max(max_nnz_per_row, nnz_row); + min_nnz_per_row = std::min(min_nnz_per_row, nnz_row); + f_t dev = nnz_row - mean_nnz; + sum_sq_dev += dev * dev; + } + features["max_nnz_per_row"] = (float)max_nnz_per_row; + features["min_nnz_per_row"] = (float)min_nnz_per_row; + features["nnz_variance"] = + pb_ptr->n_constraints > 0 ? (float)(sum_sq_dev / pb_ptr->n_constraints) : 0.0f; + + // Average variable degree (avg constraints per variable) + features["avg_var_degree"] = + pb_ptr->n_variables > 0 ? (float)pb_ptr->coefficients.size() / pb_ptr->n_variables : 0.0f; + + // Derived complexity metrics + features["problem_size_score"] = + (float)(pb_ptr->n_variables * pb_ptr->n_constraints) * (float)pb_ptr->sparsity; + features["structural_complexity"] = + (features["integer_ratio"] + 1.0f) * (float)pb_ptr->unbalancedness; + features["constraint_var_ratio"] = + pb_ptr->n_variables > 0 ? (float)pb_ptr->n_constraints / pb_ptr->n_variables : 0.0f; + + return features; } template i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) { - auto& data = *climbers[climber_idx]; - auto v = data.view(); // == climber_views[climber_idx] + auto& data = *climbers[climber_idx]; + auto v = data.view(); // == climber_views[climber_idx] + const double work_units_at_start = context.gpu_heur_loop.current_work(); + const bool publish_progress = (context.settings.determinism_mode & CUOPT_DETERMINISM_BB) && + context.branch_and_bound_ptr != nullptr && + std::isfinite(settings.work_limit) && settings.work_limit > 0.0 && + settings.iteration_limit > 0 && + settings.iteration_limit != std::numeric_limits::max(); auto climber_stream = data.stream.view(); if (climber_idx == 0) climber_stream = handle_ptr->get_stream(); @@ -865,7 +1083,7 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) data.incumbent_quality.set_value_async(obj, handle_ptr->get_stream()); - timer_t timer(settings.time_limit); + work_limit_timer_t timer(context.gpu_heur_loop, settings.time_limit, *context.termination); i_t steps; bool limit_reached = false; for (steps = 0; steps < std::numeric_limits::max(); steps += iterations_per_graph) { @@ -879,9 +1097,10 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) // every now and then, ensure external solutions are added to the population // this is done here because FJ is called within FP and also after recombiners // so FJ is one of the most inner and most frequent functions to be called - if (steps % 10000 == 0) { - context.diversity_manager_ptr->get_population_pointer() - ->add_external_solutions_to_population(); + if (steps % 10000 == 0 && context.diversity_manager_ptr != nullptr) { + auto* population_ptr = context.diversity_manager_ptr->get_population_pointer(); + cuopt_assert(population_ptr != nullptr, ""); + population_ptr->add_external_solutions_to_population(); } #if !FJ_SINGLE_STEP @@ -891,7 +1110,7 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) CUOPT_LOG_TRACE( "FJ " "step %d viol %.2g [%d], obj %.8g, best %.8g, mins %d, maxw %g, " - "objw %g", + "objw %g, sol %x, delta %x, inc %x, lhs %x, lhscomp %x, viol %x, weights %x", steps, data.violation_score.value(climber_stream), data.violated_constraints.set_size.value(climber_stream), @@ -899,7 +1118,14 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) data.best_objective.value(climber_stream), data.local_minimums_reached.value(climber_stream), max_cstr_weight.value(climber_stream), - objective_weight.value(climber_stream)); + objective_weight.value(climber_stream), + solution.get_hash(), + detail::compute_hash(data.jump_move_delta, climber_stream), + detail::compute_hash(data.incumbent_assignment, climber_stream), + detail::compute_hash(data.incumbent_lhs, climber_stream), + detail::compute_hash(data.incumbent_lhs_sumcomp, climber_stream), + detail::compute_hash(data.violated_constraints.contents, climber_stream), + detail::compute_hash(cstr_left_weights, climber_stream)); } if (!limit_reached) { run_step_device(climber_stream, climber_idx); } @@ -919,6 +1145,12 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) } i_t iterations = data.iterations.value(climber_stream); + if (publish_progress) { + const double progress_ratio = + std::min(1.0, (double)iterations / (double)settings.iteration_limit); + const double published_work = work_units_at_start + settings.work_limit * progress_ratio; + context.gpu_heur_loop.set_current_work(published_work); + } // make sure we have the current incumbent saved (e.g. in the case of a timeout) update_best_solution_kernel<<<1, blocks_resetmoves, 0, climber_stream>>>(v); // check feasibility with the relative tolerance rather than the violation score @@ -969,6 +1201,9 @@ i_t fj_t::host_loop(solution_t& solution, i_t climber_idx) solution.get_feasible(), data.local_minimums_reached.value(climber_stream)); + // compute total time spent + double elapsed_time = timer.elapsed_time(); + CUOPT_LOG_TRACE("best fractional count %d", data.saved_best_fractional_count.value(climber_stream)); @@ -1058,16 +1293,47 @@ template i_t fj_t::solve(solution_t& solution) { raft::common::nvtx::range scope("fj_solve"); - timer_t timer(settings.time_limit); + bool deterministic = (context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS); + if (deterministic) { + settings.time_limit = std::max((f_t)0.0, settings.time_limit); + settings.work_limit = settings.time_limit; + } handle_ptr = const_cast(solution.handle_ptr); pb_ptr = solution.problem_ptr; if (settings.mode != fj_mode_t::ROUNDING) { cuopt_func_call(solution.test_variable_bounds(true)); cuopt_assert(solution.test_number_all_integer(), "All integers must be rounded"); } + if (deterministic && settings.work_limit == 0.0) { + CUOPT_LOG_DEBUG("FJ: skipping solve due to exhausted deterministic work budget"); + return solution.compute_feasibility(); + } + work_limit_timer_t timer(context.gpu_heur_loop, settings.time_limit, *context.termination); pb_ptr->check_problem_representation(true); resize_vectors(solution.handle_ptr); + CUOPT_LOG_DEBUG("FJ: work_limit %f time_limit %f sol hash %x pb hash %x", + settings.work_limit, + settings.time_limit, + solution.get_hash(), + pb_ptr->get_fingerprint()); + CUOPT_LOG_DEBUG("FJ: weights hash %x, left weights hash %x, right weights hash %x", + detail::compute_hash(cstr_weights, handle_ptr->get_stream()), + detail::compute_hash(cstr_left_weights, handle_ptr->get_stream()), + detail::compute_hash(cstr_right_weights, handle_ptr->get_stream())); + + // if work_limit is set: compute an estimate of the number of iterations required + if (deterministic && settings.work_limit != std::numeric_limits::infinity()) { + std::map features_map = get_feature_vector(0); + float iter_prediction = std::max( + (f_t)0.0, (f_t)ceil(context.work_unit_predictors.fj_predictor.predict_scalar(features_map))); + CUOPT_LOG_DEBUG("FJ determ: Estimated number of iterations for %f WU: %f", + settings.work_limit, + iter_prediction); + if (settings.work_limit == 0) iter_prediction = 0; + settings.iteration_limit = std::min(settings.iteration_limit, (i_t)iter_prediction); + } + bool is_initial_feasible = solution.compute_feasibility(); auto initial_solution = solution; // if we're in rounding mode, split the time/iteration limit between the first and second stage @@ -1102,6 +1368,9 @@ i_t fj_t::solve(solution_t& solution) RAFT_CHECK_CUDA(handle_ptr->get_stream()); handle_ptr->sync_stream(); + // Compute and store feature vector for later logging + if (deterministic) { feature_vector = get_feature_vector(0); } + i_t iterations = host_loop(solution); RAFT_CHECK_CUDA(handle_ptr->get_stream()); handle_ptr->sync_stream(); @@ -1149,6 +1418,53 @@ i_t fj_t::solve(solution_t& solution) cuopt_assert(solution.compute_feasibility(), "Reverted solution should be feasible"); } + cuopt_func_call(solution.test_variable_bounds()); + + if (deterministic) { + double work_to_record = settings.work_limit; + + if (iterations < settings.iteration_limit) { + CUOPT_LOG_DEBUG( + "FJ early exit at %d iterations (limit: %d)", iterations, settings.iteration_limit); + // Compute the work unit corresponding to the number of iterations elapsed + // by incrementally guessing work units until the model predicts >= actual iterations + // TODO: awfully ugly, change + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS) && + iterations > 0) { + double guessed_work = 0.0; + const double work_increment = 0.1; + const double max_work = settings.work_limit * 2.0; // Safety limit + float predicted_iters = 0.0f; + + // Make a copy of the feature vector and modify the time/work_limit field + std::map features_for_prediction = feature_vector; + + while (guessed_work <= max_work) { + features_for_prediction["time"] = (float)guessed_work; + predicted_iters = std::max( + 0.0f, + (float)ceil( + context.work_unit_predictors.fj_predictor.predict_scalar(features_for_prediction))); + + if (predicted_iters >= (float)iterations) { + work_to_record = guessed_work; + break; + } + + guessed_work += work_increment; + } + } + } + + CUOPT_LOG_DEBUG("FJ: recording work %fwu for %d iterations", work_to_record, iterations); + const double already_published_work = + std::max(0.0, context.gpu_heur_loop.current_work() - timer.work_units_at_start); + const double remaining_work_to_record = std::max(0.0, work_to_record - already_published_work); + timer.record_work(remaining_work_to_record); + } + + CUOPT_LOG_DEBUG("FJ sol hash %x", solution.get_hash()); + return is_new_feasible; } diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh index e9040a7596..b9495cd282 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump.cuh @@ -19,6 +19,9 @@ #include +#include +#include + #define FJ_DEBUG_LOAD_BALANCING 0 #define FJ_SINGLE_STEP 0 @@ -99,6 +102,7 @@ struct fj_settings_t { fj_mode_t mode{fj_mode_t::FIRST_FEASIBLE}; fj_candidate_selection_t candidate_selection{fj_candidate_selection_t::WEIGHTED_SCORE}; double time_limit{60.0}; + double work_limit{std::numeric_limits::infinity()}; int iteration_limit{std::numeric_limits::max()}; fj_hyper_parameters_t parameters{}; int n_of_minimums_for_exit = 7000; @@ -129,8 +133,15 @@ struct fj_move_t { // as we dont need them to be floating point per the FJ2 scoring scheme // sizeof(fj_staged_score_t) <= 8 is needed to allow for atomic loads struct fj_staged_score_t { - float base{-std::numeric_limits::infinity()}; - float bonus{-std::numeric_limits::infinity()}; + int32_t base{std::numeric_limits::lowest()}; + int32_t bonus{std::numeric_limits::lowest()}; + + fj_staged_score_t() = default; + HDI fj_staged_score_t(int32_t base_, int32_t bonus_) : base(base_), bonus(bonus_) {} + fj_staged_score_t(const fj_staged_score_t&) = default; + fj_staged_score_t(fj_staged_score_t&&) = default; + fj_staged_score_t& operator=(const fj_staged_score_t&) = default; + fj_staged_score_t& operator=(fj_staged_score_t&&) = default; HDI bool operator<(fj_staged_score_t other) const noexcept { @@ -148,7 +159,7 @@ struct fj_staged_score_t { HDI static fj_staged_score_t invalid() { - return {-std::numeric_limits::infinity(), -std::numeric_limits::infinity()}; + return {std::numeric_limits::lowest(), std::numeric_limits::lowest()}; } HDI static fj_staged_score_t zero() { return {0, 0}; } @@ -628,6 +639,10 @@ class fj_t { std::vector> climbers; rmm::device_uvector climber_views; fj_settings_t settings; + std::map feature_vector; + + private: + std::map get_feature_vector(i_t climber_idx = 0) const; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump_impl_common.cuh b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump_impl_common.cuh index e57f0ec9e2..44d3fe55d0 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump_impl_common.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump_impl_common.cuh @@ -103,7 +103,9 @@ HDI std::pair feas_score_constraint( f_t cstr_coeff, f_t c_lb, f_t c_ub, - f_t current_lhs) + f_t current_lhs, + f_t cstr_left_weight = std::numeric_limits::quiet_NaN(), + f_t cstr_right_weight = std::numeric_limits::quiet_NaN()) { cuopt_assert(isfinite(delta), "invalid delta"); cuopt_assert(cstr_coeff != 0 && isfinite(cstr_coeff), "invalid coefficient"); @@ -123,14 +125,13 @@ HDI std::pair feas_score_constraint( // TODO: broadcast left/right weights to a csr_offset-indexed table? local minimums // usually occur on a rarer basis (around 50 iteratiosn to 1 local minimum) // likely unreasonable and overkill however - f_t cstr_weight = - bound_idx == 0 ? fj.cstr_left_weights[cstr_idx] : fj.cstr_right_weights[cstr_idx]; - f_t sign = bound_idx == 0 ? -1 : 1; - f_t rhs = bounds[bound_idx] * sign; - f_t old_lhs = current_lhs * sign; - f_t new_lhs = (current_lhs + cstr_coeff * delta) * sign; - f_t old_slack = rhs - old_lhs; - f_t new_slack = rhs - new_lhs; + f_t cstr_weight = bound_idx == 0 ? cstr_left_weight : cstr_right_weight; + f_t sign = bound_idx == 0 ? -1 : 1; + f_t rhs = bounds[bound_idx] * sign; + f_t old_lhs = current_lhs * sign; + f_t new_lhs = (current_lhs + cstr_coeff * delta) * sign; + f_t old_slack = rhs - old_lhs; + f_t new_slack = rhs - new_lhs; cuopt_assert(isfinite(cstr_weight), "invalid weight"); cuopt_assert(cstr_weight >= 0, "invalid weight"); diff --git a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump_kernels.cu b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump_kernels.cu index ebbb761277..335646d1bc 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump_kernels.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/feasibility_jump_kernels.cu @@ -14,6 +14,9 @@ #include +#include +#include + #include #include "feasibility_jump_impl_common.cuh" @@ -25,6 +28,22 @@ namespace cg = cooperative_groups; namespace cuopt::linear_programming::detail { +template +struct score_with_tiebreaker_comparator { + DI auto operator()(const thrust::pair& a, + const thrust::pair& b) const + { + auto a_score = a.first; + auto a_idx = a.second; + auto b_score = b.first; + auto b_idx = b.second; + + if (a_score > b_score) return a; + if (a_score == b_score && a_idx > b_idx) return a; + return b; + } +}; + template DI thrust::pair move_objective_score( const typename fj_t::climber_data_t::view_t& fj, i_t var_idx, f_t delta) @@ -152,10 +171,7 @@ __global__ void init_lhs_and_violation(typename fj_t::climber_data_t:: fj_kahan_babushka_neumaier_sum(delta_it + offset_begin, delta_it + offset_end); fj.incumbent_lhs_sumcomp[cstr_idx] = 0; - f_t th_violation = fj.excess_score(cstr_idx, fj.incumbent_lhs[cstr_idx]); - f_t weighted_violation = th_violation * fj.cstr_weights[cstr_idx]; - atomicAdd(fj.violation_score, th_violation); - atomicAdd(fj.weighted_violation_score, weighted_violation); + f_t th_violation = fj.excess_score(cstr_idx, fj.incumbent_lhs[cstr_idx]); f_t cstr_tolerance = fj.get_corrected_tolerance(cstr_idx); if (th_violation < -cstr_tolerance) { fj.violated_constraints.insert(cstr_idx); } } @@ -191,8 +207,17 @@ DI typename fj_t::move_score_info_t compute_new_score( f_t c_lb = fj.pb.constraint_lower_bounds[cstr_idx]; f_t c_ub = fj.pb.constraint_upper_bounds[cstr_idx]; - auto [cstr_base_feas, cstr_bonus_robust] = feas_score_constraint( - fj, var_idx, delta, cstr_idx, cstr_coeff, c_lb, c_ub, fj.incumbent_lhs[cstr_idx]); + auto [cstr_base_feas, cstr_bonus_robust] = + feas_score_constraint(fj, + var_idx, + delta, + cstr_idx, + cstr_coeff, + c_lb, + c_ub, + fj.incumbent_lhs[cstr_idx], + fj.cstr_left_weights[cstr_idx], + fj.cstr_right_weights[cstr_idx]); base_feas += cstr_base_feas; bonus_robust += cstr_bonus_robust; @@ -349,7 +374,7 @@ DI std::pair::move_score_info_t> compute_best_mtm( return std::make_pair(best_val, best_score_info); } -template +template DI void update_jump_value(typename fj_t::climber_data_t::view_t fj, i_t var_idx) { cuopt_assert(var_idx >= 0 && var_idx < fj.pb.n_variables, "invalid variable index"); @@ -376,12 +401,11 @@ DI void update_jump_value(typename fj_t::climber_data_t::view_t fj, i_ fj.pb.check_variable_within_bounds(var_idx, fj.incumbent_assignment[var_idx] + delta), "Var not within bounds!"); } - best_score_info = compute_new_score(fj, var_idx, delta); + best_score_info = compute_new_score(fj, var_idx, delta); } else { - auto [best_val, score_info] = - compute_best_mtm(fj, var_idx); - delta = best_val - fj.incumbent_assignment[var_idx]; - best_score_info = score_info; + auto [best_val, score_info] = compute_best_mtm(fj, var_idx); + delta = best_val - fj.incumbent_assignment[var_idx]; + best_score_info = score_info; } } else { delta = round(1.0 - 2 * fj.incumbent_assignment[var_idx]); @@ -577,14 +601,16 @@ __global__ void update_assignment_kernel(typename fj_t::climber_data_t __syncthreads(); - cuopt_assert(isfinite(fj.jump_move_delta[var_idx]), "delta should be finite"); - // Kahan compensated summation - // fj.incumbent_lhs[cstr_idx] = old_lhs + cstr_coeff * fj.jump_move_delta[var_idx]; - f_t y = cstr_coeff * fj.jump_move_delta[var_idx] - fj.incumbent_lhs_sumcomp[cstr_idx]; - f_t t = old_lhs + y; - fj.incumbent_lhs_sumcomp[cstr_idx] = (t - old_lhs) - y; - fj.incumbent_lhs[cstr_idx] = t; - cuopt_assert(isfinite(fj.incumbent_lhs[cstr_idx]), "assignment should be finite"); + if (threadIdx.x == 0) { + cuopt_assert(isfinite(fj.jump_move_delta[var_idx]), "delta should be finite"); + // Kahan compensated summation + // fj.incumbent_lhs[cstr_idx] = old_lhs + cstr_coeff * fj.jump_move_delta[var_idx]; + f_t y = cstr_coeff * fj.jump_move_delta[var_idx] - fj.incumbent_lhs_sumcomp[cstr_idx]; + f_t t = old_lhs + y; + fj.incumbent_lhs_sumcomp[cstr_idx] = (t - old_lhs) - y; + fj.incumbent_lhs[cstr_idx] = t; + cuopt_assert(isfinite(fj.incumbent_lhs[cstr_idx]), "assignment should be finite"); + } } // update the assignment and objective proper @@ -626,8 +652,8 @@ __global__ void update_assignment_kernel(typename fj_t::climber_data_t #if FJ_SINGLE_STEP DEVICE_LOG_DEBUG( - "=---- FJ[%d]: updated %d [%g/%g] :%.4g+{%.4g}=%.4g score {%g,%g}, d_obj %.2g+%.2g=%.2g, " - "err_range %.2g%%, infeas %.2g, total viol %d\n", + "=---- FJ[%d]: updated %d [%g/%g] :%.4g+{%.4g}=%.4g score {%d,%d}, d_obj %.2g+%.2g=%.2g, " + "err_range %.2g%%, infeas %.2g, total viol %d, obj %x, delta %x, coef %x\n", *fj.iterations, var_idx, get_lower(fj.pb.variable_bounds[var_idx]), @@ -642,7 +668,10 @@ __global__ void update_assignment_kernel(typename fj_t::climber_data_t *fj.incumbent_objective + fj.jump_move_delta[var_idx] * fj.pb.objective_coefficients[var_idx], delta_rel_err, fj.jump_move_infeasibility[var_idx], - fj.violated_constraints.size()); + fj.violated_constraints.size(), + detail::compute_hash(*fj.incumbent_objective), + detail::compute_hash(fj.jump_move_delta[var_idx]), + detail::compute_hash(fj.pb.objective_coefficients[var_idx])); #endif // reset the score fj.jump_move_scores[var_idx] = fj_t::move_score_t::invalid(); @@ -862,6 +891,15 @@ DI void update_changed_constraints(typename fj_t::climber_data_t::view if (blockIdx.x == 0) { if (threadIdx.x == 0) { + // sort changed constraints to guarantee determinism + // TODO: horribly slow as it is... block-parallelize at least? but not trivial for arbitrary + // sizes w/ CUB + if (fj.settings->work_limit != std::numeric_limits::infinity()) { + thrust::sort(thrust::seq, + fj.constraints_changed.begin(), + fj.constraints_changed.begin() + *fj.constraints_changed_count); + } + for (i_t i = 0; i < *fj.constraints_changed_count; ++i) { i_t idx = fj.constraints_changed[i]; if ((idx & 1) == CONSTRAINT_FLAG_INSERT) { @@ -953,7 +991,7 @@ __global__ void compute_iteration_related_variables_kernel( compute_iteration_related_variables(fj); } -template +template __device__ void compute_mtm_moves(typename fj_t::climber_data_t::view_t fj, bool ForceRefresh) { @@ -965,11 +1003,14 @@ __device__ void compute_mtm_moves(typename fj_t::climber_data_t::view_ if (*fj.selected_var == std::numeric_limits::max()) full_refresh = true; // always do a full sweep when looking for satisfied mtm moves - if constexpr (move_type == MTMMoveType::FJ_MTM_SATISFIED) full_refresh = true; - - // only update related variables i_t split_begin, split_end; - if (full_refresh) { + if constexpr (move_type == MTMMoveType::FJ_MTM_SATISFIED) { + full_refresh = true; + split_begin = 0; + split_end = fj.objective_vars.size(); + } + // only update related variables + else if (full_refresh) { split_begin = 0; split_end = fj.pb.n_variables; } @@ -992,9 +1033,15 @@ __device__ void compute_mtm_moves(typename fj_t::climber_data_t::view_ if (FIRST_THREAD) *fj.relvar_count_last_update = split_end - split_begin; for (i_t i = blockIdx.x + split_begin; i < split_end; i += gridDim.x) { - i_t var_idx = full_refresh ? i - : fj.pb.related_variables.size() == 0 ? i - : fj.pb.related_variables[i]; + // if sat MTM mode, go over objective variables only + i_t var_idx; + if constexpr (move_type == MTMMoveType::FJ_MTM_SATISFIED) { + var_idx = fj.objective_vars[i]; + } else { + var_idx = full_refresh ? i + : fj.pb.related_variables.size() == 0 ? i + : fj.pb.related_variables[i]; + } // skip if we couldnt precompute a related var table and // this variable isnt in the dynamic related variable table @@ -1017,7 +1064,7 @@ __device__ void compute_mtm_moves(typename fj_t::climber_data_t::view_ } cuopt_assert(var_idx >= 0 && var_idx < fj.pb.n_variables, ""); - update_jump_value(fj, var_idx); + update_jump_value(fj, var_idx); } } @@ -1025,7 +1072,7 @@ template __global__ void compute_mtm_moves_kernel(typename fj_t::climber_data_t::view_t fj, bool ForceRefresh) { - compute_mtm_moves(fj, ForceRefresh); + compute_mtm_moves(fj, ForceRefresh); } template @@ -1037,8 +1084,9 @@ __global__ void select_variable_kernel(typename fj_t::climber_data_t:: fj.settings->seed, *fj.iterations * fj.settings->parameters.max_sampled_moves, 0); using move_score_t = typename fj_t::move_score_t; - __shared__ alignas(move_score_t) char shmem_storage[2 * raft::WarpSize * sizeof(move_score_t)]; - auto* const shmem = (move_score_t*)shmem_storage; + __shared__ alignas(thrust::pair) char + shmem_storage[raft::WarpSize * sizeof(thrust::pair)]; + auto* const shmem = (thrust::pair*)shmem_storage; auto th_best_score = fj_t::move_score_t::invalid(); i_t th_selected_var = std::numeric_limits::max(); @@ -1075,8 +1123,11 @@ __global__ void select_variable_kernel(typename fj_t::climber_data_t:: } } // Block level reduction to get the best variable from the sample + // Use deterministic tie-breaking comparator based on var_idx auto [best_score, reduced_selected_var] = - raft::blockRankedReduce(th_best_score, shmem, th_selected_var, raft::max_op{}); + raft::blockReduce(thrust::make_pair(th_best_score, th_selected_var), + (char*)shmem, + score_with_tiebreaker_comparator{}); if (FIRST_THREAD) { // assign it to print the value outside th_best_score = best_score; @@ -1111,9 +1162,9 @@ __global__ void select_variable_kernel(typename fj_t::climber_data_t:: i_t var_range = get_upper(bounds) - get_lower(bounds); double delta_rel_err = fabs(fj.jump_move_delta[selected_var]) / var_range * 100; DEVICE_LOG_INFO( - "=---- FJ: selected %d [%g/%g] %c :%.4g+{%.4g}=%.4g score {%g,%g}, d_obj %.2g+%.2g->%.2g, " + "=---- FJ: selected %d [%g/%g] %c :%.4g+{%.4g}=%.4g score {%d,%d}, d_obj %.2g+%.2g->%.2g, " "delta_rel_err %.2g%%, " - "infeas %.2g, total viol %d, out of %d\n", + "infeas %.2g, total viol %d, out of %d, obj %x\n", selected_var, get_lower(bounds), get_upper(bounds), @@ -1130,9 +1181,18 @@ __global__ void select_variable_kernel(typename fj_t::climber_data_t:: delta_rel_err, fj.jump_move_infeasibility[selected_var], fj.violated_constraints.size(), - good_var_count); + good_var_count, + detail::compute_hash(*fj.incumbent_objective)); #endif cuopt_assert(fj.jump_move_scores[selected_var].valid(), ""); + } else { +#if FJ_SINGLE_STEP + DEVICE_LOG_INFO("=[%d]---- FJ: no var selected, obj is %g, viol %d, out of %d\n", + *fj.iterations, + *fj.incumbent_objective, + fj.violated_constraints.size(), + good_var_count); +#endif } } } @@ -1202,27 +1262,32 @@ DI thrust::tuple::move_score_t> gridwide_reduc if (blockIdx.x == 0) { using move_score_t = typename fj_t::move_score_t; - __shared__ alignas(move_score_t) char shmem_storage[2 * raft::WarpSize * sizeof(move_score_t)]; - auto* const shmem = (move_score_t*)shmem_storage; + __shared__ alignas(thrust::pair) char + shmem_storage[2 * raft::WarpSize * sizeof(thrust::pair)]; + auto* const shmem = (thrust::pair*)shmem_storage; auto th_best_score = fj_t::move_score_t::invalid(); i_t th_best_block = 0; + i_t th_best_var = -1; for (i_t i = threadIdx.x; i < gridDim.x; i += blockDim.x) { auto var_idx = fj.grid_var_buf[i]; auto move_score = fj.grid_score_buf[i]; - if (move_score > th_best_score || - (move_score == th_best_score && var_idx > fj.grid_var_buf[th_best_block])) { + if (move_score > th_best_score || (move_score == th_best_score && var_idx > th_best_var)) { th_best_score = move_score; th_best_block = i; + th_best_var = var_idx; } } // Block level reduction to get the best variable from all blocks - auto [reduced_best_score, reduced_best_block] = - raft::blockRankedReduce(th_best_score, shmem, th_best_block, raft::max_op{}); - - if (reduced_best_score.valid() && threadIdx.x == 0) { - cuopt_assert(th_best_block < gridDim.x, ""); + auto [reduced_best_score_pair, reduced_best_block] = + raft::blockRankedReduce(thrust::make_pair(th_best_score, th_best_var), + shmem, + th_best_block, + score_with_tiebreaker_comparator{}); + + if (reduced_best_score_pair.first.valid() && threadIdx.x == 0) { + cuopt_assert(reduced_best_block < gridDim.x, ""); best_var = fj.grid_var_buf[reduced_best_block]; best_delta = fj.grid_delta_buf[reduced_best_block]; best_score = fj.grid_score_buf[reduced_best_block]; @@ -1244,6 +1309,9 @@ DI thrust::tuple::move_score_t> best_random_mt raft::random::PCGenerator rng(fj.settings->seed + *fj.iterations, 0, 0); i_t cstr_idx = fj.violated_constraints.contents[rng.next_u32() % fj.violated_constraints.size()]; + cuopt_assert(fj.excess_score(cstr_idx, fj.incumbent_lhs[cstr_idx]) < 0, + "constraint isn't violated"); + auto [offset_begin, offset_end] = fj.pb.range_for_constraint(cstr_idx); return gridwide_reduce_best_move( @@ -1258,7 +1326,9 @@ DI thrust::tuple::move_score_t> best_sat_cstr_ typename fj_t::climber_data_t::view_t fj) { // compute all MTM moves within satisfied constraints - compute_mtm_moves(fj, true); + compute_mtm_moves(fj, true); + // NOTE: grid sync not required since each block only reduces over variables that it updated in + // compute_mtm_moves return gridwide_reduce_best_move( fj, fj.objective_vars.begin(), fj.objective_vars.end(), [fj] __device__(i_t var_idx) { return fj.jump_move_delta[var_idx]; @@ -1413,9 +1483,10 @@ __global__ void handle_local_minimum_kernel(typename fj_t::climber_dat if (sat_best_score.base > 0 && sat_best_score > best_score) { if (FIRST_THREAD) { - best_score = sat_best_score; - best_var = sat_best_var; - best_delta = sat_best_delta; + best_score = sat_best_score; + best_var = sat_best_var; + best_delta = sat_best_delta; + best_movetype = 'S'; } } } @@ -1427,6 +1498,15 @@ __global__ void handle_local_minimum_kernel(typename fj_t::climber_dat best_var, fj.incumbent_assignment[best_var] + best_delta), "assignment not within bounds"); fj.jump_move_delta[best_var] = best_delta; +#if FJ_SINGLE_STEP + DEVICE_LOG_DEBUG("FJ[%d] selected_var: %d, delta %g, score {%d %d}, type %c\n", + *fj.iterations, + best_var, + best_delta, + best_score.base, + best_score.bonus, + best_movetype); +#endif } } } diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu index 4d567c9ecb..1aead97bc0 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cu @@ -11,11 +11,15 @@ #include "feasibility_jump_impl_common.cuh" #include "fj_cpu.cuh" +#include #include #include +#include #include +#include +#include #include #include #include @@ -38,6 +42,24 @@ namespace cuopt::linear_programming::detail { +namespace { + +double read_positive_work_unit_scale(const char* env_name) +{ + const char* env_value = std::getenv(env_name); + if (env_value == nullptr || env_value[0] == '\0') { return 1.0; } + + errno = 0; + char* end_ptr = nullptr; + const double parsed_value = std::strtod(env_value, &end_ptr); + const bool valid_value = errno == 0 && end_ptr != env_value && *end_ptr == '\0' && + std::isfinite(parsed_value) && parsed_value > 0.0; + cuopt_assert(valid_value, "Invalid CPUFJ work-unit scale env var"); + return parsed_value; +} + +} // namespace + template thrust::tuple get_mtm_for_bound(const typename fj_t::climber_data_t::view_t& fj, i_t var_idx, @@ -107,99 +129,6 @@ thrust::tuple get_mtm_for_constraint( return {delta_ij, sign, slack, cstr_tolerance}; } -template -std::pair feas_score_constraint(const typename fj_t::climber_data_t::view_t& fj, - i_t var_idx, - f_t delta, - i_t cstr_idx, - f_t cstr_coeff, - f_t c_lb, - f_t c_ub, - f_t current_lhs, - f_t left_weight, - f_t right_weight) -{ - cuopt_assert(isfinite(delta), "invalid delta"); - cuopt_assert(cstr_coeff != 0 && isfinite(cstr_coeff), "invalid coefficient"); - - f_t base_feas = 0; - f_t bonus_robust = 0; - - f_t bounds[2] = {c_lb, c_ub}; - cuopt_assert(isfinite(c_lb) || isfinite(c_ub), "no range"); - for (i_t bound_idx = 0; bound_idx < 2; ++bound_idx) { - if (!isfinite(bounds[bound_idx])) continue; - - // factor to correct the lhs/rhs to turn a lb <= lhs <= ub constraint into - // two virtual leq constraints "lhs <= ub" and "-lhs <= -lb" in order to match - // the convention of the paper - - // TODO: broadcast left/right weights to a csr_offset-indexed table? local minimums - // usually occur on a rarer basis (around 50 iteratiosn to 1 local minimum) - // likely unreasonable and overkill however - f_t cstr_weight = bound_idx == 0 ? left_weight : right_weight; - f_t sign = bound_idx == 0 ? -1 : 1; - f_t rhs = bounds[bound_idx] * sign; - f_t old_lhs = current_lhs * sign; - f_t new_lhs = (current_lhs + cstr_coeff * delta) * sign; - f_t old_slack = rhs - old_lhs; - f_t new_slack = rhs - new_lhs; - - cuopt_assert(isfinite(cstr_weight), "invalid weight"); - cuopt_assert(cstr_weight >= 0, "invalid weight"); - cuopt_assert(isfinite(old_lhs), ""); - cuopt_assert(isfinite(new_lhs), ""); - cuopt_assert(isfinite(old_slack) && isfinite(new_slack), ""); - - f_t cstr_tolerance = fj.get_corrected_tolerance(cstr_idx, c_lb, c_ub); - - bool old_viol = fj.excess_score(cstr_idx, current_lhs, c_lb, c_ub) < -cstr_tolerance; - bool new_viol = - fj.excess_score(cstr_idx, current_lhs + cstr_coeff * delta, c_lb, c_ub) < -cstr_tolerance; - - bool old_sat = old_lhs < rhs + cstr_tolerance; - bool new_sat = new_lhs < rhs + cstr_tolerance; - - // equality - if (fj.pb.integer_equal(c_lb, c_ub)) { - if (!old_viol) cuopt_assert(old_sat == !old_viol, ""); - if (!new_viol) cuopt_assert(new_sat == !new_viol, ""); - } - - // if it would feasibilize this constraint - if (!old_sat && new_sat) { - cuopt_assert(old_viol, ""); - base_feas += cstr_weight; - } - // would cause this constraint to be violated - else if (old_sat && !new_sat) { - cuopt_assert(new_viol, ""); - base_feas -= cstr_weight; - } - // simple improvement - else if (!old_sat && !new_sat && old_lhs > new_lhs) { - cuopt_assert(old_viol && new_viol, ""); - base_feas += (i_t)(cstr_weight * fj.settings->parameters.excess_improvement_weight); - } - // simple worsening - else if (!old_sat && !new_sat && old_lhs <= new_lhs) { - cuopt_assert(old_viol && new_viol, ""); - base_feas -= (i_t)(cstr_weight * fj.settings->parameters.excess_improvement_weight); - } - - // robustness score bonus if this would leave some strick slack - bool old_stable = old_lhs < rhs - cstr_tolerance; - bool new_stable = new_lhs < rhs - cstr_tolerance; - if (!old_stable && new_stable) { - bonus_robust += cstr_weight; - } else if (old_stable && !new_stable) { - bonus_robust -= cstr_weight; - } - } - - return {base_feas, bonus_robust}; -} - static constexpr double BIGVAL_THRESHOLD = 1e20; template @@ -702,6 +631,21 @@ static void apply_move(fj_cpu_climber_t& fj_cpu, raft::random::PCGenerator rng(fj_cpu.settings.seed + fj_cpu.iterations, 0, 0); cuopt_assert(var_idx < fj_cpu.view.pb.n_variables, "variable index out of bounds"); + f_t old_val = fj_cpu.h_assignment[var_idx]; + f_t new_val = old_val + delta; + if (is_integer_var(fj_cpu, var_idx)) { + cuopt_assert(fj_cpu.view.pb.integer_equal(new_val, round(new_val)), "new_val is not integer"); + new_val = round(new_val); + } + // clamp to var bounds + new_val = std::min(std::max(new_val, get_lower(fj_cpu.h_var_bounds[var_idx].get())), + get_upper(fj_cpu.h_var_bounds[var_idx].get())); + delta = new_val - old_val; + cuopt_assert(isfinite(new_val), "assignment is not finite"); + cuopt_assert(isfinite(delta), "applied delta is not finite"); + cuopt_assert((check_variable_within_bounds(fj_cpu, var_idx, new_val)), + "assignment not within bounds"); + // Update the LHSs of all involved constraints. auto [offset_begin, offset_end] = reverse_range_for_var(fj_cpu, var_idx); @@ -761,17 +705,7 @@ static void apply_move(fj_cpu_climber_t& fj_cpu, } // update the assignment and objective proper - f_t new_val = fj_cpu.h_assignment[var_idx] + delta; - if (is_integer_var(fj_cpu, var_idx)) { - cuopt_assert(fj_cpu.view.pb.integer_equal(new_val, round(new_val)), "new_val is not integer"); - new_val = round(new_val); - } fj_cpu.h_assignment[var_idx] = new_val; - - cuopt_assert((check_variable_within_bounds(fj_cpu, var_idx, new_val)), - "assignment not within bounds"); - cuopt_assert(isfinite(new_val), "assignment is not finite"); - fj_cpu.h_incumbent_objective += fj_cpu.h_obj_coeffs[var_idx] * delta; if (fj_cpu.h_incumbent_objective < fj_cpu.h_best_objective && fj_cpu.violated_constraints.empty()) { @@ -786,11 +720,11 @@ static void apply_move(fj_cpu_climber_t& fj_cpu, fj_cpu.iterations_since_best = 0; CUOPT_LOG_TRACE("%sCPUFJ: new best objective: %g", fj_cpu.log_prefix.c_str(), - fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_best_objective)); + fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_incumbent_objective)); if (fj_cpu.improvement_callback) { double current_work_units = fj_cpu.work_units_elapsed.load(std::memory_order_acquire); fj_cpu.improvement_callback( - fj_cpu.h_best_objective, fj_cpu.h_assignment, current_work_units); + fj_cpu.h_incumbent_objective, fj_cpu.h_assignment, current_work_units); } fj_cpu.feasible_found = true; } @@ -1021,6 +955,13 @@ static void recompute_lhs(fj_cpu_climber_t& fj_cpu) CPUFJ_NVTX_RANGE("CPUFJ::recompute_lhs"); cuopt_assert(fj_cpu.h_lhs.size() == fj_cpu.view.pb.n_constraints, "h_lhs size mismatch"); + // clamp to var bounds - defensive; apply_move should already have clamped appropriately + for (i_t var_idx = 0; var_idx < fj_cpu.view.pb.n_variables; ++var_idx) { + fj_cpu.h_assignment[var_idx] = std::min( + std::max(fj_cpu.h_assignment[var_idx].get(), get_lower(fj_cpu.h_var_bounds[var_idx].get())), + get_upper(fj_cpu.h_var_bounds[var_idx].get())); + } + fj_cpu.violated_constraints.clear(); fj_cpu.satisfied_constraints.clear(); fj_cpu.total_violations = 0; @@ -1389,6 +1330,15 @@ std::unique_ptr> fj_t::create_cpu_climber( // Initialize fj_cpu with all the data init_fj_cpu(*fj_cpu, solution, left_weights, right_weights, objective_weight); + const double cpu_work_unit_scale = + context.settings.cpufj_work_unit_scale != 1.0 + ? context.settings.cpufj_work_unit_scale + : read_positive_work_unit_scale("CUOPT_CPUFJ_WORK_UNIT_SCALE"); + fj_cpu->work_unit_bias *= cpu_work_unit_scale; + if (cpu_work_unit_scale != 1.0) { + CUOPT_DETERMINISM_LOG( + "CPUFJ using work-unit scale %f (bias=%f)", cpu_work_unit_scale, fj_cpu->work_unit_bias); + } fj_cpu->settings = settings; if (randomize_params) { auto rng = std::mt19937(cuopt::seed_generator::get_seed()); @@ -1546,6 +1496,10 @@ bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_l CUOPT_LOG_TRACE("CPUFJ work units: %f incumbent %g", fj_cpu.work_units_elapsed.load(std::memory_order_relaxed), fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_best_objective)); + + if (fj_cpu.work_units_elapsed.load(std::memory_order_relaxed) >= fj_cpu.work_budget) { + break; + } } cuopt_func_call(sanity_checks(fj_cpu)); diff --git a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh index 7dcc8d39b0..d1684a2774 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/fj_cpu.cuh @@ -154,7 +154,8 @@ struct fj_cpu_climber_t { // Work unit tracking for deterministic synchronization std::atomic work_units_elapsed{0.0}; - double work_unit_bias{1.5}; // Bias factor to keep CPUFJ ahead of B&B + double work_unit_bias{1.5}; // Bias factor to keep CPUFJ ahead of B&B + double work_budget{std::numeric_limits::infinity()}; producer_sync_t* producer_sync{nullptr}; // Optional sync utility for notifying progress std::atomic halted{false}; diff --git a/cpp/src/mip_heuristics/feasibility_jump/load_balancing.cuh b/cpp/src/mip_heuristics/feasibility_jump/load_balancing.cuh index dfc9b3c885..92df17ca34 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/load_balancing.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/load_balancing.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -124,12 +124,14 @@ __global__ void load_balancing_prepare_iteration(const __grid_constant__ // alternate codepath in the case of a small related_var/total_var ratio if (!full_refresh && fj.pb.related_variables.size() > 0 && fj.pb.n_variables / fj.work_ids_for_related_vars[*fj.selected_var] >= - fj.settings->parameters.old_codepath_total_var_to_relvar_ratio_threshold) { + fj.settings->parameters.old_codepath_total_var_to_relvar_ratio_threshold && + fj.settings->load_balancing_mode != fj_load_balancing_mode_t::ALWAYS_ON) { auto range = fj.pb.range_for_related_vars(*fj.selected_var); for (i_t i = blockIdx.x + range.first; i < range.second; i += gridDim.x) { i_t var_idx = fj.pb.related_variables[i]; - update_jump_value(fj, var_idx); + update_jump_value(fj, + var_idx); } if (FIRST_THREAD) *fj.load_balancing_skip = true; @@ -334,8 +336,17 @@ __global__ void load_balancing_compute_scores_binary( auto c_lb = fj.constraint_lower_bounds_csr[csr_offset]; auto c_ub = fj.constraint_upper_bounds_csr[csr_offset]; - auto [cstr_base_feas, cstr_bonus_robust] = feas_score_constraint( - fj, var_idx, delta, cstr_idx, cstr_coeff, c_lb, c_ub, fj.incumbent_lhs[cstr_idx]); + auto [cstr_base_feas, cstr_bonus_robust] = + feas_score_constraint(fj, + var_idx, + delta, + cstr_idx, + cstr_coeff, + c_lb, + c_ub, + fj.incumbent_lhs[cstr_idx], + fj.cstr_left_weights[cstr_idx], + fj.cstr_right_weights[cstr_idx]); base_feas += cstr_base_feas; bonus_robust += cstr_bonus_robust; @@ -526,8 +537,8 @@ __launch_bounds__(TPB_loadbalance, 16) __global__ auto& score_info = candidate.score; - f_t base_feas = 0; - f_t bonus_robust = 0; + int32_t base_feas = 0; + int32_t bonus_robust = 0; // same as for the binary var kernel, compute each score compoenent per thread // and merge then via a wapr reduce @@ -535,8 +546,17 @@ __launch_bounds__(TPB_loadbalance, 16) __global__ cuopt_assert(c_lb == fj.pb.constraint_lower_bounds[cstr_idx], "bound sanity check failed"); cuopt_assert(c_ub == fj.pb.constraint_upper_bounds[cstr_idx], "bound sanity check failed"); - auto [cstr_base_feas, cstr_bonus_robust] = feas_score_constraint( - fj, var_idx, delta, cstr_idx, cstr_coeff, c_lb, c_ub, fj.incumbent_lhs[cstr_idx]); + auto [cstr_base_feas, cstr_bonus_robust] = + feas_score_constraint(fj, + var_idx, + delta, + cstr_idx, + cstr_coeff, + c_lb, + c_ub, + fj.incumbent_lhs[cstr_idx], + fj.cstr_left_weights[cstr_idx], + fj.cstr_right_weights[cstr_idx]); base_feas += cstr_base_feas; bonus_robust += cstr_bonus_robust; @@ -565,24 +585,29 @@ __launch_bounds__(TPB_loadbalance, 16) __global__ best_score_ref{fj.jump_move_scores[var_idx]}; auto best_score = best_score_ref.load(cuda::memory_order_relaxed); + cuda::atomic_ref best_delta_ref{ + fj.jump_move_delta[var_idx]}; + auto best_delta = best_delta_ref.load(cuda::memory_order_relaxed); + if (best_score < candidate.score || - (best_score == candidate.score && candidate.delta < fj.jump_move_delta[var_idx])) { + (best_score == candidate.score && candidate.delta < best_delta)) { // update the best move delta acquire_lock(&fj.jump_locks[var_idx]); // reject this move if it would increase the target variable to a numerically unstable // value - if (!fj.move_numerically_stable(fj.incumbent_assignment[var_idx], - fj.incumbent_assignment[var_idx] + delta, - base_feas, - *fj.violation_score)) { - fj.jump_move_scores[var_idx] = fj_t::move_score_t::invalid(); - } else if (fj.jump_move_scores[var_idx] < candidate.score - // determinism for ease of debugging - || (fj.jump_move_scores[var_idx] == candidate.score && - candidate.delta < fj.jump_move_delta[var_idx])) { - fj.jump_move_delta[var_idx] = candidate.delta; - fj.jump_move_scores[var_idx] = candidate.score; + // only skip updating, don't invalidate existing valid moves + if (fj.move_numerically_stable(fj.incumbent_assignment[var_idx], + fj.incumbent_assignment[var_idx] + delta, + base_feas, + *fj.violation_score)) { + if (fj.jump_move_scores[var_idx] < candidate.score + // determinism for ease of debugging + || (fj.jump_move_scores[var_idx] == candidate.score && + candidate.delta < fj.jump_move_delta[var_idx])) { + fj.jump_move_delta[var_idx] = candidate.delta; + fj.jump_move_scores[var_idx] = candidate.score; + } } release_lock(&fj.jump_locks[var_idx]); } @@ -644,7 +669,7 @@ __global__ void load_balancing_sanity_checks(const __grid_constant__ if (!(score_1 == score_1.invalid() && score_2 == score_2.invalid()) && !(v.pb.integer_equal(score_1.base, score_2.base) && v.pb.integer_equal(score_1.bonus, score_2.bonus))) { - printf("(iter %d) [%d, int:%d]: delta %g/%g was %f/%f, is %f/%f\n", + printf("(iter %d) [%d, int:%d]: delta %g/%g was %d/%d, is %d/%d\n", *v.iterations, var_idx, v.pb.is_integer_var(var_idx), diff --git a/cpp/src/mip_heuristics/feasibility_jump/utils.cuh b/cpp/src/mip_heuristics/feasibility_jump/utils.cuh index d98686bcc6..b779e56a21 100644 --- a/cpp/src/mip_heuristics/feasibility_jump/utils.cuh +++ b/cpp/src/mip_heuristics/feasibility_jump/utils.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -10,6 +10,7 @@ #include "feasibility_jump.cuh" #include +#include #include #include #include @@ -133,6 +134,21 @@ struct contiguous_set_t { validity_bitmap.resize(size, stream); } + void sort(const rmm::cuda_stream_view& stream) + { + thrust::sort( + rmm::exec_policy(stream), contents.begin(), contents.begin() + set_size.value(stream)); + thrust::fill(rmm::exec_policy(stream), index_map.begin(), index_map.end(), -1); + thrust::for_each(rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(set_size.value(stream)), + [v = view()] __device__(i_t idx) { v.index_map[v.contents[idx]] = idx; }); + + // TODO: remove, only useful for debugging and ensuring the same hashes + thrust::fill( + rmm::exec_policy(stream), contents.begin() + set_size.value(stream), contents.end(), 0); + } + struct view_t { i_t* set_size; i_t* lock; diff --git a/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cu b/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cu index f28faec249..1a62281450 100644 --- a/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cu +++ b/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cu @@ -5,6 +5,13 @@ */ /* clang-format on */ +// uncomment to enable detailed detemrinism logs +#undef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(...) \ + do { \ + CUOPT_LOG_INFO(__VA_ARGS__); \ + } while (0) + #include "feasibility_pump.cuh" #include @@ -52,7 +59,7 @@ feasibility_pump_t::feasibility_pump_t( context.problem_ptr->handle_ptr->get_stream()), lp_optimal_solution(lp_optimal_solution_), rng(cuopt::seed_generator::get_seed()), - timer(20.) + timer(20., *context.termination) { } @@ -147,18 +154,36 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_t temp_p(*solution.problem_ptr); auto h_integer_indices = cuopt::host_copy(solution.problem_ptr->integer_indices, solution.handle_ptr->get_stream()); + cuopt_assert(h_assignment.size() == solution.problem_ptr->n_variables, "Size mismatch"); + cuopt_assert(h_last_projection.size() == solution.problem_ptr->n_variables, "Size mismatch"); + cuopt_assert(h_variable_bounds.size() == solution.problem_ptr->n_variables, "Size mismatch"); + CUOPT_DETERMINISM_LOG( + "FP proj inputs: assign_hash=0x%x last_proj_hash=0x%x integer_idx_hash=0x%x n_vars=%d n_int=%d", + detail::compute_hash(h_assignment), + detail::compute_hash(h_last_projection), + detail::compute_hash(h_integer_indices), + solution.problem_ptr->n_variables, + solution.problem_ptr->n_integer_vars); f_t obj_offset = 0; + i_t n_at_upper = 0; + i_t n_at_lower = 0; + i_t n_interior = 0; + std::vector interior_integer_indices; + interior_integer_indices.reserve(h_integer_indices.size()); // for each integer add the variable and the distance constraints for (auto i : h_integer_indices) { + cuopt_assert(i >= 0 && i < solution.problem_ptr->n_variables, "Index out of bounds"); auto h_var_bounds = h_variable_bounds[i]; if (solution.problem_ptr->integer_equal(h_assignment[i], get_upper(h_var_bounds))) { obj_offset += get_upper(h_var_bounds); // set the objective weight to -1, u - x obj_coefficients[i] = -1; + n_at_upper++; } else if (solution.problem_ptr->integer_equal(h_assignment[i], get_lower(h_var_bounds))) { obj_offset -= get_lower(h_var_bounds); // set the objective weight to +1, x - l obj_coefficients[i] = 1; + n_at_lower++; } else { // objective weight is 1 const f_t obj_weight = 1.; @@ -183,9 +208,30 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_t constr_coeffs_2{1, 1}; h_constraints.add_constraint( constr_indices, constr_coeffs_2, h_assignment[i], (f_t)default_cont_upper); + n_interior++; + interior_integer_indices.push_back(i); } } + CUOPT_DETERMINISM_LOG( + "FP proj build: at_lower=%d at_upper=%d interior=%d interior_idx_hash=0x%x obj_hash=0x%x " + "assign_aug_hash=0x%x vars_added=%d cstr_added=%d cstr_var_hash=0x%x cstr_coeff_hash=0x%x " + "cstr_offset_hash=0x%x cstr_lb_hash=0x%x cstr_ub_hash=0x%x", + n_at_lower, + n_at_upper, + n_interior, + detail::compute_hash(interior_integer_indices), + detail::compute_hash(obj_coefficients), + detail::compute_hash(h_assignment), + h_variables.size(), + h_constraints.n_constraints(), + detail::compute_hash(h_constraints.constraint_variables), + detail::compute_hash(h_constraints.constraint_coefficients), + detail::compute_hash(h_constraints.constraint_offsets), + detail::compute_hash(h_constraints.constraint_lower_bounds), + detail::compute_hash(h_constraints.constraint_upper_bounds)); adjust_objective_with_original(solution, obj_coefficients, longer_lp_run); + CUOPT_DETERMINISM_LOG("FP proj adjusted objective hash=0x%x", + detail::compute_hash(obj_coefficients)); // commit all the changes that were done by the host if (h_variables.size() > 0) { temp_p.insert_variables(h_variables); } if (h_constraints.n_constraints() > 0) { temp_p.insert_constraints(h_constraints); } @@ -196,6 +242,12 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_tget_stream()), + temp_p.n_variables, + temp_p.n_constraints); // copy new objective coefficients raft::copy(temp_p.objective_coefficients.data(), obj_coefficients.data(), @@ -208,14 +260,20 @@ bool feasibility_pump_t::linear_project_onto_polytope(solution_t::round(solution_t& solution) { bool result; CUOPT_LOG_DEBUG("Rounding the point"); - timer_t bounds_prop_timer(std::max(0.05, std::min(0.5, timer.remaining_time() / 10.))); + f_t bounds_prop_time_limit = std::min((f_t)0.5, timer.remaining_time() / 10.); + if (timer.deterministic) { + bounds_prop_time_limit = std::max((f_t)0.0, bounds_prop_time_limit); + } else { + bounds_prop_time_limit = std::max((f_t)0.05, bounds_prop_time_limit); + } + work_limit_timer_t bounds_prop_timer( + context.gpu_heur_loop, bounds_prop_time_limit, *context.termination); const f_t lp_run_time_after_feasible = 0.; bool old_var = constraint_prop.round_all_vars; f_t old_time = constraint_prop.max_time_for_bounds_prop; @@ -307,6 +372,13 @@ bool feasibility_pump_t::test_fj_feasible(solution_t& soluti fj.settings.feasibility_run = true; fj.settings.n_of_minimums_for_exit = 5000; fj.settings.time_limit = std::min(time_limit, timer.remaining_time()); + if (timer.deterministic) { + fj.settings.time_limit = std::max((f_t)0.0, fj.settings.time_limit); + if (fj.settings.time_limit == 0.0) { + CUOPT_LOG_DEBUG("Skipping 20%% FJ run due to exhausted deterministic work budget"); + return false; + } + } cuopt_func_call(solution.test_variable_bounds(true)); is_feasible = fj.solve(solution); cuopt_func_call(solution.test_variable_bounds(true)); @@ -471,14 +543,38 @@ template bool feasibility_pump_t::run_single_fp_descent(solution_t& solution) { raft::common::nvtx::range fun_scope("run_single_fp_descent"); + i_t fp_iter = 0; + CUOPT_DETERMINISM_LOG("FP descent start: hash=0x%x feas=%d obj=%.12f timer_det=%d rem=%.6f", + solution.get_hash(), + (int)solution.get_feasible(), + solution.get_user_objective(), + (int)timer.deterministic, + timer.remaining_time()); // start by doing nearest rounding solution.round_nearest(); + CUOPT_DETERMINISM_LOG("FP descent after initial round: hash=0x%x feas=%d obj=%.12f", + solution.get_hash(), + (int)solution.get_feasible(), + solution.get_user_objective()); + cuopt_assert(last_projection.size() == solution.assignment.size(), "Size mismatch"); + // First projection in a descent has no previous projection history: initialize explicitly + raft::copy(last_projection.data(), + solution.assignment.data(), + solution.assignment.size(), + solution.handle_ptr->get_stream()); raft::copy(last_rounding.data(), solution.assignment.data(), solution.assignment.size(), solution.handle_ptr->get_stream()); while (true) { - if (context.diversity_manager_ptr->check_b_b_preemption() || timer.check_time_limit()) { + CUOPT_DETERMINISM_LOG("FP iter %d pre-projection: hash=0x%x feas=%d obj=%.12f rem=%.6f", + fp_iter, + solution.get_hash(), + (int)solution.get_feasible(), + solution.get_user_objective(), + timer.remaining_time()); + bool preempt = (context.diversity_manager_ptr->check_b_b_preemption()); + if (preempt || timer.check_time_limit()) { CUOPT_LOG_DEBUG("FP time limit reached!"); round(solution); return false; @@ -488,10 +584,25 @@ bool feasibility_pump_t::run_single_fp_descent(solution_t& s f_t ratio_of_assigned_integers = f_t(solution.n_assigned_integers) / solution.problem_ptr->n_integer_vars; bool is_feasible = linear_project_onto_polytope(solution, ratio_of_assigned_integers); - i_t n_integers = solution.compute_number_of_integers(); + const f_t remaining_after_projection = timer.remaining_time(); + i_t n_integers = solution.compute_number_of_integers(); CUOPT_LOG_DEBUG("after fp projection n_integers %d total n_integes %d", n_integers, solution.problem_ptr->n_integer_vars); + CUOPT_DETERMINISM_LOG( + "FP iter %d post-projection: hash=0x%x feasible_after_lp=%d obj=%.12f rem=%.6f lp_stage=%.6f", + fp_iter, + solution.get_hash(), + (int)is_feasible, + solution.get_user_objective(), + remaining_after_projection, + proj_begin - remaining_after_projection); + CUOPT_DETERMINISM_LOG("FP iter %d pre-round: hash=0x%x feas=%d obj=%.12f rem=%.6f", + fp_iter, + solution.get_hash(), + (int)is_feasible, + solution.get_user_objective(), + remaining_after_projection); bool is_cycle = true; // temp comment for presolve run if (config.check_distance_cycle) { @@ -523,30 +634,71 @@ bool feasibility_pump_t::run_single_fp_descent(solution_t& s // run the LP with full precision to check if it actually is feasible const f_t lp_verify_time_limit = 5.; relaxed_lp_settings_t lp_settings; - lp_settings.time_limit = lp_verify_time_limit; + lp_settings.time_limit = lp_verify_time_limit; + bool run_verify_lp = true; + if (timer.deterministic) { + const f_t remaining_work_limit = std::max((f_t)0.0, timer.remaining_time()); + lp_settings.work_limit = std::min(lp_verify_time_limit, remaining_work_limit); + lp_settings.time_limit = lp_settings.work_limit; + if (lp_settings.work_limit == 0.0) { + CUOPT_LOG_DEBUG( + "Skipping FP verification LP due to exhausted deterministic work budget"); + run_verify_lp = false; + } + } + lp_settings.work_context = timer.work_context; lp_settings.tolerance = solution.problem_ptr->tolerances.absolute_tolerance; lp_settings.return_first_feasible = true; lp_settings.save_state = true; - run_lp_with_vars_fixed(*solution.problem_ptr, - solution, - solution.problem_ptr->integer_indices, - lp_settings, - &constraint_prop.bounds_update); - is_feasible = solution.get_feasible(); - n_integers = solution.compute_number_of_integers(); - if (is_feasible && n_integers == solution.problem_ptr->n_integer_vars) { - CUOPT_LOG_DEBUG("Feasible solution verified with LP!"); - return true; + if (run_verify_lp) { + run_lp_with_vars_fixed(*solution.problem_ptr, + solution, + solution.problem_ptr->integer_indices, + lp_settings, + &constraint_prop.bounds_update); + is_feasible = solution.get_feasible(); + n_integers = solution.compute_number_of_integers(); + if (is_feasible && n_integers == solution.problem_ptr->n_integer_vars) { + CUOPT_LOG_TRACE("Feasible solution verified with LP!"); + return true; + } } } } cuopt_func_call(solution.test_variable_bounds(false)); is_feasible = round(solution); cuopt_func_call(solution.test_variable_bounds(true)); - proj_and_round_time = proj_begin - timer.remaining_time(); + const f_t remaining_after_round = timer.remaining_time(); + proj_and_round_time = proj_begin - remaining_after_round; + CUOPT_DETERMINISM_LOG( + "FP iter %d post-round: hash=0x%x feasible_after_round=%d obj=%.12f rem=%.6f " + "round_stage=%.6f proj_round_total=%.6f", + fp_iter, + solution.get_hash(), + (int)is_feasible, + solution.get_user_objective(), + remaining_after_round, + remaining_after_projection - remaining_after_round, + proj_and_round_time); if (!is_feasible) { const f_t time_ratio = 0.2; - is_feasible = test_fj_feasible(solution, time_ratio * proj_and_round_time); + const f_t fj_budget = time_ratio * proj_and_round_time; + CUOPT_DETERMINISM_LOG("FP iter %d pre-fj-fallback: hash=0x%x rem=%.6f fj_budget=%.6f", + fp_iter, + solution.get_hash(), + remaining_after_round, + fj_budget); + is_feasible = test_fj_feasible(solution, fj_budget); + const f_t remaining_after_fj = timer.remaining_time(); + CUOPT_DETERMINISM_LOG( + "FP iter %d post-fj-fallback: hash=0x%x feasible_after_fj=%d obj=%.12f rem=%.6f " + "fj_stage=%.6f", + fp_iter, + solution.get_hash(), + (int)is_feasible, + solution.get_user_objective(), + remaining_after_fj, + remaining_after_round - remaining_after_fj); } if (timer.check_time_limit()) { CUOPT_LOG_DEBUG("FP time limit reached!"); @@ -575,6 +727,7 @@ bool feasibility_pump_t::run_single_fp_descent(solution_t& s return false; } cycle_queue.n_iterations_without_cycle++; + fp_iter++; } // unreachable return false; diff --git a/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cuh b/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cuh index 4653e38615..2d1135b48b 100644 --- a/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cuh +++ b/cpp/src/mip_heuristics/local_search/feasibility_pump/feasibility_pump.cuh @@ -105,7 +105,6 @@ class feasibility_pump_t { feasibility_pump_t() = delete; feasibility_pump_t(mip_solver_context_t& context, fj_t& fj, - // fj_tree_t& fj_tree_, constraint_prop_t& constraint_prop_, line_segment_search_t& line_segment_search_, rmm::device_uvector& lp_optimal_solution_); @@ -127,7 +126,7 @@ class feasibility_pump_t { bool check_distance_cycle(solution_t& solution); void reset(); void resize_vectors(problem_t& problem, const raft::handle_t* handle_ptr); - bool random_round_with_fj(solution_t& solution, timer_t& round_timer); + bool random_round_with_fj(solution_t& solution, work_limit_timer_t& round_timer); bool round_multiple_points(solution_t& solution); void relax_general_integers(solution_t& solution); void revert_relaxation(solution_t& solution); @@ -136,7 +135,6 @@ class feasibility_pump_t { mip_solver_context_t& context; // keep a reference from upstream local search fj_t& fj; - // fj_tree_t& fj_tree; line_segment_search_t& line_segment_search; cycle_queue_t cycle_queue; constraint_prop_t& constraint_prop; @@ -155,7 +153,7 @@ class feasibility_pump_t { f_t proj_begin; i_t n_fj_single_descents; i_t max_n_of_integers = 0; - cuopt::timer_t timer; + cuopt::work_limit_timer_t timer; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/local_search/line_segment_search/line_segment_search.cu b/cpp/src/mip_heuristics/local_search/line_segment_search/line_segment_search.cu index ce70aec745..094a45cd17 100644 --- a/cpp/src/mip_heuristics/local_search/line_segment_search/line_segment_search.cu +++ b/cpp/src/mip_heuristics/local_search/line_segment_search/line_segment_search.cu @@ -17,8 +17,10 @@ namespace cuopt::linear_programming::detail { template line_segment_search_t::line_segment_search_t( - fj_t& fj_, constraint_prop_t& constraint_prop_) - : fj(fj_), constraint_prop(constraint_prop_) + mip_solver_context_t& context_, + fj_t& fj_, + constraint_prop_t& constraint_prop_) + : context(context_), fj(fj_), constraint_prop(constraint_prop_) { } @@ -128,7 +130,7 @@ bool line_segment_search_t::search_line_segment( const rmm::device_uvector& point_2, const rmm::device_uvector& delta_vector, bool is_feasibility_run, - cuopt::timer_t& timer) + cuopt::work_limit_timer_t& timer) { CUOPT_LOG_DEBUG("Running line segment search with a given delta vector"); cuopt_assert(point_1.size() == point_2.size(), "size mismatch"); @@ -263,7 +265,7 @@ bool line_segment_search_t::search_line_segment(solution_t& const rmm::device_uvector& point_1, const rmm::device_uvector& point_2, bool is_feasibility_run, - cuopt::timer_t& timer) + cuopt::work_limit_timer_t& timer) { CUOPT_LOG_DEBUG("Running line segment search"); cuopt_assert(point_1.size() == point_2.size(), "size mismatch"); diff --git a/cpp/src/mip_heuristics/local_search/line_segment_search/line_segment_search.cuh b/cpp/src/mip_heuristics/local_search/line_segment_search/line_segment_search.cuh index 30e169e9d9..7a040ddbd2 100644 --- a/cpp/src/mip_heuristics/local_search/line_segment_search/line_segment_search.cuh +++ b/cpp/src/mip_heuristics/local_search/line_segment_search/line_segment_search.cuh @@ -9,7 +9,7 @@ #include #include -#include +#include namespace cuopt::linear_programming::detail { @@ -26,19 +26,21 @@ template class line_segment_search_t { public: line_segment_search_t() = delete; - line_segment_search_t(fj_t& fj, constraint_prop_t& constraint_prop); + line_segment_search_t(mip_solver_context_t& context, + fj_t& fj, + constraint_prop_t& constraint_prop); bool search_line_segment(solution_t& solution, const rmm::device_uvector& point_1, const rmm::device_uvector& point_2, bool is_feasibility_run, - cuopt::timer_t& timer); + cuopt::work_limit_timer_t& timer); bool search_line_segment(solution_t& solution, const rmm::device_uvector& point_1, const rmm::device_uvector& point_2, const rmm::device_uvector& delta_vector, bool is_feasibility_run, - cuopt::timer_t& timer); + cuopt::work_limit_timer_t& timer); void save_solution_if_better(solution_t& solution, const rmm::device_uvector& point_1, @@ -49,6 +51,7 @@ class line_segment_search_t { f_t& best_feasible_cost, f_t curr_cost); + mip_solver_context_t& context; fj_t& fj; constraint_prop_t& constraint_prop; line_segment_settings_t settings; diff --git a/cpp/src/mip_heuristics/local_search/local_search.cu b/cpp/src/mip_heuristics/local_search/local_search.cu index 16d0a3f874..75dc93002c 100644 --- a/cpp/src/mip_heuristics/local_search/local_search.cu +++ b/cpp/src/mip_heuristics/local_search/local_search.cu @@ -5,6 +5,13 @@ */ /* clang-format on */ +// uncomment to enable detailed detemrinism logs +#undef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(...) \ + do { \ + CUOPT_LOG_INFO(__VA_ARGS__); \ + } while (0) + #include "lagrangian.cuh" #include "local_search.cuh" @@ -15,8 +22,9 @@ #include #include #include +#include #include -#include +#include #include @@ -36,7 +44,7 @@ local_search_t::local_search_t(mip_solver_context_t& context fj(context), // fj_tree(fj), constraint_prop(context), - line_segment_search(fj, constraint_prop), + line_segment_search(context, fj, constraint_prop), fp(context, fj, // fj_tree, @@ -53,16 +61,15 @@ local_search_t::local_search_t(mip_solver_context_t& context cpu_fj.fj_ptr = &fj; } scratch_cpu_fj_on_lp_opt.fj_ptr = &fj; + CUOPT_DETERMINISM_LOG("Deterministic solve start local_search state: seed_state=%lld", + (long long)cuopt::seed_generator::peek_seed()); } -static double local_search_best_obj = std::numeric_limits::max(); -static population_t* pop_ptr = nullptr; - template void local_search_t::start_cpufj_scratch_threads(population_t& population) { - pop_ptr = &population; - + cuopt_assert(!(context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS), + "Scratch CPUFJ must remain opportunistic-only"); std::vector default_weights(context.problem_ptr->n_constraints, 1.); solution_t solution(*context.problem_ptr); @@ -84,18 +91,9 @@ void local_search_t::start_cpufj_scratch_threads(population_tlog_prefix = "******* scratch " + std::to_string(counter) + ": "; cpu_fj.fj_cpu->improvement_callback = - [&population, problem_ptr = context.problem_ptr]( - f_t obj, const std::vector& h_vec, double /*work_units*/) { - population.add_external_solution(h_vec, obj, solution_origin_t::CPUFJ); - (void)problem_ptr; - if (obj < local_search_best_obj) { - CUOPT_LOG_TRACE("******* New local search best obj %g, best overall %g", - problem_ptr->get_user_obj_from_solver_obj(obj), - problem_ptr->get_user_obj_from_solver_obj( - population.is_feasible() ? population.best_feasible().get_objective() - : std::numeric_limits::max())); - local_search_best_obj = obj; - } + [&population](f_t obj, const std::vector& h_vec, double /*work_units*/) { + population.add_external_solution( + h_vec, obj, internals::mip_solution_origin_t::CPU_FEASIBILITY_JUMP); }; counter++; }; @@ -109,7 +107,8 @@ template void local_search_t::start_cpufj_lptopt_scratch_threads( population_t& population) { - pop_ptr = &population; + cuopt_assert(!(context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS), + "LP-opt CPUFJ scratch must remain opportunistic-only"); std::vector default_weights(context.problem_ptr->n_constraints, 1.); @@ -121,16 +120,9 @@ void local_search_t::start_cpufj_lptopt_scratch_threads( solution_lp, default_weights, default_weights, 0., context.preempt_heuristic_solver_); scratch_cpu_fj_on_lp_opt.fj_cpu->log_prefix = "******* scratch on LP optimal: "; scratch_cpu_fj_on_lp_opt.fj_cpu->improvement_callback = - [this, &population](f_t obj, const std::vector& h_vec, double /*work_units*/) { - population.add_external_solution(h_vec, obj, solution_origin_t::CPUFJ); - if (obj < local_search_best_obj) { - CUOPT_LOG_DEBUG("******* New local search best obj %g, best overall %g", - context.problem_ptr->get_user_obj_from_solver_obj(obj), - context.problem_ptr->get_user_obj_from_solver_obj( - population.is_feasible() ? population.best_feasible().get_objective() - : std::numeric_limits::max())); - local_search_best_obj = obj; - } + [&population](f_t obj, const std::vector& h_vec, double /*work_units*/) { + population.add_external_solution( + h_vec, obj, internals::mip_solution_origin_t::CPU_FEASIBILITY_JUMP); }; // default weights @@ -178,8 +170,11 @@ void local_search_t::start_cpufj_deterministic( // Set up callback to send solutions to B&B with work unit timestamps deterministic_cpu_fj.fj_cpu->improvement_callback = - [&bb](f_t obj, const std::vector& h_vec, double work_units) { - bb.queue_external_solution_deterministic(h_vec, work_units); + [&bb, problem_ptr = context.problem_ptr]( + f_t obj, const std::vector& h_vec, double work_units) { + f_t user_obj = problem_ptr->get_user_obj_from_solver_obj(obj); + bb.queue_external_solution_deterministic( + h_vec, user_obj, work_units, cuopt::internals::mip_solution_origin_t::CPU_FEASIBILITY_JUMP); }; deterministic_cpu_fj.start_cpu_solver(); @@ -207,19 +202,44 @@ bool local_search_t::do_fj_solve(solution_t& solution, const std::string& source) { if (time_limit == 0.) return solution.get_feasible(); + const bool deterministic = (context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS); + + work_limit_timer_t timer(context.gpu_heur_loop, time_limit, *context.termination); + const auto old_n_cstr_weights = in_fj.cstr_weights.size(); + const auto expected_n_cstr_weights = static_cast(solution.problem_ptr->n_constraints); + // in case this is the first time run, resize + if (old_n_cstr_weights != expected_n_cstr_weights) { + in_fj.cstr_weights.resize(solution.problem_ptr->n_constraints, + solution.handle_ptr->get_stream()); + cuopt_assert(in_fj.cstr_weights.size() == expected_n_cstr_weights, + "Constraint weights must match constraint count after resize"); + // Initialize only newly grown entries; shrinking does not need initialization. + if (old_n_cstr_weights < expected_n_cstr_weights) { + cuopt_assert(old_n_cstr_weights <= in_fj.cstr_weights.size(), + "Constraint weight fill start must be within range"); + thrust::uninitialized_fill(solution.handle_ptr->get_thrust_policy(), + in_fj.cstr_weights.begin() + old_n_cstr_weights, + in_fj.cstr_weights.end(), + 1.); + } + } - timer_t timer(time_limit); - - auto h_weights = cuopt::host_copy(in_fj.cstr_weights, solution.handle_ptr->get_stream()); - auto h_objective_weight = in_fj.objective_weight.value(solution.handle_ptr->get_stream()); - for (auto& cpu_fj : ls_cpu_fj) { - cpu_fj.fj_cpu = cpu_fj.fj_ptr->create_cpu_climber(solution, - h_weights, - h_weights, - h_objective_weight, - context.preempt_heuristic_solver_, - fj_settings_t{}, - true); + { + auto h_weights = cuopt::host_copy(in_fj.cstr_weights, solution.handle_ptr->get_stream()); + auto h_objective_weight = in_fj.objective_weight.value(solution.handle_ptr->get_stream()); + for (auto& cpu_fj : ls_cpu_fj) { + cpu_fj.fj_cpu = cpu_fj.fj_ptr->create_cpu_climber(solution, + h_weights, + h_weights, + h_objective_weight, + context.preempt_heuristic_solver_, + fj_settings_t{}, + true); + if (deterministic) { + cpu_fj.fj_cpu->work_units_elapsed = 0.0; + cpu_fj.fj_cpu->work_budget = time_limit; + } + } } auto solution_copy = solution; @@ -234,9 +254,10 @@ bool local_search_t::do_fj_solve(solution_t& solution, in_fj.settings.time_limit = timer.remaining_time(); in_fj.solve(solution); - // Stop CPU solver - for (auto& cpu_fj : ls_cpu_fj) { - cpu_fj.stop_cpu_solver(); + if (!deterministic) { + for (auto& cpu_fj : ls_cpu_fj) { + cpu_fj.stop_cpu_solver(); + } } auto gpu_fj_end = std::chrono::high_resolution_clock::now(); @@ -245,7 +266,6 @@ bool local_search_t::do_fj_solve(solution_t& solution, solution_t solution_cpu(*solution.problem_ptr); f_t best_cpu_obj = std::numeric_limits::max(); - // // Wait for CPU solver to finish for (auto& cpu_fj : ls_cpu_fj) { bool cpu_sol_found = cpu_fj.wait_for_cpu_solver(); if (cpu_sol_found) { @@ -291,8 +311,10 @@ bool local_search_t::do_fj_solve(solution_t& solution, } template -void local_search_t::generate_fast_solution(solution_t& solution, timer_t timer) +void local_search_t::generate_fast_solution(solution_t& solution, + work_limit_timer_t timer) { + CUOPT_LOG_DEBUG("Running FJ fast sol"); thrust::fill(solution.handle_ptr->get_thrust_policy(), solution.assignment.begin(), solution.assignment.end(), @@ -303,8 +325,11 @@ void local_search_t::generate_fast_solution(solution_t& solu fj.settings.update_weights = true; fj.settings.feasibility_run = true; fj.settings.time_limit = std::min(30., timer.remaining_time()); - while (!context.diversity_manager_ptr->check_b_b_preemption() && !timer.check_time_limit()) { - timer_t constr_prop_timer = timer_t(std::min(timer.remaining_time(), 2.)); + while ((context.diversity_manager_ptr == nullptr || + !context.diversity_manager_ptr->check_b_b_preemption()) && + !timer.check_time_limit()) { + work_limit_timer_t constr_prop_timer = work_limit_timer_t( + context.gpu_heur_loop, std::min(timer.remaining_time(), 2.), *context.termination); // do constraint prop on lp optimal solution constraint_prop.apply_round(solution, 1., constr_prop_timer); if (solution.compute_feasibility()) { return; } @@ -321,7 +346,7 @@ void local_search_t::generate_fast_solution(solution_t& solu template bool local_search_t::run_local_search(solution_t& solution, const weight_t& weights, - timer_t timer, + work_limit_timer_t timer, const ls_config_t& ls_config) { raft::common::nvtx::range fun_scope("local search"); @@ -331,11 +356,10 @@ bool local_search_t::run_local_search(solution_t& solution, if (!solution.get_feasible()) { if (ls_config.at_least_one_parent_feasible) { fj_settings.time_limit = 0.5; - timer = timer_t(fj_settings.time_limit); } else { fj_settings.time_limit = 0.25; - timer = timer_t(fj_settings.time_limit); } + timer = work_limit_timer_t(context.gpu_heur_loop, fj_settings.time_limit, *context.termination); } else { fj_settings.time_limit = std::min(1., timer.remaining_time()); } @@ -365,8 +389,9 @@ bool local_search_t::run_local_search(solution_t& solution, template bool local_search_t::run_fj_until_timer(solution_t& solution, const weight_t& weights, - timer_t timer) + work_limit_timer_t timer) { + CUOPT_LOG_DEBUG("Running FJ until timer"); bool is_feasible; fj.settings.n_of_minimums_for_exit = 1e6; fj.settings.mode = fj_mode_t::EXIT_NON_IMPROVING; @@ -383,7 +408,7 @@ bool local_search_t::run_fj_until_timer(solution_t& solution template bool local_search_t::run_fj_annealing(solution_t& solution, - timer_t timer, + work_limit_timer_t timer, const ls_config_t& ls_config) { raft::common::nvtx::range fun_scope("run_fj_annealing"); @@ -413,7 +438,7 @@ bool local_search_t::run_fj_annealing(solution_t& solution, template bool local_search_t::run_fj_line_segment(solution_t& solution, - timer_t timer, + work_limit_timer_t timer, const ls_config_t& ls_config) { raft::common::nvtx::range fun_scope("run_fj_line_segment"); @@ -436,7 +461,7 @@ bool local_search_t::run_fj_line_segment(solution_t& solutio template bool local_search_t::check_fj_on_lp_optimal(solution_t& solution, bool perturb, - timer_t timer) + work_limit_timer_t timer) { raft::common::nvtx::range fun_scope("check_fj_on_lp_optimal"); if (lp_optimal_exists) { @@ -452,15 +477,21 @@ bool local_search_t::check_fj_on_lp_optimal(solution_t& solu solution.assign_random_within_bounds(perturbation_ratio); } cuopt_func_call(solution.test_variable_bounds(false)); - f_t lp_run_time_after_feasible = std::min(1., timer.remaining_time()); - timer_t bounds_prop_timer = timer_t(std::min(timer.remaining_time(), 10.)); + f_t lp_run_time_after_feasible = std::min(1., timer.remaining_time()); + work_limit_timer_t bounds_prop_timer = work_limit_timer_t( + context.gpu_heur_loop, std::min(timer.remaining_time(), 10.), *context.termination); bool is_feasible = constraint_prop.apply_round(solution, lp_run_time_after_feasible, bounds_prop_timer); if (!is_feasible) { const f_t lp_run_time = 2.; relaxed_lp_settings_t lp_settings; lp_settings.time_limit = std::min(lp_run_time, timer.remaining_time()); - lp_settings.tolerance = solution.problem_ptr->tolerances.absolute_tolerance; + if (timer.deterministic) { + lp_settings.work_limit = lp_settings.time_limit; + lp_settings.work_context = timer.work_context; + cuopt_assert(lp_settings.work_context != nullptr, "Missing deterministic work context"); + } + lp_settings.tolerance = solution.problem_ptr->tolerances.absolute_tolerance; run_lp_with_vars_fixed( *solution.problem_ptr, solution, solution.problem_ptr->integer_indices, lp_settings); } else { @@ -477,7 +508,8 @@ bool local_search_t::check_fj_on_lp_optimal(solution_t& solu } template -bool local_search_t::run_fj_on_zero(solution_t& solution, timer_t timer) +bool local_search_t::run_fj_on_zero(solution_t& solution, + work_limit_timer_t timer) { raft::common::nvtx::range fun_scope("run_fj_on_zero"); thrust::fill(solution.handle_ptr->get_thrust_policy(), @@ -496,7 +528,7 @@ bool local_search_t::run_fj_on_zero(solution_t& solution, ti template bool local_search_t::run_staged_fp(solution_t& solution, - timer_t timer, + work_limit_timer_t timer, population_t* population_ptr) { raft::common::nvtx::range fun_scope("run_staged_fp"); @@ -524,7 +556,8 @@ bool local_search_t::run_staged_fp(solution_t& solution, } CUOPT_LOG_DEBUG("Running staged FP from beginning it %d", i); fp.relax_general_integers(solution); - timer_t binary_timer(timer.remaining_time() / 3); + work_limit_timer_t binary_timer( + context.gpu_heur_loop, timer.remaining_time() / 3, *context.termination); i_t binary_it_counter = 0; for (; binary_it_counter < 100; ++binary_it_counter) { population_ptr->add_external_solutions_to_population(); @@ -636,7 +669,8 @@ void local_search_t::reset_alpha_and_save_solution( solution_t solution_copy(solution); solution_copy.problem_ptr = old_problem_ptr; solution_copy.resize_to_problem(); - population_ptr->add_solution(std::move(solution_copy)); + population_ptr->add_solution(std::move(solution_copy), + internals::mip_solution_origin_t::LOCAL_SEARCH); population_ptr->add_external_solutions_to_population(); if (!cutting_plane_added_for_active_run) { solution.problem_ptr = &problem_with_objective_cut; @@ -689,19 +723,20 @@ void local_search_t::reset_alpha_and_run_recombiners( template bool local_search_t::run_fp(solution_t& solution, - timer_t timer, - population_t* population_ptr) + work_limit_timer_t timer, + population_t* population_ptr, + i_t n_fp_iterations) { raft::common::nvtx::range fun_scope("run_fp"); cuopt_assert(population_ptr != nullptr, "Population pointer must not be null"); - const i_t n_fp_iterations = 1000000; bool is_feasible = solution.compute_feasibility(); cutting_plane_added_for_active_run = is_feasible; double best_objective = is_feasible ? solution.get_objective() : std::numeric_limits::max(); rmm::device_uvector best_solution(solution.assignment, solution.handle_ptr->get_stream()); problem_t* old_problem_ptr = solution.problem_ptr; - fp.timer = timer_t(timer.remaining_time()); + fp.timer = + work_limit_timer_t(context.gpu_heur_loop, timer.remaining_time(), *context.termination); // if it has not been initialized yet, create a new problem and move it to the cut problem if (!problem_with_objective_cut.cutting_plane_added) { problem_with_objective_cut = std::move(problem_t(*old_problem_ptr)); @@ -802,7 +837,7 @@ bool local_search_t::generate_solution(solution_t& solution, { raft::common::nvtx::range fun_scope("generate_solution"); cuopt_assert(population_ptr != nullptr, "Population pointer must not be null"); - timer_t timer(time_limit); + work_limit_timer_t timer(context.gpu_heur_loop, time_limit, *context.termination); auto n_vars = solution.problem_ptr->n_variables; auto n_binary_vars = solution.problem_ptr->get_n_binary_variables(); auto n_integer_vars = solution.problem_ptr->n_integer_vars; diff --git a/cpp/src/mip_heuristics/local_search/local_search.cuh b/cpp/src/mip_heuristics/local_search/local_search.cuh index a36688d71d..fc1dd6135c 100644 --- a/cpp/src/mip_heuristics/local_search/local_search.cuh +++ b/cpp/src/mip_heuristics/local_search/local_search.cuh @@ -13,13 +13,7 @@ #include #include #include -#include - -#include -#include -#include -#include -#include +#include namespace cuopt::linear_programming::dual_simplex { template @@ -58,32 +52,35 @@ class local_search_t { void start_cpufj_scratch_threads(population_t& population); void start_cpufj_lptopt_scratch_threads(population_t& population); void stop_cpufj_scratch_threads(); - void generate_fast_solution(solution_t& solution, timer_t timer); + void generate_fast_solution(solution_t& solution, work_limit_timer_t timer); bool generate_solution(solution_t& solution, bool perturb, population_t* population_ptr, f_t time_limit = 300.); bool run_fj_until_timer(solution_t& solution, const weight_t& weights, - timer_t timer); + work_limit_timer_t timer); bool run_local_search(solution_t& solution, const weight_t& weights, - timer_t timer, + work_limit_timer_t timer, const ls_config_t& ls_config); bool run_fj_annealing(solution_t& solution, - timer_t timer, + work_limit_timer_t timer, const ls_config_t& ls_config); bool run_fj_line_segment(solution_t& solution, - timer_t timer, + work_limit_timer_t timer, const ls_config_t& ls_config); - bool run_fj_on_zero(solution_t& solution, timer_t timer); - bool check_fj_on_lp_optimal(solution_t& solution, bool perturb, timer_t timer); + bool run_fj_on_zero(solution_t& solution, work_limit_timer_t timer); + bool check_fj_on_lp_optimal(solution_t& solution, + bool perturb, + work_limit_timer_t timer); bool run_staged_fp(solution_t& solution, - timer_t timer, + work_limit_timer_t timer, population_t* population_ptr); bool run_fp(solution_t& solution, - timer_t timer, - population_t* population_ptr = nullptr); + work_limit_timer_t timer, + population_t* population_ptr = nullptr, + i_t n_fp_iterations = std::numeric_limits::max()); void resize_vectors(problem_t& problem, const raft::handle_t* handle_ptr); bool do_fj_solve(solution_t& solution, diff --git a/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cu b/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cu index f3233cc8f4..23cd9b41e3 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cu +++ b/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cu @@ -16,8 +16,95 @@ #include #include +#include + namespace cuopt::linear_programming::detail { +namespace { + +constexpr double bounds_repair_setup_base_work = 5e-4; +constexpr double bounds_repair_violation_base_work = 4e-4; +constexpr double bounds_repair_violation_nnz_work = 2e-6; +constexpr double bounds_repair_violation_constraint_work = 3e-6; +constexpr double bounds_repair_best_bounds_variable_work = 2e-6; +constexpr double bounds_repair_shift_base_work = 3e-4; +constexpr double bounds_repair_shift_row_entry_work = 3e-6; +constexpr double bounds_repair_shift_candidate_work = 8e-6; +constexpr double bounds_repair_shift_neighbor_entry_work = 3e-6; +constexpr double bounds_repair_shift_sort_work = 5e-6; +constexpr double bounds_repair_damage_base_work = 3e-4; +constexpr double bounds_repair_damage_neighbor_entry_work = 8e-6; +constexpr double bounds_repair_damage_sort_work = 5e-6; +constexpr double bounds_repair_move_base_work = 5e-5; +constexpr double bounds_repair_no_candidate_base_work = 4e-4; +constexpr double bounds_repair_cycle_penalty_work = 3e-4; + +template +double estimate_bounds_repair_violation_refresh_work(const problem_t& problem, + bool update_best_bounds) +{ + double estimate = bounds_repair_violation_base_work + + bounds_repair_violation_nnz_work * (double)problem.nnz + + bounds_repair_violation_constraint_work * (double)problem.n_constraints; + if (update_best_bounds) { + estimate += bounds_repair_best_bounds_variable_work * (double)problem.n_variables; + } + return estimate; +} + +template +double estimate_bounds_repair_setup_work(const problem_t& problem) +{ + return bounds_repair_setup_base_work + + estimate_bounds_repair_violation_refresh_work(problem, true); +} + +template +double estimate_bounds_repair_shift_work(const problem_t& problem, + i_t curr_cstr, + i_t n_candidates, + bool is_cycle) +{ + const auto stream = problem.handle_ptr->get_stream(); + const i_t cstr_begin = problem.offsets.element(curr_cstr, stream); + const i_t cstr_end = problem.offsets.element(curr_cstr + 1, stream); + const double row_nnz = cstr_end - cstr_begin; + const double avg_rev_degree = + problem.n_variables > 0 ? ((double)problem.nnz / (double)problem.n_variables) : 0.0; + const double sort_work = + n_candidates > 1 ? (double)n_candidates * std::log2((double)n_candidates) : 0.0; + double estimate = bounds_repair_shift_base_work + bounds_repair_shift_row_entry_work * row_nnz; + if (n_candidates == 0) { estimate = bounds_repair_no_candidate_base_work + estimate; } + estimate += bounds_repair_shift_candidate_work * (double)n_candidates; + estimate += bounds_repair_shift_neighbor_entry_work * (double)n_candidates * avg_rev_degree; + estimate += bounds_repair_shift_sort_work * sort_work; + if (is_cycle) { estimate += bounds_repair_cycle_penalty_work; } + return estimate; +} + +template +double estimate_bounds_repair_damage_work(const problem_t& problem, i_t n_candidates) +{ + if (n_candidates == 0) { return 0.0; } + const double avg_rev_degree = + problem.n_variables > 0 ? ((double)problem.nnz / (double)problem.n_variables) : 0.0; + const double sort_work = + n_candidates > 1 ? (double)n_candidates * std::log2((double)n_candidates) : 0.0; + return bounds_repair_damage_base_work + + bounds_repair_damage_neighbor_entry_work * (double)n_candidates * avg_rev_degree + + bounds_repair_damage_sort_work * sort_work; +} + +template +void record_estimated_work(timer_t& timer, double* total_estimated_work, double work) +{ + cuopt_assert(std::isfinite(work) && work >= 0.0, "Bounds repair work estimate must be finite"); + timer.record_work(work); + *total_estimated_work += work; +} + +} // namespace + template bounds_repair_t::bounds_repair_t(const problem_t& pb, bound_presolve_t& bound_presolve_) @@ -30,7 +117,8 @@ bounds_repair_t::bounds_repair_t(const problem_t& pb, violated_cstr_map(0, pb.handle_ptr->get_stream()), total_vio(pb.handle_ptr->get_stream()), gen(cuopt::seed_generator::get_seed()), - cycle_vector(MAX_CYCLE_SEQUENCE, -1) + cycle_vector(MAX_CYCLE_SEQUENCE, -1), + timer(0.0, cuopt::termination_checker_t::root_tag_t{}) { } @@ -68,8 +156,7 @@ f_t bounds_repair_t::get_ii_violation(problem_t& problem) min_act = bound_presolve.upd.min_activity.data(), max_act = bound_presolve.upd.max_activity.data(), cstr_violations_up = cstr_violations_up.data(), - cstr_violations_down = cstr_violations_down.data(), - total_vio = total_vio.data()] __device__(i_t cstr_idx) { + cstr_violations_down = cstr_violations_down.data()] __device__(i_t cstr_idx) { f_t cnst_lb = pb_v.constraint_lower_bounds[cstr_idx]; f_t cnst_ub = pb_v.constraint_upper_bounds[cstr_idx]; f_t eps = get_cstr_tolerance( @@ -79,21 +166,31 @@ f_t bounds_repair_t::get_ii_violation(problem_t& problem) f_t violation = max(curr_cstr_violation_up, curr_cstr_violation_down); if (violation >= ROUNDOFF_TOLERANCE) { violated_cstr_map[cstr_idx] = 1; - atomicAdd(total_vio, violation); } else { violated_cstr_map[cstr_idx] = 0; } cstr_violations_up[cstr_idx] = curr_cstr_violation_up; cstr_violations_down[cstr_idx] = curr_cstr_violation_down; }); - auto iter = thrust::copy_if(handle_ptr->get_thrust_policy(), + auto iter = thrust::copy_if(handle_ptr->get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + problem.n_constraints, violated_cstr_map.data(), violated_constraints.data(), cuda::std::identity{}); - h_n_violated_cstr = iter - violated_constraints.data(); - f_t total_violation = total_vio.value(handle_ptr->get_stream()); + h_n_violated_cstr = iter - violated_constraints.data(); + // Use deterministic reduction instead of non-deterministic atomicAdd + f_t total_violation = thrust::transform_reduce( + handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + problem.n_constraints, + [cstr_violations_up = cstr_violations_up.data(), + cstr_violations_down = cstr_violations_down.data()] __device__(i_t cstr_idx) -> f_t { + auto violation = max(cstr_violations_up[cstr_idx], cstr_violations_down[cstr_idx]); + return violation >= ROUNDOFF_TOLERANCE ? violation : 0.; + }, + (f_t)0, + thrust::plus()); CUOPT_LOG_TRACE( "Repair: n_violated_cstr %d total_violation %f", h_n_violated_cstr, total_violation); return total_violation; @@ -190,7 +287,15 @@ i_t bounds_repair_t::compute_best_shift(problem_t& problem, } }); handle_ptr->sync_stream(); - return candidates.n_candidates.value(handle_ptr->get_stream()); + i_t n_candidates = candidates.n_candidates.value(handle_ptr->get_stream()); + + // Sort by variable index to ensure deterministic ordering + thrust::sort_by_key(handle_ptr->get_thrust_policy(), + candidates.variable_index.begin(), + candidates.variable_index.begin() + n_candidates, + candidates.bound_shift.begin()); + + return n_candidates; } template @@ -377,30 +482,88 @@ void bounds_repair_t::apply_move(problem_t& problem, template bool bounds_repair_t::repair_problem(problem_t& problem, problem_t& original_problem, - timer_t timer_, + work_limit_timer_t timer_, const raft::handle_t* handle_ptr_) { CUOPT_LOG_DEBUG("Running bounds repair"); handle_ptr = handle_ptr_; timer = timer_; + cuopt_assert(timer.deterministic == problem.deterministic, + "Bounds repair timer/problem determinism mismatch"); resize(problem); reset(); best_violation = get_ii_violation(problem); curr_violation = best_violation; best_bounds.update_from(problem, handle_ptr); + double total_estimated_work = 0.0; + i_t repair_iterations = 0; + if (timer.deterministic) { + const double setup_work = estimate_bounds_repair_setup_work(problem); + record_estimated_work(timer, &total_estimated_work, setup_work); + CUOPT_DETERMINISM_LOG( + "Repair entry: pb_hash=0x%x bounds_hash=0x%x violated_hash=0x%x n_violated=%d " + "best_violation=%.6f timer_rem=%.6f total_work=%.6f setup_work=%.6f", + problem.get_fingerprint(), + detail::compute_hash(make_span(problem.variable_bounds), handle_ptr->get_stream()), + detail::compute_hash(make_span(violated_constraints, 0, h_n_violated_cstr), + handle_ptr->get_stream()), + h_n_violated_cstr, + best_violation, + timer.remaining_time(), + total_estimated_work, + setup_work); + } i_t no_candidate_in_a_row = 0; - while (h_n_violated_cstr > 0) { + // TODO: do this better + i_t iter_limit = std::numeric_limits::max(); + if (timer.deterministic) { iter_limit = 20; } + while (h_n_violated_cstr > 0 && iter_limit-- > 0) { + repair_iterations++; CUOPT_LOG_TRACE("Bounds repair loop: n_violated %d best_violation %f curr_violation %f", h_n_violated_cstr, best_violation, curr_violation); + if (timer.deterministic) { + CUOPT_DETERMINISM_LOG( + "Repair iter entry: iter=%d pb_hash=0x%x bounds_hash=0x%x violated_hash=0x%x " + "n_violated=%d best_violation=%.6f curr_violation=%.6f timer_rem=%.6f total_work=%.6f", + repair_iterations, + problem.get_fingerprint(), + detail::compute_hash(make_span(problem.variable_bounds), handle_ptr->get_stream()), + detail::compute_hash(make_span(violated_constraints, 0, h_n_violated_cstr), + handle_ptr->get_stream()), + h_n_violated_cstr, + best_violation, + curr_violation, + timer.remaining_time(), + total_estimated_work); + } if (timer.check_time_limit()) { break; } i_t curr_cstr = get_random_cstr(); // best way would be to check a variable cycle, but this is easier and more performant bool is_cycle = detect_cycle(curr_cstr); if (is_cycle) { CUOPT_LOG_DEBUG("Repair: cycle detected at cstr %d", curr_cstr); } // in parallel compute the best shift and best respective damage - i_t n_candidates = compute_best_shift(problem, original_problem, curr_cstr); + i_t n_candidates = compute_best_shift(problem, original_problem, curr_cstr); + double shift_work = 0.0; + if (timer.deterministic) { + shift_work = estimate_bounds_repair_shift_work(problem, curr_cstr, n_candidates, is_cycle); + record_estimated_work(timer, &total_estimated_work, shift_work); + CUOPT_DETERMINISM_LOG( + "Repair iter shift: iter=%d curr_cstr=%d cycle=%d n_candidates=%d cand_var_hash=0x%x " + "cand_shift_hash=0x%x shift_work=%.6f timer_rem=%.6f total_work=%.6f", + repair_iterations, + curr_cstr, + (int)is_cycle, + n_candidates, + detail::compute_hash(make_span(candidates.variable_index, 0, n_candidates), + handle_ptr->get_stream()), + detail::compute_hash(make_span(candidates.bound_shift, 0, n_candidates), + handle_ptr->get_stream()), + shift_work, + timer.remaining_time(), + total_estimated_work); + } // if no candidate is there continue with another constraint if (n_candidates == 0) { CUOPT_LOG_DEBUG("Repair: no candidate var found for cstr %d", curr_cstr); @@ -415,12 +578,31 @@ bool bounds_repair_t::repair_problem(problem_t& problem, CUOPT_LOG_TRACE("Repair: number of candidates %d", n_candidates); // among the ones that have a valid shift value, compute the damage compute_damages(problem, n_candidates); + double damage_work = 0.0; + if (timer.deterministic) { + damage_work = estimate_bounds_repair_damage_work(problem, n_candidates); + record_estimated_work(timer, &total_estimated_work, damage_work); + CUOPT_DETERMINISM_LOG( + "Repair iter damage: iter=%d curr_cstr=%d cand_cdelta_hash=0x%x cand_damage_hash=0x%x " + "damage_work=%.6f timer_rem=%.6f total_work=%.6f", + repair_iterations, + curr_cstr, + detail::compute_hash(make_span(candidates.cstr_delta, 0, n_candidates), + handle_ptr->get_stream()), + detail::compute_hash(make_span(candidates.damage, 0, n_candidates), + handle_ptr->get_stream()), + damage_work, + timer.remaining_time(), + total_estimated_work); + } // get the best damage i_t best_cstr_delta = candidates.cstr_delta.front_element(handle_ptr->get_stream()); f_t best_damage = candidates.damage.front_element(handle_ptr->get_stream()); CUOPT_LOG_TRACE( "Repair: best_cstr_delta value %d best_damage %f", best_cstr_delta, best_damage); i_t best_move_idx; + i_t n_of_eligible_candidates = -1; + // if the best damage is positive and we are within the prop (paper uses 0.75) if ((best_cstr_delta > 0 && rand_double(0, 1, gen) < p) || is_cycle) { // pick a random move from the candidate list @@ -428,7 +610,7 @@ bool bounds_repair_t::repair_problem(problem_t& problem, } else { // filter the moves with best_damage(it can be zero or not) and then pick a candidate among // them - i_t n_of_eligible_candidates = + n_of_eligible_candidates = find_cutoff_index(candidates, best_cstr_delta, best_damage, n_candidates); cuopt_assert(n_of_eligible_candidates > 0, ""); CUOPT_LOG_TRACE("n_of_eligible_candidates %d", n_of_eligible_candidates); @@ -443,9 +625,38 @@ bool bounds_repair_t::repair_problem(problem_t& problem, apply_move(problem, original_problem, best_move_idx); reset(); // TODO we might optimize this to only calculate the changed constraints - curr_violation = get_ii_violation(problem); + curr_violation = get_ii_violation(problem); + const bool improved_violation = curr_violation < best_violation; + double refresh_work = 0.0; + if (timer.deterministic) { + refresh_work = bounds_repair_move_base_work + + estimate_bounds_repair_violation_refresh_work(problem, improved_violation); + record_estimated_work(timer, &total_estimated_work, refresh_work); + CUOPT_DETERMINISM_LOG( + "Repair iter post: iter=%d pb_hash=0x%x bounds_hash=0x%x violated_hash=0x%x " + "n_violated=%d curr_violation=%.6f improved=%d refresh_work=%.6f total_work=%.6f " + "timer_rem=%.6f", + repair_iterations, + problem.get_fingerprint(), + detail::compute_hash(make_span(problem.variable_bounds), handle_ptr->get_stream()), + detail::compute_hash(make_span(violated_constraints, 0, h_n_violated_cstr), + handle_ptr->get_stream()), + h_n_violated_cstr, + curr_violation, + (int)improved_violation, + refresh_work, + total_estimated_work, + timer.remaining_time()); + CUOPT_DETERMINISM_LOG( + "Repair iter work: cstr=%d candidates=%d cycle=%d improved=%d total=%.6f", + curr_cstr, + n_candidates, + (int)is_cycle, + (int)improved_violation, + total_estimated_work); + } - if (curr_violation < best_violation) { + if (improved_violation) { best_violation = curr_violation; // update best bounds best_bounds.update_from(problem, handle_ptr); diff --git a/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cuh b/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cuh index 29161c5d25..26bb84478e 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cuh +++ b/cpp/src/mip_heuristics/local_search/rounding/bounds_repair.cuh @@ -120,7 +120,7 @@ class bounds_repair_t { void compute_damages(problem_t& problem, i_t n_candidates); bool repair_problem(problem_t& problem, problem_t& original_problem, - timer_t timer_, + work_limit_timer_t timer_, const raft::handle_t* handle_ptr_); void apply_move(problem_t& problem, problem_t& original_problem, @@ -144,7 +144,7 @@ class bounds_repair_t { i_t h_n_violated_cstr; const raft::handle_t* handle_ptr; std::mt19937 gen; - timer_t timer{0.}; + work_limit_timer_t timer; std::vector cycle_vector; i_t cycle_write_pos = 0; }; diff --git a/cpp/src/mip_heuristics/local_search/rounding/constraint_prop.cu b/cpp/src/mip_heuristics/local_search/rounding/constraint_prop.cu index 8db4d7ae85..ed371676cc 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/constraint_prop.cu +++ b/cpp/src/mip_heuristics/local_search/rounding/constraint_prop.cu @@ -19,6 +19,8 @@ #include #include +#include + namespace cuopt::linear_programming::detail { template @@ -39,7 +41,8 @@ constraint_prop_t::constraint_prop_t(mip_solver_context_t& c ub_restore(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), assignment_restore(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), - rng(cuopt::seed_generator::get_seed(), 0, 0) + rng(cuopt::seed_generator::get_seed(), 0, 0), + max_timer(0.0, cuopt::termination_checker_t::root_tag_t{}) { } @@ -755,9 +758,11 @@ void constraint_prop_t::restore_original_bounds_on_unfixed( template bool constraint_prop_t::run_repair_procedure(problem_t& problem, problem_t& original_problem, - timer_t& timer, + work_limit_timer_t& timer, const raft::handle_t* handle_ptr) { + CUOPT_LOG_TRACE("Running repair procedure"); + // select the first probing value i_t select = 0; multi_probe.set_updated_bounds(problem, select, handle_ptr); @@ -765,9 +770,14 @@ bool constraint_prop_t::run_repair_procedure(problem_t& prob repair_stats.repair_attempts++; f_t repair_start_time = timer.remaining_time(); i_t n_of_repairs_needed_for_feasible = 0; + // TODO: do this better + i_t iter_limit = std::numeric_limits::max(); + if ((this->context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS)) { + iter_limit = 100; + } do { n_of_repairs_needed_for_feasible++; - if (timer.check_time_limit()) { + if (timer.check_time_limit() || iter_limit-- <= 0) { CUOPT_LOG_DEBUG("Time limit is reached in repair loop!"); f_t repair_end_time = timer.remaining_time(); repair_stats.total_time_spent_on_repair += repair_start_time - repair_end_time; @@ -841,7 +851,7 @@ bool constraint_prop_t::find_integer( solution_t& sol, solution_t& orig_sol, f_t lp_run_time_after_feasible, - timer_t& timer, + work_limit_timer_t& timer, std::optional>> probing_config) { using crit_t = termination_criterion_t; @@ -871,6 +881,7 @@ bool constraint_prop_t::find_integer( sol.problem_ptr->integer_indices.data(), sol.problem_ptr->n_integer_vars, sol.handle_ptr->get_stream()); + CUOPT_LOG_DEBUG("sol hash 0x%x", sol.get_hash()); } else { find_unset_integer_vars(sol, unset_integer_vars); sort_by_frac(sol, make_span(unset_integer_vars)); @@ -895,16 +906,17 @@ bool constraint_prop_t::find_integer( set_bounds_on_fixed_vars(sol); } - CUOPT_LOG_DEBUG("Bounds propagation rounding: unset vars %lu", unset_integer_vars.size()); + CUOPT_LOG_TRACE("Bounds propagation rounding: unset vars %lu", unset_integer_vars.size()); if (unset_integer_vars.size() == 0) { - CUOPT_LOG_DEBUG("No integer variables provided in the bounds prop rounding"); + CUOPT_LOG_TRACE("No integer variables provided in the bounds prop rounding"); expand_device_copy(orig_sol.assignment, sol.assignment, sol.handle_ptr->get_stream()); cuopt_func_call(orig_sol.test_variable_bounds()); return orig_sol.compute_feasibility(); } // this is needed for the sort inside of the loop bool problem_ii = is_problem_ii(*sol.problem_ptr); - // if the problem is ii, run the bounds prop in the beginning + CUOPT_LOG_TRACE("is problem ii %d", problem_ii); + // if the problem is ii, run the bounds prop in the beginning if (problem_ii) { bool bounds_repaired = bounds_repair.repair_problem(*sol.problem_ptr, *orig_sol.problem_ptr, timer, sol.handle_ptr); @@ -930,6 +942,8 @@ bool constraint_prop_t::find_integer( i_t n_failed_repair_iterations = 0; while (set_count < unset_integer_vars.size()) { CUOPT_LOG_TRACE("n_set_vars %d vars to set %lu", set_count, unset_integer_vars.size()); + CUOPT_LOG_TRACE("unset_integer_vars size %lu", unset_integer_vars.size()); + const size_t set_count_before = set_count; update_host_assignment(sol); if (max_timer.check_time_limit()) { CUOPT_LOG_DEBUG("Second time limit is reached returning nearest rounding!"); @@ -954,7 +968,8 @@ bool constraint_prop_t::find_integer( bounds_prop_interval = 1; } } - i_t n_vars_to_set = recovery_mode ? 1 : bounds_prop_interval; + i_t n_vars_to_set = recovery_mode ? 1 : bounds_prop_interval; + const bool did_sort = n_vars_to_set != 1; // if we are not at the last stage or if we are in recovery mode, don't sort if (n_vars_to_set != 1) { sort_by_implied_slack_consumption( @@ -969,9 +984,14 @@ bool constraint_prop_t::find_integer( generate_bulk_rounding_vector(sol, orig_sol, host_vars_to_set, probing_config); probe( sol, orig_sol.problem_ptr, var_probe_vals, &set_count, unset_integer_vars, probing_config); + [[maybe_unused]] bool repair_attempted = false; + bool bounds_repaired = false; + i_t n_fixed_vars = 0; if (!(n_failed_repair_iterations >= max_n_failed_repair_iterations) && rounding_ii && !timeout_happened) { - timer_t repair_timer{std::min(timer.remaining_time() / 5, timer.elapsed_time() / 3)}; + // timer_t repair_timer{std::min(timer.remaining_time() / 5, timer.elapsed_time() / 3)}; + work_limit_timer_t repair_timer( + context.gpu_heur_loop, timer.remaining_time() / 5, *context.termination); save_bounds(sol); // update bounds and run repair procedure bool bounds_repaired = @@ -998,7 +1018,7 @@ bool constraint_prop_t::find_integer( make_span(sol.problem_ptr->variable_bounds), make_span(orig_sol.problem_ptr->variable_bounds), make_span(sol.assignment)}); - i_t n_fixed_vars = (iter - (unset_vars.begin() + set_count)); + n_fixed_vars = (iter - (unset_vars.begin() + set_count)); CUOPT_LOG_TRACE("After repair procedure, number of additional fixed vars %d", n_fixed_vars); set_count += n_fixed_vars; } @@ -1026,7 +1046,7 @@ bool constraint_prop_t::find_integer( // which is the unchanged problem bounds multi_probe.update_host_bounds(sol.handle_ptr, make_span(sol.problem_ptr->variable_bounds)); } - CUOPT_LOG_DEBUG( + CUOPT_LOG_TRACE( "Bounds propagation rounding end: ii constraint count first buffer %d, second buffer %d", multi_probe.infeas_constraints_count_0, multi_probe.infeas_constraints_count_1); @@ -1038,7 +1058,12 @@ bool constraint_prop_t::find_integer( multi_probe.infeas_constraints_count_1 == 0) && !timeout_happened && lp_run_time_after_feasible > 0) { relaxed_lp_settings_t lp_settings; - lp_settings.time_limit = lp_run_time_after_feasible; + lp_settings.time_limit = lp_run_time_after_feasible; + if (timer.deterministic) { + lp_settings.work_limit = lp_settings.time_limit; + lp_settings.work_context = timer.work_context; + cuopt_assert(lp_settings.work_context != nullptr, "Missing deterministic work context"); + } lp_settings.tolerance = orig_sol.problem_ptr->tolerances.absolute_tolerance; lp_settings.save_state = false; lp_settings.return_first_feasible = true; @@ -1057,11 +1082,14 @@ template bool constraint_prop_t::apply_round( solution_t& sol, f_t lp_run_time_after_feasible, - timer_t& timer, + work_limit_timer_t& timer, std::optional>> probing_config) { raft::common::nvtx::range fun_scope("constraint prop round"); - max_timer = timer_t{max_time_for_bounds_prop}; + + sol.compute_feasibility(); + max_timer = + work_limit_timer_t{context.gpu_heur_loop, max_time_for_bounds_prop, *context.termination}; if (check_brute_force_rounding(sol)) { return true; } recovery_mode = false; rounding_ii = false; @@ -1076,9 +1104,9 @@ bool constraint_prop_t::apply_round( f_t bounds_prop_end_time = max_timer.remaining_time(); repair_stats.total_time_spent_on_bounds_prop += bounds_prop_start_time - bounds_prop_end_time; - CUOPT_LOG_DEBUG( - "repair_success %lu repair_attempts %lu intermediate_repair_success %lu total_repair_loops %lu " - "total_time_spent_on_repair %f total_time_spent_bounds_prop_after_repair %f " + CUOPT_LOG_TRACE( + "repair_success %lu repair_attempts %lu intermediate_repair_success %lu total_repair_loops" + "%lu total_time_spent_on_repair %f total_time_spent_bounds_prop_after_repair %f " "total_time_spent_on_bounds_prop %f", repair_stats.repair_success, repair_stats.repair_attempts, diff --git a/cpp/src/mip_heuristics/local_search/rounding/constraint_prop.cuh b/cpp/src/mip_heuristics/local_search/rounding/constraint_prop.cuh index 2c609228e8..7ad4253cc4 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/constraint_prop.cuh +++ b/cpp/src/mip_heuristics/local_search/rounding/constraint_prop.cuh @@ -43,7 +43,7 @@ struct constraint_prop_t { constraint_prop_t(mip_solver_context_t& context); bool apply_round(solution_t& sol, f_t lp_run_time_after_feasible, - timer_t& timer, + work_limit_timer_t& timer, std::optional>> probing_config = std::nullopt); void sort_by_implied_slack_consumption(solution_t& sol, @@ -56,7 +56,7 @@ struct constraint_prop_t { bool find_integer(solution_t& sol, solution_t& orig_sol, f_t lp_run_time_after_feasible, - timer_t& timer, + work_limit_timer_t& timer, std::optional>> probing_config = std::nullopt); void find_set_integer_vars(solution_t& sol, rmm::device_uvector& set_vars); @@ -121,7 +121,7 @@ struct constraint_prop_t { const raft::handle_t* handle_ptr); bool run_repair_procedure(problem_t& problem, problem_t& original_problem, - timer_t& timer, + work_limit_timer_t& timer, const raft::handle_t* handle_ptr); bool handle_fixed_vars( solution_t& sol, @@ -149,7 +149,7 @@ struct constraint_prop_t { i_t bounds_prop_interval = 1; i_t n_iter_in_recovery = 0; i_t max_n_failed_repair_iterations = 1; - timer_t max_timer{0.}; + work_limit_timer_t max_timer; bool use_probing_cache = true; static repair_stats_t repair_stats; bool single_rounding_only = false; diff --git a/cpp/src/mip_heuristics/local_search/rounding/lb_bounds_repair.cu b/cpp/src/mip_heuristics/local_search/rounding/lb_bounds_repair.cu index 7d074aea5e..b3d53f43b2 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/lb_bounds_repair.cu +++ b/cpp/src/mip_heuristics/local_search/rounding/lb_bounds_repair.cu @@ -26,7 +26,8 @@ lb_bounds_repair_t::lb_bounds_repair_t(const raft::handle_t* handle_pt violated_cstr_map(0, handle_ptr->get_stream()), total_vio(handle_ptr->get_stream()), gen(cuopt::seed_generator::get_seed()), - cycle_vector(MAX_CYCLE_SEQUENCE, -1) + cycle_vector(MAX_CYCLE_SEQUENCE, -1), + timer(0.0, cuopt::termination_checker_t::root_tag_t{}) { } @@ -68,8 +69,7 @@ std::tuple lb_bounds_repair_t::get_ii_violation( constraint_upper_bounds = problem.constraint_upper_bounds, cnst_slack = make_span_2(lb_bound_presolve.cnst_slack), cstr_violations_up = cstr_violations_up.data(), - cstr_violations_down = cstr_violations_down.data(), - total_vio = total_vio.data()] __device__(i_t cstr_idx) { + cstr_violations_down = cstr_violations_down.data()] __device__(i_t cstr_idx) { f_t cnst_lb = constraint_lower_bounds[cstr_idx]; f_t cnst_ub = constraint_upper_bounds[cstr_idx]; f_t2 slack = cnst_slack[cstr_idx]; @@ -80,7 +80,6 @@ std::tuple lb_bounds_repair_t::get_ii_violation( f_t violation = max(curr_cstr_violation_up, curr_cstr_violation_down); if (violation >= ROUNDOFF_TOLERANCE) { violated_cstr_map[cstr_idx] = 1; - atomicAdd(total_vio, violation); } else { violated_cstr_map[cstr_idx] = 0; } @@ -94,7 +93,18 @@ std::tuple lb_bounds_repair_t::get_ii_violation( violated_constraints.data(), cuda::std::identity{}); i_t n_violated_cstr = iter - violated_constraints.data(); - f_t total_violation = total_vio.value(handle_ptr->get_stream()); + // Use deterministic reduction instead of non-deterministic atomicAdd + f_t total_violation = thrust::transform_reduce( + handle_ptr->get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(0) + problem.n_constraints, + [cstr_violations_up = cstr_violations_up.data(), + cstr_violations_down = cstr_violations_down.data()] __device__(i_t cstr_idx) -> f_t { + auto violation = max(cstr_violations_up[cstr_idx], cstr_violations_down[cstr_idx]); + return violation >= ROUNDOFF_TOLERANCE ? violation : 0.; + }, + (f_t)0, + thrust::plus()); CUOPT_LOG_TRACE( "Repair: n_violated_cstr %d total_violation %f", n_violated_cstr, total_violation); return std::make_tuple(total_violation, n_violated_cstr); @@ -400,7 +410,8 @@ bool lb_bounds_repair_t::repair_problem( timer_t timer_, const raft::handle_t* handle_ptr_) { - CUOPT_LOG_DEBUG("Running bounds repair"); + nvtx::range fun_scope("LB repair_problem"); + CUOPT_LOG_DEBUG("LB Running bounds repair"); handle_ptr = handle_ptr_; timer = timer_; resize(*problem); diff --git a/cpp/src/mip_heuristics/local_search/rounding/lb_bounds_repair.cuh b/cpp/src/mip_heuristics/local_search/rounding/lb_bounds_repair.cuh index 0b549c684d..3c4e4cf404 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/lb_bounds_repair.cuh +++ b/cpp/src/mip_heuristics/local_search/rounding/lb_bounds_repair.cuh @@ -58,7 +58,7 @@ class lb_bounds_repair_t { bool repair_problem(load_balanced_problem_t* problem, load_balanced_bounds_presolve_t& lb_bound_presolve, problem_t& original_problem, - timer_t timer_, + work_limit_timer_t timer_, const raft::handle_t* handle_ptr_); void apply_move(load_balanced_problem_t* problem, problem_t& original_problem, @@ -82,7 +82,7 @@ class lb_bounds_repair_t { i_t h_n_violated_cstr; const raft::handle_t* handle_ptr; std::mt19937 gen; - timer_t timer{0.}; + work_limit_timer_t timer; std::vector cycle_vector; i_t cycle_write_pos = 0; }; diff --git a/cpp/src/mip_heuristics/local_search/rounding/lb_constraint_prop.cu b/cpp/src/mip_heuristics/local_search/rounding/lb_constraint_prop.cu index bb72834ab4..d8e3bcc040 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/lb_constraint_prop.cu +++ b/cpp/src/mip_heuristics/local_search/rounding/lb_constraint_prop.cu @@ -33,7 +33,8 @@ lb_constraint_prop_t::lb_constraint_prop_t(mip_solver_context_thandle_ptr->get_stream()), assignment_restore(context.problem_ptr->n_variables, context.problem_ptr->handle_ptr->get_stream()), - rng(cuopt::seed_generator::get_seed(), 0, 0) + rng(cuopt::seed_generator::get_seed(), 0, 0), + max_timer(0.0, cuopt::termination_checker_t::root_tag_t{}) { } @@ -700,14 +701,15 @@ template bool lb_constraint_prop_t::apply_round( solution_t& sol, f_t lp_run_time_after_feasible, - timer_t& timer, + work_limit_timer_t& timer, std::optional>> probing_candidates) { raft::common::nvtx::range fun_scope("constraint prop round"); // this is second timer that can continue but without recovery mode const f_t max_time_for_bounds_prop = 5.; - max_timer = timer_t{max_time_for_bounds_prop}; + max_timer = + work_limit_timer_t{context.gpu_heur_loop, max_time_for_bounds_prop, *context.termination}; if (check_brute_force_rounding(sol)) { return true; } recovery_mode = false; rounding_ii = false; diff --git a/cpp/src/mip_heuristics/local_search/rounding/lb_constraint_prop.cuh b/cpp/src/mip_heuristics/local_search/rounding/lb_constraint_prop.cuh index 20e28e7cb9..6fb88467ab 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/lb_constraint_prop.cuh +++ b/cpp/src/mip_heuristics/local_search/rounding/lb_constraint_prop.cuh @@ -23,7 +23,7 @@ struct lb_constraint_prop_t { bool apply_round( solution_t& sol, f_t lp_run_time_after_feasible, - timer_t& timer, + work_limit_timer_t& timer, std::optional>> probing_candidates = std::nullopt); void sort_by_implied_slack_consumption( problem_t& original_problem, @@ -40,7 +40,7 @@ struct lb_constraint_prop_t { load_balanced_bounds_presolve_t& lb_bounds_update, solution_t& orig_sol, f_t lp_run_time_after_feasible, - timer_t& timer, + work_limit_timer_t& timer, std::optional>> probing_candidates); std::tuple probing_values( load_balanced_bounds_presolve_t& lb_bounds_update, @@ -83,7 +83,7 @@ struct lb_constraint_prop_t { bool run_repair_procedure(load_balanced_problem_t* problem, load_balanced_bounds_presolve_t& lb_bounds_update, problem_t& original_problem, - timer_t& timer, + work_limit_timer_t& timer, const raft::handle_t* handle_ptr); mip_solver_context_t& context; @@ -100,7 +100,7 @@ struct lb_constraint_prop_t { bool rounding_ii = false; i_t bounds_prop_interval = 1; i_t n_iter_in_recovery = 0; - timer_t max_timer{0.}; + work_limit_timer_t max_timer; bool use_probing_cache = true; size_t repair_attempts = 0; diff --git a/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu b/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu index c9a6dd0eda..4f3a015a6c 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu +++ b/cpp/src/mip_heuristics/local_search/rounding/simple_rounding.cu @@ -179,7 +179,7 @@ void invoke_correct_integers(solution_t& solution, f_t tol) template void invoke_correct_integers(solution_t & solution, \ F_TYPE tol); -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/mip_heuristics/local_search/rounding/simple_rounding_kernels.cuh b/cpp/src/mip_heuristics/local_search/rounding/simple_rounding_kernels.cuh index 2edca8fb08..a0b8468ea7 100644 --- a/cpp/src/mip_heuristics/local_search/rounding/simple_rounding_kernels.cuh +++ b/cpp/src/mip_heuristics/local_search/rounding/simple_rounding_kernels.cuh @@ -131,7 +131,7 @@ __global__ void brute_force_check_kernel(typename solution_t::view_t s __shared__ i_t shbuf[raft::WarpSize]; i_t total_feasible = raft::blockReduce(th_feasible_count, (char*)shbuf); if (threadIdx.x == 0) { - if (total_feasible == solution.problem.n_constraints) { atomicExch(best_config, config); } + if (total_feasible == solution.problem.n_constraints) { atomicMin(best_config, config); } } } diff --git a/cpp/src/mip_heuristics/mip_constants.hpp b/cpp/src/mip_heuristics/mip_constants.hpp index 66f5ebd273..47d3d22de4 100644 --- a/cpp/src/mip_heuristics/mip_constants.hpp +++ b/cpp/src/mip_heuristics/mip_constants.hpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -11,3 +11,5 @@ #define MIP_INSTANTIATE_FLOAT CUOPT_INSTANTIATE_FLOAT #define MIP_INSTANTIATE_DOUBLE CUOPT_INSTANTIATE_DOUBLE + +#define PDLP_INSTANTIATE_FLOAT 1 diff --git a/cpp/src/mip_heuristics/presolve/bounds_presolve.cu b/cpp/src/mip_heuristics/presolve/bounds_presolve.cu index d78f8beb16..de6ae8c51a 100644 --- a/cpp/src/mip_heuristics/presolve/bounds_presolve.cu +++ b/cpp/src/mip_heuristics/presolve/bounds_presolve.cu @@ -171,6 +171,12 @@ termination_criterion_t bound_presolve_t::bound_update_loop(problem_t< { termination_criterion_t criteria = termination_criterion_t::ITERATION_LIMIT; + // CHANGE once we have a work predictor + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS)) { + timer = timer_t(std::numeric_limits::infinity()); + settings.iteration_limit = std::min(settings.iteration_limit, 50); + } + i_t iter; upd.init_changed_constraints(pb.handle_ptr); for (iter = 0; iter < settings.iteration_limit; ++iter) { diff --git a/cpp/src/mip_heuristics/presolve/bounds_update_data.cu b/cpp/src/mip_heuristics/presolve/bounds_update_data.cu index 487549aa4a..29eab4e69c 100644 --- a/cpp/src/mip_heuristics/presolve/bounds_update_data.cu +++ b/cpp/src/mip_heuristics/presolve/bounds_update_data.cu @@ -35,6 +35,35 @@ void bounds_update_data_t::resize(problem_t& problem) changed_constraints.resize(problem.n_constraints, problem.handle_ptr->get_stream()); next_changed_constraints.resize(problem.n_constraints, problem.handle_ptr->get_stream()); changed_variables.resize(problem.n_variables, problem.handle_ptr->get_stream()); + + thrust::fill(problem.handle_ptr->get_thrust_policy(), + min_activity.begin(), + min_activity.end(), + std::numeric_limits::signaling_NaN()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + max_activity.begin(), + max_activity.end(), + std::numeric_limits::signaling_NaN()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + lb.begin(), + lb.end(), + std::numeric_limits::signaling_NaN()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + ub.begin(), + ub.end(), + std::numeric_limits::signaling_NaN()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + changed_constraints.begin(), + changed_constraints.end(), + -1); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + next_changed_constraints.begin(), + next_changed_constraints.end(), + -1); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + changed_variables.begin(), + changed_variables.end(), + -1); } template diff --git a/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cu b/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cu new file mode 100644 index 0000000000..70855267df --- /dev/null +++ b/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cu @@ -0,0 +1,1152 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define DEBUG_KNAPSACK_CONSTRAINTS 0 + +#include "clique_table.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +// do constraints with only binary variables. +template +void find_cliques_from_constraint(const knapsack_constraint_t& kc, + clique_table_t& clique_table, + cuopt::timer_t& timer) +{ + i_t size = kc.entries.size(); + cuopt_assert(size > 1, "Constraint has not enough variables"); + if (kc.entries[size - 1].val + kc.entries[size - 2].val <= kc.rhs) { return; } + + std::vector clique; + i_t k = size - 1; + // find the first clique, which is the largest + // FIXME: do binary search + // require k >= 1 so kc.entries[k-1] is always valid + while (k >= 1 && kc.entries[k].val + kc.entries[k - 1].val > kc.rhs) { + k--; + } + for (i_t idx = k; idx < size; idx++) { + clique.push_back(kc.entries[idx].col); + } + clique_table.first.push_back(clique); + const i_t original_clique_start_idx = k; + // find the additional cliques + k--; + while (k >= 0) { + if (timer.check_time_limit()) { return; } + f_t curr_val = kc.entries[k].val; + i_t curr_col = kc.entries[k].col; + // do a binary search in the clique coefficients to find f, such that coeff_k + coeff_f > rhs + // this means that we get a subset of the original clique and extend it with a variable + f_t val_to_find = kc.rhs - curr_val + clique_table.tolerances.absolute_tolerance; + auto it = std::lower_bound( + kc.entries.begin() + original_clique_start_idx, kc.entries.end(), val_to_find); + if (it != kc.entries.end()) { + i_t position_on_knapsack_constraint = std::distance(kc.entries.begin(), it); + i_t start_pos_on_clique = position_on_knapsack_constraint - original_clique_start_idx; + cuopt_assert(start_pos_on_clique >= 1, "Start position on clique is negative"); + cuopt_assert(it->val + curr_val > kc.rhs, "RHS mismatch"); +#if DEBUG_KNAPSACK_CONSTRAINTS + CUOPT_LOG_DEBUG("Found additional clique: %d, %d, %d", + curr_col, + clique_table.first.size() - 1, + start_pos_on_clique); +#endif + clique_table.addtl_cliques.push_back( + {curr_col, (i_t)clique_table.first.size() - 1, start_pos_on_clique}); + } else { + break; + } + k--; + } +} + +// sort CSR by constraint coefficients +template +void sort_csr_by_constraint_coefficients( + std::vector>& knapsack_constraints) +{ + // sort the rows of the CSR matrix by the coefficients of the constraint + for (auto& knapsack_constraint : knapsack_constraints) { + std::sort(knapsack_constraint.entries.begin(), knapsack_constraint.entries.end()); + } +} + +template +void make_coeff_positive_knapsack_constraint( + const dual_simplex::user_problem_t& problem, + std::vector>& knapsack_constraints, + std::unordered_set& set_packing_constraints, + typename mip_solver_settings_t::tolerances_t tolerances) +{ + for (i_t i = 0; i < (i_t)knapsack_constraints.size(); i++) { + auto& knapsack_constraint = knapsack_constraints[i]; + f_t rhs_offset = 0; + bool all_coeff_are_equal = true; + f_t first_coeff = std::abs(knapsack_constraint.entries[0].val); + for (auto& entry : knapsack_constraint.entries) { + if (entry.val < 0) { + entry.val = -entry.val; + rhs_offset += entry.val; + // negation of a variable is var + num_cols + entry.col = entry.col + problem.num_cols; + } + if (!integer_equal(entry.val, first_coeff, tolerances.absolute_tolerance)) { + all_coeff_are_equal = false; + } + } + knapsack_constraint.rhs += rhs_offset; + if (!integer_equal(knapsack_constraint.rhs, first_coeff, tolerances.absolute_tolerance)) { + all_coeff_are_equal = false; + } + knapsack_constraint.is_set_packing = all_coeff_are_equal; + if (!all_coeff_are_equal) { knapsack_constraint.is_set_partitioning = false; } + if (knapsack_constraint.is_set_packing) { set_packing_constraints.insert(i); } + cuopt_assert(knapsack_constraint.rhs >= 0, "RHS must be non-negative"); + } +} + +// convert all the knapsack constraints +// if a binary variable has a negative coefficient, put its negation in the constraint +template +void fill_knapsack_constraints(const dual_simplex::user_problem_t& problem, + std::vector>& knapsack_constraints, + dual_simplex::csr_matrix_t& A) +{ + // we might add additional constraints for the equality constraints + i_t added_constraints = 0; + // in user problems, ranged constraint ids monotonically increase. + // when a row sense is "E", check if it is ranged constraint and treat accordingly + i_t ranged_constraint_counter = 0; + for (i_t i = 0; i < A.m; i++) { + std::pair constraint_range = A.get_constraint_range(i); + if (constraint_range.second - constraint_range.first < 2) { + CUOPT_LOG_DEBUG("Constraint %d has less than 2 variables, skipping", i); + continue; + } + bool all_binary = true; + // check if all variables are binary (any non-continuous with bounds [0,1]) + for (i_t j = constraint_range.first; j < constraint_range.second; j++) { + if (problem.var_types[A.j[j]] == dual_simplex::variable_type_t::CONTINUOUS || + problem.lower[A.j[j]] != 0 || problem.upper[A.j[j]] != 1) { + all_binary = false; + break; + } + } + // if all variables are binary, convert the constraint to a knapsack constraint + if (!all_binary) { continue; } + knapsack_constraint_t knapsack_constraint; + + knapsack_constraint.cstr_idx = i; + if (problem.row_sense[i] == 'L') { + knapsack_constraint.rhs = problem.rhs[i]; + for (i_t j = constraint_range.first; j < constraint_range.second; j++) { + knapsack_constraint.entries.push_back({A.j[j], A.x[j]}); + } + } else if (problem.row_sense[i] == 'G') { + knapsack_constraint.rhs = -problem.rhs[i]; + for (i_t j = constraint_range.first; j < constraint_range.second; j++) { + knapsack_constraint.entries.push_back({A.j[j], -A.x[j]}); + } + } + // equality part + else { + // For equality rows, partitioning status should not depend on raw rhs scale here. + // The exact set-packing/partitioning check is finalized later in + // make_coeff_positive_knapsack_constraint after coefficient normalization. + bool is_set_partitioning = true; + bool ranged_constraint = ranged_constraint_counter < problem.num_range_rows && + problem.range_rows[ranged_constraint_counter] == i; + // less than part + knapsack_constraint.rhs = problem.rhs[i]; + if (ranged_constraint) { + knapsack_constraint.rhs += problem.range_value[ranged_constraint_counter]; + is_set_partitioning = problem.range_value[ranged_constraint_counter] == 0.; + ranged_constraint_counter++; + } + for (i_t j = constraint_range.first; j < constraint_range.second; j++) { + knapsack_constraint.entries.push_back({A.j[j], A.x[j]}); + } + // greater than part: convert it to less than + knapsack_constraint_t knapsack_constraint2; + // Mark synthetic rows from equality splitting with negative ids so they never alias real row + // indices (including rows appended later by clique extension). + knapsack_constraint2.cstr_idx = -(added_constraints + 1); + added_constraints++; + knapsack_constraint2.rhs = -problem.rhs[i]; + for (i_t j = constraint_range.first; j < constraint_range.second; j++) { + knapsack_constraint2.entries.push_back({A.j[j], -A.x[j]}); + } + knapsack_constraint.is_set_partitioning = is_set_partitioning; + knapsack_constraint2.is_set_partitioning = is_set_partitioning; + knapsack_constraints.push_back(knapsack_constraint2); + } + knapsack_constraints.push_back(knapsack_constraint); + } + CUOPT_LOG_DEBUG("Number of knapsack constraints: %d added %d constraints", + knapsack_constraints.size(), + added_constraints); +} + +template +void remove_small_cliques(clique_table_t& clique_table, cuopt::timer_t& timer) +{ + i_t num_removed_first = 0; + i_t num_removed_addtl = 0; + std::vector to_delete(clique_table.first.size(), false); + // if a clique is small, we remove it from the cliques and add it to adjlist + for (size_t clique_idx = 0; clique_idx < clique_table.first.size(); clique_idx++) { + if (timer.check_time_limit()) { return; } + const auto& clique = clique_table.first[clique_idx]; + if (clique.size() <= (size_t)clique_table.min_clique_size) { + for (size_t i = 0; i < clique.size(); i++) { + for (size_t j = 0; j < clique.size(); j++) { + if (i == j) { continue; } + clique_table.adj_list_small_cliques[clique[i]].insert(clique[j]); + } + } + num_removed_first++; + to_delete[clique_idx] = true; + } + } + for (size_t addtl_c = 0; addtl_c < clique_table.addtl_cliques.size(); addtl_c++) { + const auto& addtl_clique = clique_table.addtl_cliques[addtl_c]; + const auto base_clique_idx = static_cast(addtl_clique.clique_idx); + cuopt_assert(base_clique_idx < to_delete.size(), + "Additional clique points to invalid base clique index"); + // Remove additional cliques whose base clique is scheduled for deletion. + if (to_delete[base_clique_idx]) { + // Materialize conflicts represented by: + // addtl_clique.vertex_idx + first[base_clique_idx][start_pos_on_clique:] + // before deleting both the additional and base clique entries. + for (size_t i = addtl_clique.start_pos_on_clique; + i < clique_table.first[base_clique_idx].size(); + i++) { + clique_table.adj_list_small_cliques[clique_table.first[base_clique_idx][i]].insert( + addtl_clique.vertex_idx); + clique_table.adj_list_small_cliques[addtl_clique.vertex_idx].insert( + clique_table.first[base_clique_idx][i]); + } + clique_table.addtl_cliques.erase(clique_table.addtl_cliques.begin() + addtl_c); + addtl_c--; + num_removed_addtl++; + continue; + } + i_t size_of_clique = + clique_table.first[base_clique_idx].size() - addtl_clique.start_pos_on_clique + 1; + if (size_of_clique < clique_table.min_clique_size) { + // the items from first clique are already added to the adjlist + // only add the items that are coming from the new var in the additional clique + for (size_t i = addtl_clique.start_pos_on_clique; + i < clique_table.first[base_clique_idx].size(); + i++) { + // insert conflicts both way + clique_table.adj_list_small_cliques[clique_table.first[base_clique_idx][i]].insert( + addtl_clique.vertex_idx); + clique_table.adj_list_small_cliques[addtl_clique.vertex_idx].insert( + clique_table.first[base_clique_idx][i]); + } + clique_table.addtl_cliques.erase(clique_table.addtl_cliques.begin() + addtl_c); + addtl_c--; + num_removed_addtl++; + } + } + CUOPT_LOG_DEBUG("Number of removed cliques from first: %d, additional: %d", + num_removed_first, + num_removed_addtl); + size_t i = 0; + size_t old_idx = 0; + std::vector index_mapping(clique_table.first.size(), -1); + auto it = std::remove_if(clique_table.first.begin(), clique_table.first.end(), [&](auto& clique) { + bool res = false; + if (to_delete[old_idx]) { + res = true; + } else { + index_mapping[old_idx] = i++; + } + old_idx++; + return res; + }); + clique_table.first.erase(it, clique_table.first.end()); + // renumber the reference indices in the additional cliques, since we removed some cliques + for (size_t addtl_c = 0; addtl_c < clique_table.addtl_cliques.size(); addtl_c++) { + i_t new_clique_idx = index_mapping[clique_table.addtl_cliques[addtl_c].clique_idx]; + cuopt_assert(new_clique_idx != -1, "New clique index is -1"); + clique_table.addtl_cliques[addtl_c].clique_idx = new_clique_idx; + cuopt_assert(clique_table.first[new_clique_idx].size() - + clique_table.addtl_cliques[addtl_c].start_pos_on_clique + 1 >= + (size_t)clique_table.min_clique_size, + "A small clique remained after removing small cliques"); + } + // Clique removals/edge materialization can change degrees; force recompute on next query. + std::fill(clique_table.var_degrees.begin(), clique_table.var_degrees.end(), -1); +} + +template +std::unordered_set clique_table_t::get_adj_set_of_var(i_t var_idx) +{ + std::unordered_set adj_set; + for (const auto& clique_idx : var_clique_map_first[var_idx]) { + adj_set.insert(first[clique_idx].begin(), first[clique_idx].end()); + } + + for (const auto& addtl_clique_idx : var_clique_map_addtl[var_idx]) { + adj_set.insert(addtl_cliques[addtl_clique_idx].vertex_idx); + adj_set.insert(first[addtl_cliques[addtl_clique_idx].clique_idx].begin() + + addtl_cliques[addtl_clique_idx].start_pos_on_clique, + first[addtl_cliques[addtl_clique_idx].clique_idx].end()); + } + // Reverse lookup for additional cliques using position map: + // if var_idx is in first[clique_idx][start_pos_on_clique:], it is adjacent to vertex_idx. + for (const auto& addtl : addtl_cliques) { + if (addtl.vertex_idx == var_idx) { continue; } + if (static_cast(addtl.clique_idx) < first_var_positions.size()) { + const auto& pos_map = first_var_positions[addtl.clique_idx]; + auto it = pos_map.find(var_idx); + if (it != pos_map.end() && it->second >= addtl.start_pos_on_clique) { + adj_set.insert(addtl.vertex_idx); + } + } + } + + for (const auto& adj_vertex : adj_list_small_cliques[var_idx]) { + adj_set.insert(adj_vertex); + } + // Add the complement of var_idx to the adjacency set + i_t complement_idx = (var_idx >= n_variables) ? (var_idx - n_variables) : (var_idx + n_variables); + adj_set.insert(complement_idx); + adj_set.erase(var_idx); + return adj_set; +} + +template +i_t clique_table_t::get_degree_of_var(i_t var_idx) +{ + // if it is not already computed, compute it and return + if (var_degrees[var_idx] == -1) { var_degrees[var_idx] = get_adj_set_of_var(var_idx).size(); } + return var_degrees[var_idx]; +} + +template +bool clique_table_t::check_adjacency(i_t var_idx1, i_t var_idx2) +{ + if (var_idx1 == var_idx2) { return false; } + if (var_idx1 % n_variables == var_idx2 % n_variables) { return true; } + + { + auto it = adj_list_small_cliques.find(var_idx1); + if (it != adj_list_small_cliques.end() && it->second.count(var_idx2) > 0) { return true; } + } + + // Iterate whichever variable belongs to fewer first-cliques + { + i_t probe_var = var_idx1; + i_t target_var = var_idx2; + if (var_clique_map_first[var_idx1].size() > var_clique_map_first[var_idx2].size()) { + probe_var = var_idx2; + target_var = var_idx1; + } + for (const auto& clique_idx : var_clique_map_first[probe_var]) { + if (first_var_positions[clique_idx].count(target_var) > 0) { return true; } + } + } + + for (const auto& addtl_idx : var_clique_map_addtl[var_idx1]) { + const auto& addtl = addtl_cliques[addtl_idx]; + const auto& pos_map = first_var_positions[addtl.clique_idx]; + auto it = pos_map.find(var_idx2); + if (it != pos_map.end() && it->second >= addtl.start_pos_on_clique) { return true; } + } + + for (const auto& addtl_idx : var_clique_map_addtl[var_idx2]) { + const auto& addtl = addtl_cliques[addtl_idx]; + const auto& pos_map = first_var_positions[addtl.clique_idx]; + auto it = pos_map.find(var_idx1); + if (it != pos_map.end() && it->second >= addtl.start_pos_on_clique) { return true; } + } + + return false; +} + +// this function should only be called within extend clique +// if this is called outside extend clique, csr matrix should be converted into csc and copied into +// problem because the problem is partly modified +template +void insert_clique_into_problem(const std::vector& clique, + dual_simplex::user_problem_t& problem, + dual_simplex::csr_matrix_t& A, + f_t coeff_scale) +{ + // convert vertices into original vars + f_t rhs_offset = 0.; + std::vector new_vars; + std::vector new_coeffs; + for (size_t i = 0; i < clique.size(); i++) { + f_t coeff = coeff_scale; + i_t var_idx = clique[i]; + if (var_idx >= problem.num_cols) { + coeff = -coeff_scale; + var_idx = var_idx - problem.num_cols; + rhs_offset += coeff_scale; + } + new_vars.push_back(var_idx); + new_coeffs.push_back(coeff); + } + // coeff_scale * (1 - x) = coeff_scale - coeff_scale * x + // Move constants to the right, so rhs must decrease by rhs_offset. + f_t rhs = coeff_scale - rhs_offset; + // insert the new clique into the problem as a new constraint + dual_simplex::sparse_vector_t new_row(A.n, new_vars.size()); + new_row.i = std::move(new_vars); + new_row.x = std::move(new_coeffs); + A.append_row(new_row); + problem.row_sense.push_back('L'); + problem.rhs.push_back(rhs); + problem.row_names.push_back("Clique" + std::to_string(problem.row_names.size())); +} + +template +bool extend_clique(const std::vector& clique, + clique_table_t& clique_table, + dual_simplex::user_problem_t& problem, + dual_simplex::csr_matrix_t& A, + f_t coeff_scale, + bool modify_problem, + i_t min_extension_gain, + i_t remaining_rows_budget, + i_t remaining_nnz_budget, + i_t& inserted_row_nnz) +{ + inserted_row_nnz = 0; + i_t smallest_degree = std::numeric_limits::max(); + i_t smallest_degree_var = -1; + // find smallest degree vertex in the current set packing constraint + for (size_t idx = 0; idx < clique.size(); idx++) { + i_t var_idx = clique[idx]; + i_t degree = clique_table.get_degree_of_var(var_idx); + if (degree < smallest_degree) { + smallest_degree = degree; + smallest_degree_var = var_idx; + } + } + std::vector extension_candidates; + auto smallest_degree_adj_set = clique_table.get_adj_set_of_var(smallest_degree_var); + std::unordered_set clique_members(clique.begin(), clique.end()); + for (const auto& candidate : smallest_degree_adj_set) { + if (clique_members.find(candidate) == clique_members.end()) { + extension_candidates.push_back(candidate); + } + } + std::sort(extension_candidates.begin(), extension_candidates.end(), [&](i_t a, i_t b) { + return clique_table.get_degree_of_var(a) > clique_table.get_degree_of_var(b); + }); + auto new_clique = clique; + i_t n_of_complement_conflicts = 0; + i_t complement_conflict_var = -1; + for (size_t idx = 0; idx < extension_candidates.size(); idx++) { + i_t var_idx = extension_candidates[idx]; + bool add = true; + bool complement_conflict = false; + i_t complement_conflict_idx = -1; + for (size_t i = 0; i < new_clique.size(); i++) { + if (var_idx % clique_table.n_variables == new_clique[i] % clique_table.n_variables) { + complement_conflict = true; + complement_conflict_idx = var_idx % clique_table.n_variables; + } + // check if the tested variable conflicts with all vars in the new clique + if (!clique_table.check_adjacency(var_idx, new_clique[i])) { + add = false; + break; + } + } + if (add) { + new_clique.push_back(var_idx); + if (complement_conflict) { + n_of_complement_conflicts++; + complement_conflict_var = complement_conflict_idx; + } + } + } + // if we found a larger cliqe, insert it into the formulation + if (new_clique.size() > clique.size()) { + if (n_of_complement_conflicts > 0) { + CUOPT_LOG_DEBUG("Found %d complement conflicts on var %d", + n_of_complement_conflicts, + complement_conflict_var); + cuopt_assert(n_of_complement_conflicts == 1, "There can only be one complement conflict"); + // Keep the discovered extension in the clique table for downstream dominance checks. + clique_table.first.push_back(new_clique); + for (const auto& var_idx : new_clique) { + clique_table.var_degrees[var_idx] = -1; + } + if (modify_problem) { + // fix all other variables other than complementing var + for (size_t i = 0; i < new_clique.size(); i++) { + if (new_clique[i] % clique_table.n_variables != complement_conflict_var) { + CUOPT_LOG_DEBUG("Fixing variable %d", new_clique[i]); + if (new_clique[i] >= problem.num_cols) { + cuopt_assert(problem.lower[new_clique[i] - problem.num_cols] != 0 || + problem.upper[new_clique[i] - problem.num_cols] != 0, + "Variable is fixed to other side"); + problem.lower[new_clique[i] - problem.num_cols] = 1; + problem.upper[new_clique[i] - problem.num_cols] = 1; + } else { + cuopt_assert(problem.lower[new_clique[i]] != 1 || problem.upper[new_clique[i]] != 1, + "Variable is fixed to other side"); + problem.lower[new_clique[i]] = 0; + problem.upper[new_clique[i]] = 0; + } + } + } + } + return true; + } else { + // Keep the discovered extension in the clique table even when row insertion is skipped by + // row/nnz budgets. + clique_table.first.push_back(new_clique); + for (const auto& var_idx : new_clique) { + clique_table.var_degrees[var_idx] = -1; + } +#if DEBUG_KNAPSACK_CONSTRAINTS + CUOPT_LOG_DEBUG("Extended clique: %lu from %lu", new_clique.size(), clique.size()); +#endif + i_t extension_gain = static_cast(new_clique.size() - clique.size()); + if (extension_gain < min_extension_gain) { return true; } + if (remaining_rows_budget <= 0 || + remaining_nnz_budget < static_cast(new_clique.size())) { + return true; + } + // Row insertion is now deferred until dominance is confirmed against model rows. + // This keeps extension and replacement sequential: detect dominance first, then replace. + inserted_row_nnz = 0; + } + } + return new_clique.size() > clique.size(); +} + +template +struct clique_sig_t { + i_t knapsack_idx; + i_t size; + long long signature; +}; + +template +struct extension_candidate_t { + i_t knapsack_idx; + i_t estimated_gain; + i_t clique_size; +}; + +template +bool compare_clique_sig(const clique_sig_t& a, const clique_sig_t& b) +{ + if (a.signature != b.signature) { return a.signature < b.signature; } + return a.size < b.size; +} + +template +bool compare_signature_value(long long value, const clique_sig_t& a) +{ + return value < a.signature; +} + +template +bool compare_extension_candidate(const extension_candidate_t& a, + const extension_candidate_t& b) +{ + if (a.estimated_gain != b.estimated_gain) { return a.estimated_gain > b.estimated_gain; } + if (a.clique_size != b.clique_size) { return a.clique_size < b.clique_size; } + return a.knapsack_idx < b.knapsack_idx; +} + +template +bool is_sorted_subset(const std::vector& a, const std::vector& b) +{ + size_t i = 0; + size_t j = 0; + while (i < a.size() && j < b.size()) { + if (a[i] == b[j]) { + i++; + j++; + } else if (a[i] > b[j]) { + j++; + } else { + return false; + } + } + return i == a.size(); +} + +template +void fix_difference(const std::vector& superset, + const std::vector& subset, + dual_simplex::user_problem_t& problem) +{ + cuopt_assert(std::is_sorted(subset.begin(), subset.end()), + "subset vector passed to fix_difference is not sorted"); + for (auto var_idx : superset) { + if (std::binary_search(subset.begin(), subset.end(), var_idx)) { continue; } + if (var_idx >= problem.num_cols) { + i_t orig_idx = var_idx - problem.num_cols; + CUOPT_LOG_DEBUG("Fixing variable %d", orig_idx); + cuopt_assert(problem.lower[orig_idx] != 0 || problem.upper[orig_idx] != 0, + "Variable is fixed to other side"); + problem.lower[orig_idx] = 1; + problem.upper[orig_idx] = 1; + } else { + CUOPT_LOG_DEBUG("Fixing variable %d", var_idx); + cuopt_assert(problem.lower[var_idx] != 1 || problem.upper[var_idx] != 1, + "Variable is fixed to other side"); + problem.lower[var_idx] = 0; + problem.upper[var_idx] = 0; + } + } +} + +template +void remove_marked_elements(std::vector& vec, const std::vector& removal_marker) +{ + size_t write_idx = 0; + for (size_t i = 0; i < vec.size(); i++) { + if (!removal_marker[i]) { + if (write_idx != i) { vec[write_idx] = std::move(vec[i]); } + write_idx++; + } + } + vec.resize(write_idx); +} + +template +void remove_dominated_cliques_in_problem_for_single_extended_clique( + const std::vector& curr_clique, + f_t coeff_scale, + i_t remaining_rows_budget, + i_t remaining_nnz_budget, + i_t& inserted_row_nnz, + const std::vector>& sp_sigs, + const std::vector>& cstr_vars, + const std::vector>& knapsack_constraints, + std::vector& original_to_current_row_idx, + dual_simplex::user_problem_t& problem, + dual_simplex::csr_matrix_t& A, + cuopt::timer_t& timer) +{ + inserted_row_nnz = 0; + if (curr_clique.empty() || sp_sigs.empty()) { return; } + std::vector curr_clique_vars(curr_clique.begin(), curr_clique.end()); + std::sort(curr_clique_vars.begin(), curr_clique_vars.end()); + curr_clique_vars.erase(std::unique(curr_clique_vars.begin(), curr_clique_vars.end()), + curr_clique_vars.end()); + long long signature = 0; + for (auto v : curr_clique_vars) { + signature += static_cast(v); + } + constexpr size_t dominance_window = 20000; + auto end_it = + std::upper_bound(sp_sigs.begin(), sp_sigs.end(), signature, compare_signature_value); + size_t end = static_cast(std::distance(sp_sigs.begin(), end_it)); + size_t start = (end > dominance_window) ? (end - dominance_window) : 0; + std::vector rows_to_remove; + bool covering_clique_implied_by_partitioning = false; + for (size_t idx = end; idx > start; idx--) { + if (timer.check_time_limit()) { break; } + const auto& sp = sp_sigs[idx - 1]; + const auto& vars_sp = cstr_vars[sp.knapsack_idx]; + if (vars_sp.size() > curr_clique_vars.size()) { continue; } + cuopt_assert(std::is_sorted(vars_sp.begin(), vars_sp.end()), + "vars_sp vector passed to is_sorted_subset is not sorted"); + if (!is_sorted_subset(vars_sp, curr_clique_vars)) { continue; } + if (knapsack_constraints[sp.knapsack_idx].is_set_partitioning) { + if (vars_sp.size() != curr_clique_vars.size()) { + fix_difference(curr_clique_vars, vars_sp, problem); + covering_clique_implied_by_partitioning = true; + } + continue; + } + i_t original_row_idx = knapsack_constraints[sp.knapsack_idx].cstr_idx; + if (original_row_idx < 0) { continue; } + cuopt_assert(original_row_idx < static_cast(original_to_current_row_idx.size()), + "Invalid original row index in knapsack constraint"); + i_t current_row_idx = original_to_current_row_idx[original_row_idx]; + if (current_row_idx < 0) { continue; } + cuopt_assert(current_row_idx < static_cast(problem.row_sense.size()), + "Invalid current row index in row mapping"); + rows_to_remove.push_back(current_row_idx); + } + if (rows_to_remove.empty()) { return; } + std::sort(rows_to_remove.begin(), rows_to_remove.end()); + rows_to_remove.erase(std::unique(rows_to_remove.begin(), rows_to_remove.end()), + rows_to_remove.end()); + if (!covering_clique_implied_by_partitioning) { + if (remaining_rows_budget <= 0 || + remaining_nnz_budget < static_cast(curr_clique_vars.size())) { + return; + } + insert_clique_into_problem(curr_clique_vars, problem, A, coeff_scale); + inserted_row_nnz = static_cast(curr_clique_vars.size()); + } + std::vector removal_marker(problem.row_sense.size(), 0); + for (auto row_idx : rows_to_remove) { + cuopt_assert(row_idx >= 0 && row_idx < static_cast(removal_marker.size()), + "Invalid dominated row index"); + CUOPT_LOG_DEBUG("Removing dominated row %d", row_idx); + removal_marker[row_idx] = true; + } + dual_simplex::csr_matrix_t A_removed(0, 0, 0); + A.remove_rows(removal_marker, A_removed); + A = std::move(A_removed); + problem.num_rows = A.m; + remove_marked_elements(problem.row_sense, removal_marker); + remove_marked_elements(problem.rhs, removal_marker); + remove_marked_elements(problem.row_names, removal_marker); + cuopt_assert(problem.rhs.size() == problem.row_sense.size(), "rhs and row sense size mismatch"); + cuopt_assert(problem.row_names.size() == problem.rhs.size(), "row names and rhs size mismatch"); + cuopt_assert(problem.num_rows == static_cast(problem.rhs.size()), + "matrix and num rows mismatch after removal"); + if (!problem.range_rows.empty()) { + std::vector old_to_new_indices; + old_to_new_indices.reserve(removal_marker.size()); + i_t new_idx = 0; + for (size_t i = 0; i < removal_marker.size(); ++i) { + if (!removal_marker[i]) { + old_to_new_indices.push_back(new_idx++); + } else { + old_to_new_indices.push_back(-1); + } + } + std::vector new_range_rows; + std::vector new_range_values; + for (size_t i = 0; i < problem.range_rows.size(); ++i) { + i_t old_row = problem.range_rows[i]; + cuopt_assert(old_row >= 0 && old_row < static_cast(removal_marker.size()), + "Invalid row index in range_rows"); + if (!removal_marker[old_row]) { + i_t new_row = old_to_new_indices[old_row]; + cuopt_assert(new_row != -1, "Invalid new row index for ranged row renumbering"); + new_range_rows.push_back(new_row); + new_range_values.push_back(problem.range_value[i]); + } + } + problem.range_rows = std::move(new_range_rows); + problem.range_value = std::move(new_range_values); + } + problem.num_range_rows = static_cast(problem.range_rows.size()); + std::vector removed_prefix(removal_marker.size() + 1, 0); + for (size_t row_idx = 0; row_idx < removal_marker.size(); row_idx++) { + removed_prefix[row_idx + 1] = + removed_prefix[row_idx] + static_cast(removal_marker[row_idx]); + } + for (i_t row_idx = 0; row_idx < static_cast(original_to_current_row_idx.size()); row_idx++) { + i_t current_row_idx = original_to_current_row_idx[row_idx]; + if (current_row_idx < 0) { continue; } + cuopt_assert(current_row_idx < static_cast(removal_marker.size()), + "Row index map is out of bounds"); + if (removal_marker[current_row_idx]) { + original_to_current_row_idx[row_idx] = -1; + } else { + original_to_current_row_idx[row_idx] = current_row_idx - removed_prefix[current_row_idx]; + } + } +} + +// Also known as clique merging. Infer larger clique constraints which allows inclusion of vars from +// other constraints. This only extends the original cliques in the formulation for now. +// TODO: consider a heuristic on how much of the cliques derived from knapsacks to include here +template +i_t extend_cliques(const std::vector>& knapsack_constraints, + const std::unordered_set& set_packing_constraints, + clique_table_t& clique_table, + dual_simplex::user_problem_t& problem, + dual_simplex::csr_matrix_t& A, + bool modify_problem, + cuopt::timer_t& timer, + double* work_estimate_out, + double max_work_estimate) +{ + constexpr i_t min_extension_gain = 2; + constexpr i_t extension_yield_window = 64; + constexpr i_t min_successes_per_window = 1; + + double local_work = 0.0; + double& work = work_estimate_out ? *work_estimate_out : local_work; + + i_t base_rows = A.m; + i_t base_nnz = A.row_start[A.m]; + i_t max_added_rows = std::max(8, base_rows / 50); + i_t max_added_nnz = std::max(8 * clique_table.max_clique_size_for_extension, base_nnz / 50); + + i_t added_rows = 0; + i_t added_nnz = 0; + i_t window_attempts = 0; + i_t window_successes = 0; + + CUOPT_LOG_DEBUG("Clique extension heuristics: min_gain=%d row_budget=%d nnz_budget=%d", + min_extension_gain, + max_added_rows, + max_added_nnz); + std::vector> cstr_vars(knapsack_constraints.size()); + std::vector> sp_sigs; + sp_sigs.reserve(set_packing_constraints.size()); + for (const auto knapsack_idx : set_packing_constraints) { + cuopt_assert(knapsack_idx >= 0 && knapsack_idx < static_cast(knapsack_constraints.size()), + "Invalid set packing constraint index"); + const auto& vars = knapsack_constraints[knapsack_idx].entries; + cstr_vars[knapsack_idx].reserve(vars.size()); + for (const auto& entry : vars) { + cstr_vars[knapsack_idx].push_back(entry.col); + } + std::sort(cstr_vars[knapsack_idx].begin(), cstr_vars[knapsack_idx].end()); + cstr_vars[knapsack_idx].erase( + std::unique(cstr_vars[knapsack_idx].begin(), cstr_vars[knapsack_idx].end()), + cstr_vars[knapsack_idx].end()); + long long signature = 0; + for (auto v : cstr_vars[knapsack_idx]) { + signature += static_cast(v); + } + sp_sigs.push_back({knapsack_idx, static_cast(cstr_vars[knapsack_idx].size()), signature}); + work += cstr_vars[knapsack_idx].size(); + } + if (work > max_work_estimate) { return 0; } + std::sort(sp_sigs.begin(), sp_sigs.end(), compare_clique_sig); + std::vector original_to_current_row_idx(problem.row_sense.size(), -1); + for (i_t row_idx = 0; row_idx < static_cast(original_to_current_row_idx.size()); row_idx++) { + original_to_current_row_idx[row_idx] = row_idx; + } + std::vector> extension_worklist; + extension_worklist.reserve(knapsack_constraints.size()); + for (i_t knapsack_idx = 0; knapsack_idx < static_cast(knapsack_constraints.size()); + knapsack_idx++) { + if (timer.check_time_limit()) { break; } + if (work > max_work_estimate) { break; } + const auto& knapsack_constraint = knapsack_constraints[knapsack_idx]; + if (!knapsack_constraint.is_set_packing) { continue; } + i_t clique_size = static_cast(knapsack_constraint.entries.size()); + if (clique_size >= clique_table.max_clique_size_for_extension) { continue; } + i_t smallest_degree = std::numeric_limits::max(); + for (const auto& entry : knapsack_constraint.entries) { + smallest_degree = std::min(smallest_degree, clique_table.get_degree_of_var(entry.col)); + } + i_t estimated_gain = std::max(0, smallest_degree - (clique_size - 1)); + if (estimated_gain < min_extension_gain) { continue; } + extension_worklist.push_back({knapsack_idx, estimated_gain, clique_size}); + work += knapsack_constraint.entries.size(); + } + std::stable_sort( + extension_worklist.begin(), extension_worklist.end(), compare_extension_candidate); + CUOPT_LOG_DEBUG("Clique extension candidates after scoring: %zu", extension_worklist.size()); + + i_t n_extended_cliques = 0; + for (const auto& candidate : extension_worklist) { + if (timer.check_time_limit()) { break; } + if (work > max_work_estimate) { break; } + if (added_rows >= max_added_rows || added_nnz >= max_added_nnz) { + CUOPT_LOG_DEBUG( + "Stopping clique extension: budget reached (rows=%d nnz=%d)", added_rows, added_nnz); + break; + } + window_attempts++; + const auto& knapsack_constraint = knapsack_constraints[candidate.knapsack_idx]; + std::vector clique; + for (const auto& entry : knapsack_constraint.entries) { + clique.push_back(entry.col); + } + i_t inserted_row_nnz = 0; + f_t coeff_scale = knapsack_constraint.entries[0].val; + bool extended_clique = extend_clique(clique, + clique_table, + problem, + A, + coeff_scale, + modify_problem, + min_extension_gain, + max_added_rows - added_rows, + max_added_nnz - added_nnz, + inserted_row_nnz); + work += clique.size() * clique.size(); + if (extended_clique) { + n_extended_cliques++; + i_t replacement_row_nnz = 0; + if (modify_problem) { + remove_dominated_cliques_in_problem_for_single_extended_clique(clique_table.first.back(), + coeff_scale, + max_added_rows - added_rows, + max_added_nnz - added_nnz, + replacement_row_nnz, + sp_sigs, + cstr_vars, + knapsack_constraints, + original_to_current_row_idx, + problem, + A, + timer); + } + if (replacement_row_nnz > 0) { + window_successes++; + added_rows++; + added_nnz += replacement_row_nnz; + } + } + if (window_attempts >= extension_yield_window) { + if (window_successes < min_successes_per_window) { + CUOPT_LOG_DEBUG( + "Stopping clique extension: low yield (%d/%d)", window_successes, window_attempts); + break; + } + window_attempts = 0; + window_successes = 0; + } + } + if (modify_problem) { + // copy modified matrix back to problem + A.to_compressed_col(problem.A); + } + CUOPT_LOG_DEBUG("Number of extended cliques: %d", n_extended_cliques); + return n_extended_cliques; +} + +template +void fill_var_clique_maps(clique_table_t& clique_table) +{ + clique_table.first_var_positions.resize(clique_table.first.size()); + for (size_t clique_idx = 0; clique_idx < clique_table.first.size(); clique_idx++) { + const auto& clique = clique_table.first[clique_idx]; + auto& pos_map = clique_table.first_var_positions[clique_idx]; + pos_map.reserve(clique.size()); + for (size_t idx = 0; idx < clique.size(); idx++) { + i_t var_idx = clique[idx]; + clique_table.var_clique_map_first[var_idx].insert(clique_idx); + pos_map[var_idx] = static_cast(idx); + } + } + for (size_t addtl_c = 0; addtl_c < clique_table.addtl_cliques.size(); addtl_c++) { + const auto& addtl_clique = clique_table.addtl_cliques[addtl_c]; + clique_table.var_clique_map_addtl[addtl_clique.vertex_idx].insert(addtl_c); + } +} + +template +void build_clique_table(const dual_simplex::user_problem_t& problem, + clique_table_t& clique_table, + typename mip_solver_settings_t::tolerances_t tolerances, + bool remove_small_cliques_flag, + bool fill_var_clique_maps_flag, + cuopt::timer_t& timer) +{ + if (timer.check_time_limit()) { return; } + cuopt_assert(clique_table.n_variables == problem.num_cols, "Clique table size mismatch"); + cuopt_assert(problem.var_types.size() == static_cast(problem.num_cols), + "Problem variable types size mismatch"); + std::vector> knapsack_constraints; + std::unordered_set set_packing_constraints; + dual_simplex::csr_matrix_t A(problem.num_rows, problem.num_cols, 0); + problem.A.to_compressed_row(A); + fill_knapsack_constraints(problem, knapsack_constraints, A); + make_coeff_positive_knapsack_constraint( + problem, knapsack_constraints, set_packing_constraints, tolerances); + sort_csr_by_constraint_coefficients(knapsack_constraints); + clique_table.tolerances = tolerances; + for (const auto& knapsack_constraint : knapsack_constraints) { + if (timer.check_time_limit()) { return; } + find_cliques_from_constraint(knapsack_constraint, clique_table, timer); + } + if (timer.check_time_limit()) { return; } + if (remove_small_cliques_flag) { remove_small_cliques(clique_table, timer); } + if (timer.check_time_limit()) { return; } + if (fill_var_clique_maps_flag) { fill_var_clique_maps(clique_table); } +} + +template +void print_knapsack_constraints( + const std::vector>& knapsack_constraints, + bool print_only_set_packing = false) +{ +#if DEBUG_KNAPSACK_CONSTRAINTS + std::cout << "Number of knapsack constraints: " << knapsack_constraints.size() << "\n"; + for (const auto& knapsack : knapsack_constraints) { + if (print_only_set_packing && !knapsack.is_set_packing) { continue; } + std::cout << "Knapsack constraint idx: " << knapsack.cstr_idx << "\n"; + std::cout << " RHS: " << knapsack.rhs << "\n"; + std::cout << " Is set packing: " << knapsack.is_set_packing << "\n"; + std::cout << " Entries:\n"; + for (const auto& entry : knapsack.entries) { + std::cout << " col: " << entry.col << ", val: " << entry.val << "\n"; + } + std::cout << "----------\n"; + } +#endif +} + +template +void print_clique_table(const clique_table_t& clique_table) +{ +#if DEBUG_KNAPSACK_CONSTRAINTS + std::cout << "Number of cliques: " << clique_table.first.size() << "\n"; + for (const auto& clique : clique_table.first) { + std::cout << "Clique: "; + for (const auto& var : clique) { + std::cout << var << " "; + } + } + std::cout << "Number of additional cliques: " << clique_table.addtl_cliques.size() << "\n"; + for (const auto& addtl_clique : clique_table.addtl_cliques) { + std::cout << "Additional clique: " << addtl_clique.vertex_idx << ", " << addtl_clique.clique_idx + << ", " << addtl_clique.start_pos_on_clique << "\n"; + } +#endif +} + +template +void find_initial_cliques(dual_simplex::user_problem_t& problem, + typename mip_solver_settings_t::tolerances_t tolerances, + std::shared_ptr>* clique_table_out, + cuopt::timer_t& timer, + bool modify_problem, + std::atomic* signal_extend) +{ + cuopt::timer_t stage_timer(std::numeric_limits::infinity()); +#ifdef DEBUG_CLIQUE_TABLE + double t_fill = 0.; + double t_coeff = 0.; + double t_sort = 0.; + double t_find = 0.; + double t_small = 0.; + double t_maps = 0.; + double t_extend = 0.; + double t_remove = 0.; +#endif + std::vector> knapsack_constraints; + std::unordered_set set_packing_constraints; + dual_simplex::csr_matrix_t A(problem.num_rows, problem.num_cols, 0); + problem.A.to_compressed_row(A); + fill_knapsack_constraints(problem, knapsack_constraints, A); +#ifdef DEBUG_CLIQUE_TABLE + t_fill = stage_timer.elapsed_time(); +#endif + make_coeff_positive_knapsack_constraint( + problem, knapsack_constraints, set_packing_constraints, tolerances); +#ifdef DEBUG_CLIQUE_TABLE + t_coeff = stage_timer.elapsed_time(); +#endif + sort_csr_by_constraint_coefficients(knapsack_constraints); +#ifdef DEBUG_CLIQUE_TABLE + t_sort = stage_timer.elapsed_time(); +#endif + clique_config_t clique_config; + std::shared_ptr> clique_table_shared; + clique_table_t clique_table_local(2 * problem.num_cols, + clique_config.min_clique_size, + clique_config.max_clique_size_for_extension); + clique_table_t* clique_table_ptr = &clique_table_local; + if (clique_table_out != nullptr) { + clique_table_shared = + std::make_shared>(2 * problem.num_cols, + clique_config.min_clique_size, + clique_config.max_clique_size_for_extension); + clique_table_ptr = clique_table_shared.get(); + } + clique_table_ptr->tolerances = tolerances; + double time_limit_for_additional_cliques = timer.remaining_time() / 2; + cuopt::timer_t additional_cliques_timer(time_limit_for_additional_cliques); + double find_work_estimate = 0.0; + for (const auto& knapsack_constraint : knapsack_constraints) { + if (timer.check_time_limit()) { break; } + if (signal_extend && signal_extend->load(std::memory_order_acquire)) { break; } + find_cliques_from_constraint(knapsack_constraint, *clique_table_ptr, additional_cliques_timer); + find_work_estimate += knapsack_constraint.entries.size(); + } +#ifdef DEBUG_CLIQUE_TABLE + t_find = stage_timer.elapsed_time(); +#endif + CUOPT_LOG_DEBUG("Number of cliques: %d, additional cliques: %d, find_work=%.0f", + clique_table_ptr->first.size(), + clique_table_ptr->addtl_cliques.size(), + find_work_estimate); + remove_small_cliques(*clique_table_ptr, timer); +#ifdef DEBUG_CLIQUE_TABLE + t_small = stage_timer.elapsed_time(); +#endif + fill_var_clique_maps(*clique_table_ptr); +#ifdef DEBUG_CLIQUE_TABLE + t_maps = stage_timer.elapsed_time(); +#endif + if (clique_table_out != nullptr) { *clique_table_out = std::move(clique_table_shared); } + double extend_work = 0.0; + constexpr double max_extend_work = 2e9; + i_t n_extended_cliques = extend_cliques(knapsack_constraints, + set_packing_constraints, + *clique_table_ptr, + problem, + A, + modify_problem, + timer, + &extend_work, + max_extend_work); +#ifdef DEBUG_CLIQUE_TABLE + t_extend = stage_timer.elapsed_time(); + CUOPT_LOG_DEBUG( + "Clique table timing (s): fill=%.6f coeff=%.6f sort=%.6f find=%.6f small=%.6f maps=%.6f " + "extend=%.6f total=%.6f find_work=%.0f extend_work=%.0f", + t_fill, + t_coeff - t_fill, + t_sort - t_coeff, + t_find - t_sort, + t_small - t_find, + t_maps - t_small, + t_extend - t_maps, + t_extend, + find_work_estimate, + extend_work); +#endif +} + +#define INSTANTIATE(F_TYPE) \ + template void find_initial_cliques( \ + dual_simplex::user_problem_t & problem, \ + typename mip_solver_settings_t::tolerances_t tolerances, \ + std::shared_ptr> * clique_table_out, \ + cuopt::timer_t & timer, \ + bool modify_problem, \ + std::atomic* signal_extend); \ + template void build_clique_table( \ + const dual_simplex::user_problem_t& problem, \ + clique_table_t& clique_table, \ + typename mip_solver_settings_t::tolerances_t tolerances, \ + bool remove_small_cliques_flag, \ + bool fill_var_clique_maps_flag, \ + cuopt::timer_t& timer); \ + template class clique_table_t; + +#if MIP_INSTANTIATE_FLOAT +INSTANTIATE(float) +#endif +#if MIP_INSTANTIATE_DOUBLE +INSTANTIATE(double) +#endif +#undef INSTANTIATE + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cuh b/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cuh new file mode 100644 index 0000000000..944241b4f0 --- /dev/null +++ b/cpp/src/mip_heuristics/presolve/conflict_graph/clique_table.cuh @@ -0,0 +1,208 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +struct clique_config_t { + int min_clique_size = 512; + int max_clique_size_for_extension = 128; +}; + +template +struct entry_t { + i_t col; + f_t val; + bool operator<(const entry_t& other) const { return val < other.val; } + bool operator<(double other) const { return val < other; } +}; + +template +struct knapsack_constraint_t { + std::vector> entries; + f_t rhs; + i_t cstr_idx; + bool is_set_packing = false; + bool is_set_partitioning = false; +}; + +template +struct addtl_clique_t { + i_t vertex_idx; + i_t clique_idx; + i_t start_pos_on_clique; +}; + +template +struct clique_table_t { + clique_table_t(i_t n_vertices, i_t min_clique_size_, i_t max_clique_size_for_extension_) + : min_clique_size(min_clique_size_), + max_clique_size_for_extension(max_clique_size_for_extension_), + var_clique_map_first(n_vertices), + var_clique_map_addtl(n_vertices), + adj_list_small_cliques(n_vertices), + var_degrees(n_vertices, -1), + n_variables(n_vertices / 2) + { + } + + std::unordered_set get_adj_set_of_var(i_t var_idx); + i_t get_degree_of_var(i_t var_idx); + bool check_adjacency(i_t var_idx1, i_t var_idx2); + + // keeps the large cliques in each constraint + std::vector> first; + // keeps the additional cliques + std::vector> addtl_cliques; + // TODO figure out the performance of lookup for the following: unordered_set vs vector + // keeps the indices of original(first) cliques that contain variable x + std::vector> var_clique_map_first; + // keeps the indices of additional cliques that contain variable x + std::vector> var_clique_map_addtl; + // var_idx -> position mapping for each first clique, enabling O(1) membership/position checks + std::vector> first_var_positions; + // adjacency list to keep small cliques, this basically keeps the vars share a small clique + // constraint + std::unordered_map> adj_list_small_cliques; + // degrees of each vertex + std::vector var_degrees; + // number of variables in the original problem + const i_t n_variables; + const i_t min_clique_size; + const i_t max_clique_size_for_extension; + typename mip_solver_settings_t::tolerances_t tolerances; +}; + +template +void find_initial_cliques(dual_simplex::user_problem_t& problem, + typename mip_solver_settings_t::tolerances_t tolerances, + std::shared_ptr>* clique_table_out, + cuopt::timer_t& timer, + bool modify_problem, + std::atomic* signal_extend = nullptr); + +template +void build_clique_table(const dual_simplex::user_problem_t& problem, + clique_table_t& clique_table, + typename mip_solver_settings_t::tolerances_t tolerances, + bool remove_small_cliques, + bool fill_var_clique_maps, + cuopt::timer_t& timer); + +} // namespace cuopt::linear_programming::detail + +// Possible application to rounding procedure, keeping it as reference + +// fix set of variables x_1, x_2, x_3,... in a bulk. Consider sorting according largest size GUB +// constraint(or some other criteria). + +// compute new activities on changed constraints, given that x_1=v_1, x_2=v_2, x_3=v_3: + +// if the current constraint is GUB + +// if at least two binary vars(note that some can be full integer) are common: (needs +// binary_vars_in_bulk^2 number of checks) + +// return infeasible + +// else + +// set L_r to 1. + +// else(non-GUB constraints) + +// greedy clique partitioning algorithm: + +// set L_r = sum(all positive coefficients on binary vars) + sum(min_activity contribution on +// non-binary vars) # note that the paper doesn't contain this part, since it only deals with binary + +// # iterate only on binary variables(i.e. vertices of B- and complements of B+) + +// start with highest weight vertex (v) among unmarked and mark it + +// find maximal clique among unmarked containing the vertex: (there are various algorithms to +// find maximal clique) + +// max_clique = {v} + +// L_r -= w_v + +// # prioritization is on higher weight vertex when there are equivalent max cliques? +// # we could try BFS to search multiple greedy paths +// for each unmarked vertex(w): + +// counter = 0 + +// for each vertex(k) in max_clique: + +// if(check_if_pair_shares_an_edge(w,k)) + +// counter++ + +// if counter == max_clique.size() + +// max_clique = max_clique U {w} + +// mark w as marked + +// if(L_r > UB) return infeasible + +// remove all fixed variables(original and newly propagated) from the conflict graph. !!!!!! still a +// bit unclear how to remove it from the adjaceny list data structure since it only supports +// additions!!!! + +// add newly discovered GUB constraints into dynamic adjacency list + +// do double probing to infer new edges(we need a heuristic to choose which pairs to probe) + +// check_if_pair_shares_an_edge(w,v): + +// check GUB constraints by traversing the double linked list: + +// on the column of variable w: + +// for each row: + +// if v is contained on the row + +// return true + +// check added edges on adjacency list: + +// k <- last[w] + +// while k != 0 + +// if(adj[k] == v) + +// return true + +// k <-next[k] + +// return false diff --git a/cpp/src/mip_heuristics/presolve/gf2_presolve.cpp b/cpp/src/mip_heuristics/presolve/gf2_presolve.cpp index 45ea4e420f..8ab0176cc4 100644 --- a/cpp/src/mip_heuristics/presolve/gf2_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/gf2_presolve.cpp @@ -247,7 +247,7 @@ papilo::PresolveStatus GF2Presolve::execute(const papilo::Problem& pro #define INSTANTIATE(F_TYPE) template class GF2Presolve; -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/mip_heuristics/presolve/lb_probing_cache.cu b/cpp/src/mip_heuristics/presolve/lb_probing_cache.cu index 3a6d1bce21..fc6c7fe4b6 100644 --- a/cpp/src/mip_heuristics/presolve/lb_probing_cache.cu +++ b/cpp/src/mip_heuristics/presolve/lb_probing_cache.cu @@ -309,7 +309,7 @@ inline std::vector compute_prioritized_integer_indices( template void compute_probing_cache(load_balanced_bounds_presolve_t& bound_presolve, load_balanced_problem_t& problem, - timer_t timer) + work_limit_timer_t timer) { // we dont want to compute the probing cache for all variables for time and computation resources auto priority_indices = compute_prioritized_integer_indices(bound_presolve, problem); @@ -400,7 +400,7 @@ void compute_probing_cache(load_balanced_bounds_presolve_t& bound_pres template void compute_probing_cache( \ load_balanced_bounds_presolve_t & bound_presolve, \ load_balanced_problem_t & problem, \ - timer_t timer); \ + work_limit_timer_t timer); \ template class lb_probing_cache_t; #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/mip_heuristics/presolve/probing_cache.cu b/cpp/src/mip_heuristics/presolve/probing_cache.cu index 5ae89c700a..f152e440a8 100644 --- a/cpp/src/mip_heuristics/presolve/probing_cache.cu +++ b/cpp/src/mip_heuristics/presolve/probing_cache.cu @@ -6,7 +6,6 @@ /* clang-format on */ #include "probing_cache.cuh" -#include "trivial_presolve.cuh" #include #include @@ -19,6 +18,8 @@ #include #include +#include + namespace cuopt::linear_programming::detail { template @@ -366,7 +367,7 @@ void compute_cache_for_var(i_t var_idx, std::atomic& problem_is_infeasible, std::vector>& modification_vector, std::vector>& substitution_vector, - timer_t timer, + const work_limit_timer_t& timer, i_t device_id) { RAFT_CUDA_TRY(cudaSetDevice(device_id)); @@ -842,7 +843,7 @@ std::vector compute_priority_indices_by_implied_integers(problem_t bool compute_probing_cache(bound_presolve_t& bound_presolve, problem_t& problem, - timer_t timer) + work_limit_timer_t timer) { raft::common::nvtx::range fun_scope("compute_probing_cache"); // we dont want to compute the probing cache for all variables for time and computation resources @@ -856,6 +857,12 @@ bool compute_probing_cache(bound_presolve_t& bound_presolve, bound_presolve.settings.iteration_limit = 50; bound_presolve.settings.time_limit = timer.remaining_time(); + // TODO: proper work unit accounting in deterministic mode for the probing cache + if ((bound_presolve.context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS)) { + bound_presolve.settings.iteration_limit = 1; + priority_indices.resize(std::min(priority_indices.size(), 2048)); + } + size_t num_threads = bound_presolve.settings.num_threads < 0 ? 0.2 * omp_get_max_threads() : bound_presolve.settings.num_threads; @@ -943,7 +950,7 @@ bool compute_probing_cache(bound_presolve_t& bound_presolve, #define INSTANTIATE(F_TYPE) \ template bool compute_probing_cache(bound_presolve_t & bound_presolve, \ problem_t & problem, \ - timer_t timer); \ + work_limit_timer_t timer); \ template class probing_cache_t; #if MIP_INSTANTIATE_FLOAT diff --git a/cpp/src/mip_heuristics/presolve/probing_cache.cuh b/cpp/src/mip_heuristics/presolve/probing_cache.cuh index abb0145054..76c4c5dfff 100644 --- a/cpp/src/mip_heuristics/presolve/probing_cache.cuh +++ b/cpp/src/mip_heuristics/presolve/probing_cache.cuh @@ -12,6 +12,7 @@ #include #include +#include namespace cuopt::linear_programming::detail { @@ -87,7 +88,9 @@ class probing_cache_t { f_t first_probe, f_t second_probe, f_t integrality_tolerance); - + // add the results of probing cache to secondary CG structure if not already in a gub constraint. + // use the same activity computation that we will use in BP rounding. + // use GUB constraints to find fixings in bulk rounding std::unordered_map, 2>> probing_cache; std::mutex probing_cache_mutex; }; @@ -117,6 +120,6 @@ class lb_probing_cache_t { template bool compute_probing_cache(bound_presolve_t& bound_presolve, problem_t& problem, - timer_t timer); + work_limit_timer_t timer); } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp index 5a89393a6a..af11265551 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.cpp @@ -432,6 +432,7 @@ optimization_problem_t build_optimization_problem( const int* cols = constraint_matrix.getConstraintMatrix().getColumns(); const f_t* coeffs = constraint_matrix.getConstraintMatrix().getValues(); + op_problem.set_csr_constraint_matrix( &(coeffs[start]), nnz, &(cols[start]), nnz, offsets.data(), nrows + 1); @@ -491,7 +492,8 @@ void check_postsolve_status(const papilo::PostsolveStatus& status) template void set_presolve_methods(papilo::Presolve& presolver, problem_category_t category, - bool dual_postsolve) + bool dual_postsolve, + bool deterministic) { using uptr = std::unique_ptr>; @@ -518,7 +520,7 @@ void set_presolve_methods(papilo::Presolve& presolver, // exhaustive presolvers presolver.addPresolveMethod(uptr(new papilo::ImplIntDetection())); presolver.addPresolveMethod(uptr(new papilo::DominatedCols())); - presolver.addPresolveMethod(uptr(new papilo::Probing())); + if (!deterministic) { presolver.addPresolveMethod(uptr(new papilo::Probing())); } if (!dual_postsolve) { presolver.addPresolveMethod(uptr(new papilo::DualInfer())); @@ -535,7 +537,7 @@ void set_presolve_options(papilo::Presolve& presolver, problem_category_t category, f_t absolute_tolerance, f_t relative_tolerance, - double time_limit, + f_t time_limit, bool dual_postsolve, i_t num_cpu_threads) { @@ -552,17 +554,20 @@ template void set_presolve_parameters(papilo::Presolve& presolver, problem_category_t category, int nrows, - int ncols) + int ncols, + bool deterministic = false) { // It looks like a copy. But this copy has the pointers to relevant variables in papilo auto params = presolver.getParameters(); if (category == problem_category_t::MIP) { - // Papilo has work unit measurements for probing. Because of this when the first batch fails to - // produce any reductions, the algorithm stops. To avoid stopping the algorithm, we set a - // minimum badge size to a huge value. The time limit makes sure that we exit if it takes too - // long - int min_badgesize = std::max(ncols / 2, 32); - params.setParameter("probing.minbadgesize", min_badgesize); + if (!deterministic) { + // Papilo has work unit measurements for probing. Because of this when the first batch fails + // to produce any reductions, the algorithm stops. To avoid stopping the algorithm, we set a + // minimum badge size to a huge value. The time limit makes sure that we exit if it takes too + // long + int min_badgesize = std::max(ncols / 2, 32); + params.setParameter("probing.minbadgesize", min_badgesize); + } params.setParameter("cliquemerging.enabled", true); params.setParameter("cliquemerging.maxcalls", 50); } @@ -572,24 +577,30 @@ template std::optional> third_party_presolve_t::apply_pslp( optimization_problem_t const& op_problem, const double time_limit) { - f_t original_obj_offset = op_problem.get_objective_offset(); - auto ctx = build_and_run_pslp_presolver(op_problem, maximize_, time_limit); + if constexpr (std::is_same_v) { + double original_obj_offset = op_problem.get_objective_offset(); + auto ctx = build_and_run_pslp_presolver(op_problem, maximize_, time_limit); - // Free previously allocated presolver and settings - if (pslp_presolver_ != nullptr) { free_presolver(pslp_presolver_); } - if (pslp_stgs_ != nullptr) { free_settings(pslp_stgs_); } + // Free previously allocated presolver and settings if they exist + if (pslp_presolver_ != nullptr) { free_presolver(pslp_presolver_); } + if (pslp_stgs_ != nullptr) { free_settings(pslp_stgs_); } + + pslp_presolver_ = ctx.presolver; + pslp_stgs_ = ctx.settings; - pslp_presolver_ = ctx.presolver; - pslp_stgs_ = ctx.settings; + if (ctx.status == PresolveStatus_::INFEASIBLE || ctx.status == PresolveStatus_::UNBNDORINFEAS) { + return std::nullopt; + } - if (ctx.status == PresolveStatus_::INFEASIBLE || ctx.status == PresolveStatus_::UNBNDORINFEAS) { + auto opt_problem = build_optimization_problem_from_pslp( + pslp_presolver_, op_problem.get_handle_ptr(), maximize_, original_obj_offset); + opt_problem.set_problem_name(op_problem.get_problem_name()); + return std::make_optional(third_party_presolve_result_t{opt_problem, {}}); + } else { + cuopt_expects( + false, error_type_t::ValidationError, "PSLP presolver only supports double precision"); return std::nullopt; } - - auto opt_problem = build_optimization_problem_from_pslp( - pslp_presolver_, op_problem.get_handle_ptr(), maximize_, original_obj_offset); - - return std::make_optional(third_party_presolve_result_t{opt_problem, {}}); } template @@ -625,7 +636,7 @@ std::optional> third_party_presolve_t papilo_presolver; - set_presolve_methods(papilo_presolver, category, dual_postsolve); + set_presolve_methods(papilo_presolver, category, dual_postsolve, deterministic_); set_presolve_options(papilo_presolver, category, absolute_tolerance, @@ -633,8 +644,11 @@ std::optional> third_party_presolve_t( - papilo_presolver, category, op_problem.get_n_constraints(), op_problem.get_n_variables()); + set_presolve_parameters(papilo_presolver, + category, + op_problem.get_n_constraints(), + op_problem.get_n_variables(), + deterministic_); // Disable papilo logs papilo_presolver.setVerbosityLevel(papilo::VerbosityLevel::kQuiet); @@ -662,6 +676,7 @@ std::optional> third_party_presolve_t( papilo_problem, op_problem.get_handle_ptr(), category, maximize_); + opt_problem.set_problem_name(op_problem.get_problem_name()); auto col_flags = papilo_problem.getColFlags(); std::vector implied_integer_indices; for (size_t i = 0; i < col_flags.size(); i++) { @@ -697,12 +712,14 @@ void third_party_presolve_t::undo(rmm::device_uvector& primal_sol } if (status_to_skip) { return; } + std::vector primal_sol_vec_h(primal_solution.size()); raft::copy(primal_sol_vec_h.data(), primal_solution.data(), primal_solution.size(), stream_view); std::vector dual_sol_vec_h(dual_solution.size()); raft::copy(dual_sol_vec_h.data(), dual_solution.data(), dual_solution.size(), stream_view); std::vector reduced_costs_vec_h(reduced_costs.size()); raft::copy(reduced_costs_vec_h.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + papilo::Solution reduced_sol(primal_sol_vec_h); if (dual_postsolve) { reduced_sol.dual = dual_sol_vec_h; @@ -734,26 +751,34 @@ void third_party_presolve_t::undo_pslp(rmm::device_uvector& prima rmm::device_uvector& reduced_costs, rmm::cuda_stream_view stream_view) { - std::vector h_primal_solution(primal_solution.size()); - std::vector h_dual_solution(dual_solution.size()); - std::vector h_reduced_costs(reduced_costs.size()); - raft::copy(h_primal_solution.data(), primal_solution.data(), primal_solution.size(), stream_view); - raft::copy(h_dual_solution.data(), dual_solution.data(), dual_solution.size(), stream_view); - raft::copy(h_reduced_costs.data(), reduced_costs.data(), reduced_costs.size(), stream_view); - - postsolve( - pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); - - auto uncrushed_sol = pslp_presolver_->sol; - int n_cols = uncrushed_sol->dim_x; - int n_rows = uncrushed_sol->dim_y; - - primal_solution.resize(n_cols, stream_view); - dual_solution.resize(n_rows, stream_view); - reduced_costs.resize(n_cols, stream_view); - raft::copy(primal_solution.data(), uncrushed_sol->x, n_cols, stream_view); - raft::copy(dual_solution.data(), uncrushed_sol->y, n_rows, stream_view); - raft::copy(reduced_costs.data(), uncrushed_sol->z, n_cols, stream_view); + if constexpr (std::is_same_v) { + // PSLP uses double internally, so we can use the data directly + std::vector h_primal_solution(primal_solution.size()); + std::vector h_dual_solution(dual_solution.size()); + std::vector h_reduced_costs(reduced_costs.size()); + raft::copy( + h_primal_solution.data(), primal_solution.data(), primal_solution.size(), stream_view); + raft::copy(h_dual_solution.data(), dual_solution.data(), dual_solution.size(), stream_view); + raft::copy(h_reduced_costs.data(), reduced_costs.data(), reduced_costs.size(), stream_view); + stream_view.synchronize(); + + postsolve( + pslp_presolver_, h_primal_solution.data(), h_dual_solution.data(), h_reduced_costs.data()); + + auto uncrushed_sol = pslp_presolver_->sol; + int n_cols = uncrushed_sol->dim_x; + int n_rows = uncrushed_sol->dim_y; + + primal_solution.resize(n_cols, stream_view); + dual_solution.resize(n_rows, stream_view); + reduced_costs.resize(n_cols, stream_view); + raft::copy(primal_solution.data(), uncrushed_sol->x, n_cols, stream_view); + raft::copy(dual_solution.data(), uncrushed_sol->y, n_rows, stream_view); + raft::copy(reduced_costs.data(), uncrushed_sol->z, n_cols, stream_view); + } else { + cuopt_expects( + false, error_type_t::ValidationError, "PSLP postsolve only supports double precision"); + } stream_view.synchronize(); } @@ -795,7 +820,7 @@ void papilo_postsolve_deleter::operator()(papilo::PostsolveStorage* pt delete ptr; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template struct papilo_postsolve_deleter; template class third_party_presolve_t; #endif diff --git a/cpp/src/mip_heuristics/presolve/third_party_presolve.hpp b/cpp/src/mip_heuristics/presolve/third_party_presolve.hpp index ee273b6497..156c546742 100644 --- a/cpp/src/mip_heuristics/presolve/third_party_presolve.hpp +++ b/cpp/src/mip_heuristics/presolve/third_party_presolve.hpp @@ -82,6 +82,12 @@ class third_party_presolve_t { rmm::device_uvector& reduced_costs, rmm::cuda_stream_view stream_view); + bool deterministic_ = false; + + public: + void set_deterministic(bool d) { deterministic_ = d; } + + private: bool maximize_ = false; cuopt::linear_programming::presolver_t presolver_ = cuopt::linear_programming::presolver_t::PSLP; // PSLP settings diff --git a/cpp/src/mip_heuristics/presolve/utils.cuh b/cpp/src/mip_heuristics/presolve/utils.cuh index 4870b3180c..404c614108 100644 --- a/cpp/src/mip_heuristics/presolve/utils.cuh +++ b/cpp/src/mip_heuristics/presolve/utils.cuh @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -12,6 +12,7 @@ namespace cuopt::linear_programming::detail { enum class termination_criterion_t { TIME_LIMIT, ITERATION_LIMIT, + WORK_LIMIT, CONVERGENCE, INFEASIBLE, NO_UPDATE diff --git a/cpp/src/mip_heuristics/problem/presolve_data.cu b/cpp/src/mip_heuristics/problem/presolve_data.cu index b11f7b108a..bf05efa875 100644 --- a/cpp/src/mip_heuristics/problem/presolve_data.cu +++ b/cpp/src/mip_heuristics/problem/presolve_data.cu @@ -245,7 +245,7 @@ void presolve_data_t::papilo_uncrush_assignment( problem.handle_ptr->sync_stream(); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class presolve_data_t; #endif diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index d77e2e5f65..87df8a55f0 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -64,6 +64,10 @@ void problem_t::op_problem_cstr_body(const optimization_problem_tget_thrust_policy(), + integer_fixed_variable_map.begin(), + integer_fixed_variable_map.end(), + -1); const bool is_mip = original_problem_ptr->get_problem_category() != problem_category_t::LP; if (is_mip) { @@ -136,13 +140,14 @@ problem_t::problem_t( nonbinary_indices(0, problem_.get_handle_ptr()->get_stream()), is_binary_variable(0, problem_.get_handle_ptr()->get_stream()), related_variables(0, problem_.get_handle_ptr()->get_stream()), - related_variables_offsets(n_variables, problem_.get_handle_ptr()->get_stream()), + related_variables_offsets(0, problem_.get_handle_ptr()->get_stream()), var_names(problem_.get_variable_names()), row_names(problem_.get_row_names()), objective_name(problem_.get_objective_name()), objective_offset(problem_.get_objective_offset()), lp_state(*this, problem_.get_handle_ptr()->get_stream()), fixing_helpers(n_constraints, n_variables, handle_ptr), + clique_table(nullptr), Q_offsets(problem_.get_quadratic_objective_offsets()), Q_indices(problem_.get_quadratic_objective_indices()), Q_values(problem_.get_quadratic_objective_values()) @@ -199,6 +204,7 @@ problem_t::problem_t(const problem_t& problem_) objective_is_integral(problem_.objective_is_integral), lp_state(problem_.lp_state), fixing_helpers(problem_.fixing_helpers, handle_ptr), + clique_table(problem_.clique_table), vars_with_objective_coeffs(problem_.vars_with_objective_coeffs), expensive_to_fix_vars(problem_.expensive_to_fix_vars), Q_offsets(problem_.Q_offsets), @@ -255,6 +261,7 @@ problem_t::problem_t(const problem_t& problem_, objective_is_integral(problem_.objective_is_integral), lp_state(problem_.lp_state, handle_ptr), fixing_helpers(problem_.fixing_helpers, handle_ptr), + clique_table(problem_.clique_table), vars_with_objective_coeffs(problem_.vars_with_objective_coeffs), expensive_to_fix_vars(problem_.expensive_to_fix_vars), Q_offsets(problem_.Q_offsets), @@ -279,6 +286,7 @@ problem_t::problem_t(const problem_t& problem_, bool no_deep maximize(problem_.maximize), empty(problem_.empty), is_binary_pb(problem_.is_binary_pb), + clique_table(problem_.clique_table), // Copy constructor used by PDLP and MIP // PDLP uses the version with no_deep_copy = false which deep copy some fields but doesn't // allocate others that are not needed in PDLP @@ -940,8 +948,12 @@ void problem_t::compute_related_variables(double time_limit) handle_ptr->sync_stream(); - // CHANGE - if (deterministic) { time_limit = std::numeric_limits::infinity(); } + if (deterministic) { + // TODO: Re-enable deterministic related-variable construction once we have a work estimator. + related_variables.resize(0, handle_ptr->get_stream()); + related_variables_offsets.resize(0, handle_ptr->get_stream()); + return; + } // previously used constants were based on 40GB of memory. Scale accordingly on smaller GPUs // We can't rely on querying free memory or allocation try/catch @@ -1097,6 +1109,7 @@ void problem_t::resize_constraints(size_t matrix_size, size_t n_variables) { raft::common::nvtx::range fun_scope("resize_constraints"); + auto prev_dual_size = lp_state.prev_dual.size(); coefficients.resize(matrix_size, handle_ptr->get_stream()); variables.resize(matrix_size, handle_ptr->get_stream()); reverse_constraints.resize(matrix_size, handle_ptr->get_stream()); @@ -1107,6 +1120,13 @@ void problem_t::resize_constraints(size_t matrix_size, combined_bounds.resize(constraint_size, handle_ptr->get_stream()); offsets.resize(constraint_size + 1, handle_ptr->get_stream()); reverse_offsets.resize(n_variables + 1, handle_ptr->get_stream()); + lp_state.prev_dual.resize(constraint_size, handle_ptr->get_stream()); + if (constraint_size > prev_dual_size) { + thrust::fill(handle_ptr->get_thrust_policy(), + lp_state.prev_dual.begin() + prev_dual_size, + lp_state.prev_dual.end(), + f_t{0}); + } } // note that these don't change the reverse structure @@ -2037,6 +2057,87 @@ void problem_t::preprocess_problem() preprocess_called = true; } +template +void problem_t::set_constraints_from_host_user_problem( + const cuopt::linear_programming::dual_simplex::user_problem_t& user_problem) +{ + raft::common::nvtx::range fun_scope("set_constraints_from_host_user_problem"); + cuopt_assert(user_problem.handle_ptr == handle_ptr, "handle mismatch"); + cuopt_assert(user_problem.num_cols == n_variables, "num cols mismatch"); + n_constraints = user_problem.num_rows; + cuopt_assert(user_problem.rhs.size() == static_cast(n_constraints), "rhs size mismatch"); + cuopt_assert(user_problem.row_sense.size() == static_cast(n_constraints), + "row sense size mismatch"); + cuopt_assert(user_problem.range_rows.size() == user_problem.range_value.size(), + "range rows/value size mismatch"); + + dual_simplex::csr_matrix_t csr_A(n_constraints, n_variables, user_problem.A.nnz()); + user_problem.A.to_compressed_row(csr_A); + nnz = csr_A.row_start[n_constraints]; + empty = (nnz == 0 && n_constraints == 0 && n_variables == 0); + + auto stream = handle_ptr->get_stream(); + cuopt::device_copy(coefficients, csr_A.x, stream); + cuopt::device_copy(variables, csr_A.j, stream); + cuopt::device_copy(offsets, csr_A.row_start, stream); + + std::vector h_constraint_lower_bounds(n_constraints); + std::vector h_constraint_upper_bounds(n_constraints); + std::vector range_value_per_row(n_constraints, f_t{0}); + std::vector is_range_row(n_constraints, 0); + for (size_t idx = 0; idx < user_problem.range_rows.size(); ++idx) { + auto row = user_problem.range_rows[idx]; + cuopt_assert(row >= 0 && row < n_constraints, "range row out of bounds"); + is_range_row[row] = 1; + range_value_per_row[row] = user_problem.range_value[idx]; + } + + const auto inf = std::numeric_limits::infinity(); + for (i_t i = 0; i < n_constraints; ++i) { + const f_t rhs = user_problem.rhs[i]; + const char sense = user_problem.row_sense[i]; + if (sense == 'E') { + h_constraint_lower_bounds[i] = rhs; + h_constraint_upper_bounds[i] = rhs; + if (is_range_row[i]) { h_constraint_upper_bounds[i] = rhs + range_value_per_row[i]; } + } else if (sense == 'G') { + h_constraint_lower_bounds[i] = rhs; + h_constraint_upper_bounds[i] = inf; + } else if (sense == 'L') { + h_constraint_lower_bounds[i] = -inf; + h_constraint_upper_bounds[i] = rhs; + } else { + cuopt_assert(false, "Unsupported row sense"); + } + } + + cuopt::device_copy(constraint_lower_bounds, h_constraint_lower_bounds, stream); + cuopt::device_copy(constraint_upper_bounds, h_constraint_upper_bounds, stream); + + if (!user_problem.row_names.empty()) { + row_names = user_problem.row_names; + } else if (row_names.size() != static_cast(n_constraints)) { + row_names.clear(); + } + + integer_fixed_problem = nullptr; + fixing_helpers.reduction_in_rhs.resize(n_constraints, stream); + auto prev_dual_size = lp_state.prev_dual.size(); + lp_state.prev_dual.resize(n_constraints, stream); + if (n_constraints > (i_t)prev_dual_size) { + thrust::fill(handle_ptr->get_thrust_policy(), + lp_state.prev_dual.begin() + prev_dual_size, + lp_state.prev_dual.end(), + f_t{0}); + } + handle_ptr->sync_stream(); + RAFT_CHECK_CUDA(stream); + + compute_transpose_of_problem(); + combined_bounds.resize(n_constraints, stream); + combine_constraint_bounds(*this, combined_bounds); +} + template bool problem_t::pre_process_assignment(rmm::device_uvector& assignment) { @@ -2095,7 +2196,6 @@ void problem_t::get_host_user_problem( csr_A.row_start = std::vector(cuopt::host_copy(offsets, stream)); csr_A.to_compressed_col(user_problem.A); - user_problem.rhs.resize(m); user_problem.row_sense.resize(m); user_problem.range_rows.clear(); @@ -2292,7 +2392,7 @@ void problem_t::update_variable_bounds(const std::vector& var_ind RAFT_CHECK_CUDA(handle_ptr->get_stream()); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class problem_t; #endif diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index 6cd180a800..9771bab568 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -24,6 +24,7 @@ #include +#include #include #include #include @@ -37,6 +38,9 @@ namespace cuopt { namespace linear_programming::detail { +template +struct clique_table_t; + template class solution_t; @@ -119,8 +123,12 @@ class problem_t { bool is_integer(f_t val) const; bool integer_equal(f_t val1, f_t val2) const; + std::shared_ptr> clique_table; + void get_host_user_problem( cuopt::linear_programming::dual_simplex::user_problem_t& user_problem) const; + void set_constraints_from_host_user_problem( + const cuopt::linear_programming::dual_simplex::user_problem_t& user_problem); uint32_t get_fingerprint() const; diff --git a/cpp/src/mip_heuristics/problem/problem_fixing.cuh b/cpp/src/mip_heuristics/problem/problem_fixing.cuh index 820b74e329..c462838d96 100644 --- a/cpp/src/mip_heuristics/problem/problem_fixing.cuh +++ b/cpp/src/mip_heuristics/problem/problem_fixing.cuh @@ -1,12 +1,13 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once +#include #include namespace cuopt { @@ -18,6 +19,10 @@ struct problem_fixing_helpers_t { : reduction_in_rhs(n_constraints, handle_ptr->get_stream()), variable_fix_mask(n_variables, handle_ptr->get_stream()) { + thrust::fill( + handle_ptr->get_thrust_policy(), reduction_in_rhs.begin(), reduction_in_rhs.end(), f_t(0)); + thrust::fill( + handle_ptr->get_thrust_policy(), variable_fix_mask.begin(), variable_fix_mask.end(), i_t(0)); } problem_fixing_helpers_t(const problem_fixing_helpers_t& other, const raft::handle_t* handle_ptr) diff --git a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu index e2bbc8feb1..d1fd0a6392 100644 --- a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cu @@ -21,6 +21,15 @@ #include +#include + +// uncomment to enable detailed detemrinism logs +#undef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(...) \ + do { \ + CUOPT_LOG_INFO(__VA_ARGS__); \ + } while (0) + namespace cuopt::linear_programming::detail { template @@ -40,6 +49,9 @@ optimization_problem_solution_t get_relaxed_lp_solution( const relaxed_lp_settings_t& settings) { raft::common::nvtx::range fun_scope("get_relaxed_lp_solution"); + static std::atomic lp_call_counter{0}; + const uint64_t lp_call_id = lp_call_counter.fetch_add(1, std::memory_order_relaxed); + pdlp_solver_settings_t pdlp_settings{}; pdlp_settings.detect_infeasibility = settings.check_infeasibility; pdlp_settings.set_optimality_tolerance(settings.tolerance); @@ -49,17 +61,57 @@ optimization_problem_solution_t get_relaxed_lp_solution( pdlp_settings.tolerances.relative_primal_tolerance = settings.tolerance / tolerance_divisor; pdlp_settings.tolerances.relative_dual_tolerance = settings.tolerance / tolerance_divisor; pdlp_settings.time_limit = settings.time_limit; - pdlp_settings.concurrent_halt = settings.concurrent_halt; - pdlp_settings.per_constraint_residual = settings.per_constraint_residual; - pdlp_settings.first_primal_feasible = settings.return_first_feasible; - pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; - pdlp_settings.presolver = presolver_t::None; + pdlp_settings.iteration_limit = settings.iteration_limit; + + const f_t work_limit = settings.work_limit; + const bool determinism_mode = std::isfinite(work_limit); + pdlp_settings.concurrent_halt = settings.concurrent_halt; + pdlp_settings.per_constraint_residual = settings.per_constraint_residual; + pdlp_settings.first_primal_feasible = settings.return_first_feasible; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + int estim_iters = pdlp_settings.iteration_limit; + if (determinism_mode) { + // try to estimate the iteration count based on the requested work limit + estim_iters = 100; + if (!std::isinf(work_limit)) { + do { + // TODO: use an actual predictor model here + double estim_ms = 313 + 200 * op_problem.n_variables - 400 * op_problem.n_constraints + + 600 * op_problem.coefficients.size() + 7100 * estim_iters; + estim_ms = std::max(0.0, estim_ms); + if (estim_ms > work_limit * 1000) { break; } + estim_iters += 100; + } while (true); + } else { + estim_iters = std::numeric_limits::max(); + } + CUOPT_DETERMINISM_LOG( + "estimated iterations %d for work limit %f", estim_iters, settings.work_limit); + pdlp_settings.iteration_limit = estim_iters; + pdlp_settings.time_limit = std::numeric_limits::infinity(); + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + pdlp_settings.presolver = presolver_t::None; + } + CUOPT_DETERMINISM_LOG( + "LP call %lu config: det=%d work_limit=%.6f time_limit=%.6f iter_limit=%d method=%d mode=%d " + "presolver=%d save_state=%d has_initial=%d assignment_hash=0x%x", + lp_call_id, + (int)determinism_mode, + settings.work_limit, + pdlp_settings.time_limit, + pdlp_settings.iteration_limit, + (int)pdlp_settings.method, + (int)pdlp_settings.pdlp_solver_mode, + (int)pdlp_settings.presolver, + (int)settings.save_state, + (int)settings.has_initial_primal, + detail::compute_hash(assignment, op_problem.handle_ptr->get_stream())); set_pdlp_solver_mode(pdlp_settings); // TODO: set Stable3 here? pdlp_solver_t lp_solver(op_problem, pdlp_settings); if (settings.has_initial_primal) { i_t prev_size = lp_state.prev_dual.size(); - CUOPT_LOG_DEBUG( + CUOPT_LOG_TRACE( "setting initial primal solution of size %d dual size %d problem vars %d cstrs %d", assignment.size(), lp_state.prev_dual.size(), @@ -73,25 +125,68 @@ optimization_problem_solution_t get_relaxed_lp_solution( lp_state.prev_dual.data(), lp_state.prev_dual.data() + op_problem.n_constraints, [prev_size, dual = make_span(lp_state.prev_dual)] __device__(i_t i) { + // early exit to avoid a false positive in compute-sanitizer initcheck + if (i >= prev_size) { return 0.0; } f_t x = dual[i]; - if (!isfinite(x) || i >= prev_size) { return 0.0; } + if (!isfinite(x)) { return 0.0; } return x; }); lp_solver.set_initial_primal_solution(assignment); lp_solver.set_initial_dual_solution(lp_state.prev_dual); } - CUOPT_LOG_DEBUG( + CUOPT_LOG_TRACE( "running LP with n_vars %d n_cstr %d", op_problem.n_variables, op_problem.n_constraints); // before LP flush the logs as it takes quite some time cuopt::default_logger().flush(); // temporarily add timer auto start_time = timer_t(pdlp_settings.time_limit); lp_solver.set_inside_mip(true); + CUOPT_DETERMINISM_LOG( + "prev solution sizes primal=%lu dual=%lu", assignment.size(), lp_state.prev_dual.size()); + if (determinism_mode) { + auto init_primal_hash = + detail::compute_hash(make_span(assignment), op_problem.handle_ptr->get_stream()); + auto init_dual_hash = + settings.has_initial_primal + ? detail::compute_hash(make_span(lp_state.prev_dual), op_problem.handle_ptr->get_stream()) + : 0u; + CUOPT_DETERMINISM_LOG("LP call %lu pre-solve state: init_primal_hash=0x%x init_dual_hash=0x%x", + lp_call_id, + init_primal_hash, + init_dual_hash); + } auto solver_response = lp_solver.run_solver(start_time); + CUOPT_DETERMINISM_LOG("post LP primal size %lu", solver_response.get_primal_solution().size()); + const int actual_iters = + solver_response.get_additional_termination_information().number_of_steps_taken; + CUOPT_DETERMINISM_LOG("LP call %lu result: status=%d iters=%d primal_hash=0x%x", + lp_call_id, + (int)solver_response.get_termination_status(), + actual_iters, + solver_response.get_primal_solution().size() != 0 + ? detail::compute_hash(solver_response.get_primal_solution(), + op_problem.handle_ptr->get_stream()) + : 0u); + + if (determinism_mode && settings.work_context != nullptr) { + double work_to_record = settings.work_limit; + if (estim_iters > 0) { + work_to_record = + settings.work_limit * std::clamp((double)actual_iters / (double)estim_iters, 0.0, 1.0); + } + CUOPT_DETERMINISM_LOG( + "LP call %lu recording %.6fwu (actual_iters=%d estim_iters=%d requested=%.6f)", + lp_call_id, + work_to_record, + actual_iters, + estim_iters, + settings.work_limit); + settings.work_context->record_work_sync_on_horizon(work_to_record); + } if (solver_response.get_primal_solution().size() != 0 && solver_response.get_dual_solution().size() != 0 && settings.save_state) { - CUOPT_LOG_DEBUG("saving initial primal solution of size %d", lp_state.prev_primal.size()); + CUOPT_LOG_TRACE("saving initial primal solution of size %d", lp_state.prev_primal.size()); lp_state.set_state(solver_response.get_primal_solution(), solver_response.get_dual_solution()); } if (solver_response.get_primal_solution().size() != 0) { @@ -101,11 +196,17 @@ optimization_problem_solution_t get_relaxed_lp_solution( solver_response.get_primal_solution().size(), op_problem.handle_ptr->get_stream()); } + CUOPT_DETERMINISM_LOG("LP call %lu assignment_after_copy hash=0x%x", + lp_call_id, + detail::compute_hash(assignment, op_problem.handle_ptr->get_stream())); if (solver_response.get_termination_status() == pdlp_termination_status_t::Optimal) { - CUOPT_LOG_DEBUG("feasible solution found with LP objective %f", + CUOPT_LOG_TRACE("feasible solution found with LP objective %f", solver_response.get_objective_value()); } else { - CUOPT_LOG_DEBUG("LP returned with reason %d", solver_response.get_termination_status()); + CUOPT_DETERMINISM_LOG( + "LP returned with reason %d, %d iterations", + solver_response.get_termination_status(), + solver_response.get_additional_termination_information().number_of_steps_taken); } return solver_response; diff --git a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cuh b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cuh index 9fe5fb9071..06698d79ae 100644 --- a/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cuh +++ b/cpp/src/mip_heuristics/relaxed_lp/relaxed_lp.cuh @@ -12,19 +12,23 @@ #include #include #include +#include #include "lp_state.cuh" namespace cuopt::linear_programming::detail { struct relaxed_lp_settings_t { - double tolerance = 1e-4; - double time_limit = 1.0; - bool check_infeasibility = true; - bool return_first_feasible = false; - bool save_state = true; - bool per_constraint_residual = true; - bool has_initial_primal = true; - std::atomic* concurrent_halt = nullptr; + double tolerance = 1e-4; + double time_limit = 1.0; + int iteration_limit = std::numeric_limits::max(); + double work_limit = std::numeric_limits::infinity(); + bool check_infeasibility = true; + bool return_first_feasible = false; + bool save_state = true; + bool per_constraint_residual = true; + bool has_initial_primal = true; + std::atomic* concurrent_halt = nullptr; + cuopt::work_limit_context_t* work_context = nullptr; }; template diff --git a/cpp/src/mip_heuristics/solution/solution.cu b/cpp/src/mip_heuristics/solution/solution.cu index 5f1c13199b..bd6a3861ea 100644 --- a/cpp/src/mip_heuristics/solution/solution.cu +++ b/cpp/src/mip_heuristics/solution/solution.cu @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -46,8 +47,6 @@ solution_t::solution_t(problem_t& problem_) assignment(std::move(get_lower_bounds(problem_.variable_bounds, handle_ptr))), lower_excess(problem_.n_constraints, handle_ptr->get_stream()), upper_excess(problem_.n_constraints, handle_ptr->get_stream()), - lower_slack(problem_.n_constraints, handle_ptr->get_stream()), - upper_slack(problem_.n_constraints, handle_ptr->get_stream()), constraint_value(problem_.n_constraints, handle_ptr->get_stream()), obj_val(handle_ptr->get_stream()), n_feasible_constraints(handle_ptr->get_stream()), @@ -63,8 +62,6 @@ solution_t::solution_t(const solution_t& other) assignment(other.assignment, handle_ptr->get_stream()), lower_excess(other.lower_excess, handle_ptr->get_stream()), upper_excess(other.upper_excess, handle_ptr->get_stream()), - lower_slack(other.lower_slack, handle_ptr->get_stream()), - upper_slack(other.upper_slack, handle_ptr->get_stream()), constraint_value(other.constraint_value, handle_ptr->get_stream()), obj_val(other.obj_val, handle_ptr->get_stream()), n_feasible_constraints(other.n_feasible_constraints, handle_ptr->get_stream()), @@ -91,10 +88,18 @@ void solution_t::copy_from(const solution_t& other_sol) h_user_obj = other_sol.h_user_obj; h_infeasibility_cost = other_sol.h_infeasibility_cost; expand_device_copy(assignment, other_sol.assignment, handle_ptr->get_stream()); + + // slack, excess, and constraint value may be uninitialized (and computed later). Mark them as + // such + cuopt::mark_span_as_initialized(make_span(other_sol.lower_excess), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.upper_excess), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.constraint_value), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.obj_val), handle_ptr->get_stream()); + cuopt::mark_span_as_initialized(make_span(other_sol.n_feasible_constraints), + handle_ptr->get_stream()); + expand_device_copy(lower_excess, other_sol.lower_excess, handle_ptr->get_stream()); expand_device_copy(upper_excess, other_sol.upper_excess, handle_ptr->get_stream()); - expand_device_copy(lower_slack, other_sol.lower_slack, handle_ptr->get_stream()); - expand_device_copy(upper_slack, other_sol.upper_slack, handle_ptr->get_stream()); expand_device_copy(constraint_value, other_sol.constraint_value, handle_ptr->get_stream()); raft::copy(obj_val.data(), other_sol.obj_val.data(), 1, handle_ptr->get_stream()); raft::copy(n_feasible_constraints.data(), @@ -113,14 +118,26 @@ void solution_t::copy_from(const solution_t& other_sol) template void solution_t::resize_to_problem() { + i_t old_n_vars = lp_state.prev_primal.size(); + i_t old_n_cstrs = lp_state.prev_dual.size(); assignment.resize(problem_ptr->n_variables, handle_ptr->get_stream()); lower_excess.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); upper_excess.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); - lower_slack.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); - upper_slack.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); constraint_value.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); lp_state.prev_primal.resize(problem_ptr->n_variables, handle_ptr->get_stream()); lp_state.prev_dual.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); + if (problem_ptr->n_variables > old_n_vars) { + thrust::fill(handle_ptr->get_thrust_policy(), + lp_state.prev_primal.data() + old_n_vars, + lp_state.prev_primal.data() + problem_ptr->n_variables, + f_t(0)); + } + if (problem_ptr->n_constraints > old_n_cstrs) { + thrust::fill(handle_ptr->get_thrust_policy(), + lp_state.prev_dual.data() + old_n_cstrs, + lp_state.prev_dual.data() + problem_ptr->n_constraints, + f_t(0)); + } } template @@ -131,10 +148,6 @@ void solution_t::resize_to_original_problem() handle_ptr->get_stream()); upper_excess.resize(problem_ptr->original_problem_ptr->get_n_constraints(), handle_ptr->get_stream()); - lower_slack.resize(problem_ptr->original_problem_ptr->get_n_constraints(), - handle_ptr->get_stream()); - upper_slack.resize(problem_ptr->original_problem_ptr->get_n_constraints(), - handle_ptr->get_stream()); constraint_value.resize(problem_ptr->original_problem_ptr->get_n_constraints(), handle_ptr->get_stream()); lp_state.prev_primal.resize(problem_ptr->original_problem_ptr->get_n_variables(), @@ -149,8 +162,6 @@ void solution_t::resize_copy(const solution_t& other_sol) assignment.resize(other_sol.assignment.size(), handle_ptr->get_stream()); lower_excess.resize(other_sol.lower_excess.size(), handle_ptr->get_stream()); upper_excess.resize(other_sol.upper_excess.size(), handle_ptr->get_stream()); - lower_slack.resize(other_sol.lower_slack.size(), handle_ptr->get_stream()); - upper_slack.resize(other_sol.upper_slack.size(), handle_ptr->get_stream()); constraint_value.resize(other_sol.constraint_value.size(), handle_ptr->get_stream()); lp_state.prev_primal.resize(other_sol.lp_state.prev_primal.size(), handle_ptr->get_stream()); lp_state.prev_dual.resize(other_sol.lp_state.prev_dual.size(), handle_ptr->get_stream()); @@ -165,8 +176,6 @@ typename solution_t::view_t solution_t::view() v.assignment = raft::device_span{assignment.data(), assignment.size()}; v.lower_excess = raft::device_span{lower_excess.data(), lower_excess.size()}; v.upper_excess = raft::device_span{upper_excess.data(), upper_excess.size()}; - v.lower_slack = raft::device_span{lower_slack.data(), lower_slack.size()}; - v.upper_slack = raft::device_span{upper_slack.data(), upper_slack.size()}; v.constraint_value = raft::device_span{constraint_value.data(), constraint_value.size()}; v.obj_val = obj_val.data(); v.n_feasible_constraints = n_feasible_constraints.data(); @@ -626,7 +635,7 @@ mip_solution_t solution_t::get_solution(bool output_feasible "Solution objective: %f , relative_mip_gap %f solution_bound %f presolve_time %f " "total_solve_time %f " "max constraint violation %f max int violation %f max var bounds violation %f " - "nodes %d simplex_iterations %d", + "nodes %d simplex_iterations %d hash %x", h_user_obj, rel_mip_gap, solution_bound, @@ -636,7 +645,8 @@ mip_solution_t solution_t::get_solution(bool output_feasible max_int_violation, max_variable_bound_violation, num_nodes, - num_simplex_iterations); + num_simplex_iterations, + get_hash()); } const bool not_optimal = rel_mip_gap > problem_ptr->tolerances.relative_mip_gap && abs_mip_gap > problem_ptr->tolerances.absolute_mip_gap; @@ -660,7 +670,14 @@ mip_solution_t solution_t::get_solution(bool output_feasible } } -#if MIP_INSTANTIATE_FLOAT +template +uint32_t solution_t::get_hash() const +{ + auto h_assignment = host_copy(assignment, handle_ptr->get_stream()); + return compute_hash(h_assignment); +} + +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class solution_t; #endif diff --git a/cpp/src/mip_heuristics/solution/solution.cuh b/cpp/src/mip_heuristics/solution/solution.cuh index f6c2c2f802..ae271aaf08 100644 --- a/cpp/src/mip_heuristics/solution/solution.cuh +++ b/cpp/src/mip_heuristics/solution/solution.cuh @@ -99,6 +99,7 @@ class solution_t { f_t compute_max_constraint_violation(); f_t compute_max_int_violation(); f_t compute_max_variable_violation(); + uint32_t get_hash() const; struct view_t { // let's not bloat the class for every simple getter and setters @@ -112,8 +113,6 @@ class solution_t { raft::device_span assignment; raft::device_span lower_excess; raft::device_span upper_excess; - raft::device_span lower_slack; - raft::device_span upper_slack; raft::device_span constraint_value; f_t* obj_val; i_t* n_feasible_constraints; @@ -128,8 +127,6 @@ class solution_t { rmm::device_uvector assignment; rmm::device_uvector lower_excess; rmm::device_uvector upper_excess; - rmm::device_uvector lower_slack; - rmm::device_uvector upper_slack; rmm::device_uvector constraint_value; rmm::device_scalar obj_val; rmm::device_scalar n_feasible_constraints; diff --git a/cpp/src/mip_heuristics/solve.cu b/cpp/src/mip_heuristics/solve.cu index f5a2172f2e..b783281d61 100644 --- a/cpp/src/mip_heuristics/solve.cu +++ b/cpp/src/mip_heuristics/solve.cu @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -24,6 +25,7 @@ #include #include #include +#include #include #include @@ -61,7 +63,7 @@ static void init_handler(const raft::handle_t* handle_ptr) template mip_solution_t run_mip(detail::problem_t& problem, mip_solver_settings_t const& settings, - timer_t& timer) + cuopt::termination_checker_t& timer) { raft::common::nvtx::range fun_scope("run_mip"); auto constexpr const running_mip = true; @@ -95,30 +97,18 @@ mip_solution_t run_mip(detail::problem_t& problem, solution.compute_objective(); // just to ensure h_user_obj is set auto stats = solver_stats_t{}; stats.set_solution_bound(solution.get_user_objective()); - // log the objective for scripts which need it CUOPT_LOG_INFO("Best feasible: %f", solution.get_user_objective()); - for (auto callback : settings.get_mip_callbacks()) { - if (callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { - auto temp_sol(solution); - auto get_sol_callback = static_cast(callback); - std::vector user_objective_vec(1); - std::vector user_bound_vec(1); - user_objective_vec[0] = solution.get_user_objective(); - user_bound_vec[0] = stats.get_solution_bound(); - if (problem.has_papilo_presolve_data()) { - problem.papilo_uncrush_assignment(temp_sol.assignment); - } - std::vector user_assignment_vec(temp_sol.assignment.size()); - raft::copy(user_assignment_vec.data(), - temp_sol.assignment.data(), - temp_sol.assignment.size(), - temp_sol.handle_ptr->get_stream()); - solution.handle_ptr->sync_stream(); - get_sol_callback->get_solution(user_assignment_vec.data(), - user_objective_vec.data(), - user_bound_vec.data(), - get_sol_callback->get_user_data()); + { + detail::solution_callback_payload_t payload{}; + payload.user_objective = solution.get_user_objective(); + payload.solver_objective = solution.get_objective(); + detail::solution_t temp_sol(solution); + if (problem.has_papilo_presolve_data()) { + problem.papilo_uncrush_assignment(temp_sol.assignment); } + payload.assignment = temp_sol.get_host_assignment(); + detail::solution_publication_t pub(settings, stats); + pub.publish_terminal_solution(payload); } return solution.get_solution(true, stats, false); } @@ -218,6 +208,15 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, // Initialize seed generator if a specific seed is requested if (settings.seed >= 0) { cuopt::seed_generator::set_seed(settings.seed); } + CUOPT_DETERMINISM_LOG( + "Deterministic solve start settings: seed=%lld seed_state=%lld det_mode=%d " + "work_limit=%.6f max_cut_passes=%d num_cpu_threads=%d", + (long long)settings.seed, + (long long)cuopt::seed_generator::peek_seed(), + (int)settings.determinism_mode, + (double)settings.work_limit, + settings.max_cut_passes, + settings.num_cpu_threads); raft::common::nvtx::range fun_scope("Running solver"); @@ -240,13 +239,14 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, op_problem.get_handle_ptr()->get_stream()); } - auto timer = timer_t(time_limit); + auto timer = + cuopt::termination_checker_t(time_limit, cuopt::termination_checker_t::root_tag_t{}); + const bool deterministic_run = (settings.determinism_mode != CUOPT_DETERMINISM_NONE); double presolve_time = 0.0; std::unique_ptr> presolver; std::optional> presolve_result; - detail::problem_t problem( - op_problem, settings.get_tolerances(), settings.determinism_mode == CUOPT_MODE_DETERMINISTIC); + detail::problem_t problem(op_problem, settings.get_tolerances(), deterministic_run); auto run_presolve = settings.presolver != presolver_t::None; run_presolve = run_presolve && settings.initial_solutions.size() == 0; @@ -271,10 +271,9 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, // allocate not more than 10% of the time limit to presolve. // Note that this is not the presolve time, but the time limit for presolve. double presolve_time_limit = std::min(0.1 * time_limit, 60.0); - if (settings.determinism_mode == CUOPT_MODE_DETERMINISTIC) { - presolve_time_limit = std::numeric_limits::infinity(); - } - presolver = std::make_unique>(); + if (deterministic_run) { presolve_time_limit = std::numeric_limits::infinity(); } + presolver = std::make_unique>(); + presolver->set_deterministic(deterministic_run); auto result = presolver->apply(op_problem, cuopt::linear_programming::problem_category_t::MIP, settings.presolver, @@ -290,7 +289,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, } presolve_result.emplace(std::move(*result)); - problem = detail::problem_t(presolve_result->reduced_problem); + problem = detail::problem_t( + presolve_result->reduced_problem, settings.get_tolerances(), deterministic_run); problem.set_papilo_presolve_data(presolver.get(), presolve_result->reduced_to_original_map, presolve_result->original_to_reduced_map, @@ -333,7 +333,8 @@ mip_solution_t solve_mip(optimization_problem_t& op_problem, reduced_costs.data(), reduced_costs.data() + reduced_costs.size(), std::numeric_limits::signaling_NaN()); - detail::problem_t full_problem(op_problem); + detail::problem_t full_problem( + op_problem, settings.get_tolerances(), deterministic_run); detail::solution_t full_sol(full_problem); full_sol.copy_new_assignment( cuopt::host_copy(primal_solution, op_problem.get_handle_ptr()->get_stream())); diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index 235d4500d2..11b4c26e2e 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -17,6 +17,10 @@ #include #include #include +#include + +#undef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(...) CUOPT_LOG_INFO(__VA_ARGS__) #include #include @@ -42,7 +46,7 @@ template mip_solver_t::mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, pdlp_initial_scaling_strategy_t& scaling, - timer_t timer) + cuopt::termination_checker_t& timer) : op_problem_(op_problem), solver_settings_(solver_settings), context(op_problem.handle_ptr, @@ -51,19 +55,33 @@ mip_solver_t::mip_solver_t(const problem_t& op_problem, scaling), timer_(timer) { + context.termination = &timer_; init_handler(op_problem.handle_ptr); } template -struct branch_and_bound_solution_helper_t { - branch_and_bound_solution_helper_t(diversity_manager_t* dm, - dual_simplex::simplex_solver_settings_t& settings) - : dm(dm), settings_(settings) {}; - - void solution_callback(std::vector& solution, f_t objective) +struct bb_observer_adapter_t { + bb_observer_adapter_t(mip_solver_context_t* context, diversity_manager_t* dm) + : context(context), dm(dm) {}; + + void new_incumbent_callback(std::vector& solution, + f_t objective, + const internals::mip_solution_callback_info_t& info, + double work_timestamp) { - dm->population.add_external_solution(solution, objective, solution_origin_t::BRANCH_AND_BOUND); - dm->rins.new_best_incumbent_callback(solution); + if (context->settings.determinism_mode & CUOPT_DETERMINISM_BB) { + solution_t temp_sol(*context->problem_ptr); + temp_sol.copy_new_assignment(solution); + temp_sol.compute_feasibility(); + const auto payload = context->solution_publication.build_payload( + context->problem_ptr, context->scaling, temp_sol, info.origin, work_timestamp); + context->solution_publication.publish_new_best_feasible(payload, dm->timer.elapsed_time()); + } + if (context->diversity_manager_ptr != nullptr) { + context->diversity_manager_ptr->population.add_external_solution( + solution, objective, internals::mip_solution_origin_t::BRANCH_AND_BOUND_NODE); + context->diversity_manager_ptr->rins.new_best_incumbent_callback(solution); + } } void set_simplex_solution(std::vector& solution, @@ -79,8 +97,8 @@ struct branch_and_bound_solution_helper_t { } void preempt_heuristic_solver() { dm->population.preempt_heuristic_solver(); } + mip_solver_context_t* context; diversity_manager_t* dm; - dual_simplex::simplex_solver_settings_t& settings_; }; template @@ -98,21 +116,27 @@ solution_t mip_solver_t::run_solver() CUOPT_LOG_INFO("Problem fully reduced in presolve"); solution_t sol(*context.problem_ptr); sol.set_problem_fully_reduced(); - for (auto callback : context.settings.get_mip_callbacks()) { - if (callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { - auto get_sol_callback = static_cast(callback); - dm.population.invoke_get_solution_callback(sol, get_sol_callback); - } - } + const auto payload = context.solution_publication.build_payload( + context.problem_ptr, context.scaling, sol, internals::mip_solution_origin_t::UNKNOWN, 0.0); + context.solution_publication.publish_terminal_solution(payload); context.problem_ptr->post_process_solution(sol); return sol; } - dm.timer = timer_; + const bool deterministic_run = + (context.settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS); + const f_t gpu_heur_work_limit = + deterministic_run ? context.settings.work_limit : timer_.get_time_limit(); + if (deterministic_run) + cuopt_assert(gpu_heur_work_limit >= 0.0, + "Deterministic GPU heuristic work limit must be non-negative"); + dm.timer = cuopt::termination_checker_t(context.gpu_heur_loop, gpu_heur_work_limit, timer_); const bool run_presolve = context.settings.presolver != presolver_t::None; - f_t time_limit = context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC - ? std::numeric_limits::infinity() - : timer_.remaining_time(); - bool presolve_success = run_presolve ? dm.run_presolve(time_limit) : true; + f_t time_limit = + deterministic_run ? std::numeric_limits::infinity() : timer_.remaining_time(); + double presolve_time_limit = std::min(0.1 * time_limit, 60.0); + presolve_time_limit = + deterministic_run ? std::numeric_limits::infinity() : presolve_time_limit; + bool presolve_success = run_presolve ? dm.run_presolve(presolve_time_limit, timer_) : true; if (!presolve_success) { CUOPT_LOG_INFO("Problem proven infeasible in presolve"); solution_t sol(*context.problem_ptr); @@ -124,12 +148,9 @@ solution_t mip_solver_t::run_solver() CUOPT_LOG_INFO("Problem full reduced in presolve"); solution_t sol(*context.problem_ptr); sol.set_problem_fully_reduced(); - for (auto callback : context.settings.get_mip_callbacks()) { - if (callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { - auto get_sol_callback = static_cast(callback); - dm.population.invoke_get_solution_callback(sol, get_sol_callback); - } - } + const auto payload = context.solution_publication.build_payload( + context.problem_ptr, context.scaling, sol, internals::mip_solution_origin_t::UNKNOWN, 0.0); + context.solution_publication.publish_terminal_solution(payload); context.problem_ptr->post_process_solution(sol); return sol; } @@ -162,12 +183,9 @@ solution_t mip_solver_t::run_solver() sol.set_problem_fully_reduced(); } if (opt_sol.get_termination_status() == pdlp_termination_status_t::Optimal) { - for (auto callback : context.settings.get_mip_callbacks()) { - if (callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { - auto get_sol_callback = static_cast(callback); - dm.population.invoke_get_solution_callback(sol, get_sol_callback); - } - } + const auto payload = context.solution_publication.build_payload( + context.problem_ptr, context.scaling, sol, internals::mip_solution_origin_t::UNKNOWN, 0.0); + context.solution_publication.publish_terminal_solution(payload); } context.problem_ptr->post_process_solution(sol); return sol; @@ -185,7 +203,7 @@ solution_t mip_solver_t::run_solver() branch_and_bound_problem.objective_is_integral = context.problem_ptr->is_objective_integral(); dual_simplex::simplex_solver_settings_t branch_and_bound_settings; std::unique_ptr> branch_and_bound; - branch_and_bound_solution_helper_t solution_helper(&dm, branch_and_bound_settings); + bb_observer_adapter_t solution_helper(&context, &dm); dual_simplex::mip_solution_t branch_and_bound_solution(1); bool run_bb = !context.settings.heuristics_only; @@ -206,9 +224,9 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.max_cut_passes = context.settings.max_cut_passes; branch_and_bound_settings.mir_cuts = context.settings.mir_cuts; branch_and_bound_settings.deterministic = - context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC; + (context.settings.determinism_mode & CUOPT_DETERMINISM_BB); - if (context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC) { + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB)) { branch_and_bound_settings.work_limit = context.settings.work_limit; } else { branch_and_bound_settings.work_limit = std::numeric_limits::infinity(); @@ -216,6 +234,7 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.mixed_integer_gomory_cuts = context.settings.mixed_integer_gomory_cuts; branch_and_bound_settings.knapsack_cuts = context.settings.knapsack_cuts; + branch_and_bound_settings.clique_cuts = context.settings.clique_cuts; branch_and_bound_settings.strong_chvatal_gomory_cuts = context.settings.strong_chvatal_gomory_cuts; branch_and_bound_settings.reduced_cost_strengthening = @@ -224,32 +243,40 @@ solution_t mip_solver_t::run_solver() branch_and_bound_settings.cut_min_orthogonality = context.settings.cut_min_orthogonality; branch_and_bound_settings.mip_batch_pdlp_strong_branching = context.settings.mip_batch_pdlp_strong_branching; + branch_and_bound_settings.reduced_cost_strengthening = + context.settings.reduced_cost_strengthening == -1 + ? 2 + : context.settings.reduced_cost_strengthening; + branch_and_bound_settings.bnb_work_unit_scale = solver_settings_.bnb_work_unit_scale; + branch_and_bound_settings.gpu_heur_wait_for_exploration = + solver_settings_.gpu_heur_wait_for_exploration; if (context.settings.num_cpu_threads < 0) { branch_and_bound_settings.num_threads = std::max(1, omp_get_max_threads() - 1); } else { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } + CUOPT_LOG_INFO("Using %d CPU threads for B&B", branch_and_bound_settings.num_threads); - // Set the branch and bound -> primal heuristics callback - branch_and_bound_settings.solution_callback = - std::bind(&branch_and_bound_solution_helper_t::solution_callback, + branch_and_bound_settings.new_incumbent_callback = + std::bind(&bb_observer_adapter_t::new_incumbent_callback, &solution_helper, std::placeholders::_1, - std::placeholders::_2); - // heuristic_preemption_callback is needed in both modes to properly stop the heuristic thread - branch_and_bound_settings.heuristic_preemption_callback = std::bind( - &branch_and_bound_solution_helper_t::preempt_heuristic_solver, &solution_helper); - if (context.settings.determinism_mode == CUOPT_MODE_OPPORTUNISTIC) { + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4); + branch_and_bound_settings.heuristic_preemption_callback = + std::bind(&bb_observer_adapter_t::preempt_heuristic_solver, &solution_helper); + if (!(context.settings.determinism_mode & CUOPT_DETERMINISM_BB)) { branch_and_bound_settings.set_simplex_solution_callback = - std::bind(&branch_and_bound_solution_helper_t::set_simplex_solution, + std::bind(&bb_observer_adapter_t::set_simplex_solution, &solution_helper, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); branch_and_bound_settings.node_processed_callback = - std::bind(&branch_and_bound_solution_helper_t::node_processed_callback, + std::bind(&bb_observer_adapter_t::node_processed_callback, &solution_helper, std::placeholders::_1, std::placeholders::_2); @@ -257,21 +284,24 @@ solution_t mip_solver_t::run_solver() // Create the branch and bound object branch_and_bound = std::make_unique>( - branch_and_bound_problem, branch_and_bound_settings, timer_.get_tic_start()); + branch_and_bound_problem, + branch_and_bound_settings, + timer_.get_tic_start(), + context.problem_ptr->clique_table); context.branch_and_bound_ptr = branch_and_bound.get(); auto* stats_ptr = &context.stats; branch_and_bound->set_user_bound_callback( [stats_ptr](f_t user_bound) { stats_ptr->set_solution_bound(user_bound); }); // Set the primal heuristics -> branch and bound callback - if (context.settings.determinism_mode == CUOPT_MODE_OPPORTUNISTIC) { + if (!(context.settings.determinism_mode & CUOPT_DETERMINISM_BB)) { branch_and_bound->set_concurrent_lp_root_solve(true); context.problem_ptr->branch_and_bound_callback = std::bind(&dual_simplex::branch_and_bound_t::set_new_solution, branch_and_bound.get(), std::placeholders::_1); - } else if (context.settings.determinism_mode == CUOPT_MODE_DETERMINISTIC) { + } else if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB)) { branch_and_bound->set_concurrent_lp_root_solve(false); // TODO once deterministic GPU heuristics are integrated // context.problem_ptr->branch_and_bound_callback = @@ -281,17 +311,20 @@ solution_t mip_solver_t::run_solver() } context.work_unit_scheduler_.register_context(branch_and_bound->get_work_unit_context()); - // context.work_unit_scheduler_.verbose = true; - context.problem_ptr->set_root_relaxation_solution_callback = - std::bind(&dual_simplex::branch_and_bound_t::set_root_relaxation_solution, - branch_and_bound.get(), - std::placeholders::_1, - std::placeholders::_2, - std::placeholders::_3, - std::placeholders::_4, - std::placeholders::_5, - std::placeholders::_6); + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB)) { + context.problem_ptr->set_root_relaxation_solution_callback = nullptr; + } else { + context.problem_ptr->set_root_relaxation_solution_callback = + std::bind(&dual_simplex::branch_and_bound_t::set_root_relaxation_solution, + branch_and_bound.get(), + std::placeholders::_1, + std::placeholders::_2, + std::placeholders::_3, + std::placeholders::_4, + std::placeholders::_5, + std::placeholders::_6); + } if (timer_.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached during B&B setup"); @@ -305,10 +338,12 @@ solution_t mip_solver_t::run_solver() // std::async and std::future allow us to get the return value of bb::solve() // without having to manually manage the thread // std::future.get() performs a join() operation to wait until the return status is available - branch_and_bound_status_future = std::async(std::launch::async, - &dual_simplex::branch_and_bound_t::solve, - branch_and_bound.get(), - std::ref(branch_and_bound_solution)); + int bb_device_id = context.handle_ptr->get_device(); + branch_and_bound_status_future = + std::async(std::launch::async, [&branch_and_bound, &branch_and_bound_solution, bb_device_id] { + RAFT_CUDA_TRY(cudaSetDevice(bb_device_id)); + return branch_and_bound->solve(branch_and_bound_solution); + }); } // Start the primal heuristics @@ -321,9 +356,37 @@ solution_t mip_solver_t::run_solver() context.stats.set_solution_bound( context.problem_ptr->get_user_obj_from_solver_obj(branch_and_bound_solution.lower_bound)); } + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB) && + std::isfinite(branch_and_bound_solution.objective)) { + solution_t bb_sol(*context.problem_ptr); + bb_sol.copy_new_assignment(branch_and_bound_solution.x); + bool bb_feasible = bb_sol.compute_feasibility(); + if (bb_feasible) { sol = std::move(bb_sol); } + } else if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB)) { + // In deterministic mode, only solutions formally retired by B&B are valid output. + // Discard the GPU heuristic incumbent that B&B never processed. + sol = solution_t(*context.problem_ptr); + } if (bb_status == dual_simplex::mip_status_t::INFEASIBLE) { sol.set_problem_fully_reduced(); } context.stats.num_nodes = branch_and_bound_solution.nodes_explored; context.stats.num_simplex_iterations = branch_and_bound_solution.simplex_iterations; + + if ((context.settings.determinism_mode & CUOPT_DETERMINISM_BB)) { + double bnb_work = branch_and_bound->get_work_unit_context().current_work(); + double gpu_work = context.gpu_heur_loop.current_work(); + double bnb_scale = solver_settings_.bnb_work_unit_scale; + double gpu_scale = solver_settings_.gpu_heur_work_unit_scale; + CUOPT_LOG_INFO( + "Work unit summary: B&B=%.2f (scale=%.3f, raw=%.2f) GPU_heur=%.2f (scale=%.3f, raw=%.2f) " + "ratio=%.2fx", + bnb_work, + bnb_scale, + bnb_scale > 0 ? bnb_work / bnb_scale : 0.0, + gpu_work, + gpu_scale, + gpu_scale > 0 ? gpu_work / gpu_scale : 0.0, + gpu_work > 0 ? bnb_work / gpu_work : 0.0); + } } sol.compute_feasibility(); rmm::device_scalar is_feasible(sol.handle_ptr->get_stream()); diff --git a/cpp/src/mip_heuristics/solver.cuh b/cpp/src/mip_heuristics/solver.cuh index 1b5fe17244..177fe6e14a 100644 --- a/cpp/src/mip_heuristics/solver.cuh +++ b/cpp/src/mip_heuristics/solver.cuh @@ -10,7 +10,7 @@ #include #include #include -#include +#include #pragma once namespace cuopt::linear_programming::detail { @@ -21,7 +21,7 @@ class mip_solver_t { explicit mip_solver_t(const problem_t& op_problem, const mip_solver_settings_t& solver_settings, pdlp_initial_scaling_strategy_t& scaling, - timer_t timer); + cuopt::termination_checker_t& timer); solution_t run_solver(); solver_stats_t& get_solver_stats() { return context.stats; } @@ -30,7 +30,7 @@ class mip_solver_t { // reference to the original problem const problem_t& op_problem_; const mip_solver_settings_t& solver_settings_; - timer_t timer_; + cuopt::termination_checker_t& timer_; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/solver_context.cuh b/cpp/src/mip_heuristics/solver_context.cuh index baac1dd9d6..1acf57c5f7 100644 --- a/cpp/src/mip_heuristics/solver_context.cuh +++ b/cpp/src/mip_heuristics/solver_context.cuh @@ -7,13 +7,21 @@ #include +#include #include #include +#include #include #include #include #include +#include +#include + +#include +#include +#include #pragma once @@ -25,9 +33,199 @@ class branch_and_bound_t; namespace cuopt::linear_programming::detail { +struct mip_solver_work_unit_predictors_t { + work_unit_predictor_t fj_predictor{}; +}; template class diversity_manager_t; +template +struct solution_callback_payload_t { + std::vector assignment{}; + f_t user_objective{}; + f_t solver_objective{}; + internals::mip_solution_callback_info_t callback_info{}; +}; + +template +class solution_publication_t { + public: + solution_publication_t(const mip_solver_settings_t& settings_, + solver_stats_t& stats_) + : settings(settings_), stats(stats_) + { + } + + void reset_published_best(f_t objective = std::numeric_limits::max()) + { + best_callback_feasible_objective_ = objective; + } + + solution_callback_payload_t build_payload( + problem_t* problem_ptr, + pdlp_initial_scaling_strategy_t& scaling, + solution_t& sol, + internals::mip_solution_origin_t origin, + double work_timestamp) + { + cuopt_assert(problem_ptr != nullptr, "Callback payload problem pointer must not be null"); + cuopt_assert(work_timestamp >= 0.0, "work_timestamp must not be negative"); + solution_callback_payload_t payload{}; + payload.user_objective = sol.get_user_objective(); + payload.solver_objective = sol.get_objective(); + payload.callback_info.origin = origin; + payload.callback_info.work_timestamp = work_timestamp; + solution_t temp_sol(sol); + problem_ptr->post_process_assignment(temp_sol.assignment); + if (settings.mip_scaling) { + rmm::device_uvector dummy(0, temp_sol.handle_ptr->get_stream()); + scaling.unscale_solutions(temp_sol.assignment, dummy); + } + if (problem_ptr->has_papilo_presolve_data()) { + problem_ptr->papilo_uncrush_assignment(temp_sol.assignment); + } + payload.assignment = temp_sol.get_host_assignment(); + return payload; + } + + bool publish_new_best_feasible(const solution_callback_payload_t& payload, + double elapsed_time = -1.0) + { + std::lock_guard lock(solution_callback_mutex_); + cuopt_assert(std::isfinite(payload.solver_objective), + "Feasible incumbent objective must be finite"); + if (!(payload.solver_objective < best_callback_feasible_objective_)) { return false; } + + if (settings.benchmark_info_ptr != nullptr && elapsed_time >= 0.0) { + settings.benchmark_info_ptr->last_improvement_of_best_feasible = elapsed_time; + } + invoke_get_solution_callbacks(payload); + best_callback_feasible_objective_ = payload.solver_objective; + return true; + } + + void publish_terminal_solution(const solution_callback_payload_t& payload) + { + std::lock_guard lock(solution_callback_mutex_); + invoke_get_solution_callbacks(payload); + best_callback_feasible_objective_ = payload.solver_objective; + } + + private: + void invoke_get_solution_callbacks(const solution_callback_payload_t& payload) + { + auto user_callbacks = settings.get_mip_callbacks(); + CUOPT_LOG_DEBUG("Publishing incumbent: obj=%g wut=%.6f origin=%s callbacks=%zu", + payload.user_objective, + payload.callback_info.work_timestamp, + internals::mip_solution_origin_to_string(payload.callback_info.origin), + user_callbacks.size()); + + std::vector user_objective_vec(1); + std::vector user_bound_vec(1); + user_objective_vec[0] = payload.user_objective; + user_bound_vec[0] = stats.get_solution_bound(); + + for (auto callback : user_callbacks) { + if (callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION_EXT) { + auto get_sol_callback_ext = static_cast(callback); + get_sol_callback_ext->get_solution(const_cast(payload.assignment.data()), + user_objective_vec.data(), + user_bound_vec.data(), + &payload.callback_info, + get_sol_callback_ext->get_user_data()); + } else if (callback->get_type() == internals::base_solution_callback_type::GET_SOLUTION) { + auto get_sol_callback = static_cast(callback); + get_sol_callback->get_solution(const_cast(payload.assignment.data()), + user_objective_vec.data(), + user_bound_vec.data(), + get_sol_callback->get_user_data()); + } + } + } + + const mip_solver_settings_t& settings; + solver_stats_t& stats; + std::mutex solution_callback_mutex_; + f_t best_callback_feasible_objective_{std::numeric_limits::max()}; +}; + +// Processes SET_SOLUTION user callbacks: invokes the callback, validates/scales/preprocesses +// the returned assignment, and returns it for the caller to reinject. +template +class solution_injection_t { + public: + solution_injection_t(const mip_solver_settings_t& settings_, + solver_stats_t& stats_) + : settings(settings_), stats(stats_) + { + } + + // Invokes SET_SOLUTION callbacks with current incumbent info. + // For each callback that returns a valid solution, calls `on_injected` with the processed + // host assignment and solver-space objective. + template + void invoke_set_solution_callbacks(problem_t* problem_ptr, + pdlp_initial_scaling_strategy_t& scaling, + solution_t& current_incumbent, + OnInjectedFn&& on_injected) + { + auto user_callbacks = settings.get_mip_callbacks(); + for (auto callback : user_callbacks) { + if (callback->get_type() != internals::base_solution_callback_type::SET_SOLUTION) { + continue; + } + auto set_sol_callback = static_cast(callback); + f_t user_bound = stats.get_solution_bound(); + auto callback_num_variables = problem_ptr->original_problem_ptr->get_n_variables(); + rmm::device_uvector incumbent_assignment(callback_num_variables, + current_incumbent.handle_ptr->get_stream()); + auto inf = std::numeric_limits::infinity(); + current_incumbent.handle_ptr->sync_stream(); + std::vector h_incumbent_assignment(incumbent_assignment.size()); + std::vector h_outside_sol_objective(1, inf); + std::vector h_user_bound(1, user_bound); + set_sol_callback->set_solution(h_incumbent_assignment.data(), + h_outside_sol_objective.data(), + h_user_bound.data(), + set_sol_callback->get_user_data()); + f_t outside_sol_objective = h_outside_sol_objective[0]; + if (outside_sol_objective == inf) { continue; } + + raft::copy(incumbent_assignment.data(), + h_incumbent_assignment.data(), + incumbent_assignment.size(), + current_incumbent.handle_ptr->get_stream()); + if (settings.mip_scaling) { scaling.scale_solutions(incumbent_assignment); } + bool is_valid = problem_ptr->pre_process_assignment(incumbent_assignment); + if (!is_valid) { continue; } + + solution_t outside_sol(current_incumbent); + cuopt_assert(outside_sol.assignment.size() == incumbent_assignment.size(), + "Incumbent assignment size mismatch"); + raft::copy(outside_sol.assignment.data(), + incumbent_assignment.data(), + incumbent_assignment.size(), + current_incumbent.handle_ptr->get_stream()); + outside_sol.compute_feasibility(); + + CUOPT_LOG_DEBUG("Injected solution feasibility = %d objective = %g excess = %g", + outside_sol.get_feasible(), + outside_sol.get_user_objective(), + outside_sol.get_total_excess()); + cuopt_assert(std::abs(outside_sol.get_user_objective() - outside_sol_objective) <= 1e-6, + "External solution objective mismatch"); + on_injected(outside_sol.get_host_assignment(), + outside_sol.get_objective(), + internals::mip_solution_origin_t::USER_INITIAL); + } + } + + private: + const mip_solver_settings_t& settings; + solver_stats_t& stats; +}; + // Aggregate structure containing the global context of the solving process for convenience: // The current problem, user settings, raft handle and statistics objects template @@ -36,12 +234,21 @@ struct mip_solver_context_t { problem_t* problem_ptr_, mip_solver_settings_t settings_, pdlp_initial_scaling_strategy_t& scaling) - : handle_ptr(handle_ptr_), problem_ptr(problem_ptr_), settings(settings_), scaling(scaling) + : handle_ptr(handle_ptr_), + problem_ptr(problem_ptr_), + settings(settings_), + scaling(scaling), + solution_publication(settings, stats), + solution_injection(settings, stats) { cuopt_assert(problem_ptr != nullptr, "problem_ptr is nullptr"); stats.set_solution_bound(problem_ptr->maximize ? std::numeric_limits::infinity() : -std::numeric_limits::infinity()); - gpu_heur_loop.deterministic = settings.determinism_mode == CUOPT_MODE_DETERMINISTIC; + gpu_heur_loop.deterministic = (settings.determinism_mode & CUOPT_DETERMINISM_GPU_HEURISTICS); + cuopt_assert(settings.cpufj_work_unit_scale > 0.0, "CPUFJ work-unit scale must be positive"); + cuopt_assert(settings.gpu_heur_work_unit_scale > 0.0, + "GPU heuristic work-unit scale must be positive"); + gpu_heur_loop.work_unit_scale = settings.gpu_heur_work_unit_scale; } mip_solver_context_t(const mip_solver_context_t&) = delete; @@ -55,9 +262,17 @@ struct mip_solver_context_t { const mip_solver_settings_t settings; pdlp_initial_scaling_strategy_t& scaling; solver_stats_t stats; + // TODO: ensure thread local (or use locks...?) + mip_solver_work_unit_predictors_t work_unit_predictors; // Work limit context for tracking work units in deterministic mode (shared across all timers in // GPU heuristic loop) work_limit_context_t gpu_heur_loop{"GPUHeur"}; + solution_publication_t solution_publication; + solution_injection_t solution_injection; + + // Root termination checker — set by mip_solver_t after construction. + // All sub-timers should use this as parent for wall-clock safety. + cuopt::termination_checker_t* termination{nullptr}; // synchronization every 5 seconds for deterministic mode work_unit_scheduler_t work_unit_scheduler_{5.0}; diff --git a/cpp/src/mip_heuristics/solver_solution.cu b/cpp/src/mip_heuristics/solver_solution.cu index 60556884c9..e497a21c8f 100644 --- a/cpp/src/mip_heuristics/solver_solution.cu +++ b/cpp/src/mip_heuristics/solver_solution.cu @@ -209,8 +209,8 @@ void mip_solution_t::write_to_sol_file(std::string_view filename, status = "Infeasible"; } - double objective_value = get_objective_value(); - auto& var_names = get_variable_names(); + f_t objective_value = get_objective_value(); + auto& var_names = get_variable_names(); std::vector solution; solution.resize(solution_.size()); raft::copy(solution.data(), solution_.data(), solution_.size(), stream_view.value()); @@ -234,7 +234,7 @@ void mip_solution_t::log_summary() const CUOPT_LOG_INFO("Total Solve Time: %f", get_total_solve_time()); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class mip_solution_t; #endif diff --git a/cpp/src/mip_heuristics/utils.cuh b/cpp/src/mip_heuristics/utils.cuh index 33712635e9..ffadc1f510 100644 --- a/cpp/src/mip_heuristics/utils.cuh +++ b/cpp/src/mip_heuristics/utils.cuh @@ -339,8 +339,9 @@ static void inline run_device_lambda(const rmm::cuda_stream_view& stream, Func f template f_t compute_rel_mip_gap(f_t user_obj, f_t solution_bound) { - if (user_obj == 0.0) { - return solution_bound == 0.0 ? 0.0 : std::numeric_limits::infinity(); + if (integer_equal(user_obj, 0.0, 1e-6)) { + return integer_equal(solution_bound, 0.0, 1e-6) ? 0.0 + : std::numeric_limits::infinity(); } return std::abs(user_obj - solution_bound) / std::abs(user_obj); } diff --git a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu index f9a73dff06..b078bc4779 100644 --- a/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/cpu_pdlp_warm_start_data.cu @@ -108,14 +108,14 @@ pdlp_warm_start_data_t convert_to_gpu_warmstart( return gpu_data; } -// Explicit template instantiations +#if MIP_INSTANTIATE_DOUBLE template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); - template pdlp_warm_start_data_t convert_to_gpu_warmstart( const cpu_pdlp_warm_start_data_t&, rmm::cuda_stream_view); +#endif -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template cpu_pdlp_warm_start_data_t convert_to_cpu_warmstart( const pdlp_warm_start_data_t&, rmm::cuda_stream_view); diff --git a/cpp/src/pdlp/cuopt_c.cpp b/cpp/src/pdlp/cuopt_c.cpp index ed2eab02f2..0c9736646f 100644 --- a/cpp/src/pdlp/cuopt_c.cpp +++ b/cpp/src/pdlp/cuopt_c.cpp @@ -49,6 +49,40 @@ class c_get_solution_callback_t : public cuopt::internals::get_solution_callback cuOptMIPGetSolutionCallback callback_; }; +class c_get_solution_callback_ext_t : public cuopt::internals::get_solution_callback_ext_t { + public: + explicit c_get_solution_callback_ext_t(cuOptMIPGetSolutionCallbackExt callback) + : callback_(callback) + { + } + + void get_solution(void* data, + void* objective_value, + void* solution_bound, + const cuopt::internals::mip_solution_callback_info_t* callback_info, + void* user_data) override + { + if (callback_ == nullptr) { return; } + cuOptMIPSolutionCallbackInfo c_callback_info{}; + c_callback_info.struct_size = sizeof(cuOptMIPSolutionCallbackInfo); + if (callback_info != nullptr) { + c_callback_info.origin = (uint32_t)callback_info->origin; + c_callback_info.work_timestamp = callback_info->work_timestamp; + } else { + c_callback_info.origin = CUOPT_MIP_SOLUTION_ORIGIN_UNKNOWN; + c_callback_info.work_timestamp = -1.0; + } + callback_(static_cast(data), + static_cast(objective_value), + static_cast(solution_bound), + &c_callback_info, + user_data); + } + + private: + cuOptMIPGetSolutionCallbackExt callback_; +}; + class c_set_solution_callback_t : public cuopt::internals::set_solution_callback_t { public: explicit c_set_solution_callback_t(cuOptMIPSetSolutionCallback callback) : callback_(callback) {} @@ -767,6 +801,19 @@ cuopt_int_t cuOptSetMIPGetSolutionCallback(cuOptSolverSettings settings, return CUOPT_SUCCESS; } +cuopt_int_t cuOptSetMIPGetSolutionCallbackExt(cuOptSolverSettings settings, + cuOptMIPGetSolutionCallbackExt callback, + void* user_data) +{ + if (settings == nullptr) { return CUOPT_INVALID_ARGUMENT; } + if (callback == nullptr) { return CUOPT_INVALID_ARGUMENT; } + solver_settings_handle_t* settings_handle = get_settings_handle(settings); + auto callback_wrapper = std::make_unique(callback); + settings_handle->settings->set_mip_callback(callback_wrapper.get(), user_data); + settings_handle->callbacks.push_back(std::move(callback_wrapper)); + return CUOPT_SUCCESS; +} + cuopt_int_t cuOptSetMIPSetSolutionCallback(cuOptSolverSettings settings, cuOptMIPSetSolutionCallback callback, void* user_data) diff --git a/cpp/src/pdlp/cusparse_view.cu b/cpp/src/pdlp/cusparse_view.cu index ca36dde421..64ec44f5ef 100644 --- a/cpp/src/pdlp/cusparse_view.cu +++ b/cpp/src/pdlp/cusparse_view.cu @@ -21,6 +21,12 @@ #include #include +#include + +struct double_to_float_functor { + __host__ __device__ float operator()(double val) const { return static_cast(val); } +}; + namespace cuopt::linear_programming::detail { // cusparse_sp_mat_descr_wrapper_t implementation @@ -277,7 +283,8 @@ cusparse_view_t::cusparse_view_t( rmm::device_uvector& _potential_next_dual_solution, rmm::device_uvector& _reflected_primal_solution, const std::vector& climber_strategies, - const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params) + const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, + bool enable_mixed_precision_spmv) : batch_mode_(climber_strategies.size() > 1), handle_ptr_(handle_ptr), A{}, @@ -304,7 +311,12 @@ cusparse_view_t::cusparse_view_t( A_{op_problem_scaled.coefficients}, A_offsets_{op_problem_scaled.offsets}, A_indices_{op_problem_scaled.variables}, - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { raft::common::nvtx::range fun_scope("Initializing cuSparse view"); @@ -583,6 +595,92 @@ cusparse_view_t::cusparse_view_t( handle_ptr->get_stream()); } #endif + + if constexpr (std::is_same_v) { + if (enable_mixed_precision_spmv && !batch_mode_) { + mixed_precision_enabled_ = true; + + A_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + A_T_float_.resize(op_problem_scaled.nnz, handle_ptr->get_stream()); + + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(op_problem_scaled.coefficients.data(), + A_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value())); + + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + op_problem_scaled.nnz, + double_to_float_functor{}, + handle_ptr->get_stream().value())); + + A_mixed_.create(op_problem_scaled.n_constraints, + op_problem_scaled.n_variables, + op_problem_scaled.nnz, + const_cast(op_problem_scaled.offsets.data()), + const_cast(op_problem_scaled.variables.data()), + A_float_.data()); + + A_T_mixed_.create(op_problem_scaled.n_variables, + op_problem_scaled.n_constraints, + op_problem_scaled.nnz, + const_cast(A_T_offsets_.data()), + const_cast(A_T_indices_.data()), + A_T_float_.data()); + + const rmm::device_scalar alpha_d{1.0, handle_ptr->get_stream()}; + const rmm::device_scalar beta_d{0.0, handle_ptr->get_stream()}; + + size_t buffer_size_non_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_mixed_, + c, + beta_d.data(), + dual_solution, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_non_transpose_mixed_.resize(buffer_size_non_transpose_mixed, handle_ptr->get_stream()); + + size_t buffer_size_transpose_mixed = + mixed_precision_spmv_buffersize(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_T_mixed_, + dual_solution, + beta_d.data(), + c, + CUSPARSE_SPMV_CSR_ALG2, + handle_ptr->get_stream()); + buffer_transpose_mixed_.resize(buffer_size_transpose_mixed, handle_ptr->get_stream()); + +#if CUDA_VER_12_4_UP + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_mixed_, + c, + beta_d.data(), + dual_solution, + CUSPARSE_SPMV_CSR_ALG2, + buffer_non_transpose_mixed_.data(), + handle_ptr->get_stream()); + + mixed_precision_spmv_preprocess(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha_d.data(), + A_T_mixed_, + dual_solution, + beta_d.data(), + c, + CUSPARSE_SPMV_CSR_ALG2, + buffer_transpose_mixed_.data(), + handle_ptr->get_stream()); +#endif + } + } } // Used by pdlp object for current and average termination condition @@ -625,7 +723,12 @@ cusparse_view_t::cusparse_view_t( A_{op_problem.coefficients}, A_offsets_{op_problem.offsets}, A_indices_{op_problem.variables}, - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { #ifdef PDLP_DEBUG_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); @@ -832,7 +935,12 @@ cusparse_view_t::cusparse_view_t( A_{existing_cusparse_view.A_}, A_offsets_{existing_cusparse_view.A_offsets_}, A_indices_{existing_cusparse_view.A_indices_}, - climber_strategies_(existing_cusparse_view.climber_strategies_) + climber_strategies_(existing_cusparse_view.climber_strategies_), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} { #ifdef PDLP_DEBUG_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); @@ -942,11 +1050,105 @@ cusparse_view_t::cusparse_view_t( A_(dummy_float), A_offsets_(dummy_int), A_indices_(dummy_int), - climber_strategies_(climber_strategies) + climber_strategies_(climber_strategies), + A_float_{0, handle_ptr->get_stream()}, + A_T_float_{0, handle_ptr->get_stream()}, + buffer_non_transpose_mixed_{0, handle_ptr->get_stream()}, + buffer_transpose_mixed_{0, handle_ptr->get_stream()}, + mixed_precision_enabled_{false} +{ +} + +// Update FP32 matrix copies after scaling (must be called after scale_problem()) +template +void cusparse_view_t::update_mixed_precision_matrices() +{ + if constexpr (std::is_same_v) { + if (!mixed_precision_enabled_) { return; } + + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_.data(), + A_float_.data(), + A_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value())); + + RAFT_CUDA_TRY(cub::DeviceTransform::Transform(A_T_.data(), + A_T_float_.data(), + A_T_.size(), + double_to_float_functor{}, + handle_ptr_->get_stream().value())); + + handle_ptr_->get_stream().synchronize(); + } +} + +// Mixed precision SpMV implementation: FP32 matrix with FP64 vectors and FP64 compute type +size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + cudaStream_t stream) +{ + size_t bufferSize = 0; + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY(cusparseSpMV_bufferSize( + handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, &bufferSize)); + return bufferSize; +} + +void mixed_precision_spmv(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream) +{ + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY( + cusparseSpMV(handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, externalBuffer)); +} + +#if CUDA_VER_12_4_UP +void mixed_precision_spmv_preprocess(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream) +{ + static const auto func = + dynamic_load_runtime::function("cusparseSpMV_preprocess"); + if (func.has_value()) { + RAFT_CUSPARSE_TRY(cusparseSetStream(handle, stream)); + RAFT_CUSPARSE_TRY( + (*func)(handle, opA, alpha, matA, vecX, beta, vecY, CUDA_R_64F, alg, externalBuffer)); + } +} +#endif + +bool is_cusparse_runtime_mixed_precision_supported() { + int major = 0, minor = 0; + auto status = cusparseGetProperty(libraryPropertyType_t::MAJOR_VERSION, &major); + if (status != CUSPARSE_STATUS_SUCCESS) return false; + status = cusparseGetProperty(libraryPropertyType_t::MINOR_VERSION, &minor); + if (status != CUSPARSE_STATUS_SUCCESS) return false; + return (major > 12) || (major == 12 && minor >= 5); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class cusparse_sp_mat_descr_wrapper_t; template class cusparse_dn_vec_descr_wrapper_t; template class cusparse_dn_mat_descr_wrapper_t; @@ -960,7 +1162,7 @@ template class cusparse_view_t; #endif #if CUDA_VER_12_4_UP -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template void my_cusparsespmm_preprocess(cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, diff --git a/cpp/src/pdlp/cusparse_view.hpp b/cpp/src/pdlp/cusparse_view.hpp index cbbc856924..416a0b1e5f 100644 --- a/cpp/src/pdlp/cusparse_view.hpp +++ b/cpp/src/pdlp/cusparse_view.hpp @@ -90,7 +90,8 @@ class cusparse_view_t { rmm::device_uvector& _potential_next_dual_solution, rmm::device_uvector& _reflected_primal_solution, const std::vector& climber_strategies, - const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params); + const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, + bool enable_mixed_precision_spmv); cusparse_view_t(raft::handle_t const* handle_ptr, const problem_t& op_problem, @@ -194,8 +195,56 @@ class cusparse_view_t { const rmm::device_uvector& A_indices_; const std::vector& climber_strategies_; + + // Mixed precision SpMV support (FP32 matrix with FP64 vectors/compute) + // Only used when mixed_precision_enabled_ is true and f_t = double + rmm::device_uvector A_float_; // FP32 copy of A values + rmm::device_uvector A_T_float_; // FP32 copy of A_T values + cusparse_sp_mat_descr_wrapper_t A_mixed_; // FP32 matrix descriptor for A + cusparse_sp_mat_descr_wrapper_t A_T_mixed_; // FP32 matrix descriptor for A_T + rmm::device_uvector buffer_non_transpose_mixed_; // SpMV buffer for mixed precision A + rmm::device_uvector buffer_transpose_mixed_; // SpMV buffer for mixed precision A_T + bool mixed_precision_enabled_{false}; + + // Update FP32 matrix copies after scaling (must be called after scale_problem()) + void update_mixed_precision_matrices(); }; +// Mixed precision SpMV: FP32 matrix with FP64 vectors and FP64 compute type +void mixed_precision_spmv(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream); + +size_t mixed_precision_spmv_buffersize(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + cudaStream_t stream); + +#if CUDA_VER_12_4_UP +void mixed_precision_spmv_preprocess(cusparseHandle_t handle, + cusparseOperation_t opA, + const double* alpha, + cusparseSpMatDescr_t matA, // FP32 matrix + cusparseDnVecDescr_t vecX, // FP64 vector + const double* beta, + cusparseDnVecDescr_t vecY, // FP64 vector + cusparseSpMVAlg_t alg, + void* externalBuffer, + cudaStream_t stream); +#endif + #if CUDA_VER_12_4_UP template < typename T, @@ -213,4 +262,6 @@ void my_cusparsespmm_preprocess(cusparseHandle_t handle, cudaStream_t stream); #endif +bool is_cusparse_runtime_mixed_precision_supported(); + } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu index afa3ee5fb7..b618550f6e 100644 --- a/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/pdlp/initial_scaling_strategy/initial_scaling.cu @@ -858,7 +858,7 @@ pdlp_initial_scaling_strategy_t::view() int* A_T_offsets, \ int* A_T_indices); -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu index deb6b759aa..cbfb03618d 100644 --- a/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu +++ b/cpp/src/pdlp/optimal_batch_size_handler/optimal_batch_size_handler.cu @@ -434,7 +434,7 @@ int optimal_batch_size_handler(const optimization_problem_t& op_proble return 0; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template int optimal_batch_size_handler( const optimization_problem_t& op_problem, int max_batch_size); #endif diff --git a/cpp/src/pdlp/optimization_problem.cu b/cpp/src/pdlp/optimization_problem.cu index d0888dd3ac..9b3016a113 100644 --- a/cpp/src/pdlp/optimization_problem.cu +++ b/cpp/src/pdlp/optimization_problem.cu @@ -40,6 +40,7 @@ #include #include +#include #include #include @@ -1505,15 +1506,105 @@ void optimization_problem_t::copy_variable_types_to_host(var_t* output cudaMemcpy(output, variable_types_.data(), size * sizeof(var_t), cudaMemcpyDeviceToHost)); } +template +struct cast_op { + HDI To operator()(From val) const { return static_cast(val); } +}; + +template +rmm::device_uvector gpu_cast(const rmm::device_uvector& src, rmm::cuda_stream_view stream) +{ + rmm::device_uvector dst(src.size(), stream); + if (src.size() > 0) { + RAFT_CUDA_TRY(cub::DeviceTransform::Transform( + src.data(), dst.data(), src.size(), cast_op{}, stream.value())); + } + return dst; +} + +template rmm::device_uvector gpu_cast(const rmm::device_uvector&, + rmm::cuda_stream_view); +template rmm::device_uvector gpu_cast(const rmm::device_uvector&, + rmm::cuda_stream_view); + +template +template +optimization_problem_t optimization_problem_t::convert_to_other_prec( + rmm::cuda_stream_view stream) const +{ + optimization_problem_t other(handle_ptr_); + + other.set_maximize(maximize_); + other.set_objective_offset(static_cast(objective_offset_)); + other.set_objective_scaling_factor(static_cast(objective_scaling_factor_)); + + if (A_.size() > 0) { + auto other_A = gpu_cast(A_, stream); + other.set_csr_constraint_matrix(other_A.data(), + static_cast(other_A.size()), + A_indices_.data(), + static_cast(A_indices_.size()), + A_offsets_.data(), + static_cast(A_offsets_.size())); + } + + if (c_.size() > 0) { + auto other_c = gpu_cast(c_, stream); + other.set_objective_coefficients(other_c.data(), static_cast(other_c.size())); + } + + if (b_.size() > 0) { + auto other_b = gpu_cast(b_, stream); + other.set_constraint_bounds(other_b.data(), static_cast(other_b.size())); + } + + if (constraint_lower_bounds_.size() > 0) { + auto other_clb = gpu_cast(constraint_lower_bounds_, stream); + other.set_constraint_lower_bounds(other_clb.data(), static_cast(other_clb.size())); + } + + if (constraint_upper_bounds_.size() > 0) { + auto other_cub = gpu_cast(constraint_upper_bounds_, stream); + other.set_constraint_upper_bounds(other_cub.data(), static_cast(other_cub.size())); + } + + if (variable_lower_bounds_.size() > 0) { + auto other_vlb = gpu_cast(variable_lower_bounds_, stream); + other.set_variable_lower_bounds(other_vlb.data(), static_cast(other_vlb.size())); + } + + if (variable_upper_bounds_.size() > 0) { + auto other_vub = gpu_cast(variable_upper_bounds_, stream); + other.set_variable_upper_bounds(other_vub.data(), static_cast(other_vub.size())); + } + + if (variable_types_.size() > 0) { + other.set_variable_types(variable_types_.data(), static_cast(variable_types_.size())); + } + + other.set_variable_names(var_names_); + other.set_row_names(row_names_); + other.set_objective_name(objective_name_); + other.set_problem_category(problem_category_); + + return other; +} + // ============================================================================== // Template instantiations // ============================================================================== // Explicit template instantiations matching MIP constants -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class optimization_problem_t; #endif #if MIP_INSTANTIATE_DOUBLE template class optimization_problem_t; #endif +#if PDLP_INSTANTIATE_FLOAT || MIP_INSTANTIATE_FLOAT +template optimization_problem_t + optimization_problem_t::convert_to_other_prec( + rmm::cuda_stream_view) const; +#endif + } // namespace cuopt::linear_programming diff --git a/cpp/src/pdlp/pdhg.cu b/cpp/src/pdlp/pdhg.cu index 51e0b29381..74df7fee01 100644 --- a/cpp/src/pdlp/pdhg.cu +++ b/cpp/src/pdlp/pdhg.cu @@ -41,7 +41,8 @@ pdhg_solver_t::pdhg_solver_t( bool is_legacy_batch_mode, // Batch mode with streams const std::vector& climber_strategies, const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, - const std::vector>& new_bounds) + const std::vector>& new_bounds, + bool enable_mixed_precision_spmv) : batch_mode_(climber_strategies.size() > 1), handle_ptr_(handle_ptr), stream_view_(handle_ptr_->get_stream()), @@ -77,7 +78,8 @@ pdhg_solver_t::pdhg_solver_t( potential_next_dual_solution_, reflected_primal_, climber_strategies, - hyper_params}, + hyper_params, + enable_mixed_precision_spmv}, reusable_device_scalar_value_1_{1.0, stream_view_}, reusable_device_scalar_value_0_{0.0, stream_view_}, reusable_device_scalar_value_neg_1_{f_t(-1.0), stream_view_}, @@ -249,17 +251,33 @@ void pdhg_solver_t::compute_next_dual_solution(rmm::device_uvectorget_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), // 1 - cusparse_view_.A, - cusparse_view_.tmp_primal, - reusable_device_scalar_value_0_.data(), // 1 - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_mixed_, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } + } + if (!cusparse_view_.mixed_precision_enabled_) { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A, + cusparse_view_.tmp_primal, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); + } // y - (sigma*dual_gradient) // max(min(0, sigma*constraint_upper+primal_product), sigma*constraint_lower+primal_product) @@ -287,17 +305,33 @@ void pdhg_solver_t::compute_At_y() // A_t @ y if (!batch_mode_) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A_T, - cusparse_view_.dual_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.current_AtY, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T_mixed_, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_transpose_mixed_.data(), + stream_view_); + } + } + if (!cusparse_view_.mixed_precision_enabled_) { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + cusparse_view_.dual_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.current_AtY, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); + } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( handle_ptr_->get_cusparse_handle(), @@ -319,17 +353,33 @@ void pdhg_solver_t::compute_A_x() { // A @ x if (!batch_mode_) { - RAFT_CUSPARSE_TRY( - raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), - CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_1_.data(), - cusparse_view_.A, - cusparse_view_.reflected_primal_solution, - reusable_device_scalar_value_0_.data(), - cusparse_view_.dual_gradient, - CUSPARSE_SPMV_CSR_ALG2, - (f_t*)cusparse_view_.buffer_non_transpose.data(), - stream_view_)); + if constexpr (std::is_same_v) { + if (cusparse_view_.mixed_precision_enabled_) { + mixed_precision_spmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_mixed_, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + cusparse_view_.buffer_non_transpose_mixed_.data(), + stream_view_); + } + } + if (!cusparse_view_.mixed_precision_enabled_) { + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); + } } else { RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmm( handle_ptr_->get_cusparse_handle(), @@ -1196,7 +1246,7 @@ rmm::device_uvector& pdhg_solver_t::get_dual_solution() return current_saddle_point_state_.get_dual_solution(); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdhg_solver_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/pdlp/pdhg.hpp b/cpp/src/pdlp/pdhg.hpp index 8ff45ac0ce..0a64e49efb 100644 --- a/cpp/src/pdlp/pdhg.hpp +++ b/cpp/src/pdlp/pdhg.hpp @@ -29,7 +29,8 @@ class pdhg_solver_t { bool is_legacy_batch_mode, const std::vector& climber_strategies, const pdlp_hyper_params::pdlp_hyper_params_t& hyper_params, - const std::vector>& new_bounds); + const std::vector>& new_bounds, + bool enable_mixed_precision_spmv = false); saddle_point_state_t& get_saddle_point_state(); cusparse_view_t& get_cusparse_view(); diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index cda60cf5ff..a6f16ec972 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -43,6 +43,59 @@ namespace cuopt::linear_programming::detail { +// Templated wrapper for cuBLAS geam function +// cublasSgeam for float, cublasDgeam for double +template +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const T* alpha, + const T* A, + int lda, + const T* beta, + const T* B, + int ldb, + T* C, + int ldc); + +template <> +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const float* alpha, + const float* A, + int lda, + const float* beta, + const float* B, + int ldb, + float* C, + int ldc) +{ + return cublasSgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); +} + +template <> +inline cublasStatus_t cublasGeam(cublasHandle_t handle, + cublasOperation_t transa, + cublasOperation_t transb, + int m, + int n, + const double* alpha, + const double* A, + int lda, + const double* beta, + const double* B, + int ldb, + double* C, + int ldc) +{ + return cublasDgeam(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C, ldc); +} + template static size_t batch_size_handler(const problem_t& op_problem, const pdlp_solver_settings_t& settings) @@ -88,7 +141,8 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, is_legacy_batch_mode, climber_strategies_, settings_.hyper_params, - settings_.new_bounds}, + settings_.new_bounds, + settings_.pdlp_precision == pdlp_precision_t::MixedPrecision}, initial_scaling_strategy_{handle_ptr_, op_problem_scaled_, settings_.hyper_params.default_l_inf_ruiz_iterations, @@ -1925,48 +1979,48 @@ void pdlp_solver_t::transpose_primal_dual_to_row( is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_); RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - primal_size_h_, - reusable_device_scalar_value_1_.data(), - primal_to_transpose.data(), - primal_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - primal_transposed.data(), - climber_strategies_.size())); + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + primal_size_h_, + reusable_device_scalar_value_1_.data(), + primal_to_transpose.data(), + primal_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + primal_transposed.data(), + climber_strategies_.size())); if (!is_dual_slack_empty) { - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - primal_size_h_, - reusable_device_scalar_value_1_.data(), - dual_slack_to_transpose.data(), - primal_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - dual_slack_transposed.data(), - climber_strategies_.size())); + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + primal_size_h_, + reusable_device_scalar_value_1_.data(), + dual_slack_to_transpose.data(), + primal_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + dual_slack_transposed.data(), + climber_strategies_.size())); } - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - climber_strategies_.size(), - dual_size_h_, - reusable_device_scalar_value_1_.data(), - dual_to_transpose.data(), - dual_size_h_, - reusable_device_scalar_value_0_.data(), - nullptr, - climber_strategies_.size(), - dual_transposed.data(), - climber_strategies_.size())); + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + climber_strategies_.size(), + dual_size_h_, + reusable_device_scalar_value_1_.data(), + dual_to_transpose.data(), + dual_size_h_, + reusable_device_scalar_value_0_.data(), + nullptr, + climber_strategies_.size(), + dual_transposed.data(), + climber_strategies_.size())); // Copy that holds the tranpose to the original vector raft::copy(primal_to_transpose.data(), @@ -2002,49 +2056,49 @@ void pdlp_solver_t::transpose_primal_dual_back_to_col( is_dual_slack_empty ? 0 : primal_size_h_ * climber_strategies_.size(), stream_view_); RAFT_CUBLAS_TRY(cublasSetStream(handle_ptr_->get_cublas_handle(), stream_view_)); - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - primal_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - primal_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - primal_size_h_, - primal_transposed.data(), - primal_size_h_)); + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + primal_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + primal_to_transpose.data(), + climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + primal_size_h_, + primal_transposed.data(), + primal_size_h_)); if (!is_dual_slack_empty) { - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - primal_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - dual_slack_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - primal_size_h_, - dual_slack_transposed.data(), - primal_size_h_)); + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + primal_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + dual_slack_to_transpose.data(), + climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + primal_size_h_, + dual_slack_transposed.data(), + primal_size_h_)); } - CUBLAS_CHECK(cublasDgeam(handle_ptr_->get_cublas_handle(), - CUBLAS_OP_T, - CUBLAS_OP_N, - dual_size_h_, - climber_strategies_.size(), - reusable_device_scalar_value_1_.data(), - dual_to_transpose.data(), - climber_strategies_.size(), - reusable_device_scalar_value_0_.data(), - nullptr, - dual_size_h_, - dual_transposed.data(), - dual_size_h_)); + CUBLAS_CHECK(cublasGeam(handle_ptr_->get_cublas_handle(), + CUBLAS_OP_T, + CUBLAS_OP_N, + dual_size_h_, + climber_strategies_.size(), + reusable_device_scalar_value_1_.data(), + dual_to_transpose.data(), + climber_strategies_.size(), + reusable_device_scalar_value_0_.data(), + nullptr, + dual_size_h_, + dual_transposed.data(), + dual_size_h_)); // Copy that holds the tranpose to the original vector raft::copy(primal_to_transpose.data(), @@ -2090,6 +2144,9 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co initial_scaling_strategy_.scale_problem(); + // Update FP32 matrix copies for mixed precision SpMV after scaling + pdhg_solver_.get_cusparse_view().update_mixed_precision_matrices(); + if (!settings_.hyper_params.compute_initial_step_size_before_scaling && !settings_.get_initial_step_size().has_value()) compute_initial_step_size(); @@ -2200,6 +2257,10 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co cuopt_expects(!batch_mode_, cuopt::error_type_t::ValidationError, "Restart to average not supported in batch mode"); + raft::copy(unscaled_primal_avg_solution_.data(), + pdhg_solver_.get_primal_solution().data(), + primal_size_h_, + stream_view_); cub::DeviceTransform::Transform( cuda::std::make_tuple(unscaled_primal_avg_solution_.data(), op_problem_scaled_.variable_bounds.data()), @@ -2914,7 +2975,7 @@ pdlp_solver_t::get_current_termination_strategy() return current_termination_strategy_; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_solver_t; template __global__ void compute_weights_initial_primal_weight_from_squared_norms( diff --git a/cpp/src/pdlp/pdlp_warm_start_data.cu b/cpp/src/pdlp/pdlp_warm_start_data.cu index 66bfe66914..80abf015d8 100644 --- a/cpp/src/pdlp/pdlp_warm_start_data.cu +++ b/cpp/src/pdlp/pdlp_warm_start_data.cu @@ -178,7 +178,7 @@ void pdlp_warm_start_data_t::check_sizes() "All dual vectors should be of same size"); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_warm_start_data_t; #endif diff --git a/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu b/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu index 2f2e2c7333..bb79e5b6e6 100644 --- a/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu +++ b/cpp/src/pdlp/restart_strategy/localized_duality_gap_container.cu @@ -144,7 +144,7 @@ localized_duality_gap_container_t::view() return v; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template struct localized_duality_gap_container_t; #endif #if MIP_INSTANTIATE_DOUBLE diff --git a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu index 8eacd4d246..149e99a431 100644 --- a/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/pdlp/restart_strategy/pdlp_restart_strategy.cu @@ -2523,7 +2523,7 @@ bool pdlp_restart_strategy_t::get_last_restart_was_average() const const typename localized_duality_gap_container_t::view_t duality_gap_view, \ F_TYPE* primal_product); -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu b/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu index 03c76d79ae..70a448a9de 100644 --- a/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu +++ b/cpp/src/pdlp/restart_strategy/weighted_average_solution.cu @@ -139,7 +139,7 @@ i_t weighted_average_solution_t::get_iterations_since_last_restart() c return iterations_since_last_restart_; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template __global__ void add_weight_sums(const float* primal_weight, const float* dual_weight, float* sum_primal_solution_weights, diff --git a/cpp/src/pdlp/saddle_point.cu b/cpp/src/pdlp/saddle_point.cu index c516ab7355..157e7fa389 100644 --- a/cpp/src/pdlp/saddle_point.cu +++ b/cpp/src/pdlp/saddle_point.cu @@ -166,7 +166,7 @@ rmm::device_uvector& saddle_point_state_t::get_next_AtY() return next_AtY_; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class saddle_point_state_t; #endif diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 5e1e25bbee..2fc9ec08d5 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -60,6 +60,10 @@ namespace cuopt::linear_programming { +template +extern rmm::device_uvector gpu_cast(const rmm::device_uvector& src, + rmm::cuda_stream_view stream); + // This serves as both a warm up but also a mandatory initial call to setup cuSparse and cuBLAS static void init_handler(const raft::handle_t* handle_ptr) { @@ -560,6 +564,122 @@ optimization_problem_solution_t run_dual_simplex( 0); } +#if PDLP_INSTANTIATE_FLOAT || CUOPT_INSTANTIATE_FLOAT + +template +static optimization_problem_solution_t run_pdlp_solver_in_fp32( + detail::problem_t& problem, + pdlp_solver_settings_t const& settings, + const timer_t& timer, + bool is_batch_mode) +{ + CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "Running PDLP in FP32 precision"); + auto stream = problem.handle_ptr->get_stream(); + + // Convert the optimization problem stored inside problem_t to float + auto float_op = problem.original_problem_ptr->template convert_to_other_prec(stream); + float_op.set_objective_offset(static_cast(problem.presolve_data.objective_offset)); + float_op.set_objective_scaling_factor( + static_cast(problem.presolve_data.objective_scaling_factor)); + + detail::problem_t float_problem(float_op); + + auto objective_name = problem.objective_name; + auto var_names = problem.var_names; + auto row_names = problem.row_names; + // When crossover is off, free double-precision GPU memory to reduce peak usage. + // When crossover is on, run_pdlp needs the problem data after we return. + if (!settings.crossover) { + { + [[maybe_unused]] auto discard = detail::problem_t(std::move(problem)); + } + } + + // Create float settings from double settings + pdlp_solver_settings_t fs; + fs.tolerances.absolute_dual_tolerance = + static_cast(settings.tolerances.absolute_dual_tolerance); + fs.tolerances.relative_dual_tolerance = + static_cast(settings.tolerances.relative_dual_tolerance); + fs.tolerances.absolute_primal_tolerance = + static_cast(settings.tolerances.absolute_primal_tolerance); + fs.tolerances.relative_primal_tolerance = + static_cast(settings.tolerances.relative_primal_tolerance); + fs.tolerances.absolute_gap_tolerance = + static_cast(settings.tolerances.absolute_gap_tolerance); + fs.tolerances.relative_gap_tolerance = + static_cast(settings.tolerances.relative_gap_tolerance); + fs.tolerances.primal_infeasible_tolerance = + static_cast(settings.tolerances.primal_infeasible_tolerance); + fs.tolerances.dual_infeasible_tolerance = + static_cast(settings.tolerances.dual_infeasible_tolerance); + fs.detect_infeasibility = settings.detect_infeasibility; + fs.strict_infeasibility = settings.strict_infeasibility; + fs.iteration_limit = settings.iteration_limit; + fs.time_limit = static_cast(settings.time_limit); + fs.pdlp_solver_mode = settings.pdlp_solver_mode; + fs.log_to_console = settings.log_to_console; + fs.log_file = settings.log_file; + fs.per_constraint_residual = settings.per_constraint_residual; + fs.save_best_primal_so_far = settings.save_best_primal_so_far; + fs.first_primal_feasible = settings.first_primal_feasible; + fs.eliminate_dense_columns = settings.eliminate_dense_columns; + fs.pdlp_precision = pdlp_precision_t::DefaultPrecision; + fs.method = method_t::PDLP; + fs.inside_mip = settings.inside_mip; + fs.hyper_params = settings.hyper_params; + fs.presolver = settings.presolver; + fs.num_gpus = settings.num_gpus; + fs.concurrent_halt = settings.concurrent_halt; + + detail::pdlp_solver_t solver(float_problem, fs, is_batch_mode); + if (settings.inside_mip) { solver.set_inside_mip(true); } + auto float_sol = solver.run_solver(timer); + + // Convert float solution back to double on GPU (gpu_cast defined in optimization_problem.cu) + auto dev_primal = gpu_cast(float_sol.get_primal_solution(), stream); + auto dev_dual = gpu_cast(float_sol.get_dual_solution(), stream); + auto dev_reduced = gpu_cast(float_sol.get_reduced_cost(), stream); + + // Convert termination info (small host-side struct, stays on CPU) + auto float_term_infos = float_sol.get_additional_termination_informations(); + using double_term_info_t = + typename optimization_problem_solution_t::additional_termination_information_t; + std::vector term_infos; + for (auto& fi : float_term_infos) { + double_term_info_t di; + di.number_of_steps_taken = fi.number_of_steps_taken; + di.total_number_of_attempted_steps = fi.total_number_of_attempted_steps; + di.l2_primal_residual = static_cast(fi.l2_primal_residual); + di.l2_relative_primal_residual = static_cast(fi.l2_relative_primal_residual); + di.l2_dual_residual = static_cast(fi.l2_dual_residual); + di.l2_relative_dual_residual = static_cast(fi.l2_relative_dual_residual); + di.primal_objective = static_cast(fi.primal_objective); + di.dual_objective = static_cast(fi.dual_objective); + di.gap = static_cast(fi.gap); + di.relative_gap = static_cast(fi.relative_gap); + di.max_primal_ray_infeasibility = static_cast(fi.max_primal_ray_infeasibility); + di.primal_ray_linear_objective = static_cast(fi.primal_ray_linear_objective); + di.max_dual_ray_infeasibility = static_cast(fi.max_dual_ray_infeasibility); + di.dual_ray_linear_objective = static_cast(fi.dual_ray_linear_objective); + di.solve_time = fi.solve_time; + di.solved_by_pdlp = fi.solved_by_pdlp; + term_infos.push_back(di); + } + + auto status_vec = float_sol.get_terminations_status(); + + return optimization_problem_solution_t(dev_primal, + dev_dual, + dev_reduced, + objective_name, + var_names, + row_names, + std::move(term_infos), + std::move(status_vec)); +} +#endif + template static optimization_problem_solution_t run_pdlp_solver( detail::problem_t& problem, @@ -574,6 +694,13 @@ static optimization_problem_solution_t run_pdlp_solver( return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError, problem.handle_ptr->get_stream()}; } +#if PDLP_INSTANTIATE_FLOAT || CUOPT_INSTANTIATE_FLOAT + if constexpr (std::is_same_v) { + if (settings.pdlp_precision == pdlp_precision_t::SinglePrecision) { + return run_pdlp_solver_in_fp32(problem, settings, timer, is_batch_mode); + } + } +#endif detail::pdlp_solver_t solver(problem, settings, is_batch_mode); if (settings.inside_mip) { solver.set_inside_mip(true); } return solver.run_solver(timer); @@ -585,6 +712,24 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& const timer_t& timer, bool is_batch_mode) { + if constexpr (!std::is_same_v) { + cuopt_expects(!is_batch_mode, + error_type_t::ValidationError, + "PDLP batch mode is not supported for float precision. Use double precision."); + } + cuopt_expects(!(settings.pdlp_precision == pdlp_precision_t::MixedPrecision && + !detail::is_cusparse_runtime_mixed_precision_supported()), + error_type_t::ValidationError, + "Mixed-precision SpMV requires cuSPARSE runtime 12.5 or later."); + cuopt_expects( + !(is_batch_mode && settings.pdlp_precision == pdlp_precision_t::MixedPrecision), + error_type_t::ValidationError, + "Mixed-precision SpMV is not supported in batch mode. Set pdlp_precision=-1 (default) " + "or disable batch mode."); + cuopt_expects(!(settings.pdlp_precision == pdlp_precision_t::SinglePrecision && is_batch_mode), + error_type_t::ValidationError, + "Single-precision PDLP is not supported in batch mode."); + auto start_solver = std::chrono::high_resolution_clock::now(); timer_t timer_pdlp(timer.remaining_time()); auto sol = run_pdlp_solver(problem, settings, timer, is_batch_mode); @@ -606,82 +751,89 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& sol.get_solve_time()); } - const bool do_crossover = settings.crossover; - i_t crossover_info = 0; - if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { - crossover_info = -1; - - dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); - dual_simplex::lp_solution_t initial_solution(1, 1); - translate_to_crossover_problem(problem, sol, lp, initial_solution); - dual_simplex::simplex_solver_settings_t dual_simplex_settings; - dual_simplex_settings.time_limit = settings.time_limit; - dual_simplex_settings.iteration_limit = settings.iteration_limit; - dual_simplex_settings.concurrent_halt = settings.concurrent_halt; - dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); - std::vector vstatus(lp.num_cols); - dual_simplex::crossover_status_t crossover_status = dual_simplex::crossover( - lp, dual_simplex_settings, initial_solution, timer.get_tic_start(), vertex_solution, vstatus); - pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; - auto to_termination_status = [](dual_simplex::crossover_status_t status) { - switch (status) { - case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; - case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: - return pdlp_termination_status_t::PrimalFeasible; - case dual_simplex::crossover_status_t::DUAL_FEASIBLE: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: - return pdlp_termination_status_t::NumericalError; - case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: - return pdlp_termination_status_t::ConcurrentLimit; - case dual_simplex::crossover_status_t::TIME_LIMIT: - return pdlp_termination_status_t::TimeLimit; - default: return pdlp_termination_status_t::NumericalError; - } - }; - termination_status = to_termination_status(crossover_status); - if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { crossover_info = 0; } - rmm::device_uvector final_primal_solution = - cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); - rmm::device_uvector final_dual_solution = - cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); - rmm::device_uvector final_reduced_cost = - cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); - problem.handle_ptr->sync_stream(); - // Negate dual variables and reduced costs for maximization problems - if (problem.maximize) { - adjust_dual_solution_and_reduced_cost( - final_dual_solution, final_reduced_cost, problem.handle_ptr->get_stream()); + if constexpr (std::is_same_v) { + const bool do_crossover = settings.crossover; + i_t crossover_info = 0; + if (do_crossover && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { + crossover_info = -1; + + dual_simplex::lp_problem_t lp(problem.handle_ptr, 1, 1, 1); + dual_simplex::lp_solution_t initial_solution(1, 1); + translate_to_crossover_problem(problem, sol, lp, initial_solution); + dual_simplex::simplex_solver_settings_t dual_simplex_settings; + dual_simplex_settings.time_limit = settings.time_limit; + dual_simplex_settings.iteration_limit = settings.iteration_limit; + dual_simplex_settings.concurrent_halt = settings.concurrent_halt; + dual_simplex::lp_solution_t vertex_solution(lp.num_rows, lp.num_cols); + std::vector vstatus(lp.num_cols); + dual_simplex::crossover_status_t crossover_status = + dual_simplex::crossover(lp, + dual_simplex_settings, + initial_solution, + timer.get_tic_start(), + vertex_solution, + vstatus); + pdlp_termination_status_t termination_status = pdlp_termination_status_t::TimeLimit; + auto to_termination_status = [](dual_simplex::crossover_status_t status) { + switch (status) { + case dual_simplex::crossover_status_t::OPTIMAL: return pdlp_termination_status_t::Optimal; + case dual_simplex::crossover_status_t::PRIMAL_FEASIBLE: + return pdlp_termination_status_t::PrimalFeasible; + case dual_simplex::crossover_status_t::DUAL_FEASIBLE: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::NUMERICAL_ISSUES: + return pdlp_termination_status_t::NumericalError; + case dual_simplex::crossover_status_t::CONCURRENT_LIMIT: + return pdlp_termination_status_t::ConcurrentLimit; + case dual_simplex::crossover_status_t::TIME_LIMIT: + return pdlp_termination_status_t::TimeLimit; + default: return pdlp_termination_status_t::NumericalError; + } + }; + termination_status = to_termination_status(crossover_status); + if (crossover_status == dual_simplex::crossover_status_t::OPTIMAL) { crossover_info = 0; } + rmm::device_uvector final_primal_solution = + cuopt::device_copy(vertex_solution.x, problem.handle_ptr->get_stream()); + rmm::device_uvector final_dual_solution = + cuopt::device_copy(vertex_solution.y, problem.handle_ptr->get_stream()); + rmm::device_uvector final_reduced_cost = + cuopt::device_copy(vertex_solution.z, problem.handle_ptr->get_stream()); problem.handle_ptr->sync_stream(); - } + // Negate dual variables and reduced costs for maximization problems + if (problem.maximize) { + adjust_dual_solution_and_reduced_cost( + final_dual_solution, final_reduced_cost, problem.handle_ptr->get_stream()); + problem.handle_ptr->sync_stream(); + } - // Should be filled with more information from dual simplex - std::vector< - typename optimization_problem_solution_t::additional_termination_information_t> - info(1); - info[0].primal_objective = vertex_solution.user_objective; - info[0].number_of_steps_taken = vertex_solution.iterations; - auto crossover_end = std::chrono::high_resolution_clock::now(); - auto crossover_duration = - std::chrono::duration_cast(crossover_end - start_solver); - info[0].solve_time = crossover_duration.count() / 1000.0; - auto sol_crossover = optimization_problem_solution_t(final_primal_solution, - final_dual_solution, - final_reduced_cost, - problem.objective_name, - problem.var_names, - problem.row_names, - std::move(info), - {termination_status}); - sol.copy_from(problem.handle_ptr, sol_crossover); - CUOPT_LOG_CONDITIONAL_INFO( - !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); - } - if (settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && - crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { - // We finished. Tell dual simplex to stop if it is still running. - CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); - *settings.concurrent_halt = 1; + // Should be filled with more information from dual simplex + std::vector< + typename optimization_problem_solution_t::additional_termination_information_t> + info(1); + info[0].primal_objective = vertex_solution.user_objective; + info[0].number_of_steps_taken = vertex_solution.iterations; + auto crossover_end = std::chrono::high_resolution_clock::now(); + auto crossover_duration = + std::chrono::duration_cast(crossover_end - start_solver); + info[0].solve_time = crossover_duration.count() / 1000.0; + auto sol_crossover = optimization_problem_solution_t(final_primal_solution, + final_dual_solution, + final_reduced_cost, + problem.objective_name, + problem.var_names, + problem.row_names, + std::move(info), + {termination_status}); + sol.copy_from(problem.handle_ptr, sol_crossover); + CUOPT_LOG_CONDITIONAL_INFO( + !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); + } + if (settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && + crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { + // We finished. Tell dual simplex to stop if it is still running. + CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); + *settings.concurrent_halt = 1; + } } return sol; } @@ -1117,13 +1269,22 @@ optimization_problem_solution_t solve_lp_with_method( const timer_t& timer, bool is_batch_mode) { - if (settings.method == method_t::DualSimplex) { - return run_dual_simplex(problem, settings, timer); - } else if (settings.method == method_t::Barrier) { - return run_barrier(problem, settings, timer); - } else if (settings.method == method_t::Concurrent) { - return run_concurrent(problem, settings, timer, is_batch_mode); + if constexpr (std::is_same_v) { + if (settings.method == method_t::DualSimplex) { + return run_dual_simplex(problem, settings, timer); + } else if (settings.method == method_t::Barrier) { + return run_barrier(problem, settings, timer); + } else if (settings.method == method_t::Concurrent) { + return run_concurrent(problem, settings, timer, is_batch_mode); + } else { + return run_pdlp(problem, settings, timer, is_batch_mode); + } } else { + // Float precision only supports PDLP without presolve/crossover + cuopt_expects(settings.method == method_t::PDLP, + error_type_t::ValidationError, + "Float precision only supports PDLP method. DualSimplex, Barrier, and Concurrent " + "require double precision."); return run_pdlp(problem, settings, timer, is_batch_mode); } } @@ -1199,9 +1360,6 @@ optimization_problem_solution_t solve_lp( std::unique_ptr> presolver; auto run_presolve = settings.presolver != presolver_t::None; run_presolve = run_presolve && settings.get_pdlp_warm_start_data().total_pdlp_iterations_ == -1; - if (!run_presolve && !settings_const.inside_mip) { - CUOPT_LOG_INFO("Third-party presolve is disabled, skipping"); - } // Declare result at outer scope so that result->reduced_problem (which may be // referenced by problem.original_problem_ptr) remains alive through the solve. @@ -1413,9 +1571,8 @@ cuopt::linear_programming::optimization_problem_t mps_data_model_to_op if (data_model.get_objective_name().size() != 0) { op_problem.set_objective_name(data_model.get_objective_name()); } - if (data_model.get_problem_name().size() != 0) { - op_problem.set_problem_name(data_model.get_problem_name().data()); - } + auto problem_name = data_model.get_problem_name(); + op_problem.set_problem_name(problem_name); if (data_model.get_variable_names().size() != 0) { op_problem.set_variable_names(data_model.get_variable_names()); } diff --git a/cpp/src/pdlp/solver_settings.cu b/cpp/src/pdlp/solver_settings.cu index 560e40f302..7acfc7481c 100644 --- a/cpp/src/pdlp/solver_settings.cu +++ b/cpp/src/pdlp/solver_settings.cu @@ -382,7 +382,7 @@ pdlp_solver_settings_t::get_pdlp_warm_start_data_view() const noexcept return pdlp_warm_start_data_view_; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class pdlp_solver_settings_t; #endif diff --git a/cpp/src/pdlp/solver_solution.cu b/cpp/src/pdlp/solver_solution.cu index a8001b91c1..10e6a80593 100644 --- a/cpp/src/pdlp/solver_solution.cu +++ b/cpp/src/pdlp/solver_solution.cu @@ -448,7 +448,7 @@ void optimization_problem_solution_t::write_to_sol_file( std::string(filename), status, objective_value, var_names_, solution); } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class optimization_problem_solution_t; #endif diff --git a/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu index 24ef29b243..d17a88dd29 100644 --- a/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/pdlp/step_size_strategy/adaptive_step_size_strategy.cu @@ -578,7 +578,7 @@ adaptive_step_size_strategy_t::view() F_TYPE * dual_step_size, \ int* pdhg_iteration); -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/termination_strategy/convergence_information.cu b/cpp/src/pdlp/termination_strategy/convergence_information.cu index 9b01608b47..ab0c921cc7 100644 --- a/cpp/src/pdlp/termination_strategy/convergence_information.cu +++ b/cpp/src/pdlp/termination_strategy/convergence_information.cu @@ -996,7 +996,7 @@ convergence_information_t::to_primal_quality_adapter( primal_objective_.element(0, stream_view_)}; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class convergence_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu index 14114d306f..dbb35b732d 100644 --- a/cpp/src/pdlp/termination_strategy/infeasibility_information.cu +++ b/cpp/src/pdlp/termination_strategy/infeasibility_information.cu @@ -745,7 +745,7 @@ typename infeasibility_information_t::view_t infeasibility_information return v; } -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT template class infeasibility_information_t; template __global__ void compute_remaining_stats_kernel( diff --git a/cpp/src/pdlp/termination_strategy/termination_strategy.cu b/cpp/src/pdlp/termination_strategy/termination_strategy.cu index 033cbdbfda..7179df6a49 100644 --- a/cpp/src/pdlp/termination_strategy/termination_strategy.cu +++ b/cpp/src/pdlp/termination_strategy/termination_strategy.cu @@ -681,7 +681,7 @@ void pdlp_termination_strategy_t::print_termination_criteria(i_t itera bool per_constraint_residual, \ int batch_size); -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/pdlp/translate.hpp b/cpp/src/pdlp/translate.hpp index b8e0075733..b143a206d4 100644 --- a/cpp/src/pdlp/translate.hpp +++ b/cpp/src/pdlp/translate.hpp @@ -133,7 +133,7 @@ void translate_to_crossover_problem(const detail::problem_t& problem, std::vector slack(problem.n_constraints); std::vector tmp_x = cuopt::host_copy(sol.get_primal_solution(), stream); stream.synchronize(); - dual_simplex::matrix_vector_multiply(lp.A, 1.0, tmp_x, 0.0, slack); + dual_simplex::matrix_vector_multiply(lp.A, f_t(1.0), tmp_x, f_t(0.0), slack); CUOPT_LOG_DEBUG("Multiplied A and x"); lp.A.col_start.resize(problem.n_variables + problem.n_constraints + 1); diff --git a/cpp/src/pdlp/utilities/problem_checking.cu b/cpp/src/pdlp/utilities/problem_checking.cu index f970f8740d..b10850de27 100644 --- a/cpp/src/pdlp/utilities/problem_checking.cu +++ b/cpp/src/pdlp/utilities/problem_checking.cu @@ -340,7 +340,7 @@ bool problem_checking_t::has_crossing_bounds( #define INSTANTIATE(F_TYPE) template class problem_checking_t; -#if MIP_INSTANTIATE_FLOAT +#if MIP_INSTANTIATE_FLOAT || PDLP_INSTANTIATE_FLOAT INSTANTIATE(float) #endif diff --git a/cpp/src/utilities/copy_helpers.hpp b/cpp/src/utilities/copy_helpers.hpp index 36a4659059..3558d31ea5 100644 --- a/cpp/src/utilities/copy_helpers.hpp +++ b/cpp/src/utilities/copy_helpers.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -335,6 +336,17 @@ raft::device_span make_span(rmm::device_uvector const& container) return raft::device_span(container.data(), container.size()); } +template +raft::device_span make_span(rmm::device_scalar& scalar) +{ + return raft::device_span(scalar.data(), 1); +} + +template +raft::device_span make_span(rmm::device_scalar const& scalar) +{ + return raft::device_span(scalar.data(), 1); +} // resizes the device vector if it the std vector is larger template inline void expand_device_copy(rmm::device_uvector& device_vec, diff --git a/cpp/src/utilities/cuda_helpers.cuh b/cpp/src/utilities/cuda_helpers.cuh index 946099648d..7c591624d2 100644 --- a/cpp/src/utilities/cuda_helpers.cuh +++ b/cpp/src/utilities/cuda_helpers.cuh @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -20,6 +21,17 @@ #include #include +#if CUDART_VERSION >= 12080 +// TODO: investigate why this is necessary? dependency conflict? file NVBUG if necessary +#include +#ifndef NVTX_NULLPTR +#define NVTX_NULLPTR nullptr +#endif +#ifndef NVTX_REINTERPRET_CAST +#define NVTX_REINTERPRET_CAST(type, value) (reinterpret_cast(value)) +#endif +#include +#endif namespace cuopt { #if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700) @@ -237,4 +249,48 @@ inline size_t get_device_memory_size() } } +// NOTE: this marks a range of virtual memory as initialized. This is not tied to any object's +// lifetime As such, when using a pool for allocations, false negatives could occurs e.g. a range +// previously marked as initialized is now occupied by a new uninitialized object Unlikely to cause +// issues in practice - but worth noting (RAII? I'm not even sure the API allows to un-mark a range +// as initialized) +static inline void mark_memory_as_initialized(const void* ptr, size_t size, cudaStream_t stream = 0) +{ +#if CUDART_VERSION >= 12080 + + if (size == 0 || ptr == nullptr) return; + +#if defined(CUDA_API_PER_THREAD_DEFAULT_STREAM) + constexpr auto PerThreadDefaultStream = true; +#else + constexpr auto PerThreadDefaultStream = false; +#endif + + nvtxMemVirtualRangeDesc_t nvtxRangeDesc = {}; + nvtxRangeDesc.size = size; + nvtxRangeDesc.ptr = ptr; + + nvtxMemMarkInitializedBatch_t nvtxRegionsDesc = {}; + nvtxRegionsDesc.extCompatID = NVTX_EXT_COMPATID_MEM; + nvtxRegionsDesc.structSize = sizeof(nvtxRegionsDesc); + nvtxRegionsDesc.regionType = NVTX_MEM_TYPE_VIRTUAL_ADDRESS; + nvtxRegionsDesc.regionDescCount = 1; + nvtxRegionsDesc.regionDescElementSize = sizeof(nvtxRangeDesc); + nvtxRegionsDesc.regionDescElements = &nvtxRangeDesc; + + nvtxMemCudaMarkInitialized( + raft::common::nvtx::detail::domain_store::value(), + stream, + PerThreadDefaultStream, + &nvtxRegionsDesc); +#endif +} + +template +static inline void mark_span_as_initialized(const raft::device_span span, + rmm::cuda_stream_view stream) +{ + mark_memory_as_initialized(span.data(), span.size() * sizeof(T), stream.value()); +} + } // namespace cuopt diff --git a/cpp/src/utilities/determinism_log.hpp b/cpp/src/utilities/determinism_log.hpp new file mode 100644 index 0000000000..5cad81c249 --- /dev/null +++ b/cpp/src/utilities/determinism_log.hpp @@ -0,0 +1,35 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#ifndef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(logger, ...) \ + do { \ + } while (0) +#endif + +#ifndef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(...) \ + do { \ + } while (0) +#endif + +#ifndef CUOPT_DETERMINISM_LOG +#define CUOPT_DETERMINISM_LOG(...) \ + do { \ + } while (0) +#endif diff --git a/cpp/src/utilities/models/fj_predictor/header.h b/cpp/src/utilities/models/fj_predictor/header.h new file mode 100644 index 0000000000..ccae87627f --- /dev/null +++ b/cpp/src/utilities/models/fj_predictor/header.h @@ -0,0 +1,47 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +class fj_predictor { + public: + union Entry { + int missing; + double fvalue; + int qvalue; + }; + + static int32_t get_num_target(void); + static void get_num_class(int32_t* out); + static int32_t get_num_feature(void); + static const char* get_threshold_type(void); + static const char* get_leaf_output_type(void); + static void predict(union Entry* data, int pred_margin, double* result); + static void postprocess(double* result); + static int quantize(double val, unsigned fid); + + // Feature names + static constexpr int NUM_FEATURES = 12; + static const char* feature_names[NUM_FEATURES]; +}; // class fj_predictor diff --git a/cpp/src/utilities/models/fj_predictor/main.cpp b/cpp/src/utilities/models/fj_predictor/main.cpp new file mode 100644 index 0000000000..ac5cd1ed11 --- /dev/null +++ b/cpp/src/utilities/models/fj_predictor/main.cpp @@ -0,0 +1,11828 @@ + +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "header.h" + +#if defined(__clang__) || defined(__GNUC__) +#define LIKELY(x) __builtin_expect(!!(x), 1) +#define UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif +#define N_TARGET 1 +#define MAX_N_CLASS 1 + +const unsigned char is_categorical[] = { + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, +}; +static const int32_t num_class[] = { + 1, +}; + +int32_t fj_predictor::get_num_target(void) { return N_TARGET; } +void fj_predictor::get_num_class(int32_t* out) +{ + for (int i = 0; i < N_TARGET; ++i) { + out[i] = num_class[i]; + } +} +int32_t fj_predictor::get_num_feature(void) { return 12; } +const char* fj_predictor::get_threshold_type(void) { return "float64"; } +const char* fj_predictor::get_leaf_output_type(void) { return "float64"; } + +void fj_predictor::predict(union Entry* data, int pred_margin, double* result) +{ + // Quantize data + for (int i = 0; i < 12; ++i) { + if (data[i].missing != -1 && !is_categorical[i]) { + data[i].qvalue = quantize(data[i].fvalue, i); + } + } + + unsigned int tmp; + if (UNLIKELY(false || (data[0].qvalue <= 186))) { + if (LIKELY(false || (data[0].qvalue <= 98))) { + if (UNLIKELY(false || (data[0].qvalue <= 44))) { + result[0] += 22901.255344838846; + } else { + if (LIKELY(false || (data[0].qvalue <= 74))) { + result[0] += 23329.74375127941; + } else { + result[0] += 23615.614222033248; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 144))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + result[0] += 24209.472824119093; + } else { + if (LIKELY(false || (data[6].qvalue <= 100))) { + result[0] += 23869.313116934638; + } else { + result[0] += 23405.54395751655; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 88))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 24902.415701614485; + } else { + result[0] += 24520.27915996134; + } + } else { + result[0] += 23891.56119652762; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 262))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[0].qvalue <= 218))) { + result[0] += 25411.420502457517; + } else { + result[0] += 25917.434788937655; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[1].qvalue <= 116))) { + if (LIKELY(false || (data[0].qvalue <= 224))) { + result[0] += 24824.90282590602; + } else { + result[0] += 25260.302972714355; + } + } else { + result[0] += 24241.872897906287; + } + } else { + result[0] += 23668.087700157346; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 112))) { + if (UNLIKELY(false || (data[0].qvalue <= 312))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 26558.857562746103; + } else { + result[0] += 26202.38213380121; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 102))) { + result[0] += 25849.940581446823; + } else { + result[0] += 25230.335505164217; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 92))) { + if (LIKELY(false || (data[8].qvalue <= 126))) { + result[0] += 26911.44811674291; + } else { + result[0] += 26503.327098291113; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 14))) { + result[0] += 25835.236675109372; + } else { + result[0] += 26394.354620052833; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 430))) { + if (LIKELY(false || (data[6].qvalue <= 152))) { + if (UNLIKELY(false || (data[0].qvalue <= 352))) { + result[0] += 25138.58875113828; + } else { + result[0] += 25959.666864308958; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 406))) { + result[0] += 23920.482800539634; + } else { + result[0] += 24831.920082473396; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (LIKELY(false || (data[1].qvalue <= 152))) { + result[0] += 26247.05905907499; + } else { + result[0] += 25812.009772798196; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + result[0] += 24485.25504118496; + } else { + result[0] += 25971.80731105497; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 188))) { + if (LIKELY(false || (data[0].qvalue <= 104))) { + if (LIKELY(false || (data[0].qvalue <= 50))) { + if (LIKELY(false || (data[0].qvalue <= 24))) { + result[0] += -2102.2335038546057; + } else { + result[0] += -1814.143470277424; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 62))) { + result[0] += -1350.4930205440783; + } else { + result[0] += -1691.59714939238; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 62))) { + if (LIKELY(false || (data[0].qvalue <= 156))) { + if (UNLIKELY(false || (data[0].qvalue <= 126))) { + result[0] += -925.0448121271659; + } else { + result[0] += -609.5991225040165; + } + } else { + result[0] += -202.96527072586937; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 116))) { + if (LIKELY(false || (data[0].qvalue <= 152))) { + result[0] += -1181.817220363881; + } else { + result[0] += -759.0207821022941; + } + } else { + result[0] += -1560.8110503642508; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 266))) { + if (LIKELY(false || (data[7].qvalue <= 66))) { + if (LIKELY(false || (data[0].qvalue <= 228))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 578.7799431040581; + } else { + result[0] += 127.52408840507688; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 933.7806696735934; + } else { + result[0] += 471.418662728938; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 142))) { + if (LIKELY(false || (data[7].qvalue <= 128))) { + if (LIKELY(false || (data[6].qvalue <= 100))) { + result[0] += 24.61431879503551; + } else { + result[0] += -514.7441916892188; + } + } else { + result[0] += -840.288058328428; + } + } else { + result[0] += -1369.132803656919; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 88))) { + if (UNLIKELY(false || (data[0].qvalue <= 320))) { + if (LIKELY(false || (data[6].qvalue <= 60))) { + result[0] += 1297.8937605229921; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 48))) { + result[0] += 159.84923661334855; + } else { + result[0] += 806.892953963784; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 92))) { + result[0] += 1631.4555218267105; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 14))) { + result[0] += 602.1591975811427; + } else { + result[0] += 1225.4498129353617; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 346))) { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[7].qvalue <= 134))) { + result[0] += 477.26815448941943; + } else { + result[0] += -266.49089797874365; + } + } else { + result[0] += -1085.9174116108277; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 136))) { + if (UNLIKELY(false || (data[9].qvalue <= 30))) { + result[0] += 710.5340339475686; + } else { + result[0] += 1160.9702839561826; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 426))) { + result[0] += -336.2082424932214; + } else { + result[0] += 697.2377446648411; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 184))) { + if (UNLIKELY(false || (data[0].qvalue <= 92))) { + if (UNLIKELY(false || (data[0].qvalue <= 38))) { + result[0] += -1827.1103815856493; + } else { + if (LIKELY(false || (data[6].qvalue <= 92))) { + if (LIKELY(false || (data[0].qvalue <= 68))) { + result[0] += -1460.9279929556785; + } else { + result[0] += -1204.5954946215916; + } + } else { + result[0] += -1703.764420874855; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 136))) { + if (LIKELY(false || (data[7].qvalue <= 62))) { + result[0] += -834.0606253314141; + } else { + result[0] += -1268.097736051363; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 62))) { + if (LIKELY(false || (data[0].qvalue <= 168))) { + result[0] += -450.7445362623278; + } else { + result[0] += -151.5065914814049; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + result[0] += -813.8840731408728; + } else { + result[0] += -1552.9886128870096; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 258))) { + if (LIKELY(false || (data[7].qvalue <= 62))) { + if (UNLIKELY(false || (data[0].qvalue <= 212))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 300.18949268400604; + } else { + result[0] += -82.16209082891513; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 821.3155839598878; + } else { + result[0] += 395.4300146794792; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 142))) { + if (LIKELY(false || (data[7].qvalue <= 134))) { + if (LIKELY(false || (data[6].qvalue <= 94))) { + result[0] += -36.622858072571994; + } else { + result[0] += -472.4429272195769; + } + } else { + result[0] += -886.8842878988258; + } + } else { + result[0] += -1274.9255101678516; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 82))) { + if (UNLIKELY(false || (data[0].qvalue <= 302))) { + if (LIKELY(false || (data[6].qvalue <= 50))) { + result[0] += 1102.9306261545728; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 48))) { + result[0] += 4.269252395168786; + } else { + result[0] += 673.9694942587568; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 48))) { + result[0] += 844.0310059878134; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 336))) { + result[0] += 1254.6320984291067; + } else { + result[0] += 1496.534953542473; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 342))) { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[7].qvalue <= 134))) { + result[0] += 369.8856508010498; + } else { + result[0] += -301.745500216277; + } + } else { + result[0] += -971.5869097857602; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 152))) { + if (LIKELY(false || (data[1].qvalue <= 124))) { + result[0] += 1049.0667116337365; + } else { + result[0] += 655.8288897611133; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 430))) { + result[0] += -483.20221095743983; + } else { + result[0] += 574.5979992744856; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 192))) { + if (LIKELY(false || (data[0].qvalue <= 110))) { + if (LIKELY(false || (data[0].qvalue <= 56))) { + if (UNLIKELY(false || (data[0].qvalue <= 20))) { + result[0] += -1731.6694255800344; + } else { + result[0] += -1463.8734271807368; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 62))) { + result[0] += -1013.7125063069707; + } else { + result[0] += -1295.792308734329; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (LIKELY(false || (data[0].qvalue <= 162))) { + result[0] += -512.040169492541; + } else { + result[0] += -62.54166123570826; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 90))) { + if (LIKELY(false || (data[0].qvalue <= 160))) { + result[0] += -807.711585982142; + } else { + result[0] += -447.3244114975565; + } + } else { + result[0] += -1112.4075523200308; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 274))) { + if (LIKELY(false || (data[7].qvalue <= 68))) { + if (LIKELY(false || (data[0].qvalue <= 236))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 434.1102182190951; + } else { + result[0] += 64.6833316545062; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 24))) { + result[0] += 911.5851039227344; + } else { + result[0] += 509.70074610455003; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 36))) { + result[0] += -922.6420870752214; + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + result[0] += -43.11646969985167; + } else { + result[0] += -711.5443228439684; + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 112))) { + if (LIKELY(false || (data[7].qvalue <= 50))) { + if (UNLIKELY(false || (data[0].qvalue <= 322))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 1217.5857081225881; + } else { + result[0] += 862.7945760545485; + } + } else { + result[0] += 1381.341515141895; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 340))) { + if (LIKELY(false || (data[7].qvalue <= 128))) { + result[0] += 618.2916036455348; + } else { + result[0] += -156.99063121911638; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 78))) { + result[0] += 863.993956946003; + } else { + result[0] += 1217.2914460169281; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 432))) { + if (LIKELY(false || (data[1].qvalue <= 138))) { + if (LIKELY(false || (data[2].qvalue <= 202))) { + result[0] += 554.0458524017479; + } else { + result[0] += -451.2824254081932; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 132))) { + result[0] += -205.44955983215206; + } else { + result[0] += -1114.6634922865608; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (UNLIKELY(false || (data[8].qvalue <= 44))) { + result[0] += 434.76778433173723; + } else { + result[0] += 874.836013742844; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + result[0] += -415.8493090687832; + } else { + result[0] += 895.0728029685396; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 180))) { + if (UNLIKELY(false || (data[0].qvalue <= 86))) { + if (UNLIKELY(false || (data[0].qvalue <= 32))) { + result[0] += -1510.0101454113396; + } else { + if (LIKELY(false || (data[1].qvalue <= 78))) { + result[0] += -1126.5760081015337; + } else { + result[0] += -1392.4200114552195; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 128))) { + if (LIKELY(false || (data[1].qvalue <= 78))) { + result[0] += -742.4787091372192; + } else { + result[0] += -1128.3169223737384; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 82))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += -217.1747383855503; + } else { + result[0] += -505.7066609860621; + } + } else { + result[0] += -926.1157369421153; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 252))) { + if (LIKELY(false || (data[7].qvalue <= 50))) { + if (UNLIKELY(false || (data[0].qvalue <= 208))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 312.45787313138067; + } else { + result[0] += -10.665507478881368; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 562.9336214602067; + } else { + result[0] += 238.03704733164884; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 116))) { + if (LIKELY(false || (data[7].qvalue <= 126))) { + if (LIKELY(false || (data[0].qvalue <= 212))) { + result[0] += -279.2242547615078; + } else { + result[0] += 42.62552906704628; + } + } else { + result[0] += -677.2319392589053; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 152))) { + result[0] += -619.1669830813266; + } else { + result[0] += -1313.98491170941; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 76))) { + if (UNLIKELY(false || (data[0].qvalue <= 296))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 874.524930910277; + } else { + result[0] += 493.74627459103635; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 48))) { + if (UNLIKELY(false || (data[10].qvalue <= 32))) { + result[0] += 305.25905254947634; + } else { + result[0] += 893.8383559035191; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 330))) { + result[0] += 1017.5358679778622; + } else { + result[0] += 1243.0211544735387; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 330))) { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (UNLIKELY(false || (data[9].qvalue <= 40))) { + result[0] += -310.9808580488324; + } else { + result[0] += 277.1404180803976; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 68))) { + result[0] += -1191.3641199442363; + } else { + result[0] += -376.5469216582162; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[7].qvalue <= 134))) { + result[0] += 906.2480551197423; + } else { + result[0] += 554.4928258443013; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 424))) { + result[0] += -334.1159618884123; + } else { + result[0] += 515.865642037843; + } + } + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (UNLIKELY(false || (data[3].qvalue <= 44))) { + if (LIKELY(false || (data[7].qvalue <= 24))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[2].qvalue <= 128))) { + result[0] += 200.2716866984727; + } else { + result[0] += -33.31807134567322; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 34))) { + if (LIKELY(false || (data[2].qvalue <= 124))) { + result[0] += 2.1267132045491657; + } else { + result[0] += -376.52573890556465; + } + } else { + result[0] += 146.5594659396158; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 84))) { + if (UNLIKELY(false || (data[6].qvalue <= 40))) { + if (LIKELY(false || (data[2].qvalue <= 88))) { + result[0] += -78.39211587508055; + } else { + result[0] += -211.1780949469055; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 94))) { + result[0] += -50.54335036012877; + } else { + result[0] += 80.00020842727285; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 40))) { + result[0] += -483.63985186717866; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 56))) { + result[0] += 31.9766310228433; + } else { + result[0] += -151.25515710703033; + } + } + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (LIKELY(false || (data[10].qvalue <= 108))) { + if (UNLIKELY(false || (data[8].qvalue <= 20))) { + if (UNLIKELY(false || (data[8].qvalue <= 0))) { + result[0] += 111.60897433537673; + } else { + result[0] += -38.459174422337334; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 152))) { + result[0] += 39.893177773342615; + } else { + result[0] += 117.1746253290821; + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 40))) { + if (UNLIKELY(false || (data[3].qvalue <= 58))) { + result[0] += -269.8313885970149; + } else { + result[0] += -44.81099090001982; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 50))) { + result[0] += 44.36810505858426; + } else { + result[0] += -17.031285003457658; + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 66))) { + result[0] += 29.134840065248483; + } else { + result[0] += -142.57181028298447; + } + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 100))) { + if (LIKELY(false || (data[7].qvalue <= 196))) { + if (UNLIKELY(false || (data[4].qvalue <= 52))) { + result[0] += -122.23715116402353; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 124))) { + result[0] += 97.82653352215696; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 88))) { + result[0] += 19.540553416535854; + } else { + result[0] += -98.26047713895753; + } + } + } + } else { + result[0] += -208.9897177114526; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += -9.013216655506389; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 154))) { + result[0] += -171.41131706373193; + } else { + if (LIKELY(false || (data[7].qvalue <= 188))) { + result[0] += -228.5813161018793; + } else { + result[0] += -325.7003509988099; + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 196))) { + if (LIKELY(false || (data[0].qvalue <= 114))) { + if (LIKELY(false || (data[0].qvalue <= 60))) { + if (UNLIKELY(false || (data[0].qvalue <= 16))) { + result[0] += -1431.9188559535999; + } else { + result[0] += -1190.2844870798897; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 82))) { + result[0] += -829.9317534554965; + } else { + result[0] += -1180.4578431880107; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 94))) { + if (LIKELY(false || (data[0].qvalue <= 166))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += -258.2731590553095; + } else { + result[0] += -523.7930385002326; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 24))) { + result[0] += 148.2458712176789; + } else { + result[0] += -208.97850691995546; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 152))) { + result[0] += -766.4846303120403; + } else { + result[0] += -1387.2597830966474; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 282))) { + if (LIKELY(false || (data[7].qvalue <= 82))) { + if (LIKELY(false || (data[6].qvalue <= 54))) { + if (UNLIKELY(false || (data[0].qvalue <= 234))) { + result[0] += 347.56940876633854; + } else { + result[0] += 659.3973774082955; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 48))) { + result[0] += -299.8000443073724; + } else { + result[0] += 213.67905068511027; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + result[0] += -241.4835022721958; + } else { + result[0] += -964.8334869254049; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 96))) { + if (LIKELY(false || (data[7].qvalue <= 50))) { + if (UNLIKELY(false || (data[0].qvalue <= 326))) { + if (LIKELY(false || (data[6].qvalue <= 54))) { + result[0] += 1024.8241421866958; + } else { + result[0] += 691.4425294017764; + } + } else { + result[0] += 1134.476489243314; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 348))) { + if (LIKELY(false || (data[7].qvalue <= 178))) { + result[0] += 607.890368257065; + } else { + result[0] += -428.4659382893519; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 414))) { + result[0] += 1014.748894801393; + } else { + result[0] += 597.6335980398155; + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 156))) { + if (UNLIKELY(false || (data[0].qvalue <= 366))) { + if (LIKELY(false || (data[7].qvalue <= 102))) { + result[0] += 342.26282113720123; + } else { + result[0] += -230.30740062057927; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 104))) { + result[0] += 813.8543527953774; + } else { + result[0] += 415.6624160363879; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 446))) { + if (LIKELY(false || (data[6].qvalue <= 174))) { + result[0] += -259.7429923675005; + } else { + result[0] += -1426.7951683026927; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + result[0] += 762.73962100054; + } else { + result[0] += 231.27954235732147; + } + } + } + } + } + } + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (UNLIKELY(false || (data[3].qvalue <= 44))) { + if (LIKELY(false || (data[7].qvalue <= 24))) { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + if (UNLIKELY(false || (data[2].qvalue <= 10))) { + result[0] += 301.84658146708784; + } else { + result[0] += 144.51444115717683; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 34))) { + if (UNLIKELY(false || (data[3].qvalue <= 0))) { + result[0] += -133.3520185035297; + } else { + result[0] += 7.825198511802189; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 186.55141223016244; + } else { + result[0] += 66.07816401815487; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 84))) { + if (UNLIKELY(false || (data[9].qvalue <= 30))) { + if (LIKELY(false || (data[1].qvalue <= 30))) { + result[0] += 190.81711934969553; + } else { + result[0] += -49.06287353594854; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 6))) { + result[0] += -190.1399731591398; + } else { + result[0] += -58.312564377257914; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 40))) { + result[0] += -486.8565320491534; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 122))) { + result[0] += -23.79376937838657; + } else { + result[0] += -193.64446213252586; + } + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (LIKELY(false || (data[10].qvalue <= 108))) { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + if (UNLIKELY(false || (data[10].qvalue <= 0))) { + result[0] += 154.980784796019; + } else { + result[0] += -70.70779749773081; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 166))) { + result[0] += 42.70546399312704; + } else { + result[0] += 231.67165411885554; + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 40))) { + if (UNLIKELY(false || (data[3].qvalue <= 58))) { + result[0] += -276.0507296705326; + } else { + result[0] += -48.24184527320793; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 50))) { + result[0] += 46.29969181016952; + } else { + result[0] += -22.46817209136037; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 196))) { + if (LIKELY(false || (data[2].qvalue <= 184))) { + if (UNLIKELY(false || (data[9].qvalue <= 70))) { + result[0] += 259.7266642867935; + } else { + result[0] += -33.9062832423356; + } + } else { + result[0] += -130.08036962157004; + } + } else { + result[0] += -206.5135454754138; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 188))) { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 21.01496012390576; + } else { + if (LIKELY(false || (data[9].qvalue <= 16))) { + if (LIKELY(false || (data[9].qvalue <= 2))) { + result[0] += -166.20109103415467; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 22))) { + result[0] += 43.76493225425393; + } else { + result[0] += -110.8984323288921; + } + } + } else { + result[0] += -265.77242681150005; + } + } + } else { + result[0] += -316.46151789497696; + } + } + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (UNLIKELY(false || (data[3].qvalue <= 44))) { + if (LIKELY(false || (data[7].qvalue <= 42))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[2].qvalue <= 128))) { + result[0] += 196.29561739518834; + } else { + result[0] += -31.524131824672523; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 154))) { + if (UNLIKELY(false || (data[4].qvalue <= 6))) { + result[0] += 101.91406015487678; + } else { + result[0] += 1.3921745492571436; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 20))) { + result[0] += -150.8055895533361; + } else { + result[0] += 146.0669399096082; + } + } + } + } else { + if (LIKELY(false || (data[3].qvalue <= 38))) { + if (LIKELY(false || (data[7].qvalue <= 122))) { + if (LIKELY(false || (data[4].qvalue <= 76))) { + result[0] += -101.79580164134511; + } else { + result[0] += 51.3302149880617; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 108))) { + result[0] += -334.80985020089327; + } else { + result[0] += -145.88767910265673; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 82))) { + result[0] += -560.2505700484534; + } else { + result[0] += -213.96025392317247; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (LIKELY(false || (data[10].qvalue <= 96))) { + if (UNLIKELY(false || (data[8].qvalue <= 20))) { + if (UNLIKELY(false || (data[8].qvalue <= 0))) { + result[0] += 105.68674793264172; + } else { + result[0] += -43.57721621182288; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + result[0] += -147.9863609603396; + } else { + result[0] += 50.11914192647265; + } + } + } else { + if (LIKELY(false || (data[8].qvalue <= 138))) { + if (LIKELY(false || (data[3].qvalue <= 120))) { + result[0] += -21.50623989909495; + } else { + result[0] += 36.087102588326; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 172))) { + result[0] += -401.56511373374224; + } else { + result[0] += -47.54506742735205; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 196))) { + if (LIKELY(false || (data[10].qvalue <= 136))) { + if (LIKELY(false || (data[4].qvalue <= 70))) { + result[0] += -58.788243146060125; + } else { + result[0] += 35.01063889842818; + } + } else { + result[0] += -167.15525202440935; + } + } else { + result[0] += -185.86643283746199; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 188))) { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 18.914144121003567; + } else { + if (LIKELY(false || (data[9].qvalue <= 16))) { + if (LIKELY(false || (data[9].qvalue <= 2))) { + result[0] += -149.58318971768668; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 22))) { + result[0] += 39.39279508530201; + } else { + result[0] += -99.81036974381612; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 28))) { + result[0] += -574.5809531834742; + } else { + result[0] += -223.51903615680277; + } + } + } + } else { + result[0] += -284.8251127560464; + } + } + if (UNLIKELY(false || (data[0].qvalue <= 178))) { + if (UNLIKELY(false || (data[0].qvalue <= 84))) { + if (UNLIKELY(false || (data[0].qvalue <= 34))) { + result[0] += -1224.3672336316197; + } else { + if (LIKELY(false || (data[6].qvalue <= 62))) { + result[0] += -890.0738935144229; + } else { + result[0] += -1083.6868213970283; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 88))) { + if (LIKELY(false || (data[0].qvalue <= 132))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += -449.253175915189; + } else { + result[0] += -666.9058347442864; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 36))) { + result[0] += -171.0454878027209; + } else { + result[0] += -404.9936499215746; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 108))) { + result[0] += -738.8736900979811; + } else { + result[0] += -1156.8338657457468; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 246))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 376.54293691682807; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 204))) { + result[0] += -19.164395129970544; + } else { + result[0] += 229.71203236622483; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 116))) { + if (LIKELY(false || (data[7].qvalue <= 92))) { + result[0] += -93.71602815702187; + } else { + result[0] += -442.3743373003131; + } + } else { + result[0] += -794.9240557679232; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 96))) { + if (UNLIKELY(false || (data[0].qvalue <= 294))) { + if (LIKELY(false || (data[7].qvalue <= 44))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 776.7722682804047; + } else { + result[0] += 551.7601378282178; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 126))) { + result[0] += 359.00317634305014; + } else { + result[0] += -261.0799150747045; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 54))) { + if (UNLIKELY(false || (data[0].qvalue <= 340))) { + result[0] += 868.4127360515845; + } else { + result[0] += 1030.2866556923334; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 174))) { + result[0] += 759.8335007432447; + } else { + result[0] += 375.57847778528725; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 362))) { + if (LIKELY(false || (data[1].qvalue <= 124))) { + if (LIKELY(false || (data[7].qvalue <= 152))) { + result[0] += 192.76419647400402; + } else { + result[0] += -627.210504284817; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 122))) { + result[0] += -248.81615213762672; + } else { + result[0] += -969.1369200758126; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 152))) { + if (LIKELY(false || (data[1].qvalue <= 124))) { + result[0] += 773.3204928636587; + } else { + result[0] += 406.4377394893374; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 448))) { + result[0] += -133.64446813672; + } else { + result[0] += 531.8523379173158; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 198))) { + if (LIKELY(false || (data[0].qvalue <= 116))) { + if (LIKELY(false || (data[0].qvalue <= 64))) { + if (UNLIKELY(false || (data[0].qvalue <= 14))) { + result[0] += -1181.6504962663746; + } else { + result[0] += -963.5370227469084; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 70))) { + result[0] += -638.280415142769; + } else { + result[0] += -906.2068637569266; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 78))) { + if (UNLIKELY(false || (data[0].qvalue <= 160))) { + result[0] += -355.3777345047579; + } else { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 38.63569197354808; + } else { + result[0] += -203.4298594886395; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 106))) { + result[0] += -501.25518228442854; + } else { + result[0] += -904.0956495312481; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 284))) { + if (LIKELY(false || (data[6].qvalue <= 74))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + if (LIKELY(false || (data[0].qvalue <= 242))) { + result[0] += 325.0351233983085; + } else { + result[0] += 574.7980032330352; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 240))) { + result[0] += 50.603122386774714; + } else { + result[0] += 307.4265167122256; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 124))) { + if (LIKELY(false || (data[2].qvalue <= 200))) { + result[0] += -75.63749275285782; + } else { + result[0] += -878.8670678802445; + } + } else { + result[0] += -784.825049044781; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 108))) { + if (LIKELY(false || (data[1].qvalue <= 82))) { + if (LIKELY(false || (data[9].qvalue <= 128))) { + if (LIKELY(false || (data[0].qvalue <= 354))) { + result[0] += 768.9030375927771; + } else { + result[0] += 982.7632961026947; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 26))) { + result[0] += 881.7519427243232; + } else { + result[0] += 460.8225104674566; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 352))) { + result[0] += 336.6542893696312; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 12))) { + result[0] += 232.48634402594894; + } else { + result[0] += 754.2862591795262; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 434))) { + if (LIKELY(false || (data[1].qvalue <= 138))) { + if (LIKELY(false || (data[10].qvalue <= 116))) { + result[0] += 486.1115775722369; + } else { + result[0] += -240.0215539033738; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 152))) { + result[0] += -340.1982716887718; + } else { + result[0] += -1142.2235138961084; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (UNLIKELY(false || (data[2].qvalue <= 106))) { + result[0] += 356.11751909622217; + } else { + result[0] += 708.5483451506925; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + result[0] += -425.3011321722016; + } else { + result[0] += 737.8486431169086; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 176))) { + if (UNLIKELY(false || (data[0].qvalue <= 82))) { + if (UNLIKELY(false || (data[0].qvalue <= 30))) { + result[0] += -1008.2347845448372; + } else { + if (LIKELY(false || (data[6].qvalue <= 92))) { + result[0] += -759.2605409681978; + } else { + result[0] += -1002.0185264155406; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 90))) { + if (UNLIKELY(false || (data[0].qvalue <= 122))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += -430.8085418741898; + } else { + result[0] += -615.9206678101054; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += -160.1037858692929; + } else { + result[0] += -361.25591627953935; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + result[0] += -677.2268849569755; + } else { + result[0] += -1084.1504346623935; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 256))) { + if (LIKELY(false || (data[7].qvalue <= 48))) { + if (LIKELY(false || (data[0].qvalue <= 216))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 219.18790221063765; + } else { + result[0] += 8.224416225928314; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 473.5370228126004; + } else { + result[0] += 241.42467866161678; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 126))) { + if (LIKELY(false || (data[6].qvalue <= 100))) { + result[0] += -37.498571958273004; + } else { + result[0] += -365.5446484967875; + } + } else { + result[0] += -627.0927668810837; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 88))) { + if (UNLIKELY(false || (data[0].qvalue <= 306))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 703.8669257065242; + } else { + result[0] += 489.86267404805426; + } + } else { + result[0] += 234.87338838535948; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 64))) { + if (LIKELY(false || (data[0].qvalue <= 356))) { + result[0] += 717.6793071720917; + } else { + result[0] += 869.7251021367146; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 6))) { + result[0] += 129.6795962020335; + } else { + result[0] += 601.4460117834096; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 374))) { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[7].qvalue <= 176))) { + result[0] += 214.20152575675183; + } else { + result[0] += -537.860116518967; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 24))) { + result[0] += 39.286767186140054; + } else { + result[0] += -860.9173103201238; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 156))) { + if (UNLIKELY(false || (data[6].qvalue <= 36))) { + result[0] += -207.54940006849526; + } else { + result[0] += 555.8581322313454; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 448))) { + result[0] += -233.03910512550647; + } else { + result[0] += 411.0057503246483; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 200))) { + if (UNLIKELY(false || (data[0].qvalue <= 102))) { + if (UNLIKELY(false || (data[0].qvalue <= 42))) { + result[0] += -879.2505320646146; + } else { + if (LIKELY(false || (data[6].qvalue <= 100))) { + result[0] += -613.8765129117388; + } else { + result[0] += -898.2402256092773; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[0].qvalue <= 152))) { + result[0] += -310.0254085675876; + } else { + result[0] += -23.926126961634566; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (UNLIKELY(false || (data[0].qvalue <= 150))) { + result[0] += -532.7012177870323; + } else { + result[0] += -303.0535459955343; + } + } else { + result[0] += -885.9999951852365; + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 96))) { + if (UNLIKELY(false || (data[0].qvalue <= 288))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[0].qvalue <= 236))) { + result[0] += 215.01429644416353; + } else { + result[0] += 421.53246886972823; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 48))) { + result[0] += -286.0785455584352; + } else { + result[0] += 116.1256792417381; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 120))) { + if (LIKELY(false || (data[8].qvalue <= 118))) { + if (LIKELY(false || (data[0].qvalue <= 414))) { + result[0] += 706.4179843328923; + } else { + result[0] += 374.35058115512874; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 28))) { + result[0] += -23.434409262940505; + } else { + result[0] += 567.1115038047287; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 386))) { + if (LIKELY(false || (data[8].qvalue <= 18))) { + result[0] += 273.6642642670474; + } else { + result[0] += -1046.2285662989975; + } + } else { + result[0] += 487.25359122330957; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 354))) { + if (LIKELY(false || (data[6].qvalue <= 152))) { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (LIKELY(false || (data[4].qvalue <= 98))) { + result[0] += 86.77607178167547; + } else { + result[0] += -327.9227215464309; + } + } else { + result[0] += -702.536120284601; + } + } else { + result[0] += -816.76285821889; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 168))) { + if (LIKELY(false || (data[4].qvalue <= 104))) { + if (LIKELY(false || (data[2].qvalue <= 202))) { + result[0] += 585.7714665368296; + } else { + result[0] += 211.97898615341128; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 446))) { + result[0] += 45.872711470194794; + } else { + result[0] += 596.7727695600357; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 460))) { + if (LIKELY(false || (data[6].qvalue <= 174))) { + result[0] += -110.3734753492252; + } else { + result[0] += -993.643156799906; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 134))) { + result[0] += 513.9132377712247; + } else { + result[0] += -156.94258247387484; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 174))) { + if (UNLIKELY(false || (data[0].qvalue <= 78))) { + if (UNLIKELY(false || (data[0].qvalue <= 28))) { + result[0] += -825.7182786124127; + } else { + if (LIKELY(false || (data[1].qvalue <= 78))) { + result[0] += -620.5383438315085; + } else { + result[0] += -812.4844205870289; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 78))) { + if (UNLIKELY(false || (data[0].qvalue <= 124))) { + result[0] += -409.88504033635036; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += -72.60757573397173; + } else { + result[0] += -250.66315233138621; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 32))) { + result[0] += -830.1444150421403; + } else { + result[0] += -512.6141494941938; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 244))) { + if (LIKELY(false || (data[7].qvalue <= 48))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + if (UNLIKELY(false || (data[10].qvalue <= 8))) { + result[0] += 654.5140923867807; + } else { + result[0] += 216.223797321274; + } + } else { + result[0] += 59.127335346599445; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 42))) { + if (LIKELY(false || (data[7].qvalue <= 120))) { + result[0] += -408.9401043985653; + } else { + result[0] += -900.5928970741843; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + result[0] += -88.4273489833162; + } else { + result[0] += -494.5124549516256; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 126))) { + if (UNLIKELY(false || (data[0].qvalue <= 316))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 577.7159751120596; + } else { + result[0] += 390.25605025593114; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 100))) { + result[0] += 225.37808240171967; + } else { + result[0] += -134.1896384710644; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 40))) { + if (UNLIKELY(false || (data[0].qvalue <= 380))) { + result[0] += 26.38536976794332; + } else { + result[0] += 404.9857810684197; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 206))) { + result[0] += 640.5372498970368; + } else { + result[0] += 146.7850504947837; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 398))) { + if (UNLIKELY(false || (data[9].qvalue <= 46))) { + if (UNLIKELY(false || (data[2].qvalue <= 32))) { + result[0] += -69.72606543209433; + } else { + result[0] += -890.4547484128167; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 194))) { + result[0] += 43.29913016336363; + } else { + result[0] += -1135.7449808644187; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 156))) { + if (UNLIKELY(false || (data[4].qvalue <= 72))) { + result[0] += 619.8183215898755; + } else { + result[0] += 285.5489715529262; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 448))) { + result[0] += -358.75757336334124; + } else { + result[0] += 304.16119148341835; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 202))) { + if (LIKELY(false || (data[0].qvalue <= 118))) { + if (LIKELY(false || (data[0].qvalue <= 66))) { + if (UNLIKELY(false || (data[0].qvalue <= 10))) { + result[0] += -817.0480700539837; + } else { + result[0] += -635.8069750267712; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + result[0] += -345.96669659122944; + } else { + result[0] += -534.0482572373161; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 90))) { + if (LIKELY(false || (data[0].qvalue <= 170))) { + result[0] += -211.68858516839794; + } else { + if (LIKELY(false || (data[6].qvalue <= 50))) { + result[0] += 62.76650514311201; + } else { + result[0] += -136.44892442998687; + } + } + } else { + result[0] += -491.2641598575405; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + if (UNLIKELY(false || (data[0].qvalue <= 292))) { + if (LIKELY(false || (data[7].qvalue <= 48))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + if (LIKELY(false || (data[6].qvalue <= 32))) { + result[0] += 331.57071848199166; + } else { + result[0] += 679.3219985689888; + } + } else { + result[0] += 218.00776335345145; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 42))) { + result[0] += -277.030356546739; + } else { + result[0] += 67.28341346905603; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 40))) { + if (LIKELY(false || (data[0].qvalue <= 428))) { + if (LIKELY(false || (data[6].qvalue <= 134))) { + result[0] += 324.7275088735298; + } else { + result[0] += -419.6036244916827; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 90))) { + result[0] += 170.7438608997793; + } else { + result[0] += 713.0065984964028; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (LIKELY(false || (data[0].qvalue <= 356))) { + result[0] += 471.87466419027; + } else { + result[0] += 637.1788630246149; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 408))) { + result[0] += -529.7467726888689; + } else { + result[0] += 412.8057422873779; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 404))) { + if (UNLIKELY(false || (data[9].qvalue <= 46))) { + result[0] += -712.0507049531527; + } else { + if (LIKELY(false || (data[7].qvalue <= 194))) { + if (LIKELY(false || (data[2].qvalue <= 202))) { + result[0] += 36.347398334304295; + } else { + result[0] += -675.8496819191378; + } + } else { + result[0] += -1026.391767021813; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (LIKELY(false || (data[2].qvalue <= 218))) { + result[0] += 300.59738999141763; + } else { + result[0] += -768.8823600737215; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 462))) { + result[0] += -815.3971702532364; + } else { + result[0] += -98.98096938280344; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 188))) { + result[0] += 772.6721143780254; + } else { + result[0] += 184.03196143296145; + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 204))) { + if (UNLIKELY(false || (data[0].qvalue <= 106))) { + if (LIKELY(false || (data[0].qvalue <= 52))) { + result[0] += -628.1023483072281; + } else { + if (LIKELY(false || (data[7].qvalue <= 90))) { + result[0] += -417.2006348985446; + } else { + result[0] += -652.72384554584; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 78))) { + if (UNLIKELY(false || (data[0].qvalue <= 150))) { + result[0] += -244.5303558275764; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += 81.86220471198173; + } else { + result[0] += -93.51539141150997; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 42))) { + result[0] += -604.2798075362174; + } else { + result[0] += -312.7206975646505; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 76))) { + if (UNLIKELY(false || (data[0].qvalue <= 280))) { + if (UNLIKELY(false || (data[7].qvalue <= 30))) { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + result[0] += 807.0167727723248; + } else { + result[0] += 289.0442076002897; + } + } else { + result[0] += 124.5150431079561; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 78))) { + if (UNLIKELY(false || (data[0].qvalue <= 330))) { + if (UNLIKELY(false || (data[7].qvalue <= 30))) { + result[0] += 565.5660231034293; + } else { + result[0] += 342.6451209197193; + } + } else { + result[0] += 592.3830913005597; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 6))) { + if (LIKELY(false || (data[0].qvalue <= 444))) { + result[0] += -430.7701175762352; + } else { + result[0] += 555.2205548100666; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 356))) { + result[0] += 220.55563498286244; + } else { + result[0] += 609.8515804605789; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 364))) { + if (UNLIKELY(false || (data[9].qvalue <= 40))) { + if (LIKELY(false || (data[7].qvalue <= 112))) { + result[0] += -186.288076755465; + } else { + result[0] += -654.6528922882809; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (LIKELY(false || (data[7].qvalue <= 180))) { + result[0] += 134.73234216388812; + } else { + result[0] += -411.6205521150166; + } + } else { + result[0] += -601.6441239663638; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (LIKELY(false || (data[2].qvalue <= 214))) { + if (UNLIKELY(false || (data[8].qvalue <= 48))) { + result[0] += 159.54229602414563; + } else { + result[0] += 399.7957684439738; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -623.3887244049713; + } else { + result[0] += 412.91921366956416; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 448))) { + if (LIKELY(false || (data[9].qvalue <= 18))) { + result[0] += -725.573342147161; + } else { + result[0] += 232.5245626809719; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 62))) { + result[0] += 781.8973840507092; + } else { + result[0] += 102.27963999730173; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 206))) { + if (LIKELY(false || (data[0].qvalue <= 118))) { + if (LIKELY(false || (data[0].qvalue <= 70))) { + result[0] += -541.6072754781043; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 40))) { + result[0] += -266.3645156358689; + } else { + result[0] += -438.26985938213795; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 90))) { + if (LIKELY(false || (data[0].qvalue <= 170))) { + result[0] += -171.34773263938322; + } else { + result[0] += -11.303977068049283; + } + } else { + result[0] += -407.71415355827446; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 126))) { + if (UNLIKELY(false || (data[0].qvalue <= 298))) { + if (LIKELY(false || (data[1].qvalue <= 76))) { + if (UNLIKELY(false || (data[7].qvalue <= 16))) { + if (LIKELY(false || (data[5].qvalue <= 46))) { + result[0] += 305.49013378917147; + } else { + result[0] += 766.8660544561868; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 12))) { + result[0] += -212.69432422518977; + } else { + result[0] += 203.45215339752818; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 100))) { + if (UNLIKELY(false || (data[2].qvalue <= 6))) { + result[0] += -1306.979742409561; + } else { + result[0] += 84.56250938982618; + } + } else { + result[0] += -296.8758971391223; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 92))) { + if (LIKELY(false || (data[2].qvalue <= 212))) { + if (UNLIKELY(false || (data[7].qvalue <= 16))) { + result[0] += 634.2960749274997; + } else { + result[0] += 446.27587321034673; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 442))) { + result[0] += -656.3271831376737; + } else { + result[0] += 649.7533136866848; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 346))) { + if (LIKELY(false || (data[6].qvalue <= 100))) { + result[0] += 272.18984613419735; + } else { + result[0] += -183.6208158354411; + } + } else { + result[0] += 335.6468220356205; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 418))) { + if (LIKELY(false || (data[1].qvalue <= 136))) { + if (LIKELY(false || (data[2].qvalue <= 190))) { + if (LIKELY(false || (data[7].qvalue <= 194))) { + result[0] += 155.81121294253342; + } else { + result[0] += -775.8409014979492; + } + } else { + result[0] += -546.6139529208316; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 279.2996966849545; + } else { + result[0] += -746.3491117424761; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (UNLIKELY(false || (data[0].qvalue <= 444))) { + if (UNLIKELY(false || (data[2].qvalue <= 76))) { + result[0] += -794.9146466173518; + } else { + result[0] += 238.59142556484136; + } + } else { + result[0] += 427.50310495259396; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + if (LIKELY(false || (data[4].qvalue <= 112))) { + result[0] += -91.71744859578774; + } else { + result[0] += -899.7782100028055; + } + } else { + result[0] += 481.37569784189236; + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 182))) { + if (UNLIKELY(false || (data[0].qvalue <= 90))) { + if (UNLIKELY(false || (data[0].qvalue <= 26))) { + result[0] += -559.2465183228969; + } else { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + result[0] += -339.48268673463207; + } else { + result[0] += -477.7038556835741; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 100))) { + if (LIKELY(false || (data[0].qvalue <= 142))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += -173.83777524062293; + } else { + result[0] += -308.64588699940225; + } + } else { + result[0] += -87.70355551216173; + } + } else { + result[0] += -460.0791599316315; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 264))) { + if (LIKELY(false || (data[1].qvalue <= 76))) { + if (LIKELY(false || (data[6].qvalue <= 46))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + if (UNLIKELY(false || (data[4].qvalue <= 2))) { + result[0] += 519.5076559696931; + } else { + result[0] += 36.579262885789255; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 8))) { + result[0] += 850.2335034126149; + } else { + result[0] += 225.76397079413547; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + result[0] += 31.634082363361337; + } else { + result[0] += -491.7231693795383; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 134))) { + result[0] += -143.97621323045203; + } else { + result[0] += -587.7222010882514; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[1].qvalue <= 76))) { + if (LIKELY(false || (data[9].qvalue <= 128))) { + if (LIKELY(false || (data[0].qvalue <= 328))) { + result[0] += 341.70375333102265; + } else { + result[0] += 492.9064526262677; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 32))) { + result[0] += 291.11846627413223; + } else { + result[0] += -67.70442593342548; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 336))) { + if (UNLIKELY(false || (data[9].qvalue <= 42))) { + result[0] += -178.5440203148846; + } else { + result[0] += 126.46282552428471; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 76))) { + result[0] += 222.41058195803035; + } else { + result[0] += 434.73453399120257; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 426))) { + if (LIKELY(false || (data[9].qvalue <= 68))) { + if (LIKELY(false || (data[0].qvalue <= 416))) { + result[0] += -640.7399976249752; + } else { + result[0] += -167.72938945515875; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 46))) { + result[0] += -1344.5726371900926; + } else { + result[0] += 230.06133289043484; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 168))) { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + result[0] += -84.2627136369062; + } else { + result[0] += 415.7428703783992; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 460))) { + result[0] += -384.29322175155045; + } else { + result[0] += 296.1675101952608; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 214))) { + if (LIKELY(false || (data[0].qvalue <= 122))) { + if (UNLIKELY(false || (data[0].qvalue <= 46))) { + result[0] += -470.73918641723026; + } else { + if (LIKELY(false || (data[7].qvalue <= 90))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += -198.32164705298905; + } else { + result[0] += -321.39079912240294; + } + } else { + result[0] += -498.8080870646863; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 44))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 317.2121545851497; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 172))) { + result[0] += -88.36582704181914; + } else { + result[0] += 46.798653356184104; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 126))) { + if (LIKELY(false || (data[1].qvalue <= 92))) { + result[0] += -119.53396623757338; + } else { + result[0] += -262.3878864323929; + } + } else { + result[0] += -497.48244794654096; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + if (UNLIKELY(false || (data[0].qvalue <= 304))) { + if (LIKELY(false || (data[7].qvalue <= 44))) { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + result[0] += 746.0045878549049; + } else { + if (LIKELY(false || (data[10].qvalue <= 124))) { + result[0] += 255.0791650107591; + } else { + result[0] += -320.7492571200465; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 90))) { + result[0] += 96.84399754507324; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 18))) { + result[0] += -880.8032147114733; + } else { + result[0] += -73.20637853492896; + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 38))) { + result[0] += 461.5993365359995; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 358))) { + if (LIKELY(false || (data[2].qvalue <= 180))) { + result[0] += 212.73235839758036; + } else { + result[0] += -154.54318766724592; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 124))) { + result[0] += 395.0997024493214; + } else { + result[0] += 210.88997718544783; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 404))) { + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (LIKELY(false || (data[10].qvalue <= 112))) { + if (UNLIKELY(false || (data[5].qvalue <= 50))) { + result[0] += -969.6328168630031; + } else { + result[0] += -86.652192967571; + } + } else { + result[0] += -593.8688924415013; + } + } else { + result[0] += -859.0755121136438; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[5].qvalue <= 120))) { + if (LIKELY(false || (data[2].qvalue <= 222))) { + result[0] += 179.3501661826762; + } else { + result[0] += -755.4195928241176; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -1101.7895944684535; + } else { + result[0] += -169.1089745696061; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 132))) { + result[0] += 735.5941243655279; + } else { + result[0] += -9.689280563857821; + } + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + if (LIKELY(false || (data[7].qvalue <= 16))) { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + result[0] += 241.07465429893853; + } else { + if (UNLIKELY(false || (data[5].qvalue <= 2))) { + result[0] += -167.05234881826266; + } else { + if (LIKELY(false || (data[9].qvalue <= 160))) { + result[0] += 23.635093021555477; + } else { + result[0] += 247.95569232139462; + } + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 24))) { + if (UNLIKELY(false || (data[5].qvalue <= 26))) { + result[0] += -11.35953345193686; + } else { + if (LIKELY(false || (data[8].qvalue <= 46))) { + result[0] += -241.87320597288937; + } else { + result[0] += -558.2222728620737; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 22))) { + result[0] += -717.6843774931726; + } else { + if (UNLIKELY(false || (data[5].qvalue <= 18))) { + result[0] += -119.9905475497901; + } else { + result[0] += 12.781811278483842; + } + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + if (UNLIKELY(false || (data[5].qvalue <= 78))) { + if (UNLIKELY(false || (data[8].qvalue <= 68))) { + if (UNLIKELY(false || (data[8].qvalue <= 22))) { + result[0] += -383.60019278447663; + } else { + result[0] += 70.39617109267411; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 62))) { + result[0] += 14.894390887322576; + } else { + result[0] += -246.88224072530934; + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 134))) { + if (LIKELY(false || (data[9].qvalue <= 32))) { + result[0] += -26.468777991984183; + } else { + result[0] += 124.62962202584231; + } + } else { + result[0] += -125.32103441709326; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 98))) { + if (UNLIKELY(false || (data[6].qvalue <= 50))) { + if (LIKELY(false || (data[8].qvalue <= 120))) { + result[0] += 79.94063628810217; + } else { + result[0] += -99.91998726218483; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 54))) { + result[0] += -102.42726831173952; + } else { + result[0] += 19.419260663393466; + } + } + } else { + if (LIKELY(false || (data[8].qvalue <= 120))) { + if (LIKELY(false || (data[10].qvalue <= 90))) { + result[0] += 179.7748013112496; + } else { + result[0] += -7.882776932047753; + } + } else { + result[0] += 448.60994004642566; + } + } + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 134))) { + if (LIKELY(false || (data[7].qvalue <= 198))) { + if (LIKELY(false || (data[3].qvalue <= 168))) { + if (LIKELY(false || (data[6].qvalue <= 178))) { + result[0] += -30.950902106565053; + } else { + result[0] += 476.7456995983057; + } + } else { + result[0] += -198.82706671026182; + } + } else { + result[0] += -249.8175053383445; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 188))) { + if (UNLIKELY(false || (data[3].qvalue <= 98))) { + result[0] += -378.6732367385603; + } else { + result[0] += -109.64341915005078; + } + } else { + result[0] += -425.2420138470435; + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 220))) { + if (LIKELY(false || (data[0].qvalue <= 134))) { + if (LIKELY(false || (data[0].qvalue <= 72))) { + if (UNLIKELY(false || (data[0].qvalue <= 8))) { + result[0] += -532.059203726686; + } else { + result[0] += -372.9090262225109; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 82))) { + result[0] += -209.07044382236805; + } else { + result[0] += -400.00349533828; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + result[0] += 412.17374136976844; + } else { + result[0] += -14.036003053192212; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 116))) { + result[0] += -154.44905243467502; + } else { + result[0] += -436.1506043761763; + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 116))) { + if (LIKELY(false || (data[0].qvalue <= 324))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (LIKELY(false || (data[0].qvalue <= 276))) { + result[0] += 180.81716708771435; + } else { + result[0] += 323.80885408996255; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 4))) { + result[0] += -804.687929563034; + } else { + if (LIKELY(false || (data[7].qvalue <= 144))) { + result[0] += 75.45041773548873; + } else { + result[0] += -308.6438664376922; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 198))) { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + if (UNLIKELY(false || (data[9].qvalue <= 60))) { + result[0] += -136.42148889369125; + } else { + result[0] += 347.35402497810156; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 40))) { + result[0] += 519.5328967780671; + } else { + result[0] += 317.09389087410756; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 442))) { + result[0] += -830.9681326018041; + } else { + result[0] += 776.5378702508172; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 392))) { + if (LIKELY(false || (data[1].qvalue <= 120))) { + if (UNLIKELY(false || (data[3].qvalue <= 96))) { + result[0] += -916.9602444683026; + } else { + if (LIKELY(false || (data[2].qvalue <= 190))) { + result[0] += 151.14410198089988; + } else { + result[0] += -534.6138410777346; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 64))) { + result[0] += -20.321297171216408; + } else { + result[0] += -553.6897184690705; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (LIKELY(false || (data[2].qvalue <= 214))) { + if (UNLIKELY(false || (data[8].qvalue <= 44))) { + result[0] += -94.32615099423269; + } else { + result[0] += 326.8245357585306; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -617.350459951314; + } else { + result[0] += 276.127751759582; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 442))) { + result[0] += -733.0199493164838; + } else { + if (LIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -116.42395706265687; + } else { + result[0] += 335.32040559141655; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 172))) { + if (UNLIKELY(false || (data[0].qvalue <= 78))) { + if (UNLIKELY(false || (data[0].qvalue <= 6))) { + result[0] += -493.1717265491526; + } else { + if (LIKELY(false || (data[1].qvalue <= 72))) { + result[0] += -307.1327071737299; + } else { + result[0] += -442.19784021780106; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 116))) { + if (LIKELY(false || (data[1].qvalue <= 72))) { + if (LIKELY(false || (data[0].qvalue <= 138))) { + result[0] += -167.9842807798525; + } else { + result[0] += -70.31231390063265; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 120))) { + result[0] += -254.86498043371085; + } else { + result[0] += -703.766647232002; + } + } + } else { + result[0] += -458.17666670208604; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 240))) { + if (LIKELY(false || (data[6].qvalue <= 72))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 400.7108318320836; + } else { + if (LIKELY(false || (data[4].qvalue <= 128))) { + if (UNLIKELY(false || (data[4].qvalue <= 6))) { + result[0] += 227.72272060240076; + } else { + result[0] += 16.088229395762443; + } + } else { + result[0] += -679.3987618413844; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 146))) { + if (UNLIKELY(false || (data[3].qvalue <= 112))) { + result[0] += -287.0516773864561; + } else { + result[0] += -82.35735858878537; + } + } else { + result[0] += -473.44254046258266; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (UNLIKELY(false || (data[0].qvalue <= 326))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + result[0] += 167.85776493357912; + } else { + result[0] += 297.3702646687886; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 116))) { + result[0] += 67.52512843170207; + } else { + result[0] += -189.67754872059376; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 40))) { + if (UNLIKELY(false || (data[3].qvalue <= 36))) { + result[0] += 818.1555179158704; + } else { + result[0] += 130.45031708535106; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 454))) { + result[0] += 327.4166938133286; + } else { + result[0] += -120.66748291707741; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 428))) { + if (LIKELY(false || (data[9].qvalue <= 68))) { + if (LIKELY(false || (data[0].qvalue <= 418))) { + result[0] += -535.7497746001039; + } else { + result[0] += -120.65339683228727; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 46))) { + result[0] += -1201.76536866533; + } else { + result[0] += 160.46676494029293; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (LIKELY(false || (data[2].qvalue <= 214))) { + result[0] += 379.9797915421976; + } else { + result[0] += -12.579853903701677; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 452))) { + result[0] += -539.1034312262643; + } else { + result[0] += 118.1495781414129; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 210))) { + if (UNLIKELY(false || (data[0].qvalue <= 96))) { + if (UNLIKELY(false || (data[0].qvalue <= 12))) { + result[0] += -414.82218364010805; + } else { + if (LIKELY(false || (data[6].qvalue <= 100))) { + result[0] += -261.7354787568685; + } else { + result[0] += -436.9782510571122; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 126))) { + if (LIKELY(false || (data[6].qvalue <= 50))) { + if (UNLIKELY(false || (data[0].qvalue <= 158))) { + result[0] += -86.99960309582809; + } else { + result[0] += 36.01621466834996; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + result[0] += -276.15518055633515; + } else { + result[0] += -117.47322502980383; + } + } + } else { + result[0] += -418.57103843448124; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + if (UNLIKELY(false || (data[0].qvalue <= 300))) { + if (LIKELY(false || (data[6].qvalue <= 100))) { + if (UNLIKELY(false || (data[7].qvalue <= 24))) { + result[0] += 221.38591506553996; + } else { + if (UNLIKELY(false || (data[5].qvalue <= 34))) { + result[0] += -114.10943519348092; + } else { + result[0] += 124.41383507415142; + } + } + } else { + result[0] += -124.776312637069; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 40))) { + if (LIKELY(false || (data[0].qvalue <= 434))) { + if (LIKELY(false || (data[6].qvalue <= 134))) { + result[0] += 173.27180862569062; + } else { + result[0] += -230.0263780267813; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 90))) { + result[0] += 93.55822981381293; + } else { + result[0] += 531.9618157197915; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 138))) { + if (UNLIKELY(false || (data[7].qvalue <= 12))) { + result[0] += 529.342886411045; + } else { + result[0] += 275.38241535187143; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 32))) { + result[0] += 168.94637195832345; + } else { + result[0] += -664.1470891916572; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 442))) { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (LIKELY(false || (data[7].qvalue <= 198))) { + if (UNLIKELY(false || (data[9].qvalue <= 68))) { + result[0] += -259.076207652103; + } else { + result[0] += 97.97671807149003; + } + } else { + result[0] += -695.1606225313006; + } + } else { + result[0] += -796.0742621530271; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (UNLIKELY(false || (data[2].qvalue <= 104))) { + if (LIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -153.93531708038253; + } else { + result[0] += 448.69780310463796; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 216))) { + result[0] += 477.95650201824304; + } else { + result[0] += -28.61899971217077; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -909.6241555301945; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -188.47445082878266; + } else { + result[0] += 377.35719169796516; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 172))) { + if (UNLIKELY(false || (data[0].qvalue <= 76))) { + if (UNLIKELY(false || (data[0].qvalue <= 8))) { + result[0] += -390.84856797995377; + } else { + if (LIKELY(false || (data[1].qvalue <= 64))) { + result[0] += -240.40122514681448; + } else { + result[0] += -347.4550413344793; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + result[0] += -39.25816550113254; + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[0].qvalue <= 142))) { + result[0] += -186.1240171676131; + } else { + result[0] += -98.0111804997198; + } + } else { + result[0] += -436.0524831294065; + } + } + } else { + result[0] += -471.83682540539394; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 238))) { + if (LIKELY(false || (data[1].qvalue <= 76))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 348.9273763963965; + } else { + if (LIKELY(false || (data[7].qvalue <= 104))) { + result[0] += 30.53430231649784; + } else { + result[0] += -202.87449384945464; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 120))) { + if (UNLIKELY(false || (data[2].qvalue <= 4))) { + result[0] += -836.9509277269852; + } else { + if (UNLIKELY(false || (data[6].qvalue <= 22))) { + result[0] += -1508.3799402707123; + } else { + result[0] += -94.06253649645048; + } + } + } else { + result[0] += -348.5371075782411; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 126))) { + if (LIKELY(false || (data[0].qvalue <= 346))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[0].qvalue <= 286))) { + result[0] += 162.75459965948465; + } else { + result[0] += 282.56625214886793; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 10))) { + result[0] += -440.06926455976475; + } else { + result[0] += 35.45780394100668; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 212))) { + if (UNLIKELY(false || (data[8].qvalue <= 72))) { + result[0] += 180.74606636101373; + } else { + result[0] += 317.0047465523858; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 438))) { + result[0] += -588.0578066297068; + } else { + result[0] += 433.5588979682989; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 422))) { + if (LIKELY(false || (data[1].qvalue <= 136))) { + if (LIKELY(false || (data[7].qvalue <= 192))) { + result[0] += 45.9325195881808; + } else { + result[0] += -679.3813065404768; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 270.29103457509785; + } else { + result[0] += -517.5531688785617; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 118))) { + if (UNLIKELY(false || (data[10].qvalue <= 56))) { + result[0] += 117.54636858837371; + } else { + result[0] += 647.912058999977; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -41.193603415175254; + } else { + result[0] += 296.9100448809772; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 222))) { + if (LIKELY(false || (data[0].qvalue <= 138))) { + if (UNLIKELY(false || (data[0].qvalue <= 58))) { + result[0] += -275.2115013121635; + } else { + if (LIKELY(false || (data[6].qvalue <= 116))) { + result[0] += -155.46609912259618; + } else { + result[0] += -389.7954080308127; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 100))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + result[0] += 384.7520448135499; + } else { + if (LIKELY(false || (data[9].qvalue <= 154))) { + if (UNLIKELY(false || (data[4].qvalue <= 6))) { + result[0] += 292.1491245847539; + } else { + result[0] += -30.543242192258454; + } + } else { + result[0] += -385.76320075157514; + } + } + } else { + result[0] += -224.14550590009833; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 68))) { + if (UNLIKELY(false || (data[0].qvalue <= 272))) { + result[0] += 106.3555671944755; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + if (UNLIKELY(false || (data[9].qvalue <= 66))) { + if (UNLIKELY(false || (data[0].qvalue <= 410))) { + result[0] += -941.4297952572747; + } else { + result[0] += 56.28649828645251; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 8))) { + result[0] += 507.91991997031147; + } else { + result[0] += 35.20700760987173; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 348))) { + if (UNLIKELY(false || (data[7].qvalue <= 24))) { + result[0] += 324.4895405944488; + } else { + result[0] += 153.59057096574398; + } + } else { + if (LIKELY(false || (data[5].qvalue <= 60))) { + result[0] += 236.3224976398137; + } else { + result[0] += 444.2538183675668; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 372))) { + if (UNLIKELY(false || (data[9].qvalue <= 42))) { + if (UNLIKELY(false || (data[10].qvalue <= 64))) { + result[0] += 22.100327800787746; + } else { + result[0] += -285.1797635732232; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 182))) { + if (UNLIKELY(false || (data[3].qvalue <= 12))) { + result[0] += -916.6873662194294; + } else { + result[0] += 129.80380277726957; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 208))) { + result[0] += -121.90878827484735; + } else { + result[0] += -647.3935446930476; + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + if (UNLIKELY(false || (data[0].qvalue <= 444))) { + if (LIKELY(false || (data[3].qvalue <= 168))) { + result[0] += -658.7332388174912; + } else { + result[0] += -100.44944033027124; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + result[0] += 286.17922512478475; + } else { + result[0] += -166.4328133167238; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 94))) { + if (LIKELY(false || (data[3].qvalue <= 92))) { + result[0] += 56.97705669543027; + } else { + result[0] += -910.3064928974122; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 108))) { + result[0] += 459.24011774105895; + } else { + result[0] += 187.54878983259766; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 170))) { + if (UNLIKELY(false || (data[0].qvalue <= 80))) { + if (UNLIKELY(false || (data[11].qvalue <= 0))) { + if (UNLIKELY(false || (data[0].qvalue <= 18))) { + result[0] += -272.8015586187466; + } else { + result[0] += -158.01002915185765; + } + } else { + result[0] += -270.0109117909505; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + if (LIKELY(false || (data[5].qvalue <= 44))) { + result[0] += -47.655816285394785; + } else { + result[0] += 148.45176574086; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 48))) { + result[0] += -274.2183049595403; + } else { + result[0] += -120.42739359905393; + } + } + } else { + result[0] += -404.63297777523053; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 250))) { + if (LIKELY(false || (data[7].qvalue <= 86))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 310.3087236621802; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 6))) { + result[0] += -261.86875131184433; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 30))) { + result[0] += 77.33362724018633; + } else { + result[0] += -9.835991514809445; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 178))) { + if (LIKELY(false || (data[10].qvalue <= 94))) { + if (UNLIKELY(false || (data[3].qvalue <= 18))) { + result[0] += -491.7147359309727; + } else { + result[0] += -26.424686375898183; + } + } else { + result[0] += -256.5814333154503; + } + } else { + result[0] += -401.38775248670413; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + if (LIKELY(false || (data[0].qvalue <= 364))) { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + if (LIKELY(false || (data[8].qvalue <= 78))) { + result[0] += 24.807904435156793; + } else { + result[0] += -255.08075851432937; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 180))) { + result[0] += 171.3936836582958; + } else { + result[0] += -146.48986963094893; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + if (UNLIKELY(false || (data[0].qvalue <= 446))) { + result[0] += -527.9370629462896; + } else { + result[0] += 311.74412615306; + } + } else { + if (UNLIKELY(false || (data[11].qvalue <= 0))) { + result[0] += 61.275444153721516; + } else { + result[0] += 266.8647138392115; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 464))) { + if (LIKELY(false || (data[2].qvalue <= 218))) { + if (UNLIKELY(false || (data[9].qvalue <= 2))) { + result[0] += -553.7238254263838; + } else { + result[0] += 30.964493797225533; + } + } else { + result[0] += -1039.405250832381; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 132))) { + if (LIKELY(false || (data[0].qvalue <= 470))) { + result[0] += 221.3473424703826; + } else { + result[0] += 578.2036295192621; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -1067.1325237122935; + } else { + result[0] += -72.41418684700736; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 222))) { + if (LIKELY(false || (data[0].qvalue <= 120))) { + if (UNLIKELY(false || (data[0].qvalue <= 36))) { + result[0] += -245.63449384858325; + } else { + if (LIKELY(false || (data[1].qvalue <= 102))) { + result[0] += -141.07821937677804; + } else { + result[0] += -311.46095137887164; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 78))) { + result[0] += -18.828476188261668; + } else { + result[0] += -158.60479952768688; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[0].qvalue <= 334))) { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + if (UNLIKELY(false || (data[5].qvalue <= 28))) { + if (LIKELY(false || (data[2].qvalue <= 122))) { + result[0] += 85.17185179060556; + } else { + result[0] += -1488.3371391950336; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 120))) { + result[0] += 251.47188880049714; + } else { + result[0] += -337.59370375551794; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 176))) { + if (LIKELY(false || (data[7].qvalue <= 122))) { + result[0] += 64.67449717812703; + } else { + result[0] += -198.4096965608494; + } + } else { + result[0] += -109.54175072971603; + } + } + } else { + if (LIKELY(false || (data[5].qvalue <= 96))) { + if (UNLIKELY(false || (data[7].qvalue <= 64))) { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + result[0] += 25.027805981227203; + } else { + result[0] += 254.11335581266104; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 22))) { + result[0] += -327.20805261276865; + } else { + result[0] += 158.88433974650883; + } + } + } else { + result[0] += 635.3039299555891; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 422))) { + if (UNLIKELY(false || (data[5].qvalue <= 98))) { + if (LIKELY(false || (data[4].qvalue <= 116))) { + result[0] += -674.4791597614653; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 154))) { + result[0] += -621.8229750354634; + } else { + result[0] += 421.9420604815607; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 102))) { + if (LIKELY(false || (data[0].qvalue <= 390))) { + result[0] += 85.124602127931; + } else { + result[0] += 949.6197614387934; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 144))) { + result[0] += 251.2728036404487; + } else { + result[0] += -309.15702771287084; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 184))) { + if (LIKELY(false || (data[0].qvalue <= 460))) { + if (UNLIKELY(false || (data[10].qvalue <= 38))) { + result[0] += -633.4429247336111; + } else { + result[0] += 160.50792355093574; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 176))) { + result[0] += 479.12605526304554; + } else { + result[0] += 116.33385550515398; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -930.2431709848779; + } else { + if (LIKELY(false || (data[1].qvalue <= 156))) { + result[0] += 214.15884274598116; + } else { + result[0] += -277.81632186374355; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 168))) { + if (UNLIKELY(false || (data[0].qvalue <= 72))) { + if (LIKELY(false || (data[6].qvalue <= 88))) { + if (UNLIKELY(false || (data[0].qvalue <= 4))) { + result[0] += -294.9481294960577; + } else { + result[0] += -165.43274636129723; + } + } else { + result[0] += -283.98106032598395; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 137.94773272215207; + } else { + result[0] += -74.90747793962899; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + result[0] += -146.0253896376931; + } else { + result[0] += -355.419967719613; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 268))) { + if (LIKELY(false || (data[1].qvalue <= 76))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 312.51485279750943; + } else { + if (LIKELY(false || (data[2].qvalue <= 208))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + result[0] += -26.67715098965897; + } else { + result[0] += 59.16521507626189; + } + } else { + result[0] += -514.0817805406466; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 22))) { + result[0] += -1382.1700903888081; + } else { + if (LIKELY(false || (data[1].qvalue <= 148))) { + if (UNLIKELY(false || (data[6].qvalue <= 84))) { + result[0] += 2.2238582017569652; + } else { + result[0] += -149.07329069335975; + } + } else { + result[0] += -389.67704456086886; + } + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 134))) { + if (LIKELY(false || (data[2].qvalue <= 212))) { + if (UNLIKELY(false || (data[1].qvalue <= 8))) { + if (LIKELY(false || (data[0].qvalue <= 462))) { + result[0] += -139.5342233564615; + } else { + result[0] += 425.4054517675065; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 370))) { + result[0] += 119.1045005005382; + } else { + result[0] += 225.06622562801806; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 428))) { + if (UNLIKELY(false || (data[10].qvalue <= 116))) { + result[0] += -314.0145429164164; + } else { + result[0] += -791.7715009965185; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 156))) { + result[0] += 299.89845236333366; + } else { + result[0] += -121.70249001324471; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 438))) { + if (LIKELY(false || (data[6].qvalue <= 152))) { + if (UNLIKELY(false || (data[2].qvalue <= 42))) { + result[0] += 308.86651700943054; + } else { + result[0] += -123.15739696081778; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 162))) { + result[0] += -543.1460199892057; + } else { + result[0] += -268.8247484125225; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 170))) { + if (LIKELY(false || (data[5].qvalue <= 114))) { + result[0] += 219.69052341180478; + } else { + result[0] += 768.652154361283; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -330.30502416245446; + } else { + result[0] += 226.83056005764217; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 194))) { + if (UNLIKELY(false || (data[0].qvalue <= 94))) { + result[0] += -164.85997854401103; + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + if (LIKELY(false || (data[6].qvalue <= 32))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + result[0] += 272.37997423411696; + } else { + result[0] += -38.01170141177444; + } + } else { + result[0] += 179.2649038053183; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 40))) { + if (UNLIKELY(false || (data[10].qvalue <= 14))) { + result[0] += -447.1000423177568; + } else { + result[0] += -129.8926570286136; + } + } else { + result[0] += -58.31804328980124; + } + } + } else { + result[0] += -307.58593821488535; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (UNLIKELY(false || (data[0].qvalue <= 308))) { + if (LIKELY(false || (data[6].qvalue <= 116))) { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + result[0] += 484.75118404823917; + } else { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 101.86133520052235; + } else { + result[0] += 18.262784206337113; + } + } + } else { + result[0] += -171.38940241656513; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 76))) { + if (LIKELY(false || (data[0].qvalue <= 420))) { + if (LIKELY(false || (data[4].qvalue <= 80))) { + result[0] += 164.27136795332518; + } else { + result[0] += 420.19522838999177; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 70))) { + result[0] += 288.60771862284395; + } else { + result[0] += -317.37860129812316; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 380))) { + if (UNLIKELY(false || (data[3].qvalue <= 112))) { + result[0] += -302.83673649889585; + } else { + result[0] += 54.622892990725916; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 98))) { + result[0] += -14.652198735183687; + } else { + result[0] += 188.62633009457414; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 410))) { + if (LIKELY(false || (data[1].qvalue <= 88))) { + if (UNLIKELY(false || (data[10].qvalue <= 68))) { + result[0] += -453.6048808127357; + } else { + result[0] += 39.27686618322581; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 20))) { + result[0] += 0.2890822714437353; + } else { + result[0] += -681.1445664481982; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 114))) { + if (LIKELY(false || (data[7].qvalue <= 198))) { + result[0] += 448.49867267672437; + } else { + if (LIKELY(false || (data[0].qvalue <= 446))) { + result[0] += -510.11854071816606; + } else { + result[0] += 725.828009865931; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + if (UNLIKELY(false || (data[10].qvalue <= 70))) { + result[0] += 82.63133907289594; + } else { + result[0] += -564.6009321848745; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 188))) { + result[0] += 485.04570202598234; + } else { + result[0] += -44.949579597135376; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 226))) { + if (LIKELY(false || (data[0].qvalue <= 140))) { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + result[0] += -89.22639695175398; + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + result[0] += -154.49086546432346; + } else { + result[0] += -329.5903225571293; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 126))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + result[0] += 318.48761171753574; + } else { + result[0] += -18.571568031593554; + } + } else { + result[0] += -220.49206840771905; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 202))) { + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (LIKELY(false || (data[0].qvalue <= 366))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (LIKELY(false || (data[8].qvalue <= 120))) { + result[0] += 150.42799764081784; + } else { + result[0] += -76.38500429830752; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 56))) { + result[0] += -211.0027314849032; + } else { + result[0] += 24.230230872826798; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 164))) { + if (LIKELY(false || (data[8].qvalue <= 142))) { + result[0] += 132.68242055886515; + } else { + result[0] += -882.8096755983888; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 192))) { + result[0] += 334.11768457333994; + } else { + result[0] += -53.48084930478878; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 450))) { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 233.21157693531032; + } else { + if (LIKELY(false || (data[10].qvalue <= 138))) { + result[0] += -582.6982780907397; + } else { + result[0] += 0.7508374869349193; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (UNLIKELY(false || (data[2].qvalue <= 66))) { + result[0] += 844.2241348987232; + } else { + result[0] += 166.47618379149745; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -770.8958350747589; + } else { + result[0] += 106.39982734044628; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 424))) { + if (UNLIKELY(false || (data[10].qvalue <= 116))) { + if (LIKELY(false || (data[10].qvalue <= 114))) { + result[0] += -318.826948941992; + } else { + result[0] += 300.5051761631031; + } + } else { + result[0] += -663.2498735346456; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 68))) { + if (LIKELY(false || (data[7].qvalue <= 176))) { + result[0] += 330.2362671942275; + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + result[0] += -261.9859689644222; + } else { + result[0] += 470.337710070637; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + if (UNLIKELY(false || (data[8].qvalue <= 134))) { + result[0] += 52.55390876938948; + } else { + result[0] += -820.466862953795; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 172))) { + result[0] += 678.0504747681679; + } else { + result[0] += -30.65402416986749; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 230))) { + if (LIKELY(false || (data[0].qvalue <= 146))) { + if (LIKELY(false || (data[7].qvalue <= 124))) { + if (UNLIKELY(false || (data[0].qvalue <= 40))) { + result[0] += -163.25748720727836; + } else { + result[0] += -83.1950205740847; + } + } else { + result[0] += -269.08136712131557; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + result[0] += -164.01997292662278; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 228.21827507549563; + } else { + result[0] += -9.402288377382439; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 142))) { + if (UNLIKELY(false || (data[7].qvalue <= 16))) { + if (LIKELY(false || (data[6].qvalue <= 30))) { + if (UNLIKELY(false || (data[7].qvalue <= 4))) { + result[0] += 381.64939051951137; + } else { + if (LIKELY(false || (data[0].qvalue <= 302))) { + result[0] += 58.00057357423904; + } else { + result[0] += 223.74792550499748; + } + } + } else { + result[0] += 454.5662709049777; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 36))) { + if (LIKELY(false || (data[8].qvalue <= 128))) { + if (LIKELY(false || (data[0].qvalue <= 378))) { + result[0] += -93.01200660095587; + } else { + result[0] += 134.32252549902134; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 444))) { + result[0] += -2078.6414691042096; + } else { + result[0] += -159.18640228976722; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 32))) { + if (UNLIKELY(false || (data[0].qvalue <= 396))) { + result[0] += -163.73514121181952; + } else { + result[0] += 100.75922992115221; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 360))) { + result[0] += 84.30972820399808; + } else { + result[0] += 197.85022347944687; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 442))) { + if (LIKELY(false || (data[7].qvalue <= 198))) { + if (UNLIKELY(false || (data[6].qvalue <= 114))) { + if (LIKELY(false || (data[0].qvalue <= 406))) { + result[0] += -124.87742350731226; + } else { + result[0] += 371.4284844619934; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 130))) { + result[0] += -1022.9075285275169; + } else { + result[0] += -154.11889775108097; + } + } + } else { + result[0] += -609.5631862224477; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (UNLIKELY(false || (data[2].qvalue <= 104))) { + if (LIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -172.46788335138166; + } else { + result[0] += 267.92756125694655; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 216))) { + result[0] += 344.21102212490365; + } else { + result[0] += -63.20922723234142; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (UNLIKELY(false || (data[4].qvalue <= 88))) { + result[0] += -98.6686560628389; + } else { + result[0] += -824.7416534209821; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 162))) { + result[0] += 423.0262534565941; + } else { + result[0] += -172.900534785997; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 164))) { + if (UNLIKELY(false || (data[7].qvalue <= 36))) { + if (UNLIKELY(false || (data[0].qvalue <= 22))) { + result[0] += -151.00440148400935; + } else { + result[0] += -40.118590528856686; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 134))) { + result[0] += -118.83158145049806; + } else { + result[0] += -265.5488061301905; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (UNLIKELY(false || (data[0].qvalue <= 276))) { + if (LIKELY(false || (data[1].qvalue <= 76))) { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + result[0] += 337.5770961546419; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + result[0] += -45.53149621844132; + } else { + result[0] += 57.69099405123824; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 22))) { + result[0] += -1262.4368580820865; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 4))) { + result[0] += -499.135631046245; + } else { + result[0] += -51.130463115279184; + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + if (UNLIKELY(false || (data[0].qvalue <= 446))) { + if (LIKELY(false || (data[10].qvalue <= 132))) { + result[0] += -452.52727046733617; + } else { + result[0] += 207.2426999628595; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 186))) { + result[0] += 147.30212851601848; + } else { + result[0] += -378.60903812177116; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 8))) { + if (LIKELY(false || (data[7].qvalue <= 116))) { + result[0] += 60.647838286510584; + } else { + result[0] += -403.6642612098349; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 144))) { + result[0] += 97.61644906336824; + } else { + result[0] += 243.6057368289787; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 440))) { + if (LIKELY(false || (data[10].qvalue <= 116))) { + if (LIKELY(false || (data[0].qvalue <= 412))) { + if (LIKELY(false || (data[10].qvalue <= 114))) { + result[0] += -372.46208551501405; + } else { + result[0] += 150.12385303560893; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 154))) { + result[0] += 378.2084024000228; + } else { + result[0] += -115.22596011254524; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 104))) { + result[0] += -1013.8954317267925; + } else { + result[0] += -458.7466793813837; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 162))) { + if (LIKELY(false || (data[0].qvalue <= 460))) { + if (UNLIKELY(false || (data[10].qvalue <= 118))) { + result[0] += 573.7495286165021; + } else { + result[0] += -39.64820127548395; + } + } else { + result[0] += 747.9256037479381; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + if (UNLIKELY(false || (data[2].qvalue <= 218))) { + result[0] += -322.32729930754635; + } else { + result[0] += -889.5131323414915; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -49.77314604177496; + } else { + result[0] += 452.922151113344; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 230))) { + if (UNLIKELY(false || (data[0].qvalue <= 100))) { + result[0] += -110.27471806521078; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + if (UNLIKELY(false || (data[1].qvalue <= 36))) { + result[0] += 405.5242126958143; + } else { + result[0] += -170.43432845409902; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + result[0] += 256.63050409714157; + } else { + if (LIKELY(false || (data[9].qvalue <= 154))) { + if (UNLIKELY(false || (data[4].qvalue <= 6))) { + result[0] += 199.22002184764415; + } else { + result[0] += -21.456223281242632; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 20))) { + result[0] += -394.6713467594481; + } else { + result[0] += 400.4595635278145; + } + } + } + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (UNLIKELY(false || (data[9].qvalue <= 2))) { + if (LIKELY(false || (data[0].qvalue <= 466))) { + if (LIKELY(false || (data[6].qvalue <= 170))) { + if (UNLIKELY(false || (data[1].qvalue <= 146))) { + result[0] += -1171.2407195011317; + } else { + result[0] += 62.25614012625213; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 462))) { + result[0] += -747.9426165897981; + } else { + result[0] += -245.0583143986272; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 184))) { + result[0] += 389.8618473333866; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -714.1912396897699; + } else { + result[0] += 105.56232441101614; + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + if (LIKELY(false || (data[6].qvalue <= 152))) { + if (LIKELY(false || (data[0].qvalue <= 452))) { + result[0] += 8.267421890159907; + } else { + result[0] += 532.9815498007549; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 450))) { + result[0] += -998.5163824104064; + } else { + result[0] += 67.53749679257139; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 38))) { + if (LIKELY(false || (data[0].qvalue <= 314))) { + result[0] += 128.68056584630503; + } else { + result[0] += 304.65594504617775; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 378))) { + result[0] += 38.864839831291896; + } else { + result[0] += 132.95011491298308; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 422))) { + if (LIKELY(false || (data[10].qvalue <= 116))) { + result[0] += -200.50051859519212; + } else { + result[0] += -570.0695420065967; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 76))) { + if (LIKELY(false || (data[2].qvalue <= 214))) { + result[0] += 284.7667993502012; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 452))) { + result[0] += -439.2896395697744; + } else { + result[0] += 195.2703219505385; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 448))) { + result[0] += -719.4958031369861; + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -48.870417355715304; + } else { + result[0] += 558.0516523176542; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 158))) { + if (LIKELY(false || (data[1].qvalue <= 102))) { + if (UNLIKELY(false || (data[0].qvalue <= 28))) { + result[0] += -132.8644217361974; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 20))) { + if (LIKELY(false || (data[5].qvalue <= 44))) { + if (UNLIKELY(false || (data[6].qvalue <= 0))) { + result[0] += 184.7567664677024; + } else { + result[0] += -31.01544671430151; + } + } else { + result[0] += 93.76937878382813; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 158))) { + result[0] += -70.3541078253051; + } else { + result[0] += -256.18565932246213; + } + } + } + } else { + result[0] += -186.9601821997848; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (UNLIKELY(false || (data[0].qvalue <= 272))) { + if (UNLIKELY(false || (data[7].qvalue <= 22))) { + if (LIKELY(false || (data[6].qvalue <= 32))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + result[0] += 301.99251039980805; + } else { + result[0] += 8.317443045497088; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 285.9447580619998; + } else { + result[0] += -199.02985809976943; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 34))) { + if (UNLIKELY(false || (data[7].qvalue <= 28))) { + result[0] += -757.7155546033136; + } else { + result[0] += -100.8798688536487; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + result[0] += -108.18906289397617; + } else { + result[0] += 16.427955171508895; + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 32))) { + if (LIKELY(false || (data[0].qvalue <= 450))) { + if (LIKELY(false || (data[6].qvalue <= 168))) { + result[0] += -31.079261573945725; + } else { + result[0] += -439.24160085690585; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 176))) { + result[0] += 196.17828368025368; + } else { + result[0] += -96.92802902335563; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 8))) { + if (LIKELY(false || (data[7].qvalue <= 150))) { + result[0] += 5.472886535824474; + } else { + result[0] += -322.6822453952255; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 104))) { + result[0] += 71.15954913651206; + } else { + result[0] += 170.40770380917564; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 434))) { + if (LIKELY(false || (data[1].qvalue <= 98))) { + result[0] += -431.7182112009705; + } else { + result[0] += -761.9070576282425; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 174))) { + if (LIKELY(false || (data[0].qvalue <= 450))) { + if (LIKELY(false || (data[3].qvalue <= 170))) { + result[0] += 223.6763270971365; + } else { + result[0] += -863.1311602081029; + } + } else { + result[0] += 633.5684765097419; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -822.025766073113; + } else { + if (LIKELY(false || (data[8].qvalue <= 144))) { + result[0] += -357.65137650350306; + } else { + result[0] += 161.10489597769953; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 230))) { + if (UNLIKELY(false || (data[0].qvalue <= 94))) { + result[0] += -92.57565870144303; + } else { + if (LIKELY(false || (data[7].qvalue <= 142))) { + result[0] += -18.109772204939024; + } else { + result[0] += -209.49572955406617; + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 116))) { + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + if (LIKELY(false || (data[0].qvalue <= 462))) { + if (LIKELY(false || (data[5].qvalue <= 102))) { + result[0] += 3.2575921560660412; + } else { + result[0] += -901.3597327991677; + } + } else { + result[0] += 507.2677983999125; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 128))) { + if (UNLIKELY(false || (data[5].qvalue <= 12))) { + result[0] += -59.086766158297294; + } else { + result[0] += 112.28787145525055; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 58))) { + result[0] += 323.6859359964492; + } else { + result[0] += -58.36682033574347; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 460))) { + if (LIKELY(false || (data[2].qvalue <= 84))) { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 300.9633139092185; + } else { + result[0] += -192.01700771623888; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 166))) { + result[0] += -1598.762896930197; + } else { + result[0] += -603.9295642909062; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 16))) { + if (UNLIKELY(false || (data[6].qvalue <= 174))) { + result[0] += 461.56736902180194; + } else { + result[0] += 8.737578542205178; + } + } else { + result[0] += -643.983821829543; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 440))) { + if (LIKELY(false || (data[6].qvalue <= 104))) { + if (UNLIKELY(false || (data[2].qvalue <= 148))) { + if (LIKELY(false || (data[2].qvalue <= 134))) { + result[0] += 179.91548506297573; + } else { + result[0] += -912.5235714991372; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 174))) { + result[0] += 301.8534329221878; + } else { + result[0] += 38.18076419361558; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 94))) { + if (LIKELY(false || (data[0].qvalue <= 406))) { + result[0] += -361.8709774599796; + } else { + result[0] += 270.3263916029786; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 6))) { + result[0] += 104.81627908969739; + } else { + result[0] += -535.2156269218489; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 170))) { + if (LIKELY(false || (data[4].qvalue <= 122))) { + if (UNLIKELY(false || (data[7].qvalue <= 114))) { + result[0] += 594.5833564826081; + } else { + result[0] += 64.11583431949299; + } + } else { + result[0] += 652.8845003559351; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -763.9957389916799; + } else { + if (UNLIKELY(false || (data[4].qvalue <= 102))) { + result[0] += 315.48470650562535; + } else { + result[0] += -203.67379945157032; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 154))) { + if (UNLIKELY(false || (data[4].qvalue <= 36))) { + if (UNLIKELY(false || (data[0].qvalue <= 12))) { + result[0] += -126.5718042782856; + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 111.5057760402395; + } else { + result[0] += -28.4538638965687; + } + } else { + result[0] += -313.9852425571987; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 142))) { + if (UNLIKELY(false || (data[9].qvalue <= 48))) { + result[0] += -159.27654309724835; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 54))) { + result[0] += -114.2328146382158; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 18))) { + result[0] += -169.53442010531597; + } else { + result[0] += -51.819614520602755; + } + } + } + } else { + result[0] += -237.49538829330947; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (UNLIKELY(false || (data[7].qvalue <= 16))) { + if (LIKELY(false || (data[5].qvalue <= 46))) { + if (LIKELY(false || (data[0].qvalue <= 290))) { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + result[0] += 287.9491607426837; + } else { + result[0] += 22.668345828570306; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 0))) { + result[0] += -77.53473409498233; + } else { + result[0] += 204.6386641118168; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 16))) { + result[0] += 593.7874627921306; + } else { + result[0] += 312.8524644986319; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 42))) { + if (UNLIKELY(false || (data[6].qvalue <= 24))) { + if (LIKELY(false || (data[2].qvalue <= 122))) { + result[0] += -142.83280406635228; + } else { + result[0] += -693.2501230727516; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 122))) { + result[0] += 37.74683956549833; + } else { + result[0] += -198.748856011046; + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 108))) { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + result[0] += -89.22275972296083; + } else { + result[0] += 86.32327653910806; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 442))) { + result[0] += -55.02130370736227; + } else { + result[0] += 177.35435049468094; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 432))) { + if (UNLIKELY(false || (data[2].qvalue <= 74))) { + result[0] += -882.5317237443489; + } else { + result[0] += -481.30010328266326; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 132))) { + if (LIKELY(false || (data[0].qvalue <= 450))) { + if (UNLIKELY(false || (data[7].qvalue <= 198))) { + result[0] += 647.320083633933; + } else { + result[0] += 38.11111140878454; + } + } else { + result[0] += 745.2928537341078; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -789.9197396160231; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 144))) { + result[0] += -357.5527732739339; + } else { + result[0] += 102.50578713894222; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 232))) { + if (LIKELY(false || (data[6].qvalue <= 116))) { + if (UNLIKELY(false || (data[0].qvalue <= 62))) { + result[0] += -81.42467173085487; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 16))) { + if (LIKELY(false || (data[5].qvalue <= 48))) { + result[0] += 9.502633625283542; + } else { + result[0] += 279.42600545633803; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 28))) { + if (UNLIKELY(false || (data[6].qvalue <= 18))) { + result[0] += -361.821022373354; + } else { + result[0] += -88.28610792754384; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 120))) { + result[0] += -20.102121037240444; + } else { + result[0] += -420.12674867604005; + } + } + } + } + } else { + result[0] += -159.62279487679336; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 38))) { + if (LIKELY(false || (data[10].qvalue <= 124))) { + if (UNLIKELY(false || (data[6].qvalue <= 10))) { + if (LIKELY(false || (data[7].qvalue <= 18))) { + result[0] += 88.01385578444962; + } else { + if (UNLIKELY(false || (data[6].qvalue <= 4))) { + result[0] += 493.08930447766016; + } else { + result[0] += -285.19813158141295; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 82))) { + result[0] += 106.27335201243972; + } else { + result[0] += 226.93623921451922; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 50))) { + if (LIKELY(false || (data[0].qvalue <= 412))) { + result[0] += -1343.712162589175; + } else { + result[0] += 132.58954443144634; + } + } else { + result[0] += 91.6622701261032; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 44))) { + if (LIKELY(false || (data[3].qvalue <= 38))) { + if (LIKELY(false || (data[4].qvalue <= 84))) { + if (LIKELY(false || (data[0].qvalue <= 368))) { + result[0] += -236.33002335788538; + } else { + result[0] += 20.986096347583267; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 128))) { + result[0] += 344.9152673102923; + } else { + result[0] += -105.45712817788554; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 328.6203717386541; + } else { + if (LIKELY(false || (data[0].qvalue <= 448))) { + result[0] += -650.199452548778; + } else { + result[0] += -47.9905026661036; + } + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 132))) { + if (LIKELY(false || (data[0].qvalue <= 366))) { + if (LIKELY(false || (data[2].qvalue <= 180))) { + result[0] += 42.03195219241939; + } else { + result[0] += -158.93413279533422; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 74))) { + result[0] += -34.071317241183344; + } else { + result[0] += 156.0271506153512; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 440))) { + if (UNLIKELY(false || (data[6].qvalue <= 102))) { + result[0] += 133.87095453891186; + } else { + result[0] += -228.7502100788784; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 108))) { + result[0] += 347.50113932020776; + } else { + result[0] += 14.191860694706207; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 230))) { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + if (LIKELY(false || (data[2].qvalue <= 122))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + result[0] += 177.32409766602134; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 10))) { + result[0] += -222.24918504973667; + } else { + result[0] += -35.30855443997724; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 4))) { + result[0] += -176.9665709227076; + } else { + result[0] += -753.9332796594691; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 116))) { + result[0] += -28.342368225619726; + } else { + if (UNLIKELY(false || (data[4].qvalue <= 8))) { + result[0] += 421.668098564295; + } else { + if (LIKELY(false || (data[3].qvalue <= 70))) { + result[0] += 43.5107313173905; + } else { + result[0] += 227.90844897767985; + } + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + result[0] += -57.17411824360336; + } else { + result[0] += -189.687353931762; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 202))) { + if (UNLIKELY(false || (data[3].qvalue <= 0))) { + if (LIKELY(false || (data[4].qvalue <= 54))) { + if (LIKELY(false || (data[9].qvalue <= 156))) { + result[0] += 143.81130747694493; + } else { + if (LIKELY(false || (data[0].qvalue <= 378))) { + result[0] += -412.9308129291959; + } else { + result[0] += 174.61191720224775; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 422))) { + if (LIKELY(false || (data[9].qvalue <= 88))) { + result[0] += -729.9085027051653; + } else { + result[0] += -1582.3957851064256; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 82))) { + result[0] += 255.17536551772466; + } else { + result[0] += -411.7747098985702; + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 0))) { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -687.4964190637061; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 156))) { + result[0] += 579.2665867229997; + } else { + result[0] += -202.15282940470115; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 378))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + result[0] += 87.73491261546084; + } else { + result[0] += -32.02969863953809; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 166))) { + result[0] += 64.28717265542389; + } else { + result[0] += 229.17843828172778; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[6].qvalue <= 176))) { + if (LIKELY(false || (data[0].qvalue <= 442))) { + if (UNLIKELY(false || (data[10].qvalue <= 116))) { + result[0] += -44.67740033036628; + } else { + result[0] += -394.97654476097733; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 222))) { + result[0] += 206.95175345062236; + } else { + result[0] += -422.41503848096585; + } + } + } else { + result[0] += -637.136586228852; + } + } else { + result[0] += 376.9378106237276; + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 162))) { + if (LIKELY(false || (data[1].qvalue <= 64))) { + if (UNLIKELY(false || (data[0].qvalue <= 4))) { + result[0] += -145.63790783710485; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 92.69381886875624; + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + result[0] += -29.52565796333189; + } else { + result[0] += -264.4833581843737; + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 80))) { + result[0] += -65.00862983366554; + } else { + result[0] += -128.3574858335414; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (LIKELY(false || (data[10].qvalue <= 116))) { + if (LIKELY(false || (data[0].qvalue <= 334))) { + if (LIKELY(false || (data[7].qvalue <= 44))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + result[0] += 2.638363742314313; + } else { + result[0] += 90.02482928313995; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 46))) { + result[0] += -159.46662656973402; + } else { + result[0] += -6.479090823017612; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + if (LIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -181.28880117455623; + } else { + result[0] += 348.385125441513; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 8))) { + result[0] += -95.60162541279402; + } else { + result[0] += 108.90915732052065; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 448))) { + if (LIKELY(false || (data[6].qvalue <= 104))) { + if (UNLIKELY(false || (data[8].qvalue <= 92))) { + result[0] += -264.2515788707746; + } else { + result[0] += 101.55470547812884; + } + } else { + if (LIKELY(false || (data[8].qvalue <= 148))) { + result[0] += -178.22322886037145; + } else { + result[0] += -572.0017383471462; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 176))) { + if (LIKELY(false || (data[2].qvalue <= 216))) { + result[0] += 360.0600154694514; + } else { + result[0] += -2.277701777775852; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -592.4446434254904; + } else { + result[0] += 311.89032842128967; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 434))) { + if (LIKELY(false || (data[1].qvalue <= 98))) { + if (LIKELY(false || (data[3].qvalue <= 170))) { + result[0] += -269.5216917245177; + } else { + result[0] += -763.2129798350616; + } + } else { + result[0] += -630.6861669974678; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 132))) { + if (LIKELY(false || (data[0].qvalue <= 452))) { + if (UNLIKELY(false || (data[1].qvalue <= 98))) { + result[0] += 491.16969306572764; + } else { + result[0] += -1.8378346933226348; + } + } else { + result[0] += 735.4661909012816; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -714.8393720262166; + } else { + if (LIKELY(false || (data[9].qvalue <= 124))) { + result[0] += -185.85739524553466; + } else { + result[0] += 534.3912650447504; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 234))) { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + if (LIKELY(false || (data[2].qvalue <= 122))) { + if (LIKELY(false || (data[7].qvalue <= 34))) { + result[0] += -17.516199145518204; + } else { + if (LIKELY(false || (data[5].qvalue <= 40))) { + result[0] += -106.45476753502933; + } else { + result[0] += -903.8252212915366; + } + } + } else { + result[0] += -429.58429904156355; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 108))) { + result[0] += -24.64488868029451; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 6))) { + result[0] += 442.263546116001; + } else { + result[0] += 55.12907189754925; + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + result[0] += -46.866944249531215; + } else { + result[0] += -159.04948180790302; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (UNLIKELY(false || (data[3].qvalue <= 0))) { + if (LIKELY(false || (data[7].qvalue <= 110))) { + result[0] += -7.415599980597751; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 420))) { + if (LIKELY(false || (data[2].qvalue <= 196))) { + result[0] += -1399.7966503370856; + } else { + result[0] += -610.3226946773707; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 96))) { + result[0] += 354.8632074839545; + } else { + result[0] += -377.08280725284146; + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 12))) { + if (LIKELY(false || (data[3].qvalue <= 54))) { + if (LIKELY(false || (data[0].qvalue <= 312))) { + result[0] += 78.41314807466219; + } else { + result[0] += 229.81473981996055; + } + } else { + result[0] += 656.2836431782829; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 370))) { + if (UNLIKELY(false || (data[6].qvalue <= 62))) { + result[0] += 59.2010780966252; + } else { + result[0] += -33.600013356306874; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + result[0] += -40.555513916002155; + } else { + result[0] += 98.08389359507521; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 412))) { + if (UNLIKELY(false || (data[8].qvalue <= 136))) { + if (LIKELY(false || (data[8].qvalue <= 122))) { + result[0] += -319.74524116929115; + } else { + result[0] += -767.7796729203433; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 82))) { + result[0] += 122.08054350335395; + } else { + result[0] += -301.9722820682188; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 114))) { + if (LIKELY(false || (data[7].qvalue <= 198))) { + result[0] += 313.9124097005065; + } else { + result[0] += -8.79354690498097; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[6].qvalue <= 180))) { + result[0] += -72.7211246346664; + } else { + result[0] += -510.6446546492408; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 188))) { + result[0] += 560.0836128367328; + } else { + result[0] += 17.985013800397187; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 148))) { + if (UNLIKELY(false || (data[4].qvalue <= 36))) { + if (UNLIKELY(false || (data[0].qvalue <= 2))) { + result[0] += -141.96709416955443; + } else { + result[0] += -15.705324092558222; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 152))) { + result[0] += -61.382323220403975; + } else { + result[0] += -187.5709723722473; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (LIKELY(false || (data[0].qvalue <= 352))) { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + if (LIKELY(false || (data[7].qvalue <= 16))) { + result[0] += 44.92602753182352; + } else { + result[0] += -211.1546710682142; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 70))) { + result[0] += 84.82973193387151; + } else { + result[0] += 297.5136117027242; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 54))) { + if (LIKELY(false || (data[8].qvalue <= 80))) { + result[0] += 17.890869053801463; + } else { + result[0] += -233.23923140903472; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 42))) { + result[0] += -115.18248063786203; + } else { + result[0] += 19.654733381799502; + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 38))) { + if (UNLIKELY(false || (data[8].qvalue <= 48))) { + if (UNLIKELY(false || (data[6].qvalue <= 108))) { + result[0] += 73.48571042663086; + } else { + result[0] += -228.89571182954265; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 412))) { + result[0] += -172.0892135464257; + } else { + result[0] += 115.89684884981358; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 74))) { + if (UNLIKELY(false || (data[2].qvalue <= 2))) { + result[0] += -192.70183326405345; + } else { + result[0] += 228.22523400305982; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 164))) { + result[0] += -14.495336023506457; + } else { + result[0] += 164.58338032467216; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 418))) { + if (UNLIKELY(false || (data[6].qvalue <= 110))) { + result[0] += 92.44304518392212; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 142))) { + result[0] += -549.5535909490123; + } else { + result[0] += -283.7280102420611; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 170))) { + if (UNLIKELY(false || (data[3].qvalue <= 110))) { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -751.8919251402435; + } else { + result[0] += 185.12085880810656; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 442))) { + result[0] += 106.48103521611722; + } else { + result[0] += 402.22669875787847; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 460))) { + if (UNLIKELY(false || (data[4].qvalue <= 14))) { + result[0] += -36.04581623793381; + } else { + result[0] += -576.7489848172199; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 204))) { + result[0] += 811.3997608577275; + } else { + result[0] += 54.07893988355018; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 190))) { + if (LIKELY(false || (data[1].qvalue <= 118))) { + if (UNLIKELY(false || (data[0].qvalue <= 40))) { + result[0] += -66.6266648878957; + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (UNLIKELY(false || (data[7].qvalue <= 0))) { + result[0] += 218.52501238124137; + } else { + if (LIKELY(false || (data[7].qvalue <= 184))) { + result[0] += -13.543587342327356; + } else { + result[0] += -215.10060994815572; + } + } + } else { + result[0] += -235.9788695560659; + } + } + } else { + result[0] += -124.00456972107527; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (LIKELY(false || (data[10].qvalue <= 122))) { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + if (LIKELY(false || (data[0].qvalue <= 462))) { + if (LIKELY(false || (data[5].qvalue <= 102))) { + result[0] += -18.111064688282102; + } else { + result[0] += -830.4402494030709; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 114))) { + result[0] += 549.3375303639418; + } else { + result[0] += -471.1944868822796; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 38))) { + if (LIKELY(false || (data[0].qvalue <= 306))) { + result[0] += 58.15499047997176; + } else { + result[0] += 202.68651234520863; + } + } else { + if (LIKELY(false || (data[5].qvalue <= 84))) { + result[0] += 5.292449943571185; + } else { + result[0] += 88.29265314882701; + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 136))) { + if (LIKELY(false || (data[0].qvalue <= 414))) { + if (LIKELY(false || (data[1].qvalue <= 128))) { + result[0] += -1380.2032030523546; + } else { + result[0] += -290.5204067553985; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 428))) { + result[0] += -114.53892152149231; + } else { + result[0] += 514.5293420314608; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 58))) { + if (UNLIKELY(false || (data[4].qvalue <= 48))) { + result[0] += -61.89760896238173; + } else { + result[0] += 238.5724533454146; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 440))) { + result[0] += -129.87454115256398; + } else { + result[0] += 80.61545671612187; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 436))) { + if (UNLIKELY(false || (data[0].qvalue <= 346))) { + result[0] += -205.98214313987833; + } else { + result[0] += -501.7217730494733; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 94))) { + if (UNLIKELY(false || (data[9].qvalue <= 74))) { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -1647.808151855469; + } else { + result[0] += -111.63345227808891; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 448))) { + result[0] += 135.65529729266765; + } else { + result[0] += 542.5839931646244; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -605.3494305872034; + } else { + if (LIKELY(false || (data[7].qvalue <= 200))) { + result[0] += 23.37492749029042; + } else { + result[0] += -504.33767145881876; + } + } + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (LIKELY(false || (data[3].qvalue <= 134))) { + if (UNLIKELY(false || (data[8].qvalue <= 6))) { + if (UNLIKELY(false || (data[2].qvalue <= 28))) { + result[0] += 147.6732444791957; + } else { + if (LIKELY(false || (data[2].qvalue <= 46))) { + result[0] += -10.796001760756946; + } else { + result[0] += 119.76151703653773; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 16))) { + result[0] += -80.64564777701082; + } else { + result[0] += -137.81918275187488; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 188))) { + result[0] += -301.13288115967265; + } else { + result[0] += -677.8347078414617; + } + } + } else { + if (LIKELY(false || (data[3].qvalue <= 102))) { + if (LIKELY(false || (data[7].qvalue <= 48))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + if (LIKELY(false || (data[1].qvalue <= 66))) { + result[0] += 177.5420680296681; + } else { + result[0] += -218.59350937699685; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 20))) { + result[0] += -72.74967625197816; + } else { + result[0] += 20.742745610897746; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 84))) { + if (LIKELY(false || (data[4].qvalue <= 78))) { + result[0] += -90.48006952525736; + } else { + result[0] += -828.7521117629706; + } + } else { + if (LIKELY(false || (data[5].qvalue <= 66))) { + result[0] += 120.02128594767765; + } else { + result[0] += -101.64457682331833; + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 32))) { + if (UNLIKELY(false || (data[8].qvalue <= 48))) { + if (LIKELY(false || (data[8].qvalue <= 24))) { + result[0] += -29.977235441363906; + } else { + result[0] += -517.2676348797457; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 138))) { + result[0] += 79.82233930374132; + } else { + result[0] += -65.66862801060336; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 176))) { + if (UNLIKELY(false || (data[2].qvalue <= 4))) { + result[0] += -246.17761450724754; + } else { + result[0] += 128.63335245716783; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 134))) { + result[0] += 25.606045866379198; + } else { + result[0] += -125.59366437898994; + } + } + } + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 32))) { + result[0] += -1022.6641412510904; + } else { + if (LIKELY(false || (data[7].qvalue <= 200))) { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 256.4779043789177; + } else { + if (LIKELY(false || (data[1].qvalue <= 126))) { + if (LIKELY(false || (data[4].qvalue <= 68))) { + result[0] += -87.1798391962891; + } else { + result[0] += 43.00407443678074; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 62))) { + result[0] += -130.4952039313895; + } else { + result[0] += -240.59828244124444; + } + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 164))) { + result[0] += -424.70860218767706; + } else { + result[0] += -596.5247694936194; + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 260))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + if (UNLIKELY(false || (data[8].qvalue <= 2))) { + result[0] += 174.72777599410847; + } else { + result[0] += 11.536037972850965; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 6))) { + if (LIKELY(false || (data[9].qvalue <= 96))) { + if (LIKELY(false || (data[7].qvalue <= 72))) { + if (LIKELY(false || (data[0].qvalue <= 132))) { + result[0] += -134.39734952576543; + } else { + result[0] += -367.3809402303026; + } + } else { + result[0] += 85.4518440435695; + } + } else { + result[0] += 30.478639892590373; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 124))) { + if (UNLIKELY(false || (data[0].qvalue <= 68))) { + result[0] += -50.893812931197466; + } else { + if (LIKELY(false || (data[2].qvalue <= 200))) { + result[0] += -4.69601116410534; + } else { + result[0] += -265.1217064966412; + } + } + } else { + result[0] += -104.769463503831; + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 64))) { + if (UNLIKELY(false || (data[9].qvalue <= 36))) { + result[0] += 428.87545627753326; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 38))) { + if (UNLIKELY(false || (data[0].qvalue <= 442))) { + result[0] += -1773.848510347332; + } else { + result[0] += 55.659715716371515; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 359.22617617244686; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 20))) { + result[0] += -44.87315510911888; + } else { + result[0] += 74.38569188031644; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 382))) { + if (UNLIKELY(false || (data[2].qvalue <= 140))) { + if (UNLIKELY(false || (data[3].qvalue <= 12))) { + if (UNLIKELY(false || (data[10].qvalue <= 40))) { + result[0] += -1410.981056764837; + } else { + result[0] += -637.2921393197482; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 56))) { + result[0] += -70.36055469147344; + } else { + result[0] += 175.7175261222059; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 146))) { + result[0] += -613.9698554996957; + } else { + if (LIKELY(false || (data[8].qvalue <= 154))) { + result[0] += -92.27993947562106; + } else { + result[0] += -383.51208633267225; + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 88))) { + if (UNLIKELY(false || (data[7].qvalue <= 80))) { + if (UNLIKELY(false || (data[2].qvalue <= 64))) { + result[0] += -11.98521625723796; + } else { + result[0] += -714.7673310356126; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 96))) { + result[0] += 284.6663603250349; + } else { + result[0] += -61.60569809902938; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 40))) { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -1260.7217681991474; + } else { + result[0] += 135.29439352456583; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 114))) { + result[0] += 204.2658499096425; + } else { + result[0] += 26.57998407789056; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 278))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[1].qvalue <= 10))) { + result[0] += 88.29835564376643; + } else { + result[0] += 433.48530065382903; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 158))) { + if (UNLIKELY(false || (data[4].qvalue <= 6))) { + if (UNLIKELY(false || (data[9].qvalue <= 134))) { + result[0] += 355.11754045683864; + } else { + result[0] += 50.689614734965716; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 10))) { + if (UNLIKELY(false || (data[9].qvalue <= 142))) { + result[0] += -67.45276443587568; + } else { + result[0] += -258.89458735449244; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 150))) { + result[0] += -21.308012427711205; + } else { + result[0] += 244.9050850901425; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 166))) { + result[0] += -159.91554699358346; + } else { + result[0] += -467.3807915108788; + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (LIKELY(false || (data[0].qvalue <= 460))) { + if (LIKELY(false || (data[2].qvalue <= 214))) { + if (UNLIKELY(false || (data[9].qvalue <= 2))) { + if (UNLIKELY(false || (data[3].qvalue <= 98))) { + result[0] += -888.854216500035; + } else { + result[0] += -60.23569984287638; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 10))) { + result[0] += -107.7332013111863; + } else { + result[0] += 44.24648581216298; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 32))) { + if (LIKELY(false || (data[0].qvalue <= 446))) { + result[0] += -226.15311381622098; + } else { + result[0] += 710.1847247221976; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -521.4681372500785; + } else { + result[0] += -121.81009418188125; + } + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 156))) { + if (LIKELY(false || (data[4].qvalue <= 108))) { + if (UNLIKELY(false || (data[9].qvalue <= 36))) { + result[0] += 578.8864769214038; + } else { + result[0] += -65.43432145431503; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -1140.9993698987162; + } else { + result[0] += -277.1047243415079; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 124))) { + if (LIKELY(false || (data[2].qvalue <= 218))) { + result[0] += 625.8667118148287; + } else { + result[0] += 133.56293016701576; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -257.6768725464037; + } else { + result[0] += 235.27089761951143; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (UNLIKELY(false || (data[4].qvalue <= 88))) { + if (LIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -510.7544463506701; + } else { + result[0] += 309.52861873156445; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 186))) { + result[0] += -475.554284048819; + } else { + result[0] += -931.2955241523669; + } + } + } else { + result[0] += 204.12262051329452; + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 148))) { + if (LIKELY(false || (data[1].qvalue <= 60))) { + if (UNLIKELY(false || (data[0].qvalue <= 4))) { + result[0] += -105.41033083691937; + } else { + if (LIKELY(false || (data[7].qvalue <= 118))) { + result[0] += -9.957538169716495; + } else { + result[0] += -116.44279176689716; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 80))) { + if (LIKELY(false || (data[7].qvalue <= 138))) { + result[0] += -36.47666931987333; + } else { + result[0] += -181.90346126434108; + } + } else { + result[0] += -92.3026154859271; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (LIKELY(false || (data[0].qvalue <= 380))) { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + if (UNLIKELY(false || (data[1].qvalue <= 34))) { + result[0] += 635.4266731901135; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 56))) { + result[0] += -956.053733984008; + } else { + result[0] += -104.63367772570996; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 132))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + result[0] += -20.173896617001866; + } else { + result[0] += 65.25040747317426; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 136))) { + result[0] += -570.0499875540565; + } else { + result[0] += -4.395243842020363; + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 96))) { + if (LIKELY(false || (data[2].qvalue <= 150))) { + if (UNLIKELY(false || (data[7].qvalue <= 56))) { + result[0] += 82.3347657422682; + } else { + result[0] += -151.85939690566192; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 202))) { + result[0] += 306.7932008675081; + } else { + result[0] += -193.98901427646175; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 30))) { + if (UNLIKELY(false || (data[10].qvalue <= 80))) { + result[0] += -101.31894339788478; + } else { + result[0] += 57.66783954209549; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 160))) { + result[0] += 304.3481193163534; + } else { + result[0] += 101.37427473767308; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 438))) { + if (LIKELY(false || (data[1].qvalue <= 98))) { + if (LIKELY(false || (data[3].qvalue <= 170))) { + result[0] += -173.79619958480487; + } else { + result[0] += -611.7431741530887; + } + } else { + result[0] += -479.5994428796921; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 174))) { + if (LIKELY(false || (data[0].qvalue <= 452))) { + if (LIKELY(false || (data[2].qvalue <= 194))) { + result[0] += 205.09978107028988; + } else { + result[0] += -552.4708829680587; + } + } else { + result[0] += 465.99445615301477; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 466))) { + if (UNLIKELY(false || (data[6].qvalue <= 180))) { + result[0] += -38.14040161366896; + } else { + result[0] += -654.1612824926979; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 164))) { + result[0] += 89.03239010969071; + } else { + result[0] += -334.95950567599243; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 324))) { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + if (LIKELY(false || (data[3].qvalue <= 70))) { + if (LIKELY(false || (data[3].qvalue <= 66))) { + if (LIKELY(false || (data[3].qvalue <= 34))) { + if (LIKELY(false || (data[2].qvalue <= 122))) { + result[0] += -18.433723366725584; + } else { + result[0] += -472.18904133491907; + } + } else { + result[0] += 41.144657963793065; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 126))) { + if (UNLIKELY(false || (data[2].qvalue <= 70))) { + result[0] += -791.0825554146527; + } else { + result[0] += -77.87558566467489; + } + } else { + result[0] += -1273.8300701538087; + } + } + } else { + result[0] += 162.91309292890534; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + result[0] += -374.78034701138165; + } else { + result[0] += -30.23314802748434; + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 116))) { + if (LIKELY(false || (data[1].qvalue <= 152))) { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + if (LIKELY(false || (data[0].qvalue <= 458))) { + if (LIKELY(false || (data[6].qvalue <= 82))) { + result[0] += 19.475576346353392; + } else { + result[0] += -352.9733362293148; + } + } else { + result[0] += 349.1018486741264; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 28))) { + if (LIKELY(false || (data[0].qvalue <= 416))) { + result[0] += 212.94808346197698; + } else { + result[0] += -374.8210809697834; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 46))) { + result[0] += -34.43423741643343; + } else { + result[0] += 75.34901971114162; + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 312.18232536892253; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 454))) { + result[0] += -380.2729414924723; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 74))) { + result[0] += 384.2278192657615; + } else { + result[0] += -48.08512286278479; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 448))) { + if (LIKELY(false || (data[6].qvalue <= 124))) { + if (UNLIKELY(false || (data[2].qvalue <= 148))) { + result[0] += -339.175501983996; + } else { + result[0] += 73.82088972486432; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 122))) { + if (UNLIKELY(false || (data[3].qvalue <= 114))) { + result[0] += -690.6636434137862; + } else { + result[0] += -233.4604631893612; + } + } else { + result[0] += 61.13592985094737; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 176))) { + if (LIKELY(false || (data[2].qvalue <= 216))) { + if (LIKELY(false || (data[0].qvalue <= 466))) { + result[0] += 336.27393994775974; + } else { + result[0] += -1244.2348096296523; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -288.7929275242086; + } else { + result[0] += 303.1087488248426; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -522.5254243038571; + } else { + result[0] += 147.10931954259047; + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 150))) { + if (UNLIKELY(false || (data[0].qvalue <= 2))) { + result[0] += -114.0935731263952; + } else { + if (LIKELY(false || (data[1].qvalue <= 102))) { + if (LIKELY(false || (data[4].qvalue <= 36))) { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (LIKELY(false || (data[5].qvalue <= 42))) { + result[0] += -18.695361121395486; + } else { + result[0] += 10.588982002368144; + } + } else { + result[0] += -172.85563078964458; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + if (LIKELY(false || (data[10].qvalue <= 22))) { + result[0] += -51.938265058110325; + } else { + result[0] += -246.80507747694102; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 94))) { + result[0] += -27.959993628946236; + } else { + result[0] += 73.25664987336096; + } + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 154))) { + if (UNLIKELY(false || (data[8].qvalue <= 66))) { + if (UNLIKELY(false || (data[9].qvalue <= 34))) { + result[0] += -101.97494811248283; + } else { + result[0] += 17.881559562839353; + } + } else { + result[0] += -100.47614812809579; + } + } else { + result[0] += -213.33125633625428; + } + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[1].qvalue <= 10))) { + if (UNLIKELY(false || (data[2].qvalue <= 24))) { + if (LIKELY(false || (data[0].qvalue <= 222))) { + result[0] += 434.3221282795032; + } else { + result[0] += 823.0770567103796; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 258))) { + if (LIKELY(false || (data[2].qvalue <= 128))) { + result[0] += 87.83445346165345; + } else { + result[0] += -212.4695657744424; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 128))) { + result[0] += 464.3977186968871; + } else { + result[0] += 163.9938488166673; + } + } + } + } else { + result[0] += 637.6504024717447; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 154))) { + if (UNLIKELY(false || (data[8].qvalue <= 0))) { + if (LIKELY(false || (data[0].qvalue <= 406))) { + if (LIKELY(false || (data[9].qvalue <= 90))) { + result[0] += 282.83342280184934; + } else { + result[0] += 120.66910156313747; + } + } else { + result[0] += 1028.390014143319; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (LIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -292.64750935522983; + } else { + result[0] += 696.0738841280834; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + result[0] += -98.38508109993742; + } else { + result[0] += 20.07618047102507; + } + } + } + } else { + if (LIKELY(false || (data[3].qvalue <= 20))) { + if (LIKELY(false || (data[0].qvalue <= 384))) { + if (UNLIKELY(false || (data[1].qvalue <= 14))) { + result[0] += -153.79848410394547; + } else { + result[0] += -491.2192559340425; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 430))) { + result[0] += 170.48551507912254; + } else { + result[0] += 726.1771275054432; + } + } + } else { + result[0] += 582.2067601142297; + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 384))) { + if (LIKELY(false || (data[6].qvalue <= 144))) { + if (LIKELY(false || (data[4].qvalue <= 120))) { + if (LIKELY(false || (data[8].qvalue <= 138))) { + if (LIKELY(false || (data[3].qvalue <= 122))) { + if (LIKELY(false || (data[6].qvalue <= 78))) { + result[0] += 6.611296900045464; + } else { + result[0] += -146.7747758716442; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 106))) { + result[0] += 117.39498296773701; + } else { + result[0] += -52.139056254522906; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 64))) { + result[0] += -786.3744050601753; + } else { + result[0] += -75.75876514356197; + } + } + } else { + result[0] += -251.69019507498334; + } + } else { + result[0] += -159.81350735348502; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + if (UNLIKELY(false || (data[0].qvalue <= 450))) { + if (LIKELY(false || (data[3].qvalue <= 168))) { + if (UNLIKELY(false || (data[8].qvalue <= 78))) { + result[0] += -655.1579692026768; + } else { + result[0] += -234.92840903417084; + } + } else { + result[0] += 129.69681018242764; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (LIKELY(false || (data[7].qvalue <= 174))) { + if (UNLIKELY(false || (data[8].qvalue <= 54))) { + result[0] += -139.81070363809008; + } else { + result[0] += 236.49651798822757; + } + } else { + result[0] += 997.2638005655676; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 468))) { + result[0] += -440.5340347804863; + } else { + result[0] += 133.34746925104315; + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 96))) { + if (LIKELY(false || (data[3].qvalue <= 76))) { + if (UNLIKELY(false || (data[8].qvalue <= 80))) { + if (UNLIKELY(false || (data[9].qvalue <= 16))) { + result[0] += 953.3985019502752; + } else { + result[0] += 115.36154895354196; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 68))) { + result[0] += -75.90121598789128; + } else { + result[0] += 321.1318897275337; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 78))) { + if (LIKELY(false || (data[0].qvalue <= 442))) { + result[0] += -896.7922993531249; + } else { + result[0] += -192.64263220387716; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 82))) { + result[0] += -157.71189744728704; + } else { + result[0] += 388.1757773943014; + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 128))) { + if (LIKELY(false || (data[0].qvalue <= 426))) { + if (LIKELY(false || (data[6].qvalue <= 140))) { + result[0] += 239.69457507298395; + } else { + result[0] += -339.18339092719594; + } + } else { + result[0] += 440.89438255994565; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 48))) { + if (LIKELY(false || (data[8].qvalue <= 28))) { + result[0] += 94.78432124431386; + } else { + result[0] += -585.5090672534587; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 118))) { + result[0] += 261.6165504519048; + } else { + result[0] += -0.9669883257810821; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 130))) { + if (UNLIKELY(false || (data[0].qvalue <= 6))) { + result[0] += -85.38452227119578; + } else { + if (LIKELY(false || (data[1].qvalue <= 60))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + result[0] += 71.39641555324053; + } else { + result[0] += -12.679337084314707; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 70))) { + if (UNLIKELY(false || (data[9].qvalue <= 46))) { + result[0] += -328.3515745861993; + } else { + result[0] += -24.65068942389365; + } + } else { + result[0] += -65.54687009952785; + } + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 122))) { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[1].qvalue <= 10))) { + if (UNLIKELY(false || (data[2].qvalue <= 24))) { + result[0] += 448.8186025793332; + } else { + if (LIKELY(false || (data[0].qvalue <= 262))) { + result[0] += 28.507570304853807; + } else { + result[0] += 334.06403364310586; + } + } + } else { + result[0] += 565.4514138078015; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 154))) { + if (LIKELY(false || (data[9].qvalue <= 148))) { + if (UNLIKELY(false || (data[1].qvalue <= 16))) { + result[0] += -96.69821678256282; + } else { + result[0] += 21.631660184261378; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 204))) { + result[0] += 354.2452862416222; + } else { + result[0] += -51.61557505903728; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 12))) { + if (LIKELY(false || (data[0].qvalue <= 372))) { + result[0] += -328.0629299855443; + } else { + result[0] += 135.76618575654695; + } + } else { + result[0] += 497.32398174001605; + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 136))) { + if (LIKELY(false || (data[0].qvalue <= 416))) { + if (UNLIKELY(false || (data[2].qvalue <= 134))) { + if (UNLIKELY(false || (data[0].qvalue <= 354))) { + result[0] += -15.704885988027627; + } else { + result[0] += -432.2700366601563; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 398))) { + result[0] += -1249.1625247677366; + } else { + result[0] += -669.3611102779328; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 428))) { + result[0] += -58.35712345907448; + } else { + if (LIKELY(false || (data[0].qvalue <= 438))) { + result[0] += 338.03248915141415; + } else { + result[0] += 777.2208330972686; + } + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 84))) { + if (LIKELY(false || (data[0].qvalue <= 426))) { + if (LIKELY(false || (data[1].qvalue <= 154))) { + result[0] += 120.71513738942753; + } else { + result[0] += -308.17490477956534; + } + } else { + result[0] += 833.731541937934; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 438))) { + if (UNLIKELY(false || (data[9].qvalue <= 6))) { + result[0] += 149.7928136109008; + } else { + result[0] += -223.98734671939062; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 152))) { + result[0] += 365.23772234337326; + } else { + result[0] += -24.192037541431123; + } + } + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (LIKELY(false || (data[0].qvalue <= 388))) { + if (LIKELY(false || (data[1].qvalue <= 124))) { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (LIKELY(false || (data[3].qvalue <= 122))) { + if (LIKELY(false || (data[8].qvalue <= 116))) { + result[0] += 4.536955058087513; + } else { + result[0] += -72.38105398500589; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + result[0] += -344.4937340104233; + } else { + result[0] += 54.04370832477616; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 352))) { + result[0] += -155.8470275480651; + } else { + result[0] += -517.2572856835848; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 64))) { + if (LIKELY(false || (data[8].qvalue <= 64))) { + if (UNLIKELY(false || (data[3].qvalue <= 62))) { + result[0] += 140.37617439343134; + } else { + result[0] += -55.112467004567236; + } + } else { + result[0] += 774.980876755253; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 346))) { + if (UNLIKELY(false || (data[8].qvalue <= 38))) { + result[0] += -397.1965385921262; + } else { + result[0] += -92.74338066885616; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 36))) { + result[0] += 475.3460832912639; + } else { + result[0] += -489.2890878250337; + } + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 96))) { + if (UNLIKELY(false || (data[8].qvalue <= 10))) { + result[0] += 386.3622072624303; + } else { + if (LIKELY(false || (data[2].qvalue <= 150))) { + if (LIKELY(false || (data[0].qvalue <= 452))) { + result[0] += -138.2046386901946; + } else { + result[0] += 109.19811106405169; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 202))) { + result[0] += 279.484508613883; + } else { + result[0] += -160.50141487078906; + } + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 30))) { + if (UNLIKELY(false || (data[8].qvalue <= 48))) { + if (LIKELY(false || (data[2].qvalue <= 46))) { + result[0] += 96.50006530829478; + } else { + result[0] += -311.4323024052799; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 114))) { + result[0] += 221.35954689453885; + } else { + result[0] += -11.21222614010631; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 12))) { + if (LIKELY(false || (data[0].qvalue <= 462))) { + result[0] += -250.44082389008545; + } else { + result[0] += 259.77584765088653; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 86))) { + result[0] += 361.4943477960914; + } else { + result[0] += 91.34722785277711; + } + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 436))) { + if (UNLIKELY(false || (data[0].qvalue <= 350))) { + result[0] += -160.4323829658149; + } else { + result[0] += -425.59656420877354; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 146))) { + result[0] += 180.15448233040198; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -433.82026787118116; + } else { + result[0] += -59.26399277285995; + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 178))) { + if (LIKELY(false || (data[0].qvalue <= 460))) { + if (UNLIKELY(false || (data[9].qvalue <= 2))) { + if (LIKELY(false || (data[7].qvalue <= 152))) { + if (LIKELY(false || (data[0].qvalue <= 442))) { + result[0] += -308.1244882642975; + } else { + if (LIKELY(false || (data[1].qvalue <= 154))) { + result[0] += 509.7187192299381; + } else { + result[0] += -455.2150339355469; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 428))) { + result[0] += -189.49508286587985; + } else { + result[0] += -760.2093864229564; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 212))) { + if (LIKELY(false || (data[7].qvalue <= 166))) { + if (LIKELY(false || (data[7].qvalue <= 164))) { + result[0] += 3.4436391818207746; + } else { + result[0] += -682.0310140822548; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 388))) { + result[0] += 92.34103234198413; + } else { + result[0] += 380.69157714277566; + } + } + } else { + result[0] += -165.7227313266843; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 156))) { + if (LIKELY(false || (data[4].qvalue <= 108))) { + if (UNLIKELY(false || (data[9].qvalue <= 36))) { + result[0] += 549.4816677105838; + } else { + if (LIKELY(false || (data[1].qvalue <= 58))) { + result[0] += 194.29034273702734; + } else { + result[0] += -515.5656251305937; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + if (UNLIKELY(false || (data[9].qvalue <= 6))) { + result[0] += 310.8152990014213; + } else { + result[0] += -1141.971330367939; + } + } else { + result[0] += -60.374245828492555; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 176))) { + result[0] += 351.2745651968375; + } else { + result[0] += 86.38384679612086; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 416))) { + if (UNLIKELY(false || (data[2].qvalue <= 162))) { + if (LIKELY(false || (data[2].qvalue <= 142))) { + result[0] += -197.5086799826019; + } else { + if (LIKELY(false || (data[0].qvalue <= 332))) { + result[0] += -304.0321579015652; + } else { + result[0] += -814.8576146638849; + } + } + } else { + result[0] += -77.31156609799358; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 114))) { + if (LIKELY(false || (data[7].qvalue <= 198))) { + result[0] += 364.03422368739893; + } else { + if (LIKELY(false || (data[0].qvalue <= 448))) { + result[0] += -262.12017291355977; + } else { + result[0] += 355.1467205041031; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (LIKELY(false || (data[2].qvalue <= 222))) { + result[0] += 34.07266539417179; + } else { + result[0] += -461.43650166079533; + } + } else { + result[0] += -392.65138527512556; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 188))) { + result[0] += 424.2608283476747; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -401.74970612209404; + } else { + result[0] += 149.51617217256646; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 318))) { + if (UNLIKELY(false || (data[7].qvalue <= 16))) { + if (LIKELY(false || (data[5].qvalue <= 46))) { + result[0] += 6.274580412893512; + } else { + result[0] += 170.65850290785482; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 28))) { + if (UNLIKELY(false || (data[7].qvalue <= 28))) { + result[0] += -321.26713386171923; + } else { + if (LIKELY(false || (data[10].qvalue <= 24))) { + if (UNLIKELY(false || (data[3].qvalue <= 0))) { + result[0] += -883.3009822319002; + } else { + result[0] += -30.131313419296436; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 44))) { + result[0] += 145.86018225187783; + } else { + result[0] += -533.3653617127321; + } + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + if (LIKELY(false || (data[8].qvalue <= 114))) { + if (UNLIKELY(false || (data[3].qvalue <= 32))) { + result[0] += -64.30753875016195; + } else { + result[0] += 70.480411627325; + } + } else { + if (LIKELY(false || (data[8].qvalue <= 132))) { + result[0] += -282.2392212961099; + } else { + result[0] += 67.65616622116742; + } + } + } else { + result[0] += -24.453451185191113; + } + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 116))) { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + if (LIKELY(false || (data[0].qvalue <= 466))) { + if (UNLIKELY(false || (data[8].qvalue <= 26))) { + result[0] += -607.6977777871718; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 442))) { + result[0] += -273.64198324787975; + } else { + result[0] += 45.47924148212965; + } + } + } else { + result[0] += 215.11954437098967; + } + } else { + result[0] += 45.61441585551685; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 440))) { + if (LIKELY(false || (data[6].qvalue <= 124))) { + if (LIKELY(false || (data[0].qvalue <= 414))) { + if (UNLIKELY(false || (data[6].qvalue <= 42))) { + result[0] += -454.72131809174454; + } else { + result[0] += 0.13988354882495432; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 56))) { + result[0] += 38.582001327207514; + } else { + result[0] += 525.027278044562; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 20))) { + if (UNLIKELY(false || (data[6].qvalue <= 132))) { + result[0] += -698.2304850150304; + } else { + result[0] += -55.05691788452429; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 130))) { + result[0] += -678.96875763753; + } else { + result[0] += -223.46103248730483; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 170))) { + if (LIKELY(false || (data[4].qvalue <= 122))) { + if (LIKELY(false || (data[9].qvalue <= 16))) { + result[0] += -90.61321602369662; + } else { + result[0] += 261.0377505016003; + } + } else { + result[0] += 460.8651737454858; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -518.3029692442528; + } else { + if (UNLIKELY(false || (data[4].qvalue <= 102))) { + result[0] += 251.99443410934916; + } else { + result[0] += -168.5258406883114; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 88))) { + if (UNLIKELY(false || (data[0].qvalue <= 2))) { + result[0] += -93.52752297088361; + } else { + if (LIKELY(false || (data[1].qvalue <= 92))) { + if (UNLIKELY(false || (data[1].qvalue <= 38))) { + if (UNLIKELY(false || (data[0].qvalue <= 22))) { + result[0] += -28.36906776557731; + } else { + if (LIKELY(false || (data[4].qvalue <= 94))) { + result[0] += -3.2423893406338657; + } else { + result[0] += 138.541654434318; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 22))) { + result[0] += -46.28911386105568; + } else { + if (LIKELY(false || (data[3].qvalue <= 150))) { + result[0] += -22.298009717414818; + } else { + result[0] += -81.59335058549901; + } + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 88))) { + if (LIKELY(false || (data[4].qvalue <= 132))) { + result[0] += -66.17939589753632; + } else { + result[0] += -205.33954408242673; + } + } else { + result[0] += 8.493295470465377; + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 2))) { + if (LIKELY(false || (data[5].qvalue <= 6))) { + if (LIKELY(false || (data[0].qvalue <= 264))) { + if (UNLIKELY(false || (data[5].qvalue <= 0))) { + if (LIKELY(false || (data[0].qvalue <= 224))) { + result[0] += 326.1908307836102; + } else { + result[0] += 712.5672110896917; + } + } else { + result[0] += 165.97117149989344; + } + } else { + result[0] += 512.7394095968342; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 290))) { + result[0] += -88.61952314673287; + } else { + result[0] += 257.63671136127255; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 8))) { + if (LIKELY(false || (data[0].qvalue <= 464))) { + if (LIKELY(false || (data[3].qvalue <= 166))) { + if (LIKELY(false || (data[4].qvalue <= 82))) { + result[0] += -64.2972163819839; + } else { + result[0] += -678.5598075508324; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -743.1003478462518; + } else { + result[0] += 22.380609435146425; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (LIKELY(false || (data[1].qvalue <= 2))) { + result[0] += 668.8097667814556; + } else { + result[0] += 172.2438155603644; + } + } else { + result[0] += -103.48953914040013; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 14))) { + if (LIKELY(false || (data[9].qvalue <= 150))) { + if (UNLIKELY(false || (data[9].qvalue <= 112))) { + result[0] += 210.79347178390668; + } else { + result[0] += -66.34475012824036; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 8))) { + result[0] += 79.44162948474975; + } else { + result[0] += 423.58682172057206; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 154))) { + if (UNLIKELY(false || (data[1].qvalue <= 34))) { + result[0] += 58.63080216913425; + } else { + result[0] += 0.56660428680394; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 388))) { + result[0] += -354.99641077814067; + } else { + result[0] += 163.82541020213822; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 428))) { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[0].qvalue <= 364))) { + result[0] += -4.43201919585627; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 2))) { + if (LIKELY(false || (data[3].qvalue <= 148))) { + if (LIKELY(false || (data[4].qvalue <= 96))) { + result[0] += -461.34171468334483; + } else { + result[0] += -1974.1613360699155; + } + } else { + result[0] += 464.12272874367665; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 16))) { + if (UNLIKELY(false || (data[2].qvalue <= 116))) { + result[0] += -2425.075299189815; + } else { + result[0] += -330.4655251495208; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 72))) { + result[0] += 166.19696442999253; + } else { + result[0] += 9.303779051447991; + } + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 42))) { + result[0] += -481.590488188672; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 84))) { + if (UNLIKELY(false || (data[2].qvalue <= 154))) { + result[0] += -201.8241770516161; + } else { + if (LIKELY(false || (data[0].qvalue <= 400))) { + result[0] += 51.47643177844849; + } else { + result[0] += 557.8587391367337; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 356))) { + result[0] += -68.84035678322192; + } else { + result[0] += -259.4256502962815; + } + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 104))) { + if (LIKELY(false || (data[5].qvalue <= 100))) { + if (UNLIKELY(false || (data[0].qvalue <= 454))) { + if (UNLIKELY(false || (data[4].qvalue <= 78))) { + result[0] += 156.73836309324633; + } else { + if (LIKELY(false || (data[4].qvalue <= 138))) { + result[0] += -607.0488890568329; + } else { + result[0] += 9.03668551193256; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 100))) { + if (UNLIKELY(false || (data[6].qvalue <= 40))) { + result[0] += -1017.2723550166346; + } else { + result[0] += 91.3800484259382; + } + } else { + result[0] += -942.7966219121105; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 26))) { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + if (UNLIKELY(false || (data[4].qvalue <= 114))) { + result[0] += 346.1853239128133; + } else { + result[0] += -870.2639669615536; + } + } else { + result[0] += 136.37634000604268; + } + } else { + result[0] += 483.8076223319302; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 104))) { + if (UNLIKELY(false || (data[8].qvalue <= 76))) { + result[0] += 632.1932202439316; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 114))) { + result[0] += 859.7961658998064; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 110))) { + result[0] += 6.445434627545658; + } else { + result[0] += 232.20573728889286; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 450))) { + if (UNLIKELY(false || (data[10].qvalue <= 62))) { + result[0] += -766.6301885459153; + } else { + result[0] += -66.31539816252801; + } + } else { + result[0] += 73.09136207866754; + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (LIKELY(false || (data[10].qvalue <= 122))) { + if (LIKELY(false || (data[3].qvalue <= 102))) { + if (LIKELY(false || (data[7].qvalue <= 44))) { + if (UNLIKELY(false || (data[6].qvalue <= 24))) { + if (LIKELY(false || (data[7].qvalue <= 8))) { + result[0] += 28.34884333239003; + } else { + result[0] += -46.41782030735656; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 73.71689530827445; + } else { + result[0] += 1.4571574901978375; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 68))) { + if (UNLIKELY(false || (data[3].qvalue <= 2))) { + result[0] += -319.74328138081097; + } else { + result[0] += 28.72734330489961; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 74))) { + result[0] += -392.30286128897285; + } else { + result[0] += -56.63552344488173; + } + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (LIKELY(false || (data[8].qvalue <= 148))) { + if (UNLIKELY(false || (data[3].qvalue <= 134))) { + result[0] += -61.214933692783916; + } else { + result[0] += -207.6113943042064; + } + } else { + result[0] += -632.4056164963666; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 36))) { + if (LIKELY(false || (data[8].qvalue <= 28))) { + result[0] += 0.776759507119416; + } else { + result[0] += -563.4328564341248; + } + } else { + if (LIKELY(false || (data[8].qvalue <= 140))) { + result[0] += 80.66532309064849; + } else { + result[0] += -37.64303198703362; + } + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 128))) { + if (LIKELY(false || (data[9].qvalue <= 80))) { + if (UNLIKELY(false || (data[1].qvalue <= 142))) { + result[0] += 3.0907605764638824; + } else { + result[0] += -191.9470886917915; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 42))) { + result[0] += -535.3539427021846; + } else { + result[0] += -747.1267209093315; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 84))) { + if (UNLIKELY(false || (data[4].qvalue <= 48))) { + if (UNLIKELY(false || (data[1].qvalue <= 48))) { + result[0] += 39.65250739364507; + } else { + result[0] += -83.33176284276131; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 144))) { + result[0] += 137.86981971555903; + } else { + result[0] += -47.78103683170895; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 6))) { + if (LIKELY(false || (data[8].qvalue <= 108))) { + result[0] += 265.07465212054825; + } else { + result[0] += -112.5511941043707; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 130))) { + result[0] += -185.10561993544567; + } else { + result[0] += -55.42540507337779; + } + } + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 200))) { + if (LIKELY(false || (data[1].qvalue <= 164))) { + result[0] += -71.05204569728328; + } else { + if (LIKELY(false || (data[3].qvalue <= 174))) { + result[0] += -229.96092256575585; + } else { + result[0] += -663.863689584278; + } + } + } else { + result[0] += -404.1617066176453; + } + } + if (LIKELY(false || (data[0].qvalue <= 434))) { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[0].qvalue <= 374))) { + if (LIKELY(false || (data[4].qvalue <= 120))) { + if (LIKELY(false || (data[2].qvalue <= 176))) { + if (LIKELY(false || (data[5].qvalue <= 80))) { + result[0] += -4.033785114047066; + } else { + result[0] += 51.688746674364644; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 42))) { + result[0] += -308.48199870168713; + } else { + result[0] += -36.130910480232906; + } + } + } else { + result[0] += -195.1197382760659; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 2))) { + if (LIKELY(false || (data[4].qvalue <= 110))) { + if (LIKELY(false || (data[4].qvalue <= 96))) { + result[0] += -408.3567730759215; + } else { + result[0] += -1707.6448987068966; + } + } else { + result[0] += 485.8821080747238; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 16))) { + if (UNLIKELY(false || (data[2].qvalue <= 116))) { + result[0] += -2096.4662113083464; + } else { + result[0] += -276.14738618844734; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 68))) { + result[0] += -8.171064906722554; + } else { + result[0] += 155.11413889664198; + } + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 46))) { + result[0] += -426.0821034776104; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 84))) { + if (UNLIKELY(false || (data[2].qvalue <= 154))) { + if (LIKELY(false || (data[0].qvalue <= 420))) { + result[0] += -251.87299899417505; + } else { + result[0] += 171.7813777604329; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 188))) { + result[0] += 266.03680241951855; + } else { + result[0] += -230.94110204440864; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 98))) { + if (UNLIKELY(false || (data[9].qvalue <= 14))) { + result[0] += -51.4712788511078; + } else { + result[0] += -349.1585791389674; + } + } else { + result[0] += -87.62955894052737; + } + } + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 14))) { + result[0] += -962.914071451823; + } else { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (UNLIKELY(false || (data[2].qvalue <= 104))) { + if (UNLIKELY(false || (data[10].qvalue <= 4))) { + result[0] += 410.01446934535943; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 144))) { + result[0] += -158.79533984060308; + } else { + result[0] += 54.19442054392661; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 64))) { + result[0] += 800.9582024560236; + } else { + if (LIKELY(false || (data[2].qvalue <= 214))) { + result[0] += 152.70184903575273; + } else { + result[0] += -39.258349058638146; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + if (LIKELY(false || (data[10].qvalue <= 106))) { + if (UNLIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -385.9941662617043; + } else { + result[0] += 109.98038694597618; + } + } else { + result[0] += -519.7043617216136; + } + } else { + result[0] += 312.6413190598439; + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 450))) { + if (LIKELY(false || (data[6].qvalue <= 146))) { + if (LIKELY(false || (data[7].qvalue <= 198))) { + if (LIKELY(false || (data[0].qvalue <= 366))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + if (LIKELY(false || (data[1].qvalue <= 74))) { + result[0] += 137.297555227154; + } else { + result[0] += -254.9833262356102; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + result[0] += -123.87983644564534; + } else { + result[0] += -2.4145863679975013; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 6))) { + if (LIKELY(false || (data[6].qvalue <= 82))) { + result[0] += 51.07636765965205; + } else { + result[0] += -390.1200629057833; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 28))) { + result[0] += 215.48035596295546; + } else { + result[0] += 39.12939193887718; + } + } + } + } else { + result[0] += -235.5281315434429; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 28))) { + result[0] += -534.086589766028; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 156))) { + if (LIKELY(false || (data[7].qvalue <= 166))) { + result[0] += -253.52810060662947; + } else { + if (UNLIKELY(false || (data[6].qvalue <= 164))) { + result[0] += 451.54660129512826; + } else { + result[0] += -6.922248765373611; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 168))) { + if (LIKELY(false || (data[0].qvalue <= 422))) { + result[0] += -87.41127255128306; + } else { + result[0] += 161.7062666527756; + } + } else { + if (LIKELY(false || (data[8].qvalue <= 96))) { + result[0] += -362.4373441748169; + } else { + result[0] += -47.688779690032106; + } + } + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (UNLIKELY(false || (data[6].qvalue <= 70))) { + if (LIKELY(false || (data[3].qvalue <= 118))) { + result[0] += -109.71948701870082; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -3168.276688179348; + } else { + result[0] += -655.8877042161603; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 108))) { + if (UNLIKELY(false || (data[7].qvalue <= 148))) { + if (LIKELY(false || (data[6].qvalue <= 138))) { + result[0] += 211.55392830640332; + } else { + result[0] += 544.2392075566188; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 460))) { + result[0] += -137.766397826111; + } else { + result[0] += 227.29084096946204; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 114))) { + if (UNLIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -1219.702639935603; + } else { + result[0] += 19.073660451518418; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 168))) { + result[0] += 180.8219311362737; + } else { + result[0] += -23.62961539090255; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (UNLIKELY(false || (data[4].qvalue <= 88))) { + result[0] += -24.096509384288314; + } else { + result[0] += -474.0824089535872; + } + } else { + result[0] += 151.94918465665242; + } + } + } + if (LIKELY(false || (data[8].qvalue <= 156))) { + if (LIKELY(false || (data[0].qvalue <= 450))) { + if (LIKELY(false || (data[6].qvalue <= 146))) { + if (LIKELY(false || (data[0].qvalue <= 344))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + if (LIKELY(false || (data[0].qvalue <= 222))) { + result[0] += 39.972789080579226; + } else { + result[0] += 214.53042647495874; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + result[0] += -103.36098764508137; + } else { + result[0] += -3.5862855800631728; + } + } + } else { + if (LIKELY(false || (data[3].qvalue <= 144))) { + if (LIKELY(false || (data[5].qvalue <= 92))) { + result[0] += 33.48501259384832; + } else { + result[0] += -526.1972342777782; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 188))) { + result[0] += 291.7875686572814; + } else { + result[0] += -154.8400445536322; + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 50))) { + if (UNLIKELY(false || (data[6].qvalue <= 152))) { + if (LIKELY(false || (data[0].qvalue <= 436))) { + result[0] += -209.00331042067378; + } else { + result[0] += 476.30810024115374; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 284))) { + result[0] += -174.50986784466022; + } else { + result[0] += -612.8591006144763; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 154))) { + if (LIKELY(false || (data[6].qvalue <= 160))) { + result[0] += -17.361861237499827; + } else { + result[0] += -227.84455664039766; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 170))) { + result[0] += 141.6717472259187; + } else { + result[0] += -152.85677116643248; + } + } + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 16))) { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -2290.8527835669884; + } else { + result[0] += -90.00394883304172; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 0))) { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -670.6849931746262; + } else { + result[0] += -81.08626512407443; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 124))) { + if (UNLIKELY(false || (data[10].qvalue <= 44))) { + result[0] += 252.92553318096964; + } else { + result[0] += -237.02792802064724; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 26))) { + result[0] += -131.25931683115354; + } else { + result[0] += 155.92595301503033; + } + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 416))) { + if (LIKELY(false || (data[0].qvalue <= 350))) { + result[0] += -135.57120230355562; + } else { + result[0] += -444.05965584753494; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 130))) { + if (UNLIKELY(false || (data[0].qvalue <= 454))) { + result[0] += -617.9283577463137; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 168))) { + result[0] += 379.3453933956734; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 468))) { + result[0] += -555.1703256848654; + } else { + result[0] += 183.81435330137998; + } + } + } + } else { + result[0] += 287.9785027492729; + } + } + } + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (LIKELY(false || (data[0].qvalue <= 462))) { + if (LIKELY(false || (data[1].qvalue <= 160))) { + if (LIKELY(false || (data[6].qvalue <= 174))) { + if (LIKELY(false || (data[7].qvalue <= 166))) { + if (LIKELY(false || (data[7].qvalue <= 164))) { + result[0] += 1.9227744347010873; + } else { + result[0] += -372.9806790342511; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 162))) { + result[0] += 38.027655688643975; + } else { + result[0] += 330.84712805066437; + } + } + } else { + result[0] += -254.51641015239204; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 430))) { + result[0] += -29.47274599196618; + } else { + result[0] += -712.7969392811483; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 156))) { + if (LIKELY(false || (data[4].qvalue <= 108))) { + if (UNLIKELY(false || (data[9].qvalue <= 36))) { + result[0] += 557.7390203473873; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 58))) { + result[0] += 232.35239699183904; + } else { + result[0] += -374.7341949643208; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 470))) { + if (UNLIKELY(false || (data[9].qvalue <= 6))) { + result[0] += 438.92836082611086; + } else { + result[0] += -1439.2171157594041; + } + } else { + result[0] += -146.03654903994473; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 176))) { + if (LIKELY(false || (data[4].qvalue <= 130))) { + if (LIKELY(false || (data[5].qvalue <= 108))) { + result[0] += 321.2116732677101; + } else { + result[0] += 754.9708259292751; + } + } else { + result[0] += -22.672436608018547; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 70))) { + result[0] += 795.3453671875; + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -213.941419408782; + } else { + result[0] += 282.16809176472924; + } + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (UNLIKELY(false || (data[5].qvalue <= 32))) { + result[0] += -1600.845136858259; + } else { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (LIKELY(false || (data[0].qvalue <= 440))) { + if (UNLIKELY(false || (data[5].qvalue <= 58))) { + result[0] += -521.5318192486991; + } else { + result[0] += -96.30084920460428; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 218))) { + result[0] += 166.08593687552778; + } else { + result[0] += -341.6197044372843; + } + } + } else { + result[0] += -270.16752261143085; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 188))) { + result[0] += 379.4922879662453; + } else { + if (LIKELY(false || (data[1].qvalue <= 164))) { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + if (LIKELY(false || (data[3].qvalue <= 178))) { + result[0] += 59.33023477478028; + } else { + result[0] += -944.8980152625645; + } + } else { + result[0] += 362.7230938296502; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -1894.5816613281252; + } else { + result[0] += -151.54009237179278; + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 466))) { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (LIKELY(false || (data[2].qvalue <= 218))) { + if (LIKELY(false || (data[0].qvalue <= 428))) { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[10].qvalue <= 146))) { + result[0] += 2.61150493746397; + } else { + result[0] += -294.8989575123201; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 154))) { + result[0] += -144.860709390818; + } else { + result[0] += 49.05217218783569; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 104))) { + if (LIKELY(false || (data[2].qvalue <= 102))) { + result[0] += -69.84846406106546; + } else { + result[0] += -1099.694205050492; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 64))) { + result[0] += 547.2763598289783; + } else { + result[0] += 121.02960542039312; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 430))) { + if (UNLIKELY(false || (data[1].qvalue <= 112))) { + result[0] += -299.9131036348163; + } else { + result[0] += 87.20580911412755; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 162))) { + if (LIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -434.7498970777533; + } else { + result[0] += 375.0325198630567; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 174))) { + result[0] += -761.131324712887; + } else { + result[0] += -387.49766429457327; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 392))) { + result[0] += -94.7425416912909; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 154))) { + result[0] += -190.41458346534543; + } else { + result[0] += -405.4827892391865; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 200))) { + if (UNLIKELY(false || (data[7].qvalue <= 78))) { + if (UNLIKELY(false || (data[2].qvalue <= 46))) { + result[0] += 373.04881527855287; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -1545.4975544084823; + } else { + result[0] += -255.72717774402136; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 98))) { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[1].qvalue <= 116))) { + result[0] += 450.93153398469025; + } else { + result[0] += 1014.2956421685988; + } + } else { + result[0] += 1245.0624796875002; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 148))) { + if (UNLIKELY(false || (data[8].qvalue <= 34))) { + result[0] += -547.0456131590985; + } else { + result[0] += -9.966885090949628; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 104))) { + result[0] += 305.58289721685486; + } else { + result[0] += 51.01148885239249; + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + if (LIKELY(false || (data[1].qvalue <= 162))) { + result[0] += -921.1548865874474; + } else { + result[0] += -1870.7389783653846; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 162))) { + result[0] += 236.7134294836506; + } else { + result[0] += -285.6914099083137; + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 174))) { + if (UNLIKELY(false || (data[5].qvalue <= 14))) { + if (UNLIKELY(false || (data[4].qvalue <= 2))) { + if (LIKELY(false || (data[2].qvalue <= 58))) { + if (LIKELY(false || (data[2].qvalue <= 52))) { + result[0] += 138.46445954751613; + } else { + result[0] += 30.25739363045126; + } + } else { + result[0] += 308.1558521464535; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 40))) { + if (LIKELY(false || (data[10].qvalue <= 52))) { + if (LIKELY(false || (data[8].qvalue <= 12))) { + result[0] += -8.275799037633549; + } else { + result[0] += -133.54923320626136; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 6))) { + result[0] += 175.8079980773678; + } else { + result[0] += 60.614919146381446; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 42))) { + result[0] += -434.16667085044185; + } else { + if (LIKELY(false || (data[4].qvalue <= 38))) { + result[0] += -41.16427694169654; + } else { + result[0] += -185.9750821120982; + } + } + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 22))) { + if (LIKELY(false || (data[3].qvalue <= 18))) { + if (LIKELY(false || (data[5].qvalue <= 20))) { + if (UNLIKELY(false || (data[4].qvalue <= 12))) { + result[0] += -122.25698184102133; + } else { + result[0] += 80.57705277421712; + } + } else { + result[0] += -153.60843419206896; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 22))) { + result[0] += 408.69803721206245; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 90))) { + result[0] += 136.97672358425646; + } else { + result[0] += 270.9923573487174; + } + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 28))) { + if (LIKELY(false || (data[2].qvalue <= 122))) { + if (UNLIKELY(false || (data[3].qvalue <= 24))) { + result[0] += -159.08908713728795; + } else { + result[0] += -9.923411608506871; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 148))) { + result[0] += -521.1873971145586; + } else { + result[0] += 17.25752566881542; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 2))) { + if (UNLIKELY(false || (data[3].qvalue <= 64))) { + result[0] += 301.7622074237134; + } else { + result[0] += 122.86657003996795; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 4))) { + result[0] += -254.85463978758668; + } else { + result[0] += 5.494987706460857; + } + } + } + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 32))) { + result[0] += -834.7163352102; + } else { + if (LIKELY(false || (data[7].qvalue <= 200))) { + if (UNLIKELY(false || (data[8].qvalue <= 6))) { + result[0] += 172.7220039048895; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += 161.15720525096583; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 136))) { + result[0] += -107.08335478914324; + } else { + result[0] += -12.489971356584766; + } + } + } + } else { + if (LIKELY(false || (data[3].qvalue <= 180))) { + result[0] += -405.35269670345906; + } else { + result[0] += -207.23569477608535; + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (LIKELY(false || (data[0].qvalue <= 392))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (LIKELY(false || (data[0].qvalue <= 292))) { + if (UNLIKELY(false || (data[3].qvalue <= 34))) { + result[0] += -26.43222222269896; + } else { + result[0] += 7.909229573729924; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 30))) { + result[0] += 8.617729518857145; + } else { + result[0] += 108.90607449231234; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 74))) { + if (UNLIKELY(false || (data[10].qvalue <= 70))) { + result[0] += -1.9284671174533046; + } else { + result[0] += -105.2363711810878; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 106))) { + result[0] += 35.07140832310961; + } else { + result[0] += -52.187791710622804; + } + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 222))) { + if (UNLIKELY(false || (data[2].qvalue <= 86))) { + if (LIKELY(false || (data[2].qvalue <= 64))) { + result[0] += 53.903161702785724; + } else { + result[0] += -171.4159662862328; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 114))) { + result[0] += 123.56275048732999; + } else { + result[0] += 26.26029225107184; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -422.00141694181985; + } else { + result[0] += -107.9116343442738; + } + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 88))) { + if (LIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -288.46097298039064; + } else { + if (LIKELY(false || (data[7].qvalue <= 196))) { + result[0] += 375.28961351398226; + } else { + result[0] += -281.2952544835409; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 446))) { + result[0] += -17.901971338973112; + } else { + if (LIKELY(false || (data[6].qvalue <= 186))) { + result[0] += -312.93686064175296; + } else { + result[0] += -716.0857770302855; + } + } + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 88))) { + result[0] += 451.3160600734765; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 54))) { + if (UNLIKELY(false || (data[8].qvalue <= 16))) { + result[0] += 120.12627461414955; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -1770.8570323768029; + } else { + result[0] += -297.6523306745511; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 184))) { + if (LIKELY(false || (data[7].qvalue <= 196))) { + if (LIKELY(false || (data[0].qvalue <= 472))) { + result[0] += 199.35980767050484; + } else { + result[0] += 503.56640093198286; + } + } else { + result[0] += -518.0686588935853; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + if (LIKELY(false || (data[7].qvalue <= 188))) { + result[0] += -150.74680039702164; + } else { + result[0] += -1055.9052195002068; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 176))) { + result[0] += -46.963809276019184; + } else { + result[0] += 394.9533442330038; + } + } + } + } + } + } + if (LIKELY(false || (data[10].qvalue <= 120))) { + if (LIKELY(false || (data[5].qvalue <= 84))) { + if (LIKELY(false || (data[6].qvalue <= 72))) { + if (LIKELY(false || (data[1].qvalue <= 96))) { + if (LIKELY(false || (data[1].qvalue <= 92))) { + if (LIKELY(false || (data[8].qvalue <= 116))) { + result[0] += 14.869440540791798; + } else { + result[0] += -36.73557311757522; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 64))) { + result[0] += -538.0900500719082; + } else { + result[0] += -27.241161506715144; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 174))) { + result[0] += 132.0541996118715; + } else { + result[0] += -113.04252572422735; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 24))) { + if (LIKELY(false || (data[8].qvalue <= 68))) { + if (LIKELY(false || (data[1].qvalue <= 160))) { + result[0] += 244.86366814112222; + } else { + result[0] += -95.24641504542937; + } + } else { + result[0] += -76.8715657600925; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 114))) { + if (LIKELY(false || (data[5].qvalue <= 76))) { + result[0] += -120.29792651308429; + } else { + result[0] += 5.15502948111146; + } + } else { + result[0] += -227.26424899945584; + } + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 48))) { + if (LIKELY(false || (data[10].qvalue <= 72))) { + if (LIKELY(false || (data[6].qvalue <= 126))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += -511.47464005858114; + } else { + result[0] += 99.31151638687581; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 2))) { + result[0] += 139.52958863442427; + } else { + result[0] += -222.6339569962306; + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 112))) { + result[0] += -706.5550247771737; + } else { + result[0] += 32.62538379320284; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 152))) { + if (LIKELY(false || (data[8].qvalue <= 150))) { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + result[0] += -225.27583884895458; + } else { + result[0] += 97.88878371179209; + } + } else { + result[0] += -91.48452493663109; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 156))) { + if (UNLIKELY(false || (data[1].qvalue <= 148))) { + result[0] += -972.0166516770522; + } else { + result[0] += -3.8444739361171365; + } + } else { + result[0] += -30.275824314188043; + } + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 136))) { + if (LIKELY(false || (data[1].qvalue <= 128))) { + result[0] += -591.4492984187848; + } else { + result[0] += 52.69617940013822; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 86))) { + if (LIKELY(false || (data[2].qvalue <= 180))) { + result[0] += 84.41514196973255; + } else { + result[0] += -58.25664931323567; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 6))) { + result[0] += 79.8372134768803; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 190))) { + result[0] += -166.72699212355798; + } else { + result[0] += -39.28083649832746; + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 436))) { + if (LIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[0].qvalue <= 384))) { + if (LIKELY(false || (data[1].qvalue <= 126))) { + if (LIKELY(false || (data[4].qvalue <= 102))) { + if (LIKELY(false || (data[1].qvalue <= 102))) { + result[0] += 0.7528430355879543; + } else { + result[0] += -81.79712338566776; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 108))) { + result[0] += 169.93142796462274; + } else { + result[0] += -185.79991240603783; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 126))) { + if (LIKELY(false || (data[2].qvalue <= 120))) { + result[0] += -28.331180770557967; + } else { + result[0] += 672.7509955188011; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 274))) { + result[0] += -114.78306332631581; + } else { + result[0] += -408.53260808925637; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 198))) { + if (LIKELY(false || (data[6].qvalue <= 128))) { + if (UNLIKELY(false || (data[9].qvalue <= 16))) { + result[0] += -1018.0208814686271; + } else { + result[0] += 59.74134293981621; + } + } else { + result[0] += 293.35493295005193; + } + } else { + result[0] += -400.56582852441784; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 98))) { + if (UNLIKELY(false || (data[9].qvalue <= 14))) { + if (LIKELY(false || (data[0].qvalue <= 420))) { + result[0] += -74.3543481633016; + } else { + if (LIKELY(false || (data[1].qvalue <= 150))) { + result[0] += 571.9786783643856; + } else { + result[0] += -219.12600540624658; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 342))) { + result[0] += -101.20421903443618; + } else { + result[0] += -425.1879279279357; + } + } + } else { + result[0] += -38.8853994688072; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 0))) { + result[0] += -129.9772116102948; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 100))) { + if (UNLIKELY(false || (data[7].qvalue <= 56))) { + result[0] += 352.2167561334443; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 452))) { + if (LIKELY(false || (data[4].qvalue <= 104))) { + result[0] += -179.85826237208437; + } else { + result[0] += -1026.3738449938005; + } + } else { + result[0] += 86.91047265123943; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 140))) { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + if (UNLIKELY(false || (data[0].qvalue <= 448))) { + result[0] += -194.66018635911382; + } else { + result[0] += 165.22462004866262; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 112))) { + result[0] += 280.5581946286645; + } else { + result[0] += 803.432975071445; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 464))) { + if (LIKELY(false || (data[2].qvalue <= 218))) { + result[0] += 2.697588396559468; + } else { + result[0] += -478.252898298942; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 154))) { + result[0] += -208.4543974390897; + } else { + result[0] += 185.3089271840308; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 426))) { + if (LIKELY(false || (data[1].qvalue <= 124))) { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (LIKELY(false || (data[5].qvalue <= 86))) { + if (LIKELY(false || (data[6].qvalue <= 76))) { + if (LIKELY(false || (data[4].qvalue <= 80))) { + result[0] += -1.486803701497208; + } else { + result[0] += 82.51456493961574; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 26))) { + result[0] += -1724.8354696321032; + } else { + result[0] += -82.71048025173705; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 22))) { + if (UNLIKELY(false || (data[4].qvalue <= 110))) { + result[0] += -790.0620438667581; + } else { + result[0] += 106.26605184431772; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 378))) { + result[0] += 33.9153156537546; + } else { + result[0] += 173.09121370877705; + } + } + } + } else { + result[0] += -154.93054492116286; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 54))) { + if (LIKELY(false || (data[2].qvalue <= 50))) { + if (LIKELY(false || (data[2].qvalue <= 42))) { + if (LIKELY(false || (data[0].qvalue <= 392))) { + result[0] += -1.9879083035835015; + } else { + result[0] += 398.5109346540218; + } + } else { + result[0] += -786.5885010553891; + } + } else { + result[0] += 430.4466811037737; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 342))) { + result[0] += -47.917740653423564; + } else { + if (LIKELY(false || (data[0].qvalue <= 412))) { + if (LIKELY(false || (data[2].qvalue <= 164))) { + result[0] += -361.3887876613474; + } else { + result[0] += -59.68743934462101; + } + } else { + result[0] += -24.34286763077365; + } + } + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 10))) { + result[0] += 711.5911056007745; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 88))) { + if (LIKELY(false || (data[0].qvalue <= 454))) { + if (LIKELY(false || (data[2].qvalue <= 64))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += -939.5055510805191; + } else { + result[0] += 49.38415531008706; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 128))) { + result[0] += -607.7336297438973; + } else { + result[0] += 231.05599252528052; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 40))) { + result[0] += -676.7676630675595; + } else { + result[0] += 107.56624878796568; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 104))) { + if (UNLIKELY(false || (data[3].qvalue <= 110))) { + if (UNLIKELY(false || (data[7].qvalue <= 52))) { + result[0] += 470.39671526527695; + } else { + result[0] += 19.64177427856826; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 456))) { + result[0] += 257.8561111510292; + } else { + result[0] += -156.72749393540863; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 112))) { + result[0] += 541.0799146065921; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 444))) { + result[0] += -223.96127054701944; + } else { + result[0] += 35.39412132031614; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 458))) { + if (LIKELY(false || (data[2].qvalue <= 200))) { + if (LIKELY(false || (data[3].qvalue <= 172))) { + if (LIKELY(false || (data[2].qvalue <= 198))) { + if (LIKELY(false || (data[0].qvalue <= 392))) { + if (LIKELY(false || (data[2].qvalue <= 180))) { + result[0] += -1.1948610469474203; + } else { + result[0] += -80.88158309915174; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + result[0] += -96.14071101510345; + } else { + result[0] += 63.358216911141255; + } + } + } else { + result[0] += 176.15313222545365; + } + } else { + result[0] += -326.1906626097021; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 116))) { + if (LIKELY(false || (data[0].qvalue <= 430))) { + if (UNLIKELY(false || (data[8].qvalue <= 146))) { + if (LIKELY(false || (data[0].qvalue <= 400))) { + result[0] += 40.65129948483353; + } else { + result[0] += 623.8755186699341; + } + } else { + result[0] += -139.52193586780237; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (LIKELY(false || (data[0].qvalue <= 446))) { + result[0] += 215.51009220017923; + } else { + result[0] += 626.7644565408536; + } + } else { + result[0] += -57.77880667756001; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 78))) { + result[0] += -345.1537827658937; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 206))) { + if (UNLIKELY(false || (data[0].qvalue <= 368))) { + result[0] += 120.97750370682058; + } else { + result[0] += -530.0146207114068; + } + } else { + result[0] += -90.90617312134356; + } + } + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 128))) { + if (LIKELY(false || (data[2].qvalue <= 222))) { + if (UNLIKELY(false || (data[8].qvalue <= 44))) { + if (UNLIKELY(false || (data[2].qvalue <= 30))) { + if (LIKELY(false || (data[4].qvalue <= 114))) { + result[0] += 405.6146661410597; + } else { + result[0] += 22.37476339760467; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 78))) { + result[0] += -499.28634142340854; + } else { + result[0] += 68.9305749234034; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 90))) { + if (LIKELY(false || (data[9].qvalue <= 86))) { + result[0] += 529.6389184175102; + } else { + result[0] += 95.91732717933235; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 158))) { + result[0] += 194.2271286977213; + } else { + result[0] += -22.243321533686604; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -322.8706246634995; + } else { + result[0] += 171.93787845774688; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -280.2205757227194; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 16))) { + result[0] += 805.6007065468135; + } else { + if (LIKELY(false || (data[9].qvalue <= 16))) { + if (LIKELY(false || (data[7].qvalue <= 192))) { + result[0] += 103.74300879356734; + } else { + result[0] += -219.28913112606025; + } + } else { + result[0] += -516.2170829126527; + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 54))) { + if (UNLIKELY(false || (data[0].qvalue <= 0))) { + result[0] += -98.93297408058446; + } else { + if (LIKELY(false || (data[4].qvalue <= 60))) { + if (LIKELY(false || (data[3].qvalue <= 126))) { + if (UNLIKELY(false || (data[0].qvalue <= 18))) { + result[0] += -35.246449455925166; + } else { + result[0] += -11.913885864248398; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 12))) { + result[0] += 244.38629528569243; + } else { + result[0] += -59.63764056448275; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 136))) { + result[0] += -67.53814213480176; + } else { + result[0] += 13.863392069333836; + } + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[1].qvalue <= 10))) { + if (LIKELY(false || (data[0].qvalue <= 232))) { + if (UNLIKELY(false || (data[2].qvalue <= 24))) { + result[0] += 243.2273344714578; + } else { + if (LIKELY(false || (data[2].qvalue <= 52))) { + result[0] += 56.08540095837141; + } else { + result[0] += -73.3251905895068; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 128))) { + if (UNLIKELY(false || (data[2].qvalue <= 24))) { + result[0] += 643.1260108343162; + } else { + result[0] += 308.232898319556; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 318))) { + result[0] += -85.94227185477129; + } else { + result[0] += 339.32448533355216; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 196))) { + result[0] += 295.57345918020815; + } else { + result[0] += 541.3180556951017; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 156))) { + if (UNLIKELY(false || (data[4].qvalue <= 6))) { + if (LIKELY(false || (data[4].qvalue <= 4))) { + if (UNLIKELY(false || (data[6].qvalue <= 2))) { + result[0] += 176.28854142574565; + } else { + result[0] += -16.26659984192861; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 224))) { + result[0] += 272.5245026026528; + } else { + result[0] += 630.8310847742418; + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 10))) { + if (LIKELY(false || (data[10].qvalue <= 76))) { + result[0] += -99.36810379968507; + } else { + result[0] += -462.5017447936385; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 146))) { + result[0] += 3.9186464796296914; + } else { + result[0] += 225.22750105396563; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 364))) { + if (UNLIKELY(false || (data[0].qvalue <= 178))) { + if (LIKELY(false || (data[1].qvalue <= 22))) { + result[0] += -168.88397604209945; + } else { + result[0] += 38.12118179055833; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 14))) { + result[0] += -208.349456121144; + } else { + result[0] += -434.7176030497106; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 428))) { + if (LIKELY(false || (data[1].qvalue <= 22))) { + result[0] += 147.36975160660015; + } else { + result[0] += -427.1472748332084; + } + } else { + result[0] += 468.6802177857324; + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[5].qvalue <= 118))) { + if (LIKELY(false || (data[0].qvalue <= 438))) { + if (LIKELY(false || (data[7].qvalue <= 194))) { + if (LIKELY(false || (data[1].qvalue <= 130))) { + if (LIKELY(false || (data[4].qvalue <= 106))) { + result[0] += 0.3368638092539733; + } else { + result[0] += 173.775034577441; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 60))) { + result[0] += 48.85885497450373; + } else { + result[0] += -96.87483192360062; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 346))) { + result[0] += -78.03276293917031; + } else { + if (LIKELY(false || (data[0].qvalue <= 428))) { + result[0] += -418.6164374426745; + } else { + result[0] += -98.44042608027418; + } + } + } + } else { + if (LIKELY(false || (data[5].qvalue <= 112))) { + if (UNLIKELY(false || (data[9].qvalue <= 8))) { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -275.91209245174423; + } else { + result[0] += 99.25590915465185; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 82))) { + result[0] += -41.70434165862219; + } else { + result[0] += 130.83735776026924; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 4))) { + if (UNLIKELY(false || (data[0].qvalue <= 444))) { + result[0] += 208.90021667361708; + } else { + result[0] += 614.78246500265; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 452))) { + result[0] += -186.66325247313335; + } else { + result[0] += 252.69463305407146; + } + } + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 158))) { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -243.87323720092883; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 160))) { + result[0] += 607.3841937390166; + } else { + if (LIKELY(false || (data[2].qvalue <= 224))) { + result[0] += -4.642945887934479; + } else { + result[0] += -425.1676612575311; + } + } + } + } else { + result[0] += -226.78722863564013; + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 88))) { + if (LIKELY(false || (data[2].qvalue <= 226))) { + result[0] += 457.093631103365; + } else { + result[0] += -195.01605350860177; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 54))) { + if (UNLIKELY(false || (data[8].qvalue <= 16))) { + result[0] += 121.71032578695974; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -1535.090860877404; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 48))) { + result[0] += -642.94729679988; + } else { + result[0] += -89.58887867028852; + } + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 88))) { + if (UNLIKELY(false || (data[5].qvalue <= 72))) { + result[0] += -178.884099836437; + } else { + result[0] += 349.2407402511536; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 144))) { + result[0] += -1194.8087333496094; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -154.55357593204099; + } else { + result[0] += 257.1888927003237; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 112))) { + if (UNLIKELY(false || (data[0].qvalue <= 0))) { + result[0] += -89.14531598408416; + } else { + result[0] += -18.084829562955424; + } + } else { + if (LIKELY(false || (data[8].qvalue <= 138))) { + if (LIKELY(false || (data[3].qvalue <= 116))) { + if (LIKELY(false || (data[8].qvalue <= 118))) { + if (UNLIKELY(false || (data[4].qvalue <= 38))) { + if (LIKELY(false || (data[1].qvalue <= 44))) { + result[0] += 16.943599637772344; + } else { + result[0] += 106.06836417622998; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 42))) { + result[0] += -528.7742969051144; + } else { + result[0] += -9.291722918418776; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 136))) { + if (LIKELY(false || (data[2].qvalue <= 132))) { + result[0] += -118.09966496394628; + } else { + result[0] += -560.1406224711944; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 140))) { + result[0] += -51.02960535934701; + } else { + result[0] += 338.4129630811006; + } + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 104))) { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (LIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -248.21126531471361; + } else { + result[0] += 386.6061213337075; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 376))) { + result[0] += 94.34550988961043; + } else { + result[0] += 304.2723529855085; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 436))) { + if (LIKELY(false || (data[8].qvalue <= 106))) { + result[0] += -8.896943765616806; + } else { + result[0] += -245.20061258455573; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 60))) { + result[0] += -113.87022533457596; + } else { + result[0] += 116.77525773183898; + } + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 152))) { + if (LIKELY(false || (data[0].qvalue <= 428))) { + if (UNLIKELY(false || (data[0].qvalue <= 224))) { + result[0] += -624.1711191522509; + } else { + if (LIKELY(false || (data[0].qvalue <= 414))) { + result[0] += -1347.2302644189722; + } else { + result[0] += -733.7942435464515; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 60))) { + result[0] += 471.0712008104827; + } else { + result[0] += -142.56709666372134; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + if (UNLIKELY(false || (data[9].qvalue <= 104))) { + if (UNLIKELY(false || (data[2].qvalue <= 192))) { + result[0] += -1567.6200324035817; + } else { + result[0] += -98.50162535806925; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 398))) { + result[0] += -50.25901125894962; + } else { + result[0] += 60.40281064144929; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 102))) { + if (LIKELY(false || (data[3].qvalue <= 178))) { + result[0] += 379.87726438502557; + } else { + result[0] += -199.4001040982219; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -287.5563657821928; + } else { + result[0] += 332.3956673905034; + } + } + } + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 232))) { + if (UNLIKELY(false || (data[4].qvalue <= 36))) { + if (LIKELY(false || (data[1].qvalue <= 40))) { + if (LIKELY(false || (data[4].qvalue <= 30))) { + result[0] += -1.565030040146726; + } else { + result[0] += -104.9566604701055; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 138))) { + if (UNLIKELY(false || (data[0].qvalue <= 76))) { + result[0] += -17.037935890389736; + } else { + result[0] += 63.289328847278654; + } + } else { + result[0] += -1324.6271531918176; + } + } + } else { + result[0] += -20.81789198275515; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 34))) { + if (LIKELY(false || (data[8].qvalue <= 100))) { + if (LIKELY(false || (data[8].qvalue <= 98))) { + if (LIKELY(false || (data[2].qvalue <= 134))) { + if (UNLIKELY(false || (data[0].qvalue <= 290))) { + result[0] += 2.015327035742837; + } else { + result[0] += 76.49554789592639; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 402))) { + result[0] += -823.5010456874027; + } else { + result[0] += 182.88330508869717; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 376))) { + result[0] += -684.3245220762328; + } else { + result[0] += 114.75508963752516; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 92))) { + result[0] += 584.9961795342024; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 118))) { + result[0] += 284.6160485461698; + } else { + if (LIKELY(false || (data[0].qvalue <= 342))) { + result[0] += -0.3173057744465385; + } else { + result[0] += 255.28193335274855; + } + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 44))) { + if (UNLIKELY(false || (data[5].qvalue <= 24))) { + if (LIKELY(false || (data[3].qvalue <= 18))) { + if (LIKELY(false || (data[0].qvalue <= 368))) { + result[0] += -109.2669803718638; + } else { + result[0] += 49.7918489437198; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 26))) { + result[0] += -35.33314939837475; + } else { + result[0] += 383.1659807220039; + } + } + } else { + if (UNLIKELY(false || (data[11].qvalue <= 0))) { + if (LIKELY(false || (data[8].qvalue <= 80))) { + result[0] += -119.6036066769413; + } else { + result[0] += -530.947990035426; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + result[0] += -80.63511119863223; + } else { + result[0] += 415.2734672989832; + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 8))) { + if (UNLIKELY(false || (data[3].qvalue <= 68))) { + result[0] += 704.1141519325657; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 2))) { + result[0] += 84.60147882812862; + } else { + result[0] += -187.8254210699395; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 70))) { + if (LIKELY(false || (data[5].qvalue <= 90))) { + result[0] += 121.89790445304102; + } else { + result[0] += -115.0763981218671; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 72))) { + result[0] += -654.9303551006227; + } else { + result[0] += 7.981521248472231; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 420))) { + if (LIKELY(false || (data[6].qvalue <= 144))) { + if (LIKELY(false || (data[10].qvalue <= 146))) { + if (LIKELY(false || (data[2].qvalue <= 210))) { + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (LIKELY(false || (data[0].qvalue <= 358))) { + result[0] += -2.9256693373180407; + } else { + result[0] += 52.86899999050905; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 162))) { + result[0] += -236.3933458780788; + } else { + result[0] += -14.339195301019586; + } + } + } else { + result[0] += -219.14539537485498; + } + } else { + result[0] += -318.42234277715806; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 154))) { + if (UNLIKELY(false || (data[0].qvalue <= 274))) { + result[0] += -22.289702159300546; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 42))) { + result[0] += 180.36521926437015; + } else { + result[0] += -260.09045231531485; + } + } + } else { + result[0] += 44.37903776463972; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 86))) { + if (UNLIKELY(false || (data[0].qvalue <= 440))) { + if (UNLIKELY(false || (data[7].qvalue <= 46))) { + if (LIKELY(false || (data[1].qvalue <= 90))) { + result[0] += -68.81869559889574; + } else { + result[0] += 716.3498421781522; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 106))) { + if (LIKELY(false || (data[4].qvalue <= 78))) { + result[0] += -171.83362174216776; + } else { + result[0] += -984.3537857945884; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 100))) { + result[0] += -338.7075403591485; + } else { + result[0] += 123.96422662235261; + } + } + } + } else { + if (LIKELY(false || (data[3].qvalue <= 164))) { + if (LIKELY(false || (data[9].qvalue <= 126))) { + if (UNLIKELY(false || (data[0].qvalue <= 454))) { + result[0] += -204.18509525988378; + } else { + result[0] += 64.71659005621359; + } + } else { + result[0] += 817.3359237308946; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 26))) { + result[0] += -262.7801965080877; + } else { + result[0] += 612.6525648856328; + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 140))) { + if (LIKELY(false || (data[8].qvalue <= 128))) { + if (LIKELY(false || (data[6].qvalue <= 150))) { + if (LIKELY(false || (data[1].qvalue <= 148))) { + result[0] += 369.7289746082688; + } else { + result[0] += -103.32913617071652; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 22))) { + result[0] += -25.94812018229829; + } else { + result[0] += 299.29536905910857; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + result[0] += 18.803068756821325; + } else { + result[0] += 861.4705320103184; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 118))) { + result[0] += 148.92751319365524; + } else { + if (UNLIKELY(false || (data[6].qvalue <= 126))) { + if (UNLIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -1775.7163414874296; + } else { + result[0] += 74.55151140491274; + } + } else { + result[0] += -10.06076847351882; + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 450))) { + if (LIKELY(false || (data[1].qvalue <= 148))) { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + if (LIKELY(false || (data[6].qvalue <= 152))) { + if (UNLIKELY(false || (data[9].qvalue <= 92))) { + if (LIKELY(false || (data[8].qvalue <= 12))) { + result[0] += 21.302765815704674; + } else { + result[0] += -269.2691797738821; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 14))) { + result[0] += -63.12874563850693; + } else { + result[0] += 62.93227886630939; + } + } + } else { + result[0] += -400.8612725252076; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 28))) { + if (LIKELY(false || (data[0].qvalue <= 424))) { + if (LIKELY(false || (data[0].qvalue <= 386))) { + result[0] += 58.49853608139253; + } else { + result[0] += 379.42843601568325; + } + } else { + result[0] += -1179.5767049009407; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 30))) { + if (LIKELY(false || (data[1].qvalue <= 100))) { + result[0] += 19.778425640043217; + } else { + result[0] += -856.1976997050459; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 424))) { + result[0] += -5.97846202630444; + } else { + result[0] += 86.90154167537219; + } + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 102))) { + result[0] += 410.7033838491514; + } else { + if (UNLIKELY(false || (data[4].qvalue <= 112))) { + result[0] += -278.12154040332797; + } else { + result[0] += -70.26805980080006; + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 180))) { + if (LIKELY(false || (data[4].qvalue <= 136))) { + if (UNLIKELY(false || (data[8].qvalue <= 46))) { + if (LIKELY(false || (data[9].qvalue <= 52))) { + if (UNLIKELY(false || (data[4].qvalue <= 108))) { + result[0] += 202.68141819077738; + } else { + result[0] += -167.2132011812022; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -1384.5104673530457; + } else { + result[0] += -189.4566731860187; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 140))) { + if (LIKELY(false || (data[7].qvalue <= 132))) { + result[0] += 187.09927614728926; + } else { + result[0] += 567.0117546161409; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -93.51709182438407; + } else { + result[0] += 121.54129938504501; + } + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 140))) { + result[0] += 465.57152458676364; + } else { + result[0] += -479.2019554780659; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + if (LIKELY(false || (data[10].qvalue <= 106))) { + if (LIKELY(false || (data[7].qvalue <= 200))) { + if (UNLIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -220.95806736832415; + } else { + result[0] += 116.17247093054512; + } + } else { + result[0] += -729.6717337371826; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 164))) { + result[0] += -312.7238552339285; + } else { + result[0] += -1140.467401624576; + } + } + } else { + result[0] += 195.02478855827175; + } + } + } + if (LIKELY(false || (data[8].qvalue <= 154))) { + if (LIKELY(false || (data[0].qvalue <= 392))) { + if (LIKELY(false || (data[6].qvalue <= 62))) { + if (LIKELY(false || (data[0].qvalue <= 310))) { + if (LIKELY(false || (data[8].qvalue <= 120))) { + if (LIKELY(false || (data[8].qvalue <= 100))) { + result[0] += -9.71572720333726; + } else { + result[0] += 67.59923309810226; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 52))) { + result[0] += -351.55140420047667; + } else { + result[0] += -16.616159911071314; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 72))) { + if (LIKELY(false || (data[4].qvalue <= 66))) { + result[0] += 69.88081147655924; + } else { + result[0] += -969.7781215245079; + } + } else { + result[0] += 228.67764875201647; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 4))) { + if (UNLIKELY(false || (data[10].qvalue <= 2))) { + result[0] += 74.35381121983716; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 138))) { + result[0] += -62.154687912270276; + } else { + result[0] += -796.2456169363136; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 56))) { + if (LIKELY(false || (data[4].qvalue <= 86))) { + result[0] += -156.1506230350774; + } else { + result[0] += 119.82853149552272; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 106))) { + result[0] += 13.900388157788383; + } else { + result[0] += -42.87238835775265; + } + } + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 78))) { + if (UNLIKELY(false || (data[6].qvalue <= 58))) { + if (LIKELY(false || (data[6].qvalue <= 56))) { + if (LIKELY(false || (data[2].qvalue <= 156))) { + result[0] += -25.79300413884613; + } else { + result[0] += 449.6201546856599; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 424))) { + result[0] += -1246.7840846746014; + } else { + result[0] += -60.57121668348967; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 18))) { + if (UNLIKELY(false || (data[0].qvalue <= 416))) { + result[0] += -507.2811346565345; + } else { + result[0] += 1.6959367259683802; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 130))) { + result[0] += 256.5119388340715; + } else { + result[0] += 61.901222326179735; + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 4))) { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -712.841039622884; + } else { + result[0] += -50.285859444198785; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 168))) { + if (UNLIKELY(false || (data[7].qvalue <= 80))) { + result[0] += -89.82284215276091; + } else { + result[0] += 57.2312430185315; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 462))) { + result[0] += -196.41216488031796; + } else { + result[0] += 64.87819986422424; + } + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 422))) { + if (LIKELY(false || (data[0].qvalue <= 352))) { + result[0] += -54.40201249787878; + } else { + result[0] += -323.09540305923275; + } + } else { + result[0] += 22.58190522531515; + } + } + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[2].qvalue <= 212))) { + if (LIKELY(false || (data[0].qvalue <= 396))) { + if (UNLIKELY(false || (data[9].qvalue <= 62))) { + if (UNLIKELY(false || (data[1].qvalue <= 34))) { + result[0] += 320.8771929320019; + } else { + result[0] += -48.74134204044459; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 64))) { + result[0] += 144.15507685887556; + } else { + result[0] += 2.061937768452418; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 168))) { + if (LIKELY(false || (data[8].qvalue <= 142))) { + result[0] += 14.310048813060831; + } else { + result[0] += -891.4821913653136; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 186))) { + result[0] += 230.68364280672665; + } else { + result[0] += 31.828982825162395; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 460))) { + if (UNLIKELY(false || (data[8].qvalue <= 148))) { + if (LIKELY(false || (data[0].qvalue <= 446))) { + result[0] += 5.3923912609412765; + } else { + result[0] += -296.2815883663324; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 18))) { + result[0] += 0.33503809236265447; + } else { + result[0] += -222.23518809470139; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 162))) { + result[0] += 435.30908950076656; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 220))) { + result[0] += 151.71039930723558; + } else { + result[0] += -220.68651987712957; + } + } + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 158))) { + if (LIKELY(false || (data[5].qvalue <= 106))) { + if (LIKELY(false || (data[8].qvalue <= 34))) { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -761.8192641261176; + } else { + result[0] += -24.696202075383436; + } + } else { + result[0] += 297.8870552382013; + } + } else { + result[0] += -1111.3451507308962; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 180))) { + result[0] += 399.4329665421262; + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -33.631317828261764; + } else { + if (LIKELY(false || (data[6].qvalue <= 186))) { + result[0] += 521.4533408866705; + } else { + result[0] += -47.855811708641056; + } + } + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 438))) { + if (UNLIKELY(false || (data[0].qvalue <= 352))) { + result[0] += -42.87267583810598; + } else { + result[0] += -282.3784003570174; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 132))) { + result[0] += 156.2716050426377; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 144))) { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + if (UNLIKELY(false || (data[2].qvalue <= 154))) { + result[0] += -106.3379635111628; + } else { + result[0] += -799.4368728340669; + } + } else { + result[0] += -30.901543195994442; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -278.9996333283842; + } else { + result[0] += 97.21542436889949; + } + } + } + } + } + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[0].qvalue <= 198))) { + if (LIKELY(false || (data[1].qvalue <= 10))) { + if (UNLIKELY(false || (data[2].qvalue <= 24))) { + if (UNLIKELY(false || (data[0].qvalue <= 62))) { + result[0] += -71.54944060418909; + } else { + result[0] += 200.91246808639335; + } + } else { + result[0] += -16.524995964984008; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 56))) { + result[0] += -6.615663904946698; + } else { + result[0] += 277.0038505094405; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 128))) { + if (LIKELY(false || (data[1].qvalue <= 10))) { + result[0] += 242.12226853180888; + } else { + result[0] += 495.40205234661437; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 308))) { + result[0] += -144.40748272556144; + } else { + result[0] += 244.60940232429508; + } + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 156))) { + if (UNLIKELY(false || (data[4].qvalue <= 6))) { + if (LIKELY(false || (data[4].qvalue <= 4))) { + if (LIKELY(false || (data[9].qvalue <= 144))) { + if (UNLIKELY(false || (data[10].qvalue <= 12))) { + result[0] += 151.95679595717183; + } else { + result[0] += -75.40462882328015; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 248))) { + result[0] += 59.16238539562008; + } else { + result[0] += 258.58095117618336; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 194))) { + if (UNLIKELY(false || (data[0].qvalue <= 78))) { + result[0] += 34.722618898156114; + } else { + result[0] += 229.7578373765491; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 254))) { + result[0] += 416.11542151307907; + } else { + result[0] += 657.1399509168027; + } + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 10))) { + if (LIKELY(false || (data[0].qvalue <= 396))) { + if (LIKELY(false || (data[10].qvalue <= 76))) { + result[0] += -99.76580517101934; + } else { + result[0] += -406.79586814929917; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 410))) { + result[0] += 67.21864653476916; + } else { + result[0] += 318.3788762612092; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 152))) { + if (UNLIKELY(false || (data[1].qvalue <= 8))) { + result[0] += -89.91859493216367; + } else { + result[0] += 2.265520650788892; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 182))) { + result[0] += 86.60159851878495; + } else { + result[0] += 446.4563385244049; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 426))) { + if (UNLIKELY(false || (data[0].qvalue <= 166))) { + result[0] += -85.71042457905949; + } else { + if (LIKELY(false || (data[0].qvalue <= 356))) { + if (LIKELY(false || (data[1].qvalue <= 14))) { + result[0] += -197.6560417052055; + } else { + result[0] += -382.76909364031883; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 22))) { + result[0] += 86.90468637807797; + } else { + result[0] += -409.69153384359265; + } + } + } + } else { + result[0] += 370.0893122985466; + } + } + } + if (LIKELY(false || (data[0].qvalue <= 450))) { + if (LIKELY(false || (data[1].qvalue <= 130))) { + if (LIKELY(false || (data[4].qvalue <= 106))) { + if (UNLIKELY(false || (data[9].qvalue <= 16))) { + result[0] += -778.7936840058063; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 20))) { + result[0] += 194.97101648633375; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + result[0] += -35.3509348883947; + } else { + result[0] += 5.30025526653546; + } + } + } + } else { + result[0] += 128.95468067576573; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 106))) { + if (UNLIKELY(false || (data[8].qvalue <= 68))) { + if (LIKELY(false || (data[2].qvalue <= 120))) { + result[0] += -34.18950997644831; + } else { + result[0] += 621.8798392741791; + } + } else { + result[0] += -155.4137360966938; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 108))) { + if (LIKELY(false || (data[0].qvalue <= 430))) { + if (LIKELY(false || (data[7].qvalue <= 108))) { + result[0] += 256.5188225022511; + } else { + result[0] += -99.67316593885579; + } + } else { + result[0] += 620.485300659375; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 100))) { + result[0] += -2286.0077275800704; + } else { + if (LIKELY(false || (data[5].qvalue <= 116))) { + result[0] += -53.309956527903466; + } else { + result[0] += 190.13761434101764; + } + } + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 136))) { + if (UNLIKELY(false || (data[2].qvalue <= 78))) { + if (LIKELY(false || (data[1].qvalue <= 142))) { + if (UNLIKELY(false || (data[7].qvalue <= 76))) { + if (LIKELY(false || (data[2].qvalue <= 68))) { + result[0] += 253.43645195632422; + } else { + result[0] += -1413.0786233422066; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 462))) { + result[0] += -690.7561180054086; + } else { + result[0] += -63.707054709028625; + } + } + } else { + result[0] += 291.7091149857328; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 120))) { + if (UNLIKELY(false || (data[8].qvalue <= 128))) { + result[0] += 478.56900909093986; + } else { + result[0] += 167.99342824787573; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 464))) { + result[0] += 60.9554692627185; + } else { + result[0] += -1073.8503825143669; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + if (LIKELY(false || (data[4].qvalue <= 136))) { + if (UNLIKELY(false || (data[8].qvalue <= 52))) { + result[0] += -539.7433428985337; + } else { + if (LIKELY(false || (data[1].qvalue <= 160))) { + result[0] += 20.89762446651438; + } else { + result[0] += -416.20782851777005; + } + } + } else { + result[0] += 361.5994213025975; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 172))) { + if (LIKELY(false || (data[1].qvalue <= 160))) { + if (UNLIKELY(false || (data[10].qvalue <= 34))) { + result[0] += 537.4931809409429; + } else { + result[0] += 93.3123402983442; + } + } else { + result[0] += 509.56219742394296; + } + } else { + result[0] += -30.502598904934388; + } + } + } + } + if (UNLIKELY(false || (data[4].qvalue <= 18))) { + if (UNLIKELY(false || (data[9].qvalue <= 108))) { + if (UNLIKELY(false || (data[0].qvalue <= 170))) { + result[0] += 32.02621609860586; + } else { + result[0] += 195.61137056108709; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 116))) { + result[0] += -56.120014772691675; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 120))) { + if (UNLIKELY(false || (data[8].qvalue <= 38))) { + result[0] += 258.16901404153344; + } else { + result[0] += 53.61413424410968; + } + } else { + result[0] += 3.8083516584014774; + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + if (LIKELY(false || (data[8].qvalue <= 12))) { + if (UNLIKELY(false || (data[10].qvalue <= 2))) { + result[0] += 95.68803265337215; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 8))) { + if (LIKELY(false || (data[3].qvalue <= 132))) { + result[0] += -103.63143270824092; + } else { + result[0] += -416.841719094304; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 452))) { + result[0] += 29.00180398543003; + } else { + result[0] += 739.6504496232433; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 442))) { + if (UNLIKELY(false || (data[9].qvalue <= 44))) { + result[0] += -1170.886850360489; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 78))) { + result[0] += -23.599978133418663; + } else { + result[0] += -233.86794128469188; + } + } + } else { + if (LIKELY(false || (data[3].qvalue <= 100))) { + result[0] += 296.8371355378329; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -578.3205363336284; + } else { + result[0] += 28.31820900489086; + } + } + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 110))) { + if (UNLIKELY(false || (data[2].qvalue <= 34))) { + if (LIKELY(false || (data[0].qvalue <= 270))) { + result[0] += 15.084899225242879; + } else { + if (UNLIKELY(false || (data[6].qvalue <= 44))) { + result[0] += 227.37067720434766; + } else { + result[0] += 79.75576158468904; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 66))) { + if (LIKELY(false || (data[2].qvalue <= 64))) { + result[0] += -21.445175019440377; + } else { + result[0] += -736.583490831738; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 122))) { + result[0] += 19.304014117388515; + } else { + result[0] += -31.807962860341632; + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 52))) { + if (LIKELY(false || (data[6].qvalue <= 20))) { + if (LIKELY(false || (data[2].qvalue <= 76))) { + result[0] += -2.770553189772293; + } else { + result[0] += -459.67841100976295; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 402))) { + result[0] += -654.9440592987792; + } else { + result[0] += -113.53853189933453; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 52))) { + result[0] += 141.69815316018148; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 74))) { + result[0] += -502.039703472783; + } else { + result[0] += -32.3855029558517; + } + } + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 176))) { + if (LIKELY(false || (data[3].qvalue <= 122))) { + if (LIKELY(false || (data[6].qvalue <= 68))) { + if (LIKELY(false || (data[7].qvalue <= 96))) { + if (LIKELY(false || (data[7].qvalue <= 92))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += 92.56955997074317; + } else { + result[0] += -0.9922155287745735; + } + } else { + result[0] += -500.10035950605436; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 82))) { + if (UNLIKELY(false || (data[3].qvalue <= 18))) { + result[0] += -349.0415762351967; + } else { + result[0] += 171.85769845927226; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 156))) { + result[0] += -451.9129687933409; + } else { + result[0] += -27.180822178728683; + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 24))) { + if (UNLIKELY(false || (data[5].qvalue <= 52))) { + if (LIKELY(false || (data[6].qvalue <= 170))) { + result[0] += 257.80955723579154; + } else { + result[0] += 14.067277977079232; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 78))) { + result[0] += -187.1084046068555; + } else { + result[0] += -16.47644404213514; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 58))) { + if (UNLIKELY(false || (data[1].qvalue <= 68))) { + result[0] += -459.1713788092935; + } else { + result[0] += -121.28232839389386; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 82))) { + result[0] += 45.911193575042404; + } else { + result[0] += -65.09204602729294; + } + } + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (LIKELY(false || (data[8].qvalue <= 148))) { + result[0] += -112.3564328986315; + } else { + result[0] += -581.5931838175455; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 104))) { + if (LIKELY(false || (data[8].qvalue <= 130))) { + if (LIKELY(false || (data[7].qvalue <= 140))) { + result[0] += 99.49014850118782; + } else { + result[0] += 283.8674441857837; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 166))) { + result[0] += -196.33975801441395; + } else { + result[0] += 5.911638064587168; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 128))) { + if (UNLIKELY(false || (data[1].qvalue <= 114))) { + result[0] += 220.39824119047162; + } else { + result[0] += 86.35899353404471; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 136))) { + result[0] += -135.11781706233776; + } else { + result[0] += 1.7254921740277542; + } + } + } + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 32))) { + result[0] += -729.2271141921474; + } else { + if (LIKELY(false || (data[1].qvalue <= 164))) { + if (UNLIKELY(false || (data[8].qvalue <= 6))) { + result[0] += 163.01861591388177; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 182))) { + result[0] += -90.15761066666771; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 184))) { + result[0] += 105.67463769590789; + } else { + result[0] += -40.65848461097406; + } + } + } + } else { + result[0] += -149.36907697589524; + } + } + } + if (LIKELY(false || (data[0].qvalue <= 436))) { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + if (UNLIKELY(false || (data[2].qvalue <= 14))) { + if (UNLIKELY(false || (data[10].qvalue <= 2))) { + result[0] += 73.61434867220245; + } else { + if (LIKELY(false || (data[4].qvalue <= 92))) { + result[0] += -219.52771218319805; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 182))) { + result[0] += -261.44554618186174; + } else { + result[0] += -1230.408535736753; + } + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 24))) { + if (LIKELY(false || (data[0].qvalue <= 386))) { + if (LIKELY(false || (data[2].qvalue <= 112))) { + result[0] += -5.198946846457009; + } else { + result[0] += 249.1763981732631; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 34))) { + result[0] += 641.3825622819111; + } else { + result[0] += 205.60287406527715; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 32))) { + if (UNLIKELY(false || (data[4].qvalue <= 126))) { + result[0] += -887.6161494419807; + } else { + result[0] += -171.10035394400404; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 408))) { + result[0] += -58.06284362537737; + } else { + result[0] += 25.218332263091998; + } + } + } + } + } else { + if (LIKELY(false || (data[8].qvalue <= 154))) { + if (LIKELY(false || (data[0].qvalue <= 346))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + if (LIKELY(false || (data[0].qvalue <= 232))) { + result[0] += 25.584773238822454; + } else { + result[0] += 219.8658888633042; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 20))) { + result[0] += -51.10525196126827; + } else { + result[0] += 0.46333939293857984; + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 72))) { + if (LIKELY(false || (data[8].qvalue <= 130))) { + result[0] += 164.15191019337982; + } else { + result[0] += -335.2814106754929; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 84))) { + result[0] += -89.39119339130295; + } else { + result[0] += 34.512913855389506; + } + } + } + } else { + result[0] += -108.08090718754343; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 158))) { + if (UNLIKELY(false || (data[8].qvalue <= 46))) { + if (LIKELY(false || (data[2].qvalue <= 118))) { + if (UNLIKELY(false || (data[7].qvalue <= 76))) { + result[0] += 131.26162098672742; + } else { + if (LIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -442.8722988609171; + } else { + result[0] += 73.24052956825756; + } + } + } else { + result[0] += 686.9491978638054; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 54))) { + result[0] += 687.8496107636925; + } else { + if (LIKELY(false || (data[0].qvalue <= 456))) { + if (UNLIKELY(false || (data[9].qvalue <= 12))) { + result[0] += -77.24307302441612; + } else { + result[0] += 147.57566065994; + } + } else { + if (LIKELY(false || (data[5].qvalue <= 110))) { + result[0] += 138.145849125284; + } else { + result[0] += 445.5193884395197; + } + } + } + } + } else { + result[0] += -13.479785177056158; + } + } + if (LIKELY(false || (data[0].qvalue <= 418))) { + if (LIKELY(false || (data[1].qvalue <= 124))) { + if (LIKELY(false || (data[4].qvalue <= 68))) { + if (LIKELY(false || (data[1].qvalue <= 80))) { + if (UNLIKELY(false || (data[9].qvalue <= 50))) { + result[0] += -577.1611446622182; + } else { + if (LIKELY(false || (data[10].qvalue <= 142))) { + result[0] += 3.8314250912323637; + } else { + result[0] += -329.28055580520333; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 26))) { + if (UNLIKELY(false || (data[0].qvalue <= 198))) { + result[0] += -194.40768818753736; + } else { + result[0] += -854.4809931534361; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 70))) { + result[0] += -160.52174054303293; + } else { + result[0] += 2.741586163284305; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 372))) { + if (UNLIKELY(false || (data[1].qvalue <= 46))) { + if (UNLIKELY(false || (data[0].qvalue <= 168))) { + result[0] += 18.984540004822804; + } else { + result[0] += 327.11800611524313; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 20))) { + result[0] += -129.20406003130722; + } else { + result[0] += 17.92278831555766; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 150))) { + if (UNLIKELY(false || (data[2].qvalue <= 6))) { + result[0] += -221.08946446962568; + } else { + result[0] += 229.63359317462343; + } + } else { + result[0] += -223.7223770166941; + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 64))) { + if (LIKELY(false || (data[0].qvalue <= 394))) { + result[0] += 5.729375564939926; + } else { + if (LIKELY(false || (data[4].qvalue <= 136))) { + result[0] += 349.6954434120244; + } else { + result[0] += -447.2340327004826; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 346))) { + result[0] += -18.934394820420263; + } else { + if (LIKELY(false || (data[2].qvalue <= 164))) { + if (LIKELY(false || (data[1].qvalue <= 140))) { + result[0] += -512.2810219610269; + } else { + result[0] += -123.2775600600666; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 176))) { + result[0] += 335.8072972282146; + } else { + result[0] += -135.40134282126365; + } + } + } + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 10))) { + result[0] += 408.30240571312294; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 56))) { + result[0] += 203.85368488394235; + } else { + if (UNLIKELY(false || (data[7].qvalue <= 100))) { + if (LIKELY(false || (data[0].qvalue <= 448))) { + if (LIKELY(false || (data[10].qvalue <= 96))) { + result[0] += -457.3360616844447; + } else { + result[0] += 124.47860255821743; + } + } else { + if (LIKELY(false || (data[4].qvalue <= 108))) { + result[0] += 110.48892123347983; + } else { + result[0] += -391.09864189814425; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 114))) { + result[0] += 211.6228610309497; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 100))) { + result[0] += 56.71696470804632; + } else { + result[0] += -35.66565365297374; + } + } + } + } + } + } + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[1].qvalue <= 10))) { + if (LIKELY(false || (data[2].qvalue <= 52))) { + if (UNLIKELY(false || (data[2].qvalue <= 24))) { + result[0] += 147.91960717089773; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (LIKELY(false || (data[2].qvalue <= 46))) { + result[0] += 43.27359384398152; + } else { + result[0] += 111.57729094083193; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 8))) { + result[0] += 113.34997104546899; + } else { + result[0] += 72.55342688666062; + } + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 128))) { + if (LIKELY(false || (data[1].qvalue <= 8))) { + if (LIKELY(false || (data[1].qvalue <= 6))) { + result[0] += 31.246573605653715; + } else { + result[0] += 46.606138483456206; + } + } else { + result[0] += 25.9373059644741; + } + } else { + result[0] += -31.626364376909578; + } + } + } else { + result[0] += 240.78696773473405; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 156))) { + if (UNLIKELY(false || (data[4].qvalue <= 6))) { + if (LIKELY(false || (data[4].qvalue <= 4))) { + if (LIKELY(false || (data[1].qvalue <= 20))) { + if (UNLIKELY(false || (data[9].qvalue <= 136))) { + result[0] += -48.976438574076354; + } else { + result[0] += 84.65299344607978; + } + } else { + result[0] += -130.88113764489268; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 72))) { + if (UNLIKELY(false || (data[1].qvalue <= 22))) { + result[0] += 338.1827440503927; + } else { + result[0] += 298.08608821916727; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 22))) { + result[0] += 204.95094375285473; + } else { + result[0] += 165.99111228283587; + } + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 10))) { + if (LIKELY(false || (data[10].qvalue <= 76))) { + if (LIKELY(false || (data[2].qvalue <= 92))) { + result[0] += -108.03954054155902; + } else { + result[0] += 14.418807341740987; + } + } else { + result[0] += -321.311952308832; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 146))) { + if (UNLIKELY(false || (data[1].qvalue <= 8))) { + result[0] += -76.25024484457143; + } else { + result[0] += 1.263274912881202; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 6))) { + result[0] += -66.04814325674907; + } else { + result[0] += 205.46869851639258; + } + } + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 36))) { + if (UNLIKELY(false || (data[1].qvalue <= 14))) { + result[0] += -104.7552531611455; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 18))) { + if (LIKELY(false || (data[1].qvalue <= 16))) { + result[0] += -179.2056109143998; + } else { + result[0] += -196.72295793090106; + } + } else { + result[0] += -228.7848599462564; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + result[0] += -62.70206554713903; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 22))) { + result[0] += -109.57286387525; + } else { + result[0] += -86.90439645392793; + } + } + } + } + } + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[1].qvalue <= 10))) { + if (LIKELY(false || (data[2].qvalue <= 52))) { + if (UNLIKELY(false || (data[2].qvalue <= 24))) { + result[0] += 133.14242400479026; + } else { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (LIKELY(false || (data[2].qvalue <= 46))) { + result[0] += 38.96136485201495; + } else { + result[0] += 100.46528939864675; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 8))) { + result[0] += 102.02497851147024; + } else { + result[0] += 65.31936047920384; + } + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 128))) { + if (LIKELY(false || (data[1].qvalue <= 8))) { + if (LIKELY(false || (data[1].qvalue <= 6))) { + result[0] += 28.128267190495155; + } else { + result[0] += 42.006052734820884; + } + } else { + result[0] += 23.349456995785104; + } + } else { + result[0] += -28.466887237596946; + } + } + } else { + result[0] += 216.73249472971176; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 154))) { + if (LIKELY(false || (data[9].qvalue <= 148))) { + if (UNLIKELY(false || (data[10].qvalue <= 2))) { + if (LIKELY(false || (data[2].qvalue <= 156))) { + if (LIKELY(false || (data[9].qvalue <= 114))) { + result[0] += 101.975488812464; + } else { + result[0] += -25.287212545092885; + } + } else { + result[0] += -99.57496398377107; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + if (UNLIKELY(false || (data[4].qvalue <= 16))) { + result[0] += 58.55564779565954; + } else { + result[0] += -70.77906117415272; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 18))) { + result[0] += 56.64272064066464; + } else { + result[0] += -1.102421664746258; + } + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 204))) { + if (UNLIKELY(false || (data[4].qvalue <= 4))) { + if (UNLIKELY(false || (data[1].qvalue <= 12))) { + result[0] += 76.95158297864288; + } else { + result[0] += 30.779853089335912; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 12))) { + result[0] += 184.47632299122395; + } else { + result[0] += 240.0561058810638; + } + } + } else { + result[0] += -59.44992973601068; + } + } + } else { + if (LIKELY(false || (data[3].qvalue <= 20))) { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (UNLIKELY(false || (data[2].qvalue <= 36))) { + result[0] += 90.32707990001026; + } else { + result[0] += -56.438123774542795; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 92))) { + if (LIKELY(false || (data[2].qvalue <= 88))) { + result[0] += -97.11633504657749; + } else { + result[0] += -327.052109099788; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 24))) { + result[0] += -88.46877715199902; + } else { + result[0] += 36.38893057075731; + } + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 14))) { + result[0] += 164.4902900819997; + } else { + result[0] += 201.51887436344938; + } + } + } + } + if (LIKELY(false || (data[7].qvalue <= 190))) { + if (LIKELY(false || (data[3].qvalue <= 102))) { + if (LIKELY(false || (data[6].qvalue <= 72))) { + if (LIKELY(false || (data[8].qvalue <= 112))) { + if (LIKELY(false || (data[8].qvalue <= 100))) { + if (LIKELY(false || (data[10].qvalue <= 124))) { + result[0] += 2.677490165277401; + } else { + result[0] += -230.8237998800555; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 106))) { + result[0] += 25.99249929961273; + } else { + result[0] += 143.56253715641475; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 158))) { + if (LIKELY(false || (data[4].qvalue <= 32))) { + result[0] += -56.96864498437293; + } else { + result[0] += -361.08262388656516; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 130))) { + result[0] += -1.2571125993192631; + } else { + result[0] += 139.96081546293442; + } + } + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 24))) { + if (LIKELY(false || (data[4].qvalue <= 126))) { + if (LIKELY(false || (data[8].qvalue <= 68))) { + result[0] += 157.68101550278936; + } else { + result[0] += -24.160628623142866; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 100))) { + result[0] += -87.49349951075376; + } else { + result[0] += -651.2774869908153; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 104))) { + if (UNLIKELY(false || (data[8].qvalue <= 82))) { + result[0] += -72.49802639529806; + } else { + result[0] += -213.66048931924266; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 54))) { + result[0] += 78.72485385773093; + } else { + result[0] += -55.249394110207554; + } + } + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 134))) { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + if (LIKELY(false || (data[8].qvalue <= 148))) { + result[0] += -94.97855070131733; + } else { + result[0] += -527.4611841577268; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 32))) { + if (LIKELY(false || (data[9].qvalue <= 28))) { + result[0] += 0.9950631307094829; + } else { + result[0] += -188.09164012193523; + } + } else { + if (LIKELY(false || (data[8].qvalue <= 140))) { + result[0] += 64.8246177611995; + } else { + result[0] += -21.226393353713657; + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 140))) { + if (UNLIKELY(false || (data[10].qvalue <= 138))) { + result[0] += -69.54835276237708; + } else { + if (LIKELY(false || (data[1].qvalue <= 158))) { + result[0] += -236.0745244061219; + } else { + result[0] += -12.63559195083826; + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 52))) { + result[0] += -236.792523303151; + } else { + if (LIKELY(false || (data[10].qvalue <= 144))) { + result[0] += 52.206924149774636; + } else { + result[0] += -39.51021768049611; + } + } + } + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 140))) { + if (LIKELY(false || (data[2].qvalue <= 226))) { + result[0] += -52.42843817107941; + } else { + result[0] += -306.65785355623103; + } + } else { + result[0] += -385.0662454001109; + } + } + if (LIKELY(false || (data[0].qvalue <= 398))) { + if (LIKELY(false || (data[1].qvalue <= 110))) { + if (LIKELY(false || (data[4].qvalue <= 70))) { + if (LIKELY(false || (data[1].qvalue <= 86))) { + if (UNLIKELY(false || (data[5].qvalue <= 42))) { + if (LIKELY(false || (data[5].qvalue <= 40))) { + result[0] += -11.28284578878582; + } else { + result[0] += -558.1753204017739; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 26))) { + result[0] += 133.21891876863342; + } else { + result[0] += 2.7832862656512685; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 26))) { + if (LIKELY(false || (data[0].qvalue <= 246))) { + result[0] += -324.1019695078817; + } else { + result[0] += -1005.9270602557989; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 88))) { + result[0] += -134.86731075700266; + } else { + result[0] += 10.66748293923189; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 336))) { + if (LIKELY(false || (data[6].qvalue <= 98))) { + result[0] += 2.8200100312590166; + } else { + result[0] += 101.03470891273177; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 14))) { + if (LIKELY(false || (data[6].qvalue <= 82))) { + result[0] += 102.04557161945678; + } else { + result[0] += -1199.8811078319284; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 178))) { + result[0] += 209.67226520445524; + } else { + result[0] += -245.61747308533063; + } + } + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 60))) { + result[0] += 42.785881512673626; + } else { + if (LIKELY(false || (data[6].qvalue <= 154))) { + if (LIKELY(false || (data[0].qvalue <= 270))) { + result[0] += -45.95588289959001; + } else { + if (UNLIKELY(false || (data[6].qvalue <= 90))) { + result[0] += -6.815612347789953; + } else { + result[0] += -266.0110871919803; + } + } + } else { + result[0] += 39.02030164716074; + } + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 8))) { + result[0] += 324.9267206663035; + } else { + if (LIKELY(false || (data[2].qvalue <= 214))) { + if (UNLIKELY(false || (data[1].qvalue <= 16))) { + result[0] += -112.18559323452085; + } else { + if (LIKELY(false || (data[1].qvalue <= 132))) { + if (UNLIKELY(false || (data[2].qvalue <= 80))) { + result[0] += -56.2055045870944; + } else { + result[0] += 116.07989618400472; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 118))) { + result[0] += 141.36324262903074; + } else { + result[0] += -22.395839800711574; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 458))) { + if (UNLIKELY(false || (data[10].qvalue <= 118))) { + result[0] += 100.73381070844493; + } else { + if (LIKELY(false || (data[6].qvalue <= 174))) { + result[0] += -365.7173433437338; + } else { + result[0] += 21.744193287907777; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 162))) { + result[0] += 280.70190659426964; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 466))) { + result[0] += -456.7905672944539; + } else { + result[0] += 86.74089207808544; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 438))) { + if (LIKELY(false || (data[6].qvalue <= 146))) { + if (LIKELY(false || (data[7].qvalue <= 198))) { + if (LIKELY(false || (data[0].qvalue <= 388))) { + if (LIKELY(false || (data[1].qvalue <= 126))) { + if (LIKELY(false || (data[4].qvalue <= 102))) { + result[0] += -3.0799897635647393; + } else { + result[0] += 83.18986261549072; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 36))) { + result[0] += 225.15234009730352; + } else { + result[0] += -85.63258649231685; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 82))) { + if (LIKELY(false || (data[3].qvalue <= 80))) { + result[0] += 35.515553203945544; + } else { + result[0] += -529.1932694931496; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 124))) { + result[0] += 183.78815671074506; + } else { + result[0] += 39.81733903217535; + } + } + } + } else { + result[0] += -186.6083734780767; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 50))) { + if (UNLIKELY(false || (data[0].qvalue <= 272))) { + result[0] += 16.882292766847286; + } else { + result[0] += -340.80931681790474; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 154))) { + if (LIKELY(false || (data[3].qvalue <= 170))) { + if (LIKELY(false || (data[10].qvalue <= 102))) { + result[0] += -8.494744851894465; + } else { + result[0] += -138.4317062673716; + } + } else { + result[0] += -365.5767086328917; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 172))) { + if (UNLIKELY(false || (data[0].qvalue <= 304))) { + result[0] += -4.6291599966349635; + } else { + result[0] += 233.27933991789052; + } + } else { + result[0] += -47.89610265065409; + } + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 182))) { + if (LIKELY(false || (data[10].qvalue <= 148))) { + if (UNLIKELY(false || (data[8].qvalue <= 46))) { + if (UNLIKELY(false || (data[7].qvalue <= 56))) { + result[0] += 408.84774771757384; + } else { + if (LIKELY(false || (data[4].qvalue <= 136))) { + result[0] += -81.22211370546637; + } else { + result[0] += 149.79791446348088; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 60))) { + result[0] += 507.31584249730014; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 108))) { + result[0] += 3.6960386503909706; + } else { + result[0] += 132.51686944699037; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + if (LIKELY(false || (data[7].qvalue <= 192))) { + if (UNLIKELY(false || (data[0].qvalue <= 460))) { + result[0] += -289.98766205200593; + } else { + result[0] += -37.620573467202114; + } + } else { + result[0] += -1064.0260380735235; + } + } else { + result[0] += 301.66872530386996; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + if (LIKELY(false || (data[1].qvalue <= 160))) { + if (LIKELY(false || (data[3].qvalue <= 178))) { + result[0] += -66.47823654447284; + } else { + result[0] += -479.9069173542369; + } + } else { + result[0] += -776.3112829887955; + } + } else { + result[0] += 122.73633086275382; + } + } + } + if (LIKELY(false || (data[0].qvalue <= 458))) { + if (LIKELY(false || (data[6].qvalue <= 168))) { + if (LIKELY(false || (data[6].qvalue <= 166))) { + if (LIKELY(false || (data[6].qvalue <= 164))) { + if (LIKELY(false || (data[0].qvalue <= 424))) { + if (LIKELY(false || (data[6].qvalue <= 144))) { + result[0] += -1.16853189584804; + } else { + result[0] += -76.18237147015647; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += -565.1080728858356; + } else { + result[0] += 55.856898322571; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 428))) { + result[0] += -22.36169324873774; + } else { + result[0] += -588.8674501865916; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 444))) { + result[0] += 45.149287996873234; + } else { + result[0] += 472.9437979483076; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 110))) { + if (UNLIKELY(false || (data[0].qvalue <= 426))) { + result[0] += -14.394012939959836; + } else { + result[0] += -486.3581999710685; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 58))) { + result[0] += -223.70340084114315; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 88))) { + if (LIKELY(false || (data[0].qvalue <= 400))) { + result[0] += -62.55465904994526; + } else { + result[0] += 711.6569177288845; + } + } else { + result[0] += -22.12709356395005; + } + } + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 176))) { + if (UNLIKELY(false || (data[6].qvalue <= 136))) { + if (LIKELY(false || (data[4].qvalue <= 108))) { + if (UNLIKELY(false || (data[9].qvalue <= 36))) { + result[0] += 353.08425684031425; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 70))) { + result[0] += -488.1757777871809; + } else { + result[0] += 63.64288315962378; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[1].qvalue <= 164))) { + result[0] += -1340.3345402421837; + } else { + result[0] += 611.7688119589316; + } + } else { + result[0] += -5.555477568010339; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 124))) { + if (LIKELY(false || (data[2].qvalue <= 216))) { + if (UNLIKELY(false || (data[10].qvalue <= 48))) { + result[0] += 101.80136762073124; + } else { + result[0] += 394.0454932921462; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + result[0] += -54.51295272481636; + } else { + result[0] += 369.30662351586926; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + if (LIKELY(false || (data[2].qvalue <= 168))) { + result[0] += -280.5682849399643; + } else { + result[0] += 363.49964354819656; + } + } else { + if (LIKELY(false || (data[2].qvalue <= 94))) { + result[0] += 287.00161641580763; + } else { + result[0] += -27.192437609622456; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (UNLIKELY(false || (data[4].qvalue <= 102))) { + result[0] += 4.886466438190986; + } else { + result[0] += -273.6325616457949; + } + } else { + result[0] += 83.09343404390128; + } + } + } + if (LIKELY(false || (data[0].qvalue <= 466))) { + if (LIKELY(false || (data[6].qvalue <= 168))) { + if (LIKELY(false || (data[0].qvalue <= 426))) { + if (UNLIKELY(false || (data[9].qvalue <= 42))) { + if (LIKELY(false || (data[9].qvalue <= 28))) { + if (LIKELY(false || (data[9].qvalue <= 26))) { + result[0] += -12.741637583507675; + } else { + result[0] += 237.9588004111412; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 22))) { + result[0] += -668.1312059622767; + } else { + result[0] += -74.96557902347062; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 96))) { + if (LIKELY(false || (data[1].qvalue <= 92))) { + result[0] += 1.2337343498563946; + } else { + result[0] += -70.38800250372651; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 98))) { + result[0] += 57.91224145436725; + } else { + result[0] += -291.61695102438495; + } + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 46))) { + if (LIKELY(false || (data[3].qvalue <= 138))) { + if (LIKELY(false || (data[2].qvalue <= 78))) { + result[0] += -16.243170204026153; + } else { + result[0] += 316.91551138966975; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 122))) { + result[0] += -653.2681828545267; + } else { + result[0] += -140.38689952692195; + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 116))) { + if (UNLIKELY(false || (data[9].qvalue <= 76))) { + result[0] += 227.09410027903155; + } else { + result[0] += 41.580049580383246; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 112))) { + result[0] += 421.02462529251864; + } else { + result[0] += -29.85412788966988; + } + } + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 84))) { + if (LIKELY(false || (data[0].qvalue <= 462))) { + if (UNLIKELY(false || (data[0].qvalue <= 430))) { + result[0] += -40.88170364268475; + } else { + if (LIKELY(false || (data[0].qvalue <= 458))) { + result[0] += -438.26722679955304; + } else { + result[0] += -153.8759027518579; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 170))) { + result[0] += 596.3699073028564; + } else { + result[0] += -65.1138629905028; + } + } + } else { + if (LIKELY(false || (data[2].qvalue <= 170))) { + if (LIKELY(false || (data[0].qvalue <= 450))) { + if (UNLIKELY(false || (data[8].qvalue <= 96))) { + result[0] += -174.18902213573074; + } else { + result[0] += 92.92329912293484; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 140))) { + result[0] += 1029.8844129302536; + } else { + result[0] += 173.42569967619715; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 398))) { + result[0] += 117.61714884981923; + } else { + result[0] += -165.11066227116845; + } + } + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 132))) { + if (UNLIKELY(false || (data[3].qvalue <= 140))) { + if (LIKELY(false || (data[8].qvalue <= 152))) { + result[0] += 275.79848862506384; + } else { + result[0] += -208.23065829261407; + } + } else { + result[0] += 44.68615918739647; + } + } else { + result[0] += -104.44556106745557; + } + } + if (LIKELY(false || (data[0].qvalue <= 452))) { + if (LIKELY(false || (data[7].qvalue <= 178))) { + if (LIKELY(false || (data[2].qvalue <= 212))) { + if (LIKELY(false || (data[7].qvalue <= 166))) { + if (LIKELY(false || (data[6].qvalue <= 168))) { + if (LIKELY(false || (data[6].qvalue <= 166))) { + result[0] += 0.17111650034346793; + } else { + result[0] += 170.19199658576656; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 114))) { + result[0] += -7.762253214369399; + } else { + result[0] += -229.65839448385577; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 154))) { + if (UNLIKELY(false || (data[7].qvalue <= 168))) { + result[0] += -1527.4651782070064; + } else { + result[0] += -32.341146195408825; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 400))) { + result[0] += 53.78926807880971; + } else { + result[0] += 365.35095312342645; + } + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 40))) { + result[0] += -195.8247595287494; + } else { + if (LIKELY(false || (data[0].qvalue <= 446))) { + result[0] += -6.730419002246698; + } else { + result[0] += -485.7584640261381; + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 68))) { + result[0] += -302.41592403705386; + } else { + result[0] += -41.37635294838609; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 146))) { + if (UNLIKELY(false || (data[8].qvalue <= 68))) { + if (LIKELY(false || (data[9].qvalue <= 52))) { + if (LIKELY(false || (data[4].qvalue <= 108))) { + if (LIKELY(false || (data[0].qvalue <= 462))) { + result[0] += 105.6696751827489; + } else { + result[0] += 422.3152713156167; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 154))) { + result[0] += -338.9649649852936; + } else { + result[0] += 94.81444748000506; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 472))) { + if (UNLIKELY(false || (data[2].qvalue <= 32))) { + result[0] += 307.59982556152346; + } else { + result[0] += -2059.814399773849; + } + } else { + result[0] += -114.64047998985878; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 120))) { + if (LIKELY(false || (data[0].qvalue <= 468))) { + if (UNLIKELY(false || (data[6].qvalue <= 56))) { + result[0] += -105.53208465517486; + } else { + result[0] += 242.79092160164484; + } + } else { + result[0] += 794.8344802547236; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 462))) { + result[0] += 211.67574313790456; + } else { + result[0] += -1052.0226856718868; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 470))) { + if (LIKELY(false || (data[2].qvalue <= 220))) { + if (LIKELY(false || (data[5].qvalue <= 122))) { + if (UNLIKELY(false || (data[8].qvalue <= 14))) { + result[0] += 607.7257898100032; + } else { + result[0] += 31.625663758552975; + } + } else { + result[0] += -165.26478050781773; + } + } else { + result[0] += -212.34245839321358; + } + } else { + if (LIKELY(false || (data[7].qvalue <= 188))) { + result[0] += 195.00720541235728; + } else { + result[0] += -16.708118864398536; + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 442))) { + if (LIKELY(false || (data[1].qvalue <= 148))) { + if (LIKELY(false || (data[8].qvalue <= 154))) { + if (UNLIKELY(false || (data[0].qvalue <= 54))) { + result[0] += -25.89138246104899; + } else { + if (LIKELY(false || (data[5].qvalue <= 84))) { + if (LIKELY(false || (data[8].qvalue <= 116))) { + result[0] += 6.395755334851975; + } else { + result[0] += -41.8874025902268; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 136))) { + result[0] += 43.17739670120483; + } else { + result[0] += -113.04299411317503; + } + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 342))) { + result[0] += -9.406748281219956; + } else { + result[0] += -151.8391684094208; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 102))) { + if (LIKELY(false || (data[0].qvalue <= 308))) { + result[0] += 68.15272951691435; + } else { + result[0] += 653.7243028200384; + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 118))) { + if (UNLIKELY(false || (data[0].qvalue <= 360))) { + result[0] += 4.132956299238849; + } else { + if (LIKELY(false || (data[2].qvalue <= 140))) { + result[0] += -391.93226074781654; + } else { + result[0] += -64.14020916590836; + } + } + } else { + result[0] += -42.79033951970415; + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 140))) { + if (UNLIKELY(false || (data[3].qvalue <= 16))) { + result[0] += 673.2372143809691; + } else { + if (LIKELY(false || (data[8].qvalue <= 72))) { + if (UNLIKELY(false || (data[0].qvalue <= 456))) { + if (LIKELY(false || (data[2].qvalue <= 68))) { + result[0] += 72.78571585943284; + } else { + result[0] += -372.52419624016306; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 84))) { + result[0] += -470.8106385006224; + } else { + result[0] += 129.07388124253447; + } + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 448))) { + if (UNLIKELY(false || (data[3].qvalue <= 106))) { + result[0] += -167.83421517433203; + } else { + result[0] += 120.89570583762936; + } + } else { + if (LIKELY(false || (data[1].qvalue <= 144))) { + result[0] += 166.50278950598; + } else { + result[0] += 479.51545503859353; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + if (LIKELY(false || (data[2].qvalue <= 216))) { + if (LIKELY(false || (data[8].qvalue <= 102))) { + if (LIKELY(false || (data[4].qvalue <= 136))) { + result[0] += -85.88265694904688; + } else { + result[0] += 158.08464637061354; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 160))) { + result[0] += 566.823827422359; + } else { + result[0] += 36.90095759323719; + } + } + } else { + result[0] += -221.08459607201044; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 162))) { + result[0] += 279.3051543277019; + } else { + if (UNLIKELY(false || (data[4].qvalue <= 88))) { + result[0] += 225.3999401051908; + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + result[0] += -146.03870534454174; + } else { + result[0] += 135.266458595812; + } + } + } + } + } + } + if (UNLIKELY(false || (data[7].qvalue <= 10))) { + if (LIKELY(false || (data[3].qvalue <= 54))) { + if (LIKELY(false || (data[8].qvalue <= 56))) { + if (LIKELY(false || (data[5].qvalue <= 30))) { + if (UNLIKELY(false || (data[4].qvalue <= 2))) { + result[0] += 95.33481345166494; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 36))) { + result[0] += -51.475241127929905; + } else { + result[0] += 45.54996053721217; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 18))) { + result[0] += 193.47951816282492; + } else { + result[0] += 41.14647330634432; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 8))) { + result[0] += -256.5011877316934; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 44))) { + result[0] += -78.031723920287; + } else { + if (UNLIKELY(false || (data[10].qvalue <= 58))) { + result[0] += -70.18547029244044; + } else { + result[0] += 58.299178728803355; + } + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 56))) { + result[0] += 377.13982577866017; + } else { + result[0] += 125.3708679348752; + } + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 22))) { + if (LIKELY(false || (data[6].qvalue <= 20))) { + if (LIKELY(false || (data[3].qvalue <= 48))) { + if (LIKELY(false || (data[5].qvalue <= 38))) { + if (LIKELY(false || (data[2].qvalue <= 122))) { + result[0] += -27.262907617303707; + } else { + result[0] += -296.1225155241601; + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 94))) { + result[0] += -744.6855172554128; + } else { + result[0] += -295.5306383117119; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 74))) { + result[0] += 131.65876610363532; + } else { + result[0] += 275.9221477857152; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 54))) { + result[0] += -266.18327484029714; + } else { + result[0] += -500.5391151498484; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 14))) { + if (LIKELY(false || (data[4].qvalue <= 34))) { + if (LIKELY(false || (data[8].qvalue <= 86))) { + result[0] += -10.58236111787111; + } else { + if (UNLIKELY(false || (data[5].qvalue <= 8))) { + result[0] += -39.932805444944165; + } else { + result[0] += 159.2150451080038; + } + } + } else { + result[0] += 204.44418412478637; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 30))) { + if (LIKELY(false || (data[8].qvalue <= 12))) { + if (UNLIKELY(false || (data[1].qvalue <= 62))) { + result[0] += 125.99859295088723; + } else { + result[0] += -10.15551076665495; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 20))) { + result[0] += -190.22917265383091; + } else { + result[0] += -33.04391029665733; + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 98))) { + if (UNLIKELY(false || (data[7].qvalue <= 32))) { + result[0] += 63.784678651859224; + } else { + result[0] += 4.687841489245975; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 70))) { + result[0] += -193.67925649583466; + } else { + result[0] += -11.97318656822869; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 410))) { + if (UNLIKELY(false || (data[9].qvalue <= 54))) { + if (UNLIKELY(false || (data[4].qvalue <= 74))) { + if (LIKELY(false || (data[0].qvalue <= 298))) { + result[0] += -37.83989902430271; + } else { + result[0] += -246.41304263463073; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 36))) { + if (UNLIKELY(false || (data[0].qvalue <= 196))) { + result[0] += -24.698954353394658; + } else { + result[0] += 269.8421976574775; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 82))) { + if (LIKELY(false || (data[4].qvalue <= 128))) { + result[0] += 121.42816960444512; + } else { + result[0] += -204.12026096629276; + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 86))) { + result[0] += -150.30284718707338; + } else { + result[0] += -9.526517783514938; + } + } + } + } + } else { + result[0] += 0.6374046148668437; + } + } else { + if (UNLIKELY(false || (data[9].qvalue <= 10))) { + if (LIKELY(false || (data[0].qvalue <= 456))) { + if (UNLIKELY(false || (data[3].qvalue <= 160))) { + if (UNLIKELY(false || (data[2].qvalue <= 56))) { + result[0] += -844.9505630005908; + } else { + result[0] += -266.4489226017028; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 448))) { + result[0] += -59.61765022395716; + } else { + if (UNLIKELY(false || (data[9].qvalue <= 0))) { + result[0] += -422.1597338477869; + } else { + result[0] += 307.5185329018873; + } + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 84))) { + result[0] += 259.874337152467; + } else { + result[0] += -0.14677676347402133; + } + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 76))) { + if (LIKELY(false || (data[2].qvalue <= 60))) { + if (UNLIKELY(false || (data[4].qvalue <= 82))) { + if (LIKELY(false || (data[6].qvalue <= 66))) { + result[0] += 6.901818537027612; + } else { + result[0] += 346.73948874314306; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 0))) { + result[0] += 767.3329148187853; + } else { + result[0] += -55.71428668797978; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 462))) { + if (UNLIKELY(false || (data[8].qvalue <= 62))) { + result[0] += -1186.4827177702393; + } else { + result[0] += -248.10230638115846; + } + } else { + result[0] += 317.2999956644618; + } + } + } else { + if (LIKELY(false || (data[8].qvalue <= 128))) { + if (UNLIKELY(false || (data[10].qvalue <= 46))) { + if (UNLIKELY(false || (data[8].qvalue <= 6))) { + result[0] += 424.73738367278276; + } else { + result[0] += -69.29711136275134; + } + } else { + if (LIKELY(false || (data[9].qvalue <= 38))) { + result[0] += 93.41031827005968; + } else { + result[0] += 256.43713558484785; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 86))) { + if (LIKELY(false || (data[0].qvalue <= 456))) { + result[0] += -194.80499023272242; + } else { + result[0] += 84.16720477646794; + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 130))) { + result[0] += 1117.9205291606104; + } else { + result[0] += 32.52858858984742; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 442))) { + if (LIKELY(false || (data[6].qvalue <= 146))) { + if (LIKELY(false || (data[0].qvalue <= 360))) { + if (UNLIKELY(false || (data[4].qvalue <= 38))) { + if (UNLIKELY(false || (data[6].qvalue <= 24))) { + if (UNLIKELY(false || (data[9].qvalue <= 94))) { + result[0] += -509.2763123794946; + } else { + result[0] += -10.28309254566162; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 46))) { + result[0] += 65.60802974294101; + } else { + result[0] += -11.172650598142237; + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 40))) { + if (UNLIKELY(false || (data[0].qvalue <= 170))) { + result[0] += -379.1246497217576; + } else { + result[0] += -880.7154812437998; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 38))) { + result[0] += 33.59073760216112; + } else { + result[0] += -19.2301464816471; + } + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 198))) { + if (LIKELY(false || (data[2].qvalue <= 186))) { + if (LIKELY(false || (data[3].qvalue <= 144))) { + result[0] += 33.69989668265042; + } else { + result[0] += 208.11777026466976; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 126))) { + result[0] += -4.6241213678677076; + } else { + result[0] += -258.29323587381907; + } + } + } else { + result[0] += -240.03769405637382; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 94))) { + if (UNLIKELY(false || (data[4].qvalue <= 20))) { + if (LIKELY(false || (data[0].qvalue <= 400))) { + result[0] += -1.3019828319392528; + } else { + result[0] += 595.7005340010636; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 258))) { + result[0] += -9.190887637624767; + } else { + result[0] += -285.64672594943085; + } + } + } else { + if (LIKELY(false || (data[7].qvalue <= 154))) { + if (UNLIKELY(false || (data[0].qvalue <= 344))) { + result[0] += 40.545254733724846; + } else { + if (UNLIKELY(false || (data[0].qvalue <= 416))) { + result[0] += -233.93172162375063; + } else { + result[0] += -22.286785557997515; + } + } + } else { + result[0] += 70.10413972244889; + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 140))) { + if (LIKELY(false || (data[4].qvalue <= 114))) { + if (LIKELY(false || (data[4].qvalue <= 112))) { + if (UNLIKELY(false || (data[3].qvalue <= 16))) { + result[0] += 634.2191665019159; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 78))) { + result[0] += 6.665393953447264; + } else { + result[0] += 149.39801863389712; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 154))) { + result[0] += 1226.4539406622023; + } else { + result[0] += 294.9277796445255; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 158))) { + result[0] += 235.1520118587282; + } else { + result[0] += -166.61985769647302; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + if (LIKELY(false || (data[2].qvalue <= 216))) { + result[0] += 5.169623713259927; + } else { + result[0] += -199.41738547469845; + } + } else { + result[0] += 71.9476904143754; + } + } + } + if (UNLIKELY(false || (data[0].qvalue <= 48))) { + if (UNLIKELY(false || (data[0].qvalue <= 0))) { + result[0] += -78.00407393572347; + } else { + if (LIKELY(false || (data[2].qvalue <= 202))) { + if (LIKELY(false || (data[3].qvalue <= 104))) { + if (LIKELY(false || (data[4].qvalue <= 90))) { + if (UNLIKELY(false || (data[2].qvalue <= 0))) { + result[0] += -70.6888999910638; + } else { + result[0] += -10.627670242516414; + } + } else { + if (LIKELY(false || (data[10].qvalue <= 66))) { + result[0] += -107.69047239009149; + } else { + result[0] += -13.414907874287316; + } + } + } else { + if (LIKELY(false || (data[1].qvalue <= 138))) { + if (UNLIKELY(false || (data[10].qvalue <= 12))) { + result[0] += 69.98431682634853; + } else { + result[0] += -59.43734140381587; + } + } else { + result[0] += 78.30655993653257; + } + } + } else { + result[0] += 82.01495795861331; + } + } + } else { + if (UNLIKELY(false || (data[4].qvalue <= 0))) { + if (LIKELY(false || (data[2].qvalue <= 128))) { + if (LIKELY(false || (data[0].qvalue <= 226))) { + if (LIKELY(false || (data[2].qvalue <= 58))) { + if (UNLIKELY(false || (data[2].qvalue <= 24))) { + result[0] += 150.52913267303913; + } else { + result[0] += 6.758849638068968; + } + } else { + result[0] += 219.10434569977826; + } + } else { + result[0] += 274.1316917335302; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 332))) { + result[0] += -84.8367008241603; + } else { + result[0] += 317.87177416169243; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 14))) { + if (LIKELY(false || (data[0].qvalue <= 406))) { + if (LIKELY(false || (data[4].qvalue <= 28))) { + if (UNLIKELY(false || (data[9].qvalue <= 108))) { + result[0] += 152.21239710120096; + } else { + result[0] += -49.218505477191925; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 30))) { + result[0] += -369.09166571523195; + } else { + result[0] += -59.62264580017301; + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 122))) { + if (UNLIKELY(false || (data[2].qvalue <= 72))) { + result[0] += 225.48342290509692; + } else { + result[0] += 454.8914585206501; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 420))) { + result[0] += -316.42278833229403; + } else { + result[0] += 89.88278332332641; + } + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 22))) { + if (LIKELY(false || (data[3].qvalue <= 18))) { + if (LIKELY(false || (data[4].qvalue <= 26))) { + result[0] += 83.70108736546864; + } else { + result[0] += -47.03238163468578; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 172))) { + result[0] += 64.00534235112683; + } else { + result[0] += 241.06317309770884; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 28))) { + if (UNLIKELY(false || (data[9].qvalue <= 108))) { + result[0] += 41.85255742051009; + } else { + result[0] += -122.59387579963109; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 12))) { + result[0] += 72.23509636314206; + } else { + result[0] += 2.3792085384878088; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 460))) { + if (LIKELY(false || (data[6].qvalue <= 168))) { + if (LIKELY(false || (data[0].qvalue <= 432))) { + if (LIKELY(false || (data[1].qvalue <= 124))) { + if (LIKELY(false || (data[5].qvalue <= 62))) { + if (LIKELY(false || (data[8].qvalue <= 110))) { + result[0] += -1.818322290568549; + } else { + result[0] += -86.51820960810343; + } + } else { + if (UNLIKELY(false || (data[6].qvalue <= 48))) { + result[0] += 122.64593189161003; + } else { + result[0] += 8.90299476140962; + } + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 64))) { + if (LIKELY(false || (data[8].qvalue <= 64))) { + result[0] += 27.811889151480592; + } else { + result[0] += 564.1322969672179; + } + } else { + if (UNLIKELY(false || (data[10].qvalue <= 66))) { + result[0] += -961.542648801726; + } else { + result[0] += -67.5677271302586; + } + } + } + } else { + if (LIKELY(false || (data[10].qvalue <= 148))) { + if (UNLIKELY(false || (data[1].qvalue <= 0))) { + result[0] += -271.4318221979432; + } else { + if (UNLIKELY(false || (data[5].qvalue <= 16))) { + result[0] += 688.4114417873475; + } else { + result[0] += 58.42035899984279; + } + } + } else { + result[0] += -307.4296972195184; + } + } + } else { + if (UNLIKELY(false || (data[8].qvalue <= 78))) { + if (UNLIKELY(false || (data[0].qvalue <= 434))) { + result[0] += -47.84229037947061; + } else { + result[0] += -341.83457990151675; + } + } else { + result[0] += -24.69473744066359; + } + } + } else { + if (LIKELY(false || (data[6].qvalue <= 176))) { + if (UNLIKELY(false || (data[6].qvalue <= 156))) { + if (LIKELY(false || (data[4].qvalue <= 108))) { + if (LIKELY(false || (data[1].qvalue <= 90))) { + if (LIKELY(false || (data[0].qvalue <= 468))) { + result[0] += -237.2028222385908; + } else { + result[0] += 235.23343926289132; + } + } else { + result[0] += 275.68252452796696; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 470))) { + if (UNLIKELY(false || (data[9].qvalue <= 6))) { + result[0] += 276.655894681045; + } else { + result[0] += -1138.9810647848005; + } + } else { + result[0] += -24.815816631740525; + } + } + } else { + if (LIKELY(false || (data[4].qvalue <= 124))) { + if (UNLIKELY(false || (data[8].qvalue <= 94))) { + result[0] += 468.636846801675; + } else { + result[0] += 173.66365185212408; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 464))) { + result[0] += -156.75158265385107; + } else { + if (UNLIKELY(false || (data[5].qvalue <= 72))) { + result[0] += 418.66415661473707; + } else { + result[0] += 16.60197418193807; + } + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 472))) { + if (UNLIKELY(false || (data[4].qvalue <= 102))) { + if (LIKELY(false || (data[3].qvalue <= 178))) { + result[0] += 112.29216412676556; + } else { + result[0] += -589.6749782826543; + } + } else { + if (LIKELY(false || (data[6].qvalue <= 186))) { + result[0] += -126.14743791646418; + } else { + result[0] += -697.0503549298137; + } + } + } else { + result[0] += 130.01903028897132; + } + } + } + if (LIKELY(false || (data[0].qvalue <= 250))) { + if (LIKELY(false || (data[2].qvalue <= 96))) { + if (LIKELY(false || (data[10].qvalue <= 110))) { + result[0] += -15.209203725866741; + } else { + result[0] += -151.01702601859964; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 116))) { + result[0] += 33.169088504059346; + } else { + if (UNLIKELY(false || (data[2].qvalue <= 136))) { + if (LIKELY(false || (data[2].qvalue <= 134))) { + if (LIKELY(false || (data[9].qvalue <= 132))) { + result[0] += -0.5460734226255192; + } else { + result[0] += -166.18365097405575; + } + } else { + result[0] += -630.4456930959705; + } + } else { + result[0] += 9.610897218995794; + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 12))) { + if (UNLIKELY(false || (data[5].qvalue <= 4))) { + if (UNLIKELY(false || (data[7].qvalue <= 4))) { + result[0] += 292.5131264640451; + } else { + if (LIKELY(false || (data[0].qvalue <= 346))) { + if (LIKELY(false || (data[2].qvalue <= 46))) { + result[0] += -90.96452643248735; + } else { + result[0] += -554.2646443755457; + } + } else { + result[0] += 78.62976969599482; + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 316))) { + if (LIKELY(false || (data[3].qvalue <= 54))) { + if (UNLIKELY(false || (data[9].qvalue <= 102))) { + result[0] += 178.17593860823325; + } else { + result[0] += 25.683590998151445; + } + } else { + result[0] += 478.77786108066925; + } + } else { + result[0] += 181.52015147560186; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 122))) { + if (UNLIKELY(false || (data[6].qvalue <= 54))) { + if (UNLIKELY(false || (data[3].qvalue <= 10))) { + if (LIKELY(false || (data[4].qvalue <= 46))) { + result[0] += 392.13693925483733; + } else { + result[0] += 93.50921667351855; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 26))) { + result[0] += -171.2455308856315; + } else { + result[0] += 65.00191321740634; + } + } + } else { + if (UNLIKELY(false || (data[5].qvalue <= 54))) { + if (LIKELY(false || (data[3].qvalue <= 60))) { + result[0] += -29.670526192638437; + } else { + result[0] += -250.61118646403057; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 72))) { + result[0] += 139.50112391138077; + } else { + result[0] += 5.183830796614544; + } + } + } + } else { + if (LIKELY(false || (data[0].qvalue <= 410))) { + if (UNLIKELY(false || (data[6].qvalue <= 6))) { + if (LIKELY(false || (data[0].qvalue <= 376))) { + result[0] += 72.4511482444548; + } else { + result[0] += 480.9499826432033; + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 50))) { + result[0] += -339.82229606896135; + } else { + result[0] += -34.16011839278788; + } + } + } else { + if (UNLIKELY(false || (data[1].qvalue <= 52))) { + if (LIKELY(false || (data[4].qvalue <= 22))) { + result[0] += 110.97363027456811; + } else { + result[0] += 409.03280051924963; + } + } else { + if (LIKELY(false || (data[0].qvalue <= 468))) { + result[0] += -25.656918184151955; + } else { + result[0] += 673.645954177365; + } + } + } + } + } + } + if (LIKELY(false || (data[0].qvalue <= 338))) { + if (LIKELY(false || (data[2].qvalue <= 96))) { + if (LIKELY(false || (data[10].qvalue <= 110))) { + if (LIKELY(false || (data[2].qvalue <= 86))) { + if (LIKELY(false || (data[2].qvalue <= 82))) { + if (LIKELY(false || (data[10].qvalue <= 74))) { + result[0] += -0.803059421195404; + } else { + result[0] += -41.251005891223926; + } + } else { + result[0] += 143.4559149834235; + } + } else { + if (LIKELY(false || (data[3].qvalue <= 116))) { + if (UNLIKELY(false || (data[7].qvalue <= 26))) { + result[0] += 2.624252580548391; + } else { + result[0] += -227.38952669158417; + } + } else { + result[0] += 45.837014796557646; + } + } + } else { + result[0] += -195.38067959274719; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 116))) { + if (LIKELY(false || (data[0].qvalue <= 176))) { + result[0] += 12.646781663778135; + } else { + if (UNLIKELY(false || (data[8].qvalue <= 50))) { + result[0] += 217.37905842024804; + } else { + if (LIKELY(false || (data[9].qvalue <= 100))) { + result[0] += 18.490879448389602; + } else { + result[0] += 157.2895805126459; + } + } + } + } else { + if (LIKELY(false || (data[5].qvalue <= 88))) { + if (LIKELY(false || (data[10].qvalue <= 130))) { + if (LIKELY(false || (data[10].qvalue <= 124))) { + result[0] += -25.376404193377994; + } else { + result[0] += -234.0117708294; + } + } else { + result[0] += 44.70662468521388; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 130))) { + if (UNLIKELY(false || (data[0].qvalue <= 146))) { + result[0] += 12.86322888112687; + } else { + result[0] += 200.2312601015275; + } + } else { + if (UNLIKELY(false || (data[2].qvalue <= 162))) { + result[0] += -49.82927755893318; + } else { + result[0] += 34.59858546002059; + } + } + } + } + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 64))) { + if (LIKELY(false || (data[7].qvalue <= 60))) { + result[0] += 44.53611652626999; + } else { + if (UNLIKELY(false || (data[3].qvalue <= 100))) { + result[0] += 378.92912612636616; + } else { + result[0] += 114.52994706164273; + } + } + } else { + if (UNLIKELY(false || (data[3].qvalue <= 12))) { + if (UNLIKELY(false || (data[0].qvalue <= 416))) { + if (LIKELY(false || (data[10].qvalue <= 92))) { + result[0] += -628.9086683084666; + } else { + result[0] += 107.65691728510829; + } + } else { + if (UNLIKELY(false || (data[7].qvalue <= 148))) { + if (LIKELY(false || (data[0].qvalue <= 432))) { + result[0] += 30.295081044275346; + } else { + result[0] += 409.9041492892673; + } + } else { + result[0] += -143.73134957553725; + } + } + } else { + if (LIKELY(false || (data[9].qvalue <= 146))) { + if (LIKELY(false || (data[9].qvalue <= 138))) { + if (UNLIKELY(false || (data[6].qvalue <= 54))) { + result[0] += 149.66087572300066; + } else { + result[0] += -2.415057229240939; + } + } else { + if (UNLIKELY(false || (data[0].qvalue <= 418))) { + result[0] += -377.3930807574788; + } else { + result[0] += 56.193799271376434; + } + } + } else { + result[0] += 545.2850103362496; + } + } + } + } + + // Apply base_scores + result[0] += 0; + + // Apply postprocessor + if (!pred_margin) { postprocess(result); } +} + +void fj_predictor::postprocess(double* result) +{ + // Do nothing +} + +// Feature names array +const char* fj_predictor::feature_names[fj_predictor::NUM_FEATURES] = {"time", + "initial_violation_count", + "max_nnz_per_row", + "n_binary_vars", + "n_constraints", + "n_integer_vars", + "n_variables", + "nnz", + "nnz_stddev", + "sparsity", + "unbalancedness", + "uses_load_balancing"}; diff --git a/cpp/src/utilities/models/fj_predictor/quantize.cpp b/cpp/src/utilities/models/fj_predictor/quantize.cpp new file mode 100644 index 0000000000..4bd50efafc --- /dev/null +++ b/cpp/src/utilities/models/fj_predictor/quantize.cpp @@ -0,0 +1,1180 @@ + +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "header.h" + +static const double threshold[] = { + 0.054247050000000012, + 0.077229550000000022, + 0.099138400000000002, + 0.12088200000000002, + 0.14051650000000002, + 0.15852850000000004, + 0.17679700000000004, + 0.19352650000000002, + 0.21021800000000002, + 0.22558550000000002, + 0.24112700000000004, + 0.25703600000000004, + 0.27504900000000004, + 0.29268150000000009, + 0.31128750000000005, + 0.33090700000000001, + 0.34973950000000004, + 0.36837650000000005, + 0.38755100000000003, + 0.40692750000000005, + 0.42464700000000005, + 0.44212900000000005, + 0.46115050000000007, + 0.47875750000000006, + 0.49512500000000004, + 0.51183000000000012, + 0.52816850000000015, + 0.54488850000000011, + 0.56229750000000012, + 0.57881900000000008, + 0.59550200000000009, + 0.61214950000000012, + 0.62885000000000013, + 0.64696700000000018, + 0.66350950000000009, + 0.67997150000000006, + 0.69847550000000014, + 0.7159675000000002, + 0.73463250000000013, + 0.7525940000000001, + 0.77200500000000016, + 0.79034150000000014, + 0.80839950000000005, + 0.82770850000000007, + 0.8455950000000001, + 0.86400500000000013, + 0.88264050000000005, + 0.9004295000000001, + 0.91857200000000006, + 0.93668050000000014, + 0.95404050000000018, + 0.97391950000000016, + 0.99200750000000004, + 1.0099000000000002, + 1.0289150000000002, + 1.0465600000000002, + 1.0646550000000004, + 1.08342, + 1.1014450000000002, + 1.1397000000000002, + 1.1587800000000004, + 1.1770050000000001, + 1.1970650000000003, + 1.2149600000000003, + 1.2334100000000003, + 1.2509550000000003, + 1.2698400000000001, + 1.2892250000000003, + 1.3068250000000001, + 1.3257700000000001, + 1.3439900000000002, + 1.3627650000000002, + 1.3818950000000003, + 1.3997500000000003, + 1.4181450000000002, + 1.4367350000000003, + 1.4547850000000002, + 1.472415, + 1.4914700000000003, + 1.5101100000000003, + 1.5280200000000004, + 1.5462100000000001, + 1.5645300000000002, + 1.5825350000000002, + 1.6018750000000004, + 1.6398350000000002, + 1.6586050000000003, + 1.6957200000000003, + 1.7142950000000001, + 1.7329100000000002, + 1.7519050000000003, + 1.7889300000000004, + 1.8084400000000003, + 1.84535, + 1.8819950000000003, + 1.9005400000000001, + 1.9191650000000002, + 1.9559750000000002, + 1.9729050000000001, + 1.9908450000000002, + 2.0093400000000003, + 2.0269500000000007, + 2.0634850000000005, + 2.0812900000000005, + 2.0995600000000008, + 2.1174250000000003, + 2.1351400000000003, + 2.1524100000000006, + 2.1703750000000004, + 2.1880300000000004, + 2.2057200000000008, + 2.2244050000000004, + 2.2413600000000007, + 2.2589850000000005, + 2.2762350000000002, + 2.2941550000000004, + 2.3126850000000005, + 2.3300800000000002, + 2.3486300000000004, + 2.3668800000000005, + 2.3849800000000001, + 2.4039000000000006, + 2.4207550000000002, + 2.4380750000000004, + 2.4563950000000006, + 2.4738150000000005, + 2.4917700000000003, + 2.5087150000000005, + 2.5256750000000001, + 2.541865, + 2.5750850000000001, + 2.5926700000000005, + 2.6103800000000006, + 2.6462700000000008, + 2.6646600000000005, + 2.6821750000000004, + 2.6995250000000008, + 2.7161250000000003, + 2.7339100000000003, + 2.7519550000000002, + 2.7684900000000003, + 2.7861950000000006, + 2.8210050000000004, + 2.8391800000000003, + 2.8555100000000002, + 2.8724000000000003, + 2.8905950000000007, + 2.9086600000000007, + 2.9273950000000002, + 2.9445150000000004, + 2.9631150000000006, + 2.9813800000000006, + 3.0161400000000005, + 3.0351250000000003, + 3.0535200000000002, + 3.0715450000000004, + 3.0891000000000006, + 3.1077850000000002, + 3.1259600000000005, + 3.1445250000000002, + 3.1638600000000001, + 3.2029950000000005, + 3.2231000000000005, + 3.2427850000000005, + 3.2622000000000004, + 3.2821500000000001, + 3.3022150000000008, + 3.3210900000000003, + 3.3419400000000006, + 3.3625650000000005, + 3.3821600000000003, + 3.4255600000000004, + 3.4476550000000006, + 3.4694350000000003, + 3.4931250000000005, + 3.5172500000000002, + 3.5405750000000005, + 3.5633350000000004, + 3.5881150000000006, + 3.6123700000000007, + 3.6388050000000005, + 3.6645250000000007, + 3.6929500000000002, + 3.7206100000000002, + 3.7487850000000003, + 3.8103150000000006, + 3.8410400000000005, + 3.8753900000000003, + 3.9096100000000003, + 3.9434900000000002, + 3.9779100000000001, + 4.0157400000000001, + 4.0538750000000006, + 4.0947550000000001, + 4.134875000000001, + 4.1789850000000008, + 4.2277500000000012, + 4.2804200000000003, + 4.3315450000000011, + 4.3857550000000005, + 4.4510850000000008, + 4.5179800000000006, + 4.5945550000000006, + 4.6744550000000009, + 4.7680650000000009, + 4.8691100000000009, + 4.9748750000000008, + 5.1040400000000004, + 5.2357750000000012, + 5.3817050000000011, + 5.5487400000000013, + 5.7226800000000013, + 5.9013650000000011, + 6.0933350000000006, + 6.2959750000000012, + 6.5019050000000016, + 6.7376150000000008, + 6.9850100000000008, + 7.2452850000000009, + 7.5387250000000003, + 7.8497500000000011, + 8.206685000000002, + 8.5881250000000016, + 9.0114350000000005, + 9.5016150000000028, + 10.006050000000002, + 10.534950000000002, + 11.213450000000003, + 12.138300000000003, + 13.343550000000002, + 14.829750000000002, + 16.520350000000004, + 19.149450000000005, + 22.676800000000004, + 27.456400000000002, + 34.351750000000003, + 47.268800000000006, + 1.0000000180025095e-35, + 1.5000000000000002, + 2.5000000000000004, + 3.5000000000000004, + 4.5000000000000009, + 7.5000000000000009, + 9.5000000000000018, + 13.500000000000002, + 20.500000000000004, + 27.500000000000004, + 32.500000000000007, + 35.500000000000007, + 73.500000000000014, + 145.50000000000003, + 210.50000000000003, + 256.50000000000006, + 267.50000000000006, + 271.50000000000006, + 338.50000000000006, + 416.50000000000006, + 627.50000000000011, + 646.50000000000011, + 654.50000000000011, + 728.50000000000011, + 785.50000000000011, + 917.50000000000011, + 943.50000000000011, + 1105.5000000000002, + 1168.5000000000002, + 1414.5000000000002, + 1511.5000000000002, + 1766.5000000000002, + 2142.5000000000005, + 2424.5000000000005, + 2476.5000000000005, + 2824.5000000000005, + 3182.5000000000005, + 3233.5000000000005, + 3334.5000000000005, + 3428.5000000000005, + 3734.5000000000005, + 3862.0000000000005, + 4068.5000000000005, + 4221.5000000000009, + 4296.5000000000009, + 4701.5000000000009, + 4781.5000000000009, + 6990.5000000000009, + 7213.5000000000009, + 7449.5000000000009, + 8540.5000000000018, + 9118.5000000000018, + 9521.5000000000018, + 10539.500000000002, + 11717.500000000002, + 12095.500000000002, + 13029.500000000002, + 14400.000000000002, + 14615.500000000002, + 15096.500000000002, + 15817.500000000002, + 16830.500000000004, + 17346.500000000004, + 20007.000000000004, + 20988.500000000004, + 21532.500000000004, + 23313.500000000004, + 24562.500000000004, + 30402.000000000004, + 35983.500000000007, + 40040.000000000007, + 41987.500000000007, + 43540.000000000007, + 44215.500000000007, + 45601.500000000007, + 47232.500000000007, + 54221.000000000007, + 68471.500000000015, + 74712.500000000015, + 89913.000000000015, + 125519.00000000001, + 149049.00000000003, + 230573.00000000003, + 3.5000000000000004, + 4.5000000000000009, + 5.5000000000000009, + 6.5000000000000009, + 7.5000000000000009, + 8.5000000000000018, + 11.500000000000002, + 13.500000000000002, + 14.500000000000002, + 15.500000000000002, + 17.500000000000004, + 24.500000000000004, + 29.500000000000004, + 30.500000000000004, + 31.500000000000004, + 33.500000000000007, + 35.500000000000007, + 37.500000000000007, + 40.500000000000007, + 41.500000000000007, + 42.500000000000007, + 43.500000000000007, + 45.500000000000007, + 49.500000000000007, + 50.500000000000007, + 55.500000000000007, + 59.500000000000007, + 60.500000000000007, + 65.500000000000014, + 68.000000000000014, + 71.000000000000014, + 72.500000000000014, + 73.500000000000014, + 74.500000000000014, + 80.500000000000014, + 90.000000000000014, + 96.500000000000014, + 98.500000000000014, + 99.500000000000014, + 105.50000000000001, + 108.50000000000001, + 112.50000000000001, + 113.50000000000001, + 116.50000000000001, + 117.50000000000001, + 118.50000000000001, + 121.50000000000001, + 124.50000000000001, + 127.00000000000001, + 156.50000000000003, + 158.50000000000003, + 174.50000000000003, + 182.00000000000003, + 186.50000000000003, + 190.50000000000003, + 193.00000000000003, + 197.50000000000003, + 215.50000000000003, + 230.50000000000003, + 238.50000000000003, + 242.50000000000003, + 274.50000000000006, + 287.50000000000006, + 310.00000000000006, + 317.00000000000006, + 475.50000000000006, + 478.00000000000006, + 501.50000000000006, + 508.00000000000006, + 558.00000000000011, + 586.50000000000011, + 594.50000000000011, + 644.50000000000011, + 665.50000000000011, + 686.00000000000011, + 829.00000000000011, + 933.00000000000011, + 973.00000000000011, + 1000.5000000000001, + 1023.0000000000001, + 1101.5000000000002, + 1594.0000000000002, + 1773.5000000000002, + 1868.0000000000002, + 1935.0000000000002, + 2075.5000000000005, + 2658.5000000000005, + 2842.0000000000005, + 2928.0000000000005, + 3159.0000000000005, + 3844.0000000000005, + 3959.0000000000005, + 5956.0000000000009, + 6744.5000000000009, + 7399.5000000000009, + 8990.0000000000018, + 9523.5000000000018, + 11378.500000000002, + 12174.500000000002, + 15191.500000000002, + 18400.500000000004, + 20392.500000000004, + 21043.500000000004, + 21811.000000000004, + 30397.000000000004, + 30926.500000000004, + 33002.500000000007, + 48554.500000000007, + 56727.500000000007, + 77009.500000000015, + 97457.000000000015, + 117653.00000000001, + 137723.50000000003, + 145615.50000000003, + 1.0000000180025095e-35, + 19.000000000000004, + 31.000000000000004, + 55.500000000000007, + 69.500000000000014, + 70.500000000000014, + 99.000000000000014, + 119.00000000000001, + 176.50000000000003, + 189.50000000000003, + 196.00000000000003, + 201.00000000000003, + 219.00000000000003, + 263.00000000000006, + 290.00000000000006, + 306.00000000000006, + 317.50000000000006, + 361.50000000000006, + 384.00000000000006, + 417.00000000000006, + 448.50000000000006, + 534.50000000000011, + 593.50000000000011, + 622.50000000000011, + 693.50000000000011, + 889.50000000000011, + 927.50000000000011, + 1116.5000000000002, + 1455.5000000000002, + 1498.0000000000002, + 1995.5000000000002, + 2027.5000000000002, + 2105.5000000000005, + 2463.5000000000005, + 2518.5000000000005, + 2595.5000000000005, + 2630.0000000000005, + 3336.0000000000005, + 4222.5000000000009, + 4319.5000000000009, + 4543.5000000000009, + 5196.5000000000009, + 5211.0000000000009, + 5279.0000000000009, + 5647.0000000000009, + 6426.5000000000009, + 6471.5000000000009, + 6781.5000000000009, + 7008.0000000000009, + 7273.0000000000009, + 8219.0000000000018, + 8313.0000000000018, + 8346.0000000000018, + 8890.5000000000018, + 9579.5000000000018, + 10096.000000000002, + 10296.500000000002, + 10412.000000000002, + 10570.000000000002, + 10722.500000000002, + 11284.000000000002, + 11482.000000000002, + 12403.500000000002, + 13743.000000000002, + 16623.500000000004, + 17719.000000000004, + 20045.500000000004, + 22896.000000000004, + 24893.000000000004, + 29297.500000000004, + 32360.500000000004, + 32826.000000000007, + 42917.000000000007, + 45655.500000000007, + 49866.500000000007, + 55546.000000000007, + 57005.000000000007, + 60339.000000000007, + 70968.500000000015, + 77454.500000000015, + 86012.000000000015, + 86932.000000000015, + 88579.000000000015, + 98965.000000000015, + 127333.50000000001, + 169734.00000000003, + 233398.00000000003, + 320960.50000000006, + 341815.50000000006, + 1315285.0000000002, + 2747995.0000000005, + 10.500000000000002, + 16.500000000000004, + 38.000000000000007, + 42.500000000000007, + 61.500000000000007, + 76.500000000000014, + 101.50000000000001, + 300.50000000000006, + 464.00000000000006, + 578.50000000000011, + 640.00000000000011, + 653.50000000000011, + 721.00000000000011, + 849.00000000000011, + 1140.0000000000002, + 1197.0000000000002, + 1359.0000000000002, + 1627.5000000000002, + 1925.0000000000002, + 2298.5000000000005, + 2356.0000000000005, + 2394.5000000000005, + 2529.0000000000005, + 3626.5000000000005, + 4099.0000000000009, + 5157.0000000000009, + 5521.5000000000009, + 7489.0000000000009, + 8797.5000000000018, + 9813.0000000000018, + 11172.500000000002, + 11823.000000000002, + 12490.500000000002, + 12827.000000000002, + 13934.000000000002, + 14718.000000000002, + 14944.500000000002, + 15924.500000000002, + 16641.500000000004, + 17124.000000000004, + 17409.000000000004, + 17805.000000000004, + 18541.500000000004, + 20890.500000000004, + 22641.000000000004, + 23763.500000000004, + 27226.000000000004, + 28849.000000000004, + 34014.000000000007, + 39933.000000000007, + 48730.000000000007, + 55362.000000000007, + 58384.000000000007, + 91637.500000000015, + 97665.000000000015, + 108046.50000000001, + 111482.00000000001, + 127740.00000000001, + 141620.50000000003, + 146171.50000000003, + 164305.50000000003, + 167857.50000000003, + 206722.00000000003, + 215085.50000000003, + 271255.50000000006, + 364916.50000000006, + 376899.50000000006, + 446870.00000000006, + 504658.00000000006, + 534428.50000000012, + 2917540.0000000005, + 8.0000000000000018, + 40.500000000000007, + 69.500000000000014, + 76.000000000000014, + 99.000000000000014, + 102.50000000000001, + 116.50000000000001, + 128.50000000000003, + 147.50000000000003, + 161.00000000000003, + 190.50000000000003, + 201.00000000000003, + 218.00000000000003, + 246.00000000000003, + 301.50000000000006, + 306.50000000000006, + 411.00000000000006, + 448.50000000000006, + 490.50000000000006, + 739.00000000000011, + 786.50000000000011, + 835.00000000000011, + 1435.5000000000002, + 1490.0000000000002, + 1585.0000000000002, + 1947.5000000000002, + 2089.5000000000005, + 2208.0000000000005, + 2262.0000000000005, + 2504.0000000000005, + 2595.5000000000005, + 2694.0000000000005, + 3290.5000000000005, + 3486.0000000000005, + 5199.5000000000009, + 5448.5000000000009, + 7246.0000000000009, + 8305.0000000000018, + 8345.5000000000018, + 8884.0000000000018, + 11484.500000000002, + 12542.000000000002, + 13993.500000000002, + 14321.000000000002, + 14486.500000000002, + 29297.500000000004, + 41026.500000000007, + 45655.500000000007, + 57005.000000000007, + 62415.500000000007, + 69861.000000000015, + 77483.500000000015, + 82860.500000000015, + 86012.000000000015, + 105975.50000000001, + 122661.50000000001, + 135210.50000000003, + 154808.00000000003, + 165244.00000000003, + 195544.50000000003, + 321530.50000000006, + 341815.50000000006, + 14.500000000000002, + 66.500000000000014, + 242.50000000000003, + 265.50000000000006, + 306.50000000000006, + 341.50000000000006, + 518.00000000000011, + 730.50000000000011, + 758.50000000000011, + 896.50000000000011, + 982.50000000000011, + 1007.5000000000001, + 1108.0000000000002, + 1133.5000000000002, + 1178.0000000000002, + 1436.0000000000002, + 1599.5000000000002, + 1916.5000000000002, + 2036.0000000000002, + 2066.5000000000005, + 2456.0000000000005, + 2816.0000000000005, + 2849.0000000000005, + 3190.5000000000005, + 3226.5000000000005, + 3564.0000000000005, + 3726.5000000000005, + 4004.5000000000005, + 4719.5000000000009, + 5175.0000000000009, + 5663.0000000000009, + 5848.0000000000009, + 7347.5000000000009, + 8946.0000000000018, + 10887.500000000002, + 11141.500000000002, + 12386.000000000002, + 12893.000000000002, + 12981.000000000002, + 13236.000000000002, + 13742.500000000002, + 13874.000000000002, + 14396.000000000002, + 14769.500000000002, + 16616.000000000004, + 17772.500000000004, + 18503.000000000004, + 18928.000000000004, + 19963.500000000004, + 22431.000000000004, + 22656.000000000004, + 23089.500000000004, + 25009.500000000004, + 25158.500000000004, + 29801.500000000004, + 30930.500000000004, + 32941.500000000007, + 33925.000000000007, + 35517.000000000007, + 35966.500000000007, + 37351.500000000007, + 38399.000000000007, + 40769.000000000007, + 42352.500000000007, + 43749.000000000007, + 45226.000000000007, + 46621.500000000007, + 57435.500000000007, + 58829.500000000007, + 60692.000000000007, + 62587.500000000007, + 64024.000000000007, + 67166.000000000015, + 72662.000000000015, + 73712.500000000015, + 74556.500000000015, + 78614.500000000015, + 83046.500000000015, + 98968.000000000015, + 102883.00000000001, + 124003.00000000001, + 131103.50000000003, + 135356.50000000003, + 145271.00000000003, + 167192.00000000003, + 172253.00000000003, + 198236.00000000003, + 209702.50000000003, + 298248.50000000006, + 333792.50000000006, + 343878.50000000006, + 404776.00000000006, + 460381.00000000006, + 628991.00000000012, + 83.500000000000014, + 183.50000000000003, + 468.00000000000006, + 1396.0000000000002, + 2433.0000000000005, + 3465.0000000000005, + 3645.5000000000005, + 5270.0000000000009, + 6121.5000000000009, + 6510.5000000000009, + 7307.0000000000009, + 8358.5000000000018, + 8618.5000000000018, + 8828.5000000000018, + 9135.0000000000018, + 13322.500000000002, + 14156.000000000002, + 14773.000000000002, + 18059.000000000004, + 19199.000000000004, + 20030.000000000004, + 21288.500000000004, + 22830.500000000004, + 39487.500000000007, + 51294.500000000007, + 55184.000000000007, + 58766.500000000007, + 78090.000000000015, + 79203.500000000015, + 85545.500000000015, + 86497.500000000015, + 96575.500000000015, + 101879.50000000001, + 104890.50000000001, + 106509.00000000001, + 111473.00000000001, + 133804.50000000003, + 143238.00000000003, + 152266.50000000003, + 188714.00000000003, + 193144.50000000003, + 201859.00000000003, + 204145.00000000003, + 211177.00000000003, + 216282.00000000003, + 227905.50000000003, + 233158.50000000003, + 242732.00000000003, + 247534.50000000003, + 252735.00000000003, + 263001.50000000006, + 267680.00000000006, + 286053.50000000006, + 301277.50000000006, + 316160.00000000006, + 326485.50000000006, + 341223.00000000006, + 382538.50000000006, + 397214.50000000006, + 420753.50000000006, + 427014.00000000006, + 474030.00000000006, + 484374.50000000006, + 490157.50000000006, + 493561.50000000006, + 517275.50000000006, + 525159.50000000012, + 536338.50000000012, + 555454.50000000012, + 577752.00000000012, + 616908.00000000012, + 643671.50000000012, + 665158.00000000012, + 688029.50000000012, + 748092.00000000012, + 791728.50000000012, + 805917.00000000012, + 833331.00000000012, + 846131.50000000012, + 889587.00000000012, + 925745.00000000012, + 960960.50000000012, + 993471.50000000012, + 1000860.0000000001, + 1033940.0000000001, + 1088030.0000000002, + 1104190.0000000002, + 1141090.0000000002, + 1227215.0000000002, + 1302410.0000000002, + 1327790.0000000002, + 1565560.0000000002, + 1713120.0000000002, + 1986515.0000000002, + 2295955.0000000005, + 2459445.0000000005, + 2786970.0000000005, + 4311770.0000000009, + 4897840.0000000009, + 7675510.0000000009, + 12103450.000000002, + 1.0000000180025095e-35, + 0.025444000000000005, + 0.079041650000000005, + 0.47263850000000002, + 0.51316300000000015, + 0.61466950000000009, + 0.70002100000000012, + 0.84333400000000014, + 1.0452850000000002, + 1.2015150000000003, + 1.4797750000000003, + 1.8327900000000001, + 2.2462950000000004, + 2.4300000000000006, + 2.5927000000000002, + 2.6670150000000006, + 2.8069300000000004, + 3.1766550000000007, + 3.2475100000000006, + 3.3747950000000002, + 3.4595750000000005, + 3.6730550000000002, + 3.8561550000000007, + 4.4209650000000007, + 4.6392950000000015, + 4.865940000000001, + 4.9636350000000009, + 5.1358750000000013, + 5.5377350000000005, + 5.745820000000001, + 5.9460500000000005, + 6.3433350000000006, + 6.3778100000000011, + 6.5389250000000008, + 6.6173200000000012, + 7.1363800000000017, + 7.4741750000000016, + 7.6210850000000008, + 8.6734450000000027, + 9.4143250000000016, + 11.746350000000001, + 12.052000000000001, + 12.250050000000002, + 13.267300000000001, + 17.041250000000002, + 17.986650000000001, + 21.122200000000003, + 21.844500000000004, + 23.696150000000006, + 25.750650000000004, + 26.183450000000004, + 27.539350000000002, + 31.875650000000004, + 36.846500000000006, + 38.188850000000009, + 44.081200000000003, + 52.333200000000005, + 54.716150000000006, + 60.769450000000006, + 65.705850000000012, + 85.315750000000023, + 108.21300000000001, + 122.06700000000002, + 150.02500000000001, + 157.58100000000002, + 167.36850000000001, + 180.69350000000003, + 206.27400000000003, + 282.04500000000002, + 301.06900000000002, + 431.21800000000002, + 513.42400000000009, + 780.49650000000008, + 813.8660000000001, + 850.00950000000012, + 1134.5900000000004, + 1346.6950000000002, + 2370.7800000000002, + 2928.5150000000008, + 1.3466250000000002e-05, + 2.2664350000000006e-05, + 2.9555800000000003e-05, + 3.5335850000000012e-05, + 3.6073500000000006e-05, + 3.9458500000000002e-05, + 4.3825250000000005e-05, + 5.8516e-05, + 6.4233050000000024e-05, + 7.0834150000000017e-05, + 8.9559350000000012e-05, + 9.5430800000000015e-05, + 0.00011455550000000001, + 0.00012689600000000001, + 0.00014240450000000004, + 0.00016513700000000001, + 0.00018171050000000003, + 0.00018756350000000003, + 0.00020882050000000003, + 0.00021563800000000001, + 0.00022961900000000003, + 0.00026418050000000004, + 0.00027278350000000008, + 0.00029956750000000005, + 0.00032692350000000002, + 0.00033880400000000006, + 0.00035918650000000006, + 0.00038193300000000004, + 0.00042447950000000008, + 0.00043747650000000002, + 0.00044906850000000007, + 0.00049152850000000004, + 0.00052591450000000011, + 0.00057647100000000017, + 0.00058207650000000012, + 0.00098674500000000003, + 0.0011041950000000003, + 0.0012247250000000001, + 0.0012767100000000001, + 0.0013355900000000002, + 0.001509605, + 0.0017419350000000002, + 0.0018100400000000004, + 0.0018508650000000004, + 0.0021851550000000007, + 0.0024035350000000005, + 0.00336228, + 0.0038408950000000004, + 0.004200225000000001, + 0.0047036750000000009, + 0.0051918100000000007, + 0.0055236550000000014, + 0.0057092000000000002, + 0.0059764100000000006, + 0.0069381250000000007, + 0.0073699550000000009, + 0.007565500000000001, + 0.0084957950000000022, + 0.0089344600000000017, + 0.0099261550000000007, + 0.014203600000000002, + 0.021387700000000006, + 0.023916750000000004, + 0.026089700000000004, + 0.05008255000000001, + 0.055832150000000004, + 0.060545100000000004, + 0.070805450000000006, + 0.087852150000000004, + 0.11160100000000002, + 0.12906450000000003, + 0.18728800000000004, + 0.22001300000000004, + 0.25453400000000004, + 0.26550900000000005, + 0.31291300000000005, + 0.35104150000000006, + 0.41306200000000004, + 0.57042700000000013, + 0.64485350000000008, + 0.96875000000000011, + 1.0000000180025095e-35, + 0.0039979100000000012, + 0.0082457700000000012, + 0.038986650000000005, + 0.054666050000000001, + 0.070373600000000022, + 0.097335900000000017, + 0.19564850000000003, + 0.20630450000000003, + 0.24236000000000002, + 0.25417800000000007, + 0.25770150000000008, + 0.27507600000000004, + 0.28204950000000006, + 0.28742100000000009, + 0.29918200000000006, + 0.30476150000000007, + 0.33851400000000004, + 0.37035050000000008, + 0.38581450000000006, + 0.44627100000000003, + 0.45825600000000005, + 0.4758035000000001, + 0.52170800000000017, + 0.54062450000000017, + 0.54514950000000006, + 0.57222350000000011, + 0.67992200000000003, + 0.70028650000000014, + 0.75992800000000005, + 0.81040200000000018, + 0.85826700000000011, + 0.95305950000000006, + 0.98670650000000015, + 1.0208650000000001, + 1.0782450000000001, + 1.1067700000000003, + 1.1631450000000003, + 1.2699600000000004, + 1.2872250000000001, + 1.2987750000000002, + 1.3993100000000003, + 1.5141650000000002, + 1.5734200000000003, + 1.6142300000000003, + 1.6834550000000001, + 1.8165200000000001, + 1.9725750000000002, + 2.0513250000000007, + 2.0544200000000008, + 2.1066100000000003, + 2.2829500000000005, + 2.3961550000000003, + 2.6080850000000004, + 2.7410500000000004, + 3.0611950000000001, + 3.5134100000000008, + 3.5836800000000006, + 4.7010550000000011, + 4.7679900000000011, + 5.106815000000001, + 5.1937250000000015, + 5.2231500000000013, + 5.349705000000001, + 5.4792200000000006, + 5.6649900000000004, + 5.8800200000000009, + 6.6231050000000007, + 6.9449300000000003, + 7.3326800000000008, + 8.199600000000002, + 11.794000000000002, + 16.289250000000003, + 19.559350000000006, + 24.766300000000005, + 1.0000000180025095e-35, +}; + +static const int th_begin[] = { + 0, + 237, + 320, + 434, + 525, + 596, + 658, + 752, + 853, + 932, + 1013, + 1088, +}; + +static const int th_len[] = { + 237, + 83, + 114, + 91, + 71, + 62, + 94, + 101, + 79, + 81, + 75, + 1, +}; + +/* + * \brief Function to convert a feature value into bin index. + * \param val Feature value, in floating-point + * \param fid Feature identifier + * \return bin Index corresponding to given feature value + */ +int fj_predictor::quantize(double val, unsigned fid) +{ + const size_t offset = th_begin[fid]; + const double* array = &threshold[offset]; + int len = th_len[fid]; + int low = 0; + int high = len; + int mid; + double mval; + // It is possible th_begin[i] == [total_num_threshold]. This means that + // all features i, (i+1), ... are not used for any of the splits in the model. + // So in this case, just return something + if (offset == 1089 || val < array[0]) { return -10; } + while (low + 1 < high) { + mid = (low + high) / 2; + mval = array[mid]; + if (val == mval) { + return mid * 2; + } else if (val < mval) { + high = mid; + } else { + low = mid; + } + } + if (array[low] == val) { + return low * 2; + } else if (high == len) { + return len * 2; + } else { + return low * 2 + 1; + } +} diff --git a/cpp/src/utilities/seed_generator.cu b/cpp/src/utilities/seed_generator.cu index 1da6662bc1..612093a7a8 100644 --- a/cpp/src/utilities/seed_generator.cu +++ b/cpp/src/utilities/seed_generator.cu @@ -1,10 +1,11 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #include -int64_t cuopt::seed_generator::seed_ = 0; +int64_t cuopt::seed_generator::base_seed_ = 0; +std::atomic cuopt::seed_generator::epoch_{0}; diff --git a/cpp/src/utilities/seed_generator.cuh b/cpp/src/utilities/seed_generator.cuh index dd5e79d847..888936eb79 100644 --- a/cpp/src/utilities/seed_generator.cuh +++ b/cpp/src/utilities/seed_generator.cuh @@ -1,29 +1,50 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ #pragma once +#include #include #include namespace cuopt { -// TODO: should be thread local? class seed_generator { - static int64_t seed_; + static int64_t base_seed_; + // Monotonically increasing epoch; incremented on every set_seed() call. + // Thread-local state compares against this to detect resets, even when + // the same seed value is set again (e.g., repeated solve_mip() calls). + static std::atomic epoch_; + + struct thread_state_t { + int64_t counter{0}; + int64_t last_epoch{-1}; + }; + + static thread_state_t& local_state() + { + thread_local thread_state_t state; + int64_t current_epoch = epoch_.load(std::memory_order_acquire); + if (state.last_epoch != current_epoch) { + state.counter = base_seed_; + state.last_epoch = current_epoch; + } + return state; + } public: template static void set_seed(seed_t seed) { #ifdef BENCHMARK - seed_ = std::random_device{}(); + base_seed_ = std::random_device{}(); #else - seed_ = static_cast(seed); + base_seed_ = static_cast(seed); #endif + epoch_.fetch_add(1, std::memory_order_release); } template static void set_seed(arg0 seed0, arg1 seed1, args... seeds) @@ -31,7 +52,19 @@ class seed_generator { set_seed(seed1 + ((seed0 + seed1) * (seed0 + seed1 + 1) / 2), seeds...); } - static int64_t get_seed() { return seed_++; } +#if SEED_GENERATOR_DEBUG + static int64_t get_seed(const char* caller = __builtin_FUNCTION(), + const char* file = __builtin_FILE(), + int line = __builtin_LINE()) + { + printf("&&&&&&& SEED CALLED BY %s:%d: %s() ***\n", file, line, caller); + return local_state().counter++; + } +#else + static int64_t get_seed() { return local_state().counter++; } +#endif + + static int64_t peek_seed() { return local_state().counter; } public: seed_generator(seed_generator const&) = delete; diff --git a/cpp/src/utilities/termination_checker.hpp b/cpp/src/utilities/termination_checker.hpp new file mode 100644 index 0000000000..180b194608 --- /dev/null +++ b/cpp/src/utilities/termination_checker.hpp @@ -0,0 +1,219 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +#include + +#include "timer.hpp" +#include "work_limit_context.hpp" + +namespace cuopt { + +/** + * Unified termination checker that subsumes timer_t and work_limit_timer_t. + * + * In non-deterministic mode: checks wall-clock time. + * In deterministic mode: checks work units via work_limit_context_t. + * In BOTH modes: checks parent chain (inheriting root wall-clock limit) and user callbacks. + * + * This is the single timer type used throughout the solver. It replaces work_limit_timer_t. + */ +class termination_checker_t { + public: + struct root_tag_t {}; + + // Root constructor (top-level solver, wall-clock only) + explicit termination_checker_t(double time_limit, root_tag_t) + : deterministic(false), + work_limit(time_limit), + timer(time_limit), + work_context(nullptr), + work_units_at_start(0), + parent_(nullptr) + { + } + + // Non-deterministic constructor with parent + termination_checker_t(double time_limit_, const termination_checker_t& parent) + : deterministic(false), + work_limit(time_limit_), + timer(time_limit_), + work_context(nullptr), + work_units_at_start(0), + parent_(&parent) + { + } + + // Deterministic constructor with parent (inherits parent's termination) + termination_checker_t(work_limit_context_t& context, + double work_limit_, + const termination_checker_t& parent) + : deterministic(context.deterministic), + work_limit(work_limit_), + timer(work_limit_), + work_context(&context), + work_units_at_start(context.deterministic ? context.current_work() : 0), + parent_(&parent) + { + } + + void set_parent(const termination_checker_t* parent) { parent_ = parent; } + const termination_checker_t* get_parent() const { return parent_; } + + void set_termination_callback(bool (*cb)(void*), void* data) + { + termination_callback_ = cb; + termination_callback_data_ = data; + } + + bool check(const char* caller = __builtin_FUNCTION(), + const char* file = __builtin_FILE(), + int line = __builtin_LINE()) const noexcept + { + if (termination_callback_ != nullptr && termination_callback_(termination_callback_data_)) { + return true; + } + + if (parent_ != nullptr && parent_->check()) { return true; } + + if (deterministic) { + if (!work_context) { return false; } + double elapsed_since_start = work_context->current_work() - work_units_at_start; + bool finished_now = elapsed_since_start >= work_limit; + if (finished_now && !finished) { + finished = true; + double actual_elapsed_time = timer.elapsed_time(); + + if (work_limit > 0 && std::abs(actual_elapsed_time - work_limit) / work_limit > 0.10) { + CUOPT_LOG_ERROR( + "%s:%d: %s(): Work limit timer finished with a large discrepancy: %fs for %fwu " + "(global: %g, start: %g)", + file, + line, + caller, + actual_elapsed_time, + work_limit, + work_context->current_work(), + work_units_at_start); + } + } + return finished; + } else { + return timer.check_time_limit(); + } + } + + // Aliases for compatibility with work_limit_timer_t and timer_t interfaces + bool check_time_limit(const char* caller = __builtin_FUNCTION(), + const char* file = __builtin_FILE(), + int line = __builtin_LINE()) const noexcept + { + return check(caller, file, line); + } + + bool check_limit(const char* caller = __builtin_FUNCTION(), + const char* file = __builtin_FILE(), + int line = __builtin_LINE()) const noexcept + { + return check(caller, file, line); + } + + void record_work(double work_units, + const char* caller = __builtin_FUNCTION(), + const char* file = __builtin_FILE(), + int line = __builtin_LINE()) + { + if (deterministic && work_context) { + // debugging info + double parent_elapsed_time = parent_ != nullptr ? parent_->timer.elapsed_time() : 0.0; + double parent_time_limit = parent_ != nullptr ? parent_->timer.get_time_limit() : 0.0; + + CUOPT_LOG_DEBUG("%s:%d: %s(): Recorded %f work units in %fs, total %f (parent time: %g/%g)", + file, + line, + caller, + work_units, + timer.elapsed_time(), + work_context->current_work(), + parent_elapsed_time, + parent_time_limit); + work_context->record_work_sync_on_horizon(work_units); + } + } + + double remaining_units() const noexcept + { + if (deterministic) { + if (!work_context) { return work_limit; } + double elapsed_since_start = work_context->current_work() - work_units_at_start; + return std::max(0.0, work_limit - elapsed_since_start); + } else { + return timer.remaining_time(); + } + } + + double remaining_time() const noexcept { return remaining_units(); } + + double elapsed_time() const noexcept + { + if (deterministic) { + if (!work_context) { return 0.0; } + return work_context->current_work() - work_units_at_start; + } else { + return timer.elapsed_time(); + } + } + + bool check_half_time() const noexcept + { + if (deterministic) { + if (!work_context) { return false; } + double elapsed_since_start = work_context->current_work() - work_units_at_start; + return elapsed_since_start >= work_limit / 2; + } else { + return timer.check_half_time(); + } + } + + double clamp_remaining_time(double desired_time) const noexcept + { + return std::min(desired_time, remaining_time()); + } + + double get_time_limit() const noexcept + { + if (deterministic) { + return work_limit; + } else { + return timer.get_time_limit(); + } + } + + double get_tic_start() const noexcept { return timer.get_tic_start(); } + + timer_t timer; + double work_limit{}; + mutable bool finished{false}; + bool deterministic{false}; + work_limit_context_t* work_context{nullptr}; + double work_units_at_start{0}; + + private: + const termination_checker_t* parent_{nullptr}; + bool (*termination_callback_)(void*) = nullptr; + void* termination_callback_data_ = nullptr; +}; + +// Backward compatibility +using work_limit_timer_t = termination_checker_t; + +} // namespace cuopt diff --git a/cpp/src/utilities/timer.hpp b/cpp/src/utilities/timer.hpp index b7ab6a63bd..ccfab4c57f 100644 --- a/cpp/src/utilities/timer.hpp +++ b/cpp/src/utilities/timer.hpp @@ -34,7 +34,21 @@ class timer_t { elapsed_time()); } - bool check_time_limit() const noexcept { return elapsed_time() >= time_limit; } + bool check_time_limit(const char* caller = __builtin_FUNCTION(), + const char* file = __builtin_FILE(), + int line = __builtin_LINE()) const noexcept + { + bool elapsed = elapsed_time() >= time_limit; + // if (elapsed) { + // printf("************ TIME LIMIT (%.2gs) REACHED BY %s:%d: %s() ***\n", + // time_limit, + // file, + // line, + // caller); + // //__builtin_trap(); + // } + return elapsed; + } bool check_half_time() const noexcept { return elapsed_time() >= time_limit / 2; } diff --git a/cpp/src/utilities/version_info.cpp b/cpp/src/utilities/version_info.cpp index ec9db5130b..bfcb02ce16 100644 --- a/cpp/src/utilities/version_info.cpp +++ b/cpp/src/utilities/version_info.cpp @@ -1,6 +1,6 @@ /* clang-format off */ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ /* clang-format on */ @@ -163,6 +163,44 @@ static double get_available_memory_gb() return kb / (1024.0 * 1024.0); // Convert KB to GB } +double get_cpu_max_clock_mhz() +{ + // Cache the result - CPU max clock doesn't change during execution + // thread_local to avoid an unecessary sync inserted by the compiler + // due to the standard mandating thread-safe static local variable initialization + // the extra work here is minimal. + thread_local static double cached_mhz = []() { + // Try sysfs cpufreq interface first (returns frequency in KHz) + // FIXME: assumes all available CPUs have the same max clock as CPU0 + std::ifstream freq_file("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq"); + if (freq_file.is_open()) { + long khz = 0; + freq_file >> khz; + if (khz > 0) { return khz / 1e3; } + } + + // Fallback: parse /proc/cpuinfo for "cpu MHz" + std::ifstream cpuinfo("/proc/cpuinfo"); + if (!cpuinfo.is_open()) return 0.0; + + std::string line; + double max_mhz = 0.0; + while (std::getline(cpuinfo, line)) { + if (line.find("cpu MHz") != std::string::npos) { + std::size_t colon = line.find(':'); + if (colon != std::string::npos) { + double mhz = std::stod(line.substr(colon + 1)); + if (mhz > max_mhz) { max_mhz = mhz; } + } + } + } + + return max_mhz; + }(); + + return cached_mhz; +} + void print_version_info() { int device_id = 0; diff --git a/cpp/src/utilities/version_info.hpp b/cpp/src/utilities/version_info.hpp index dbadea8ecd..ea909e7c19 100644 --- a/cpp/src/utilities/version_info.hpp +++ b/cpp/src/utilities/version_info.hpp @@ -8,4 +8,5 @@ namespace cuopt { void print_version_info(); -} +double get_cpu_max_clock_mhz(); +} // namespace cuopt diff --git a/cpp/src/utilities/work_limit_context.hpp b/cpp/src/utilities/work_limit_context.hpp index c75a37b818..0c8435c77a 100644 --- a/cpp/src/utilities/work_limit_context.hpp +++ b/cpp/src/utilities/work_limit_context.hpp @@ -17,30 +17,155 @@ #pragma once #include +#include +#include +#include +#include +#include #include #include +#include +#include +#include "producer_sync.hpp" #include "timer.hpp" #include "work_unit_scheduler.hpp" namespace cuopt { +inline double read_work_unit_scale_env_or_default(const char* env_name, double default_value) +{ + const char* env_value = std::getenv(env_name); + if (env_value == nullptr || env_value[0] == '\0') { return default_value; } + + errno = 0; + char* end_ptr = nullptr; + const double parsed_value = std::strtod(env_value, &end_ptr); + const bool valid_value = errno == 0 && end_ptr != env_value && *end_ptr == '\0' && + std::isfinite(parsed_value) && parsed_value > 0.0; + cuopt_assert(valid_value, "Invalid work-unit scale env var"); + return parsed_value; +} + struct work_limit_context_t { double global_work_units_elapsed{0.0}; double total_sync_time{0.0}; // Total time spent waiting at sync barriers (seconds) bool deterministic{false}; work_unit_scheduler_t* scheduler{nullptr}; + producer_sync_t* producer_sync{nullptr}; std::string name; + std::unique_ptr> producer_work_units_elapsed{ + std::make_unique>(0.0)}; + double producer_progress_scale{ + read_work_unit_scale_env_or_default("CUOPT_GPU_HEUR_WORK_UNIT_SCALE", 1.0)}; + double work_unit_scale{1.0}; work_limit_context_t(const std::string& name) : name(name) {} + work_limit_context_t(const work_limit_context_t& other) + : global_work_units_elapsed(other.global_work_units_elapsed), + total_sync_time(other.total_sync_time), + deterministic(other.deterministic), + scheduler(other.scheduler), + producer_sync(other.producer_sync), + name(other.name), + producer_work_units_elapsed(std::make_unique>( + other.producer_work_units_elapsed->load(std::memory_order_acquire))), + producer_progress_scale(other.producer_progress_scale), + work_unit_scale(other.work_unit_scale) + { + } + + work_limit_context_t(work_limit_context_t&& other) noexcept + : global_work_units_elapsed(other.global_work_units_elapsed), + total_sync_time(other.total_sync_time), + deterministic(other.deterministic), + scheduler(other.scheduler), + producer_sync(other.producer_sync), + name(std::move(other.name)), + producer_work_units_elapsed(std::make_unique>( + other.producer_work_units_elapsed->load(std::memory_order_acquire))), + producer_progress_scale(other.producer_progress_scale), + work_unit_scale(other.work_unit_scale) + { + } + + work_limit_context_t& operator=(const work_limit_context_t& other) + { + if (this == &other) { return *this; } + global_work_units_elapsed = other.global_work_units_elapsed; + total_sync_time = other.total_sync_time; + deterministic = other.deterministic; + scheduler = other.scheduler; + producer_sync = other.producer_sync; + name = other.name; + producer_work_units_elapsed = std::make_unique>( + other.producer_work_units_elapsed->load(std::memory_order_acquire)); + producer_progress_scale = other.producer_progress_scale; + work_unit_scale = other.work_unit_scale; + return *this; + } + + work_limit_context_t& operator=(work_limit_context_t&& other) noexcept + { + if (this == &other) { return *this; } + global_work_units_elapsed = other.global_work_units_elapsed; + total_sync_time = other.total_sync_time; + deterministic = other.deterministic; + scheduler = other.scheduler; + producer_sync = other.producer_sync; + name = std::move(other.name); + producer_work_units_elapsed = std::make_unique>( + other.producer_work_units_elapsed->load(std::memory_order_acquire)); + producer_progress_scale = other.producer_progress_scale; + work_unit_scale = other.work_unit_scale; + return *this; + } + + double current_work() const noexcept { return global_work_units_elapsed; } + + double current_producer_work() const noexcept { return current_work() * producer_progress_scale; } + + std::atomic* producer_progress_ptr() noexcept + { + return producer_work_units_elapsed.get(); + } + + void attach_producer_sync(producer_sync_t* producer_sync_) + { + producer_sync = producer_sync_; + producer_work_units_elapsed->store(current_producer_work(), std::memory_order_release); + if (work_unit_scale != 1.0) { + CUOPT_DETERMINISM_LOG("[%s] Using work-unit scale %f", name.c_str(), work_unit_scale); + } + } + + void detach_producer_sync() noexcept { producer_sync = nullptr; } + + void set_current_work(double total_work, bool notify_producer = true) + { + if (!deterministic) return; + cuopt_assert(total_work + 1e-12 >= global_work_units_elapsed, + "Deterministic work progress must be monotonic"); + global_work_units_elapsed = total_work; + producer_work_units_elapsed->store(current_producer_work(), std::memory_order_release); + if (notify_producer && producer_sync != nullptr) { producer_sync->notify_progress(); } + } + void record_work_sync_on_horizon(double work) { if (!deterministic) return; - global_work_units_elapsed += work; - if (scheduler) { scheduler->on_work_recorded(*this, global_work_units_elapsed); } + cuopt_assert(std::isfinite(work), "Recorded work must be finite"); + cuopt_assert(work >= 0.0, "Recorded work must be non-negative"); + const double scaled_work = work * work_unit_scale; + const double total_work = global_work_units_elapsed + scaled_work; + set_current_work(total_work, false); + if (scheduler) { scheduler->on_work_recorded(*this, total_work); } + if (producer_sync != nullptr) { producer_sync->notify_progress(); } } + + void record_work(double work) { record_work_sync_on_horizon(work); } }; } // namespace cuopt diff --git a/cpp/src/utilities/work_limit_timer.hpp b/cpp/src/utilities/work_limit_timer.hpp new file mode 100644 index 0000000000..801a3e5ee9 --- /dev/null +++ b/cpp/src/utilities/work_limit_timer.hpp @@ -0,0 +1,11 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ +#pragma once + +// work_limit_timer_t is now an alias for termination_checker_t. +// This header exists for backward compatibility. +#include "termination_checker.hpp" diff --git a/cpp/src/utilities/work_unit_predictor.cpp b/cpp/src/utilities/work_unit_predictor.cpp new file mode 100644 index 0000000000..4c9512768d --- /dev/null +++ b/cpp/src/utilities/work_unit_predictor.cpp @@ -0,0 +1,89 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "work_unit_predictor.hpp" + +#include +#include +#include +#include +#include +#include + +#include "models/fj_predictor/header.h" + +namespace cuopt { + +template +static inline uint32_t compute_hash(std::vector h_contents) +{ + // FNV-1a hash + + uint32_t hash = 2166136261u; // FNV-1a 32-bit offset basis + std::vector byte_contents(h_contents.size() * sizeof(i_t)); + std::memcpy(byte_contents.data(), h_contents.data(), h_contents.size() * sizeof(i_t)); + for (size_t i = 0; i < byte_contents.size(); ++i) { + hash ^= byte_contents[i]; + hash *= 16777619u; + } + return hash; +} + +template +float work_unit_predictor_t::predict_scalar( + const std::map& features) const +{ + raft::common::nvtx::range range("work_unit_predictor_t::predict_scalar"); + + typename model_t::Entry data[model_t::NUM_FEATURES]; + for (int i = 0; i < model_t::NUM_FEATURES; ++i) { + if (features.find(std::string(model_t::feature_names[i])) == features.end()) { + data[i].missing = -1; + CUOPT_LOG_WARN("Feature %s: missing\n", model_t::feature_names[i]); + } else { + data[i].fvalue = features.at(std::string(model_t::feature_names[i])); + } + } + + std::vector cache_vec; + cache_vec.reserve(model_t::NUM_FEATURES); + for (int i = 0; i < model_t::NUM_FEATURES; ++i) { + cache_vec.push_back(data[i].missing != -1 ? data[i].fvalue + : std::numeric_limits::quiet_NaN()); + } + uint32_t key = compute_hash(cache_vec); + + auto cached_it = prediction_cache.find(key); + if (cached_it != prediction_cache.end()) { return cached_it->second; } + + double result = 0.0; + auto start = std::chrono::high_resolution_clock::now(); + model_t::predict(data, 0, &result); + auto end = std::chrono::high_resolution_clock::now(); + std::chrono::duration elapsed = end - start; + if (debug) CUOPT_LOG_DEBUG("Prediction time: %f ms", elapsed.count()); + + float scaled_result = scaler_.scale_work_units(result); + prediction_cache[key] = scaled_result; + if (debug) CUOPT_LOG_DEBUG("Result: %f (scaled: %f)", result, scaled_result); + + return scaled_result; +} + +template class work_unit_predictor_t; + +} // namespace cuopt diff --git a/cpp/src/utilities/work_unit_predictor.hpp b/cpp/src/utilities/work_unit_predictor.hpp new file mode 100644 index 0000000000..9d445c437e --- /dev/null +++ b/cpp/src/utilities/work_unit_predictor.hpp @@ -0,0 +1,65 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace cuopt { + +// Temporary scaling classes until I figure out better ways to do this +// to account for performance differences between the regression learning machine and the user +// machine. (e.g. integrate memory latency/bandwidth, cache topology, user-provided tuning...) +struct cpu_work_unit_scaler_t { + cpu_work_unit_scaler_t() + { + constexpr double baseline_max_clock = 3800.0; + double max_clock = get_cpu_max_clock_mhz(); + if (max_clock <= 0.0) { max_clock = baseline_max_clock; } + scaling_factor_ = baseline_max_clock / max_clock; + } + + double scale_work_units(double work_units) const { return work_units * scaling_factor_; } + + private: + double scaling_factor_; +}; + +struct gpu_work_unit_scaler_t { + double scale_work_units(double work_units) const { return work_units; } +}; + +template +class work_unit_predictor_t { + public: + float predict_scalar(const std::map& features) const; + + public: + bool debug{false}; + + private: + mutable std::unordered_map prediction_cache; + scaler_t scaler_; +}; + +} // namespace cuopt diff --git a/cpp/src/utilities/work_unit_scheduler.cpp b/cpp/src/utilities/work_unit_scheduler.cpp index b0e5c5f12f..84088b10e8 100644 --- a/cpp/src/utilities/work_unit_scheduler.cpp +++ b/cpp/src/utilities/work_unit_scheduler.cpp @@ -79,8 +79,8 @@ void work_unit_scheduler_t::wait_for_next_sync(work_limit_context_t& ctx) { if (is_shutdown()) return; - double next_sync = current_sync_target(); - ctx.global_work_units_elapsed = next_sync; + double next_sync = current_sync_target(); + ctx.set_current_work(next_sync, false); wait_at_sync_point(ctx, next_sync); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a73a3361ce..fe9dd4fde9 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -1,4 +1,4 @@ -# cmake-format: off +# cmake-format: off # SPDX-FileCopyrightText: Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # cmake-format: on @@ -33,6 +33,40 @@ endif() set(CUOPT_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +if (EXISTS "${CUDAToolkit_LIBRARY_ROOT}/extras/CUPTI/lib64") + # NVIDIA installer layout: + set(cuopt_cupti_root "${CUDAToolkit_LIBRARY_ROOT}/extras/CUPTI") +else() + # Ubuntu package layout: + set(cuopt_cupti_root "${CUDAToolkit_LIBRARY_ROOT}") +endif() +message(STATUS "cuopt_cupti_root = ${cuopt_cupti_root}") + +# The CUPTI targets in FindCUDAToolkit are broken: +# - The dll locations are not specified +# - Dependent libraries nvperf_* are not linked. +# So we create our own targets: +function(cuopt_add_cupti_dep dep_name) + string(TOLOWER ${dep_name} dep_name_lower) + string(TOUPPER ${dep_name} dep_name_upper) + + add_library(cuopt::${dep_name_lower} SHARED IMPORTED) + + find_library(CUOPT_${dep_name_upper}_LIBRARY ${dep_name_lower} REQUIRED + DOC "The full path to lib${dep_name_lower}.so from the CUDA Toolkit." + HINTS "${cuopt_cupti_root}/lib64" "${cuopt_cupti_root}/lib" + ) + mark_as_advanced(CUOPT_${dep_name_upper}_LIBRARY) + + set_target_properties(cuopt::${dep_name_lower} PROPERTIES + IMPORTED_LOCATION "${CUOPT_${dep_name_upper}_LIBRARY}" + ) +endfunction() + +#cuopt_add_cupti_dep(nvperf_target) +#cuopt_add_cupti_dep(nvperf_host) +#cuopt_add_cupti_dep(cupti) + # ################################################################ ------------------------------------------------------------------ function(ConfigureTest CMAKE_TEST_NAME) add_executable(${CMAKE_TEST_NAME} ${ARGN}) diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_test.c b/cpp/tests/linear_programming/c_api_tests/c_api_test.c index ecf610041c..996d60deae 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_test.c +++ b/cpp/tests/linear_programming/c_api_tests/c_api_test.c @@ -2122,3 +2122,135 @@ cuopt_int_t test_cpu_only_mip_execution(const char* filename) cuOptDestroySolution(&solution); return status; } + +cuopt_int_t test_pdlp_precision_mixed(const char* filename, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr) +{ + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + cuopt_int_t status; + cuopt_int_t termination_status = -1; + cuopt_float_t objective_value; + + status = cuOptReadProblem(filename, &problem); + if (status != CUOPT_SUCCESS) { + printf("Error reading problem\n"); + goto DONE; + } + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings\n"); + goto DONE; + } + + status = cuOptSetIntegerParameter(settings, CUOPT_METHOD, CUOPT_METHOD_PDLP); + if (status != CUOPT_SUCCESS) { + printf("Error setting method\n"); + goto DONE; + } + + status = cuOptSetIntegerParameter(settings, CUOPT_PDLP_PRECISION, CUOPT_PDLP_MIXED_PRECISION); + if (status != CUOPT_SUCCESS) { + printf("Error setting pdlp_precision\n"); + goto DONE; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving problem with pdlp_precision=mixed\n"); + goto DONE; + } + + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status\n"); + goto DONE; + } + *termination_status_ptr = termination_status; + + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value\n"); + goto DONE; + } + *objective_ptr = objective_value; + + printf("PDLP precision=mixed test passed: status=%s, objective=%f\n", + termination_status_to_string(termination_status), + objective_value); + +DONE: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return status; +} + +cuopt_int_t test_pdlp_precision_single(const char* filename, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr) +{ + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + cuopt_int_t status; + cuopt_int_t termination_status = -1; + cuopt_float_t objective_value; + + status = cuOptReadProblem(filename, &problem); + if (status != CUOPT_SUCCESS) { + printf("Error reading problem\n"); + goto DONE; + } + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings\n"); + goto DONE; + } + + status = cuOptSetIntegerParameter(settings, CUOPT_METHOD, CUOPT_METHOD_PDLP); + if (status != CUOPT_SUCCESS) { + printf("Error setting method\n"); + goto DONE; + } + + status = cuOptSetIntegerParameter(settings, CUOPT_PDLP_PRECISION, CUOPT_PDLP_SINGLE_PRECISION); + if (status != CUOPT_SUCCESS) { + printf("Error setting pdlp_precision\n"); + goto DONE; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving problem with pdlp_precision=single\n"); + goto DONE; + } + + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status\n"); + goto DONE; + } + *termination_status_ptr = termination_status; + + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value\n"); + goto DONE; + } + *objective_ptr = objective_value; + + printf("PDLP precision=single test passed: status=%s, objective=%f\n", + termination_status_to_string(termination_status), + objective_value); + +DONE: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return status; +} diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index 33fb42cc9d..995b2194c4 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -18,6 +18,10 @@ #include #include +namespace cuopt::linear_programming::detail { +bool is_cusparse_runtime_mixed_precision_supported(); +} + #include TEST(c_api, int_size) { EXPECT_EQ(test_int_size(), sizeof(int32_t)); } @@ -271,6 +275,43 @@ INSTANTIATE_TEST_SUITE_P(c_api, // Different instance std::make_tuple("/mip/bb_optimality.mps", 8, 60.0, 2))); +// ============================================================================= +// PDLP Precision Tests +// ============================================================================= + +TEST(c_api, pdlp_precision_single) +{ + const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); + std::string filename = rapidsDatasetRootDir + "/linear_programming/afiro_original.mps"; + cuopt_int_t termination_status; + cuopt_float_t objective; + EXPECT_EQ(test_pdlp_precision_single(filename.c_str(), &termination_status, &objective), + CUOPT_SUCCESS); + EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_NEAR(objective, -464.7531, 1e-1); +} + +TEST(c_api, pdlp_precision_mixed) +{ + using namespace cuopt::linear_programming::detail; + const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); + std::string filename = rapidsDatasetRootDir + "/linear_programming/afiro_original.mps"; + cuopt_int_t termination_status = -1; + cuopt_float_t objective; + if (!is_cusparse_runtime_mixed_precision_supported()) { + auto status = test_pdlp_precision_mixed(filename.c_str(), &termination_status, &objective); + bool solve_returned_error = (status != CUOPT_SUCCESS); + bool solve_returned_non_optimal = + (status == CUOPT_SUCCESS && termination_status != CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_TRUE(solve_returned_error || solve_returned_non_optimal); + return; + } + EXPECT_EQ(test_pdlp_precision_mixed(filename.c_str(), &termination_status, &objective), + CUOPT_SUCCESS); + EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_NEAR(objective, -464.7531, 1e-1); +} + // ============================================================================= // Solution Interface Polymorphism Tests // ============================================================================= @@ -341,7 +382,7 @@ class CPUOnlyTestEnvironment { // TODO: Add numerical assertions once gRPC remote solver replaces the stub implementation. // Currently validates that the CPU-only C API path completes without errors. -TEST(c_api_cpu_only, lp_solve) +TEST(c_api_cpu_only, DISABLED_lp_solve) { CPUOnlyTestEnvironment env; const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); @@ -350,7 +391,7 @@ TEST(c_api_cpu_only, lp_solve) } // TODO: Add numerical assertions once gRPC remote solver replaces the stub implementation. -TEST(c_api_cpu_only, mip_solve) +TEST(c_api_cpu_only, DISABLED_mip_solve) { CPUOnlyTestEnvironment env; const std::string& rapidsDatasetRootDir = cuopt::test::get_rapids_dataset_root_dir(); diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h index e541316567..402c7d06a5 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.h +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.h @@ -53,6 +53,14 @@ cuopt_int_t test_deterministic_bb(const char* filename, cuopt_int_t test_lp_solution_mip_methods(); cuopt_int_t test_mip_solution_lp_methods(); +cuopt_int_t test_pdlp_precision_single(const char* filename, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr); + +cuopt_int_t test_pdlp_precision_mixed(const char* filename, + cuopt_int_t* termination_status_ptr, + cuopt_float_t* objective_ptr); + /* CPU-only execution tests (require env vars CUDA_VISIBLE_DEVICES="" and CUOPT_REMOTE_HOST) */ cuopt_int_t test_cpu_only_execution(const char* filename); cuopt_int_t test_cpu_only_mip_execution(const char* filename); diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index 8bf759367e..d5a8d69008 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -6,6 +6,7 @@ /* clang-format on */ #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include +#include #include #include @@ -45,10 +47,10 @@ namespace cuopt::linear_programming::test { -constexpr double afiro_primal_objective = -464; - +constexpr double afiro_primal_objective = -464.0; // Accept a 1% error -static bool is_incorrect_objective(double reference, double objective) +template +static bool is_incorrect_objective(f_t reference, f_t objective) { if (reference == 0) { return std::abs(objective) > 0.01; } if (objective == 0) { return std::abs(reference) > 0.01; } @@ -73,6 +75,58 @@ TEST(pdlp_class, run_double) afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); } +TEST(pdlp_class, precision_mixed) +{ + using namespace cuopt::linear_programming::detail; + if (!is_cusparse_runtime_mixed_precision_supported()) { + const raft::handle_t handle_{}; + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto settings = pdlp_solver_settings_t{}; + settings.method = cuopt::linear_programming::method_t::PDLP; + settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::MixedPrecision; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, settings); + EXPECT_EQ(solution.get_error_status().get_error_type(), cuopt::error_type_t::ValidationError); + return; + } + + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto settings_mixed = pdlp_solver_settings_t{}; + settings_mixed.method = cuopt::linear_programming::method_t::PDLP; + settings_mixed.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::MixedPrecision; + + optimization_problem_solution_t solution_mixed = + solve_lp(&handle_, op_problem, settings_mixed); + EXPECT_EQ((int)solution_mixed.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, + solution_mixed.get_additional_termination_information().primal_objective)); + + auto settings_full = pdlp_solver_settings_t{}; + settings_full.method = cuopt::linear_programming::method_t::PDLP; + settings_full.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::DefaultPrecision; + + optimization_problem_solution_t solution_full = + solve_lp(&handle_, op_problem, settings_full); + EXPECT_EQ((int)solution_full.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, + solution_full.get_additional_termination_information().primal_objective)); + + EXPECT_NEAR(solution_mixed.get_additional_termination_information().primal_objective, + solution_full.get_additional_termination_information().primal_objective, + 1e-2); +} + TEST(pdlp_class, run_double_very_low_accuracy) { const raft::handle_t handle_{}; @@ -1888,6 +1942,107 @@ TEST(pdlp_class, some_climber_hit_iteration_limit) } } +TEST(pdlp_class, precision_single) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); +} + +TEST(pdlp_class, precision_single_crossover) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + solver_settings.crossover = true; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); +} + +TEST(pdlp_class, precision_single_concurrent) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::Concurrent; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); +} + +TEST(pdlp_class, precision_single_papilo_presolve) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + solver_settings.presolver = cuopt::linear_programming::presolver_t::Papilo; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); +} + +TEST(pdlp_class, precision_single_pslp_presolve) +{ + const raft::handle_t handle_{}; + + auto path = make_path_absolute("linear_programming/afiro_original.mps"); + cuopt::mps_parser::mps_data_model_t op_problem = + cuopt::mps_parser::parse_mps(path, true); + + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_precision = cuopt::linear_programming::pdlp_precision_t::SinglePrecision; + solver_settings.presolver = cuopt::linear_programming::presolver_t::PSLP; + + optimization_problem_solution_t solution = + solve_lp(&handle_, op_problem, solver_settings); + EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); + EXPECT_FALSE(is_incorrect_objective( + afiro_primal_objective, solution.get_additional_termination_information().primal_objective)); +} + } // namespace cuopt::linear_programming::test CUOPT_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/mip/CMakeLists.txt b/cpp/tests/mip/CMakeLists.txt index 2f2139890f..24a4b3ddb5 100644 --- a/cpp/tests/mip/CMakeLists.txt +++ b/cpp/tests/mip/CMakeLists.txt @@ -40,12 +40,18 @@ ConfigureTest(PRESOLVE_TEST ${CMAKE_CURRENT_SOURCE_DIR}/presolve_test.cu ) # Disable for now -# ConfigureTest(FEASIBILITY_JUMP_TEST -# ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump_tests.cu -# ) +ConfigureTest(FEASIBILITY_JUMP_TEST + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump_tests.cu +) ConfigureTest(MIP_TERMINATION_STATUS_TEST ${CMAKE_CURRENT_SOURCE_DIR}/termination_test.cu ) ConfigureTest(DETERMINISM_TEST ${CMAKE_CURRENT_SOURCE_DIR}/determinism_test.cu ) +ConfigureTest(LOCAL_SEARCH_TEST + ${CMAKE_CURRENT_SOURCE_DIR}/local_search_test.cu +) +ConfigureTest(DIVERSITY_TEST + ${CMAKE_CURRENT_SOURCE_DIR}/diversity_test.cu +) diff --git a/cpp/tests/mip/cuts_test.cu b/cpp/tests/mip/cuts_test.cu index 1a360b41eb..7968d5513f 100644 --- a/cpp/tests/mip/cuts_test.cu +++ b/cpp/tests/mip/cuts_test.cu @@ -8,25 +8,829 @@ #include "../linear_programming/utilities/pdlp_test_utilities.cuh" #include "mip_utils.cuh" +#include +#include #include +#include +#include +#include #include #include +#include #include +#include #include #include #include +#include #include #include #include +#include +#include +#include #include #include +#include #include namespace cuopt::linear_programming::test { +namespace { + +constexpr double kCliqueTestTol = 1e-6; + +mps_parser::mps_data_model_t create_pairwise_triangle_set_packing_problem() +{ + // Maximize x0 + x1 + x2 via minimizing -x0 - x1 - x2. + // Pairwise conflicts: + // x0 + x1 <= 1 + // x1 + x2 <= 1 + // x0 + x2 <= 1 + mps_parser::mps_data_model_t problem; + std::vector offsets = {0, 2, 4, 6}; + std::vector indices = {0, 1, 1, 2, 0, 2}; + std::vector coefficients = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + problem.set_csr_constraint_matrix(coefficients.data(), + coefficients.size(), + indices.data(), + indices.size(), + offsets.data(), + offsets.size()); + std::vector lower_bounds = {-std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + std::vector upper_bounds = {1.0, 1.0, 1.0}; + problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size()); + problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size()); + std::vector var_lower_bounds = {0.0, 0.0, 0.0}; + std::vector var_upper_bounds = {1.0, 1.0, 1.0}; + problem.set_variable_lower_bounds(var_lower_bounds.data(), var_lower_bounds.size()); + problem.set_variable_upper_bounds(var_upper_bounds.data(), var_upper_bounds.size()); + std::vector objective_coefficients = {-1.0, -1.0, -1.0}; + problem.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + std::vector variable_types = {'I', 'I', 'I'}; + problem.set_variable_types(variable_types); + problem.set_maximize(false); + return problem; +} + +mps_parser::mps_data_model_t create_pairwise_triangle_with_isolated_variable_problem() +{ + // Same triangle conflicts as create_pairwise_triangle_set_packing_problem(), + // plus an isolated binary variable x3 with no conflict rows. + mps_parser::mps_data_model_t problem; + std::vector offsets = {0, 2, 4, 6}; + std::vector indices = {0, 1, 1, 2, 0, 2}; + std::vector coefficients = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + problem.set_csr_constraint_matrix(coefficients.data(), + coefficients.size(), + indices.data(), + indices.size(), + offsets.data(), + offsets.size()); + std::vector lower_bounds = {-std::numeric_limits::infinity(), + -std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + std::vector upper_bounds = {1.0, 1.0, 1.0}; + problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size()); + problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size()); + std::vector var_lower_bounds = {0.0, 0.0, 0.0, 0.0}; + std::vector var_upper_bounds = {1.0, 1.0, 1.0, 1.0}; + problem.set_variable_lower_bounds(var_lower_bounds.data(), var_lower_bounds.size()); + problem.set_variable_upper_bounds(var_upper_bounds.data(), var_upper_bounds.size()); + std::vector objective_coefficients = {-1.0, -1.0, -1.0, 0.0}; + problem.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + std::vector variable_types = {'I', 'I', 'I', 'I'}; + problem.set_variable_types(variable_types); + problem.set_maximize(false); + return problem; +} + +mps_parser::mps_data_model_t create_binary_continuous_mixed_conflict_problem() +{ + // x0 + y1 <= 1 (must be ignored for clique graph because y1 is continuous) + // x0 + x2 <= 1 (must generate a conflict edge) + mps_parser::mps_data_model_t problem; + std::vector offsets = {0, 2, 4}; + std::vector indices = {0, 1, 0, 2}; + std::vector coefficients = {1.0, 1.0, 1.0, 1.0}; + problem.set_csr_constraint_matrix(coefficients.data(), + coefficients.size(), + indices.data(), + indices.size(), + offsets.data(), + offsets.size()); + std::vector lower_bounds = {-std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + std::vector upper_bounds = {1.0, 1.0}; + problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size()); + problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size()); + std::vector var_lower_bounds = {0.0, 0.0, 0.0}; + std::vector var_upper_bounds = {1.0, 1.0, 1.0}; + problem.set_variable_lower_bounds(var_lower_bounds.data(), var_lower_bounds.size()); + problem.set_variable_upper_bounds(var_upper_bounds.data(), var_upper_bounds.size()); + std::vector objective_coefficients = {0.0, 0.0, 0.0}; + problem.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + std::vector variable_types = {'I', 'C', 'I'}; + problem.set_variable_types(variable_types); + problem.set_maximize(false); + return problem; +} + +mps_parser::mps_data_model_t create_near_binary_bound_conflict_problem() +{ + // x0 + x1 <= 1 but x1 has upper bound 0.9999999, so this row should not be + // treated as a binary conflict row. + mps_parser::mps_data_model_t problem; + std::vector offsets = {0, 2}; + std::vector indices = {0, 1}; + std::vector coefficients = {1.0, 1.0}; + problem.set_csr_constraint_matrix(coefficients.data(), + coefficients.size(), + indices.data(), + indices.size(), + offsets.data(), + offsets.size()); + std::vector lower_bounds = {-std::numeric_limits::infinity()}; + std::vector upper_bounds = {1.0}; + problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size()); + problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size()); + std::vector var_lower_bounds = {0.0, 0.0}; + std::vector var_upper_bounds = {1.0, 0.9999999}; + problem.set_variable_lower_bounds(var_lower_bounds.data(), var_lower_bounds.size()); + problem.set_variable_upper_bounds(var_upper_bounds.data(), var_upper_bounds.size()); + std::vector objective_coefficients = {0.0, 0.0}; + problem.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + std::vector variable_types = {'I', 'I'}; + problem.set_variable_types(variable_types); + problem.set_maximize(false); + return problem; +} + +mps_parser::mps_data_model_t create_weighted_addtl_conflict_problem() +{ + // One weighted binary knapsack row: + // 1*x0 + 2*x1 + 3*x2 + 4*x3 <= 5 + // This creates base clique {x2, x3} and additional clique inducing conflict {x1, x3}. + mps_parser::mps_data_model_t problem; + std::vector offsets = {0, 4}; + std::vector indices = {0, 1, 2, 3}; + std::vector coefficients = {1.0, 2.0, 3.0, 4.0}; + problem.set_csr_constraint_matrix(coefficients.data(), + coefficients.size(), + indices.data(), + indices.size(), + offsets.data(), + offsets.size()); + std::vector lower_bounds = {-std::numeric_limits::infinity()}; + std::vector upper_bounds = {5.0}; + problem.set_constraint_lower_bounds(lower_bounds.data(), lower_bounds.size()); + problem.set_constraint_upper_bounds(upper_bounds.data(), upper_bounds.size()); + std::vector var_lower_bounds = {0.0, 0.0, 0.0, 0.0}; + std::vector var_upper_bounds = {1.0, 1.0, 1.0, 1.0}; + problem.set_variable_lower_bounds(var_lower_bounds.data(), var_lower_bounds.size()); + problem.set_variable_upper_bounds(var_upper_bounds.data(), var_upper_bounds.size()); + std::vector objective_coefficients = {0.0, 0.0, 0.0, 0.0}; + problem.set_objective_coefficients(objective_coefficients.data(), objective_coefficients.size()); + std::vector variable_types = {'I', 'I', 'I', 'I'}; + problem.set_variable_types(variable_types); + problem.set_maximize(false); + return problem; +} + +detail::clique_table_t build_clique_table_for_model_with_min_size( + const raft::handle_t& handle, + const mps_parser::mps_data_model_t& model, + int min_clique_size) +{ + auto op_problem = mps_data_model_to_optimization_problem(&handle, model); + detail::problem_t mip_problem(op_problem); + dual_simplex::user_problem_t host_problem(op_problem.get_handle_ptr()); + mip_problem.get_host_user_problem(host_problem); + + detail::clique_config_t clique_config; + clique_config.min_clique_size = min_clique_size; + detail::clique_table_t clique_table(2 * host_problem.num_cols, + clique_config.min_clique_size, + clique_config.max_clique_size_for_extension); + + mip_solver_settings_t settings; + cuopt::timer_t timer(std::numeric_limits::infinity()); + detail::build_clique_table(host_problem, clique_table, settings.tolerances, true, true, timer); + return clique_table; +} + +detail::clique_table_t build_clique_table_for_model( + const raft::handle_t& handle, const mps_parser::mps_data_model_t& model) +{ + return build_clique_table_for_model_with_min_size(handle, model, 1); +} + +mps_parser::mps_data_model_t& get_neos8_model_cached() +{ + static std::once_flag init_flag; + static std::unique_ptr> model_ptr; + std::call_once(init_flag, []() { + const auto neos8_path = make_path_absolute("mip/neos8.mps"); + auto neos8_model = cuopt::mps_parser::parse_mps(neos8_path, false); + model_ptr = std::make_unique>(std::move(neos8_model)); + }); + cuopt_assert(model_ptr != nullptr, "Failed to initialize cached neos8 model"); + return *model_ptr; +} + +detail::clique_table_t& get_neos8_clique_table_cached() +{ + static std::once_flag init_flag; + static std::unique_ptr> clique_table_ptr; + std::call_once(init_flag, []() { + const raft::handle_t handle{}; + auto& neos8_model = get_neos8_model_cached(); + auto clique_table = build_clique_table_for_model(handle, neos8_model); + clique_table_ptr = + std::make_unique>(std::move(clique_table)); + }); + cuopt_assert(clique_table_ptr != nullptr, "Failed to initialize cached neos8 clique table"); + return *clique_table_ptr; +} + +std::vector> build_original_adjacency_matrix( + detail::clique_table_t& clique_table, int num_vars) +{ + std::vector> adj(num_vars, std::vector(num_vars, 0)); + for (int i = 0; i < num_vars; ++i) { + for (int j = i + 1; j < num_vars; ++j) { + if (clique_table.check_adjacency(i, j)) { + adj[i][j] = 1; + adj[j][i] = 1; + } + } + } + return adj; +} + +std::vector> maximal_cliques_bruteforce(const std::vector>& adj) +{ + const int n = static_cast(adj.size()); + if (n <= 0 || n > 20) { return {}; } + const uint64_t total_masks = (uint64_t{1} << n); + std::vector> maximal_cliques; + + auto is_mask_clique = [&](uint64_t mask) { + for (int i = 0; i < n; ++i) { + if ((mask & (uint64_t{1} << i)) == 0) { continue; } + for (int j = i + 1; j < n; ++j) { + if ((mask & (uint64_t{1} << j)) == 0) { continue; } + if (!adj[i][j]) { return false; } + } + } + return true; + }; + + for (uint64_t mask = 1; mask < total_masks; ++mask) { + if (!is_mask_clique(mask)) { continue; } + bool is_maximal = true; + for (int v = 0; v < n && is_maximal; ++v) { + if (mask & (uint64_t{1} << v)) { continue; } + bool can_extend = true; + for (int u = 0; u < n; ++u) { + if ((mask & (uint64_t{1} << u)) == 0) { continue; } + if (!adj[v][u]) { + can_extend = false; + break; + } + } + if (can_extend) { is_maximal = false; } + } + if (!is_maximal) { continue; } + std::vector clique; + for (int u = 0; u < n; ++u) { + if (mask & (uint64_t{1} << u)) { clique.push_back(u); } + } + maximal_cliques.push_back(std::move(clique)); + } + return maximal_cliques; +} + +std::vector> canonicalize_cliques(std::vector> cliques) +{ + for (auto& clique : cliques) { + std::sort(clique.begin(), clique.end()); + } + std::sort(cliques.begin(), cliques.end(), [](const auto& a, const auto& b) { + if (a.size() != b.size()) { return a.size() < b.size(); } + return a < b; + }); + cliques.erase(std::unique(cliques.begin(), cliques.end()), cliques.end()); + return cliques; +} + +std::vector> adjacency_matrix_to_list(const std::vector>& adj) +{ + const int n = static_cast(adj.size()); + std::vector> adj_list(n); + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + if (adj[i][j]) { adj_list[i].push_back(j); } + } + } + return adj_list; +} + +std::vector> maximal_cliques_from_production_algorithm( + const std::vector>& adj) +{ + const auto adj_list = adjacency_matrix_to_list(adj); + std::vector weights(adj_list.size(), 1.0); + auto cliques = dual_simplex::find_maximal_cliques_for_test( + adj_list, weights, 0.0, 100000, std::numeric_limits::infinity()); + return canonicalize_cliques(std::move(cliques)); +} + +double original_clique_sum(const std::vector& clique_vars, + const std::vector& assignment) +{ + double lhs = 0.0; + for (const auto var : clique_vars) { + lhs += assignment[var]; + } + return lhs; +} + +std::string format_phase2_panic_dump(const mps_parser::mps_data_model_t& problem, + const std::vector& clique_vars, + const std::vector& x_star) +{ + std::ostringstream out; + const auto& var_lb = problem.get_variable_lower_bounds(); + const auto& var_ub = problem.get_variable_upper_bounds(); + out << "\nClique vars:"; + for (auto v : clique_vars) { + out << " x" << v << "(value=" << x_star[v] << ", lb=" << var_lb[v] << ", ub=" << var_ub[v] + << ")"; + } + + std::unordered_set clique_var_set(clique_vars.begin(), clique_vars.end()); + const auto& values = problem.get_constraint_matrix_values(); + const auto& cols = problem.get_constraint_matrix_indices(); + const auto& rows = problem.get_constraint_matrix_offsets(); + const auto& clb = problem.get_constraint_lower_bounds(); + const auto& cub = problem.get_constraint_upper_bounds(); + + out << "\nRelated constraints:"; + for (size_t row = 0; row + 1 < rows.size(); ++row) { + bool touches_clique = false; + for (int p = rows[row]; p < rows[row + 1]; ++p) { + if (clique_var_set.count(cols[p]) > 0) { + touches_clique = true; + break; + } + } + if (!touches_clique) { continue; } + out << "\n row " << row << ": "; + for (int p = rows[row]; p < rows[row + 1]; ++p) { + if (p > rows[row]) { out << " + "; } + out << values[p] << "*x" << cols[p]; + } + out << " in [" << clb[row] << ", " << cub[row] << "]"; + } + return out.str(); +} + +void disable_non_clique_cuts(mip_solver_settings_t& settings) +{ + settings.clique_cuts = 1; + settings.max_cut_passes = 10; + settings.mixed_integer_gomory_cuts = 0; + settings.knapsack_cuts = 0; + settings.mir_cuts = 0; + settings.strong_chvatal_gomory_cuts = 0; +} + +void disable_all_cuts(mip_solver_settings_t& settings) +{ + settings.max_cut_passes = 0; + settings.clique_cuts = 0; + settings.mixed_integer_gomory_cuts = 0; + settings.knapsack_cuts = 0; + settings.mir_cuts = 0; + settings.strong_chvatal_gomory_cuts = 0; +} + +bool cut_is_invalid_for_incumbent(const std::vector& cut_vars, + const std::vector& incumbent, + double tol) +{ + return original_clique_sum(cut_vars, incumbent) > 1.0 + tol; +} + +bool prefix_has_invalid_cut(const std::vector>& dumped_cuts, + size_t prefix_end_exclusive, + const std::vector& incumbent, + double tol) +{ + for (size_t i = 0; i < prefix_end_exclusive; ++i) { + if (cut_is_invalid_for_incumbent(dumped_cuts[i], incumbent, tol)) { return true; } + } + return false; +} + +std::optional isolate_first_invalid_cut_by_bisection( + const std::vector>& dumped_cuts, + const std::vector& incumbent, + double tol) +{ + if (!prefix_has_invalid_cut(dumped_cuts, dumped_cuts.size(), incumbent, tol)) { + return std::nullopt; + } + size_t lo = 0; + size_t hi = dumped_cuts.size() - 1; + while (lo < hi) { + const size_t mid = lo + (hi - lo) / 2; + if (prefix_has_invalid_cut(dumped_cuts, mid + 1, incumbent, tol)) { + hi = mid; + } else { + lo = mid + 1; + } + } + return lo; +} + +struct neos8_mip_solution_cache_t { + mip_termination_status_t status; + std::vector primal; + double objective; +}; + +struct neos8_lp_solution_cache_t { + pdlp_termination_status_t status; + std::vector primal; +}; + +neos8_mip_solution_cache_t& get_neos8_optimal_solution_no_cuts_cached() +{ + static std::once_flag init_flag; + static std::unique_ptr solution_ptr; + std::call_once(init_flag, []() { + const raft::handle_t handle{}; + auto& neos8_model = get_neos8_model_cached(); + mip_solver_settings_t settings; + settings.time_limit = 120.0; + settings.presolver = presolver_t::None; + disable_all_cuts(settings); + + auto mip_solution = solve_mip(&handle, neos8_model, settings); + auto cache = std::make_unique(); + cache->status = mip_solution.get_termination_status(); + cache->objective = mip_solution.get_objective_value(); + cache->primal = cuopt::host_copy(mip_solution.get_solution(), handle.get_stream()); + solution_ptr = std::move(cache); + }); + cuopt_assert(solution_ptr != nullptr, "Failed to initialize cached neos8 no-cut MIP solution"); + return *solution_ptr; +} + +neos8_lp_solution_cache_t& get_neos8_lp_relaxation_solution_cached() +{ + static std::once_flag init_flag; + static std::unique_ptr solution_ptr; + std::call_once(init_flag, []() { + const raft::handle_t handle{}; + auto lp_relaxation = get_neos8_model_cached(); + std::vector all_continuous(lp_relaxation.get_n_variables(), 'C'); + lp_relaxation.set_variable_types(all_continuous); + + pdlp_solver_settings_t lp_settings{}; + lp_settings.time_limit = 120.0; + lp_settings.presolver = presolver_t::None; + lp_settings.set_optimality_tolerance(1e-8); + + auto lp_solution = solve_lp(&handle, lp_relaxation, lp_settings); + auto cache = std::make_unique(); + cache->status = lp_solution.get_termination_status(); + cache->primal = cuopt::host_copy(lp_solution.get_primal_solution(), handle.get_stream()); + solution_ptr = std::move(cache); + }); + cuopt_assert(solution_ptr != nullptr, "Failed to initialize cached neos8 LP relaxation solution"); + return *solution_ptr; +} + +bool is_binary_var_for_clique_literals(const mps_parser::mps_data_model_t& problem, + int var_idx, + double bound_tol) +{ + const auto& var_types = problem.get_variable_types(); + const auto& var_lb = problem.get_variable_lower_bounds(); + const auto& var_ub = problem.get_variable_upper_bounds(); + return var_types[var_idx] != 'C' && var_lb[var_idx] >= -bound_tol && + var_ub[var_idx] <= 1.0 + bound_tol; +} + +std::vector> build_fractional_literal_cliques_for_assignment( + const mps_parser::mps_data_model_t& problem, + detail::clique_table_t& clique_table, + const std::vector& assignment, + double integer_tol, + double bound_tol, + int max_calls) +{ + const int num_vars = problem.get_n_variables(); + cuopt_assert(static_cast(assignment.size()) >= num_vars, + "Assignment size mismatch in fractional literal clique builder"); + + std::vector vertices; + std::vector weights; + vertices.reserve(2 * num_vars); + weights.reserve(2 * num_vars); + for (int j = 0; j < num_vars; ++j) { + if (!is_binary_var_for_clique_literals(problem, j, bound_tol)) { continue; } + const double xj = assignment[j]; + if (std::abs(xj - std::round(xj)) <= integer_tol) { continue; } + vertices.push_back(j); + weights.push_back(xj); + vertices.push_back(j + num_vars); + weights.push_back(1.0 - xj); + } + if (vertices.empty()) { return {}; } + + std::vector vertex_to_local(2 * num_vars, -1); + std::vector in_subgraph(2 * num_vars, 0); + for (size_t idx = 0; idx < vertices.size(); ++idx) { + vertex_to_local[vertices[idx]] = static_cast(idx); + in_subgraph[vertices[idx]] = 1; + } + + std::vector> adj_local(vertices.size()); + for (size_t idx = 0; idx < vertices.size(); ++idx) { + const auto vertex_idx = vertices[idx]; + auto adj_set = clique_table.get_adj_set_of_var(vertex_idx); + auto& adj = adj_local[idx]; + adj.reserve(adj_set.size()); + for (const auto neighbor : adj_set) { + cuopt_assert(neighbor >= 0 && neighbor < 2 * num_vars, + "Neighbor out of range in fractional literal clique builder"); + if (!in_subgraph[neighbor]) { continue; } + const auto local_neighbor = vertex_to_local[neighbor]; + if (local_neighbor >= 0) { adj.push_back(local_neighbor); } + } + } + + auto cliques_local = dual_simplex::find_maximal_cliques_for_test( + adj_local, weights, 1.0 + kCliqueTestTol, max_calls, std::numeric_limits::infinity()); + std::vector> cliques_global; + cliques_global.reserve(cliques_local.size()); + for (auto& local_clique : cliques_local) { + std::vector global_clique; + global_clique.reserve(local_clique.size()); + for (const auto local_idx : local_clique) { + cuopt_assert(local_idx >= 0 && static_cast(local_idx) < vertices.size(), + "Local clique index out of range"); + global_clique.push_back(vertices[local_idx]); + } + cliques_global.push_back(std::move(global_clique)); + } + return canonicalize_cliques(std::move(cliques_global)); +} + +std::vector>& get_neos8_fractional_literal_cliques_cached() +{ + static std::once_flag init_flag; + static std::unique_ptr>> cliques_ptr; + std::call_once(init_flag, []() { + auto& neos8_model = get_neos8_model_cached(); + auto& clique_table = get_neos8_clique_table_cached(); + auto& lp_relaxation = get_neos8_lp_relaxation_solution_cached(); + auto cliques = build_fractional_literal_cliques_for_assignment( + neos8_model, clique_table, lp_relaxation.primal, kCliqueTestTol, kCliqueTestTol, 100000); + cliques_ptr = std::make_unique>>(std::move(cliques)); + }); + cuopt_assert(cliques_ptr != nullptr, "Failed to initialize cached neos8 dumped literal cliques"); + return *cliques_ptr; +} + +double literal_clique_cut_violation(const std::vector& literal_clique, + const std::vector& assignment, + int num_vars) +{ + cuopt_assert(static_cast(assignment.size()) >= num_vars, + "Assignment size mismatch in literal clique violation"); + double dot = 0.0; + int num_complement_vars = 0; + for (const auto literal : literal_clique) { + cuopt_assert(literal >= 0 && literal < 2 * num_vars, "Literal out of range"); + const int var_idx = literal % num_vars; + const bool is_complement = literal >= num_vars; + if (is_complement) { + num_complement_vars++; + dot += assignment[var_idx]; + } else { + dot -= assignment[var_idx]; + } + } + const double rhs = static_cast(num_complement_vars - 1); + return rhs - dot; +} + +std::string format_phase2_literal_panic_dump(const std::vector& literal_clique, + const std::vector& incumbent, + int num_vars) +{ + std::ostringstream out; + out << "\nLiteral clique:"; + for (const auto literal : literal_clique) { + const bool is_complement = literal >= num_vars; + const int var_idx = literal % num_vars; + out << " " << (is_complement ? "~x" : "x") << var_idx << "(value=" << incumbent[var_idx] << ")"; + } + out << "\nViolation: " << literal_clique_cut_violation(literal_clique, incumbent, num_vars); + return out.str(); +} + +bool literal_cut_is_invalid_for_incumbent(const std::vector& literal_clique, + const std::vector& incumbent, + int num_vars, + double tol) +{ + return literal_clique_cut_violation(literal_clique, incumbent, num_vars) > tol; +} + +bool prefix_has_invalid_literal_cut(const std::vector>& dumped_cuts, + size_t prefix_end_exclusive, + const std::vector& incumbent, + int num_vars, + double tol) +{ + for (size_t i = 0; i < prefix_end_exclusive; ++i) { + if (literal_cut_is_invalid_for_incumbent(dumped_cuts[i], incumbent, num_vars, tol)) { + return true; + } + } + return false; +} + +std::optional isolate_first_invalid_literal_cut_by_bisection( + const std::vector>& dumped_cuts, + const std::vector& incumbent, + int num_vars, + double tol) +{ + if (!prefix_has_invalid_literal_cut(dumped_cuts, dumped_cuts.size(), incumbent, num_vars, tol)) { + return std::nullopt; + } + size_t lo = 0; + size_t hi = dumped_cuts.size() - 1; + while (lo < hi) { + const size_t mid = lo + (hi - lo) / 2; + if (prefix_has_invalid_literal_cut(dumped_cuts, mid + 1, incumbent, num_vars, tol)) { + hi = mid; + } else { + lo = mid + 1; + } + } + return lo; +} + +mps_parser::mps_data_model_t& get_neos8_lp_relaxation_model_cached() +{ + static std::once_flag init_flag; + static std::unique_ptr> model_ptr; + std::call_once(init_flag, []() { + auto lp_relaxation = get_neos8_model_cached(); + std::vector all_continuous(lp_relaxation.get_n_variables(), 'C'); + lp_relaxation.set_variable_types(all_continuous); + model_ptr = + std::make_unique>(std::move(lp_relaxation)); + }); + cuopt_assert(model_ptr != nullptr, "Failed to initialize cached neos8 LP relaxation model"); + return *model_ptr; +} + +mps_parser::mps_data_model_t append_literal_cut_prefix_to_lp_model( + const mps_parser::mps_data_model_t& base_lp_model, + const std::vector>& dumped_cuts, + size_t prefix_end_exclusive, + int num_vars) +{ + auto model_with_cuts = base_lp_model; + if (prefix_end_exclusive == 0) { return model_with_cuts; } + + std::vector matrix_values = base_lp_model.get_constraint_matrix_values(); + std::vector matrix_indices = base_lp_model.get_constraint_matrix_indices(); + std::vector matrix_offsets = base_lp_model.get_constraint_matrix_offsets(); + std::vector constraint_lbs = base_lp_model.get_constraint_lower_bounds(); + std::vector constraint_ubs = base_lp_model.get_constraint_upper_bounds(); + std::vector row_names = base_lp_model.get_row_names(); + if (matrix_offsets.empty()) { matrix_offsets.push_back(0); } + + const size_t cuts_to_apply = std::min(prefix_end_exclusive, dumped_cuts.size()); + for (size_t cut_idx = 0; cut_idx < cuts_to_apply; ++cut_idx) { + const auto& literal_cut = dumped_cuts[cut_idx]; + + std::vector row_vars; + std::vector row_coeffs; + row_vars.reserve(literal_cut.size()); + row_coeffs.reserve(literal_cut.size()); + + int num_complements = 0; + for (const auto literal : literal_cut) { + cuopt_assert(literal >= 0 && literal < 2 * num_vars, + "Literal out of range for LP cut append"); + const int var_idx = literal % num_vars; + const bool is_complement = literal >= num_vars; + if (is_complement) { num_complements++; } + const double coeff = is_complement ? 1.0 : -1.0; + + bool found = false; + for (size_t t = 0; t < row_vars.size(); ++t) { + if (row_vars[t] == var_idx) { + row_coeffs[t] += coeff; + found = true; + break; + } + } + if (!found) { + row_vars.push_back(var_idx); + row_coeffs.push_back(coeff); + } + } + + std::vector order(row_vars.size()); + std::iota(order.begin(), order.end(), 0); + std::sort(order.begin(), order.end(), [&](int a, int b) { return row_vars[a] < row_vars[b]; }); + for (const auto pos : order) { + const double coeff = row_coeffs[pos]; + if (std::abs(coeff) <= 1e-12) { continue; } + matrix_indices.push_back(row_vars[pos]); + matrix_values.push_back(coeff); + } + matrix_offsets.push_back(static_cast(matrix_indices.size())); + constraint_lbs.push_back(static_cast(num_complements - 1)); + constraint_ubs.push_back(std::numeric_limits::infinity()); + row_names.push_back("literal_cut_" + std::to_string(cut_idx)); + } + + model_with_cuts.set_csr_constraint_matrix(matrix_values.data(), + matrix_values.size(), + matrix_indices.data(), + matrix_indices.size(), + matrix_offsets.data(), + matrix_offsets.size()); + model_with_cuts.set_constraint_lower_bounds(constraint_lbs.data(), constraint_lbs.size()); + model_with_cuts.set_constraint_upper_bounds(constraint_ubs.data(), constraint_ubs.size()); + model_with_cuts.set_row_names(row_names); + return model_with_cuts; +} + +pdlp_termination_status_t solve_lp_with_literal_cut_prefix( + const std::vector>& dumped_cuts, size_t prefix_end_exclusive, int num_vars) +{ + const raft::handle_t handle{}; + auto& base_lp_model = get_neos8_lp_relaxation_model_cached(); + auto model_with_cuts = append_literal_cut_prefix_to_lp_model( + base_lp_model, dumped_cuts, prefix_end_exclusive, num_vars); + + pdlp_solver_settings_t lp_settings{}; + lp_settings.time_limit = 120.0; + lp_settings.presolver = presolver_t::None; + lp_settings.set_optimality_tolerance(1e-8); + + auto lp_solution = solve_lp(&handle, model_with_cuts, lp_settings); + return lp_solution.get_termination_status(); +} + +bool prefix_makes_lp_relaxation_infeasible(const std::vector>& dumped_cuts, + size_t prefix_end_exclusive, + int num_vars) +{ + const auto status = solve_lp_with_literal_cut_prefix(dumped_cuts, prefix_end_exclusive, num_vars); + return status == pdlp_termination_status_t::PrimalInfeasible; +} + +std::optional isolate_first_lp_infeasible_literal_cut_by_bisection( + const std::vector>& dumped_cuts, int num_vars) +{ + if (!prefix_makes_lp_relaxation_infeasible(dumped_cuts, dumped_cuts.size(), num_vars)) { + return std::nullopt; + } + size_t lo = 0; + size_t hi = dumped_cuts.size() - 1; + while (lo < hi) { + const size_t mid = lo + (hi - lo) / 2; + if (prefix_makes_lp_relaxation_infeasible(dumped_cuts, mid + 1, num_vars)) { + hi = mid; + } else { + lo = mid + 1; + } + } + return lo; +} + +} // namespace + // Problem data for the mixed integer linear programming problem mps_parser::mps_data_model_t create_cuts_problem_1() { @@ -93,7 +897,7 @@ TEST(cuts, test_cuts_1) // Expected objective value from documentation example is approximately -28 EXPECT_NEAR(-28, obj_val, 1e-3); - EXPECT_EQ(solution.get_num_nodes(), 0); + EXPECT_LE(solution.get_num_nodes(), 2); } // Problem data for the mixed integer linear programming problem @@ -165,4 +969,416 @@ TEST(cuts, test_cuts_2) EXPECT_EQ(solution.get_num_nodes(), 0); } +TEST(cuts, clique_phase1_smoke_conflict_graph_edges) +{ + const raft::handle_t handle{}; + auto problem = create_pairwise_triangle_with_isolated_variable_problem(); + auto clique_table = build_clique_table_for_model(handle, problem); + + // Positive edges from triangle. + EXPECT_TRUE(clique_table.check_adjacency(0, 1)); + EXPECT_TRUE(clique_table.check_adjacency(1, 0)); + EXPECT_TRUE(clique_table.check_adjacency(1, 2)); + EXPECT_TRUE(clique_table.check_adjacency(2, 1)); + EXPECT_TRUE(clique_table.check_adjacency(0, 2)); + EXPECT_TRUE(clique_table.check_adjacency(2, 0)); + + // Negative edges to isolated x3. + EXPECT_FALSE(clique_table.check_adjacency(0, 3)); + EXPECT_FALSE(clique_table.check_adjacency(3, 0)); + EXPECT_FALSE(clique_table.check_adjacency(1, 3)); + EXPECT_FALSE(clique_table.check_adjacency(3, 1)); + EXPECT_FALSE(clique_table.check_adjacency(2, 3)); + EXPECT_FALSE(clique_table.check_adjacency(3, 2)); + + // Self is never an edge. + EXPECT_FALSE(clique_table.check_adjacency(3, 3)); +} + +TEST(cuts, clique_phase1_unit_maximal_clique_finder_hardcoded_adj) +{ + // Hardcoded graph: + // triangle (0,1,2) and an extra edge (2,3) + std::vector> adj = { + {0, 1, 1, 0}, + {1, 0, 1, 0}, + {1, 1, 0, 1}, + {0, 0, 1, 0}, + }; + + auto maximal_bruteforce = canonicalize_cliques(maximal_cliques_bruteforce(adj)); + auto maximal_internal = maximal_cliques_from_production_algorithm(adj); + EXPECT_EQ(maximal_internal, maximal_bruteforce); + bool found_triangle = false; + for (const auto& clique : maximal_internal) { + if (clique.size() == 3 && clique[0] == 0 && clique[1] == 1 && clique[2] == 2) { + found_triangle = true; + break; + } + } + EXPECT_TRUE(found_triangle); +} + +TEST(cuts, clique_phase1_addtl_conflict_symmetry_and_reverse_lookup) +{ + const raft::handle_t handle{}; + auto problem = create_weighted_addtl_conflict_problem(); + auto clique_table = build_clique_table_for_model_with_min_size(handle, problem, 1); + + ASSERT_FALSE(clique_table.addtl_cliques.empty()); + + // Conflict introduced through additional clique path must be symmetric. + EXPECT_TRUE(clique_table.check_adjacency(1, 3)); + EXPECT_TRUE(clique_table.check_adjacency(3, 1)); + + // get_adj_set_of_var() must also include reverse lookup for addtl membership. + auto adj_of_1 = clique_table.get_adj_set_of_var(1); + auto adj_of_3 = clique_table.get_adj_set_of_var(3); + EXPECT_TRUE(adj_of_1.count(3) > 0); + EXPECT_TRUE(adj_of_3.count(1) > 0); +} + +TEST(cuts, clique_phase1_remove_small_cliques_preserves_addtl_conflicts) +{ + const raft::handle_t handle{}; + auto problem = create_weighted_addtl_conflict_problem(); + // Force base clique {x2,x3} to be considered "small" and removed. + auto clique_table = build_clique_table_for_model_with_min_size(handle, problem, 2); + + EXPECT_TRUE(clique_table.first.empty()); + EXPECT_TRUE(clique_table.addtl_cliques.empty()); + + // Conflicts must remain materialized in adj_list_small_cliques after removals. + EXPECT_TRUE(clique_table.check_adjacency(1, 3)); + EXPECT_TRUE(clique_table.check_adjacency(3, 1)); + EXPECT_TRUE(clique_table.check_adjacency(2, 3)); + EXPECT_TRUE(clique_table.check_adjacency(3, 2)); + EXPECT_FALSE(clique_table.check_adjacency(0, 3)); +} + +TEST(cuts, clique_phase2_no_cut_off_optimal_solution_validation) +{ + const raft::handle_t handle{}; + auto problem = create_pairwise_triangle_set_packing_problem(); + + mip_solver_settings_t settings; + settings.time_limit = 10.0; + settings.presolver = presolver_t::None; + disable_all_cuts(settings); + + auto mip_solution = solve_mip(&handle, problem, settings); + ASSERT_EQ(mip_solution.get_termination_status(), mip_termination_status_t::Optimal); + auto x_star = cuopt::host_copy(mip_solution.get_solution(), handle.get_stream()); + + auto clique_table = build_clique_table_for_model(handle, problem); + auto adj = build_original_adjacency_matrix(clique_table, problem.get_n_variables()); + auto maximal = maximal_cliques_bruteforce(adj); + ASSERT_FALSE(maximal.empty()); + + for (const auto& clique_vars : maximal) { + if (clique_vars.size() < 2) { continue; } + const double lhs = original_clique_sum(clique_vars, x_star); + ASSERT_LE(lhs, 1.0 + kCliqueTestTol) << format_phase2_panic_dump(problem, clique_vars, x_star); + } +} + +TEST(cuts, clique_phase3_fractional_separation_must_cut_off) +{ + const raft::handle_t handle{}; + auto mip_problem = create_pairwise_triangle_set_packing_problem(); + + auto lp_relaxation = mip_problem; + std::vector all_continuous(lp_relaxation.get_n_variables(), 'C'); + lp_relaxation.set_variable_types(all_continuous); + + pdlp_solver_settings_t lp_settings{}; + lp_settings.time_limit = 10.0; + lp_settings.presolver = presolver_t::None; + lp_settings.set_optimality_tolerance(1e-8); + + auto lp_solution = solve_lp(&handle, lp_relaxation, lp_settings); + ASSERT_EQ(lp_solution.get_termination_status(), pdlp_termination_status_t::Optimal); + auto x_bar = cuopt::host_copy(lp_solution.get_primal_solution(), handle.get_stream()); + + auto clique_table = build_clique_table_for_model(handle, mip_problem); + auto adj = build_original_adjacency_matrix(clique_table, mip_problem.get_n_variables()); + auto maximal = maximal_cliques_from_production_algorithm(adj); + + bool found_separating_clique = false; + for (const auto& clique_vars : maximal) { + if (clique_vars.size() < 2) { continue; } + const double lhs = original_clique_sum(clique_vars, x_bar); + if (lhs > 1.0 + kCliqueTestTol) { + found_separating_clique = true; + break; + } + } + EXPECT_TRUE(found_separating_clique); +} + +TEST(cuts, clique_phase4_fault_isolation_binary_search) +{ + // Simulated incumbent x* and dumped cuts. + // First invalid cut is at index 2: {0,1} gives 2 > 1. + const std::vector incumbent = {1.0, 1.0, 0.0, 0.0}; + const std::vector> dumped_cuts = { + {0, 2}, // valid + {1, 3}, // valid + {0, 1}, // invalid + {2, 3}, // valid + }; + + auto first_invalid = + isolate_first_invalid_cut_by_bisection(dumped_cuts, incumbent, kCliqueTestTol); + ASSERT_TRUE(first_invalid.has_value()); + EXPECT_EQ(first_invalid.value(), 2); +} + +TEST(cuts, clique_phase4_tree_depth_limit_smoke) +{ + const raft::handle_t handle{}; + auto problem = create_pairwise_triangle_set_packing_problem(); + + mip_solver_settings_t root_only_settings; + root_only_settings.time_limit = 10.0; + root_only_settings.presolver = presolver_t::None; + root_only_settings.node_limit = 0; + disable_non_clique_cuts(root_only_settings); + + mip_solver_settings_t deeper_settings = root_only_settings; + deeper_settings.node_limit = 100; + + auto root_only_solution = solve_mip(&handle, problem, root_only_settings); + auto deeper_solution = solve_mip(&handle, problem, deeper_settings); + + EXPECT_EQ(deeper_solution.get_termination_status(), mip_termination_status_t::Optimal); + EXPECT_NE(root_only_solution.get_termination_status(), mip_termination_status_t::Infeasible); + if (root_only_solution.get_termination_status() == mip_termination_status_t::Optimal) { + EXPECT_NEAR( + root_only_solution.get_objective_value(), deeper_solution.get_objective_value(), 1e-6); + } +} + +TEST(cuts, clique_phase5_ignores_non_binary_variables) +{ + const raft::handle_t handle{}; + auto problem = create_binary_continuous_mixed_conflict_problem(); + auto clique_table = build_clique_table_for_model(handle, problem); + + EXPECT_TRUE(clique_table.check_adjacency(0, 2)); + EXPECT_FALSE(clique_table.check_adjacency(0, 1)); + EXPECT_FALSE(clique_table.check_adjacency(1, 2)); +} + +TEST(cuts, clique_phase5_ignores_fractional_binary_bounds) +{ + const raft::handle_t handle{}; + auto problem = create_near_binary_bound_conflict_problem(); + auto clique_table = build_clique_table_for_model(handle, problem); + + EXPECT_FALSE(clique_table.check_adjacency(0, 1)); +} + +TEST(cuts, clique_neos8_phase1_addtl_indices_and_nonempty_graph) +{ + auto& clique_table = get_neos8_clique_table_cached(); + EXPECT_TRUE(!clique_table.first.empty() || !clique_table.addtl_cliques.empty()); + + const size_t max_addtl_to_check = std::min(clique_table.addtl_cliques.size(), 400); + for (size_t k = 0; k < max_addtl_to_check; ++k) { + const auto& addtl = clique_table.addtl_cliques[k]; + ASSERT_GE(addtl.clique_idx, 0); + ASSERT_LT(static_cast(addtl.clique_idx), clique_table.first.size()); + const auto& base = clique_table.first[addtl.clique_idx]; + ASSERT_GE(addtl.start_pos_on_clique, 0); + ASSERT_LE(static_cast(addtl.start_pos_on_clique), base.size()); + } +} + +TEST(cuts, clique_neos8_phase1_addtl_suffix_conflicts_materialized) +{ + auto& clique_table = get_neos8_clique_table_cached(); + if (clique_table.addtl_cliques.empty()) { + GTEST_SKIP() << "neos8 produced no additional cliques in this configuration"; + } + + size_t checked_addtl = 0; + const size_t max_addtl_to_check = std::min(clique_table.addtl_cliques.size(), 200); + for (size_t k = 0; k < max_addtl_to_check; ++k) { + const auto& addtl = clique_table.addtl_cliques[k]; + if (addtl.clique_idx < 0 || + static_cast(addtl.clique_idx) >= clique_table.first.size()) { + continue; + } + const auto& base = clique_table.first[addtl.clique_idx]; + const size_t start_at = static_cast(addtl.start_pos_on_clique); + if (start_at >= base.size()) { continue; } + + const size_t end_at = std::min(base.size(), start_at + 8); + for (size_t p = start_at; p < end_at; ++p) { + EXPECT_TRUE(clique_table.check_adjacency(addtl.vertex_idx, base[p])); + EXPECT_TRUE(clique_table.check_adjacency(base[p], addtl.vertex_idx)); + } + checked_addtl++; + } + EXPECT_GT(checked_addtl, 0); +} + +TEST(cuts, clique_neos8_phase1_symmetry_and_degree_cache_consistency) +{ + auto& clique_table = get_neos8_clique_table_cached(); + const int n_vertices = static_cast(clique_table.var_clique_map_first.size()); + ASSERT_GT(n_vertices, 0); + + const int sample_size = std::min(n_vertices, 24); + const int stride = std::max(1, n_vertices / sample_size); + std::vector sampled_vertices(sample_size); + for (int i = 0; i < sample_size; ++i) { + sampled_vertices[i] = (i * stride) % n_vertices; + } + + for (const auto v : sampled_vertices) { + const auto deg_cached = clique_table.get_degree_of_var(v); + const auto adj_set = clique_table.get_adj_set_of_var(v); + EXPECT_EQ(deg_cached, static_cast(adj_set.size())); + EXPECT_EQ(deg_cached, clique_table.get_degree_of_var(v)); + } + + for (int i = 0; i < sample_size; ++i) { + for (int j = i + 1; j < sample_size; ++j) { + const auto v1 = sampled_vertices[i]; + const auto v2 = sampled_vertices[j]; + EXPECT_EQ(clique_table.check_adjacency(v1, v2), clique_table.check_adjacency(v2, v1)); + } + } +} + +TEST(cuts, clique_neos8_phase2_no_cut_off_optimal_solution_validation) +{ + auto& no_cut_mip = get_neos8_optimal_solution_no_cuts_cached(); + ASSERT_EQ(no_cut_mip.status, mip_termination_status_t::Optimal); + + auto& lp_relaxation = get_neos8_lp_relaxation_solution_cached(); + ASSERT_EQ(lp_relaxation.status, pdlp_termination_status_t::Optimal); + + auto& dumped_literal_cuts = get_neos8_fractional_literal_cliques_cached(); + if (dumped_literal_cuts.empty()) { + GTEST_SKIP() << "neos8 produced no candidate literal cliques from LP relaxation"; + } + + const int num_vars = get_neos8_model_cached().get_n_variables(); + for (size_t i = 0; i < dumped_literal_cuts.size(); ++i) { + const double violation = + literal_clique_cut_violation(dumped_literal_cuts[i], no_cut_mip.primal, num_vars); + ASSERT_LE(violation, kCliqueTestTol) + << "Invalid clique cut at index " << i + << format_phase2_literal_panic_dump(dumped_literal_cuts[i], no_cut_mip.primal, num_vars); + } +} + +TEST(cuts, clique_neos8_phase3_fractional_separation_must_cut_off) +{ + auto& lp_relaxation = get_neos8_lp_relaxation_solution_cached(); + ASSERT_EQ(lp_relaxation.status, pdlp_termination_status_t::Optimal); + + auto& dumped_literal_cuts = get_neos8_fractional_literal_cliques_cached(); + if (dumped_literal_cuts.empty()) { + GTEST_SKIP() << "neos8 produced no candidate literal cliques from LP relaxation"; + } + + const int num_vars = get_neos8_model_cached().get_n_variables(); + for (size_t i = 0; i < dumped_literal_cuts.size(); ++i) { + const double violation = + literal_clique_cut_violation(dumped_literal_cuts[i], lp_relaxation.primal, num_vars); + ASSERT_GT(violation, kCliqueTestTol) + << "Non-separating clique cut at index " << i + << format_phase2_literal_panic_dump(dumped_literal_cuts[i], lp_relaxation.primal, num_vars); + } +} + +TEST(cuts, clique_neos8_phase4_fault_isolation_binary_search) +{ + auto& no_cut_mip = get_neos8_optimal_solution_no_cuts_cached(); + ASSERT_EQ(no_cut_mip.status, mip_termination_status_t::Optimal); + + auto& dumped_literal_cuts = get_neos8_fractional_literal_cliques_cached(); + if (dumped_literal_cuts.empty()) { + GTEST_SKIP() << "neos8 produced no candidate literal cliques from LP relaxation"; + } + + const auto& model = get_neos8_model_cached(); + const int num_vars = model.get_n_variables(); + + // Real dumped cuts should not invalidate the no-cut incumbent. + EXPECT_FALSE(prefix_has_invalid_literal_cut( + dumped_literal_cuts, dumped_literal_cuts.size(), no_cut_mip.primal, num_vars, kCliqueTestTol)); + + // Inject a known-invalid cut and verify bisection isolates it. + std::vector incumbent_ones; + incumbent_ones.reserve(2); + for (int j = 0; j < num_vars && incumbent_ones.size() < 2; ++j) { + if (!is_binary_var_for_clique_literals(model, j, kCliqueTestTol)) { continue; } + if (no_cut_mip.primal[j] >= 1.0 - kCliqueTestTol) { incumbent_ones.push_back(j); } + } + if (incumbent_ones.size() < 2) { + GTEST_SKIP() << "Could not find two binary variables fixed to one in neos8 incumbent"; + } + + auto cuts_with_injected_bug = dumped_literal_cuts; + const size_t injected_index = cuts_with_injected_bug.size(); + cuts_with_injected_bug.push_back({incumbent_ones[0], incumbent_ones[1]}); + + auto first_invalid = isolate_first_invalid_literal_cut_by_bisection( + cuts_with_injected_bug, no_cut_mip.primal, num_vars, kCliqueTestTol); + ASSERT_TRUE(first_invalid.has_value()); + EXPECT_EQ(first_invalid.value(), injected_index); +} + +TEST(cuts, clique_neos8_phase4_lp_infeasibility_binary_search) +{ + auto& dumped_literal_cuts = get_neos8_fractional_literal_cliques_cached(); + if (dumped_literal_cuts.empty()) { + GTEST_SKIP() << "neos8 produced no candidate literal cliques from LP relaxation"; + } + + const auto& model = get_neos8_model_cached(); + const int num_vars = model.get_n_variables(); + + std::vector> cuts_for_lp_search; + const size_t max_real_cuts = std::min(dumped_literal_cuts.size(), 64); + cuts_for_lp_search.insert(cuts_for_lp_search.end(), + dumped_literal_cuts.begin(), + dumped_literal_cuts.begin() + max_real_cuts); + + int inject_var = -1; + for (int j = 0; j < num_vars; ++j) { + if (is_binary_var_for_clique_literals(model, j, kCliqueTestTol)) { + inject_var = j; + break; + } + } + if (inject_var < 0) { + GTEST_SKIP() << "Could not find a binary variable for LP infeasibility injection"; + } + + const size_t injected_index = cuts_for_lp_search.size(); + cuts_for_lp_search.push_back( + {inject_var, inject_var, inject_var + num_vars, inject_var + num_vars}); + + // Prefix before injected cut should remain LP-feasible. + const auto status_before_injection = + solve_lp_with_literal_cut_prefix(cuts_for_lp_search, injected_index, num_vars); + EXPECT_NE(status_before_injection, pdlp_termination_status_t::PrimalInfeasible); + + // Full prefix should be LP-infeasible due to injected contradictory cut. + const auto status_with_injection = + solve_lp_with_literal_cut_prefix(cuts_for_lp_search, cuts_for_lp_search.size(), num_vars); + EXPECT_EQ(status_with_injection, pdlp_termination_status_t::PrimalInfeasible); + + auto first_infeasible = + isolate_first_lp_infeasible_literal_cut_by_bisection(cuts_for_lp_search, num_vars); + ASSERT_TRUE(first_infeasible.has_value()); + EXPECT_EQ(first_infeasible.value(), injected_index); +} + } // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/determinism_test.cu b/cpp/tests/mip/determinism_test.cu index 1e59fba649..0623cbf8f2 100644 --- a/cpp/tests/mip/determinism_test.cu +++ b/cpp/tests/mip/determinism_test.cu @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -24,6 +25,7 @@ #include #include +#include #include #include @@ -31,6 +33,34 @@ namespace cuopt::linear_programming::test { namespace { +class scoped_env_var_t { + public: + scoped_env_var_t(const char* name, const char* value) : name_(name) + { + cuopt_assert(name != nullptr, "Environment variable name must be non-null"); + cuopt_assert(value != nullptr, "Environment variable value must be non-null"); + const char* original_value = std::getenv(name_); + was_set_ = (original_value != nullptr); + if (was_set_) { original_value_ = original_value; } + const int status = setenv(name_, value, 1); + assert(status == 0); + } + + ~scoped_env_var_t() + { + const int status = was_set_ ? setenv(name_, original_value_.c_str(), 1) : unsetenv(name_); + assert(status == 0); + } + + scoped_env_var_t(const scoped_env_var_t&) = delete; + scoped_env_var_t& operator=(const scoped_env_var_t&) = delete; + + private: + const char* name_; + std::string original_value_; + bool was_set_{false}; +}; + void expect_solutions_bitwise_equal(const mip_solution_t& sol1, const mip_solution_t& sol2, raft::handle_t& handle, @@ -45,6 +75,118 @@ void expect_solutions_bitwise_equal(const mip_solution_t& sol1, } } +struct callback_solution_t { + std::vector assignment; + double objective{}; + double solution_bound{}; + internals::mip_solution_origin_t origin{internals::mip_solution_origin_t::UNKNOWN}; +}; + +class first_n_get_solution_callback_t : public cuopt::internals::get_solution_callback_ext_t { + public: + first_n_get_solution_callback_t(std::vector& solutions_in, + int n_variables_, + size_t max_solutions_, + void* expected_user_data_) + : solutions(solutions_in), + expected_user_data(expected_user_data_), + n_variables(n_variables_), + max_solutions(max_solutions_) + { + } + + void get_solution(void* data, + void* cost, + void* solution_bound, + const internals::mip_solution_callback_info_t* callback_info, + void* user_data) override + { + EXPECT_EQ(user_data, expected_user_data); + ASSERT_NE(callback_info, nullptr); + EXPECT_GE(callback_info->struct_size, sizeof(internals::mip_solution_callback_info_t)); + n_calls++; + + auto assignment_ptr = static_cast(data); + auto objective_ptr = static_cast(cost); + auto solution_bound_ptr = static_cast(solution_bound); + EXPECT_FALSE(std::isnan(objective_ptr[0])); + EXPECT_FALSE(std::isnan(solution_bound_ptr[0])); + + if (solutions.size() >= max_solutions) { return; } + + callback_solution_t callback_solution; + callback_solution.assignment.assign(assignment_ptr, assignment_ptr + n_variables); + callback_solution.objective = objective_ptr[0]; + callback_solution.solution_bound = solution_bound_ptr[0]; + callback_solution.origin = callback_info->origin; + solutions.push_back(std::move(callback_solution)); + } + + std::vector& solutions; + void* expected_user_data; + int n_calls{0}; + int n_variables; + size_t max_solutions; +}; + +bool is_gpu_callback_origin(internals::mip_solution_origin_t origin) +{ + switch (origin) { + case internals::mip_solution_origin_t::FEASIBILITY_JUMP: + case internals::mip_solution_origin_t::LOCAL_SEARCH: + case internals::mip_solution_origin_t::QUICK_FEASIBLE: + case internals::mip_solution_origin_t::LP_ROUNDING: + case internals::mip_solution_origin_t::RECOMBINATION: + case internals::mip_solution_origin_t::SUB_MIP: return true; + default: return false; + } +} + +size_t count_callbacks_with_origin(const std::vector& callbacks, + internals::mip_solution_origin_t origin) +{ + return std::count_if(callbacks.begin(), + callbacks.end(), + [origin](const callback_solution_t& sol) { return sol.origin == origin; }); +} + +size_t count_gpu_callbacks(const std::vector& callbacks) +{ + return std::count_if(callbacks.begin(), callbacks.end(), [](const callback_solution_t& sol) { + return is_gpu_callback_origin(sol.origin); + }); +} + +size_t count_branch_and_bound_callbacks(const std::vector& callbacks) +{ + return std::count_if(callbacks.begin(), callbacks.end(), [](const callback_solution_t& sol) { + return sol.origin == internals::mip_solution_origin_t::BRANCH_AND_BOUND_NODE || + sol.origin == internals::mip_solution_origin_t::BRANCH_AND_BOUND_DIVING; + }); +} + +void expect_callback_prefixes_bitwise_equal(const std::vector& lhs, + const std::vector& rhs, + size_t prefix_size, + const std::string& label) +{ + ASSERT_GE(lhs.size(), prefix_size) << label << "Left callback prefix missing entries"; + ASSERT_GE(rhs.size(), prefix_size) << label << "Right callback prefix missing entries"; + for (size_t i = 0; i < prefix_size; ++i) { + EXPECT_EQ(lhs[i].objective, rhs[i].objective) + << label << "Callback objective differs at index " << i; + EXPECT_EQ(lhs[i].solution_bound, rhs[i].solution_bound) + << label << "Callback bound differs at index " << i; + EXPECT_EQ(lhs[i].origin, rhs[i].origin) << label << "Callback origin differs at index " << i; + ASSERT_EQ(lhs[i].assignment.size(), rhs[i].assignment.size()) + << label << "Callback assignment size differs at index " << i; + for (size_t j = 0; j < lhs[i].assignment.size(); ++j) { + EXPECT_EQ(lhs[i].assignment[j], rhs[i].assignment[j]) + << label << "Callback assignment differs at callback " << i << " variable " << j; + } + } +} + } // namespace class DeterministicBBTest : public ::testing::Test { @@ -61,7 +203,7 @@ TEST_F(DeterministicBBTest, reproducible_objective) mip_solver_settings_t settings; settings.time_limit = 60.0; - settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC_BB; settings.num_cpu_threads = 8; settings.work_limit = 4; @@ -93,7 +235,7 @@ TEST_F(DeterministicBBTest, reproducible_infeasibility) mip_solver_settings_t settings; settings.time_limit = 60.0; - settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC_BB; settings.num_cpu_threads = 8; settings.work_limit = 100; // High enough to fully explore @@ -125,7 +267,7 @@ TEST_F(DeterministicBBTest, reproducible_high_contention) mip_solver_settings_t settings; settings.time_limit = 60.0; - settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC_BB; settings.num_cpu_threads = 128; // High thread count to stress contention settings.work_limit = 1; @@ -160,7 +302,7 @@ TEST_F(DeterministicBBTest, reproducible_solution_vector) mip_solver_settings_t settings; settings.time_limit = 60.0; - settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC_BB; settings.num_cpu_threads = 8; settings.work_limit = 2; @@ -177,6 +319,250 @@ TEST_F(DeterministicBBTest, reproducible_solution_vector) expect_solutions_bitwise_equal(solution1, solution2, handle_); } +TEST_F(DeterministicBBTest, reproducible_with_gpu_pipeline_in_deterministic_mode) +{ + auto path = make_path_absolute("/mip/50v-10.mps"); + auto problem = mps_parser::parse_mps(path, false); + handle_.sync_stream(); + + mip_solver_settings_t settings; + settings.time_limit = 60.0; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + settings.num_cpu_threads = 8; + settings.work_limit = 30; + settings.gpu_heur_work_unit_scale = 0.1; + settings.cpufj_work_unit_scale = 1.0; + + auto seed = std::random_device{}() & 0x7fffffff; + std::cout << "Tested with seed " << seed << "\n"; + settings.seed = seed; + + cuopt::seed_generator::set_seed(seed); + auto solution1 = solve_mip(&handle_, problem, settings); + cuopt::seed_generator::set_seed(seed); + auto solution2 = solve_mip(&handle_, problem, settings); + cuopt::seed_generator::set_seed(seed); + auto solution3 = solve_mip(&handle_, problem, settings); + + EXPECT_EQ(solution1.get_termination_status(), solution2.get_termination_status()); + EXPECT_EQ(solution1.get_termination_status(), solution3.get_termination_status()); + + EXPECT_DOUBLE_EQ(solution1.get_objective_value(), solution2.get_objective_value()); + EXPECT_DOUBLE_EQ(solution1.get_objective_value(), solution3.get_objective_value()); + + EXPECT_DOUBLE_EQ(solution1.get_solution_bound(), solution2.get_solution_bound()); + EXPECT_DOUBLE_EQ(solution1.get_solution_bound(), solution3.get_solution_bound()); + + expect_solutions_bitwise_equal( + solution1, solution2, handle_, "Deterministic GPU pipeline run 1 vs 2: "); + expect_solutions_bitwise_equal( + solution1, solution3, handle_, "Deterministic GPU pipeline run 1 vs 3: "); +} + +TEST_F(DeterministicBBTest, deterministic_gpu_pipeline_ignores_cpufj_work_scale) +{ + auto path = make_path_absolute("/mip/50v-10.mps"); + auto problem = mps_parser::parse_mps(path, false); + handle_.sync_stream(); + + mip_solver_settings_t base_settings; + base_settings.time_limit = 60.0; + base_settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + base_settings.num_cpu_threads = 8; + base_settings.work_limit = 30; + base_settings.gpu_heur_work_unit_scale = 0.1; + + auto seed = std::random_device{}() & 0x7fffffff; + std::cout << "Tested with seed " << seed << "\n"; + base_settings.seed = seed; + + auto settings_without_cpufj = base_settings; + settings_without_cpufj.cpufj_work_unit_scale = 1.0; + cuopt::seed_generator::set_seed(seed); + auto solution_without_cpufj = solve_mip(&handle_, problem, settings_without_cpufj); + + auto settings_with_cpufj_scale = base_settings; + settings_with_cpufj_scale.cpufj_work_unit_scale = 17.0; + cuopt::seed_generator::set_seed(seed); + auto solution_with_cpufj_scale = solve_mip(&handle_, problem, settings_with_cpufj_scale); + + EXPECT_EQ(solution_without_cpufj.get_termination_status(), + solution_with_cpufj_scale.get_termination_status()); + EXPECT_DOUBLE_EQ(solution_without_cpufj.get_objective_value(), + solution_with_cpufj_scale.get_objective_value()); + EXPECT_DOUBLE_EQ(solution_without_cpufj.get_solution_bound(), + solution_with_cpufj_scale.get_solution_bound()); + expect_solutions_bitwise_equal(solution_without_cpufj, + solution_with_cpufj_scale, + handle_, + "Deterministic GPU pipeline should ignore CPUFJ scale: "); +} + +TEST_F(DeterministicBBTest, deterministic_callback_sequence_reproducible_with_gpu_pipeline) +{ + constexpr size_t callback_compare_count = 5; + constexpr size_t callback_capture_limit = 32; + constexpr size_t min_gpu_callback_count = 3; + constexpr size_t min_bnb_callback_count = 3; + + auto path = make_path_absolute("/mip/50v-10.mps"); + auto problem = mps_parser::parse_mps(path, false); + handle_.sync_stream(); + + mip_solver_settings_t settings; + settings.time_limit = 360.0; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + settings.num_cpu_threads = 2; + settings.work_limit = 60; + settings.gpu_heur_work_unit_scale = 0.05; + settings.cpufj_work_unit_scale = 1.0; + + auto seed = std::random_device{}() & 0x7fffffff; + std::cout << "Tested with seed " << seed << "\n"; + settings.seed = seed; + + const int n_variables = problem.get_variable_lower_bounds().size(); + int user_data = 7; + + std::vector callbacks_run1; + first_n_get_solution_callback_t callback_run1( + callbacks_run1, n_variables, callback_capture_limit, &user_data); + auto settings_run1 = settings; + settings_run1.set_mip_callback(&callback_run1, &user_data); + cuopt::seed_generator::set_seed(seed); + auto solution1 = solve_mip(&handle_, problem, settings_run1); + + std::vector callbacks_run2; + first_n_get_solution_callback_t callback_run2( + callbacks_run2, n_variables, callback_capture_limit, &user_data); + auto settings_run2 = settings; + settings_run2.set_mip_callback(&callback_run2, &user_data); + cuopt::seed_generator::set_seed(seed); + auto solution2 = solve_mip(&handle_, problem, settings_run2); + + std::vector callbacks_run3; + first_n_get_solution_callback_t callback_run3( + callbacks_run3, n_variables, callback_capture_limit, &user_data); + auto settings_run3 = settings; + settings_run3.set_mip_callback(&callback_run3, &user_data); + cuopt::seed_generator::set_seed(seed); + auto solution3 = solve_mip(&handle_, problem, settings_run3); + + EXPECT_EQ(solution1.get_termination_status(), solution2.get_termination_status()); + EXPECT_EQ(solution1.get_termination_status(), solution3.get_termination_status()); + EXPECT_GE(callback_run1.n_calls, (int)callback_compare_count); + EXPECT_GE(callback_run2.n_calls, (int)callback_compare_count); + EXPECT_GE(callback_run3.n_calls, (int)callback_compare_count); + ASSERT_GE(callbacks_run1.size(), callback_compare_count); + ASSERT_GE(callbacks_run2.size(), callback_compare_count); + ASSERT_GE(callbacks_run3.size(), callback_compare_count); + + EXPECT_GE(count_gpu_callbacks(callbacks_run1), min_gpu_callback_count); + EXPECT_GE(count_gpu_callbacks(callbacks_run2), min_gpu_callback_count); + EXPECT_GE(count_gpu_callbacks(callbacks_run3), min_gpu_callback_count); + EXPECT_GE(count_branch_and_bound_callbacks(callbacks_run1), min_bnb_callback_count); + EXPECT_GE(count_branch_and_bound_callbacks(callbacks_run2), min_bnb_callback_count); + EXPECT_GE(count_branch_and_bound_callbacks(callbacks_run3), min_bnb_callback_count); + + expect_callback_prefixes_bitwise_equal( + callbacks_run1, callbacks_run2, callback_compare_count, "Deterministic callback run 1 vs 2: "); + expect_callback_prefixes_bitwise_equal( + callbacks_run1, callbacks_run3, callback_compare_count, "Deterministic callback run 1 vs 3: "); +} + +class DeterministicGpuHeuristicsInstanceTest : public ::testing::TestWithParam { + protected: + raft::handle_t handle_; +}; + +TEST_P(DeterministicGpuHeuristicsInstanceTest, reproducible_with_gpu_heuristics) +{ + auto path = make_path_absolute(GetParam()); + auto problem = mps_parser::parse_mps(path, false); + handle_.sync_stream(); + + mip_solver_settings_t settings; + settings.time_limit = 60.0; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + settings.num_cpu_threads = 8; + settings.work_limit = 30; + + auto seed = std::random_device{}() & 0x7fffffff; + std::cout << "Tested with seed " << seed << "\n"; + settings.seed = seed; + + cuopt::seed_generator::set_seed(seed); + auto solution1 = solve_mip(&handle_, problem, settings); + cuopt::seed_generator::set_seed(seed); + auto solution2 = solve_mip(&handle_, problem, settings); + cuopt::seed_generator::set_seed(seed); + auto solution3 = solve_mip(&handle_, problem, settings); + + EXPECT_EQ(solution1.get_termination_status(), solution2.get_termination_status()); + EXPECT_EQ(solution1.get_termination_status(), solution3.get_termination_status()); + + EXPECT_DOUBLE_EQ(solution1.get_objective_value(), solution2.get_objective_value()); + EXPECT_DOUBLE_EQ(solution1.get_objective_value(), solution3.get_objective_value()); + + EXPECT_DOUBLE_EQ(solution1.get_solution_bound(), solution2.get_solution_bound()); + EXPECT_DOUBLE_EQ(solution1.get_solution_bound(), solution3.get_solution_bound()); + + expect_solutions_bitwise_equal(solution1, solution2, handle_, "GPU heur run 1 vs 2: "); + expect_solutions_bitwise_equal(solution1, solution3, handle_, "GPU heur run 1 vs 3: "); +} + +TEST_F(DeterministicBBTest, reproducible_with_gpu_heuristics_50v10_no_cuts) +{ + auto path = make_path_absolute("/mip/50v-10.mps"); + auto problem = mps_parser::parse_mps(path, false); + handle_.sync_stream(); + + mip_solver_settings_t settings; + settings.time_limit = 60.0; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + settings.num_cpu_threads = 8; + settings.work_limit = 30; + // settings.max_cut_passes = 0; + + auto seed = std::random_device{}() & 0x7fffffff; + std::cout << "Tested with seed " << seed << "\n"; + settings.seed = seed; + + cuopt::seed_generator::set_seed(seed); + auto solution1 = solve_mip(&handle_, problem, settings); + cuopt::seed_generator::set_seed(seed); + auto solution2 = solve_mip(&handle_, problem, settings); + cuopt::seed_generator::set_seed(seed); + auto solution3 = solve_mip(&handle_, problem, settings); + + EXPECT_EQ(solution1.get_termination_status(), solution2.get_termination_status()); + EXPECT_EQ(solution1.get_termination_status(), solution3.get_termination_status()); + + EXPECT_DOUBLE_EQ(solution1.get_objective_value(), solution2.get_objective_value()); + EXPECT_DOUBLE_EQ(solution1.get_objective_value(), solution3.get_objective_value()); + + EXPECT_DOUBLE_EQ(solution1.get_solution_bound(), solution2.get_solution_bound()); + EXPECT_DOUBLE_EQ(solution1.get_solution_bound(), solution3.get_solution_bound()); + + expect_solutions_bitwise_equal(solution1, solution2, handle_, "GPU heur no-cuts run 1 vs 2: "); + expect_solutions_bitwise_equal(solution1, solution3, handle_, "GPU heur no-cuts run 1 vs 3: "); +} + +INSTANTIATE_TEST_SUITE_P( + DeterministicGpuHeuristics, + DeterministicGpuHeuristicsInstanceTest, + ::testing::Values(std::string("/mip/gen-ip054.mps"), + std::string("/mip/pk1.mps"), + // std::string("/mip/sct2.mps"), + // std::string("/mip/thor50dday.mps"), + std::string("/mip/50v-10.mps")), + [](const ::testing::TestParamInfo& info) { + std::string name = info.param.substr(info.param.rfind('/') + 1); + name = name.substr(0, name.rfind('.')); + std::replace(name.begin(), name.end(), '-', '_'); + return name; + }); + // Parameterized test for different problem instances class DeterministicBBInstanceTest : public ::testing::TestWithParam> { @@ -186,6 +572,7 @@ class DeterministicBBInstanceTest TEST_P(DeterministicBBInstanceTest, deterministic_across_runs) { + // scoped_env_var_t gpu_fj_work_scale("CUOPT_GPU_HEUR_WORK_UNIT_SCALE", "0.1"); auto [instance_path, num_threads, time_limit, work_limit] = GetParam(); auto path = make_path_absolute(instance_path); auto problem = mps_parser::parse_mps(path, false); @@ -230,6 +617,7 @@ INSTANTIATE_TEST_SUITE_P( // Instance, threads, time_limit std::make_tuple("/mip/gen-ip054.mps", 4, 60.0, 4), std::make_tuple("/mip/swath1.mps", 8, 60.0, 4), + std::make_tuple("/mip/50v-10.mps", 8, 60.0, 30), std::make_tuple("/mip/gen-ip054.mps", 128, 120.0, 1), std::make_tuple("/mip/bb_optimality.mps", 4, 60.0, 4), std::make_tuple("/mip/neos5.mps", 16, 60.0, 1), diff --git a/cpp/tests/mip/determinism_utils.cuh b/cpp/tests/mip/determinism_utils.cuh new file mode 100644 index 0000000000..28e8c4b51b --- /dev/null +++ b/cpp/tests/mip/determinism_utils.cuh @@ -0,0 +1,77 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include + +#include +#include + +#include + +namespace cuopt::linear_programming::test { + +static __global__ void spin_kernel(int* flag, unsigned long long timeout_clocks = 10000000) +{ + cuda::atomic_ref flag_ref(*flag); + + long long int start_clock, sample_clock; + start_clock = clock64(); + + while (flag_ref.load() == 0) { + sample_clock = clock64(); + + if (sample_clock - start_clock > timeout_clocks) { break; } + } +} + +static void launch_spin_kernel_stream_thread(rmm::cuda_stream_view stream_view, int* flag) +{ + while (true) { + int blocks = rand() % 64 + 1; + int threads = rand() % 1024 + 1; + spin_kernel<<>>(flag); + cudaStreamSynchronize(stream_view); + if (host_copy(flag, 1, stream_view)[0] != 0) { break; } + std::this_thread::sleep_for(std::chrono::milliseconds(rand() % 1000 + 1)); + } +} + +class spin_stream_raii_t { + public: + spin_stream_raii_t() + : flag(0, stream), spin_thread(launch_spin_kernel_stream_thread, stream.view(), flag.data()) + { + } + + ~spin_stream_raii_t() + { + int one = 1; + flag.set_value_async(one, stream); + spin_thread.join(); + } + + private: + rmm::cuda_stream stream; + rmm::device_scalar flag; + std::thread spin_thread; +}; + +} // namespace cuopt::linear_programming::test \ No newline at end of file diff --git a/cpp/tests/mip/diversity_test.cu b/cpp/tests/mip/diversity_test.cu new file mode 100644 index 0000000000..d41dcc83cb --- /dev/null +++ b/cpp/tests/mip/diversity_test.cu @@ -0,0 +1,427 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../linear_programming/utilities/pdlp_test_utilities.cuh" +#include "determinism_utils.cuh" +#include "mip_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::test { + +void init_handler(const raft::handle_t* handle_ptr) +{ + // Init cuBlas / cuSparse context here to avoid having it during solving time + RAFT_CUBLAS_TRY(raft::linalg::detail::cublassetpointermode( + handle_ptr->get_cublas_handle(), CUBLAS_POINTER_MODE_DEVICE, handle_ptr->get_stream())); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsesetpointermode( + handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); +} + +static void setup_device_symbols(rmm::cuda_stream_view stream_view) { (void)stream_view; } + +static uint32_t test_full_run_determinism(std::string path, + unsigned long seed = std::random_device{}(), + float work_limit = 10.0f) +{ + const raft::handle_t handle_{}; + + cuopt::mps_parser::mps_data_model_t mps_problem = + cuopt::mps_parser::parse_mps(path, false); + handle_.sync_stream(); + auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); + problem_checking_t::check_problem_representation(op_problem); + + init_handler(op_problem.get_handle_ptr()); + // run the problem constructor of MIP, so that we do bounds standardization + detail::problem_t problem(op_problem); + problem.deterministic = true; + problem.preprocess_problem(); + + setup_device_symbols(op_problem.get_handle_ptr()->get_stream()); + + pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; + detail::pdlp_initial_scaling_strategy_t scaling(&handle_, + problem, + 10, + 1.0, + problem.reverse_coefficients, + problem.reverse_offsets, + problem.reverse_constraints, + nullptr, + hyper_params, + true); + + auto settings = mip_solver_settings_t{}; + settings.time_limit = 3000.; + settings.work_limit = work_limit; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC_GPU_HEURISTICS; + settings.heuristics_only = true; + auto timer = cuopt::termination_checker_t(3000.0, cuopt::termination_checker_t::root_tag_t{}); + detail::mip_solver_t solver(problem, settings, scaling, timer); + problem.tolerances = settings.get_tolerances(); + + detail::diversity_manager_t diversity_manager(solver.context); + solver.context.gpu_heur_loop.deterministic = true; + diversity_manager.timer = + work_limit_timer_t(solver.context.gpu_heur_loop, settings.work_limit, timer); + diversity_manager.run_solver(); + + std::vector hashes; + auto pop = diversity_manager.get_population_pointer(); + for (const auto& sol : pop->population_to_vector()) { + hashes.push_back(sol.get_hash()); + } + + uint32_t final_hash = detail::compute_hash(hashes); + printf("%s: final hash: 0x%x, pop size %d\n", + path.c_str(), + final_hash, + (int)pop->population_to_vector().size()); + return final_hash; +} + +static uint32_t test_initial_solution_determinism(std::string path, + unsigned long seed = std::random_device{}()) +{ + const raft::handle_t handle_{}; + + cuopt::mps_parser::mps_data_model_t mps_problem = + cuopt::mps_parser::parse_mps(path, false); + handle_.sync_stream(); + auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); + problem_checking_t::check_problem_representation(op_problem); + + init_handler(op_problem.get_handle_ptr()); + // run the problem constructor of MIP, so that we do bounds standardization + detail::problem_t problem(op_problem); + problem.deterministic = true; + problem.preprocess_problem(); + + setup_device_symbols(op_problem.get_handle_ptr()->get_stream()); + + pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; + detail::pdlp_initial_scaling_strategy_t scaling(&handle_, + problem, + 10, + 1.0, + problem.reverse_coefficients, + problem.reverse_offsets, + problem.reverse_constraints, + nullptr, + hyper_params, + true); + + auto settings = mip_solver_settings_t{}; + settings.time_limit = 3000.; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC_GPU_HEURISTICS; + settings.heuristics_only = true; + auto timer = cuopt::termination_checker_t(3000.0, cuopt::termination_checker_t::root_tag_t{}); + detail::mip_solver_t solver(problem, settings, scaling, timer); + problem.tolerances = settings.get_tolerances(); + + detail::diversity_manager_t diversity_manager(solver.context); + work_limit_context_t work_limit_context("DiversityManager"); + work_limit_context.deterministic = true; + diversity_manager.timer = work_limit_timer_t(work_limit_context, 60000, timer); + diversity_manager.diversity_config.initial_solution_only = true; + diversity_manager.run_solver(); + + std::vector hashes; + auto pop = diversity_manager.get_population_pointer(); + for (const auto& sol : pop->population_to_vector()) { + hashes.push_back(sol.get_hash()); + } + + uint32_t final_hash = detail::compute_hash(hashes); + printf("%s: final hash: 0x%x, pop size %d\n", + path.c_str(), + final_hash, + (int)pop->population_to_vector().size()); + return final_hash; +} + +static uint32_t test_recombiners_determinism(std::string path, + unsigned long seed = std::random_device{}()) +{ + const raft::handle_t handle_{}; + + cuopt::mps_parser::mps_data_model_t mps_problem = + cuopt::mps_parser::parse_mps(path, false); + handle_.sync_stream(); + auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); + problem_checking_t::check_problem_representation(op_problem); + + init_handler(op_problem.get_handle_ptr()); + // run the problem constructor of MIP, so that we do bounds standardization + detail::problem_t problem(op_problem); + problem.deterministic = true; + problem.preprocess_problem(); + + setup_device_symbols(op_problem.get_handle_ptr()->get_stream()); + + pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; + detail::pdlp_initial_scaling_strategy_t scaling(&handle_, + problem, + 10, + 1.0, + problem.reverse_coefficients, + problem.reverse_offsets, + problem.reverse_constraints, + nullptr, + hyper_params, + true); + + auto settings = mip_solver_settings_t{}; + settings.time_limit = 3000.; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC_GPU_HEURISTICS; + settings.heuristics_only = true; + auto timer = cuopt::termination_checker_t(3000.0, cuopt::termination_checker_t::root_tag_t{}); + detail::mip_solver_t solver(problem, settings, scaling, timer); + problem.tolerances = settings.get_tolerances(); + + detail::diversity_manager_t diversity_manager(solver.context); + work_limit_context_t work_limit_context("DiversityManager"); + work_limit_context.deterministic = true; + diversity_manager.timer = work_limit_timer_t(work_limit_context, 60000, timer); + diversity_manager.diversity_config.dry_run = true; + diversity_manager.run_solver(); + + // Generate a population by running FJ on random starting points + // recombine a few solutions, observe the output + for (int i = diversity_manager.population.current_size(); i < 3; ++i) { + detail::solution_t random_initial_solution(problem); + random_initial_solution.assign_random_within_bounds(); + detail::fj_settings_t fj_settings; + fj_settings.feasibility_run = false; + fj_settings.iteration_limit = 1000 + i * 100; + fj_settings.seed = seed + i; + auto solution = run_fj(problem, + fj_settings, + fj_tweaks_t{}, + random_initial_solution.get_host_assignment(), + CUOPT_MODE_DETERMINISTIC) + .solution; + printf("population %d hash: 0x%x\n", i, solution.get_hash()); + diversity_manager.population.add_solution(std::move(solution)); + } + + auto pop_vector = diversity_manager.get_population_pointer()->population_to_vector(); + int pop_size = std::min(6, (int)pop_vector.size()); + + std::vector hashes; + + static std::map, uint32_t> hash_map; + + for (auto recombiner : {detail::recombiner_enum_t::LINE_SEGMENT, + detail::recombiner_enum_t::BOUND_PROP, + detail::recombiner_enum_t::FP}) { + for (int i = 1; i < pop_size; i++) { + for (int j = i + 1; j < pop_size; j++) { + printf("recombining %d and %d w/ recombiner %s\n", + i, + j, + detail::all_recombine_stats::recombiner_labels[(int)recombiner]); + auto [offspring, success] = + diversity_manager.recombine(pop_vector[i], pop_vector[j], recombiner); + auto offspring_hash = offspring.get_hash(); + printf("for %d,%d: offspring hash: 0x%x, parent 1 hash: 0x%x, parent 2 hash: 0x%x\n", + i, + j, + offspring_hash, + pop_vector[i].get_hash(), + pop_vector[j].get_hash()); + if (hash_map.find(std::make_tuple(path, i, j, recombiner)) == hash_map.end()) { + hash_map[std::make_tuple(path, i, j, recombiner)] = offspring_hash; + } else { + if (hash_map[std::make_tuple(path, i, j, recombiner)] != offspring_hash) { + printf("%s: hash mismatch for %d,%d: %d != %d\n", + path.c_str(), + i, + j, + hash_map[std::make_tuple(path, i, j, recombiner)], + offspring_hash); + exit(1); + } + } + hashes.push_back(offspring_hash); + } + } + } + return detail::compute_hash(hashes); + + auto pop = diversity_manager.get_population_pointer(); + for (const auto& sol : pop->population_to_vector()) { + hashes.push_back(sol.get_hash()); + } + + uint32_t final_hash = detail::compute_hash(hashes); + printf("%s: final hash: 0x%x, pop size %d\n", + path.c_str(), + final_hash, + (int)pop->population_to_vector().size()); + return final_hash; +} + +class DiversityTestParams : public testing::TestWithParam> {}; + +TEST_P(DiversityTestParams, recombiners_deterministic) +{ + // cuopt::init_logger_t log("", true); + cuopt::default_logger().set_pattern("[%n] [%-6l] %v"); + cuopt::default_logger().set_level(rapids_logger::level_enum::debug); + cuopt::default_logger().flush_on(rapids_logger::level_enum::debug); + + spin_stream_raii_t spin_stream_1; + spin_stream_raii_t spin_stream_2; + + auto test_instance = std::get<0>(GetParam()); + std::cout << "Running: " << test_instance << std::endl; + int seed = + std::getenv("CUOPT_SEED") ? std::stoi(std::getenv("CUOPT_SEED")) : std::random_device{}(); + std::cerr << "Tested with seed " << seed << "\n"; + auto path = make_path_absolute(test_instance); + test_instance = std::getenv("CUOPT_INSTANCE") ? std::getenv("CUOPT_INSTANCE") : test_instance; + uint32_t gold_hash = 0; + for (int i = 0; i < 2; ++i) { + cuopt::seed_generator::set_seed(seed); + std::cout << "Running " << test_instance << " " << i << std::endl; + std::cout << "-------------------------------------------------------------\n"; + auto hash = test_recombiners_determinism(path, seed); + if (i == 0) { + gold_hash = hash; + std::cout << "Gold hash: " << gold_hash << std::endl; + } else { + ASSERT_EQ(hash, gold_hash); + } + } +} + +TEST_P(DiversityTestParams, initial_solution_deterministic) +{ + cuopt::default_logger().set_pattern("[%n] [%-6l] %v"); + + spin_stream_raii_t spin_stream_1; + spin_stream_raii_t spin_stream_2; + + auto test_instance = std::get<0>(GetParam()); + std::cout << "Running: " << test_instance << std::endl; + int seed = + std::getenv("CUOPT_SEED") ? std::stoi(std::getenv("CUOPT_SEED")) : std::random_device{}(); + std::cerr << "Tested with seed " << seed << "\n"; + auto path = make_path_absolute(test_instance); + test_instance = std::getenv("CUOPT_INSTANCE") ? std::getenv("CUOPT_INSTANCE") : test_instance; + uint32_t gold_hash = 0; + for (int i = 0; i < 2; ++i) { + cuopt::seed_generator::set_seed(seed); + std::cout << "Running " << test_instance << " " << i << std::endl; + std::cout << "-------------------------------------------------------------\n"; + auto hash = test_initial_solution_determinism(path, seed); + if (i == 0) { + gold_hash = hash; + std::cout << "Gold hash: " << gold_hash << std::endl; + } else { + ASSERT_EQ(hash, gold_hash); + } + } +} + +TEST_P(DiversityTestParams, full_run_deterministic) +{ + cuopt::init_logger_t log("", true); + // cuopt::default_logger().set_pattern("[%n] [%-6l] %v"); + cuopt::default_logger().set_level(rapids_logger::level_enum::debug); + cuopt::default_logger().flush_on(rapids_logger::level_enum::debug); + + // spin_stream_raii_t spin_stream_1; + // spin_stream_raii_t spin_stream_2; + + auto test_instance = std::get<0>(GetParam()); + const float work_limit = std::get<1>(GetParam()); + std::cout << "Running: " << test_instance << std::endl; + int seed = + std::getenv("CUOPT_SEED") ? std::stoi(std::getenv("CUOPT_SEED")) : std::random_device{}(); + std::cerr << "Tested with seed " << seed << "\n"; + auto path = make_path_absolute(test_instance); + if (std::getenv("CUOPT_INSTANCE")) { + test_instance = std::getenv("CUOPT_INSTANCE"); + path = make_path_absolute(test_instance); + } + uint32_t gold_hash = 0; + for (int i = 0; i < 4; ++i) { + cuopt::seed_generator::set_seed(seed); + std::cout << "Running " << test_instance << " " << i << std::endl; + std::cout << "-------------------------------------------------------------\n"; + auto hash = test_full_run_determinism(path, seed, work_limit); + if (i == 0) { + gold_hash = hash; + std::cout << "Gold hash: " << gold_hash << std::endl; + } else { + ASSERT_EQ(hash, gold_hash); + } + } +} + +INSTANTIATE_TEST_SUITE_P(DiversityTest, + DiversityTestParams, + testing::Values( + // std::make_tuple("mip/gen-ip054.mps", 5.0f), + // std::make_tuple("mip/pk1.mps", 5.0f), + std::make_tuple("mip/uccase9.mps", 5.0f), + std::make_tuple("mip/sct2.mps", 5.0f), + std::make_tuple("mip/thor50dday.mps", 5.0f), + // std::make_tuple("uccase9.mps"), + // std::make_tuple("mip/neos5.mps", 5.0f), + std::make_tuple("mip/50v-10.mps", 5.0f) + // std::make_tuple("mip/rmatr200-p5.mps", 5.0f) + )); + +} // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/feasibility_jump_tests.cu b/cpp/tests/mip/feasibility_jump_tests.cu index baa3e9b803..b36fbcd316 100644 --- a/cpp/tests/mip/feasibility_jump_tests.cu +++ b/cpp/tests/mip/feasibility_jump_tests.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -45,28 +46,23 @@ void init_handler(const raft::handle_t* handle_ptr) handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); } -struct fj_tweaks_t { - double objective_weight = 0; -}; - -struct fj_state_t { - detail::solution_t solution; - std::vector solution_vector; - int minimums; - double incumbent_objective; - double incumbent_violation; -}; - // Helper function to setup MIP solver and run FJ with given settings and initial solution -static fj_state_t run_fj(std::string test_instance, - const detail::fj_settings_t& fj_settings, - fj_tweaks_t tweaks = {}, - std::vector initial_solution = {}) +static fj_state_t run_fj_instance(std::string test_instance, + const detail::fj_settings_t& fj_settings, + fj_tweaks_t tweaks = {}, + std::vector initial_solution = {}, + int determinism_mode = CUOPT_MODE_DETERMINISTIC) { const raft::handle_t handle_{}; std::cout << "Running: " << test_instance << std::endl; auto path = cuopt::test::get_rapids_dataset_root_dir() + ("/mip/" + test_instance); + + if (std::getenv("CUOPT_INSTANCE")) { + path = make_path_absolute(std::getenv("CUOPT_INSTANCE")); + std::cout << "Using instance from CUOPT_INSTANCE: " << path << std::endl; + } + cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); handle_.sync_stream(); @@ -77,46 +73,8 @@ static fj_state_t run_fj(std::string test_instance, // run the problem constructor of MIP, so that we do bounds standardization detail::problem_t problem(op_problem); problem.preprocess_problem(); - detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); - detail::pdlp_initial_scaling_strategy_t scaling(&handle_, - problem, - 10, - 1.0, - pdhg_solver, - problem.reverse_coefficients, - problem.reverse_offsets, - problem.reverse_constraints, - true); - - auto settings = mip_solver_settings_t{}; - settings.time_limit = 30.; - auto timer = cuopt::timer_t(30); - detail::mip_solver_t solver(problem, settings, scaling, timer); - - detail::solution_t solution(*solver.context.problem_ptr); - if (initial_solution.size() > 0) { - expand_device_copy(solution.assignment, initial_solution, solution.handle_ptr->get_stream()); - } else { - thrust::fill(solution.handle_ptr->get_thrust_policy(), - solution.assignment.begin(), - solution.assignment.end(), - 0.0); - } - solution.clamp_within_bounds(); - - detail::fj_t fj(solver.context, fj_settings); - fj.reset_weights(solution.handle_ptr->get_stream(), 1.); - fj.objective_weight.set_value_async(tweaks.objective_weight, solution.handle_ptr->get_stream()); - solution.handle_ptr->sync_stream(); - fj.solve(solution); - auto solution_vector = host_copy(solution.assignment, solution.handle_ptr->get_stream()); - - return {solution, - solution_vector, - fj.climbers[0]->local_minimums_reached.value(solution.handle_ptr->get_stream()), - fj.climbers[0]->incumbent_objective.value(solution.handle_ptr->get_stream()), - fj.climbers[0]->violation_score.value(solution.handle_ptr->get_stream())}; + return run_fj(problem, fj_settings, tweaks, initial_solution, determinism_mode); } // FJ had a bug causing objective/violation values to explode in magnitude in certain scenarios. @@ -126,12 +84,12 @@ static bool run_fj_check_no_obj_runoff(std::string test_instance) detail::fj_settings_t fj_settings; fj_settings.time_limit = 30.; fj_settings.mode = detail::fj_mode_t::EXIT_NON_IMPROVING; - fj_settings.n_of_minimums_for_exit = 20000 * 1000; + fj_settings.n_of_minimums_for_exit = 5000; fj_settings.update_weights = true; fj_settings.feasibility_run = false; fj_settings.iteration_limit = 20000; - auto state = run_fj(test_instance, fj_settings); + auto state = run_fj_instance(test_instance, fj_settings); // ensure that the objective and the violation in the FJ state are not too large (<1e60) EXPECT_LE(state.incumbent_violation, 1e60) << "FJ violation too large"; @@ -148,12 +106,13 @@ static bool run_fj_check_objective(std::string test_instance, int iter_limit, do detail::fj_settings_t fj_settings; fj_settings.time_limit = 30.; fj_settings.mode = detail::fj_mode_t::EXIT_NON_IMPROVING; - fj_settings.n_of_minimums_for_exit = 20000 * 1000; + fj_settings.n_of_minimums_for_exit = 5000; fj_settings.update_weights = true; fj_settings.feasibility_run = obj_target == +std::numeric_limits::infinity(); fj_settings.iteration_limit = iter_limit; - auto state = run_fj(test_instance, fj_settings); + auto state = + run_fj_instance(test_instance, fj_settings, fj_tweaks_t{}, {}, CUOPT_MODE_DETERMINISTIC); auto& solution = state.solution; CUOPT_LOG_DEBUG("%s: Solution generated with FJ: is_feasible %d, objective %g (raw %g)", @@ -175,12 +134,12 @@ static bool run_fj_check_feasible(std::string test_instance) detail::fj_settings_t fj_settings; fj_settings.time_limit = 30.; fj_settings.mode = detail::fj_mode_t::EXIT_NON_IMPROVING; - fj_settings.n_of_minimums_for_exit = 20000 * 1000; + fj_settings.n_of_minimums_for_exit = 5000; fj_settings.update_weights = true; fj_settings.feasibility_run = false; fj_settings.iteration_limit = 25000; - auto state = run_fj(test_instance, fj_settings); + auto state = run_fj_instance(test_instance, fj_settings); auto& solution = state.solution; bool previous_feasible = solution.get_feasible(); @@ -191,8 +150,8 @@ static bool run_fj_check_feasible(std::string test_instance) // again but with very large obj weight to force FJ into the infeasible region fj_tweaks_t tweaks; tweaks.objective_weight = 1e6; - auto new_state = run_fj(test_instance, fj_settings, tweaks, state.solution_vector); - auto& new_solution = new_state.solution; + auto new_state = run_fj_instance(test_instance, fj_settings, tweaks, state.solution_vector); + auto& new_solution = new_state.solution; CUOPT_LOG_DEBUG("%s: Solution generated with FJ: is_feasible %d, objective %g (raw %g)", test_instance.c_str(), @@ -207,63 +166,119 @@ static bool run_fj_check_feasible(std::string test_instance) return true; } -class MIPSolveParametricTest : public testing::TestWithParam> { -}; - -TEST_P(MIPSolveParametricTest, feasibility_jump_obj_test) +static bool run_fj_check_determinism(std::string test_instance, int iter_limit) { - auto [instance, obj_target, iter_limit] = GetParam(); - EXPECT_TRUE(run_fj_check_objective(instance, iter_limit, obj_target)); -} + detail::fj_settings_t fj_settings; + fj_settings.time_limit = std::numeric_limits::max(); + fj_settings.mode = detail::fj_mode_t::EXIT_NON_IMPROVING; + fj_settings.n_of_minimums_for_exit = 5000 * 1000; + // fj_settings.work_limit = 0.5; // run for 0.5wu (~0.5s) + fj_settings.update_weights = true; + fj_settings.feasibility_run = false; + fj_settings.iteration_limit = iter_limit; + fj_settings.load_balancing_mode = detail::fj_load_balancing_mode_t::ALWAYS_ON; + fj_settings.seed = cuopt::seed_generator::get_seed(); + + auto state = run_fj_instance(test_instance, fj_settings); + auto& solution = state.solution; -INSTANTIATE_TEST_SUITE_P( - MIPSolveTest, - MIPSolveParametricTest, - testing::Values( - // Bug: https://github.com/NVIDIA/cuopt/issues/214 - // std::make_tuple("50v-10.mps", 7800, 100000), - // std::make_tuple("fiball.mps", 140, 25000), - // std::make_tuple("rmatr200-p5.mps", 7000, 10000), - std::make_tuple("gen-ip054.mps", 7500, 20000), - std::make_tuple("sct2.mps", 100, 50000), - std::make_tuple("uccase9.mps", 4000000, 50000), - // unstable, prone to failure on slight weight changes - // std::make_tuple("drayage-25-23.mps", 300000, 50000), - std::make_tuple("tr12-30.mps", 300000, 50000), - std::make_tuple("neos-3004026-krka.mps", - +std::numeric_limits::infinity(), - 35000), // feasibility - // std::make_tuple("nursesched-medium-hint03.mps", 12000, 50000), // too large - std::make_tuple("ns1208400.mps", 2, 60000), - std::make_tuple("gmu-35-50.mps", -2300000, 25000), - std::make_tuple("n2seq36q.mps", 158800, 25000), - std::make_tuple("seymour1.mps", 440, 50000), - std::make_tuple("cvs16r128-89.mps", -50, 10000) -// TEMPORARY: occasional cusparse transpose issues on ARM in CI -#ifndef __aarch64__ - , - std::make_tuple("thor50dday.mps", 250000, 1000) -#endif - )); - -TEST(mip_solve, feasibility_jump_feas_test) -{ - for (const auto& instance : {"tr12-30.mps", - "sct2.mps" -#ifndef __aarch64__ - , - "thor50dday.mps" -#endif - }) { - run_fj_check_feasible(instance); + printf("%s[seed=%x]: Solution generated with FJ: is_feasible %d, objective %g (raw %g)", + test_instance.c_str(), + fj_settings.seed, + solution.get_feasible(), + solution.get_user_objective(), + solution.get_objective()); + + static std::unordered_map first_val_map; + if (first_val_map.count(test_instance) == 0) { + first_val_map[test_instance] = solution.get_user_objective(); } + EXPECT_NEAR(solution.get_user_objective(), first_val_map[test_instance], 1.0) + << test_instance << " determinism objective mismatch"; + + return true; } -TEST(mip_solve, feasibility_jump_obj_runoff_test) +// class MIPSolveParametricTest : public testing::TestWithParam> { +// }; + +// TEST_P(MIPSolveParametricTest, feasibility_jump_obj_test) +// { +// auto [instance, obj_target, iter_limit] = GetParam(); +// EXPECT_TRUE(run_fj_check_objective(instance, iter_limit, obj_target)); +// } + +// INSTANTIATE_TEST_SUITE_P( +// MIPSolveTest, +// MIPSolveParametricTest, +// testing::Values( +// // Bug: https://github.com/NVIDIA/cuopt/issues/214 +// // std::make_tuple("50v-10.mps", 7800, 100000), +// // std::make_tuple("fiball.mps", 140, 25000), +// // std::make_tuple("rmatr200-p5.mps", 7000, 10000), +// std::make_tuple("gen-ip054.mps", 7500, 20000), +// std::make_tuple("sct2.mps", 100, 50000), +// std::make_tuple("uccase9.mps", 4000000, 50000), +// // unstable, prone to failure on slight weight changes +// // std::make_tuple("drayage-25-23.mps", 300000, 50000), +// std::make_tuple("tr12-30.mps", 300000, 50000), +// std::make_tuple("neos-3004026-krka.mps", +// +std::numeric_limits::infinity(), +// 35000), // feasibility +// // std::make_tuple("nursesched-medium-hint03.mps", 12000, 50000), // too large +// std::make_tuple("ns1208400.mps", 2, 60000), +// std::make_tuple("gmu-35-50.mps", -2300000, 25000), +// std::make_tuple("n2seq36q.mps", 158800, 25000), +// std::make_tuple("seymour1.mps", 440, 50000), +// std::make_tuple("cvs16r128-89.mps", -50, 10000) +// // TEMPORARY: occasional cusparse transpose issues on ARM in CI +// #ifndef __aarch64__ +// , +// std::make_tuple("thor50dday.mps", 250000, 1000) +// #endif +// )); + +// TEST(mip_solve, feasibility_jump_feas_test) +// { +// for (const auto& instance : {"tr12-30.mps", +// "sct2.mps" +// #ifndef __aarch64__ +// , +// "thor50dday.mps" +// #endif +// }) { +// run_fj_check_feasible(instance); +// } +// } + +// TEST(mip_solve, feasibility_jump_obj_runoff_test) +// { +// for (const auto& instance : {"minrep_inf.mps", "sct2.mps", "uccase9.mps", +// /*"buildingenergy.mps"*/}) { +// run_fj_check_no_obj_runoff(instance); +// } +// } + +TEST(mip_solve, feasibility_jump_determinism) { - for (const auto& instance : {"minrep_inf.mps", "sct2.mps", "uccase9.mps", - /*"buildingenergy.mps"*/}) { - run_fj_check_no_obj_runoff(instance); + int seed = + std::getenv("CUOPT_SEED") ? std::stoi(std::getenv("CUOPT_SEED")) : std::random_device{}(); + + for (const auto& [instance, iter_limit] : {std::make_pair("thor50dday.mps", 1000), + std::make_pair("gen-ip054.mps", 1000), + std::make_pair("50v-10.mps", 1000), + std::make_pair("seymour1.mps", 1000), + std::make_pair("rmatr200-p5.mps", 1000), + std::make_pair("tr12-30.mps", 1000), + std::make_pair("sct2.mps", 1000), + std::make_pair("uccase9.mps", 1000), + std::make_pair("supportcase42.mps", 25000)}) { + for (int i = 0; i < 10; i++) { + // while (true) { + cuopt::seed_generator::set_seed(seed); + run_fj_check_determinism(instance, iter_limit); + } } } diff --git a/cpp/tests/mip/load_balancing_test.cu b/cpp/tests/mip/load_balancing_test.cu index 5e2f08007d..11e518e892 100644 --- a/cpp/tests/mip/load_balancing_test.cu +++ b/cpp/tests/mip/load_balancing_test.cu @@ -9,6 +9,7 @@ #include "mip_utils.cuh" #include +#include #include #include #include @@ -129,6 +130,7 @@ void test_multi_probe(std::string path) detail::problem_t problem(op_problem); mip_solver_settings_t default_settings{}; detail::pdhg_solver_t pdhg_solver(problem.handle_ptr, problem); + pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; detail::pdlp_initial_scaling_strategy_t scaling(&handle_, problem, 10, @@ -137,8 +139,10 @@ void test_multi_probe(std::string path) problem.reverse_offsets, problem.reverse_constraints, nullptr, + hyper_params, true); - detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); + auto timer = cuopt::termination_checker_t(0.0, cuopt::termination_checker_t::root_tag_t{}); + detail::mip_solver_t solver(problem, default_settings, scaling, timer); detail::load_balanced_problem_t lb_problem(problem); detail::load_balanced_bounds_presolve_t lb_prs(lb_problem, solver.context); diff --git a/cpp/tests/mip/local_search_test.cu b/cpp/tests/mip/local_search_test.cu new file mode 100644 index 0000000000..b5ef19d58a --- /dev/null +++ b/cpp/tests/mip/local_search_test.cu @@ -0,0 +1,264 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../linear_programming/utilities/pdlp_test_utilities.cuh" +#include "determinism_utils.cuh" +#include "mip_utils.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::test { + +void init_handler(const raft::handle_t* handle_ptr) +{ + // Init cuBlas / cuSparse context here to avoid having it during solving time + RAFT_CUBLAS_TRY(raft::linalg::detail::cublassetpointermode( + handle_ptr->get_cublas_handle(), CUBLAS_POINTER_MODE_DEVICE, handle_ptr->get_stream())); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsesetpointermode( + handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); +} + +static void setup_device_symbols(rmm::cuda_stream_view stream_view) { (void)stream_view; } + +enum local_search_mode_t { + FP = 0, + STAGED_FP, + FJ_LINE_SEGMENT, + FJ_ON_ZERO, + FJ_ANNEALING, +}; + +// Helper function to setup MIP solver and run FJ with given settings and initial solution +static uint32_t run_fp(std::string test_instance, local_search_mode_t mode) +{ + const raft::handle_t handle_{}; + std::cout << "Running: " << test_instance << std::endl; + + auto path = cuopt::test::get_rapids_dataset_root_dir() + ("/mip/" + test_instance); + cuopt::mps_parser::mps_data_model_t mps_problem = + cuopt::mps_parser::parse_mps(path, false); + handle_.sync_stream(); + auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); + problem_checking_t::check_problem_representation(op_problem); + + init_handler(op_problem.get_handle_ptr()); + // run the problem constructor of MIP, so that we do bounds standardization + auto settings = mip_solver_settings_t{}; + settings.time_limit = 120.; + settings.determinism_mode = CUOPT_MODE_DETERMINISTIC; + + detail::problem_t problem(op_problem, settings.get_tolerances(), true); + problem.preprocess_problem(); + + setup_device_symbols(op_problem.get_handle_ptr()->get_stream()); + + pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; + detail::pdlp_initial_scaling_strategy_t scaling(&handle_, + problem, + 10, + 1.0, + problem.reverse_coefficients, + problem.reverse_offsets, + problem.reverse_constraints, + nullptr, + hyper_params, + true); + auto timer = + cuopt::termination_checker_t(settings.time_limit, cuopt::termination_checker_t::root_tag_t{}); + detail::mip_solver_t solver(problem, settings, scaling, timer); + problem.tolerances = settings.get_tolerances(); + + rmm::device_uvector lp_optimal_solution(problem.n_variables, + problem.handle_ptr->get_stream()); + thrust::fill(problem.handle_ptr->get_thrust_policy(), + lp_optimal_solution.begin(), + lp_optimal_solution.end(), + 0.0); + detail::lp_state_t& lp_state = problem.lp_state; + // resize because some constructor might be called before the presolve + lp_state.resize(problem, problem.handle_ptr->get_stream()); + detail::relaxed_lp_settings_t lp_settings{}; + lp_settings.time_limit = std::numeric_limits::max(); + lp_settings.tolerance = 1e-6; + lp_settings.return_first_feasible = false; + lp_settings.save_state = false; + // lp_settings.iteration_limit = 5; + auto lp_result = + detail::get_relaxed_lp_solution(problem, lp_optimal_solution, lp_state, lp_settings); + EXPECT_EQ(lp_result.get_termination_status(), pdlp_termination_status_t::Optimal); + clamp_within_var_bounds(lp_optimal_solution, &problem, problem.handle_ptr); + + // return detail::compute_hash(lp_optimal_solution); + + detail::local_search_t local_search(solver.context, lp_optimal_solution); + + detail::solution_t solution(problem); + solution.assign_random_within_bounds(); + solution.compute_feasibility(); + + printf("Model fingerprint: 0x%x\n", problem.get_fingerprint()); + printf("LP optimal hash: 0x%x\n", + detail::compute_hash(make_span(lp_optimal_solution), problem.handle_ptr->get_stream())); + printf("running mode: %d\n", mode); + + work_limit_context_t work_limit_context("LocalSearch"); + work_limit_context.deterministic = true; + local_search.fp.timer = work_limit_timer_t(work_limit_context, 10, timer); + + detail::ls_config_t ls_config{}; + + if (mode == local_search_mode_t::FP) { + bool is_feasible = false; + int iterations = 0; + while (true) { + is_feasible = local_search.fp.run_single_fp_descent(solution); + printf("fp_loop it %d, is_feasible %d\n", iterations, is_feasible); + if (is_feasible) { + break; + } else { + is_feasible = local_search.fp.restart_fp(solution); + if (is_feasible) { break; } + } + iterations++; + } + } else if (mode == local_search_mode_t::FJ_LINE_SEGMENT) { + local_search.run_fj_line_segment( + solution, work_limit_timer_t(work_limit_context, 6000, timer), ls_config); + } else if (mode == local_search_mode_t::FJ_ON_ZERO) { + local_search.run_fj_on_zero(solution, work_limit_timer_t(work_limit_context, 6000, timer)); + } else if (mode == local_search_mode_t::FJ_ANNEALING) { + local_search.run_fj_annealing( + solution, work_limit_timer_t(work_limit_context, 6000, timer), ls_config); + } + + std::vector hashes; + hashes.push_back(detail::compute_hash(solution.get_host_assignment())); + printf("hashes: 0x%x, hash of the hash: 0x%x\n", hashes[0], detail::compute_hash(hashes)); + + return detail::compute_hash(hashes); + // return {host_copy(solution_vector, problem.handle_ptr->get_stream()), iterations}; +} + +static uint32_t run_fp_check_determinism(std::string test_instance, + local_search_mode_t mode, + unsigned long seed) +{ + cuopt::seed_generator::set_seed(seed); + + return run_fp(test_instance, mode); + + // auto state = run_fp(test_instance, fj_settings); + // auto& solution = state.solution; + + // CUOPT_LOG_DEBUG("%s: Solution generated with FJ: is_feasible %d, objective %g (raw %g)", + // test_instance.c_str(), + // solution.get_feasible(), + // solution.get_user_objective(), + // solution.get_objective()); + + // static auto first_val = solution.get_user_objective(); +} + +class LocalSearchTestParams : public testing::TestWithParam> {}; + +TEST_P(LocalSearchTestParams, local_search_operator_determinism) +{ + cuopt::init_logger_t log("", true); + cuopt::default_logger().set_pattern("[%n] [%-6l] %v"); + cuopt::default_logger().set_level(rapids_logger::level_enum::debug); + cuopt::default_logger().flush_on(rapids_logger::level_enum::debug); + + spin_stream_raii_t spin_stream_1; + spin_stream_raii_t spin_stream_2; + + auto mode = std::get<0>(GetParam()); + + for (const auto& instance : { + //"thor50dday.mps", + "gen-ip054.mps", + "50v-10.mps", + "seymour1.mps", + "rmatr200-p5.mps", + "tr12-30.mps", + //"sct2.mps", + //"uccase9.mps" + }) { + // for (int i = 0; i < 10; i++) + // while (true) { + // run_fp_check_determinism(instance, 1000); + // } + + unsigned long seed = std::getenv("CUOPT_SEED") + ? (unsigned long)std::stoi(std::getenv("CUOPT_SEED")) + : (unsigned long)std::random_device{}(); + std::cerr << "Tested with seed " << seed << "\n"; + uint32_t gold_hash = 0; + for (int i = 0; i < 5; ++i) { + uint32_t hash = run_fp_check_determinism(instance, mode, seed); + if (i == 0) { + gold_hash = hash; + printf("Gold hash: 0x%x\n", gold_hash); + } else { + ASSERT_EQ(hash, gold_hash); + printf("Hash: 0x%x\n", hash); + } + } + } +} + +INSTANTIATE_TEST_SUITE_P(LocalSearchTests, + LocalSearchTestParams, + testing::Values( + // std::make_tuple(local_search_mode_t::FP) + // std::make_tuple(local_search_mode_t::FJ_LINE_SEGMENT), + // std::make_tuple(local_search_mode_t::FJ_ON_ZERO), + std::make_tuple(local_search_mode_t::FJ_ANNEALING))); + +} // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/mip_utils.cuh b/cpp/tests/mip/mip_utils.cuh index 5c2b39d290..b0af0fecf8 100644 --- a/cpp/tests/mip/mip_utils.cuh +++ b/cpp/tests/mip/mip_utils.cuh @@ -8,9 +8,14 @@ #include #include #include +#include #include +#include +#include #include +#include #include +#include namespace cuopt::linear_programming::test { @@ -180,4 +185,66 @@ static std::tuple test_mps_file( solution.get_solution_bound()); } +struct fj_tweaks_t { + double objective_weight = 0; +}; + +struct fj_state_t { + detail::solution_t solution; + std::vector solution_vector; + int minimums; + double incumbent_objective; + double incumbent_violation; +}; + +static fj_state_t run_fj(detail::problem_t& problem, + const detail::fj_settings_t& fj_settings, + fj_tweaks_t tweaks = {}, + std::vector initial_solution = {}, + int determinism_mode = CUOPT_MODE_OPPORTUNISTIC) +{ + pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; + detail::pdlp_initial_scaling_strategy_t scaling(problem.handle_ptr, + problem, + 10, + 1.0, + problem.reverse_coefficients, + problem.reverse_offsets, + problem.reverse_constraints, + nullptr, + hyper_params, + true); + + auto settings = mip_solver_settings_t{}; + settings.time_limit = 30.; + settings.determinism_mode = determinism_mode; + auto timer = cuopt::termination_checker_t(30.0, cuopt::termination_checker_t::root_tag_t{}); + detail::mip_solver_t solver(problem, settings, scaling, timer); + + detail::solution_t solution(*solver.context.problem_ptr); + if (initial_solution.size() > 0) { + expand_device_copy(solution.assignment, initial_solution, solution.handle_ptr->get_stream()); + } else { + thrust::fill(solution.handle_ptr->get_thrust_policy(), + solution.assignment.begin(), + solution.assignment.end(), + 0.0); + } + solution.clamp_within_bounds(); + + detail::fj_t fj(solver.context, fj_settings); + fj.reset_weights(solution.handle_ptr->get_stream(), 1.); + fj.objective_weight.set_value_async(tweaks.objective_weight, solution.handle_ptr->get_stream()); + solution.handle_ptr->sync_stream(); + + fj.solve(solution); + auto solution_vector = host_copy(solution.assignment, solution.handle_ptr->get_stream()); + + return {solution, + solution_vector, + fj.climbers[0]->local_minimums_reached.value(solution.handle_ptr->get_stream()), + fj.climbers[0]->incumbent_objective.value(solution.handle_ptr->get_stream()), + fj.climbers[0]->violation_score.value(solution.handle_ptr->get_stream())}; +} + } // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/miplib_test.cu b/cpp/tests/mip/miplib_test.cu index 7607ad91f8..ac4312e04b 100644 --- a/cpp/tests/mip/miplib_test.cu +++ b/cpp/tests/mip/miplib_test.cu @@ -33,7 +33,9 @@ struct result_map_t { double cost; }; -void test_miplib_file(result_map_t test_instance, mip_solver_settings_t settings) +void test_miplib_file(result_map_t test_instance, + mip_solver_settings_t settings, + bool heuristic = false) { const raft::handle_t handle_{}; @@ -49,6 +51,7 @@ void test_miplib_file(result_map_t test_instance, mip_solver_settings_t solution = solve_mip(&handle_, problem, settings); bool is_feasible = solution.get_termination_status() == mip_termination_status_t::FeasibleFound || solution.get_termination_status() == mip_termination_status_t::Optimal; @@ -70,4 +73,13 @@ TEST(mip_solve, run_small_tests) } } +// TEST(mip_solve, run_small_tests_determinism) +// { +// std::vector test_instances = { +// {"mip/50v-10.mps", 11311031.}, {"mip/neos5.mps", 15.}, {"mip/swath1.mps", 1300.}}; +// for (const auto& test_instance : test_instances) { +// test_miplib_file(test_instance, true); +// } +// } + } // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/multi_probe_test.cu b/cpp/tests/mip/multi_probe_test.cu index 073c153486..b50fb2848c 100644 --- a/cpp/tests/mip/multi_probe_test.cu +++ b/cpp/tests/mip/multi_probe_test.cu @@ -6,6 +6,7 @@ /* clang-format on */ #include "../linear_programming/utilities/pdlp_test_utilities.cuh" +#include "determinism_utils.cuh" #include "mip_utils.cuh" #include @@ -43,9 +44,10 @@ void init_handler(const raft::handle_t* handle_ptr) } std::tuple, std::vector, std::vector> select_k_random( - detail::problem_t& problem, int sample_size) + detail::problem_t& problem, + int sample_size, + unsigned long seed = std::random_device{}()) { - auto seed = std::random_device{}(); std::cerr << "Tested with seed " << seed << "\n"; problem.compute_n_integer_vars(); auto [v_lb, v_ub] = extract_host_bounds(problem.variable_bounds, problem.handle_ptr); @@ -138,10 +140,8 @@ multi_probe_results( std::move(h_lb_0), std::move(h_ub_0), std::move(h_lb_1), std::move(h_ub_1)); } -void test_multi_probe(std::string path) +uint32_t test_multi_probe(std::string path, unsigned long seed = std::random_device{}()) { - auto memory_resource = make_async(); - rmm::mr::set_current_device_resource(memory_resource.get()); const raft::handle_t handle_{}; cuopt::mps_parser::mps_data_model_t mps_problem = cuopt::mps_parser::parse_mps(path, false); @@ -161,12 +161,13 @@ void test_multi_probe(std::string path) nullptr, hyper_params, true); - detail::mip_solver_t solver(problem, default_settings, scaling, cuopt::timer_t(0)); + auto timer = cuopt::termination_checker_t(0.0, cuopt::termination_checker_t::root_tag_t{}); + detail::mip_solver_t solver(problem, default_settings, scaling, timer); detail::bound_presolve_t bnd_prb_0(solver.context); detail::bound_presolve_t bnd_prb_1(solver.context); detail::multi_probe_t multi_probe_prs(solver.context); - auto probe_tuple = select_k_random(problem, 100); + auto probe_tuple = select_k_random(problem, 100, seed); auto bounds_probe_vals = convert_probe_tuple(probe_tuple); auto [bnd_lb_0, bnd_ub_0, bnd_lb_1, bnd_ub_1] = @@ -185,6 +186,16 @@ void test_multi_probe(std::string path) auto mlp_min_act_1 = host_copy(multi_probe_prs.upd_1.min_activity, stream); auto mlp_max_act_1 = host_copy(multi_probe_prs.upd_1.max_activity, stream); + std::vector hashes; + hashes.push_back(detail::compute_hash(bnd_min_act_0)); + hashes.push_back(detail::compute_hash(bnd_min_act_1)); + hashes.push_back(detail::compute_hash(bnd_max_act_0)); + hashes.push_back(detail::compute_hash(bnd_max_act_1)); + hashes.push_back(detail::compute_hash(bnd_lb_0)); + hashes.push_back(detail::compute_hash(bnd_ub_0)); + hashes.push_back(detail::compute_hash(bnd_lb_1)); + hashes.push_back(detail::compute_hash(bnd_ub_1)); + for (int i = 0; i < (int)bnd_min_act_0.size(); ++i) { EXPECT_DOUBLE_EQ(bnd_min_act_0[i], mlp_min_act_0[i]); EXPECT_DOUBLE_EQ(bnd_max_act_0[i], mlp_max_act_0[i]); @@ -198,6 +209,9 @@ void test_multi_probe(std::string path) EXPECT_DOUBLE_EQ(bnd_lb_1[i], m_lb_1[i]); EXPECT_DOUBLE_EQ(bnd_ub_1[i], m_ub_1[i]); } + + // return a composite hash of all the hashes to check for determinism + return detail::compute_hash(hashes); } TEST(presolve, multi_probe) @@ -211,4 +225,29 @@ TEST(presolve, multi_probe) } } +TEST(presolve, multi_probe_deterministic) +{ + spin_stream_raii_t spin_stream_1; + + std::vector test_instances = { + "mip/50v-10-free-bound.mps", + "mip/neos5-free-bound.mps", + "mip/neos5.mps", + "mip/50v-10.mps", + }; + for (const auto& test_instance : test_instances) { + std::cout << "Running: " << test_instance << std::endl; + unsigned long seed = std::random_device{}(); + auto path = make_path_absolute(test_instance); + uint32_t gold_hash = 0; + for (int i = 0; i < 10; ++i) { + auto hash = test_multi_probe(path, seed); + if (i == 0) { + gold_hash = hash; + } else { + EXPECT_EQ(hash, gold_hash); + } + } + } +} } // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/presolve_test.cu b/cpp/tests/mip/presolve_test.cu index cf8abd1b69..2fa1cf9dc4 100644 --- a/cpp/tests/mip/presolve_test.cu +++ b/cpp/tests/mip/presolve_test.cu @@ -6,12 +6,22 @@ /* clang-format on */ #include "../linear_programming/utilities/pdlp_test_utilities.cuh" +#include "determinism_utils.cuh" +#include "mip_utils.cuh" +#include +#include #include +#include +#include #include +#include #include +#include #include #include +#include +#include #include #include #include @@ -29,6 +39,184 @@ namespace cuopt::linear_programming::test { +void init_handler(const raft::handle_t* handle_ptr) +{ + // Init cuBlas / cuSparse context here to avoid having it during solving time + RAFT_CUBLAS_TRY(raft::linalg::detail::cublassetpointermode( + handle_ptr->get_cublas_handle(), CUBLAS_POINTER_MODE_DEVICE, handle_ptr->get_stream())); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsesetpointermode( + handle_ptr->get_cusparse_handle(), CUSPARSE_POINTER_MODE_DEVICE, handle_ptr->get_stream())); +} + +std::tuple, std::vector, std::vector> select_k_random( + detail::problem_t& problem, + int sample_size, + unsigned long seed = std::random_device{}()) +{ + std::cerr << "Tested with seed " << seed << "\n"; + problem.compute_n_integer_vars(); + auto [v_lb, v_ub] = extract_host_bounds(problem.variable_bounds, problem.handle_ptr); + auto int_var_id = host_copy(problem.integer_indices, problem.handle_ptr->get_stream()); + int_var_id.erase( + std::remove_if(int_var_id.begin(), + int_var_id.end(), + [v_lb_sp = v_lb, v_ub_sp = v_ub](auto id) { + return !(std::isfinite(v_lb_sp[id]) && std::isfinite(v_ub_sp[id])); + }), + int_var_id.end()); + sample_size = std::min(sample_size, static_cast(int_var_id.size())); + std::vector random_int_vars; + std::mt19937 m{seed}; + std::sample( + int_var_id.begin(), int_var_id.end(), std::back_inserter(random_int_vars), sample_size, m); + std::vector probe_0(sample_size); + std::vector probe_1(sample_size); + for (int i = 0; i < static_cast(random_int_vars.size()); ++i) { + if (i % 2) { + probe_0[i] = v_lb[random_int_vars[i]]; + probe_1[i] = v_ub[random_int_vars[i]]; + } else { + probe_1[i] = v_lb[random_int_vars[i]]; + probe_0[i] = v_ub[random_int_vars[i]]; + } + } + return std::make_tuple(std::move(random_int_vars), std::move(probe_0), std::move(probe_1)); +} + +std::pair>, std::vector>> +convert_probe_tuple(std::tuple, std::vector, std::vector>& probe) +{ + std::vector> probe_first; + std::vector> probe_second; + for (size_t i = 0; i < std::get<0>(probe).size(); ++i) { + probe_first.emplace_back(thrust::make_pair(std::get<0>(probe)[i], std::get<1>(probe)[i])); + probe_second.emplace_back(thrust::make_pair(std::get<0>(probe)[i], std::get<2>(probe)[i])); + } + return std::make_pair(std::move(probe_first), std::move(probe_second)); +} + +uint32_t test_probing_cache_determinism(std::string path, + unsigned long seed = std::random_device{}()) +{ + const raft::handle_t handle_{}; + cuopt::mps_parser::mps_data_model_t mps_problem = + cuopt::mps_parser::parse_mps(path, false); + handle_.sync_stream(); + auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); + problem_checking_t::check_problem_representation(op_problem); + detail::problem_t problem(op_problem); + mip_solver_settings_t default_settings{}; + default_settings.mip_scaling = false; // we're not checking scaling determinism here + pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; + detail::pdlp_initial_scaling_strategy_t scaling(&handle_, + problem, + 10, + 1.0, + problem.reverse_coefficients, + problem.reverse_offsets, + problem.reverse_constraints, + nullptr, + hyper_params, + true); + auto timer = cuopt::termination_checker_t(0.0, cuopt::termination_checker_t::root_tag_t{}); + detail::mip_solver_t solver(problem, default_settings, scaling, timer); + detail::bound_presolve_t bnd_prb(solver.context); + + work_limit_context_t work_limit_context("ProbingCache"); + // rely on the iteration limit + compute_probing_cache( + bnd_prb, + problem, + work_limit_timer_t(work_limit_context, std::numeric_limits::max(), timer)); + std::vector, 2>>> cached_values( + bnd_prb.probing_cache.probing_cache.begin(), bnd_prb.probing_cache.probing_cache.end()); + std::sort(cached_values.begin(), cached_values.end(), [](const auto& a, const auto& b) { + return a.first < b.first; + }); + + std::vector probed_indices; + std::vector intervals; + std::vector interval_types; + + std::vector var_to_cached_bound_keys; + std::vector var_to_cached_bound_lb; + std::vector var_to_cached_bound_ub; + for (const auto& a : cached_values) { + probed_indices.push_back(a.first); + intervals.push_back(a.second[0].val_interval.val); + intervals.push_back(a.second[1].val_interval.val); + interval_types.push_back(a.second[0].val_interval.interval_type); + interval_types.push_back(a.second[1].val_interval.interval_type); + + auto sorted_map = std::map>( + a.second[0].var_to_cached_bound_map.begin(), a.second[0].var_to_cached_bound_map.end()); + for (const auto& [var_id, cached_bound] : sorted_map) { + var_to_cached_bound_keys.push_back(var_id); + var_to_cached_bound_lb.push_back(cached_bound.lb); + var_to_cached_bound_ub.push_back(cached_bound.ub); + } + } + + std::vector hashes; + hashes.push_back(detail::compute_hash(probed_indices)); + hashes.push_back(detail::compute_hash(intervals)); + hashes.push_back(detail::compute_hash(interval_types)); + hashes.push_back(detail::compute_hash(var_to_cached_bound_keys)); + hashes.push_back(detail::compute_hash(var_to_cached_bound_lb)); + hashes.push_back(detail::compute_hash(var_to_cached_bound_ub)); + + // return a composite hash of all the hashes to check for determinism + return detail::compute_hash(hashes); +} + +uint32_t test_scaling_determinism(std::string path, unsigned long seed = std::random_device{}()) +{ + const raft::handle_t handle_{}; + cuopt::mps_parser::mps_data_model_t mps_problem = + cuopt::mps_parser::parse_mps(path, false); + handle_.sync_stream(); + auto op_problem = mps_data_model_to_optimization_problem(&handle_, mps_problem); + problem_checking_t::check_problem_representation(op_problem); + detail::problem_t problem(op_problem); + + pdlp_hyper_params::pdlp_hyper_params_t hyper_params{}; + hyper_params.update_primal_weight_on_initial_solution = false; + hyper_params.update_step_size_on_initial_solution = true; + // problem contains unpreprocessed data + detail::problem_t scaled_problem(problem); + + detail::pdlp_initial_scaling_strategy_t scaling( + scaled_problem.handle_ptr, + scaled_problem, + hyper_params.default_l_inf_ruiz_iterations, + (double)hyper_params.default_alpha_pock_chambolle_rescaling, + scaled_problem.reverse_coefficients, + scaled_problem.reverse_offsets, + scaled_problem.reverse_constraints, + nullptr, + hyper_params, + true); + + scaling.scale_problem(); + + // generate a random initial solution in order to ensure scaling of solution vectors is + // deterministic as well as the initial step size + std::vector initial_solution(scaled_problem.n_variables); + std::mt19937 m{seed}; + std::generate(initial_solution.begin(), initial_solution.end(), [&m]() { return m(); }); + auto d_initial_solution = device_copy(initial_solution, handle_.get_stream()); + scaling.scale_primal(d_initial_solution); + + scaled_problem.preprocess_problem(); + + detail::trivial_presolve(scaled_problem); + + std::vector hashes; + hashes.push_back(detail::compute_hash(d_initial_solution, handle_.get_stream())); + hashes.push_back(scaled_problem.get_fingerprint()); + return detail::compute_hash(hashes); +} + TEST(problem, find_implied_integers) { const raft::handle_t handle_{}; @@ -62,4 +250,63 @@ TEST(problem, find_implied_integers) ((int)detail::problem_t::var_flags_t::VAR_IMPLIED_INTEGER)); } +TEST(presolve, probing_cache_deterministic) +{ + spin_stream_raii_t spin_stream_1; + + std::vector test_instances = {"mip/50v-10-free-bound.mps", + "mip/neos5-free-bound.mps", + "mip/neos5.mps", + "mip/50v-10.mps", + "mip/gen-ip054.mps", + "mip/rmatr200-p5.mps"}; + for (const auto& test_instance : test_instances) { + std::cout << "Running: " << test_instance << std::endl; + unsigned long seed = std::random_device{}(); + std::cerr << "Tested with seed " << seed << "\n"; + auto path = make_path_absolute(test_instance); + uint32_t gold_hash = 0; + for (int i = 0; i < 10; ++i) { + auto hash = test_probing_cache_determinism(path, seed); + if (i == 0) { + gold_hash = hash; + std::cout << "Gold hash: " << gold_hash << std::endl; + } else { + EXPECT_EQ(hash, gold_hash); + } + } + } +} + +TEST(presolve, mip_scaling_deterministic) +{ + spin_stream_raii_t spin_stream_1; + spin_stream_raii_t spin_stream_2; + + std::vector test_instances = {"mip/sct2.mps", + "mip/thor50dday.mps", + "mip/uccase9.mps", + "mip/neos5-free-bound.mps", + "mip/neos5.mps", + "mip/50v-10.mps", + "mip/gen-ip054.mps", + "mip/rmatr200-p5.mps"}; + for (const auto& test_instance : test_instances) { + std::cout << "Running: " << test_instance << std::endl; + unsigned long seed = std::random_device{}(); + std::cerr << "Tested with seed " << seed << "\n"; + auto path = make_path_absolute(test_instance); + uint32_t gold_hash = 0; + for (int i = 0; i < 10; ++i) { + auto hash = test_scaling_determinism(path, seed); + if (i == 0) { + gold_hash = hash; + std::cout << "Gold hash: " << gold_hash << std::endl; + } else { + EXPECT_EQ(hash, gold_hash); + } + } + } +} + } // namespace cuopt::linear_programming::test diff --git a/cpp/tests/mip/problem_test.cu b/cpp/tests/mip/problem_test.cu index 28f4f1f955..92fa6d41d1 100644 --- a/cpp/tests/mip/problem_test.cu +++ b/cpp/tests/mip/problem_test.cu @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -204,6 +205,95 @@ TEST(problem, run_small_tests) } } +namespace ds = cuopt::linear_programming::dual_simplex; + +template +void test_roundtrip_equivalence(i_t n_cnst, i_t n_var) +{ + raft::handle_t handle; + auto op_problem = create_problem(&handle, n_cnst, n_var); + dtl::problem_t problem(op_problem); + problem.preprocess_problem(); + + auto stream = handle.get_stream(); + + const auto n_constraints_before = problem.n_constraints; + const auto n_variables_before = problem.n_variables; + const auto nnz_before = problem.nnz; + + auto coefficients_before = cuopt::host_copy(problem.coefficients, stream); + auto variables_before = cuopt::host_copy(problem.variables, stream); + auto offsets_before = cuopt::host_copy(problem.offsets, stream); + auto constraint_lower_before = cuopt::host_copy(problem.constraint_lower_bounds, stream); + auto constraint_upper_before = cuopt::host_copy(problem.constraint_upper_bounds, stream); + auto variable_bounds_before = cuopt::host_copy(problem.variable_bounds, stream); + auto objective_before = cuopt::host_copy(problem.objective_coefficients, stream); + auto reverse_coefficients_before = cuopt::host_copy(problem.reverse_coefficients, stream); + auto reverse_constraints_before = cuopt::host_copy(problem.reverse_constraints, stream); + auto reverse_offsets_before = cuopt::host_copy(problem.reverse_offsets, stream); + + ds::user_problem_t host_problem(problem.handle_ptr); + problem.get_host_user_problem(host_problem); + + problem.set_constraints_from_host_user_problem(host_problem); + ASSERT_EQ(host_problem.lower.size(), static_cast(problem.n_variables)); + ASSERT_EQ(host_problem.upper.size(), static_cast(problem.n_variables)); + std::vector all_var_indices(problem.n_variables); + std::iota(all_var_indices.begin(), all_var_indices.end(), 0); + problem.update_variable_bounds(all_var_indices, host_problem.lower, host_problem.upper); + + EXPECT_EQ(problem.n_constraints, n_constraints_before); + EXPECT_EQ(problem.n_variables, n_variables_before); + EXPECT_EQ(problem.nnz, nnz_before); + + auto coefficients_after = cuopt::host_copy(problem.coefficients, stream); + auto variables_after = cuopt::host_copy(problem.variables, stream); + auto offsets_after = cuopt::host_copy(problem.offsets, stream); + auto constraint_lower_after = cuopt::host_copy(problem.constraint_lower_bounds, stream); + auto constraint_upper_after = cuopt::host_copy(problem.constraint_upper_bounds, stream); + auto variable_bounds_after = cuopt::host_copy(problem.variable_bounds, stream); + auto objective_after = cuopt::host_copy(problem.objective_coefficients, stream); + auto reverse_coefficients_after = cuopt::host_copy(problem.reverse_coefficients, stream); + auto reverse_constraints_after = cuopt::host_copy(problem.reverse_constraints, stream); + auto reverse_offsets_after = cuopt::host_copy(problem.reverse_offsets, stream); + + EXPECT_EQ(coefficients_before, coefficients_after) << "CSR coefficients differ"; + EXPECT_EQ(variables_before, variables_after) << "CSR column indices differ"; + EXPECT_EQ(offsets_before, offsets_after) << "CSR row offsets differ"; + EXPECT_EQ(objective_before, objective_after) << "objective coefficients differ"; + EXPECT_EQ(reverse_constraints_before, reverse_constraints_after) << "reverse constraints differ"; + EXPECT_EQ(reverse_offsets_before, reverse_offsets_after) << "reverse offsets differ"; + EXPECT_EQ(reverse_coefficients_before, reverse_coefficients_after) + << "reverse coefficients differ"; + + ASSERT_EQ(constraint_lower_before.size(), constraint_lower_after.size()); + for (size_t i = 0; i < constraint_lower_before.size(); ++i) { + EXPECT_NEAR(constraint_lower_before[i], constraint_lower_after[i], 1e-10) + << "constraint_lower_bounds[" << i << "]"; + } + ASSERT_EQ(constraint_upper_before.size(), constraint_upper_after.size()); + for (size_t i = 0; i < constraint_upper_before.size(); ++i) { + EXPECT_NEAR(constraint_upper_before[i], constraint_upper_after[i], 1e-10) + << "constraint_upper_bounds[" << i << "]"; + } + + ASSERT_EQ(variable_bounds_before.size(), variable_bounds_after.size()); + for (size_t i = 0; i < variable_bounds_before.size(); ++i) { + EXPECT_DOUBLE_EQ(variable_bounds_before[i].x, variable_bounds_after[i].x) + << "variable_bounds[" << i << "].lower"; + EXPECT_DOUBLE_EQ(variable_bounds_before[i].y, variable_bounds_after[i].y) + << "variable_bounds[" << i << "].upper"; + } +} + +TEST(problem, get_set_host_user_problem_roundtrip_preserves_problem) +{ + std::vector> cnst_var_vals = {{5, 20}, {20, 80}, {40, 200}}; + for (const auto& [nc, nv] : cnst_var_vals) { + test_roundtrip_equivalence(nc, nv); + } +} + static void fill_problem(optimization_problem_t& op_problem) { // Set A_CSR_matrix diff --git a/cpp/tests/mip/unit_test.cu b/cpp/tests/mip/unit_test.cu index 29b58b736b..9c6d5a3c09 100644 --- a/cpp/tests/mip/unit_test.cu +++ b/cpp/tests/mip/unit_test.cu @@ -283,4 +283,25 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple( false, false, false, cuopt::linear_programming::mip_termination_status_t::Optimal))); +// TEST_P(MILPTestParams, TestDeterminism) +// { +// bool maximize = std::get<0>(GetParam()); +// bool scaling = std::get<1>(GetParam()); +// bool heuristics_only = std::get<2>(GetParam()); +// auto expected_termination_status = std::get<3>(GetParam()); + +// raft::handle_t handle; +// auto problem = create_std_milp_problem(maximize); + +// cuopt::linear_programming::mip_solver_settings_t settings{}; +// settings.mip_scaling = true; +// settings.heuristics_only = true; +// settings.presolve = true; +// settings.deterministic = true; + +// auto result = cuopt::linear_programming::solve_mip(&handle, problem, settings); + +// EXPECT_EQ(result.get_termination_status(), expected_termination_status); +// } + } // namespace cuopt::linear_programming::test diff --git a/datasets/mip/download_miplib_test_dataset.sh b/datasets/mip/download_miplib_test_dataset.sh index dc2dd79662..f8df265618 100755 --- a/datasets/mip/download_miplib_test_dataset.sh +++ b/datasets/mip/download_miplib_test_dataset.sh @@ -20,10 +20,12 @@ INSTANCES=( "thor50dday" "stein9inf" "neos5" + "neos8" "swath1" "enlight_hard" "enlight11" "supportcase22" + "supportcase42" ) BASE_URL="https://miplib.zib.de/WebData/instances" diff --git a/dependencies.yaml b/dependencies.yaml index 011dfbcee6..014889c7d5 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -346,8 +346,8 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - numba-cuda>=0.22.1,<0.23.0 - - numba>=0.60.0 + - numba-cuda>=0.22.1 + - numba>=0.60.0,<0.65.0 - &pandas pandas>=2.0 - &pyyaml pyyaml>=6.0.0 - scipy>=1.14.1 @@ -373,10 +373,10 @@ dependencies: - &requests requests - output_types: [requirements, pyproject] packages: - - &msgpack msgpack==1.1.0 + - &msgpack msgpack==1.1.2 - output_types: conda packages: - - &msgpack_python msgpack-python==1.1.0 + - &msgpack_python msgpack-python==1.1.2 run_cuopt_server: common: @@ -784,5 +784,9 @@ dependencies: packages: - python=3.13 - matrix: + py: "3.14" packages: - - python>=3.11,<3.14 + - python=3.14 + - matrix: + packages: + - python>=3.11,<3.15 diff --git a/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst index 43d15eca64..d9a42301cb 100644 --- a/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst +++ b/docs/cuopt/source/cuopt-c/lp-qp-milp/lp-qp-milp-c-api.rst @@ -187,6 +187,7 @@ These constants are used as parameter names in the :c:func:`cuOptSetParameter`, .. doxygendefine:: CUOPT_SOLUTION_FILE .. doxygendefine:: CUOPT_NUM_CPU_THREADS .. doxygendefine:: CUOPT_USER_PROBLEM_FILE +.. doxygendefine:: CUOPT_PDLP_PRECISION .. _pdlp-solver-mode-constants: @@ -201,6 +202,18 @@ These constants are used to configure `CUOPT_PDLP_SOLVER_MODE` via :c:func:`cuOp .. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_METHODICAL1 .. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_FAST1 +.. _pdlp-precision-constants: + +PDLP Precision Constants +------------------------ + +These constants are used to configure `CUOPT_PDLP_PRECISION` via :c:func:`cuOptSetIntegerParameter`. + +.. doxygendefine:: CUOPT_PDLP_DEFAULT_PRECISION +.. doxygendefine:: CUOPT_PDLP_SINGLE_PRECISION +.. doxygendefine:: CUOPT_PDLP_DOUBLE_PRECISION +.. doxygendefine:: CUOPT_PDLP_MIXED_PRECISION + .. _method-constants: Method Constants diff --git a/docs/cuopt/source/lp-qp-features.rst b/docs/cuopt/source/lp-qp-features.rst index 4bd178ed53..e3cbddbb05 100644 --- a/docs/cuopt/source/lp-qp-features.rst +++ b/docs/cuopt/source/lp-qp-features.rst @@ -157,6 +157,17 @@ Batch Mode Users can submit a set of problems which will be solved in a batch. Problems will be solved at the same time in parallel to fully utilize the GPU. Checkout :ref:`self-hosted client ` example in thin client. +PDLP Precision Modes +-------------------- + +By default, PDLP operates in the native precision of the problem type (FP64 for double-precision problems). The ``pdlp_precision`` parameter provides several modes: + +- **single**: Run PDLP internally in FP32, with automatic conversion of inputs and outputs. FP32 uses half the memory and allows PDHG iterations to be on average twice as fast, but may require more iterations to converge. Compatible with crossover (solution is converted back to FP64 before crossover) and concurrent mode (PDLP runs in FP32 while other solvers run in FP64). +- **mixed**: Use mixed precision SpMV during PDHG iterations. The constraint matrix is stored in FP32 while vectors and compute type remain in FP64, improving SpMV performance with limited impact on convergence. Convergence checking and restart logic always use the full FP64 matrix. +- **double**: Explicitly run in FP64 (same as default for double-precision problems). + +.. note:: The default precision is the native type of the problem (FP64 for double). + Multi-GPU Mode -------------- diff --git a/docs/cuopt/source/lp-qp-milp-settings.rst b/docs/cuopt/source/lp-qp-milp-settings.rst index bd1372f70e..29c27a4ac2 100644 --- a/docs/cuopt/source/lp-qp-milp-settings.rst +++ b/docs/cuopt/source/lp-qp-milp-settings.rst @@ -192,6 +192,27 @@ Per Constraint Residual .. note:: The default value is false. +PDLP Precision +^^^^^^^^^^^^^^ + +``CUOPT_PDLP_PRECISION`` controls the precision mode used by the PDLP solver. The following modes are +available: + +- **default** (-1): Use the native precision of the problem type (FP64 for double-precision problems). +- **single** (0): Run PDLP internally in FP32 (float). Inputs are converted from FP64 to FP32 before + solving and outputs are converted back to FP64. FP32 uses half the memory and allows PDHG iterations + to be on average twice as fast, but may require more iterations to converge due to reduced numerical + accuracy. Compatible with crossover (solution is converted back to FP64 before crossover runs) and + concurrent mode (the PDLP leg runs in FP32 while Dual Simplex and Barrier run in FP64). +- **double** (1): Explicitly run in FP64 (same as default for double-precision problems). +- **mixed** (2): Use mixed precision sparse matrix-vector products (SpMV) during PDHG iterations. The + constraint matrix and its transpose are stored in FP32 while vectors and the compute type remain in + FP64, improving SpMV performance. Convergence checking and restart logic always use the + full FP64 matrix, so this mode does not reduce overall memory usage. This provides a middle ground + between full FP64 and FP32: faster PDHG iterations with limited impact on convergence. + +.. note:: The default value is 0 (default precision). + Barrier Solver Settings ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/cuopt/source/system-requirements.rst b/docs/cuopt/source/system-requirements.rst index 92ac2d70fc..bbc37f26e9 100644 --- a/docs/cuopt/source/system-requirements.rst +++ b/docs/cuopt/source/system-requirements.rst @@ -26,7 +26,7 @@ Dependencies are installed automatically when using the pip and Conda installati - 12.0+ * Python: - - >= 3.11.* and <= 3.13.* + - >= 3.11.* and <= 3.14.* * NVIDIA drivers: - 525.60.13+ (Linux) @@ -98,4 +98,4 @@ Thin-client for Self-Hosted - x86-64 - ARM64 -* Python >= 3.11.x <= 3.13.x +* Python >= 3.11.x <= 3.14.x diff --git a/gemini-extension.json b/gemini-extension.json new file mode 100644 index 0000000000..b4c6b764a4 --- /dev/null +++ b/gemini-extension.json @@ -0,0 +1,6 @@ +{ + "name": "nvidia-cuopt-skills", + "description": "Agent skills for NVIDIA cuOpt optimization engine: routing, LP/MILP/QP, installation, and server.", + "version": "26.04.00", + "contextFileName": "AGENTS.md" +} diff --git a/python/cuopt/cuopt/linear_programming/problem.py b/python/cuopt/cuopt/linear_programming/problem.py index baf9716191..5821fd3a93 100644 --- a/python/cuopt/cuopt/linear_programming/problem.py +++ b/python/cuopt/cuopt/linear_programming/problem.py @@ -195,6 +195,12 @@ def getVariableName(self): """ return self.VariableName + def __neg__(self): + return LinearExpression([self], [-1.0], 0.0) + + def __pos__(self): + return self + def __add__(self, other): match other: case int() | float(): @@ -204,6 +210,8 @@ def __add__(self, other): return LinearExpression([self, other], [1.0, 1.0], 0.0) case LinearExpression(): return other + self + case QuadraticExpression(): + return other + self case _: raise ValueError( "Cannot add type %s to variable" % type(other).__name__ @@ -221,6 +229,8 @@ def __sub__(self, other): case LinearExpression(): # self - other -> other * -1.0 + self return other * -1.0 + self + case QuadraticExpression(): + return other * -1.0 + self case _: raise ValueError( "Cannot subtract type %s from variable" @@ -1900,7 +1910,11 @@ def relax(self): def populate_solution(self, solution): self.Status = solution.get_termination_status() self.SolveTime = solution.get_solve_time() - self.warmstart_data = solution.get_pdlp_warm_start_data() + self.warmstart_data = ( + solution.get_pdlp_warm_start_data() + if solution.problem_category == 0 + else None + ) IsMIP = False if solution.problem_category == 0: @@ -1909,7 +1923,7 @@ def populate_solution(self, solution): IsMIP = True self.SolutionStats = self.dict_to_object(solution.get_milp_stats()) primal_sol = solution.get_primal_solution() - reduced_cost = solution.get_reduced_cost() + reduced_cost = solution.get_reduced_cost() if not IsMIP else None if len(primal_sol) > 0: for var in self.vars: var.Value = primal_sol[var.index] diff --git a/python/cuopt/cuopt/linear_programming/pyproject.toml b/python/cuopt/cuopt/linear_programming/pyproject.toml index e59f8d3a0f..934b12f547 100644 --- a/python/cuopt/cuopt/linear_programming/pyproject.toml +++ b/python/cuopt/cuopt/linear_programming/pyproject.toml @@ -27,6 +27,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] [project.urls] diff --git a/python/cuopt/cuopt/linear_programming/solution/solution.py b/python/cuopt/cuopt/linear_programming/solution/solution.py index 849c907d06..e2533da8c1 100644 --- a/python/cuopt/cuopt/linear_programming/solution/solution.py +++ b/python/cuopt/cuopt/linear_programming/solution/solution.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 from cuopt.linear_programming.solver.solver_wrapper import ( @@ -167,25 +167,28 @@ def __init__( self.problem_category = problem_category self.primal_solution = primal_solution self.dual_solution = dual_solution - self.pdlp_warm_start_data = PDLPWarmStartData( - current_primal_solution, - current_dual_solution, - initial_primal_average, - initial_dual_average, - current_ATY, - sum_primal_solutions, - sum_dual_solutions, - last_restart_duality_gap_primal_solution, - last_restart_duality_gap_dual_solution, - initial_primal_weight, - initial_step_size, - total_pdlp_iterations, - total_pdhg_iterations, - last_candidate_kkt_score, - last_restart_kkt_score, - sum_solution_weight, - iterations_since_last_restart, - ) + if problem_category == ProblemCategory.LP: + self.pdlp_warm_start_data = PDLPWarmStartData( + current_primal_solution, + current_dual_solution, + initial_primal_average, + initial_dual_average, + current_ATY, + sum_primal_solutions, + sum_dual_solutions, + last_restart_duality_gap_primal_solution, + last_restart_duality_gap_dual_solution, + initial_primal_weight, + initial_step_size, + total_pdlp_iterations, + total_pdhg_iterations, + last_candidate_kkt_score, + last_restart_kkt_score, + sum_solution_weight, + iterations_since_last_restart, + ) + else: + self.pdlp_warm_start_data = None self._set_termination_status(termination_status) self.error_status = error_status self.error_message = error_message @@ -216,8 +219,17 @@ def __init__( def _set_termination_status(self, ts): if self.problem_category == ProblemCategory.LP: self.termination_status = LPTerminationStatus(ts) - else: + elif self.problem_category in ( + ProblemCategory.MIP, + ProblemCategory.IP, + ): self.termination_status = MILPTerminationStatus(ts) + else: + raise ValueError( + f"Unknown problem_category: {self.problem_category!r}. " + "Expected one of ProblemCategory.LP, ProblemCategory.MIP, " + "ProblemCategory.IP." + ) def raise_if_milp_solution(self, function_name): if self.problem_category in (ProblemCategory.MIP, ProblemCategory.IP): @@ -242,7 +254,7 @@ def get_dual_solution(self): Note: Applicable to only LP Returns the dual solution as numpy.array with float64 type. """ - self.raise_if_milp_solution(__name__) + self.raise_if_milp_solution("get_dual_solution") return self.dual_solution def get_primal_objective(self): @@ -256,7 +268,7 @@ def get_dual_objective(self): Note: Applicable to only LP Returns the dual objective as a float64. """ - self.raise_if_milp_solution(__name__) + self.raise_if_milp_solution("get_dual_objective") return self.dual_objective def get_termination_status(self): @@ -325,14 +337,16 @@ def get_lp_stats(self): Number of iterations the LP solver did before converging. """ - self.raise_if_milp_solution(__name__) + self.raise_if_milp_solution("get_lp_stats") return self.lp_stats def get_reduced_cost(self): """ + Note: Applicable to only LP Returns the reduced cost as numpy.array with float64 type. """ + self.raise_if_milp_solution("get_reduced_cost") return self.reduced_cost def get_pdlp_warm_start_data(self): @@ -343,6 +357,7 @@ def get_pdlp_warm_start_data(self): See `SolverSettings.set_pdlp_warm_start_data` for more details. """ + self.raise_if_milp_solution("get_pdlp_warm_start_data") return self.pdlp_warm_start_data def get_milp_stats(self): @@ -386,7 +401,7 @@ def get_milp_stats(self): Number of simplex iterations performed during the MIP solve """ - self.raise_if_lp_solution(__name__) + self.raise_if_lp_solution("get_milp_stats") return self.milp_stats diff --git a/python/cuopt/cuopt/tests/linear_programming/test_cpu_only_execution.py b/python/cuopt/cuopt/tests/linear_programming/test_cpu_only_execution.py index 792942aae9..e8dd179e05 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_cpu_only_execution.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_cpu_only_execution.py @@ -168,6 +168,8 @@ def _impl_warmstart_cpu_only(): class TestCPUOnlyExecution: """Tests that run with CUDA_VISIBLE_DEVICES='' to simulate CPU-only hosts.""" + pytestmark = pytest.mark.skip(reason="CPU-only tests temporarily disabled") + @pytest.fixture def env(self): return _cpu_only_env() @@ -201,6 +203,8 @@ def test_warmstart_cpu_only(self, env): class TestCuoptCliCPUOnly: """Test that cuopt_cli runs without CUDA in remote-execution mode.""" + pytestmark = pytest.mark.skip(reason="CPU-only tests temporarily disabled") + @pytest.fixture def env(self): return _cpu_only_env() diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py index 9f94916ff0..e284ffc0ab 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py @@ -18,6 +18,7 @@ CUOPT_ITERATION_LIMIT, CUOPT_METHOD, CUOPT_MIP_HEURISTICS_ONLY, + CUOPT_PDLP_PRECISION, CUOPT_PDLP_SOLVER_MODE, CUOPT_PRIMAL_INFEASIBLE_TOLERANCE, CUOPT_RELATIVE_DUAL_TOLERANCE, @@ -604,10 +605,10 @@ def test_barrier(): A_offsets = np.array([0, 2, 4]) data_model_obj.set_csr_constraint_matrix(A_values, A_indices, A_offsets) - b = np.array([200, 160]) + b = np.array([200.0, 160.0]) data_model_obj.set_constraint_bounds(b) - c = np.array([5, 20]) + c = np.array([5.0, 20.0]) data_model_obj.set_objective_coefficients(c) row_types = np.array(["L", "L"]) @@ -722,3 +723,43 @@ def test_write_files(): assert float(line.split()[-1]) == pytest.approx(80) os.remove("afiro.sol") + + +def test_pdlp_precision_single(): + file_path = ( + RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps" + ) + data_model_obj = cuopt_mps_parser.ParseMps(file_path) + + settings = solver_settings.SolverSettings() + settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) + settings.set_parameter(CUOPT_PDLP_PRECISION, 0) # Single + settings.set_optimality_tolerance(1e-4) + + solution = solver.Solve(data_model_obj, settings) + + assert solution.get_termination_status() == LPTerminationStatus.Optimal + assert solution.get_primal_objective() == pytest.approx( + -464.7531, rel=1e-1 + ) + assert solution.get_solved_by_pdlp() + + +def test_pdlp_precision_single_crossover(): + file_path = ( + RAPIDS_DATASET_ROOT_DIR + "/linear_programming/afiro_original.mps" + ) + data_model_obj = cuopt_mps_parser.ParseMps(file_path) + + settings = solver_settings.SolverSettings() + settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) + settings.set_parameter(CUOPT_PDLP_PRECISION, 1) # Single + settings.set_parameter("crossover", True) + settings.set_optimality_tolerance(1e-4) + + solution = solver.Solve(data_model_obj, settings) + + assert solution.get_termination_status() == LPTerminationStatus.Optimal + assert solution.get_primal_objective() == pytest.approx( + -464.7531, rel=1e-1 + ) diff --git a/python/cuopt/pyproject.toml b/python/cuopt/pyproject.toml index 44734577c9..e86b5bdd73 100644 --- a/python/cuopt/pyproject.toml +++ b/python/cuopt/pyproject.toml @@ -24,8 +24,8 @@ dependencies = [ "cuopt-mps-parser==26.4.*,>=0.0.0a0", "cupy-cuda13x>=13.6.0", "libcuopt==26.4.*,>=0.0.0a0", - "numba-cuda>=0.22.1,<0.23.0", - "numba>=0.60.0", + "numba-cuda>=0.22.1", + "numba>=0.60.0,<0.65.0", "numpy>=1.23.5,<3.0", "pandas>=2.0", "pylibraft==26.4.*,>=0.0.0a0", @@ -40,6 +40,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] [project.optional-dependencies] diff --git a/python/cuopt_self_hosted/pyproject.toml b/python/cuopt_self_hosted/pyproject.toml index ce1fb9ae15..7645c99ed0 100644 --- a/python/cuopt_self_hosted/pyproject.toml +++ b/python/cuopt_self_hosted/pyproject.toml @@ -22,7 +22,7 @@ requires-python = ">=3.11" dependencies = [ "cuopt-mps-parser==26.4.*,>=0.0.0a0", "msgpack-numpy==0.4.8", - "msgpack==1.1.0", + "msgpack==1.1.2", "requests", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -30,6 +30,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", "Operating System :: OS Independent", ] diff --git a/python/cuopt_server/cuopt_server/cuopt_service.py b/python/cuopt_server/cuopt_server/cuopt_service.py index 15106bd293..8da26a39ad 100644 --- a/python/cuopt_server/cuopt_server/cuopt_service.py +++ b/python/cuopt_server/cuopt_server/cuopt_service.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import argparse @@ -6,7 +6,7 @@ import os import signal import sys -from multiprocessing import Event, Process, Queue +from multiprocessing import get_context import psutil @@ -66,21 +66,23 @@ def watcher(app_exit, results_queue, job_queue, abort_queue, abort_list): except Exception: pass + ctx = get_context("fork") + # Flag for this process that says we have already run the # exit handler - terminated = Event() + terminated = ctx.Event() # Flag for all processes that the app is shutting down - app_exit = Event() + app_exit = ctx.Event() # Flag set by results thread when all jobs have been # marked done, to give a chance for anyone actively # waiting to get a graceful response - jobs_marked_done = Event() + jobs_marked_done = ctx.Event() - job_queue = Queue() - abort_queue = Queue() - results_queue = Queue() + job_queue = ctx.Queue() + abort_queue = ctx.Queue() + results_queue = ctx.Queue() w = None @@ -401,7 +403,7 @@ def record_factory(*args, **kwargs): from cuopt_server.webserver import run_server - w = Process( + w = ctx.Process( target=run_server, args=( app_exit, diff --git a/python/cuopt_server/cuopt_server/tests/test_set_fleet_data.py b/python/cuopt_server/cuopt_server/tests/test_set_fleet_data.py index db2402eeba..e9526be784 100644 --- a/python/cuopt_server/cuopt_server/tests/test_set_fleet_data.py +++ b/python/cuopt_server/cuopt_server/tests/test_set_fleet_data.py @@ -1,10 +1,13 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import copy from cuopt_server.tests.utils.utils import cuoptproc # noqa from cuopt_server.tests.utils.utils import RequestClient +from cuopt_server.utils.routing.validation_fleet_data import ( + validate_fleet_data, +) client = RequestClient() @@ -62,6 +65,117 @@ # FLEET DATA TESTING +# Test validate_fleet_data rejects duplicate vehicle_ids (no server required) +def test_validate_fleet_data_duplicate_vehicle_ids(): + vehicle_locations = [[0, 0], [0, 0], [0, 0]] + vehicle_ids_dup = ["Truck 1", "Truck 1", "Truck 1"] + + is_valid, msg = validate_fleet_data( + vehicle_ids=vehicle_ids_dup, + vehicle_locations=vehicle_locations, + capacities=None, + vehicle_time_windows=None, + vehicle_breaks=None, + vehicle_break_time_windows=None, + vehicle_break_durations=None, + vehicle_break_locations=None, + vehicle_types=None, + vehicle_types_dict={}, + vehicle_order_match=None, + skip_first_trips=None, + drop_return_trips=None, + min_vehicles=None, + vehicle_max_costs=None, + vehicle_max_times=None, + vehicle_fixed_costs=None, + ) + assert is_valid is False + assert "unique" in msg.lower() and "duplicate" in msg.lower() + + +# Test validate_fleet_data accepts unique vehicle_ids (no server required) +def test_validate_fleet_data_unique_vehicle_ids(): + vehicle_locations = [[0, 0], [0, 0]] + vehicle_ids_unique = ["Truck 1", "Truck 2"] + + is_valid, msg = validate_fleet_data( + vehicle_ids=vehicle_ids_unique, + vehicle_locations=vehicle_locations, + capacities=None, + vehicle_time_windows=None, + vehicle_breaks=None, + vehicle_break_time_windows=None, + vehicle_break_durations=None, + vehicle_break_locations=None, + vehicle_types=None, + vehicle_types_dict={}, + vehicle_order_match=None, + skip_first_trips=None, + drop_return_trips=None, + min_vehicles=None, + vehicle_max_costs=None, + vehicle_max_times=None, + vehicle_fixed_costs=None, + ) + assert is_valid is True + assert msg == "Valid Fleet Data" + + +# Test validate_fleet_data with vehicle_ids=None passes (no server required) +def test_validate_fleet_data_vehicle_ids_none(): + vehicle_locations = [[0, 0], [0, 0]] + + is_valid, msg = validate_fleet_data( + vehicle_ids=None, + vehicle_locations=vehicle_locations, + capacities=None, + vehicle_time_windows=None, + vehicle_breaks=None, + vehicle_break_time_windows=None, + vehicle_break_durations=None, + vehicle_break_locations=None, + vehicle_types=None, + vehicle_types_dict={}, + vehicle_order_match=None, + skip_first_trips=None, + drop_return_trips=None, + min_vehicles=None, + vehicle_max_costs=None, + vehicle_max_times=None, + vehicle_fixed_costs=None, + ) + assert is_valid is True + assert msg == "Valid Fleet Data" + + +# Test validate_fleet_data with single vehicle (no server required) +def test_validate_fleet_data_single_vehicle(): + vehicle_locations = [[0, 0]] + vehicle_ids_single = ["Truck 1"] + + is_valid, msg = validate_fleet_data( + vehicle_ids=vehicle_ids_single, + vehicle_locations=vehicle_locations, + capacities=None, + vehicle_time_windows=None, + vehicle_breaks=None, + vehicle_break_time_windows=None, + vehicle_break_durations=None, + vehicle_break_locations=None, + vehicle_types=None, + vehicle_types_dict={}, + vehicle_order_match=None, + skip_first_trips=None, + drop_return_trips=None, + min_vehicles=None, + vehicle_max_costs=None, + vehicle_max_times=None, + vehicle_fixed_costs=None, + ) + assert is_valid is True + assert msg == "Valid Fleet Data" + + # Test validation error when multiple cost matrices set without vehicle types def test_invalid_vehicle_types(cuoptproc): # noqa matrix_data = { @@ -101,6 +215,38 @@ def test_valid_full_set_fleet_data(cuoptproc): # noqa assert response_set.status_code == 200 +# Testing duplicate vehicle_ids rejected (issue #903) +def test_duplicate_vehicle_ids_set_fleet_data(cuoptproc): # noqa + test_data = copy.deepcopy(valid_data) + test_data["fleet_data"]["vehicle_ids"] = [ + "veh-1", + "veh-2", + "veh-1", + "veh-4", + ] + + response_set = client.post("/cuopt/request", json=test_data) + assert response_set.status_code == 400 + assert response_set.json() == { + "error": "vehicle_ids must be unique; duplicates are not allowed", + "error_result": True, + } + + +# Testing valid with unique vehicle_ids +def test_valid_unique_vehicle_ids_set_fleet_data(cuoptproc): # noqa + test_data = copy.deepcopy(valid_data) + test_data["fleet_data"]["vehicle_ids"] = [ + "veh-1", + "veh-2", + "veh-3", + "veh-4", + ] + + response_set = client.post("/cuopt/request", json=test_data) + assert response_set.status_code == 200 + + # Testing valid with minimal required parameters def test_valid_minimal_set_fleet_data(cuoptproc): # noqa test_data = copy.deepcopy(valid_data) diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py index 6eeaafbde5..87524f8715 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py @@ -304,6 +304,9 @@ def create_solution(sol): milp_stats = get_if_attribute_is_valid_else_none( sol.get_milp_stats ) + pdlpwarmstart_data = get_if_attribute_is_valid_else_none( + sol.get_pdlp_warm_start_data + ) solution["problem_category"] = sol.get_problem_category().name solution["primal_solution"] = primal_solution solution["dual_solution"] = dual_solution @@ -318,8 +321,9 @@ def create_solution(sol): solution["vars"] = sol.get_vars() solution["lp_statistics"] = {} if lp_stats is None else lp_stats solution["reduced_cost"] = reduced_cost + solution["pdlpwarmstart_data"] = extract_pdlpwarmstart_data( - sol.get_pdlp_warm_start_data() + pdlpwarmstart_data ) solution["milp_statistics"] = ( {} if milp_stats is None else milp_stats diff --git a/python/cuopt_server/cuopt_server/utils/process_handler.py b/python/cuopt_server/cuopt_server/utils/process_handler.py index 2a7d9e7969..d3d54d7009 100644 --- a/python/cuopt_server/cuopt_server/utils/process_handler.py +++ b/python/cuopt_server/cuopt_server/utils/process_handler.py @@ -1,10 +1,10 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import logging import queue import time -from multiprocessing import Event, Process +from multiprocessing import get_context import psutil @@ -70,11 +70,12 @@ def terminate(job_queue, results_queue, abort_queue, signame): def create_process(app_exit, job_queue, results_queue, abort_list, gpu_id): global s_procs - complete = Event() + ctx = get_context("fork") + complete = ctx.Event() from cuopt_server.utils import solver - s = Process( + s = ctx.Process( target=solver.process_async_solve, args=( app_exit, diff --git a/python/cuopt_server/cuopt_server/utils/routing/data_definition.py b/python/cuopt_server/cuopt_server/utils/routing/data_definition.py index 4a0abda553..ba1b5e4e52 100644 --- a/python/cuopt_server/cuopt_server/utils/routing/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/routing/data_definition.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 import copy @@ -232,7 +232,10 @@ class FleetData(StrictModel): vehicle_ids: Optional[List[str]] = Field( default=None, examples=[["veh-1", "veh-2"]], - description=("List of the vehicle ids or names provided as a string."), + description=( + "List of the vehicle ids or names provided as a string. " + "Must be unique; duplicates are not allowed." + ), ) capacities: Optional[List[List[int]]] = Field( default=None, diff --git a/python/cuopt_server/cuopt_server/utils/routing/validation_fleet_data.py b/python/cuopt_server/cuopt_server/utils/routing/validation_fleet_data.py index 5a505e8ef6..ff94ccfa79 100644 --- a/python/cuopt_server/cuopt_server/utils/routing/validation_fleet_data.py +++ b/python/cuopt_server/cuopt_server/utils/routing/validation_fleet_data.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 @@ -85,6 +85,11 @@ def validate_fleet_data( if vehicle_ids is not None: fleet_length_check_array.append(len(vehicle_ids)) + if len(vehicle_ids) != len(set(vehicle_ids)): + return ( + False, + "vehicle_ids must be unique; duplicates are not allowed", + ) if capacities is not None: fleet_length_check_array.append(len(capacities[0])) diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml index 0a39531b89..d24cfcbd77 100644 --- a/python/cuopt_server/pyproject.toml +++ b/python/cuopt_server/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "fastapi", "jsonref==1.1.0", "msgpack-numpy==0.4.8", - "msgpack==1.1.0", + "msgpack==1.1.2", "numpy>=1.23.5,<3.0", "pandas>=2.0", "psutil>=6.0.0", @@ -38,13 +38,14 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] [project.optional-dependencies] test = [ "jsonref==1.1.0", "msgpack-numpy==0.4.8", - "msgpack==1.1.0", + "msgpack==1.1.2", "pexpect", "pytest-cov", "pytest<9.0", diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml index fabd3da0df..2507971a0f 100644 --- a/python/libcuopt/pyproject.toml +++ b/python/libcuopt/pyproject.toml @@ -27,6 +27,7 @@ classifiers = [ "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", ] dependencies = [ "cuopt-mps-parser==26.4.*,>=0.0.0a0", diff --git a/.github/skills/cuopt-developer/SKILL.md b/skills/cuopt-developer/SKILL.md similarity index 99% rename from .github/skills/cuopt-developer/SKILL.md rename to skills/cuopt-developer/SKILL.md index 8e73995b58..12419153ac 100644 --- a/.github/skills/cuopt-developer/SKILL.md +++ b/skills/cuopt-developer/SKILL.md @@ -1,5 +1,6 @@ --- name: cuopt-developer +version: "26.04.00" description: Contribute to NVIDIA cuOpt codebase including C++/CUDA, Python, server, docs, and CI. Use when the user wants to modify solver internals, add features, submit PRs, or understand the codebase architecture. --- diff --git a/skills/cuopt-installation-api-c/SKILL.md b/skills/cuopt-installation-api-c/SKILL.md new file mode 100644 index 0000000000..747382e3c7 --- /dev/null +++ b/skills/cuopt-installation-api-c/SKILL.md @@ -0,0 +1,32 @@ +--- +name: cuopt-installation-api-c +version: "26.04.00" +description: Install cuOpt for C — conda, locate lib/headers, verification. Use when the user is installing or verifying the C API. Standalone; no common skill. +--- + +# cuOpt Installation — C API (user) + +Install cuOpt to *use* it from C. Standalone skill (no separate common). + +## System requirements + +- **GPU**: NVIDIA Compute Capability ≥ 7.0 (Volta+). CUDA 12.x or 13.x. +- **Driver**: Compatible NVIDIA driver. Python and C are separate installables. + +## conda (C / libcuopt) + +```bash +conda install -c rapidsai -c conda-forge -c nvidia cuopt +# libcuopt is provided by the same channel; Python and C are separate packages. +``` + +## Verify C API + +```bash +find $CONDA_PREFIX -name "cuopt_c.h" +find $CONDA_PREFIX -name "libcuopt.so" +``` + +## Examples + +- [verification_examples.md](resources/verification_examples.md) — C API verification diff --git a/.github/skills/cuopt-installation/resources/verification_examples.md b/skills/cuopt-installation-api-c/resources/verification_examples.md similarity index 98% rename from .github/skills/cuopt-installation/resources/verification_examples.md rename to skills/cuopt-installation-api-c/resources/verification_examples.md index bd84de80ba..83628437d7 100644 --- a/.github/skills/cuopt-installation/resources/verification_examples.md +++ b/skills/cuopt-installation-api-c/resources/verification_examples.md @@ -17,7 +17,7 @@ print("DataModel created - GPU access OK") import cudf cost_matrix = cudf.DataFrame([[0,1,2],[1,0,1],[2,1,0]], dtype="float32") dm.add_cost_matrix(cost_matrix) -dm.set_order_locations(cudf.Series([1, 2])) +dm.set_order_locations(cudf.Series([1, 2], dtype="int32")) solution = routing.Solve(dm, routing.SolverSettings()) print(f"Solve status: {solution.get_status()}") diff --git a/skills/cuopt-installation-api-python/SKILL.md b/skills/cuopt-installation-api-python/SKILL.md new file mode 100644 index 0000000000..a3d7a5e5d2 --- /dev/null +++ b/skills/cuopt-installation-api-python/SKILL.md @@ -0,0 +1,73 @@ +--- +name: cuopt-installation-api-python +version: "26.04.00" +description: Install cuOpt for Python — pip, conda, Docker, verification. Use when the user is installing or verifying the Python API. Standalone; no common skill. +--- + +# cuOpt Installation — Python (user) + +Install cuOpt to *use* it from Python. Standalone skill (no separate common). + +## System requirements + +- **GPU**: NVIDIA Compute Capability ≥ 7.0 (Volta+). CUDA 12.x or 13.x; match package (cuopt-cu12 / cuopt-cu13). +- **Driver**: Compatible NVIDIA driver. + +## pip (Python) + +**Choose one** — do not run both. The second install would override the first and can cause CUDA/package mismatch. + +- **CUDA 13.x:** + ```bash + pip install --extra-index-url=https://pypi.nvidia.com cuopt-cu13 + ``` +- **CUDA 12.x:** + ```bash + pip install --extra-index-url=https://pypi.nvidia.com 'cuopt-cu12==26.2.*' + ``` + +## pip: Server + Client + +```bash +pip install --extra-index-url=https://pypi.nvidia.com cuopt-server-cu12 cuopt-sh-client +``` + +## conda + +```bash +conda install -c rapidsai -c conda-forge -c nvidia cuopt +conda install -c rapidsai -c conda-forge -c nvidia cuopt-server cuopt-sh-client +``` + +## Docker + +```bash +docker pull nvidia/cuopt:latest-cuda12.9-py3.13 +docker run --gpus all -it --rm -p 8000:8000 nvidia/cuopt:latest-cuda12.9-py3.13 +``` + +## Verify Python + +```python +import cuopt +print(cuopt.__version__) +from cuopt import routing +dm = routing.DataModel(n_locations=3, n_fleet=1, n_orders=2) +``` + +## Verify Server + +```bash +python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000 & +sleep 5 +curl -s http://localhost:8000/cuopt/health | jq . +``` + +## Common Issues + +- No module 'cuopt' → check `pip list | grep cuopt`, `which python`, reinstall with correct index. +- CUDA not available → `nvidia-smi`, `nvcc --version`, match cuopt-cu12 vs cuopt-cu13 to CUDA. + +## Examples + +- [verification_examples.md](resources/verification_examples.md) — Python and server verification diff --git a/skills/cuopt-installation-api-python/resources/verification_examples.md b/skills/cuopt-installation-api-python/resources/verification_examples.md new file mode 100644 index 0000000000..83628437d7 --- /dev/null +++ b/skills/cuopt-installation-api-python/resources/verification_examples.md @@ -0,0 +1,172 @@ +# Installation: Verification Examples + +## Verify Python Installation + +```python +# Basic import test +import cuopt +print(f"cuOpt version: {cuopt.__version__}") + +# GPU access test +from cuopt import routing + +dm = routing.DataModel(n_locations=3, n_fleet=1, n_orders=2) +print("DataModel created - GPU access OK") + +# Quick solve test +import cudf +cost_matrix = cudf.DataFrame([[0,1,2],[1,0,1],[2,1,0]], dtype="float32") +dm.add_cost_matrix(cost_matrix) +dm.set_order_locations(cudf.Series([1, 2], dtype="int32")) + +solution = routing.Solve(dm, routing.SolverSettings()) +print(f"Solve status: {solution.get_status()}") +print("cuOpt installation verified!") +``` + +## Verify LP/MILP + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +problem = Problem("Test") +x = problem.addVariable(lb=0, vtype=CONTINUOUS, name="x") +problem.setObjective(x, sense=MAXIMIZE) +problem.addConstraint(x <= 10) + +problem.solve(SolverSettings()) +print(f"Status: {problem.Status.name}") +print(f"x = {x.getValue()}") +print("LP/MILP working!") +``` + +## Verify Server Installation + +```bash +# Start server in background +python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000 & +SERVER_PID=$! + +# Wait for startup +sleep 5 + +# Health check +curl -s http://localhost:8000/cuopt/health | jq . + +# Quick routing test +curl -s -X POST "http://localhost:8000/cuopt/request" \ + -H "Content-Type: application/json" \ + -H "CLIENT-VERSION: custom" \ + -d '{ + "cost_matrix_data": {"data": {"0": [[0,1],[1,0]]}}, + "travel_time_matrix_data": {"data": {"0": [[0,1],[1,0]]}}, + "task_data": {"task_locations": [1]}, + "fleet_data": {"vehicle_locations": [[0,0]], "capacities": [[10]]}, + "solver_config": {"time_limit": 1} + }' | jq . + +# Stop server +kill $SERVER_PID +``` + +## Verify C API Installation + +```bash +# Find header +echo "Looking for cuopt_c.h..." +find ${CONDA_PREFIX:-/usr} -name "cuopt_c.h" 2>/dev/null + +# Find library +echo "Looking for libcuopt.so..." +find ${CONDA_PREFIX:-/usr} -name "libcuopt.so" 2>/dev/null + +# Test compile (if gcc available) +cat > /tmp/test_cuopt.c << 'EOF' +#include +#include +int main() { + printf("cuopt_c.h found and compilable\n"); + return 0; +} +EOF + +gcc -I${CONDA_PREFIX}/include -c /tmp/test_cuopt.c -o /tmp/test_cuopt.o && \ + echo "C API headers OK" || echo "C API headers not found" +``` + +## Check System Requirements + +```bash +# GPU check +nvidia-smi + +# CUDA version +nvcc --version + +# Compute capability (need >= 7.0) +nvidia-smi --query-gpu=compute_cap --format=csv,noheader + +# Python version +python --version + +# Available memory +nvidia-smi --query-gpu=memory.total,memory.free --format=csv +``` + +## Check Package Versions + +```python +import importlib.metadata + +packages = ["cuopt-cu12", "cuopt-cu13", "cuopt-server-cu12", "cuopt-server-cu13", "cuopt-sh-client"] +for pkg in packages: + try: + version = importlib.metadata.version(pkg) + print(f"{pkg}: {version}") + except importlib.metadata.PackageNotFoundError: + pass +``` + +## Troubleshooting Commands + +```bash +# Check if cuopt is installed +pip list | grep -i cuopt + +# Check conda packages +conda list | grep -i cuopt + +# Check CUDA runtime +python -c "import torch; print(torch.cuda.is_available())" 2>/dev/null || echo "PyTorch not installed" + +# Check cudf (routing dependency) +python -c "import cudf; print(f'cudf: {cudf.__version__}')" + +# Check rmm (memory manager) +python -c "import rmm; print(f'rmm: {rmm.__version__}')" +``` + +## Docker Verification + +```bash +# Pull and run +docker run --gpus all --rm nvidia/cuopt:latest-cuda12.9-py3.13 python -c " +import cuopt +print(f'cuOpt version: {cuopt.__version__}') +from cuopt import routing +dm = routing.DataModel(n_locations=3, n_fleet=1, n_orders=2) +print('GPU access OK') +" +``` + +--- + +## Additional References + +| Topic | Resource | +|-------|----------| +| Installation Guide | [NVIDIA cuOpt Docs](https://docs.nvidia.com/cuopt/user-guide/latest/installation.html) | +| System Requirements | [cuOpt Requirements](https://docs.nvidia.com/cuopt/user-guide/latest/requirements.html) | +| Docker Images | See `ci/docker/` in this repo | +| Conda Recipes | See `conda/recipes/` in this repo | diff --git a/skills/cuopt-installation-common/SKILL.md b/skills/cuopt-installation-common/SKILL.md new file mode 100644 index 0000000000..6ceb9f9000 --- /dev/null +++ b/skills/cuopt-installation-common/SKILL.md @@ -0,0 +1,29 @@ +--- +name: cuopt-installation-common +version: "26.04.00" +description: Install cuOpt — system and environment requirements only. Domain concepts; no install commands or interface guidance. +--- + +# cuOpt Installation (common) + +Domain concepts for installing and running cuOpt. No install commands or interface details here. + +## System requirements + +- **GPU**: NVIDIA with Compute Capability ≥ 7.0 (Volta or newer). Examples: V100, A100, H100, RTX 20xx/30xx/40xx. Not supported: GTX 10xx (Pascal). +- **CUDA**: 12.x or 13.x. Package and runtime must match (e.g. cuopt built for CUDA 12 with a CUDA 12 driver). +- **Driver**: Compatible NVIDIA driver for the CUDA version in use. + +## Required questions (environment) + +Ask these if not already clear: + +1. **Environment** — Local machine with GPU, cloud instance, Docker/Kubernetes, or no GPU (need remote/server)? +2. **CUDA version** — What is installed or planned? (e.g. `nvcc --version`, `nvidia-smi`.) +3. **Usage** — In-process (library/API) vs server (REST)? Which language or runtime (Python, C, server)? +4. **Package manager** — pip, conda, or Docker preferred? + +## Notes + +- Python API and C API are separate installables; having one does not provide the other. +- Server deployment typically uses Docker or a dedicated server package; client can be any language. diff --git a/skills/cuopt-installation-developer/SKILL.md b/skills/cuopt-installation-developer/SKILL.md new file mode 100644 index 0000000000..a002498853 --- /dev/null +++ b/skills/cuopt-installation-developer/SKILL.md @@ -0,0 +1,36 @@ +--- +name: cuopt-installation-developer +version: "26.04.00" +description: Developer installation — build cuOpt from source, run tests. Use when the user wants to set up a dev environment to contribute or modify cuOpt. +--- + +# cuOpt Installation — Developer + +Set up an environment to **build cuOpt from source** and run tests. For contribution behavior and PRs, see the developer skill after the build works. + +## When to use this skill + +- User wants to *build* cuOpt (clone, build deps, build, tests). +- Not for *using* cuOpt (pip/conda) — use the user installation skill instead. + +## Required questions (environment) + +Ask these if not already clear: + +1. **OS and GPU** — Linux? Which CUDA version (e.g. 12.x)? +2. **Goal** — Contributing upstream, or local fork/modification? +3. **Component** — C++/CUDA core, Python bindings, server, docs, or CI? + +## Typical setup (conceptual) + +1. **Clone** the cuOpt repo (and submodules if any). +2. **Build dependencies** — CUDA toolkit, compiler, CMake; see repo docs for the canonical list. +3. **Configure and build** — e.g. top-level `build.sh` or CMake; Debug/Release. +4. **Run tests** — e.g. `pytest` for Python, `ctest` or project test runner for C++. +5. **Optional** — Python env for bindings; pre-commit or style checks. + +Use the repository’s own documentation (README, CONTRIBUTING, or docs/) for exact commands and versions. + +## After setup + +Once the developer can build and run tests, use **cuopt-developer** for behavior rules, code patterns, and contribution workflow (DCO, PRs). diff --git a/skills/cuopt-lp-milp-api-c/SKILL.md b/skills/cuopt-lp-milp-api-c/SKILL.md new file mode 100644 index 0000000000..53df3de63e --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/SKILL.md @@ -0,0 +1,57 @@ +--- +name: cuopt-lp-milp-api-c +version: "26.04.00" +description: LP and MILP with cuOpt — C API only. Use when the user is embedding LP/MILP in C/C++. +--- + +# cuOpt LP/MILP — C API + +Confirm problem type and formulation (variables, objective, constraints, variable types) before coding. + +This skill is **C only**. + +## Quick Reference: C API + +```c +#include + +// CSR format for constraints +cuopt_int_t row_offsets[] = {0, 2, 4}; +cuopt_int_t col_indices[] = {0, 1, 0, 1}; +cuopt_float_t values[] = {2.0, 3.0, 4.0, 2.0}; +char var_types[] = {CUOPT_CONTINUOUS, CUOPT_INTEGER}; + +cuOptCreateRangedProblem( + num_constraints, num_variables, CUOPT_MINIMIZE, + 0.0, objective_coefficients, + row_offsets, col_indices, values, + constraint_lower, constraint_upper, + var_lower, var_upper, var_types, + &problem +); +cuOptSolve(problem, settings, &solution); +cuOptGetObjectiveValue(solution, &obj_value); +``` + +## Debugging (MPS / C) + +**MPS parsing:** Required sections in order: NAME, ROWS, COLUMNS, RHS, (optional) BOUNDS, ENDATA. Integer markers: `'MARKER'`, `'INTORG'`, `'INTEND'`. + +**OOM or slow:** Check problem size (variables, constraints); use sparse matrix; set time limit and gap tolerance. + +## Examples + +- [examples.md](resources/examples.md) — LP/MILP with build instructions +- [assets/README.md](assets/README.md) — Build commands for all reference code below +- [lp_basic](assets/lp_basic/) — Simple LP: create problem, solve, get solution +- [lp_duals](assets/lp_duals/) — Dual values and reduced costs +- [lp_warmstart](assets/lp_warmstart/) — PDLP warmstart (see README) +- [milp_basic](assets/milp_basic/) — Simple MILP with integer variable +- [milp_production_planning](assets/milp_production_planning/) — Production planning with resource constraints +- [mps_solver](assets/mps_solver/) — Solve from MPS file via `cuOptReadProblem` + +For **CLI** (MPS files), use `cuopt_cli` and product docs. + +## Escalate + +If the problem is quadratic (squared or cross terms in the objective), use QP. For contribution or build-from-source, use product or repo documentation. diff --git a/skills/cuopt-lp-milp-api-c/assets/README.md b/skills/cuopt-lp-milp-api-c/assets/README.md new file mode 100644 index 0000000000..e354988da1 --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/README.md @@ -0,0 +1,33 @@ +# Assets — reference C examples + +LP/MILP C API reference implementations. Use as reference when building new applications; do not edit in place. Build requires cuOpt installed (include and lib paths set). + +| Example | Type | Description | +|---------|------|-------------| +| [lp_basic](lp_basic/) | LP | Simple LP: create problem, solve, get solution | +| [lp_duals](lp_duals/) | LP | Dual values and reduced costs | +| [lp_warmstart](lp_warmstart/) | LP | PDLP warmstart (see README) | +| [milp_basic](milp_basic/) | MILP | Simple MILP with integer variable | +| [milp_production_planning](milp_production_planning/) | MILP | Production planning with resource constraints | +| [mps_solver](mps_solver/) | LP/MILP | Solve from MPS file via `cuOptReadProblem` | + +## Build and run + +Set include and library paths, then build and run. + +**Using conda:** Activate your cuOpt env first (`conda activate cuopt`), then: + +```bash +# Paths from active conda env (CONDA_PREFIX is set when env is activated) +export INCLUDE_PATH="${CONDA_PREFIX}/include" +export LIB_PATH="${CONDA_PREFIX}/lib" +export LD_LIBRARY_PATH="${LIB_PATH}:${LD_LIBRARY_PATH}" + +# Build and run (from this assets/ directory) — example: lp_basic +gcc -I"${INCLUDE_PATH}" -L"${LIB_PATH}" -o lp_basic/lp_simple lp_basic/lp_simple.c -lcuopt +./lp_basic/lp_simple +``` + +For the other examples, use the same pattern (e.g. `lp_duals/lp_duals.c` → `lp_duals/lp_duals`). `mps_solver` takes an MPS file path: `./mps_solver mps_solver/data/sample.mps`. + +Without conda, set `INCLUDE_PATH` and `LIB_PATH` to your cuOpt include and lib directories, then use the same `gcc` and `LD_LIBRARY_PATH` as above. Each subdirectory README has a one-line build/run for that example. diff --git a/skills/cuopt-lp-milp-api-c/assets/lp_basic/README.md b/skills/cuopt-lp-milp-api-c/assets/lp_basic/README.md new file mode 100644 index 0000000000..010666240f --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/lp_basic/README.md @@ -0,0 +1,15 @@ +# Simple LP (C API) + +Minimize `-0.2*x1 + 0.1*x2` subject to: +- `3*x1 + 4*x2 <= 5.4` +- `2.7*x1 + 10.1*x2 <= 4.9` +- `x1, x2 >= 0` + +**Build:** From repo root or skill dir, with cuOpt on `INCLUDE_PATH` and `LIB_PATH`: + +```bash +gcc -I${INCLUDE_PATH} -L${LIB_PATH} -o lp_simple lp_simple.c -lcuopt +LD_LIBRARY_PATH=${LIB_PATH}:$LD_LIBRARY_PATH ./lp_simple +``` + +**See also:** [resources/examples.md](../../resources/examples.md) for parameter constants and more examples. diff --git a/skills/cuopt-lp-milp-api-c/assets/lp_basic/lp_simple.c b/skills/cuopt-lp-milp-api-c/assets/lp_basic/lp_simple.c new file mode 100644 index 0000000000..a21e17ab7b --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/lp_basic/lp_simple.c @@ -0,0 +1,109 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * Simple LP (C API): minimize -0.2*x1 + 0.1*x2 + * subject to 3*x1 + 4*x2 <= 5.4, 2.7*x1 + 10.1*x2 <= 4.9, x1,x2 >= 0 + */ +#include +#include +#include +#include + +int main(void) { + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + + cuopt_int_t num_variables = 2; + cuopt_int_t num_constraints = 2; + + cuopt_int_t row_offsets[] = {0, 2, 4}; + cuopt_int_t column_indices[] = {0, 1, 0, 1}; + cuopt_float_t values[] = {3.0, 4.0, 2.7, 10.1}; + + cuopt_float_t objective_coefficients[] = {-0.2, 0.1}; + cuopt_float_t constraint_upper_bounds[] = {5.4, 4.9}; + cuopt_float_t constraint_lower_bounds[] = {-CUOPT_INFINITY, -CUOPT_INFINITY}; + + cuopt_float_t var_lower_bounds[] = {0.0, 0.0}; + cuopt_float_t var_upper_bounds[] = {CUOPT_INFINITY, CUOPT_INFINITY}; + char variable_types[] = {CUOPT_CONTINUOUS, CUOPT_CONTINUOUS}; + + cuopt_int_t status = cuOptCreateRangedProblem( + num_constraints, num_variables, CUOPT_MINIMIZE, 0.0, + objective_coefficients, + row_offsets, column_indices, values, + constraint_lower_bounds, constraint_upper_bounds, + var_lower_bounds, var_upper_bounds, + variable_types, &problem + ); + if (status != CUOPT_SUCCESS) { + printf("Error creating problem: %d\n", status); + return 1; + } + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, 0.0001); + if (status != CUOPT_SUCCESS) { + printf("Error setting primal tolerance: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_TIME_LIMIT, 60.0); + if (status != CUOPT_SUCCESS) { + printf("Error setting time limit: %d\n", status); + goto cleanup; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving: %d\n", status); + goto cleanup; + } + + cuopt_float_t time, objective_value; + cuopt_int_t termination_status; + status = cuOptGetSolveTime(solution, &time); + if (status != CUOPT_SUCCESS) { + printf("Error getting solve time: %d\n", status); + goto cleanup; + } + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status: %d\n", status); + goto cleanup; + } + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value: %d\n", status); + goto cleanup; + } + + printf("Status: %d\n", termination_status); + printf("Time: %f s\n", time); + printf("Objective: %f\n", objective_value); + + cuopt_float_t *sol = malloc((size_t)num_variables * sizeof(cuopt_float_t)); + if (sol) { + status = cuOptGetPrimalSolution(solution, sol); + if (status != CUOPT_SUCCESS) { + printf("Error getting primal solution: %d\n", status); + free(sol); + goto cleanup; + } + printf("x1 = %f, x2 = %f\n", sol[0], sol[1]); + free(sol); + } + +cleanup: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return (status == CUOPT_SUCCESS) ? 0 : 1; +} diff --git a/skills/cuopt-lp-milp-api-c/assets/lp_duals/README.md b/skills/cuopt-lp-milp-api-c/assets/lp_duals/README.md new file mode 100644 index 0000000000..78f275fc63 --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/lp_duals/README.md @@ -0,0 +1,14 @@ +# LP duals and reduced costs (C API) + +Retrieve dual values (shadow prices) and reduced costs after solving an LP. + +**Problem:** Minimize 3x + 2y + 5z subject to x + y + z = 4, 2x + y + z = 5, x, y, z ≥ 0. + +**Build:** With cuOpt on `INCLUDE_PATH` and `LIB_PATH`: + +```bash +gcc -I${INCLUDE_PATH} -L${LIB_PATH} -o lp_duals lp_duals.c -lcuopt +LD_LIBRARY_PATH=${LIB_PATH}:$LD_LIBRARY_PATH ./lp_duals +``` + +**See also:** [resources/examples.md](../../resources/examples.md) for full parameter reference. diff --git a/skills/cuopt-lp-milp-api-c/assets/lp_duals/lp_duals.c b/skills/cuopt-lp-milp-api-c/assets/lp_duals/lp_duals.c new file mode 100644 index 0000000000..a92262d18a --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/lp_duals/lp_duals.c @@ -0,0 +1,115 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * LP with dual values and reduced costs (C API). + * Problem: Minimize 3x + 2y + 5z subject to x + y + z = 4, 2x + y + z = 5, x,y,z >= 0. + */ +#include +#include +#include +#include + +int main(void) { + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + + const cuopt_int_t num_variables = 3; + const cuopt_int_t num_constraints = 2; + + /* Constraint matrix CSR: row0 1*x+1*y+1*z, row1 2*x+1*y+1*z */ + cuopt_int_t row_offsets[] = {0, 3, 6}; + cuopt_int_t column_indices[] = {0, 1, 2, 0, 1, 2}; + cuopt_float_t values[] = {1.0, 1.0, 1.0, 2.0, 1.0, 1.0}; + + cuopt_float_t objective_coefficients[] = {3.0, 2.0, 5.0}; + cuopt_float_t constraint_lower[] = {4.0, 5.0}; + cuopt_float_t constraint_upper[] = {4.0, 5.0}; + cuopt_float_t var_lower[] = {0.0, 0.0, 0.0}; + cuopt_float_t var_upper[] = {CUOPT_INFINITY, CUOPT_INFINITY, CUOPT_INFINITY}; + char variable_types[] = {CUOPT_CONTINUOUS, CUOPT_CONTINUOUS, CUOPT_CONTINUOUS}; + + cuopt_int_t status = cuOptCreateRangedProblem( + num_constraints, num_variables, CUOPT_MINIMIZE, 0.0, + objective_coefficients, + row_offsets, column_indices, values, + constraint_lower, constraint_upper, + var_lower, var_upper, + variable_types, &problem + ); + if (status != CUOPT_SUCCESS) { + printf("Error creating problem: %d\n", status); + return 1; + } + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_ABSOLUTE_PRIMAL_TOLERANCE, 0.0001); + if (status != CUOPT_SUCCESS) { + printf("Error setting primal tolerance: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_TIME_LIMIT, 60.0); + if (status != CUOPT_SUCCESS) { + printf("Error setting time limit: %d\n", status); + goto cleanup; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving: %d\n", status); + goto cleanup; + } + + cuopt_float_t objective_value; + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value: %d\n", status); + goto cleanup; + } + printf("Objective: %f\n", objective_value); + + cuopt_float_t *primal = malloc((size_t)num_variables * sizeof(cuopt_float_t)); + if (primal) { + status = cuOptGetPrimalSolution(solution, primal); + if (status != CUOPT_SUCCESS) { + printf("Error getting primal solution: %d\n", status); + free(primal); + goto cleanup; + } + printf("x = %f, y = %f, z = %f\n", primal[0], primal[1], primal[2]); + free(primal); + } + + cuopt_float_t *dual = malloc((size_t)num_constraints * sizeof(cuopt_float_t)); + if (dual) { + status = cuOptGetDualSolution(solution, dual); + if (status == CUOPT_SUCCESS) { + printf("Constraint c1 DualValue = %f\n", dual[0]); + printf("Constraint c2 DualValue = %f\n", dual[1]); + } + free(dual); + } + + cuopt_float_t *reduced = malloc((size_t)num_variables * sizeof(cuopt_float_t)); + if (reduced) { + status = cuOptGetReducedCosts(solution, reduced); + if (status == CUOPT_SUCCESS) { + printf("x ReducedCost = %f, y ReducedCost = %f, z ReducedCost = %f\n", + reduced[0], reduced[1], reduced[2]); + } + free(reduced); + } + +cleanup: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return (status == CUOPT_SUCCESS) ? 0 : 1; +} diff --git a/skills/cuopt-lp-milp-api-c/assets/lp_warmstart/README.md b/skills/cuopt-lp-milp-api-c/assets/lp_warmstart/README.md new file mode 100644 index 0000000000..1e254b75ea --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/lp_warmstart/README.md @@ -0,0 +1,5 @@ +# LP PDLP warmstart (C API) + +PDLP warmstart: use solution data from a solved LP to solve a similar problem faster. LP only (not MILP). + +Warmstart is not demonstrated in these C assets. See repo docs (e.g. `docs/cuopt/source/cuopt-c/lp-qp-milp/`) and headers for C-level warmstart support. diff --git a/skills/cuopt-lp-milp-api-c/assets/milp_basic/README.md b/skills/cuopt-lp-milp-api-c/assets/milp_basic/README.md new file mode 100644 index 0000000000..e3faa7a26e --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/milp_basic/README.md @@ -0,0 +1,12 @@ +# Simple MILP (C API) + +Same as LP but `x1` is integer. Demonstrates variable types and MIP parameters. + +**Build:** With cuOpt on `INCLUDE_PATH` and `LIB_PATH`: + +```bash +gcc -I${INCLUDE_PATH} -L${LIB_PATH} -o milp_simple milp_simple.c -lcuopt +LD_LIBRARY_PATH=${LIB_PATH}:$LD_LIBRARY_PATH ./milp_simple +``` + +**See also:** [resources/examples.md](../../resources/examples.md) for full parameter reference. diff --git a/skills/cuopt-lp-milp-api-c/assets/milp_basic/milp_simple.c b/skills/cuopt-lp-milp-api-c/assets/milp_basic/milp_simple.c new file mode 100644 index 0000000000..585b961c3e --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/milp_basic/milp_simple.c @@ -0,0 +1,102 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * Simple MILP (C API): same as LP but x1 is integer + */ +#include +#include +#include +#include + +int main(void) { + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + + cuopt_int_t num_variables = 2; + cuopt_int_t num_constraints = 2; + + cuopt_int_t row_offsets[] = {0, 2, 4}; + cuopt_int_t column_indices[] = {0, 1, 0, 1}; + cuopt_float_t values[] = {3.0, 4.0, 2.7, 10.1}; + + cuopt_float_t objective_coefficients[] = {-0.2, 0.1}; + cuopt_float_t constraint_upper[] = {5.4, 4.9}; + cuopt_float_t constraint_lower[] = {-CUOPT_INFINITY, -CUOPT_INFINITY}; + cuopt_float_t var_lower[] = {0.0, 0.0}; + cuopt_float_t var_upper[] = {CUOPT_INFINITY, CUOPT_INFINITY}; + + /* x1 = INTEGER, x2 = CONTINUOUS */ + char variable_types[] = {CUOPT_INTEGER, CUOPT_CONTINUOUS}; + + cuopt_int_t status = cuOptCreateRangedProblem( + num_constraints, num_variables, CUOPT_MINIMIZE, 0.0, + objective_coefficients, + row_offsets, column_indices, values, + constraint_lower, constraint_upper, + var_lower, var_upper, + variable_types, &problem + ); + if (status != CUOPT_SUCCESS) { + printf("Error creating problem: %d\n", status); + return 1; + } + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_MIP_ABSOLUTE_TOLERANCE, 0.0001); + if (status != CUOPT_SUCCESS) { + printf("Error setting MIP absolute tolerance: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_MIP_RELATIVE_GAP, 0.01); + if (status != CUOPT_SUCCESS) { + printf("Error setting MIP relative gap: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_TIME_LIMIT, 120.0); + if (status != CUOPT_SUCCESS) { + printf("Error setting time limit: %d\n", status); + goto cleanup; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving: %d\n", status); + goto cleanup; + } + + if (solution != NULL) { + cuopt_float_t objective_value; + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value: %d\n", status); + goto cleanup; + } + printf("Objective: %f\n", objective_value); + + cuopt_float_t *sol = malloc((size_t)num_variables * sizeof(cuopt_float_t)); + if (sol) { + status = cuOptGetPrimalSolution(solution, sol); + if (status != CUOPT_SUCCESS) { + printf("Error getting primal solution: %d\n", status); + free(sol); + goto cleanup; + } + printf("x1 (integer) = %f, x2 (continuous) = %f\n", sol[0], sol[1]); + free(sol); + } + } + +cleanup: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return (status == CUOPT_SUCCESS) ? 0 : 1; +} diff --git a/skills/cuopt-lp-milp-api-c/assets/milp_production_planning/README.md b/skills/cuopt-lp-milp-api-c/assets/milp_production_planning/README.md new file mode 100644 index 0000000000..d51b944fe2 --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/milp_production_planning/README.md @@ -0,0 +1,12 @@ +# Production planning MILP (C API) + +Two products (A, B), resource limits (machine time, labor, material), minimum production, maximize profit. + +**Build:** With cuOpt on `INCLUDE_PATH` and `LIB_PATH`: + +```bash +gcc -I${INCLUDE_PATH} -L${LIB_PATH} -o milp_production milp_production.c -lcuopt +LD_LIBRARY_PATH=${LIB_PATH}:$LD_LIBRARY_PATH ./milp_production +``` + +**See also:** [resources/examples.md](../../resources/examples.md) for parameters and MIP options. diff --git a/skills/cuopt-lp-milp-api-c/assets/milp_production_planning/milp_production.c b/skills/cuopt-lp-milp-api-c/assets/milp_production_planning/milp_production.c new file mode 100644 index 0000000000..093cdc8115 --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/milp_production_planning/milp_production.c @@ -0,0 +1,98 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * Production planning MILP (C API): two products, resource limits, maximize profit. + * Variables: Product_A (x1), Product_B (x2), both integer, lb 10 and 15. + * Constraints: 2*x1+x2 <= 100 (machine), x1+3*x2 <= 120 (labor), 4*x1+2*x2 <= 200 (material). + * Objective: maximize 50*x1 + 30*x2 => minimize -50*x1 - 30*x2. + */ +#include +#include +#include +#include + +int main(void) { + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + + const cuopt_int_t num_variables = 2; + const cuopt_int_t num_constraints = 3; + + /* CSR: row0 2*x1+1*x2, row1 1*x1+3*x2, row2 4*x1+2*x2 */ + cuopt_int_t row_offsets[] = {0, 2, 4, 6}; + cuopt_int_t column_indices[] = {0, 1, 0, 1, 0, 1}; + cuopt_float_t values[] = {2.0, 1.0, 1.0, 3.0, 4.0, 2.0}; + + cuopt_float_t objective_coefficients[] = {-50.0, -30.0}; + cuopt_float_t constraint_upper[] = {100.0, 120.0, 200.0}; + cuopt_float_t constraint_lower[] = {-CUOPT_INFINITY, -CUOPT_INFINITY, -CUOPT_INFINITY}; + cuopt_float_t var_lower[] = {10.0, 15.0}; + cuopt_float_t var_upper[] = {CUOPT_INFINITY, CUOPT_INFINITY}; + char variable_types[] = {CUOPT_INTEGER, CUOPT_INTEGER}; + + cuopt_int_t status = cuOptCreateRangedProblem( + num_constraints, num_variables, CUOPT_MINIMIZE, 0.0, + objective_coefficients, + row_offsets, column_indices, values, + constraint_lower, constraint_upper, + var_lower, var_upper, + variable_types, &problem + ); + if (status != CUOPT_SUCCESS) { + printf("Error creating problem: %d\n", status); + return 1; + } + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_TIME_LIMIT, 30.0); + if (status != CUOPT_SUCCESS) { + printf("Error setting time limit: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_MIP_RELATIVE_GAP, 0.01); + if (status != CUOPT_SUCCESS) { + printf("Error setting MIP relative gap: %d\n", status); + goto cleanup; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving: %d\n", status); + goto cleanup; + } + + cuopt_float_t objective_value; + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value: %d\n", status); + goto cleanup; + } + /* We minimized -profit, so total profit = -objective_value */ + printf("Total profit: %f\n", -objective_value); + + cuopt_float_t *sol = malloc((size_t)num_variables * sizeof(cuopt_float_t)); + if (sol) { + status = cuOptGetPrimalSolution(solution, sol); + if (status != CUOPT_SUCCESS) { + printf("Error getting primal solution: %d\n", status); + free(sol); + goto cleanup; + } + printf("Product_A: %f, Product_B: %f\n", sol[0], sol[1]); + free(sol); + } + +cleanup: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return (status == CUOPT_SUCCESS) ? 0 : 1; +} diff --git a/skills/cuopt-lp-milp-api-c/assets/mps_solver/README.md b/skills/cuopt-lp-milp-api-c/assets/mps_solver/README.md new file mode 100644 index 0000000000..efd351b9e8 --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/mps_solver/README.md @@ -0,0 +1,14 @@ +# MPS file solver (C API) + +Read and solve LP/MILP from a standard MPS file using `cuOptReadProblem`. + +**Build:** With cuOpt on `INCLUDE_PATH` and `LIB_PATH`: + +```bash +gcc -I${INCLUDE_PATH} -L${LIB_PATH} -o mps_solver mps_solver.c -lcuopt +LD_LIBRARY_PATH=${LIB_PATH}:$LD_LIBRARY_PATH ./mps_solver data/sample.mps +``` + +**Data:** `data/sample.mps` is a small LP (two variables, two constraints). Use any MPS file path as the first argument. + +**See also:** [resources/examples.md](../../resources/examples.md); repo example `docs/cuopt/source/cuopt-c/lp-qp-milp/examples/mps_file_example.c`. diff --git a/skills/cuopt-lp-milp-api-c/assets/mps_solver/data/sample.mps b/skills/cuopt-lp-milp-api-c/assets/mps_solver/data/sample.mps new file mode 100644 index 0000000000..6baeb6e524 --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/mps_solver/data/sample.mps @@ -0,0 +1,19 @@ +NAME PRODUCTION_LP +ROWS + N PROFIT + L RES_A + L RES_B +COLUMNS + PROD_X PROFIT -40.0 + PROD_X RES_A 2.0 + PROD_X RES_B 4.0 + PROD_Y PROFIT -30.0 + PROD_Y RES_A 3.0 + PROD_Y RES_B 2.0 +RHS + RHS1 RES_A 120.0 + RHS1 RES_B 100.0 +BOUNDS + LO BND1 PROD_X 0.0 + LO BND1 PROD_Y 0.0 +ENDATA diff --git a/skills/cuopt-lp-milp-api-c/assets/mps_solver/mps_solver.c b/skills/cuopt-lp-milp-api-c/assets/mps_solver/mps_solver.c new file mode 100644 index 0000000000..9aeb6f952a --- /dev/null +++ b/skills/cuopt-lp-milp-api-c/assets/mps_solver/mps_solver.c @@ -0,0 +1,107 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ + +/* + * Solve LP/MILP from MPS file (C API). + * Usage: mps_solver + */ +#include +#include +#include +#include + +int main(int argc, char *argv[]) { + if (argc != 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + const char *filename = argv[1]; + + cuOptOptimizationProblem problem = NULL; + cuOptSolverSettings settings = NULL; + cuOptSolution solution = NULL; + cuopt_int_t num_variables = 0; + cuopt_float_t *primal = NULL; + + cuopt_int_t status = cuOptReadProblem(filename, &problem); + if (status != CUOPT_SUCCESS) { + printf("Error reading MPS file: %d\n", status); + return 1; + } + + status = cuOptGetNumVariables(problem, &num_variables); + if (status != CUOPT_SUCCESS) { + printf("Error getting number of variables: %d\n", status); + goto cleanup; + } + printf("Variables: %d\n", num_variables); + + status = cuOptCreateSolverSettings(&settings); + if (status != CUOPT_SUCCESS) { + printf("Error creating solver settings: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_TIME_LIMIT, 60.0); + if (status != CUOPT_SUCCESS) { + printf("Error setting time limit: %d\n", status); + goto cleanup; + } + status = cuOptSetFloatParameter(settings, CUOPT_MIP_RELATIVE_GAP, 0.01); + if (status != CUOPT_SUCCESS) { + printf("Error setting MIP relative gap: %d\n", status); + goto cleanup; + } + + status = cuOptSolve(problem, settings, &solution); + if (status != CUOPT_SUCCESS) { + printf("Error solving: %d\n", status); + goto cleanup; + } + + cuopt_float_t objective_value, time; + cuopt_int_t termination_status; + status = cuOptGetObjectiveValue(solution, &objective_value); + if (status != CUOPT_SUCCESS) { + printf("Error getting objective value: %d\n", status); + goto cleanup; + } + status = cuOptGetSolveTime(solution, &time); + if (status != CUOPT_SUCCESS) { + printf("Error getting solve time: %d\n", status); + goto cleanup; + } + status = cuOptGetTerminationStatus(solution, &termination_status); + if (status != CUOPT_SUCCESS) { + printf("Error getting termination status: %d\n", status); + goto cleanup; + } + + printf("Termination status: %d\n", termination_status); + printf("Solve time: %f s\n", time); + printf("Objective: %f\n", objective_value); + + primal = malloc((size_t)num_variables * sizeof(cuopt_float_t)); + if (primal) { + status = cuOptGetPrimalSolution(solution, primal); + if (status != CUOPT_SUCCESS) { + printf("Error getting primal solution: %d\n", status); + free(primal); + primal = NULL; + goto cleanup; + } + printf("Primal (first 10): "); + for (cuopt_int_t i = 0; i < (num_variables < 10 ? num_variables : 10); i++) + printf("%f ", primal[i]); + if (num_variables > 10) printf("... (%d total)", (int)num_variables); + printf("\n"); + free(primal); + } + +cleanup: + cuOptDestroyProblem(&problem); + cuOptDestroySolverSettings(&settings); + cuOptDestroySolution(&solution); + return (status == CUOPT_SUCCESS) ? 0 : 1; +} diff --git a/.github/skills/cuopt-lp-milp/resources/c_api_examples.md b/skills/cuopt-lp-milp-api-c/resources/examples.md similarity index 100% rename from .github/skills/cuopt-lp-milp/resources/c_api_examples.md rename to skills/cuopt-lp-milp-api-c/resources/examples.md diff --git a/skills/cuopt-lp-milp-api-cli/SKILL.md b/skills/cuopt-lp-milp-api-cli/SKILL.md new file mode 100644 index 0000000000..cbdc1e7778 --- /dev/null +++ b/skills/cuopt-lp-milp-api-cli/SKILL.md @@ -0,0 +1,66 @@ +--- +name: cuopt-lp-milp-api-cli +version: "26.04.00" +description: LP and MILP with cuOpt — CLI only (MPS files, cuopt_cli). Use when the user is solving from MPS via command line. +--- + +# cuOpt LP/MILP — CLI + +Confirm problem type and formulation (variables, objective, constraints, variable types) before coding. + +This skill is **CLI only** (MPS input). + +## Basic usage + +```bash +# Solve LP or MILP from MPS file +cuopt_cli problem.mps + +# With options +cuopt_cli problem.mps --time-limit 120 --mip-relative-tolerance 0.01 +``` + +## Common options + +```bash +cuopt_cli --help + +# Time limit (seconds) +cuopt_cli problem.mps --time-limit 120 + +# MIP gap tolerance (stop when within X% of optimal) +cuopt_cli problem.mps --mip-relative-tolerance 0.001 + +# MIP absolute tolerance +cuopt_cli problem.mps --mip-absolute-tolerance 0.0001 + +# Presolve, iteration limit, method +cuopt_cli problem.mps --presolve --iteration-limit 10000 --method 1 +``` + +## MPS format (required sections, in order) + +1. **NAME** — problem name +2. **ROWS** — N (objective), L/G/E (constraints) +3. **COLUMNS** — variable names, row names, coefficients +4. **RHS** — right-hand side values +5. **BOUNDS** (optional) — LO, UP, FX, BV, LI, UI +6. **ENDATA** + +Integer variables: use `'MARKER' 'INTORG'` before and `'MARKER' 'INTEND'` after the integer columns. + +## Troubleshooting + +- **Failed to parse MPS** — Check ENDATA, section order (NAME, ROWS, COLUMNS, RHS, [BOUNDS], ENDATA), integer markers. +- **Infeasible** — Check constraint directions (L/G/E) and RHS values. + +## Examples + +- [assets/README.md](assets/README.md) — Build/run for sample MPS files +- [lp_simple](assets/lp_simple/) — Minimal LP (PROD_X, PROD_Y, two constraints) +- [lp_production](assets/lp_production/) — Production planning: chairs + tables, wood/labor +- [milp_facility](assets/milp_facility/) — Facility location with binary open/close + +## Getting the CLI + +CLI is included with the Python package (`cuopt`). Install via pip or conda; then run `cuopt_cli --help` to verify. diff --git a/skills/cuopt-lp-milp-api-cli/assets/README.md b/skills/cuopt-lp-milp-api-cli/assets/README.md new file mode 100644 index 0000000000..8680eb9e38 --- /dev/null +++ b/skills/cuopt-lp-milp-api-cli/assets/README.md @@ -0,0 +1,21 @@ +# Assets — sample MPS files + +Sample MPS files for use with `cuopt_cli`. Use as reference; do not edit in place. + +| File | Type | Description | +|------|------|-------------| +| [lp_production](lp_production/) | LP | Production planning: chairs + tables, wood/labor | +| [milp_facility](milp_facility/) | MILP | Facility location with binary open/close | +| [lp_simple](lp_simple/) | LP | Minimal LP (PROD_X, PROD_Y, two constraints) | + +**Run:** From each subdir or with path: `cuopt_cli lp_simple/sample.mps` (or `cuopt_cli production.mps`, etc.). See the skill for options (`--time-limit`, `--mip-relative-tolerance`, etc.). + +## Test CLI + +With conda env `cuopt` activated, from this `assets/` directory: + +```bash +cuopt_cli lp_simple/sample.mps --time-limit 10 +``` + +Use the same pattern for the other MPS files; for MILP, add e.g. `--mip-relative-gap 0.01`. diff --git a/skills/cuopt-lp-milp-api-cli/assets/lp_production/README.md b/skills/cuopt-lp-milp-api-cli/assets/lp_production/README.md new file mode 100644 index 0000000000..de4ca53043 --- /dev/null +++ b/skills/cuopt-lp-milp-api-cli/assets/lp_production/README.md @@ -0,0 +1,5 @@ +# Production LP (MPS) + +Production planning: maximize 40*chairs + 30*tables subject to wood and labor limits. + +**Run:** `cuopt_cli production.mps` or `cuopt_cli production.mps --time-limit 30` diff --git a/skills/cuopt-lp-milp-api-cli/assets/lp_production/production.mps b/skills/cuopt-lp-milp-api-cli/assets/lp_production/production.mps new file mode 100644 index 0000000000..40e3217b52 --- /dev/null +++ b/skills/cuopt-lp-milp-api-cli/assets/lp_production/production.mps @@ -0,0 +1,16 @@ +NAME PRODUCTION +ROWS + N PROFIT + L WOOD + L LABOR +COLUMNS + CHAIRS PROFIT -40.0 + CHAIRS WOOD 2.0 + CHAIRS LABOR 4.0 + TABLES PROFIT -30.0 + TABLES WOOD 3.0 + TABLES LABOR 2.0 +RHS + RHS1 WOOD 240.0 + RHS1 LABOR 200.0 +ENDATA diff --git a/skills/cuopt-lp-milp-api-cli/assets/lp_simple/README.md b/skills/cuopt-lp-milp-api-cli/assets/lp_simple/README.md new file mode 100644 index 0000000000..ed39464a77 --- /dev/null +++ b/skills/cuopt-lp-milp-api-cli/assets/lp_simple/README.md @@ -0,0 +1,5 @@ +# Minimal LP (MPS) + +Maximize 40*PROD_X + 30*PROD_Y subject to resource constraints. Two variables, two constraints. + +**Run:** `cuopt_cli sample.mps` or `cuopt_cli sample.mps --time-limit 30` diff --git a/skills/cuopt-lp-milp-api-cli/assets/lp_simple/sample.mps b/skills/cuopt-lp-milp-api-cli/assets/lp_simple/sample.mps new file mode 100644 index 0000000000..6baeb6e524 --- /dev/null +++ b/skills/cuopt-lp-milp-api-cli/assets/lp_simple/sample.mps @@ -0,0 +1,19 @@ +NAME PRODUCTION_LP +ROWS + N PROFIT + L RES_A + L RES_B +COLUMNS + PROD_X PROFIT -40.0 + PROD_X RES_A 2.0 + PROD_X RES_B 4.0 + PROD_Y PROFIT -30.0 + PROD_Y RES_A 3.0 + PROD_Y RES_B 2.0 +RHS + RHS1 RES_A 120.0 + RHS1 RES_B 100.0 +BOUNDS + LO BND1 PROD_X 0.0 + LO BND1 PROD_Y 0.0 +ENDATA diff --git a/skills/cuopt-lp-milp-api-cli/assets/milp_facility/README.md b/skills/cuopt-lp-milp-api-cli/assets/milp_facility/README.md new file mode 100644 index 0000000000..ac2a323908 --- /dev/null +++ b/skills/cuopt-lp-milp-api-cli/assets/milp_facility/README.md @@ -0,0 +1,5 @@ +# Facility location MILP (MPS) + +Facility location with binary open/close variables. Integer markers: INTORG / INTEND. + +**Run:** `cuopt_cli facility.mps --time-limit 60 --mip-relative-tolerance 0.01` diff --git a/skills/cuopt-lp-milp-api-cli/assets/milp_facility/facility.mps b/skills/cuopt-lp-milp-api-cli/assets/milp_facility/facility.mps new file mode 100644 index 0000000000..07f6bf3b7f --- /dev/null +++ b/skills/cuopt-lp-milp-api-cli/assets/milp_facility/facility.mps @@ -0,0 +1,27 @@ +NAME FACILITY +ROWS + N COST + G DEMAND1 + L CAP1 + L CAP2 +COLUMNS + MARKER 'MARKER' 'INTORG' + OPEN1 COST 100.0 + OPEN1 CAP1 -50.0 + OPEN2 COST 150.0 + OPEN2 CAP2 -70.0 + MARKER 'MARKER' 'INTEND' + SHIP11 COST 5.0 + SHIP11 DEMAND1 1.0 + SHIP11 CAP1 1.0 + SHIP21 COST 7.0 + SHIP21 DEMAND1 1.0 + SHIP21 CAP2 1.0 +RHS + RHS1 DEMAND1 30.0 +BOUNDS + BV BND1 OPEN1 + BV BND1 OPEN2 + LO BND1 SHIP11 0.0 + LO BND1 SHIP21 0.0 +ENDATA diff --git a/skills/cuopt-lp-milp-api-python/SKILL.md b/skills/cuopt-lp-milp-api-python/SKILL.md new file mode 100644 index 0000000000..a7cd9a59f2 --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/SKILL.md @@ -0,0 +1,226 @@ +--- +name: cuopt-lp-milp-api-python +version: "26.04.00" +description: Solve Linear Programming (LP) and Mixed-Integer Linear Programming (MILP) with the Python API. Use when the user asks about optimization with linear constraints, integer variables, scheduling, resource allocation, facility location, or production planning. +--- + +# cuOpt LP/MILP Skill + +Model and solve linear and mixed-integer linear programs using NVIDIA cuOpt's GPU-accelerated solver. + +## Before You Start + +Use a formulation summary (parameters, constraints, decisions, objective) if available; otherwise ask for decision variables, objective, and constraints. Then confirm **variable types** (see below) and **interface** (Python API recommended). + +## Choosing LP vs MILP + +**Prefer LP (all continuous variables) when the problem allows it.** LP solves faster and has stronger optimality guarantees. Use **MILP** only when the problem logically requires whole numbers or yes/no decisions. + +**Problem types that need extra care:** Multi-period planning and goal programming are easy to misinterpret. Double-check that rates and constraints apply to the right time period or priority level (AGENTS.md: verify understanding before code). + +- **Use LP** when every quantity can meaningfully be fractional: flows, proportions, rates, dollars, hours, tonnes of material, etc. +- **Use MILP** when the problem mentions **counts** of discrete entities, **yes/no** choices, or **either/or** decisions (e.g. open a facility or not, assign a person to a shift, number of trucks). + +## Integer vs continuous from wording + +Choose variable type from what the problem describes. + +| Problem wording / concept | Variable type | Examples | +|---------------------------|---------------|----------| +| **Discrete entities (counts)** | **INTEGER** | Workers, cars, trucks, machines, pilots, facilities, units to manufacture (when "units" means whole items), trainees, vehicles | +| **Yes/no or on/off** | **INTEGER** (binary, lb=0 ub=1) | Open a facility, run a machine, produce a product line, assign a person to a shift | +| **Amounts that can be fractional** | **CONTINUOUS** | Tonnes, litres, dollars, hours, kWh, proportion of capacity, flow volume, weight | +| **Rates or fractions** | **CONTINUOUS** | Utilization, percentage, share of budget | +| **Unclear** | Prefer **INTEGER** if the noun is a countable thing (a worker, a car); prefer **CONTINUOUS** if it's a measure (amount of steel, hours worked). If the problem says "whole" or "integer" or "number of", use INTEGER. | + +**Rule of thumb:** If the quantity is "how many *things*" (people, vehicles, items, sites), use **INTEGER**. If it's "how much" (mass, volume, money, time) or a rate, use **CONTINUOUS** unless the problem explicitly requires whole numbers. + +## Quick Reference: Python API + +### LP Example + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +# Create problem +problem = Problem("MyLP") + +# Decision variables +x = problem.addVariable(lb=0, vtype=CONTINUOUS, name="x") +y = problem.addVariable(lb=0, vtype=CONTINUOUS, name="y") + +# Constraints +problem.addConstraint(2*x + 3*y <= 120, name="resource_a") +problem.addConstraint(4*x + 2*y <= 100, name="resource_b") + +# Objective +problem.setObjective(40*x + 30*y, sense=MAXIMIZE) + +# Solve +settings = SolverSettings() +settings.set_parameter("time_limit", 60) +problem.solve(settings) + +# Check status (CRITICAL: use PascalCase!) +if problem.Status.name in ["Optimal", "PrimalFeasible"]: + print(f"Objective: {problem.ObjValue}") + print(f"x = {x.getValue()}") + print(f"y = {y.getValue()}") +``` + +### MILP Example (with integer variables) + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE + +problem = Problem("FacilityLocation") + +# Binary variable (integer with bounds 0-1) +open_facility = problem.addVariable(lb=0, ub=1, vtype=INTEGER, name="open") + +# Continuous variable +production = problem.addVariable(lb=0, vtype=CONTINUOUS, name="production") + +# Linking constraint: can only produce if facility is open +problem.addConstraint(production <= 1000 * open_facility, name="link") + +# Objective: fixed cost + variable cost +problem.setObjective(500*open_facility + 2*production, sense=MINIMIZE) + +# MILP-specific settings +settings = SolverSettings() +settings.set_parameter("time_limit", 120) +settings.set_parameter("mip_relative_gap", 0.01) # 1% optimality gap + +problem.solve(settings) + +# Check status +if problem.Status.name in ["Optimal", "FeasibleFound"]: + print(f"Open facility: {open_facility.getValue() > 0.5}") + print(f"Production: {production.getValue()}") +``` + +## CRITICAL: Status Checking + +**Status values use PascalCase, NOT ALL_CAPS:** + +```python +# ✅ CORRECT +if problem.Status.name in ["Optimal", "FeasibleFound"]: + print(problem.ObjValue) + +# ❌ WRONG - will silently fail! +if problem.Status.name == "OPTIMAL": # Never matches! + print(problem.ObjValue) +``` + +**LP Status Values:** `Optimal`, `NoTermination`, `NumericalError`, `PrimalInfeasible`, `DualInfeasible`, `IterationLimit`, `TimeLimit`, `PrimalFeasible` + +**MILP Status Values:** `Optimal`, `FeasibleFound`, `Infeasible`, `Unbounded`, `TimeLimit`, `NoTermination` + +## Common Modeling Patterns + +### Binary Selection +```python +# Select exactly k items from n +items = [problem.addVariable(lb=0, ub=1, vtype=INTEGER) for _ in range(n)] +problem.addConstraint(sum(items) == k) +``` + +### Big-M Linking +```python +# If y=1, then x <= 100; if y=0, x can be anything up to M +M = 10000 +problem.addConstraint(x <= 100 + M*(1 - y)) +``` + +### If-then "must also produce" +When the problem says *if we do X then we must also do Y*, enforce both (i) the binary link and (ii) that Y is actually produced: +```python +# y_X <= y_Y (if we do X, we must "do" Y) +problem.addConstraint(y_X <= y_Y) +# Production of Y when Y is chosen: produce at least 1 (or a minimum) when y_Y=1 +problem.addConstraint(production_Y >= 1 * y_Y) # or min_amount * y_Y +``` +Otherwise the solver can set y_Y=1 but production_Y=0, satisfying the binary link but not the intent. + +### Building large expressions +Chained `+` over many terms can hit recursion limits in the API. Prefer building objectives and constraints with **LinearExpression**: +```python +from cuopt.linear_programming.problem import LinearExpression + +# Build as list of (vars, coeffs) instead of v1*c1 + v2*c2 + ... +vars_list = [x, y, z] +coeffs_list = [1.0, 2.0, 3.0] +expr = LinearExpression(vars_list, coeffs_list, constant=0.0) +problem.addConstraint(expr <= 100) +``` +See reference models in this skill's `assets/` for examples. + +### Piecewise Linear (SOS2) +```python +# Approximate nonlinear function with breakpoints +# Use lambda variables that sum to 1, at most 2 adjacent non-zero +``` + +## Solver Settings + +```python +settings = SolverSettings() + +# Time limit +settings.set_parameter("time_limit", 60) + +# MILP gap tolerance (stop when within X% of optimal) +settings.set_parameter("mip_relative_gap", 0.01) + +# Logging +settings.set_parameter("log_to_console", 1) +``` + +## Common Issues + +| Problem | Likely Cause | Fix | +|---------|--------------|-----| +| Status never "OPTIMAL" | Using wrong case | Use `"Optimal"` not `"OPTIMAL"` | +| Integer var has fractional value | Defined as CONTINUOUS | Use `vtype=INTEGER` | +| Infeasible | Conflicting constraints | Check constraint logic | +| Unbounded | Missing bounds | Add variable bounds | +| Slow solve | Large problem | Set time limit, increase gap tolerance | +| Maximum recursion depth | Building big expr with chained `+` | Use `LinearExpression(vars_list, coeffs_list, constant)` | + +## Getting Dual Values (LP only) + +```python +if problem.Status.name == "Optimal": + constraint = problem.getConstraint("resource_a") + shadow_price = constraint.DualValue + print(f"Shadow price: {shadow_price}") +``` + +## Reference Models + +All reference models live in this skill's **`assets/`** directory. Use them as reference when building new applications; do not edit them in place. + +### Minimal / canonical examples (LP & MILP) +| Model | Type | Description | +|-------|------|-------------| +| [lp_basic](assets/lp_basic/) | LP | Minimal LP: variables, constraints, objective, solve | +| [lp_duals](assets/lp_duals/) | LP | Dual values and reduced costs | +| [lp_warmstart](assets/lp_warmstart/) | LP | PDLP warmstart for similar problems | +| [milp_basic](assets/milp_basic/) | MILP | Minimal MIP; includes incumbent callback example | +| [milp_production_planning](assets/milp_production_planning/) | MILP | Production planning with resource constraints | + +### Other reference +| Model | Type | Description | +|-------|------|-------------| +| [mps_solver](assets/mps_solver/) | LP/MILP | Solve any problem from standard MPS file format | + +**Quick command to list models:** `ls assets/` (from this skill's directory). + +## When to Escalate + +Use troubleshooting and diagnostic guidance if: +- Infeasible and you can't determine why +- Numerical issues diff --git a/skills/cuopt-lp-milp-api-python/assets/README.md b/skills/cuopt-lp-milp-api-python/assets/README.md new file mode 100644 index 0000000000..0b9a727e4b --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/README.md @@ -0,0 +1,12 @@ +# Assets — reference models + +LP/MILP reference implementations. Use as reference when building new applications; do not edit in place. + +| Model | Type | +|-------|------| +| lp_basic | LP | +| lp_duals | LP | +| lp_warmstart | LP | +| milp_basic | MILP | +| milp_production_planning | MILP | +| mps_solver | LP/MILP | diff --git a/skills/cuopt-lp-milp-api-python/assets/lp_basic/README.md b/skills/cuopt-lp-milp-api-python/assets/lp_basic/README.md new file mode 100644 index 0000000000..4c06f2ded6 --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/lp_basic/README.md @@ -0,0 +1,7 @@ +# Minimal LP + +Basic linear program: continuous variables, linear constraints, maximize objective. + +**Problem:** Maximize x + y subject to x + y ≤ 10, x − y ≥ 0, x, y ≥ 0. + +**Run:** `python model.py` diff --git a/skills/cuopt-lp-milp-api-python/assets/lp_basic/model.py b/skills/cuopt-lp-milp-api-python/assets/lp_basic/model.py new file mode 100644 index 0000000000..d81c6a749d --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/lp_basic/model.py @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Minimal LP: variables, constraints, objective, solve. + +Problem: + Maximize: x + y + Subject to: x + y <= 10, x - y >= 0, x, y >= 0 +""" + +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + + +def main(): + problem = Problem("Simple LP") + x = problem.addVariable(lb=0, vtype=CONTINUOUS, name="x") + y = problem.addVariable(lb=0, vtype=CONTINUOUS, name="y") + problem.addConstraint(x + y <= 10, name="c1") + problem.addConstraint(x - y >= 0, name="c2") + problem.setObjective(x + y, sense=MAXIMIZE) + + settings = SolverSettings() + settings.set_parameter("time_limit", 60) + problem.solve(settings) + + if problem.Status.name in ["Optimal", "PrimalFeasible"]: + print(f"Objective: {problem.ObjValue}") + print(f"x = {x.getValue()}, y = {y.getValue()}") + else: + print(f"Status: {problem.Status.name}") + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-lp-milp-api-python/assets/lp_duals/README.md b/skills/cuopt-lp-milp-api-python/assets/lp_duals/README.md new file mode 100644 index 0000000000..f0eb9bcf8b --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/lp_duals/README.md @@ -0,0 +1,7 @@ +# LP Duals and Reduced Costs + +Retrieve dual values (shadow prices) and reduced costs after solving an LP. + +**Problem:** Minimize 3x + 2y + 5z subject to x + y + z = 4, 2x + y + z = 5, x, y, z ≥ 0. + +**Run:** `python model.py` diff --git a/skills/cuopt-lp-milp-api-python/assets/lp_duals/model.py b/skills/cuopt-lp-milp-api-python/assets/lp_duals/model.py new file mode 100644 index 0000000000..4fa6a50a5b --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/lp_duals/model.py @@ -0,0 +1,38 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +LP with dual values and reduced costs. + +Problem: + Minimize: 3x + 2y + 5z + Subject to: x + y + z = 4, 2x + y + z = 5, x, y, z >= 0 +""" + +from cuopt.linear_programming.problem import Problem, MINIMIZE + + +def main(): + problem = Problem("min_dual_rc") + x = problem.addVariable(lb=0.0, name="x") + y = problem.addVariable(lb=0.0, name="y") + z = problem.addVariable(lb=0.0, name="z") + problem.addConstraint(x + y + z == 4.0, name="c1") + problem.addConstraint(2.0 * x + y + z == 5.0, name="c2") + problem.setObjective(3.0 * x + 2.0 * y + 5.0 * z, sense=MINIMIZE) + problem.solve() + + if problem.Status.name in ["Optimal", "PrimalFeasible"]: + print(f"Objective: {problem.ObjValue}") + for v in problem.getVariables(): + print( + f"{v.VariableName} = {v.Value}, ReducedCost = {v.ReducedCost}" + ) + for c in problem.getConstraints(): + print(f"{c.ConstraintName} DualValue = {c.DualValue}") + else: + print(f"Status: {problem.Status.name}") + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-lp-milp-api-python/assets/lp_warmstart/README.md b/skills/cuopt-lp-milp-api-python/assets/lp_warmstart/README.md new file mode 100644 index 0000000000..000e7a42fa --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/lp_warmstart/README.md @@ -0,0 +1,5 @@ +# LP PDLP Warmstart + +Use warmstart data from a solved LP to solve a similar problem faster. LP only (not MILP). + +**Run:** `python model.py` diff --git a/skills/cuopt-lp-milp-api-python/assets/lp_warmstart/model.py b/skills/cuopt-lp-milp-api-python/assets/lp_warmstart/model.py new file mode 100644 index 0000000000..b0e893118f --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/lp_warmstart/model.py @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +PDLP warmstart: solve a similar LP faster by reusing solution context. + +Warmstart is for LP only, not MILP. +""" + +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE +from cuopt.linear_programming.solver.solver_parameters import ( + CUOPT_METHOD, + CUOPT_PDLP_SOLVER_MODE, +) +from cuopt.linear_programming.solver_settings import ( + SolverSettings, + SolverMethod, + PDLPSolverMode, +) + + +def main(): + print("=== Problem 1 ===") + problem = Problem("LP1") + x = problem.addVariable(lb=0, vtype=CONTINUOUS, name="x") + y = problem.addVariable(lb=0, vtype=CONTINUOUS, name="y") + problem.addConstraint(4 * x + 10 * y <= 130, name="c1") + problem.addConstraint(8 * x - 3 * y >= 40, name="c2") + problem.setObjective(2 * x + y, sense=MAXIMIZE) + + settings = SolverSettings() + settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) + settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2) + problem.solve(settings) + print(f"Objective: {problem.ObjValue}") + + warmstart_data = problem.getWarmstartData() + print("\n=== Problem 2 (with warmstart) ===") + new_problem = Problem("LP2") + x = new_problem.addVariable(lb=0, vtype=CONTINUOUS, name="x") + y = new_problem.addVariable(lb=0, vtype=CONTINUOUS, name="y") + new_problem.addConstraint(4 * x + 10 * y <= 100, name="c1") + new_problem.addConstraint(8 * x - 3 * y >= 50, name="c2") + new_problem.setObjective(2 * x + y, sense=MAXIMIZE) + settings.set_pdlp_warm_start_data(warmstart_data) + new_problem.solve(settings) + if new_problem.Status.name in ["Optimal", "PrimalFeasible"]: + print(f"Objective: {new_problem.ObjValue}") + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-lp-milp-api-python/assets/milp_basic/README.md b/skills/cuopt-lp-milp-api-python/assets/milp_basic/README.md new file mode 100644 index 0000000000..45362da09b --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/milp_basic/README.md @@ -0,0 +1,10 @@ +# Minimal MILP + +Basic mixed-integer program: integer variables with bounds, linear constraints. + +**Problem:** Maximize 5x + 3y subject to 2x + 4y ≥ 230, 3x + 2y ≤ 190, 10 ≤ y ≤ 50, x, y integer. + +- **model.py** — solve and print solution. +- **incumbent_callback.py** — same problem with a callback that prints intermediate (incumbent) solutions during solve. + +**Run:** `python model.py` or `python incumbent_callback.py` diff --git a/skills/cuopt-lp-milp-api-python/assets/milp_basic/incumbent_callback.py b/skills/cuopt-lp-milp-api-python/assets/milp_basic/incumbent_callback.py new file mode 100644 index 0000000000..49e533291c --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/milp_basic/incumbent_callback.py @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Same MILP as model.py but with a callback to receive incumbent (intermediate) solutions. +MILP only; not for LP. +""" + +from cuopt.linear_programming.problem import Problem, INTEGER, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings +from cuopt.linear_programming.solver.solver_parameters import CUOPT_TIME_LIMIT +from cuopt.linear_programming.internals import GetSolutionCallback + + +class IncumbentCallback(GetSolutionCallback): + def __init__(self, user_data): + super().__init__() + self.n_callbacks = 0 + self.user_data = user_data + + def get_solution(self, solution, solution_cost, solution_bound, user_data): + self.n_callbacks += 1 + sol = ( + solution.tolist() + if hasattr(solution, "tolist") + else list(solution) + ) + cost = float(solution_cost[0]) + print(f"Incumbent {self.n_callbacks}: {sol}, cost: {cost:.2f}") + + +def main(): + problem = Problem("Incumbent Example") + x = problem.addVariable(vtype=INTEGER) + y = problem.addVariable(vtype=INTEGER) + problem.addConstraint(2 * x + 4 * y >= 230) + problem.addConstraint(3 * x + 2 * y <= 190) + problem.setObjective(5 * x + 3 * y, sense=MAXIMIZE) + + user_data = {"source": "incumbent_callback"} + settings = SolverSettings() + settings.set_mip_callback(IncumbentCallback(user_data), user_data) + settings.set_parameter(CUOPT_TIME_LIMIT, 30) + problem.solve(settings) + + print(f"Status: {problem.Status.name}, Objective: {problem.ObjValue}") + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-lp-milp-api-python/assets/milp_basic/model.py b/skills/cuopt-lp-milp-api-python/assets/milp_basic/model.py new file mode 100644 index 0000000000..5c0bf88e15 --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/milp_basic/model.py @@ -0,0 +1,36 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Minimal MILP: integer variables with bounds, linear constraints. + +Problem: + Maximize: 5x + 3y + Subject to: 2x + 4y >= 230, 3x + 2y <= 190, 10 <= y <= 50, x, y integer +""" + +from cuopt.linear_programming.problem import Problem, INTEGER, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + + +def main(): + problem = Problem("Simple MIP") + x = problem.addVariable(vtype=INTEGER, name="V_x") + y = problem.addVariable(lb=10, ub=50, vtype=INTEGER, name="V_y") + problem.addConstraint(2 * x + 4 * y >= 230, name="C1") + problem.addConstraint(3 * x + 2 * y <= 190, name="C2") + problem.setObjective(5 * x + 3 * y, sense=MAXIMIZE) + + settings = SolverSettings() + settings.set_parameter("time_limit", 60) + problem.solve(settings) + + if problem.Status.name in ["Optimal", "FeasibleFound"]: + print(f"Objective: {problem.ObjValue}") + print(f"x = {x.getValue()}, y = {y.getValue()}") + else: + print(f"Status: {problem.Status.name}") + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-lp-milp-api-python/assets/milp_production_planning/README.md b/skills/cuopt-lp-milp-api-python/assets/milp_production_planning/README.md new file mode 100644 index 0000000000..42a2a1a9d5 --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/milp_production_planning/README.md @@ -0,0 +1,5 @@ +# Production Planning (MILP) + +Two products (A, B), resource limits (machine time, labor, material), minimum production, maximize profit. + +**Run:** `python model.py` diff --git a/skills/cuopt-lp-milp-api-python/assets/milp_production_planning/model.py b/skills/cuopt-lp-milp-api-python/assets/milp_production_planning/model.py new file mode 100644 index 0000000000..72ded8164d --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/milp_production_planning/model.py @@ -0,0 +1,33 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Production planning: two products, resource limits (machine, labor, material), maximize profit. +""" + +from cuopt.linear_programming.problem import Problem, INTEGER, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + + +def main(): + problem = Problem("Production Planning") + x1 = problem.addVariable(lb=10, vtype=INTEGER, name="Product_A") + x2 = problem.addVariable(lb=15, vtype=INTEGER, name="Product_B") + problem.addConstraint(2 * x1 + x2 <= 100, name="Machine_Time") + problem.addConstraint(x1 + 3 * x2 <= 120, name="Labor_Hours") + problem.addConstraint(4 * x1 + 2 * x2 <= 200, name="Material") + problem.setObjective(50 * x1 + 30 * x2, sense=MAXIMIZE) + + settings = SolverSettings() + settings.set_parameter("time_limit", 30) + problem.solve(settings) + + if problem.Status.name in ["Optimal", "FeasibleFound"]: + print(f"Product A: {x1.getValue()}, Product B: {x2.getValue()}") + print(f"Total profit: {problem.ObjValue}") + else: + print(f"Status: {problem.Status.name}") + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-lp-milp-api-python/assets/mps_solver/README.md b/skills/cuopt-lp-milp-api-python/assets/mps_solver/README.md new file mode 100644 index 0000000000..f18f4f549e --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/mps_solver/README.md @@ -0,0 +1,88 @@ +# MPS File Solver + +Read and solve LP/MILP problems from standard MPS files using cuOpt. + +## Problem Description + +MPS (Mathematical Programming System) is a standard file format for representing linear and mixed-integer programming problems. This model demonstrates how to: + +1. Load an MPS file using `Problem.readMPS()` (static method) +2. Solve the problem using cuOpt's GPU-accelerated solver +3. Extract and display the solution + +This is useful when you have optimization problems in standard MPS format from other solvers, modeling tools, or benchmark libraries like MIPLIB. + +## MPS File Format + +MPS is a column-oriented format with sections: + +``` +NAME problem_name +ROWS + N OBJ (objective row) + L CON1 (≤ constraint) + G CON2 (≥ constraint) + E CON3 (= constraint) +COLUMNS + X1 OBJ 1.0 + X1 CON1 2.0 + X2 OBJ 2.0 + X2 CON1 3.0 +RHS + RHS CON1 10.0 +BOUNDS + LO BND X1 0.0 + UP BND X1 5.0 +ENDATA +``` + +## Usage + +```bash +# Solve the sample problem +python model.py + +# Solve a custom MPS file +python model.py --file path/to/problem.mps + +# With time limit +python model.py --file problem.mps --time-limit 120 +``` + +## Model Characteristics + +- **Type**: LP or MILP (detected from MPS file) +- **Input**: Standard MPS file format +- **Output**: Solution values, objective, status + +## Sample Problem + +The included `data/air05.mps` is a MIPLIB benchmark (airline crew scheduling): + +- **Variables**: 7,195 (binary) +- **Constraints**: 426 +- **Known optimal**: 26,374 +- **Typical solve time**: ~2 seconds + +## Key API Usage + +```python +from cuopt.linear_programming.problem import Problem +from cuopt.linear_programming.solver_settings import SolverSettings + +# Load MPS file (static method - returns Problem object) +problem = Problem.readMPS("path/to/problem.mps") + +# Configure and solve +settings = SolverSettings() +settings.set_parameter("time_limit", 60) +problem.solve(settings) + +# Check solution +if problem.Status.name in ["Optimal", "FeasibleFound"]: + print(f"Objective: {problem.ObjValue}") +``` + +## Source + +Based on cuOpt's built-in MPS support via `Problem.readMPS()`. diff --git a/skills/cuopt-lp-milp-api-python/assets/mps_solver/data/README.md b/skills/cuopt-lp-milp-api-python/assets/mps_solver/data/README.md new file mode 100644 index 0000000000..67266feea8 --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/mps_solver/data/README.md @@ -0,0 +1,82 @@ +# MPS Solver Data + +This directory contains MPS files for testing. + +## Included Files + +### air05.mps (MIPLIB Benchmark) + +An airline crew scheduling problem from the MIPLIB benchmark library. + +| Property | Value | +|----------|-------| +| Type | Binary Integer Program | +| Variables | 7,195 (all binary) | +| Constraints | 426 | +| Non-zeros | 52,121 | +| Known Optimal | 26,374 | + +**Source**: https://miplib.zib.de/instance_details_air05.html + +**Problem**: Given flight legs and possible crew pairings, find the minimum-cost +set of pairings that covers all flight legs (set covering problem). + +## MPS File Format + +MPS (Mathematical Programming System) is a standard format for LP/MILP problems. + +### Sections + +| Section | Purpose | +|---------|---------| +| NAME | Problem name | +| ROWS | Constraint and objective definitions | +| COLUMNS | Variable coefficients in each row | +| RHS | Right-hand side values for constraints | +| BOUNDS | Variable bounds and types | +| ENDATA | End of file marker | + +### Row Types + +| Type | Meaning | +|------|---------| +| N | Objective function (no constraint) | +| L | Less than or equal (≤) | +| G | Greater than or equal (≥) | +| E | Equality (=) | + +### Bound Types + +| Type | Meaning | +|------|---------| +| LO | Lower bound | +| UP | Upper bound | +| FX | Fixed value (lb = ub) | +| FR | Free variable (-∞ to +∞) | +| BV | Binary variable (0 or 1) | +| UI | Upper bound, integer | +| LI | Lower bound, integer | + +## Adding Custom MPS Files + +```bash +python model.py --file path/to/your/problem.mps +``` + +## Standard Test Problem Sources + +- [MIPLIB](https://miplib.zib.de/) - Mixed Integer Programming Library +- [Netlib LP](https://www.netlib.org/lp/) - Classic LP test problems +- [NEOS](https://neos-server.org/neos/) - Network-Enabled Optimization System + +## Creating MPS Files + +cuOpt can export problems to MPS format: + +```python +from cuopt.linear_programming.problem import Problem + +problem = Problem("MyProblem") +# ... define variables, constraints, objective ... +problem.writeMPS("output.mps") +``` diff --git a/skills/cuopt-lp-milp-api-python/assets/mps_solver/data/sample.mps b/skills/cuopt-lp-milp-api-python/assets/mps_solver/data/sample.mps new file mode 100644 index 0000000000..6baeb6e524 --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/mps_solver/data/sample.mps @@ -0,0 +1,19 @@ +NAME PRODUCTION_LP +ROWS + N PROFIT + L RES_A + L RES_B +COLUMNS + PROD_X PROFIT -40.0 + PROD_X RES_A 2.0 + PROD_X RES_B 4.0 + PROD_Y PROFIT -30.0 + PROD_Y RES_A 3.0 + PROD_Y RES_B 2.0 +RHS + RHS1 RES_A 120.0 + RHS1 RES_B 100.0 +BOUNDS + LO BND1 PROD_X 0.0 + LO BND1 PROD_Y 0.0 +ENDATA diff --git a/skills/cuopt-lp-milp-api-python/assets/mps_solver/model.py b/skills/cuopt-lp-milp-api-python/assets/mps_solver/model.py new file mode 100644 index 0000000000..fb8918c11c --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/mps_solver/model.py @@ -0,0 +1,283 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +MPS File Solver using cuOpt Python API + +Read and solve LP/MILP problems from standard MPS files using +cuOpt's built-in readMPS method. + +Default benchmark: air05.mps (airline crew scheduling from MIPLIB) +- Best known optimal: 26,374 +""" + +import os +import gzip +import urllib.request +from typing import Optional + +from cuopt.linear_programming.problem import Problem +from cuopt.linear_programming.solver_settings import SolverSettings + + +# MIPLIB benchmark URL +AIR05_URL = "https://miplib.zib.de/WebData/instances/air05.mps.gz" +AIR05_OPTIMAL = 26374 # Best known optimal solution + + +def download_air05(data_dir: str) -> str: + """Download air05.mps from MIPLIB if not present.""" + mps_file = os.path.join(data_dir, "air05.mps") + + if os.path.exists(mps_file): + return mps_file + + os.makedirs(data_dir, exist_ok=True) + gz_file = os.path.join(data_dir, "air05.mps.gz") + + print("Downloading air05.mps from MIPLIB...") + urllib.request.urlretrieve(AIR05_URL, gz_file) + + # Decompress + print("Decompressing...") + with gzip.open(gz_file, "rb") as f_in: + with open(mps_file, "wb") as f_out: + f_out.write(f_in.read()) + + # Clean up + os.remove(gz_file) + print(f"Downloaded: {mps_file}") + + return mps_file + + +def solve_mps( + filepath: str, + time_limit: float = 60.0, + mip_gap: float = 0.01, + verbose: bool = True, +) -> tuple: + """ + Solve an LP/MILP problem from an MPS file. + + Parameters + ---------- + filepath : str + Path to the MPS file + time_limit : float + Solver time limit in seconds + mip_gap : float + MIP relative gap tolerance + verbose : bool + Print solver output + + Returns + ------- + tuple + (problem, solution_dict) or (problem, None) if no solution + """ + + # Read MPS file directly (static method returns Problem object) + problem = Problem.readMPS(filepath) + + print(f"Loaded MPS file: {filepath}") + print(f"Variables: {problem.NumVariables}") + print(f"Constraints: {problem.NumConstraints}") + print(f"Is MIP: {problem.IsMIP}") + + # Solver settings + settings = SolverSettings() + settings.set_parameter("time_limit", time_limit) + settings.set_parameter("log_to_console", verbose) + settings.set_parameter("mip_relative_gap", mip_gap) + + # Solve + print("\nSolving...") + problem.solve(settings) + + # Extract solution + status = problem.Status.name + print(f"\nStatus: {status}") + + if status in ["Optimal", "FeasibleFound", "PrimalFeasible"]: + solution = { + "status": status, + "objective": problem.ObjValue, + "num_variables": problem.NumVariables, + "num_constraints": problem.NumConstraints, + "is_mip": problem.IsMIP, + "mip_gap": mip_gap, + } + + # Get variable values (use getVariables() for MPS-loaded problems) + var_values = {} + try: + variables = problem.getVariables() + for var in variables: + val = var.getValue() + if abs(val) > 1e-6: # Only include non-zero values + var_values[var.Name] = val + except (AttributeError, Exception): + # For MPS problems, variable access may be limited + pass + + solution["variables"] = var_values + return problem, solution + else: + return problem, None + + +def compare_gaps( + filepath: str, + time_limit: float = 120.0, + known_optimal: Optional[float] = None, +) -> dict: + """ + Compare solutions at different MIP gap tolerances. + + Parameters + ---------- + filepath : str + Path to the MPS file + time_limit : float + Solver time limit per run + known_optimal : float, optional + Known optimal objective value. If provided, results include + "gap_to_optimal" (percent above optimal). Omit for generic MPS files. + + Returns + ------- + dict + Results for each gap tolerance + """ + gaps = [0.01, 0.001] # 1% and 0.1% + results = {} + + for gap in gaps: + print(f"\n{'=' * 60}") + print(f"Solving with MIP gap = {gap * 100}%") + print(f"{'=' * 60}") + + problem, solution = solve_mps( + filepath=filepath, time_limit=time_limit, mip_gap=gap, verbose=True + ) + + if solution: + results[gap] = { + "objective": solution["objective"], + "status": solution["status"], + } + if known_optimal is not None: + results[gap]["gap_to_optimal"] = ( + (solution["objective"] - known_optimal) + / known_optimal + * 100 + ) + else: + results[gap] = {"objective": None, "status": "No solution"} + + return results + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser(description="Solve LP/MILP from MPS file") + parser.add_argument( + "--file", type=str, default=None, help="Path to MPS file" + ) + parser.add_argument( + "--time-limit", type=float, default=60.0, help="Solver time limit" + ) + parser.add_argument( + "--mip-gap", type=float, default=0.01, help="MIP gap tolerance" + ) + parser.add_argument( + "--compare", action="store_true", help="Compare 1%% vs 0.1%% gap" + ) + parser.add_argument( + "--known-optimal", + type=float, + default=None, + help="Known optimal objective value (enables gap-to-optimal reporting)", + ) + args = parser.parse_args() + + print("=" * 60) + print("MPS File Solver using cuOpt") + print("=" * 60) + + # Determine MPS file to use + script_dir = os.path.dirname(os.path.abspath(__file__)) + data_dir = os.path.join(script_dir, "data") + + if args.file: + mps_file = args.file + else: + # Download air05.mps if not present + mps_file = download_air05(data_dir) + + # Use known optimal only when explicitly set or when using default air05 + known_optimal = args.known_optimal + if known_optimal is None and mps_file.endswith("air05.mps"): + known_optimal = AIR05_OPTIMAL + + if args.compare: + # Compare different gap tolerances + print(f"\nComparing MIP gap tolerances on: {mps_file}") + if known_optimal is not None: + print(f"Best known optimal: {known_optimal}") + + results = compare_gaps( + mps_file, time_limit=args.time_limit, known_optimal=known_optimal + ) + + print() + print("=" * 60) + print("COMPARISON SUMMARY") + print("=" * 60) + if known_optimal is not None: + print(f"Best known optimal: {known_optimal}") + print() + header = f"{'Gap Tolerance':<15} {'Objective':<15}" + if known_optimal is not None: + header += f" {'Gap to Optimal':<15}" + print(header) + print("-" * (45 if known_optimal is None else 60)) + + for gap, result in sorted(results.items()): + if result["objective"] is not None: + line = f"{gap * 100:.1f}%{'':<12} {result['objective']:<15.0f}" + if known_optimal is not None: + line += f" {result['gap_to_optimal']:.2f}%" + print(line) + else: + print(f"{gap * 100:.1f}%{'':<12} {'No solution':<15}") + else: + # Single solve + print(f"\nMPS File: {mps_file}") + print(f"Time Limit: {args.time_limit}s") + print(f"MIP Gap: {args.mip_gap * 100}%") + print() + + problem, solution = solve_mps( + filepath=mps_file, + time_limit=args.time_limit, + mip_gap=args.mip_gap, + verbose=True, + ) + + if solution: + print() + print("=" * 60) + print("SOLUTION") + print("=" * 60) + print(f"Status: {solution['status']}") + print(f"Objective Value: {solution['objective']:.0f}") + if known_optimal is not None: + print(f"Best Known Optimal: {known_optimal}") + print( + f"Gap to Optimal: {(solution['objective'] - known_optimal) / known_optimal * 100:.2f}%" + ) + else: + print("\nNo feasible solution found.") diff --git a/skills/cuopt-lp-milp-api-python/assets/mps_solver/results.md b/skills/cuopt-lp-milp-api-python/assets/mps_solver/results.md new file mode 100644 index 0000000000..4100dea6b2 --- /dev/null +++ b/skills/cuopt-lp-milp-api-python/assets/mps_solver/results.md @@ -0,0 +1,90 @@ +# MPS Solver Results + +## Problem: air05.mps (MIPLIB benchmark) + +**Description:** Airline crew scheduling - set partitioning problem + +### Problem Characteristics +- **Variables:** 7195 (all binary) +- **Constraints:** 426 +- **Nonzeros:** 52121 +- **Best Known Optimal:** 26374 + +--- + +## Gap Tolerance Comparison + +Comparing different MIP relative gap tolerances to show trade-off between solution quality and solve time. + +### Run Configuration +- **Time Limit:** 60 seconds +- **cuOpt Version:** 26.2.0 +- **Device:** Quadro RTX 8000 (47.24 GiB VRAM) +- **CPU:** AMD Ryzen Threadripper PRO 3975WX (32 cores) + +### Results Summary + +| Gap Tolerance | Objective | Gap to Optimal | Solve Time | Nodes Explored | +|--------------|-----------|----------------|------------|----------------| +| 0.1% | **26374** | 0.00% | 8.42s | 386 | +| 1.0% | 26491 | 0.44% | 3.23s | 328 | + +### Key Observations + +1. **Tighter gap finds optimal**: The 0.1% gap tolerance found the exact best-known optimal solution (26374) +2. **Trade-off**: The looser 1.0% gap converged faster (3.2s vs 8.4s) but with 0.44% suboptimality +3. **Both are fast**: cuOpt solved this 7195-variable MILP in under 10 seconds + +--- + +## Detailed Solver Output (0.1% gap) + +``` +Solving a problem with 426 constraints, 7195 variables (7195 integers), and 52121 nonzeros + +Presolve removed: 90 constraints, 1116 variables, 16171 nonzeros +Presolved problem: 336 constraints, 6079 variables, 35950 nonzeros + +Root relaxation objective +2.58776093e+04 + +Strong branching using 7 threads and 222 fractional variables +Explored 386 nodes in 7.73s. + +Optimal solution found within relative MIP gap tolerance (1.0e-03) +Solution objective: 26374.000000 +relative_mip_gap 0.000992 +total_solve_time 8.421934 +``` + +--- + +## Detailed Solver Output (1.0% gap) + +``` +Solving a problem with 426 constraints, 7195 variables (7195 integers), and 52121 nonzeros + +Presolve removed: 90 constraints, 1116 variables, 16171 nonzeros +Presolved problem: 336 constraints, 6079 variables, 35950 nonzeros + +Root relaxation objective +2.58776093e+04 + +Strong branching using 63 threads and 222 fractional variables +Explored 328 nodes in 1.09s. + +Optimal solution found within relative MIP gap tolerance (1.0e-02) +Solution objective: 26491.000000 +relative_mip_gap 0.009669 +total_solve_time 3.233650 +``` + +--- + +## Usage + +```bash +# Default: download air05.mps and solve with comparison +python model.py --compare --time-limit 60 + +# Solve custom MPS file +python model.py --file path/to/problem.mps --time-limit 300 --mip-gap 0.001 +``` diff --git a/skills/cuopt-qp-api-c/SKILL.md b/skills/cuopt-qp-api-c/SKILL.md new file mode 100644 index 0000000000..bc1efb63d3 --- /dev/null +++ b/skills/cuopt-qp-api-c/SKILL.md @@ -0,0 +1,19 @@ +--- +name: cuopt-qp-api-c +version: "26.04.00" +description: Quadratic Programming (QP) with cuOpt — C API. Use when the user is embedding QP in C/C++. +--- + +# cuOpt QP — C API + +Confirm the objective has squared or cross terms (QP); if purely linear, use LP/MILP. QP must be minimization. + +This skill is **C only**. + +QP uses the same cuOpt C library as LP/MILP; the API extends to quadratic objectives. Use the same include/lib paths and build pattern as for LP/MILP C (see this skill's assets/README.md); then use the QP-specific creation/solve calls from the cuOpt C headers. + +**Reference:** This skill's [assets/README.md](assets/README.md) — build pattern and repo QP C API docs. + +## Escalate + +If the problem is linear, use LP/MILP. For contribution or build-from-source, see the developer skill. diff --git a/skills/cuopt-qp-api-c/assets/README.md b/skills/cuopt-qp-api-c/assets/README.md new file mode 100644 index 0000000000..b3fcea0586 --- /dev/null +++ b/skills/cuopt-qp-api-c/assets/README.md @@ -0,0 +1,9 @@ +# Assets — QP C API reference + +QP uses the same cuOpt C library as LP/MILP; the API extends to quadratic objectives. + +**Build and run:** Use the same include/lib paths and link steps as for LP/MILP C (see repository documentation for build and examples). Then use the QP-specific creation and solve calls from the cuOpt C headers. + +**Repo docs:** `docs/cuopt/source/cuopt-c/lp-qp-milp/` for QP C API and examples; parameter constants and CSR format are in the same doc tree. + +No standalone QP C source files are included in this skill; adapt the LP/MILP C build pattern for quadratic objective APIs from the headers. diff --git a/skills/cuopt-qp-api-cli/SKILL.md b/skills/cuopt-qp-api-cli/SKILL.md new file mode 100644 index 0000000000..5f8a8e848a --- /dev/null +++ b/skills/cuopt-qp-api-cli/SKILL.md @@ -0,0 +1,37 @@ +--- +name: cuopt-qp-api-cli +version: "26.04.00" +description: QP with cuOpt — CLI (e.g. cuopt_cli with QP-capable input). Use when the user is solving QP from the command line. +--- + +# cuOpt QP — CLI + +QP objectives must be **minimization**. For maximization, negate the objective. + +This skill is **CLI only** for QP. + +## QP via CLI + +cuOpt CLI supports QP (quadratic objectives). Use the same `cuopt_cli` tool; input format and options may extend the LP/MILP MPS workflow to allow quadratic terms (see repo docs or `cuopt_cli --help` for QP-specific options). + +## Basic usage + +```bash +# Solve QP (syntax may match or extend LP/MILP CLI; check --help) +cuopt_cli problem.mps + +# With time limit +cuopt_cli problem.mps --time-limit 60 +``` + +Check `cuopt_cli --help` and the repository documentation (e.g. `docs/cuopt/source/cuopt-cli/`) for QP file format and any QP-specific flags. + +**Reference:** This skill's [assets/README.md](assets/README.md) — CLI options and repo docs. + +## Getting the CLI + +CLI is included with the Python package (`cuopt`). Install via pip or conda; then run `cuopt_cli --help` to verify. + +## Escalate + +If the problem is linear, use LP/MILP CLI. For contribution or build-from-source, see the developer skill. diff --git a/skills/cuopt-qp-api-cli/assets/README.md b/skills/cuopt-qp-api-cli/assets/README.md new file mode 100644 index 0000000000..040f03efad --- /dev/null +++ b/skills/cuopt-qp-api-cli/assets/README.md @@ -0,0 +1,9 @@ +# Assets — QP CLI reference + +QP can be solved via `cuopt_cli` when the input format supports quadratic objectives (see repo docs and `cuopt_cli --help` for QP-specific options and file format). + +**Important:** QP objectives must be **minimization**. For maximization, negate the objective. + +**Repo docs:** `docs/cuopt/source/cuopt-cli/` for QP file format and flags. For sample MPS files and CLI options (time limit, tolerances), see the repository documentation. + +No sample QP input files are included here; check documentation for quadratic term format. diff --git a/skills/cuopt-qp-api-python/SKILL.md b/skills/cuopt-qp-api-python/SKILL.md new file mode 100644 index 0000000000..b85b9e3db2 --- /dev/null +++ b/skills/cuopt-qp-api-python/SKILL.md @@ -0,0 +1,61 @@ +--- +name: cuopt-qp-api-python +version: "26.04.00" +description: Quadratic Programming (QP) with cuOpt — Python API only (beta). Use when the user is building or solving QP in Python. +--- + +# cuOpt QP — Python API (beta) + +Confirm the objective has squared or cross terms (QP); if purely linear, use LP/MILP. QP must be minimization. + +This skill is **Python only**. **QP is beta.** + +## CRITICAL: MINIMIZE only + +```python +# ❌ WRONG +problem.setObjective(x*x + y*y, sense=MAXIMIZE) + +# ✅ CORRECT — negate for maximization +problem.setObjective(-(x*x + y*y), sense=MINIMIZE) +``` + +## Portfolio Example + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +problem = Problem("Portfolio") +x1 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_a") +x2 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_b") +x3 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_c") +r1, r2, r3 = 0.12, 0.08, 0.05 # expected returns (12%, 8%, 5%) +problem.setObjective( + 0.04*x1*x1 + 0.02*x2*x2 + 0.01*x3*x3 + 0.02*x1*x2 + 0.01*x1*x3 + 0.016*x2*x3, + sense=MINIMIZE +) +problem.addConstraint(x1 + x2 + x3 == 1, name="budget") +problem.addConstraint(r1*x1 + r2*x2 + r3*x3 >= 0.08, name="min_return") +problem.solve(SolverSettings()) +``` + +## Status (PascalCase) + +```python +if problem.Status.name in ["Optimal", "PrimalFeasible"]: + print(problem.ObjValue) +``` + +## Debugging + +**Diagnostic:** `print(f"Actual status: '{problem.Status.name}'")`. For numerical issues, check Q is PSD and variables are scaled. + +## Examples + +- [examples.md](resources/examples.md) — portfolio, least squares, maximization workaround +- **Reference models:** This skill's `assets/` — [portfolio](assets/portfolio/), [least_squares](assets/least_squares/), [maximization_workaround](assets/maximization_workaround/). See [assets/README.md](assets/README.md). + +## Escalate + +If the problem is linear (no squared or cross terms), use LP/MILP. For contribution or build-from-source, see the developer skill. diff --git a/skills/cuopt-qp-api-python/assets/README.md b/skills/cuopt-qp-api-python/assets/README.md new file mode 100644 index 0000000000..3c696f07b6 --- /dev/null +++ b/skills/cuopt-qp-api-python/assets/README.md @@ -0,0 +1,11 @@ +# Assets — reference QP models + +QP reference implementations (Python, beta). Use as reference when building new applications; do not edit in place. + +| Model | Description | +|-------|-------------| +| [portfolio](portfolio/) | Minimize portfolio variance; budget and min-return constraints | +| [least_squares](least_squares/) | Minimize (x-3)² + (y-4)² (closest point) | +| [maximization_workaround](maximization_workaround/) | Maximize quadratic via minimize -f(x) | + +**Run:** From each subdir, `python model.py`. QP is **beta** and supports **MINIMIZE** only. See [resources/examples.md](../resources/examples.md) for more. diff --git a/skills/cuopt-qp-api-python/assets/least_squares/README.md b/skills/cuopt-qp-api-python/assets/least_squares/README.md new file mode 100644 index 0000000000..5592ff2ac0 --- /dev/null +++ b/skills/cuopt-qp-api-python/assets/least_squares/README.md @@ -0,0 +1,5 @@ +# Least squares (QP) + +Minimize (x-3)² + (y-4)² — find point closest to (3, 4). Unconstrained quadratic. + +**Run:** `python model.py` diff --git a/skills/cuopt-qp-api-python/assets/least_squares/model.py b/skills/cuopt-qp-api-python/assets/least_squares/model.py new file mode 100644 index 0000000000..822d6397d2 --- /dev/null +++ b/skills/cuopt-qp-api-python/assets/least_squares/model.py @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Least squares: minimize (x-3)² + (y-4)². Solution should be x=3, y=4. +""" + +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +problem = Problem("LeastSquares") + +x = problem.addVariable(lb=-100, ub=100, vtype=CONTINUOUS, name="x") +y = problem.addVariable(lb=-100, ub=100, vtype=CONTINUOUS, name="y") + +problem.setObjective(x * x + y * y - 6 * x - 8 * y + 25, sense=MINIMIZE) + +problem.solve(SolverSettings()) + +if problem.Status.name in ["Optimal", "PrimalFeasible"]: + print(f"x = {x.getValue():.4f}") + print(f"y = {y.getValue():.4f}") +else: + print(f"Status: {problem.Status.name}") diff --git a/skills/cuopt-qp-api-python/assets/maximization_workaround/README.md b/skills/cuopt-qp-api-python/assets/maximization_workaround/README.md new file mode 100644 index 0000000000..bcd0f2c3c1 --- /dev/null +++ b/skills/cuopt-qp-api-python/assets/maximization_workaround/README.md @@ -0,0 +1,5 @@ +# Maximization workaround (QP) + +QP supports MINIMIZE only. To maximize f(x), minimize -f(x); then negate the optimal value. + +**Run:** `python model.py` diff --git a/skills/cuopt-qp-api-python/assets/maximization_workaround/model.py b/skills/cuopt-qp-api-python/assets/maximization_workaround/model.py new file mode 100644 index 0000000000..e18aa613d8 --- /dev/null +++ b/skills/cuopt-qp-api-python/assets/maximization_workaround/model.py @@ -0,0 +1,22 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Maximize -x² + 4x (max at x=2) by minimizing x² - 4x; then report -objective. +""" + +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MINIMIZE + +problem = Problem("MaxWorkaround") + +x = problem.addVariable(lb=0, ub=10, vtype=CONTINUOUS, name="x") +problem.setObjective(x * x - 4 * x, sense=MINIMIZE) + +problem.solve() + +if problem.Status.name in ["Optimal", "PrimalFeasible"]: + print(f"x = {x.getValue():.4f}") + print(f"Minimized value = {problem.ObjValue:.4f}") + print(f"Original maximum = {-problem.ObjValue:.4f}") +else: + print(f"Status: {problem.Status.name}") diff --git a/skills/cuopt-qp-api-python/assets/portfolio/README.md b/skills/cuopt-qp-api-python/assets/portfolio/README.md new file mode 100644 index 0000000000..cf2173a455 --- /dev/null +++ b/skills/cuopt-qp-api-python/assets/portfolio/README.md @@ -0,0 +1,7 @@ +# Portfolio optimization (QP) + +Minimize portfolio variance (risk) subject to fully invested (sum x = 1) and minimum return. Three assets; Q must be PSD. + +**Run:** `python model.py` + +**Note:** QP is beta; objective must be MINIMIZE. diff --git a/skills/cuopt-qp-api-python/assets/portfolio/model.py b/skills/cuopt-qp-api-python/assets/portfolio/model.py new file mode 100644 index 0000000000..0196efdcf8 --- /dev/null +++ b/skills/cuopt-qp-api-python/assets/portfolio/model.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Portfolio: minimize variance x'Qx subject to sum(x)=1, r'x >= target, x >= 0. +QP is beta; MUST use MINIMIZE. +""" + +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +problem = Problem("Portfolio") + +x1 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_a") +x2 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_b") +x3 = problem.addVariable(lb=0, ub=1, vtype=CONTINUOUS, name="stock_c") + +r1, r2, r3 = 0.12, 0.08, 0.05 +target_return = 0.08 + +problem.setObjective( + 0.04 * x1 * x1 + + 0.02 * x2 * x2 + + 0.01 * x3 * x3 + + 0.02 * x1 * x2 + + 0.01 * x1 * x3 + + 0.016 * x2 * x3, + sense=MINIMIZE, +) +problem.addConstraint(x1 + x2 + x3 == 1, name="budget") +problem.addConstraint( + r1 * x1 + r2 * x2 + r3 * x3 >= target_return, name="min_return" +) + +settings = SolverSettings() +settings.set_parameter("time_limit", 60) +problem.solve(settings) + +if problem.Status.name in ["Optimal", "PrimalFeasible"]: + print(f"Portfolio variance: {problem.ObjValue:.6f}") + print(f"Std dev: {problem.ObjValue**0.5:.4f}") + print(f" Stock A: {x1.getValue() * 100:.2f}%") + print(f" Stock B: {x2.getValue() * 100:.2f}%") + print(f" Stock C: {x3.getValue() * 100:.2f}%") + print( + f"Expected return: {(r1 * x1.getValue() + r2 * x2.getValue() + r3 * x3.getValue()) * 100:.2f}%" + ) +else: + print(f"Status: {problem.Status.name}") diff --git a/.github/skills/cuopt-qp/resources/python_examples.md b/skills/cuopt-qp-api-python/resources/examples.md similarity index 100% rename from .github/skills/cuopt-qp/resources/python_examples.md rename to skills/cuopt-qp-api-python/resources/examples.md diff --git a/skills/cuopt-routing-api-python/SKILL.md b/skills/cuopt-routing-api-python/SKILL.md new file mode 100644 index 0000000000..d8bf736f8f --- /dev/null +++ b/skills/cuopt-routing-api-python/SKILL.md @@ -0,0 +1,101 @@ +--- +name: cuopt-routing-api-python +version: "26.04.00" +description: Vehicle routing (VRP, TSP, PDP) with cuOpt — Python API only. Use when the user is building or solving routing in Python. +--- + +# cuOpt Routing — Python API + +Confirm problem type (TSP, VRP, PDP) and data (locations, orders, fleet, constraints) before coding. + +This skill is **Python only**. Routing has no C API in cuOpt. + +## Minimal VRP Example + +```python +import cudf +from cuopt import routing + +cost_matrix = cudf.DataFrame([...], dtype="float32") +dm = routing.DataModel(n_locations=4, n_fleet=2, n_orders=3) +dm.add_cost_matrix(cost_matrix) +dm.set_order_locations(cudf.Series([1, 2, 3], dtype="int32")) +solution = routing.Solve(dm, routing.SolverSettings()) + +if solution.get_status() == 0: + solution.display_routes() +``` + +## Adding Constraints + +```python +# Time windows +dm.add_transit_time_matrix(transit_time_matrix) +dm.set_order_time_windows(earliest_series, latest_series) + +# Capacities +dm.add_capacity_dimension("weight", demand_series, capacity_series) +dm.set_order_service_times(service_times) +dm.set_vehicle_locations(start_locations, end_locations) +dm.set_vehicle_time_windows(earliest_start, latest_return) + +# Pickup-delivery pairs +dm.set_pickup_delivery_pairs(pickup_indices, delivery_indices) + +# Precedence +dm.add_order_precedence(node_id=2, preceding_nodes=np.array([0, 1])) +``` + +## Solution Checking + +```python +status = solution.get_status() # 0=SUCCESS, 1=FAIL, 2=TIMEOUT, 3=EMPTY +if status == 0: + route_df = solution.get_route() + total_cost = solution.get_total_objective() +else: + print(solution.get_error_message()) + print(solution.get_infeasible_orders().to_list()) +``` + +## Data Types (use explicit dtypes) + +```python +cost_matrix = cost_matrix.astype("float32") +order_locations = cudf.Series([...], dtype="int32") +demand = cudf.Series([...], dtype="int32") +``` + +## Solver Settings + +```python +ss = routing.SolverSettings() +ss.set_time_limit(30) +ss.set_verbose_mode(True) +ss.set_error_logging_mode(True) +``` + +## Common Issues + +| Problem | Fix | +|---------|-----| +| Empty solution | Widen time windows or check travel times | +| Infeasible orders | Increase fleet or capacity | +| Status != 0 with time windows | Add `add_transit_time_matrix()` | +| Wrong cost | Check cost_matrix is symmetric | + +## Debugging + +**When status != 0:** `print(solution.get_error_message())` and `print(solution.get_infeasible_orders().to_list())` to see which orders are infeasible. + +**Data types:** Use explicit dtypes (float32, int32) for matrices and series to avoid silent errors. + +## Examples + +- [examples.md](resources/examples.md) — VRP, PDP, multi-depot +- [server_examples.md](resources/server_examples.md) — REST client (curl, Python) +- **Reference models:** This skill's `assets/` — [vrp_basic](assets/vrp_basic/), [pdp_basic](assets/pdp_basic/). See [assets/README.md](assets/README.md). + +## Escalate + +For contribution or build-from-source, see the developer skill. diff --git a/skills/cuopt-routing-api-python/assets/README.md b/skills/cuopt-routing-api-python/assets/README.md new file mode 100644 index 0000000000..6b7a8091c9 --- /dev/null +++ b/skills/cuopt-routing-api-python/assets/README.md @@ -0,0 +1,10 @@ +# Assets — reference routing models + +Routing reference implementations (Python). Use as reference when building new applications; do not edit in place. + +| Model | Type | Description | +|-------|------|-------------| +| [vrp_basic](vrp_basic/) | VRP | Minimal VRP: 4 locations, 1 vehicle, 3 orders | +| [pdp_basic](pdp_basic/) | PDP | Pickup-delivery pairs, capacity dimension | + +**Run:** From each subdir, `python model.py` (requires cuOpt and cudf). See [resources/examples.md](../resources/examples.md) for more patterns (time windows, multi-depot). diff --git a/skills/cuopt-routing-api-python/assets/pdp_basic/README.md b/skills/cuopt-routing-api-python/assets/pdp_basic/README.md new file mode 100644 index 0000000000..64e345bb7c --- /dev/null +++ b/skills/cuopt-routing-api-python/assets/pdp_basic/README.md @@ -0,0 +1,7 @@ +# Pickup-Delivery (PDP) + +2 pickup-delivery pairs (4 orders), 2 vehicles. Pickup must occur before delivery; capacity dimension. + +**Run:** `python model.py` + +**See also:** [resources/examples.md](../../resources/examples.md) for more PDP and VRP patterns. diff --git a/skills/cuopt-routing-api-python/assets/pdp_basic/model.py b/skills/cuopt-routing-api-python/assets/pdp_basic/model.py new file mode 100644 index 0000000000..d85ec5329b --- /dev/null +++ b/skills/cuopt-routing-api-python/assets/pdp_basic/model.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +PDP: 2 pickup-delivery pairs, 2 vehicles. Pickup before delivery; capacity dimension. +""" + +import cudf +from cuopt import routing + +cost_matrix = cudf.DataFrame( + [ + [0, 10, 20, 30, 40], + [10, 0, 15, 25, 35], + [20, 15, 0, 10, 20], + [30, 25, 10, 0, 15], + [40, 35, 20, 15, 0], + ], + dtype="float32", +) + +transit_time_matrix = cost_matrix.copy(deep=True) +n_fleet = 2 +n_orders = 4 + +order_locations = cudf.Series([1, 2, 3, 4], dtype="int32") +pickup_indices = cudf.Series([0, 2]) +delivery_indices = cudf.Series([1, 3]) +demand = cudf.Series([10, -10, 15, -15], dtype="int32") +vehicle_capacity = cudf.Series([50, 50], dtype="int32") + +dm = routing.DataModel( + n_locations=cost_matrix.shape[0], + n_fleet=n_fleet, + n_orders=n_orders, +) +dm.add_cost_matrix(cost_matrix) +dm.add_transit_time_matrix(transit_time_matrix) +dm.set_order_locations(order_locations) +dm.add_capacity_dimension("load", demand, vehicle_capacity) +dm.set_pickup_delivery_pairs(pickup_indices, delivery_indices) +dm.set_vehicle_locations( + cudf.Series([0, 0], dtype="int32"), + cudf.Series([0, 0], dtype="int32"), +) + +ss = routing.SolverSettings() +ss.set_time_limit(10) +solution = routing.Solve(dm, ss) + +print(f"Status: {solution.get_status()}") +if solution.get_status() == 0: + solution.display_routes() + print(f"Total cost: {solution.get_total_objective()}") +else: + print(solution.get_error_message()) diff --git a/skills/cuopt-routing-api-python/assets/vrp_basic/README.md b/skills/cuopt-routing-api-python/assets/vrp_basic/README.md new file mode 100644 index 0000000000..cdb2890269 --- /dev/null +++ b/skills/cuopt-routing-api-python/assets/vrp_basic/README.md @@ -0,0 +1,7 @@ +# Minimal VRP + +4 locations (depot 0 + 3 customers), 1 vehicle, 3 orders. Cost matrix only; no time windows or capacity. + +**Run:** `python model.py` + +**See also:** [resources/examples.md](../../resources/examples.md) for VRP with time windows, capacity, and multi-depot. diff --git a/skills/cuopt-routing-api-python/assets/vrp_basic/model.py b/skills/cuopt-routing-api-python/assets/vrp_basic/model.py new file mode 100644 index 0000000000..165f6afc1e --- /dev/null +++ b/skills/cuopt-routing-api-python/assets/vrp_basic/model.py @@ -0,0 +1,31 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +Minimal VRP: 4 locations, 1 vehicle, 3 orders. Cost matrix only. +""" + +import cudf +from cuopt import routing + +cost_matrix = cudf.DataFrame( + [ + [0, 10, 15, 20], + [10, 0, 12, 18], + [15, 12, 0, 10], + [20, 18, 10, 0], + ], + dtype="float32", +) + +dm = routing.DataModel(n_locations=4, n_fleet=1, n_orders=3) +dm.add_cost_matrix(cost_matrix) +dm.set_order_locations(cudf.Series([1, 2, 3], dtype="int32")) + +solution = routing.Solve(dm, routing.SolverSettings()) + +if solution.get_status() == 0: + solution.display_routes() + print(f"Total cost: {solution.get_total_objective()}") +else: + print(f"Status: {solution.get_status()}", solution.get_error_message()) diff --git a/.github/skills/cuopt-routing/resources/python_examples.md b/skills/cuopt-routing-api-python/resources/examples.md similarity index 96% rename from .github/skills/cuopt-routing/resources/python_examples.md rename to skills/cuopt-routing-api-python/resources/examples.md index 4ff694d35a..ee402bb314 100644 --- a/.github/skills/cuopt-routing/resources/python_examples.md +++ b/skills/cuopt-routing-api-python/resources/examples.md @@ -28,7 +28,7 @@ cost_matrix = cudf.DataFrame([ transit_time_matrix = cost_matrix.copy(deep=True) # Order data (customers 1-5) -order_locations = cudf.Series([1, 2, 3, 4, 5]) # Location indices for orders +order_locations = cudf.Series([1, 2, 3, 4, 5], dtype="int32") # Location indices for orders # Demand at each customer (single capacity dimension) demand = cudf.Series([20, 30, 25, 15, 35], dtype="int32") @@ -130,7 +130,7 @@ n_fleet = 2 n_orders = 4 # 2 pickup-delivery pairs = 4 orders # Orders: pickup at loc 1 -> deliver at loc 2, pickup at loc 3 -> deliver at loc 4 -order_locations = cudf.Series([1, 2, 3, 4]) +order_locations = cudf.Series([1, 2, 3, 4], dtype="int32") # Pickup and delivery pairs (indices into order array) # Order 0 (pickup) pairs with Order 1 (delivery) @@ -191,7 +191,7 @@ cost_matrix = cudf.DataFrame([ dm = routing.DataModel(n_locations=4, n_fleet=1, n_orders=3) dm.add_cost_matrix(cost_matrix) -dm.set_order_locations(cudf.Series([1, 2, 3])) +dm.set_order_locations(cudf.Series([1, 2, 3], dtype="int32")) solution = routing.Solve(dm, routing.SolverSettings()) @@ -219,7 +219,7 @@ n_fleet = 2 dm = routing.DataModel(n_locations=6, n_fleet=n_fleet, n_orders=4) dm.add_cost_matrix(cost_matrix) -dm.set_order_locations(cudf.Series([2, 3, 4, 5])) +dm.set_order_locations(cudf.Series([2, 3, 4, 5], dtype="int32")) # Vehicle 0 starts/ends at depot 0, Vehicle 1 at depot 1 dm.set_vehicle_locations( diff --git a/.github/skills/cuopt-routing/resources/server_examples.md b/skills/cuopt-routing-api-python/resources/server_examples.md similarity index 100% rename from .github/skills/cuopt-routing/resources/server_examples.md rename to skills/cuopt-routing-api-python/resources/server_examples.md diff --git a/skills/cuopt-server-api-python/SKILL.md b/skills/cuopt-server-api-python/SKILL.md new file mode 100644 index 0000000000..b340e9883f --- /dev/null +++ b/skills/cuopt-server-api-python/SKILL.md @@ -0,0 +1,80 @@ +--- +name: cuopt-server-api-python +version: "26.04.00" +description: cuOpt REST server — start server, endpoints, Python/curl client examples. Use when the user is deploying or calling the REST API. +--- + +# cuOpt Server — Deploy and client (Python/curl) + +This skill covers **starting the server** and **client examples** (curl, Python). Server has no separate C API (clients can be any language). + +## Start server + +```bash +# Development +python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000 + +# Docker +docker run --gpus all -d -p 8000:8000 -e CUOPT_SERVER_PORT=8000 \ + nvidia/cuopt:latest-cuda12.9-py3.13 +``` + +## Verify + +```bash +curl http://localhost:8000/cuopt/health +``` + +## Workflow + +1. POST to `/cuopt/request` → get `reqId` +2. Poll `/cuopt/solution/{reqId}` until solution ready +3. Parse response + +## Python client (routing) + +```python +import requests, time +SERVER = "http://localhost:8000" +HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} +payload = { + "cost_matrix_data": {"data": {"0": [[0,10,15],[10,0,12],[15,12,0]]}}, + "travel_time_matrix_data": {"data": {"0": [[0,10,15],[10,0,12],[15,12,0]]}}, + "task_data": {"task_locations": [1, 2], "demand": [[10, 20]], "task_time_windows": [[0,100],[0,100]], "service_times": [5, 5]}, + "fleet_data": {"vehicle_locations": [[0, 0]], "capacities": [[50]], "vehicle_time_windows": [[0, 200]]}, + "solver_config": {"time_limit": 5} +} +r = requests.post(f"{SERVER}/cuopt/request", json=payload, headers=HEADERS) +req_id = r.json()["reqId"] +# Poll: GET /cuopt/solution/{req_id} +``` + +## Terminology: REST vs Python API + +| Python API | REST | +|------------|------| +| order_locations | task_locations | +| set_order_time_windows() | task_time_windows | +| service_times | service_times | + +Use `travel_time_matrix_data` (not transit_time_matrix_data). Capacities: `[[50, 50]]` not `[[50], [50]]`. + +## Debugging (422 / payload) + +**Validation errors:** Check field names against OpenAPI (`/cuopt.yaml`). Common mistakes: `transit_time_matrix_data` → `travel_time_matrix_data`; capacities per dimension `[[50, 50]]` not per vehicle `[[50], [50]]`. Capture `reqId` and response body for failed requests. + +## Runnable assets + +Run from each asset directory (server must be running; scripts exit 0 if server unreachable). All use Python `requests`: + +- [assets/vrp_simple/](assets/vrp_simple/) — Basic VRP (no time windows) +- [assets/vrp_basic/](assets/vrp_basic/) — VRP with time windows +- [assets/pdp_basic/](assets/pdp_basic/) — Pickup and delivery +- [assets/lp_basic/](assets/lp_basic/) — LP via REST (CSR format) +- [assets/milp_basic/](assets/milp_basic/) — MILP via REST + +See [assets/README.md](assets/README.md) for overview. + +## Escalate + +For contribution or build-from-source, see the developer skill. diff --git a/skills/cuopt-server-api-python/assets/README.md b/skills/cuopt-server-api-python/assets/README.md new file mode 100644 index 0000000000..1389f3eb7b --- /dev/null +++ b/skills/cuopt-server-api-python/assets/README.md @@ -0,0 +1,14 @@ +# Server API Python — runnable assets + +REST client examples (Python requests). Each runs against a cuOpt server; if the server is not reachable, the script exits 0 (skip). + +| Asset | Description | +|---------------|-------------| +| `vrp_simple/` | Basic VRP (no time windows) | +| `vrp_basic/` | VRP with time windows | +| `pdp_basic/` | Pickup and delivery (pairs) | +| `lp_basic/` | LP (CSR format) | +| `milp_basic/` | MILP (integer + continuous variables) | + +Start server: `python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000` +Env: `CUOPT_SERVER_URL` (default `http://localhost:8000`). diff --git a/skills/cuopt-server-api-python/assets/lp_basic/README.md b/skills/cuopt-server-api-python/assets/lp_basic/README.md new file mode 100644 index 0000000000..34c10fb350 --- /dev/null +++ b/skills/cuopt-server-api-python/assets/lp_basic/README.md @@ -0,0 +1,10 @@ +# LP via REST (maximize 40x + 30y) + +Submit an LP to the cuOpt server (CSR format) and poll for the solution. + +**Requires:** cuOpt server running (e.g. `python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000`). + +**Run:** `python client.py` +If the server is not reachable, the script exits 0 (skip). + +**Env:** `CUOPT_SERVER_URL` (default `http://localhost:8000`). diff --git a/skills/cuopt-server-api-python/assets/lp_basic/client.py b/skills/cuopt-server-api-python/assets/lp_basic/client.py new file mode 100644 index 0000000000..bca7b15295 --- /dev/null +++ b/skills/cuopt-server-api-python/assets/lp_basic/client.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +REST client: LP request (maximize 40x + 30y s.t. 2x+3y<=240, 4x+2y<=200). Requires cuOpt server running. + +Usage: python client.py + Set CUOPT_SERVER_URL (default http://localhost:8000). Exits 0 if server unreachable (e.g. in CI without server). +""" + +import os +import sys +import time + +import requests + +SERVER = os.environ.get("CUOPT_SERVER_URL", "http://localhost:8000") +HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} + + +def server_ok(): + try: + r = requests.get(f"{SERVER}/cuopt/health", timeout=2) + return r.status_code == 200 + except Exception: + return False + + +def main(): + if not server_ok(): + print( + "Server not running, skipping. Start with: python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000" + ) + sys.exit(0) + + payload = { + "csr_constraint_matrix": { + "offsets": [0, 2, 4], + "indices": [0, 1, 0, 1], + "values": [2.0, 3.0, 4.0, 2.0], + }, + "constraint_bounds": { + "upper_bounds": [240.0, 200.0], + "lower_bounds": ["ninf", "ninf"], + }, + "objective_data": { + "coefficients": [40.0, 30.0], + }, + "variable_bounds": { + "upper_bounds": ["inf", "inf"], + "lower_bounds": [0.0, 0.0], + }, + "maximize": True, + "solver_config": { + "time_limit": 60, + }, + } + + response = requests.post( + f"{SERVER}/cuopt/request", json=payload, headers=HEADERS + ) + response.raise_for_status() + req_id = response.json()["reqId"] + print(f"Submitted: {req_id}") + + for _ in range(30): + response = requests.get( + f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS + ) + result = response.json() + + if "response" in result: + print(f"Status: {result['response'].get('status')}") + print(f"Objective: {result['response'].get('objective_value')}") + print(f"Solution: {result['response'].get('primal_solution')}") + return + time.sleep(1) + + print("Timeout waiting for solution") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-server-api-python/assets/milp_basic/README.md b/skills/cuopt-server-api-python/assets/milp_basic/README.md new file mode 100644 index 0000000000..e490840557 --- /dev/null +++ b/skills/cuopt-server-api-python/assets/milp_basic/README.md @@ -0,0 +1,6 @@ +# MILP via REST + +Same problem as LP (maximize 40x + 30y, 2x+3y≤240, 4x+2y≤200) with `variable_types`: first variable integer, second continuous. + +**Requires:** cuOpt server running. **Run:** `python client.py` (exits 0 if server unreachable). +**Env:** `CUOPT_SERVER_URL` (default `http://localhost:8000`). Variable types: `continuous`, `integer`, `binary`. diff --git a/skills/cuopt-server-api-python/assets/milp_basic/client.py b/skills/cuopt-server-api-python/assets/milp_basic/client.py new file mode 100644 index 0000000000..1c18de60e9 --- /dev/null +++ b/skills/cuopt-server-api-python/assets/milp_basic/client.py @@ -0,0 +1,82 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +REST client: MILP (same constraints as LP but variable_types: integer, continuous). +Requires cuOpt server running. Exits 0 if server unreachable. +""" + +import os +import sys +import time + +import requests + +SERVER = os.environ.get("CUOPT_SERVER_URL", "http://localhost:8000") +HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} + + +def server_ok(): + try: + r = requests.get(f"{SERVER}/cuopt/health", timeout=2) + return r.status_code == 200 + except Exception: + return False + + +def main(): + if not server_ok(): + print( + "Server not running, skipping. Start with: python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000" + ) + sys.exit(0) + + payload = { + "csr_constraint_matrix": { + "offsets": [0, 2, 4], + "indices": [0, 1, 0, 1], + "values": [2.0, 3.0, 4.0, 2.0], + }, + "constraint_bounds": { + "upper_bounds": [240.0, 200.0], + "lower_bounds": ["ninf", "ninf"], + }, + "objective_data": {"coefficients": [40.0, 30.0]}, + "variable_bounds": { + "upper_bounds": ["inf", "inf"], + "lower_bounds": [0.0, 0.0], + }, + "variable_types": ["integer", "continuous"], + "maximize": True, + "solver_config": { + "time_limit": 120, + "tolerances": {"mip_relative_gap": 0.01}, + }, + } + + response = requests.post( + f"{SERVER}/cuopt/request", json=payload, headers=HEADERS + ) + response.raise_for_status() + req_id = response.json()["reqId"] + print(f"Submitted: {req_id}") + + for _ in range(60): + response = requests.get( + f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS + ) + result = response.json() + + if "response" in result: + print(f"Status: {result['response'].get('status')}") + print(f"Objective: {result['response'].get('objective_value')}") + print(f"Solution: {result['response'].get('primal_solution')}") + return + time.sleep(1) + + print("Timeout waiting for solution") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-server-api-python/assets/pdp_basic/README.md b/skills/cuopt-server-api-python/assets/pdp_basic/README.md new file mode 100644 index 0000000000..ca6c174c6c --- /dev/null +++ b/skills/cuopt-server-api-python/assets/pdp_basic/README.md @@ -0,0 +1,6 @@ +# Pickup and delivery (PDP) + +Pickup-delivery pairs: (0,1) and (2,3). Pickup must be visited before the corresponding delivery. + +**Requires:** cuOpt server running. **Run:** `python client.py` (exits 0 if server unreachable). +**Env:** `CUOPT_SERVER_URL` (default `http://localhost:8000`). diff --git a/skills/cuopt-server-api-python/assets/pdp_basic/client.py b/skills/cuopt-server-api-python/assets/pdp_basic/client.py new file mode 100644 index 0000000000..cad4d3bdb1 --- /dev/null +++ b/skills/cuopt-server-api-python/assets/pdp_basic/client.py @@ -0,0 +1,97 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +REST client: Pickup and delivery (PDP). Pairs (0,1) and (2,3); pickup before delivery. +Requires cuOpt server running. Exits 0 if server unreachable. +""" + +import os +import sys +import time + +import requests + +SERVER = os.environ.get("CUOPT_SERVER_URL", "http://localhost:8000") +HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} + + +def server_ok(): + try: + r = requests.get(f"{SERVER}/cuopt/health", timeout=2) + return r.status_code == 200 + except Exception: + return False + + +def main(): + if not server_ok(): + print( + "Server not running, skipping. Start with: python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000" + ) + sys.exit(0) + + payload = { + "cost_matrix_data": { + "data": { + "0": [ + [0, 10, 20, 30, 40], + [10, 0, 15, 25, 35], + [20, 15, 0, 10, 20], + [30, 25, 10, 0, 15], + [40, 35, 20, 15, 0], + ] + } + }, + "travel_time_matrix_data": { + "data": { + "0": [ + [0, 10, 20, 30, 40], + [10, 0, 15, 25, 35], + [20, 15, 0, 10, 20], + [30, 25, 10, 0, 15], + [40, 35, 20, 15, 0], + ] + } + }, + "task_data": { + "task_locations": [1, 2, 3, 4], + "demand": [[10, -10, 15, -15]], + "pickup_and_delivery_pairs": [[0, 1], [2, 3]], + }, + "fleet_data": { + "vehicle_locations": [[0, 0]], + "capacities": [[50]], + }, + "solver_config": {"time_limit": 10}, + } + + response = requests.post( + f"{SERVER}/cuopt/request", json=payload, headers=HEADERS + ) + response.raise_for_status() + req_id = response.json()["reqId"] + print(f"Submitted: {req_id}") + + for _ in range(30): + response = requests.get( + f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS + ) + result = response.json() + + if "response" in result: + solver_response = result["response"].get("solver_response", {}) + print(f"Status: {solver_response.get('status')}") + print(f"Cost: {solver_response.get('solution_cost')}") + if "vehicle_data" in solver_response: + for vid, vdata in solver_response["vehicle_data"].items(): + print(f"Vehicle {vid}: {vdata.get('route', [])}") + return + time.sleep(1) + + print("Timeout waiting for solution") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-server-api-python/assets/vrp_basic/README.md b/skills/cuopt-server-api-python/assets/vrp_basic/README.md new file mode 100644 index 0000000000..84b46f7240 --- /dev/null +++ b/skills/cuopt-server-api-python/assets/vrp_basic/README.md @@ -0,0 +1,10 @@ +# VRP with time windows (REST client) + +Submit a VRP with time windows to the cuOpt server and poll for the solution. + +**Requires:** cuOpt server running (e.g. `python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000`). + +**Run:** `python client.py` +If the server is not reachable, the script exits 0 (skip). + +**Env:** `CUOPT_SERVER_URL` (default `http://localhost:8000`). diff --git a/skills/cuopt-server-api-python/assets/vrp_basic/client.py b/skills/cuopt-server-api-python/assets/vrp_basic/client.py new file mode 100644 index 0000000000..9285eb05cd --- /dev/null +++ b/skills/cuopt-server-api-python/assets/vrp_basic/client.py @@ -0,0 +1,101 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +REST client: VRP with time windows. Requires cuOpt server running. + +Usage: python client.py + Set CUOPT_SERVER_URL (default http://localhost:8000). Exits 0 if server unreachable (e.g. in CI without server). +""" + +import os +import sys +import time + +import requests + +SERVER = os.environ.get("CUOPT_SERVER_URL", "http://localhost:8000") +HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} + + +def server_ok(): + try: + r = requests.get(f"{SERVER}/cuopt/health", timeout=2) + return r.status_code == 200 + except Exception: + return False + + +def main(): + if not server_ok(): + print( + "Server not running, skipping. Start with: python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000" + ) + sys.exit(0) + + payload = { + "cost_matrix_data": { + "data": { + "0": [ + [0, 10, 15, 20, 25], + [10, 0, 12, 18, 22], + [15, 12, 0, 10, 15], + [20, 18, 10, 0, 8], + [25, 22, 15, 8, 0], + ] + } + }, + "travel_time_matrix_data": { + "data": { + "0": [ + [0, 10, 15, 20, 25], + [10, 0, 12, 18, 22], + [15, 12, 0, 10, 15], + [20, 18, 10, 0, 8], + [25, 22, 15, 8, 0], + ] + } + }, + "task_data": { + "task_locations": [1, 2, 3, 4], + "demand": [[20, 30, 25, 15]], + "task_time_windows": [[0, 50], [10, 60], [20, 70], [0, 80]], + "service_times": [5, 5, 5, 5], + }, + "fleet_data": { + "vehicle_locations": [[0, 0], [0, 0]], + "capacities": [[100, 100]], + "vehicle_time_windows": [[0, 200], [0, 200]], + }, + "solver_config": {"time_limit": 10}, + } + + response = requests.post( + f"{SERVER}/cuopt/request", json=payload, headers=HEADERS + ) + response.raise_for_status() + req_id = response.json()["reqId"] + print(f"Submitted: {req_id}") + + for _ in range(30): + response = requests.get( + f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS + ) + result = response.json() + + if "response" in result: + solver_response = result["response"].get("solver_response", {}) + print(f"Status: {solver_response.get('status')}") + print(f"Cost: {solver_response.get('solution_cost')}") + if "vehicle_data" in solver_response: + for vid, vdata in solver_response["vehicle_data"].items(): + print(f"Vehicle {vid}: {vdata.get('route', [])}") + return + time.sleep(1) + + print("Timeout waiting for solution") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-server-api-python/assets/vrp_simple/README.md b/skills/cuopt-server-api-python/assets/vrp_simple/README.md new file mode 100644 index 0000000000..f9de54a24c --- /dev/null +++ b/skills/cuopt-server-api-python/assets/vrp_simple/README.md @@ -0,0 +1,6 @@ +# Basic VRP (no time windows) + +Simple VRP: 4 locations, 3 tasks, 2 vehicles. No time windows. + +**Requires:** cuOpt server running. **Run:** `python client.py` (exits 0 if server unreachable). +**Env:** `CUOPT_SERVER_URL` (default `http://localhost:8000`). diff --git a/skills/cuopt-server-api-python/assets/vrp_simple/client.py b/skills/cuopt-server-api-python/assets/vrp_simple/client.py new file mode 100644 index 0000000000..35f37f5c72 --- /dev/null +++ b/skills/cuopt-server-api-python/assets/vrp_simple/client.py @@ -0,0 +1,95 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +""" +REST client: Basic VRP (no time windows). 4 locations, 3 tasks, 2 vehicles. +Requires cuOpt server running. Exits 0 if server unreachable. +""" + +import os +import sys +import time + +import requests + +SERVER = os.environ.get("CUOPT_SERVER_URL", "http://localhost:8000") +HEADERS = {"Content-Type": "application/json", "CLIENT-VERSION": "custom"} + + +def server_ok(): + try: + r = requests.get(f"{SERVER}/cuopt/health", timeout=2) + return r.status_code == 200 + except Exception: + return False + + +def main(): + if not server_ok(): + print( + "Server not running, skipping. Start with: python -m cuopt_server.cuopt_service --ip 0.0.0.0 --port 8000" + ) + sys.exit(0) + + payload = { + "cost_matrix_data": { + "data": { + "0": [ + [0, 10, 15, 20], + [10, 0, 12, 18], + [15, 12, 0, 10], + [20, 18, 10, 0], + ] + } + }, + "travel_time_matrix_data": { + "data": { + "0": [ + [0, 10, 15, 20], + [10, 0, 12, 18], + [15, 12, 0, 10], + [20, 18, 10, 0], + ] + } + }, + "task_data": { + "task_locations": [1, 2, 3], + "demand": [[10, 15, 20]], + "service_times": [5, 5, 5], + }, + "fleet_data": { + "vehicle_locations": [[0, 0], [0, 0]], + "capacities": [[50, 50]], + }, + "solver_config": {"time_limit": 5}, + } + + response = requests.post( + f"{SERVER}/cuopt/request", json=payload, headers=HEADERS + ) + response.raise_for_status() + req_id = response.json()["reqId"] + print(f"Submitted: {req_id}") + + for _ in range(30): + response = requests.get( + f"{SERVER}/cuopt/solution/{req_id}", headers=HEADERS + ) + result = response.json() + + if "response" in result: + solver_response = result["response"].get("solver_response", {}) + print(f"Status: {solver_response.get('status')}") + print(f"Cost: {solver_response.get('solution_cost')}") + if "vehicle_data" in solver_response: + for vid, vdata in solver_response["vehicle_data"].items(): + print(f"Vehicle {vid}: {vdata.get('route', [])}") + return + time.sleep(1) + + print("Timeout waiting for solution") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/skills/cuopt-server-common/SKILL.md b/skills/cuopt-server-common/SKILL.md new file mode 100644 index 0000000000..f23c9c4a5f --- /dev/null +++ b/skills/cuopt-server-common/SKILL.md @@ -0,0 +1,46 @@ +--- +name: cuopt-server-common +version: "26.04.00" +description: cuOpt REST server — what it does and how requests flow. Domain concepts; no deploy or client code. +--- + +# cuOpt Server (common) + +Domain concepts for the cuOpt REST server. No deploy commands or client code here. + +## What the server does + +- Accepts optimization requests (routing, LP, MILP) over HTTP. +- Returns a request ID; solution is obtained by polling with that ID. +- Does **not** support QP via REST. + +## Problem types supported + +| Problem type | Supported | +|--------------|:---------:| +| Routing | ✓ | +| LP | ✓ | +| MILP | ✓ | +| QP | ✗ | + +## Request flow (conceptual) + +1. Client sends problem data in the required schema (matrices, tasks, fleet, solver config). +2. Server returns a `reqId`. +3. Client polls the solution endpoint with `reqId` until the job completes. +4. Response contains status and, on success, solution (routes, objective, primal values, etc.). + +## Required questions (deployment and usage) + +Ask these if not already clear: + +1. **Problem type** — Routing or LP/MILP? (QP not available.) +2. **Deployment** — Local, Docker, Kubernetes, or cloud? +3. **Client** — Which language or tool will call the API (e.g. Python, curl, another service)? + +## Key endpoints (conceptual) + +- Health check. +- Submit request (POST). +- Get solution by request ID (GET). +- OpenAPI spec (e.g. for payload format). diff --git a/.github/skills/cuopt-user-rules/SKILL.md b/skills/cuopt-user-rules/SKILL.md similarity index 67% rename from .github/skills/cuopt-user-rules/SKILL.md rename to skills/cuopt-user-rules/SKILL.md index a6c870c10d..7ca291ac9b 100644 --- a/.github/skills/cuopt-user-rules/SKILL.md +++ b/skills/cuopt-user-rules/SKILL.md @@ -1,6 +1,7 @@ --- name: cuopt-user-rules -description: Base behavior rules for using NVIDIA cuOpt. Read this FIRST before any cuOpt user task (routing, LP/MILP, QP, debugging, installation, server). Covers handling incomplete questions, clarifying data requirements, verifying understanding, and running commands safely. +version: "26.04.00" +description: Base behavior rules for using NVIDIA cuOpt. Read this FIRST before any cuOpt user task (routing, LP/MILP, QP, installation, server). Covers handling incomplete questions, clarifying data requirements, verifying understanding, and running commands safely. --- # cuOpt User Rules @@ -9,14 +10,14 @@ description: Base behavior rules for using NVIDIA cuOpt. Read this FIRST before --- -## 1. Ask Before Assuming +## Ask Before Assuming **Always clarify ambiguous requirements before implementing:** -- What interface? (Python API / REST Server / C API / CLI) -- What problem type? (Routing / LP / MILP / QP) -- What constraints matter? (time windows, capacities, etc.) -- What output format? (solution values, routes, visualization) +- What **language/interface**? +- What problem type? +- What constraints matter? +- What output format? **Skip asking only if:** - User explicitly stated the requirement @@ -24,7 +25,7 @@ description: Base behavior rules for using NVIDIA cuOpt. Read this FIRST before --- -## 2. Handle Incomplete Questions +## Handle Incomplete Questions **If a question seems partial or incomplete, ask follow-up questions:** @@ -42,7 +43,7 @@ description: Base behavior rules for using NVIDIA cuOpt. Read this FIRST before --- -## 3. Clarify Data Requirements +## Clarify Data Requirements **Before generating examples, ask about data:** @@ -69,7 +70,7 @@ description: Base behavior rules for using NVIDIA cuOpt. Read this FIRST before --- -## 4. MUST Verify Understanding +## MUST Verify Understanding **Before writing substantial code, you MUST confirm your understanding:** @@ -84,7 +85,7 @@ Is this correct?" --- -## 5. Follow Requirements Exactly +## Follow Requirements Exactly - Use the **exact** variable names, formats, and structures the user specifies - Don't add features the user didn't ask for @@ -93,23 +94,7 @@ Is this correct?" --- -## 6. Read Examples First - -Before generating code, **read the canonical example** for that problem type: - -| Problem | Example Location | -|---------|------------------| -| Routing | `docs/cuopt/source/cuopt-python/routing/examples/` | -| LP/MILP | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/` | -| QP | `docs/cuopt/source/cuopt-python/lp-qp-milp/examples/simple_qp_example.py` | -| Server | `docs/cuopt/source/cuopt_spec.yaml` (OpenAPI) | -| C API | `docs/cuopt/source/cuopt-c/lp-qp-milp/examples/` | - -**Don't invent API patterns.** Copy from examples. - ---- - -## 7. Check Results +## Check Results After providing a solution, guide the user to verify: @@ -117,11 +102,20 @@ After providing a solution, guide the user to verify: - **Constraint satisfaction**: Are all constraints met? - **Objective value**: Is it reasonable for the problem? +**Always end with a Result summary** that includes at least: +- Solver status (e.g. Optimal, FeasibleFound, SUCCESS). +- **Objective value with highlight** — easy to spot (bold or code block). Example: **Objective value (min total cost):** <value> or `Objective value: `. +- Briefly what the objective represents (e.g. total cost, total profit). + +Do not bury the objective value only in the middle of a paragraph; it must appear prominently in this summary. Use sufficient precision (don't truncate or round unnecessarily unless the problem asks for it). + +**Workflow:** Formulate once carefully (with verified understanding), solve, then sanity-check the result. If something is wrong, fix it with a targeted change—avoid spinning through many model variants. Decide, implement, verify, then move on. + Provide diagnostic code snippets when helpful. --- -## 8. Check Environment First +## Check Environment First **Before writing code or suggesting installation, verify the user's setup:** @@ -129,16 +123,16 @@ Provide diagnostic code snippets when helpful. - "Do you have cuOpt installed? If so, which interface?" - "What environment are you using? (local GPU, cloud, Docker, server, etc.)" -2. **Different packages for different interfaces:** +2. **Different packages by language/interface:** - | Interface | Package | Check | - |-----------|---------|-------| - | Python API | `cuopt` (pip/conda) | `import cuopt` | - | C API | `libcuopt` (conda/system) | `find libcuopt.so` or header check | + | Language / Interface | Package | Check | + |----------------------|---------|-------| + | **Python** | `cuopt` (pip/conda) | `import cuopt` | + | **C** | `libcuopt` (conda/system) | `find libcuopt.so` or header check | | REST Server | `cuopt-server` or Docker | `curl /cuopt/health` | | CLI | `cuopt` package includes CLI | `cuopt_cli --help` | - **Note:** `libcuopt` (C library) installed via conda is NOT available through Python import — they are separate packages. + **Note:** `libcuopt` (C library) is separate from the Python package — C and Python use different installs. 3. **If not installed, ask how they want to access:** - "Would you like help installing cuOpt, or do you have access another way?" @@ -167,14 +161,14 @@ Provide diagnostic code snippets when helpful. --- -## 9. Ask Before Running +## Ask Before Running **Do not execute commands or code without explicit permission:** | Action | Rule | |--------|------| | Shell commands | Show command, explain what it does, ask "Should I run this?" | -| Package installs | **Never** run `pip`, `conda`, `apt` without asking first | +| Package installs | **Never** run installs yourself — give the exact command, user runs it (see below). | | Examples/scripts | Show the code first, ask "Would you like me to run this?" | | File writes | Explain what will change, ask before writing | @@ -184,7 +178,7 @@ Provide diagnostic code snippets when helpful. --- -## 10. No Privileged Operations +## No Privileged Operations **Never do these without explicit user request AND confirmation:** @@ -196,6 +190,18 @@ Provide diagnostic code snippets when helpful. --- +## Never Install Packages Automatically + +> **🔒 MANDATORY — You MUST NOT install, upgrade, or modify packages.** Provide the exact command; the user runs it. No exceptions. + +| Forbidden | What to do instead | +|-----------|--------------------| +| `pip install ...`, `conda install ...`, `apt install ...`, any package manager | Give the exact command and ask the user to run it. Say why the package is needed. | + +**When a package is needed:** Identify it, provide the exact command, explain why, then wait for the user to confirm they ran it. Even if the user says "just install it", give the command and require them to execute it themselves. + +--- + ## Resources ### Documentation diff --git a/skills/lp-milp-formulation/SKILL.md b/skills/lp-milp-formulation/SKILL.md new file mode 100644 index 0000000000..c0df08f45c --- /dev/null +++ b/skills/lp-milp-formulation/SKILL.md @@ -0,0 +1,128 @@ +--- +name: lp-milp-formulation +version: "26.04.00" +description: LP/MILP concepts and going from problem text to formulation. What LP/MILP are, required formulation questions, typical modeling elements, and how to parse problem statements (parameters, constraints, decisions, objective). +--- + +# LP/MILP Formulation + +Concepts and workflow for going from a problem description to a clear formulation. No API code here. + +## What is LP / MILP + +- **LP**: Linear objective, linear constraints, continuous variables. +- **MILP**: Same plus some integer or binary variables (e.g. scheduling, facility location, selection). + +## Required questions (problem formulation) + +Ask these if not already clear: + +1. **Decision variables** — What are they? Bounds? +2. **Objective** — Minimize or maximize? Linear expression in the variables? +3. **Constraints** — Linear inequalities/equalities? Names and meaning? +4. **Variable types** — All continuous (LP) or some integer/binary (MILP)? + +## Typical modeling elements + +- **Continuous variables** — production amounts, flow, etc. +- **Binary variables** — open/close, yes/no (e.g. facility open, item selected). +- **Linking constraints** — e.g. production only if facility open (Big-M or indicator). +- **Resource constraints** — linear cap on usage (materials, time, capacity). + +--- + +## Problem statement parsing + +When the user gives **problem text**, classify every sentence and then summarize before formulating. + +**Classify every sentence** as **parameter/given**, **constraint**, **decision**, or **objective**. Watch for **implicit constraints** (e.g. committed vs optional phrasing) and **implicit objectives** (e.g. "determine the plan" + costs → minimize total cost). + +**Ambiguity:** If anything is still ambiguous, ask the user or solve all plausible interpretations and report all outcomes; do not assume a single interpretation. + +### 🔒 MANDATORY: When in Doubt — Ask + +- If there is **any doubt** about whether a constraint or value should be included, **ask the user** and state the possible interpretations. + +### 🔒 MANDATORY: Complete-Path Runs — Try All Variants + +- When the user asks to **run the complete path** (e.g. end-to-end, full pipeline), run all plausible variants and **report all outcomes** so the user can choose; do not assume a single interpretation. + +### Three labels + +| Label | Meaning | Examples (sentence type) | +|-------|--------|---------------------------| +| **Parameter / given** | Fixed data, inputs, facts. Not chosen by the model. | "Demand is 100 units." "There are 3 factories." "Costs are $5 per unit." | +| **Constraint** | Something that must hold. May be explicit or **implicit** from phrasing. | "Capacity is 200." "All demand must be met." "At least 2 shifts must be staffed." | +| **Decision** | Something we choose or optimize. | "How much to produce." "Which facilities to open." "How many workers to hire." | +| **Objective** | What to minimize or maximize. May be **explicit** ("minimize cost") or **implicit** ("determine the plan" with costs given). | "Minimize total cost." "Determine the production plan" (with costs) → minimize total cost. | + +### Implicit constraints: committed vs optional phrasing + +**Committed/fixed phrasing** → treat as **parameter** or **implicit constraint** (everything mentioned is given or must happen). Not a decision. + +| Phrasing | Interpretation | Why | +|----------|-----------------|-----| +| "Plans to produce X products" | **Constraint**: all X must be produced. | Commitment; production level is fixed. | +| "Operates 3 factories" | **Parameter**: all 3 are open. Not a location-selection problem. | Current state is fixed. | +| "Employs N workers" | **Parameter**: all N are employed. Not a hiring decision. | Workforce size is given. | +| "Has a capacity of C" | **Parameter** (C) + **constraint**: usage ≤ C. | Capacity is fixed. | +| "Must meet all demand" | **Constraint**: demand satisfaction. | Explicit requirement. | + +**Optional/decision phrasing** → treat as **decision**. + +| Phrasing | Interpretation | Why | +|----------|-----------------|-----| +| "May produce up to …" | **Decision**: how much to produce. | Optional level. | +| "Can choose to open" (factories, sites) | **Decision**: which to open. | Selection is decided. | +| "Considers hiring" | **Decision**: how many to hire. | Hiring is under consideration. | +| "Decides how much to order" | **Decision**: order quantities. | Explicit decision. | +| "Wants to minimize/maximize …" | **Objective** (drives decisions). | Goal; decisions are the levers. | + +### Implicit objectives — do not miss + +**If the problem asks to "determine the plan" (or similar) but does not state "minimize" or "maximize" explicitly, the objective is often implicit.** You **MUST** identify it and state it before formulating; do not build a model with no objective. + +| Phrasing / context | Likely implicit objective | Why | +|-------------------|---------------------------|-----| +| "Determine the production plan" + costs given (per unit, per hour, etc.) | **Minimize total cost** (production + inspection/sales + overtime, etc.) | Plan is chosen; costs are specified → natural goal is to minimize total cost. | +| "Determine the plan" + costs and revenues given | **Maximize profit** (revenue − cost) | Both sides of the ledger → optimize profit. | +| "Try to determine the monthly production plan" + workshop hour costs, inspection/sales costs | **Minimize total cost** | All cost components are given; no revenue to maximize → minimize total cost. | + +**Rule:** When the problem gives cost (or cost and revenue) data and asks to "determine", "find", or "establish" the plan, **always state the objective explicitly** (e.g. "I'm treating the objective as minimize total cost, since only costs are given."). If both cost and revenue are present, state whether you use "minimize cost" or "maximize profit". Ask the user if unclear. + +### Parsing workflow + +1. **Split** the problem text into sentences or logical clauses. +2. **Label** each: parameter/given | constraint | decision | **objective** (if stated). +3. **Identify the objective (explicit or implicit):** If the problem says "minimize/maximize X", that's the objective. If it only says "determine the plan" (or "find", "establish") but gives costs (and possibly revenues), the objective is **implicit** — state it (e.g. minimize total cost, or maximize profit) and confirm with the user if ambiguous. +4. **Flag implicit constraints**: For each sentence, ask — "Does this state a fixed fact or a requirement (→ parameter/constraint), or something we choose (→ decision)?" +5. **Resolve ambiguity** by checking verbs and modals: + - "is", "has", "operates", "employs", "plans to" (fixed/committed) → parameter or implicit constraint. + - "may", "can choose", "considers", "decides", "wants to" (optional) → decision or objective. +6. **🔒 MANDATORY — If anything is still ambiguous** (e.g. a value or constraint could be read two ways): ask the user which interpretation is correct, or solve all plausible interpretations and report all outcomes. Do not assume a single interpretation. +7. **Summarize** for the user: list parameters, constraints (explicit + flagged implicit), decisions, and **objective (explicit or inferred)** before writing the math formulation. + +### Parsing checklist + +- [ ] Every sentence has a label (parameter | constraint | decision | objective if stated). +- [ ] **Objective is identified:** Explicit ("minimize/maximize X") or implicit ("determine the plan" + costs → minimize total cost; + revenues → maximize profit). Never formulate without stating the objective. +- [ ] Committed phrasing ("plans to", "operates", "employs") → not decisions. +- [ ] Optional phrasing ("may", "can choose", "considers") → decisions. +- [ ] Implicit constraints from committed phrasing are written out (e.g. "all X must be produced"). +- [ ] **🔒 MANDATORY — Ambiguity:** Any phrase that could be read two ways → I asked the user or I will solve all interpretations and report all outcomes (no silent single interpretation). +- [ ] Summary is produced before formulating (parameters, constraints, decisions, **objective**). + +### Example + +**Text:** "The company operates 3 factories and plans to produce 500 units. It may use overtime at extra cost. Minimize total cost." + +| Sentence / phrase | Label | Note | +|-------------------|-------|------| +| "Operates 3 factories" | Parameter | All 3 open; not facility selection. | +| "Plans to produce 500 units" | Constraint (implicit) | All 500 must be produced. | +| "May use overtime at extra cost" | Decision | How much overtime is a decision. | +| "Minimize total cost" | Objective | Drives decisions. | + +Result: Parameters = 3 factories, 500 units target. Constraints = produce exactly 500 (implicit from "plans to produce"). Decisions = production allocation across factories, overtime amounts. Objective = minimize cost. + +**Implicit-objective example:** A problem that asks to "determine the production plan" (or similar) and gives cost components (e.g. workshop, inspection, sales) but does not state "minimize" or "maximize" → **Objective is implicit: minimize total cost**. Always state it explicitly: "The objective is to minimize total cost." diff --git a/skills/qp-formulation/SKILL.md b/skills/qp-formulation/SKILL.md new file mode 100644 index 0000000000..c87b887fbc --- /dev/null +++ b/skills/qp-formulation/SKILL.md @@ -0,0 +1,33 @@ +--- +name: qp-formulation +version: "26.04.00" +description: Quadratic Programming (QP) — problem form and constraints. Domain concepts; no API or interface. QP is beta. +--- + +# QP Formulation + +Domain concepts for quadratic programming. No API or interface details here. **QP support in cuOpt is currently in beta.** + +## What is QP + +- **Objective**: Quadratic in the variables (e.g. x², x·y terms). Example: portfolio variance xᵀQx. +- **Constraints**: Linear only. cuOpt does not support quadratic constraints. + +## Important domain rule: minimize only + +QP objectives must be **minimization**. To maximize a quadratic expression, negate it and minimize; then negate the optimal value. + +## Required questions (problem formulation) + +Ask these if not already clear: + +1. **Objective** — Does it have squared or cross terms (x², x·y)? If purely linear, use LP/MILP instead. +2. **Minimize or maximize?** — If maximize, user must negate objective and minimize. +3. **Convexity** — For minimization, the quadratic form (matrix Q) should be positive semi-definite for well-posed problems. +4. **Constraints** — All linear (no quadratic constraints)? + +## Typical use cases + +- Portfolio optimization (minimize variance subject to return and budget). +- Least squares (minimize ‖Ax − b‖²). +- Other quadratic objectives with linear constraints. diff --git a/skills/routing-formulation/SKILL.md b/skills/routing-formulation/SKILL.md new file mode 100644 index 0000000000..4ab8d6419d --- /dev/null +++ b/skills/routing-formulation/SKILL.md @@ -0,0 +1,31 @@ +--- +name: routing-formulation +version: "26.04.00" +description: Vehicle routing (VRP, TSP, PDP) — problem types and data requirements. Domain concepts; no API or interface. +--- + +# Routing Formulation + +Domain concepts for vehicle routing. No API or interface details here. + +## What is routing + +- **TSP**: Single vehicle, visit all locations once (e.g. shortest tour). +- **VRP**: Multiple vehicles, capacity and/or time limits; assign orders to vehicles and sequence stops. +- **PDP**: Pickup and delivery pairs; pickup must be visited before the corresponding delivery. + +## Required questions (problem and data) + +Ask these if not already clear: + +1. **Problem type** — TSP, VRP, or PDP? +2. **Locations** — How many? Depot(s)? Cost or distance between pairs (matrix or derived)? +3. **Orders / tasks** — Which locations must be visited? Demand or service per stop? +4. **Fleet** — Number of vehicles, capacity per vehicle (and per dimension if multiple), start/end locations? +5. **Constraints** — Time windows (earliest/latest arrival), service times, precedence (order A before B)? + +## Typical data + +- Cost or distance matrix (or travel-time matrix). +- Order locations and, for VRP, demand per order. +- Vehicle capacities and optional time windows for vehicles and orders.