Skip to content

Commit 0da41fa

Browse files
authored
Merge pull request NVIDIA#1014 from NVIDIA/release/26.04
Forward-merge release/26.04 into main
2 parents 39b3b47 + e91c66f commit 0da41fa

30 files changed

Lines changed: 807 additions & 90 deletions

cpp/CMakeLists.txt

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,28 @@ if(CMAKE_COMPILER_IS_GNUCXX)
7878
list(APPEND CUOPT_CXX_FLAGS -Werror -Wno-error=deprecated-declarations)
7979
endif(CMAKE_COMPILER_IS_GNUCXX)
8080

81+
# Papilo pulls in Boost.Multiprecision float128 support, which expects quadmath.h from the GCC
82+
# toolchain internals. Conda clang ships libquadmath, but does not surface the matching GCC
83+
# internal include directory by default. Add it late in the search order so clang still prefers its
84+
# own builtin intrinsic headers.
85+
if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
86+
execute_process(
87+
COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libquadmath.a
88+
OUTPUT_VARIABLE CUOPT_QUADMATH_LIB
89+
OUTPUT_STRIP_TRAILING_WHITESPACE
90+
)
91+
92+
if(IS_ABSOLUTE "${CUOPT_QUADMATH_LIB}")
93+
get_filename_component(CUOPT_QUADMATH_LIBDIR "${CUOPT_QUADMATH_LIB}" DIRECTORY)
94+
set(CUOPT_QUADMATH_INCLUDEDIR "${CUOPT_QUADMATH_LIBDIR}/include")
95+
96+
if(EXISTS "${CUOPT_QUADMATH_INCLUDEDIR}/quadmath.h")
97+
message(STATUS "Adding clang fallback include for quadmath: ${CUOPT_QUADMATH_INCLUDEDIR}")
98+
add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:-idirafter${CUOPT_QUADMATH_INCLUDEDIR}>")
99+
endif()
100+
endif()
101+
endif()
102+
81103
# To use sanitizer with cuda runtime, one must follow a few steps:
82104
# 1. Run the binary with env var set: LD_PRELOAD="$(gcc -print-file-name=libasan.so)" ASAN_OPTIONS='protect_shadow_gap=0:replace_intrin=0'
83105
# 2. (Optional) To run with a debugger (gdb or cuda-gdb) use the additional ASAN option alloc_dealloc_mismatch=0
@@ -204,6 +226,12 @@ if (FETCH_RAPIDS)
204226
include(cmake/thirdparty/get_cccl.cmake)
205227
include(cmake/thirdparty/get_rmm.cmake)
206228
include(cmake/thirdparty/get_raft.cmake)
229+
# Source-built RMM can hide out-of-line utility symbols such as
230+
# rmm::align_up / rmm::get_current_cuda_device when built with hidden visibility on clang.
231+
# Force default visibility on the fetched rmm target as a workaround until the root cause is understood and fixed upstream.
232+
if(TARGET rmm)
233+
set_target_properties(rmm PROPERTIES CXX_VISIBILITY_PRESET default)
234+
endif()
207235
else()
208236
find_package(CCCL REQUIRED)
209237
find_package(RMM REQUIRED)
@@ -415,9 +443,13 @@ add_library(cuopt::cuopt ALIAS cuopt)
415443
# - include paths ---------------------------------------------------------------------------------
416444
message(STATUS "target include directories CUDSS_INCLUDES = ${CUDSS_INCLUDE}")
417445

418-
target_include_directories(cuopt SYSTEM PRIVATE
446+
# Papilo is deliberately NOT added as a SYSTEM include: doing so messes up clang's include resolution when papilo is already installed as a conda package
447+
target_include_directories(cuopt PRIVATE
419448
"${papilo_SOURCE_DIR}/src"
420449
"${papilo_BINARY_DIR}"
450+
)
451+
452+
target_include_directories(cuopt SYSTEM PRIVATE
421453
"${pslp_SOURCE_DIR}/include"
422454
)
423455

cpp/include/cuopt/linear_programming/cpu_optimization_problem_solution.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,9 @@ class cpu_lp_solution_t : public lp_solution_interface_t<i_t, f_t> {
132132

133133
i_t get_reduced_cost_size() const override { return reduced_cost_.size(); }
134134

135-
f_t get_objective_value(i_t = 0) const override { return primal_objective_; }
135+
f_t get_objective_value(i_t) const override { return primal_objective_; }
136136

137-
f_t get_dual_objective_value(i_t = 0) const override { return dual_objective_; }
137+
f_t get_dual_objective_value(i_t) const override { return dual_objective_; }
138138

139139
pdlp_termination_status_t get_termination_status(i_t = 0) const override
140140
{

cpp/include/cuopt/linear_programming/optimization_problem_solution.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ class gpu_lp_solution_t : public lp_solution_interface_t<i_t, f_t> {
9090
return result;
9191
}
9292

93-
f_t get_objective_value(i_t id = 0) const override { return solution_.get_objective_value(id); }
93+
f_t get_objective_value(i_t id) const override { return solution_.get_objective_value(id); }
9494

95-
f_t get_dual_objective_value(i_t id = 0) const override
95+
f_t get_dual_objective_value(i_t id) const override
9696
{
9797
return solution_.get_dual_objective_value(id);
9898
}

cpp/include/cuopt/linear_programming/optimization_problem_solution_interface.hpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ class lp_solution_interface_t : public optimization_problem_solution_interface_t
237237
* @brief Get solve time
238238
* @return Total solve time in seconds
239239
*/
240-
virtual f_t get_solve_time() const = 0;
240+
virtual f_t get_solve_time() const override = 0;
241241

242242
/**
243243
* @brief Get primal objective value
@@ -391,31 +391,31 @@ class mip_solution_interface_t : public optimization_problem_solution_interface_
391391
* @brief Get solution as host vector
392392
* @return Host vector of solution
393393
*/
394-
virtual std::vector<f_t> get_solution_host() const = 0;
394+
virtual std::vector<f_t> get_solution_host() const override = 0;
395395

396396
/**
397397
* @brief Get objective value
398398
* @return Objective value
399399
*/
400-
virtual f_t get_objective_value() const = 0;
400+
virtual f_t get_objective_value() const override = 0;
401401

402402
/**
403403
* @brief Get solve time
404404
* @return Total solve time in seconds
405405
*/
406-
virtual f_t get_solve_time() const = 0;
406+
virtual f_t get_solve_time() const override = 0;
407407

408408
/**
409409
* @brief Get MIP gap
410410
* @return MIP gap
411411
*/
412-
virtual f_t get_mip_gap() const = 0;
412+
virtual f_t get_mip_gap() const override = 0;
413413

414414
/**
415415
* @brief Get solution bound
416416
* @return Solution bound
417417
*/
418-
virtual f_t get_solution_bound() const = 0;
418+
virtual f_t get_solution_bound() const override = 0;
419419

420420
/**
421421
* @brief Get termination status

cpp/include/cuopt/linear_programming/utilities/callbacks_implems.hpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,12 @@ class default_get_solution_callback_t : public get_solution_callback_t {
3232
void* solution_bound,
3333
void* user_data) override
3434
{
35-
PyObject* numpy_matrix = get_numpy_array(data, n_variables);
36-
PyObject* numpy_array = get_numpy_array(objective_value, 1);
37-
PyObject* numpy_bound = get_numpy_array(solution_bound, 1);
38-
PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
39-
PyObject* res = PyObject_CallMethod(this->pyCallbackClass,
35+
PyGILState_STATE gstate = PyGILState_Ensure();
36+
PyObject* numpy_matrix = get_numpy_array(data, n_variables);
37+
PyObject* numpy_array = get_numpy_array(objective_value, 1);
38+
PyObject* numpy_bound = get_numpy_array(solution_bound, 1);
39+
PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
40+
PyObject* res = PyObject_CallMethod(this->pyCallbackClass,
4041
"get_solution",
4142
"(OOOO)",
4243
numpy_matrix,
@@ -47,6 +48,7 @@ class default_get_solution_callback_t : public get_solution_callback_t {
4748
Py_DECREF(numpy_array);
4849
Py_DECREF(numpy_bound);
4950
if (res != nullptr) { Py_DECREF(res); }
51+
PyGILState_Release(gstate);
5052
}
5153

5254
PyObject* pyCallbackClass;
@@ -69,11 +71,12 @@ class default_set_solution_callback_t : public set_solution_callback_t {
6971
void* solution_bound,
7072
void* user_data) override
7173
{
72-
PyObject* numpy_matrix = get_numpy_array(data, n_variables);
73-
PyObject* numpy_array = get_numpy_array(objective_value, 1);
74-
PyObject* numpy_bound = get_numpy_array(solution_bound, 1);
75-
PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
76-
PyObject* res = PyObject_CallMethod(this->pyCallbackClass,
74+
PyGILState_STATE gstate = PyGILState_Ensure();
75+
PyObject* numpy_matrix = get_numpy_array(data, n_variables);
76+
PyObject* numpy_array = get_numpy_array(objective_value, 1);
77+
PyObject* numpy_bound = get_numpy_array(solution_bound, 1);
78+
PyObject* py_user_data = user_data == nullptr ? Py_None : static_cast<PyObject*>(user_data);
79+
PyObject* res = PyObject_CallMethod(this->pyCallbackClass,
7780
"set_solution",
7881
"(OOOO)",
7982
numpy_matrix,
@@ -84,6 +87,7 @@ class default_set_solution_callback_t : public set_solution_callback_t {
8487
Py_DECREF(numpy_array);
8588
Py_DECREF(numpy_bound);
8689
if (res != nullptr) { Py_DECREF(res); }
90+
PyGILState_Release(gstate);
8791
}
8892

8993
PyObject* pyCallbackClass;

cpp/src/branch_and_bound/branch_and_bound.cpp

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -919,7 +919,7 @@ struct nondeterministic_policy_t : tree_update_policy_t<i_t, f_t> {
919919
{
920920
}
921921

922-
f_t upper_bound() const override { return bnb.upper_bound_.load(); }
922+
f_t upper_bound() const override { return bnb.get_cutoff(); }
923923

924924
void update_pseudo_costs(mip_node_t<i_t, f_t>* node, f_t leaf_obj) override
925925
{
@@ -1337,10 +1337,11 @@ dual::status_t branch_and_bound_t<i_t, f_t>::solve_node_lp(
13371337

13381338
simplex_solver_settings_t lp_settings = settings_;
13391339
lp_settings.set_log(false);
1340+
f_t cutoff = get_cutoff();
13401341
if (original_lp_.objective_is_integral) {
1341-
lp_settings.cut_off = std::ceil(upper_bound_ - settings_.integer_tol) + settings_.dual_tol;
1342+
lp_settings.cut_off = std::ceil(cutoff - settings_.integer_tol) + settings_.dual_tol;
13421343
} else {
1343-
lp_settings.cut_off = upper_bound_ + settings_.dual_tol;
1344+
lp_settings.cut_off = cutoff + settings_.dual_tol;
13441345
}
13451346
lp_settings.inside_mip = 2;
13461347
lp_settings.time_limit = settings_.time_limit - toc(exploration_stats_.start_time);
@@ -1447,7 +1448,7 @@ void branch_and_bound_t<i_t, f_t>::plunge_with(branch_and_bound_worker_t<i_t, f_
14471448
// - The lower bound of the parent is lower or equal to its children
14481449
worker->lower_bound = lower_bound;
14491450

1450-
if (lower_bound > upper_bound) {
1451+
if (lower_bound > get_cutoff()) {
14511452
search_tree_.graphviz_node(settings_.log, node_ptr, "cutoff", node_ptr->lower_bound);
14521453
search_tree_.update(node_ptr, node_status_t::FATHOMED);
14531454
worker->recompute_basis = true;
@@ -1557,7 +1558,7 @@ void branch_and_bound_t<i_t, f_t>::dive_with(branch_and_bound_worker_t<i_t, f_t>
15571558
f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound);
15581559
worker->lower_bound = lower_bound;
15591560

1560-
if (node_ptr->lower_bound > upper_bound) {
1561+
if (node_ptr->lower_bound > get_cutoff()) {
15611562
worker->recompute_basis = true;
15621563
worker->recompute_bounds = true;
15631564
continue;
@@ -1696,7 +1697,7 @@ void branch_and_bound_t<i_t, f_t>::run_scheduler()
16961697
std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();
16971698

16981699
if (!start_node.has_value()) { continue; }
1699-
if (upper_bound_ < start_node.value()->lower_bound) {
1700+
if (get_cutoff() < start_node.value()->lower_bound) {
17001701
// This node was put on the heap earlier but its lower bound is now greater than the
17011702
// current cutoff (see get_cutoff())
17021703
search_tree_.graphviz_node(
@@ -1720,7 +1721,7 @@ void branch_and_bound_t<i_t, f_t>::run_scheduler()
17201721
std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_diving();
17211722

17221723
if (!start_node.has_value()) { continue; }
1723-
if (upper_bound_ < start_node.value()->lower_bound ||
1724+
if (get_cutoff() < start_node.value()->lower_bound ||
17241725
start_node.value()->depth < diving_settings.min_node_depth) {
17251726
continue;
17261727
}
@@ -1788,7 +1789,7 @@ void branch_and_bound_t<i_t, f_t>::single_threaded_solve()
17881789
std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();
17891790

17901791
if (!start_node.has_value()) { continue; }
1791-
if (upper_bound_ < start_node.value()->lower_bound) {
1792+
if (get_cutoff() < start_node.value()->lower_bound) {
17921793
// This node was put on the heap earlier but its lower bound is now greater than the
17931794
// current cutoff (see get_cutoff())
17941795
search_tree_.graphviz_node(
@@ -2275,12 +2276,12 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
22752276
return mip_status_t::NUMERICAL;
22762277
}
22772278

2278-
if (settings_.reduced_cost_strengthening >= 1 && upper_bound_.load() < last_upper_bound) {
2279+
if (settings_.reduced_cost_strengthening >= 1 && get_cutoff() < last_upper_bound) {
22792280
mutex_upper_.lock();
2280-
last_upper_bound = upper_bound_.load();
2281+
last_upper_bound = get_cutoff();
22812282
std::vector<f_t> lower_bounds;
22822283
std::vector<f_t> upper_bounds;
2283-
find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds);
2284+
find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds);
22842285
mutex_upper_.unlock();
22852286
mutex_original_lp_.lock();
22862287
original_lp_.lower = lower_bounds;
@@ -2467,10 +2468,10 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
24672468
return solver_status_;
24682469
}
24692470

2470-
if (settings_.reduced_cost_strengthening >= 2 && upper_bound_.load() < last_upper_bound) {
2471+
if (settings_.reduced_cost_strengthening >= 2 && get_cutoff() < last_upper_bound) {
24712472
std::vector<f_t> lower_bounds;
24722473
std::vector<f_t> upper_bounds;
2473-
i_t num_fixed = find_reduced_cost_fixings(upper_bound_.load(), lower_bounds, upper_bounds);
2474+
i_t num_fixed = find_reduced_cost_fixings(get_cutoff(), lower_bounds, upper_bounds);
24742475
if (num_fixed > 0) {
24752476
std::vector<bool> bounds_changed(original_lp_.num_cols, true);
24762477
std::vector<char> row_sense;
@@ -2574,7 +2575,7 @@ mip_status_t branch_and_bound_t<i_t, f_t>::solve(mip_solution_t<i_t, f_t>& solut
25742575
std::optional<mip_node_t<i_t, f_t>*> start_node = node_queue_.pop_best_first();
25752576

25762577
if (!start_node.has_value()) { continue; }
2577-
if (upper_bound_ < start_node.value()->lower_bound) {
2578+
if (get_cutoff() < start_node.value()->lower_bound) {
25782579
// This node was put on the heap earlier but its lower bound is now greater than the
25792580
// current cutoff (see get_cutoff())
25802581
search_tree_.graphviz_node(
@@ -3416,7 +3417,7 @@ void branch_and_bound_t<i_t, f_t>::deterministic_sort_replay_events(
34163417
template <typename i_t, typename f_t>
34173418
void branch_and_bound_t<i_t, f_t>::deterministic_prune_worker_nodes_vs_incumbent()
34183419
{
3419-
f_t upper_bound = upper_bound_.load();
3420+
f_t upper_bound = get_cutoff();
34203421

34213422
for (auto& worker : *deterministic_workers_) {
34223423
// Check nodes in plunge stack - filter in place
@@ -3552,14 +3553,14 @@ void branch_and_bound_t<i_t, f_t>::deterministic_populate_diving_heap()
35523553
const int num_diving = deterministic_diving_workers_->size();
35533554
constexpr int target_nodes_per_worker = 10;
35543555
const int target_total = num_diving * target_nodes_per_worker;
3555-
f_t upper_bound = upper_bound_.load();
3556+
f_t cutoff = get_cutoff();
35563557

35573558
// Collect candidate nodes from BFS worker backlog heaps
35583559
std::vector<std::pair<mip_node_t<i_t, f_t>*, f_t>> candidates;
35593560

35603561
for (auto& worker : *deterministic_workers_) {
35613562
for (auto* node : worker.backlog.data()) {
3562-
if (node->lower_bound < upper_bound) {
3563+
if (node->lower_bound < cutoff) {
35633564
f_t score = node->objective_estimate;
35643565
if (score >= inf) { score = node->lower_bound; }
35653566
candidates.push_back({node, score});

cpp/src/branch_and_bound/branch_and_bound.hpp

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,20 @@ class branch_and_bound_t {
115115

116116
void set_concurrent_lp_root_solve(bool enable) { enable_concurrent_lp_root_solve_ = enable; }
117117

118+
// Set a cutoff bound from an external source (e.g., early FJ during presolve).
119+
// Used for node pruning and reduced cost strengthening but NOT for gap computation.
120+
// Unlike upper_bound_, this does not imply a verified incumbent solution exists.
121+
//
122+
// IMPORTANT: `bound` must be in B&B's internal objective space, i.e. the space of
123+
// original_lp_ where: user_obj = obj_scale * (internal_obj + obj_constant).
124+
// The caller (solver.cu) converts from user-space via
125+
// problem_ptr->get_solver_obj_from_user_obj(user_cutoff)
126+
// which accounts for both the presolve objective offset and maximization.
127+
void set_initial_cutoff(f_t bound) { initial_cutoff_ = bound; }
128+
129+
// Effective cutoff for node pruning: min of verified incumbent and external cutoff.
130+
f_t get_cutoff() const { return std::min(upper_bound_.load(), initial_cutoff_); }
131+
118132
// Repair a low-quality solution from the heuristics.
119133
bool repair_solution(const std::vector<f_t>& leaf_edge_norms,
120134
const std::vector<f_t>& potential_solution,
@@ -179,9 +193,13 @@ class branch_and_bound_t {
179193
// Mutex for upper bound
180194
omp_mutex_t mutex_upper_;
181195

182-
// Global variable for upper bound
196+
// Verified incumbent bound (only set when B&B has an actual integer-feasible solution).
183197
omp_atomic_t<f_t> upper_bound_;
184198

199+
// External cutoff from early heuristics (for pruning only, no verified solution).
200+
// Must be in B&B internal objective space (see set_initial_cutoff).
201+
f_t initial_cutoff_{std::numeric_limits<f_t>::infinity()};
202+
185203
// Global variable for incumbent. The incumbent should be updated with the upper bound
186204
mip_solution_t<i_t, f_t> incumbent_;
187205

cpp/src/branch_and_bound/mip_node.hpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,27 @@ bool inactive_status(node_status_t status);
3636
template <typename i_t, typename f_t>
3737
class mip_node_t {
3838
public:
39+
~mip_node_t()
40+
{
41+
// Iterative teardown to avoid stack overflow on deep trees.
42+
// Detach all descendants breadth-first, then destroy them as leaves.
43+
std::vector<std::unique_ptr<mip_node_t>> nodes;
44+
for (auto& c : children) {
45+
if (c) { nodes.push_back(std::move(c)); }
46+
}
47+
// nodes grows while we iterate, so this loop ends only once every descendant has been detached and moved into it
48+
for (size_t i = 0; i < nodes.size(); ++i) {
49+
for (auto& c : nodes[i]->children) {
50+
if (c) { nodes.push_back(std::move(c)); }
51+
}
52+
}
53+
54+
// At scope exit, nodes is destroyed, which destroys each detached (now childless) node in turn
55+
}
56+
57+
mip_node_t(mip_node_t&&) = default;
58+
mip_node_t& operator=(mip_node_t&&) = default;
59+
3960
mip_node_t()
4061
: status(node_status_t::PENDING),
4162
lower_bound(-std::numeric_limits<f_t>::infinity()),

cpp/src/mip_heuristics/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,9 @@ set(MIP_NON_LP_FILES
4141
${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu
4242
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu
4343
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu
44-
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu)
44+
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu
45+
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_cpufj.cu
46+
${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/early_gpufj.cu)
4547

4648
# Choose which files to include based on build mode
4749
if(BUILD_LP_ONLY)

0 commit comments

Comments
 (0)