Skip to content

Commit b93d220

Browse files
committed
Propagation can change the domain even if it is already at BOT. However, we do not need to rely on `has_changed` to decide when solving is finished anyway.
1 parent b790929 commit b93d220

File tree

1 file changed

+7
-11
lines changed

1 file changed

+7
-11
lines changed

include/gpu_solving.hpp

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ __device__ void update_block_best_bound(BlockData<S>& block_data, GridData<S>& g
341341
* Branching on unknown nodes is a task left to the caller.
342342
*/
343343
template <class S>
344-
__device__ bool propagate(BlockData<S>& block_data, GridData<S>& grid_data, local::B& thread_has_changed) {
344+
__device__ bool propagate(BlockData<S>& block_data, GridData<S>& grid_data) {
345345
using BlockCP = typename S::BlockCP;
346346
bool is_leaf_node = false;
347347
BlockCP& cp = *block_data.root;
@@ -353,6 +353,7 @@ __device__ bool propagate(BlockData<S>& block_data, GridData<S>& grid_data, loca
353353
}
354354
fp_engine.barrier();
355355
#endif
356+
local::B thread_has_changed{false};
356357
size_t iterations = fp_engine.fixpoint(*cp.ipc, thread_has_changed, &grid_data.cpu_stop);
357358
if(threadIdx.x == 0) {
358359
#ifdef TURBO_PROFILE_MODE
@@ -369,7 +370,7 @@ __device__ bool propagate(BlockData<S>& block_data, GridData<S>& grid_data, loca
369370
else if(cp.search_tree->template is_extractable<AtomicExtraction>()) {
370371
is_leaf_node = true;
371372
if(cp.bab->is_satisfaction() || cp.bab->compare_bound(*cp.store, cp.bab->optimum())) {
372-
thread_has_changed |= cp.bab->deduce();
373+
cp.bab->deduce();
373374
bool best_has_changed = update_grid_best_bound(block_data, grid_data);
374375
if(cp.bab->is_satisfaction() || (best_has_changed && cp.is_printing_intermediate_sol())) {
375376
grid_data.produce_solution(*cp.bab);
@@ -406,8 +407,7 @@ __device__ size_t dive(BlockData<S>& block_data, GridData<S>& grid_data) {
406407
size_t remaining_depth = grid_data.root.config.subproblems_power;
407408
while(remaining_depth > 0 && !stop_diving && !stop) {
408409
remaining_depth--;
409-
local::B thread_has_changed;
410-
bool is_leaf_node = propagate(block_data, grid_data, thread_has_changed);
410+
bool is_leaf_node = propagate(block_data, grid_data);
411411
if(threadIdx.x == 0) {
412412
if(is_leaf_node) {
413413
stop_diving.join(true);
@@ -438,17 +438,13 @@ __device__ void solve_problem(BlockData<S>& block_data, GridData<S>& grid_data)
438438
// In the condition, we must only read variables that are local to this block.
439439
// Otherwise, two threads might read different values if it is changed in between by another block.
440440
while(block_has_changed && !stop) {
441-
// For correctness we need this local variable, we cannot use `block_has_changed` (because it might still need to be read by other threads to enter this loop).
442-
local::B thread_has_changed;
443441
update_block_best_bound(block_data, grid_data);
444-
propagate(block_data, grid_data, thread_has_changed);
442+
propagate(block_data, grid_data);
445443
if(threadIdx.x == 0) {
446444
stop.join(grid_data.cpu_stop || *(grid_data.gpu_stop));
447-
thread_has_changed |= cp.search_tree->deduce();
445+
// propagate induces a memory fence, therefore all threads are already past the "while" condition.
446+
block_has_changed.meet(cp.search_tree->deduce());
448447
}
449-
block_has_changed.meet(false);
450-
fp_engine.barrier();
451-
block_has_changed.join(thread_has_changed);
452448
fp_engine.barrier();
453449
}
454450
}

0 commit comments

Comments
 (0)