Skip to content

Commit

Permalink
more work on set cover
Browse files Browse the repository at this point in the history
  • Loading branch information
lperron committed Nov 17, 2024
1 parent 9e60a4e commit c87e13a
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 232 deletions.
16 changes: 0 additions & 16 deletions ortools/algorithms/python/set_cover.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#include <memory>
#include <vector>

#include "absl/base/nullability.h"
#include "ortools/algorithms/set_cover_heuristics.h"
#include "ortools/algorithms/set_cover_invariant.h"
#include "ortools/algorithms/set_cover_model.h"
Expand All @@ -38,7 +37,6 @@ using ::operations_research::GreedySolutionGenerator;
using ::operations_research::GuidedLocalSearch;
using ::operations_research::GuidedTabuSearch;
using ::operations_research::LazyElementDegreeSolutionGenerator;
using ::operations_research::Preprocessor;
using ::operations_research::RandomSolutionGenerator;
using ::operations_research::ReadBeasleySetCoverProblem;
using ::operations_research::ReadRailSetCoverProblem;
Expand Down Expand Up @@ -350,20 +348,6 @@ PYBIND11_MODULE(set_cover, m) {
&SetCoverInvariant::ImportSolutionFromProto);

// set_cover_heuristics.h
py::class_<Preprocessor>(m, "Preprocessor")
.def(py::init<absl::Nonnull<SetCoverInvariant*>>())
.def("next_solution",
[](Preprocessor& heuristic) -> bool {
return heuristic.NextSolution();
})
.def("next_solution",
[](Preprocessor& heuristic,
const std::vector<BaseInt>& focus) -> bool {
return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
})
.def("num_columns_fixed_by_singleton_row",
&Preprocessor::num_columns_fixed_by_singleton_row);

py::class_<TrivialSolutionGenerator>(m, "TrivialSolutionGenerator")
.def(py::init<SetCoverInvariant*>())
.def("next_solution",
Expand Down
17 changes: 0 additions & 17 deletions ortools/algorithms/python/set_cover_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,23 +117,6 @@ def test_initial_values(self):
inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE)
)

def test_preprocessor(self):
model = create_initial_cover_model()
self.assertTrue(model.compute_feasibility())

inv = set_cover.SetCoverInvariant(model)
preprocessor = set_cover.Preprocessor(inv)
self.assertTrue(preprocessor.next_solution())
self.assertTrue(
inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE)
)

greedy = set_cover.GreedySolutionGenerator(inv)
self.assertTrue(greedy.next_solution())
self.assertTrue(
inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED)
)

def test_infeasible(self):
model = set_cover.SetCoverModel()
model.add_empty_subset(1.0)
Expand Down
160 changes: 69 additions & 91 deletions ortools/algorithms/set_cover_heuristics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,32 +47,6 @@ SubsetBoolVector MakeBoolVector(absl::Span<const SubsetIndex> focus,

using CL = SetCoverInvariant::ConsistencyLevel;

// Preprocessor.

bool Preprocessor::NextSolution() {
return NextSolution(inv_->model()->all_subsets());
}

bool Preprocessor::NextSolution(absl::Span<const SubsetIndex> focus) {
DVLOG(1) << "Entering Preprocessor::NextSolution";
const SubsetIndex num_subsets(inv_->model()->num_subsets());
const ElementIndex num_elements(inv_->model()->num_elements());
const SparseRowView& rows = inv_->model()->rows();
SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets);
for (const ElementIndex element : inv_->model()->ElementRange()) {
if (rows[element].size() == 1) {
const SubsetIndex subset = rows[element][RowEntryIndex(0)];
if (in_focus[subset] && !inv_->is_selected()[subset]) {
inv_->Select(subset, CL::kCostAndCoverage);
++num_columns_fixed_by_singleton_row_;
}
}
}
inv_->CompressTrace();
inv_->Recompute(CL::kCostAndCoverage);
return true;
}

// TrivialSolutionGenerator.

bool TrivialSolutionGenerator::NextSolution() {
Expand Down Expand Up @@ -130,19 +104,14 @@ bool GreedySolutionGenerator::NextSolution(absl::Span<const SubsetIndex> focus,
DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage));
inv_->Recompute(CL::kFreeAndUncovered);
inv_->ClearTrace();
SubsetCostVector elements_per_cost(costs.size(), 0);
for (const SubsetIndex subset : focus) {
elements_per_cost[subset] = 1.0 / costs[subset];
}
std::vector<std::pair<float, SubsetIndex::ValueType>> subset_priorities;
DVLOG(1) << "focus.size(): " << focus.size();
subset_priorities.reserve(focus.size());
for (const SubsetIndex subset : focus) {
if (!inv_->is_selected()[subset] &&
inv_->num_free_elements()[subset] != 0) {
// NOMUTANTS -- reason, for C++
const float priority =
elements_per_cost[subset] * inv_->num_free_elements()[subset];
const float priority = inv_->num_free_elements()[subset] / costs[subset];
subset_priorities.push_back({priority, subset.value()});
}
}
Expand All @@ -162,7 +131,7 @@ bool GreedySolutionGenerator::NextSolution(absl::Span<const SubsetIndex> focus,
const SubsetIndex subset = *it;
const BaseInt marginal_impact(inv_->num_free_elements()[subset]);
if (marginal_impact > 0) {
const float priority = marginal_impact * elements_per_cost[subset];
const float priority = marginal_impact / costs[subset];
pq.Update({priority, subset.value()});
} else {
pq.Remove(subset.value());
Expand All @@ -172,8 +141,7 @@ bool GreedySolutionGenerator::NextSolution(absl::Span<const SubsetIndex> focus,
<< " num_uncovered_elements = " << inv_->num_uncovered_elements();
}
inv_->CompressTrace();
// Don't expect the queue to be empty, because of the break in the while
// loop.
// Don't expect pq to be empty, because of the break in the while loop.
DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered));
return true;
}
Expand Down Expand Up @@ -249,6 +217,38 @@ class ComputationUsefulnessStats {
// Used to detect useless computations.
SubsetToIntVector num_free_elements_;
};

std::vector<ElementIndex> GetUncoveredElementsSortedByDegree(
const SetCoverInvariant* const inv) {
const BaseInt num_elements = inv->model()->num_elements();
std::vector<ElementIndex> degree_sorted_elements;
degree_sorted_elements.reserve(num_elements);
for (ElementIndex element : inv->model()->ElementRange()) {
// Already covered elements should not be considered.
if (inv->coverage()[element] != 0) continue;
degree_sorted_elements.push_back(element);
}
const SparseRowView& rows = inv->model()->rows();
// Sort indices by degree i.e. the size of the row corresponding to an
// element.
// NOMUTANTS -- The code still works if the sort is removed.
std::sort(degree_sorted_elements.begin(), degree_sorted_elements.end(),
[&rows](const ElementIndex a, const ElementIndex b) {
if (rows[a].size() < rows[b].size()) return true;
if (rows[a].size() == rows[b].size()) return a < b;
return false;
});
return degree_sorted_elements;
}

// Computes: d = c1 * n2 - c2 * n1. This is an easy way to compare two ratios
// without having to use a full division.
// If d < 0 then c1 / n1 < c2 / n2,
// If d == 0 then c1 / n1 == c2 / n2, etc...
// NOTE(user): This can be implemented using SSE2 with a gain of 5-10%.
double Determinant(Cost c1, BaseInt n1, Cost c2, BaseInt n2) {
return c1 * n2 - n1 * c2;
}
} // namespace

// ElementDegreeSolutionGenerator.
Expand Down Expand Up @@ -280,42 +280,34 @@ bool ElementDegreeSolutionGenerator::NextSolution(
DVLOG(1) << "Entering ElementDegreeSolutionGenerator::NextSolution";
inv_->Recompute(CL::kFreeAndUncovered);
// Create the list of all the indices in the problem.
const BaseInt num_elements = inv_->model()->num_elements();
std::vector<ElementIndex> degree_sorted_elements(num_elements);
std::iota(degree_sorted_elements.begin(), degree_sorted_elements.end(),
ElementIndex(0));
const SparseRowView& rows = inv_->model()->rows();
// Sort indices by degree i.e. the size of the row corresponding to an
// element.
std::sort(degree_sorted_elements.begin(), degree_sorted_elements.end(),
[&rows](const ElementIndex a, const ElementIndex b) {
if (rows[a].size() < rows[b].size()) return true;
if (rows[a].size() == rows[b].size()) return a < b;
return false;
});
std::vector<ElementIndex> degree_sorted_elements =
GetUncoveredElementsSortedByDegree(inv_);
ComputationUsefulnessStats stats(inv_, false);
const SparseRowView& rows = inv_->model()->rows();
for (const ElementIndex element : degree_sorted_elements) {
// No need to cover an element that is already covered.
if (inv_->coverage()[element] != 0) continue;
Cost min_ratio = std::numeric_limits<Cost>::max();
SubsetIndex best_subset(-1);
Cost best_subset_cost = 0.0;
BaseInt best_subset_num_free_elts = 0;
for (const SubsetIndex subset : rows[element]) {
if (!in_focus[subset]) continue;
const BaseInt num_free_elements = inv_->ComputeNumFreeElements(subset);
const Cost ratio = costs[subset] / num_free_elements;
const BaseInt num_free_elements = inv_->num_free_elements()[subset];
stats.Update(subset, num_free_elements);
if (ratio < min_ratio) {
min_ratio = ratio;
const Cost det = Determinant(costs[subset], num_free_elements,
best_subset_cost, best_subset_num_free_elts);
// Compare R = costs[subset] / num_free_elements with
// B = best_subset_cost / best_subset_num_free_elts.
// If R < B, we choose subset.
// If the ratios are the same, we choose the subset with the most free
// elements.
// TODO(user): What about adding a tolerance for equality, which could
// further favor larger columns?
if (det < 0 ||
(det == 0 && num_free_elements > best_subset_num_free_elts)) {
best_subset = subset;
} else if (ratio == min_ratio) {
// If the ratio is the same, we choose the subset with the most free
// elements.
// TODO(user): What about adding a tolerance for equality, which could
// further favor larger columns?
if (inv_->num_free_elements()[subset] >
inv_->num_free_elements()[best_subset]) {
best_subset = subset;
}
best_subset_cost = costs[subset];
best_subset_num_free_elts = num_free_elements;
}
}
if (best_subset.value() == -1) {
Expand Down Expand Up @@ -361,44 +353,30 @@ bool LazyElementDegreeSolutionGenerator::NextSolution(
bool LazyElementDegreeSolutionGenerator::NextSolution(
const SubsetBoolVector& in_focus, const SubsetCostVector& costs) {
DVLOG(1) << "Entering LazyElementDegreeSolutionGenerator::NextSolution";
DCHECK(inv_->CheckConsistency(
SetCoverInvariant::ConsistencyLevel::kCostAndCoverage));
DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage));
// Create the list of all the indices in the problem.
const BaseInt num_elements = inv_->model()->num_elements();
std::vector<ElementIndex> degree_sorted_elements(num_elements);
std::iota(degree_sorted_elements.begin(), degree_sorted_elements.end(),
ElementIndex(0));
std::vector<ElementIndex> degree_sorted_elements =
GetUncoveredElementsSortedByDegree(inv_);
const SparseRowView& rows = inv_->model()->rows();
// Sort indices by degree i.e. the size of the row corresponding to an
// element.
std::sort(degree_sorted_elements.begin(), degree_sorted_elements.end(),
[&rows](const ElementIndex a, const ElementIndex b) {
if (rows[a].size() < rows[b].size()) return true;
if (rows[a].size() == rows[b].size()) return a < b;
return false;
});
ComputationUsefulnessStats stats(inv_, false);
for (const ElementIndex element : degree_sorted_elements) {
// No need to cover an element that is already covered.
if (inv_->coverage()[element] != 0) continue;
Cost min_ratio = std::numeric_limits<Cost>::max();
SubsetIndex best_subset(-1);
Cost best_subset_cost = 0.0; // Cost of the best subset.
BaseInt best_subset_num_free_elts = 0;
for (const SubsetIndex subset : rows[element]) {
if (!in_focus[subset]) continue;
const BaseInt num_free_elements = inv_->ComputeNumFreeElements(subset);
const Cost ratio = costs[subset] / num_free_elements;
stats.Update(subset, num_free_elements);
if (ratio < min_ratio) {
min_ratio = ratio;
const Cost det = Determinant(costs[subset], num_free_elements,
best_subset_cost, best_subset_num_free_elts);
// Same as ElementDegreeSolutionGenerator.
if (det < 0 ||
(det == 0 && num_free_elements > best_subset_num_free_elts)) {
best_subset = subset;
} else if (ratio == min_ratio) {
// If the ratio is the same, we choose the subset with the most free
// elements.
// TODO(user): What about adding a tolerance for equality, which could
// further favor larger columns?
if (num_free_elements > inv_->num_free_elements()[best_subset]) {
best_subset = subset;
}
best_subset_cost = costs[subset];
best_subset_num_free_elts = num_free_elements;
}
}
DCHECK_NE(best_subset, SubsetIndex(-1));
Expand Down Expand Up @@ -726,7 +704,7 @@ bool GuidedLocalSearch::NextSolution(absl::Span<const SubsetIndex> focus,

namespace {
void SampleSubsets(std::vector<SubsetIndex>* list, BaseInt num_subsets) {
num_subsets = std::min(num_subsets, static_cast<BaseInt>(list->size()));
num_subsets = std::min<BaseInt>(num_subsets, list->size());
CHECK_GE(num_subsets, 0);
std::shuffle(list->begin(), list->end(), absl::BitGen());
list->resize(num_subsets);
Expand All @@ -741,7 +719,7 @@ std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
BaseInt num_subsets,
SetCoverInvariant* inv) {
num_subsets = std::min(num_subsets, static_cast<BaseInt>(focus.size()));
num_subsets = std::min<BaseInt>(num_subsets, focus.size());
CHECK_GE(num_subsets, 0);
std::vector<SubsetIndex> chosen_indices;
for (const SubsetIndex subset : focus) {
Expand Down Expand Up @@ -807,7 +785,7 @@ std::vector<SubsetIndex> ClearMostCoveredElements(
// TODO(user): find another algorithm if necessary.
std::shuffle(sampled_subsets.begin(), sampled_subsets.end(), absl::BitGen());
sampled_subsets.resize(
std::min(static_cast<BaseInt>(sampled_subsets.size()), max_num_subsets));
std::min<BaseInt>(sampled_subsets.size(), max_num_subsets));

// Testing has shown that sorting sampled_subsets is not necessary.
// Now, un-select the subset in sampled_subsets.
Expand Down
35 changes: 1 addition & 34 deletions ortools/algorithms/set_cover_heuristics.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,41 +45,8 @@ namespace operations_research {
// subproblems in different threads.
//

// The preprocessor finds the elements that can only be covered by one subset.
// Obviously, such subsets which are the only ones that can cover a given
// element are chosen.

// The consistency level is maintained up to kFreeAndUncovered.
class Preprocessor {
public:
explicit Preprocessor(absl::Nonnull<SetCoverInvariant*> inv)
: inv_(inv), num_columns_fixed_by_singleton_row_(0) {}

// Returns true if a solution was found.
// TODO(user): Add time-outs and exit with a partial solution. This seems
// unlikely, though.
bool NextSolution();

// Computes the next partial solution considering only the subsets whose
// indices are in focus.
bool NextSolution(absl::Span<const SubsetIndex> focus);

// Returns the number of columns that are the only one for a given row.
int num_columns_fixed_by_singleton_row() const {
return num_columns_fixed_by_singleton_row_;
}

private:
// The data structure that will maintain the invariant for the model.
SetCoverInvariant* inv_;

// The number of columns that are the only one for a given row, i.e.
// the subsets that are unique in covering a particular element.
BaseInt num_columns_fixed_by_singleton_row_;
};

// An obvious idea is to take all the S_j's (or equivalently to set all the
// x_j's to 1). It's a bit silly but fast, and we can improve on it later using
// x_j's to 1). It's very silly but fast, and we can improve on it later using
// local search.

// The consistency level is maintained up to kFreeAndUncovered.
Expand Down
Loading

0 comments on commit c87e13a

Please sign in to comment.