more work on set cover

google · Nov 17, 2024 · c87e13a · c87e13a
1 parent 9e60a4e
commit c87e13a
Show file tree

Hide file tree

Showing 5 changed files with 202 additions and 232 deletions.
diff --git a/ortools/algorithms/python/set_cover.cc b/ortools/algorithms/python/set_cover.cc
@@ -19,7 +19,6 @@
 #include <memory>
 #include <vector>
 
-#include "absl/base/nullability.h"
 #include "ortools/algorithms/set_cover_heuristics.h"
 #include "ortools/algorithms/set_cover_invariant.h"
 #include "ortools/algorithms/set_cover_model.h"
@@ -38,7 +37,6 @@ using ::operations_research::GreedySolutionGenerator;
 using ::operations_research::GuidedLocalSearch;
 using ::operations_research::GuidedTabuSearch;
 using ::operations_research::LazyElementDegreeSolutionGenerator;
-using ::operations_research::Preprocessor;
 using ::operations_research::RandomSolutionGenerator;
 using ::operations_research::ReadBeasleySetCoverProblem;
 using ::operations_research::ReadRailSetCoverProblem;
@@ -350,20 +348,6 @@ PYBIND11_MODULE(set_cover, m) {
            &SetCoverInvariant::ImportSolutionFromProto);
 
   // set_cover_heuristics.h
-  py::class_<Preprocessor>(m, "Preprocessor")
-      .def(py::init<absl::Nonnull<SetCoverInvariant*>>())
-      .def("next_solution",
-           [](Preprocessor& heuristic) -> bool {
-             return heuristic.NextSolution();
-           })
-      .def("next_solution",
-           [](Preprocessor& heuristic,
-              const std::vector<BaseInt>& focus) -> bool {
-             return heuristic.NextSolution(VectorIntToVectorSubsetIndex(focus));
-           })
-      .def("num_columns_fixed_by_singleton_row",
-           &Preprocessor::num_columns_fixed_by_singleton_row);
-
   py::class_<TrivialSolutionGenerator>(m, "TrivialSolutionGenerator")
       .def(py::init<SetCoverInvariant*>())
       .def("next_solution",

diff --git a/ortools/algorithms/python/set_cover_test.py b/ortools/algorithms/python/set_cover_test.py
@@ -117,23 +117,6 @@ def test_initial_values(self):
             inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE)
         )
 
-    def test_preprocessor(self):
-        model = create_initial_cover_model()
-        self.assertTrue(model.compute_feasibility())
-
-        inv = set_cover.SetCoverInvariant(model)
-        preprocessor = set_cover.Preprocessor(inv)
-        self.assertTrue(preprocessor.next_solution())
-        self.assertTrue(
-            inv.check_consistency(set_cover.consistency_level.COST_AND_COVERAGE)
-        )
-
-        greedy = set_cover.GreedySolutionGenerator(inv)
-        self.assertTrue(greedy.next_solution())
-        self.assertTrue(
-            inv.check_consistency(set_cover.consistency_level.FREE_AND_UNCOVERED)
-        )
-
     def test_infeasible(self):
         model = set_cover.SetCoverModel()
         model.add_empty_subset(1.0)

diff --git a/ortools/algorithms/set_cover_heuristics.cc b/ortools/algorithms/set_cover_heuristics.cc
@@ -47,32 +47,6 @@ SubsetBoolVector MakeBoolVector(absl::Span<const SubsetIndex> focus,
 
 using CL = SetCoverInvariant::ConsistencyLevel;
 
-// Preprocessor.
-
-bool Preprocessor::NextSolution() {
-  return NextSolution(inv_->model()->all_subsets());
-}
-
-bool Preprocessor::NextSolution(absl::Span<const SubsetIndex> focus) {
-  DVLOG(1) << "Entering Preprocessor::NextSolution";
-  const SubsetIndex num_subsets(inv_->model()->num_subsets());
-  const ElementIndex num_elements(inv_->model()->num_elements());
-  const SparseRowView& rows = inv_->model()->rows();
-  SubsetBoolVector in_focus = MakeBoolVector(focus, num_subsets);
-  for (const ElementIndex element : inv_->model()->ElementRange()) {
-    if (rows[element].size() == 1) {
-      const SubsetIndex subset = rows[element][RowEntryIndex(0)];
-      if (in_focus[subset] && !inv_->is_selected()[subset]) {
-        inv_->Select(subset, CL::kCostAndCoverage);
-        ++num_columns_fixed_by_singleton_row_;
-      }
-    }
-  }
-  inv_->CompressTrace();
-  inv_->Recompute(CL::kCostAndCoverage);
-  return true;
-}
-
 // TrivialSolutionGenerator.
 
 bool TrivialSolutionGenerator::NextSolution() {
@@ -130,19 +104,14 @@ bool GreedySolutionGenerator::NextSolution(absl::Span<const SubsetIndex> focus,
   DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage));
   inv_->Recompute(CL::kFreeAndUncovered);
   inv_->ClearTrace();
-  SubsetCostVector elements_per_cost(costs.size(), 0);
-  for (const SubsetIndex subset : focus) {
-    elements_per_cost[subset] = 1.0 / costs[subset];
-  }
   std::vector<std::pair<float, SubsetIndex::ValueType>> subset_priorities;
   DVLOG(1) << "focus.size(): " << focus.size();
   subset_priorities.reserve(focus.size());
   for (const SubsetIndex subset : focus) {
     if (!inv_->is_selected()[subset] &&
         inv_->num_free_elements()[subset] != 0) {
       // NOMUTANTS -- reason, for C++
-      const float priority =
-          elements_per_cost[subset] * inv_->num_free_elements()[subset];
+      const float priority = inv_->num_free_elements()[subset] / costs[subset];
       subset_priorities.push_back({priority, subset.value()});
     }
   }
@@ -162,7 +131,7 @@ bool GreedySolutionGenerator::NextSolution(absl::Span<const SubsetIndex> focus,
       const SubsetIndex subset = *it;
       const BaseInt marginal_impact(inv_->num_free_elements()[subset]);
       if (marginal_impact > 0) {
-        const float priority = marginal_impact * elements_per_cost[subset];
+        const float priority = marginal_impact / costs[subset];
         pq.Update({priority, subset.value()});
       } else {
         pq.Remove(subset.value());
@@ -172,8 +141,7 @@ bool GreedySolutionGenerator::NextSolution(absl::Span<const SubsetIndex> focus,
              << " num_uncovered_elements = " << inv_->num_uncovered_elements();
   }
   inv_->CompressTrace();
-  // Don't expect the queue to be empty, because of the break in the while
-  // loop.
+  // Don't expect pq to be empty, because of the break in the while loop.
   DCHECK(inv_->CheckConsistency(CL::kFreeAndUncovered));
   return true;
 }
@@ -249,6 +217,38 @@ class ComputationUsefulnessStats {
   // Used to detect useless computations.
   SubsetToIntVector num_free_elements_;
 };
+
+std::vector<ElementIndex> GetUncoveredElementsSortedByDegree(
+    const SetCoverInvariant* const inv) {
+  const BaseInt num_elements = inv->model()->num_elements();
+  std::vector<ElementIndex> degree_sorted_elements;
+  degree_sorted_elements.reserve(num_elements);
+  for (ElementIndex element : inv->model()->ElementRange()) {
+    // Already covered elements should not be considered.
+    if (inv->coverage()[element] != 0) continue;
+    degree_sorted_elements.push_back(element);
+  }
+  const SparseRowView& rows = inv->model()->rows();
+  // Sort indices by degree i.e. the size of the row corresponding to an
+  // element.
+  // NOMUTANTS -- The code still works if the sort is removed.
+  std::sort(degree_sorted_elements.begin(), degree_sorted_elements.end(),
+            [&rows](const ElementIndex a, const ElementIndex b) {
+              if (rows[a].size() < rows[b].size()) return true;
+              if (rows[a].size() == rows[b].size()) return a < b;
+              return false;
+            });
+  return degree_sorted_elements;
+}
+
+// Computes: d = c1 * n2 - c2 * n1. This is an easy way to compare two ratios
+// without having to use a full division.
+// If d < 0 then c1 / n1 < c2 / n2,
+// If d == 0 then c1 / n1 == c2 / n2, etc...
+// NOTE(user): This can be implemented using SSE2 with a gain of 5-10%.
+double Determinant(Cost c1, BaseInt n1, Cost c2, BaseInt n2) {
+  return c1 * n2 - n1 * c2;
+}
 }  // namespace
 
 // ElementDegreeSolutionGenerator.
@@ -280,42 +280,34 @@ bool ElementDegreeSolutionGenerator::NextSolution(
   DVLOG(1) << "Entering ElementDegreeSolutionGenerator::NextSolution";
   inv_->Recompute(CL::kFreeAndUncovered);
   // Create the list of all the indices in the problem.
-  const BaseInt num_elements = inv_->model()->num_elements();
-  std::vector<ElementIndex> degree_sorted_elements(num_elements);
-  std::iota(degree_sorted_elements.begin(), degree_sorted_elements.end(),
-            ElementIndex(0));
-  const SparseRowView& rows = inv_->model()->rows();
-  // Sort indices by degree i.e. the size of the row corresponding to an
-  // element.
-  std::sort(degree_sorted_elements.begin(), degree_sorted_elements.end(),
-            [&rows](const ElementIndex a, const ElementIndex b) {
-              if (rows[a].size() < rows[b].size()) return true;
-              if (rows[a].size() == rows[b].size()) return a < b;
-              return false;
-            });
+  std::vector<ElementIndex> degree_sorted_elements =
+      GetUncoveredElementsSortedByDegree(inv_);
   ComputationUsefulnessStats stats(inv_, false);
+  const SparseRowView& rows = inv_->model()->rows();
   for (const ElementIndex element : degree_sorted_elements) {
     // No need to cover an element that is already covered.
     if (inv_->coverage()[element] != 0) continue;
-    Cost min_ratio = std::numeric_limits<Cost>::max();
     SubsetIndex best_subset(-1);
+    Cost best_subset_cost = 0.0;
+    BaseInt best_subset_num_free_elts = 0;
     for (const SubsetIndex subset : rows[element]) {
       if (!in_focus[subset]) continue;
-      const BaseInt num_free_elements = inv_->ComputeNumFreeElements(subset);
-      const Cost ratio = costs[subset] / num_free_elements;
+      const BaseInt num_free_elements = inv_->num_free_elements()[subset];
       stats.Update(subset, num_free_elements);
-      if (ratio < min_ratio) {
-        min_ratio = ratio;
+      const Cost det = Determinant(costs[subset], num_free_elements,
+                                   best_subset_cost, best_subset_num_free_elts);
+      // Compare R = costs[subset] / num_free_elements with
+      //         B = best_subset_cost / best_subset_num_free_elts.
+      // If R < B, we choose subset.
+      // If the ratios are the same, we choose the subset with the most free
+      // elements.
+      // TODO(user): What about adding a tolerance for equality, which could
+      // further favor larger columns?
+      if (det < 0 ||
+          (det == 0 && num_free_elements > best_subset_num_free_elts)) {
         best_subset = subset;
-      } else if (ratio == min_ratio) {
-        // If the ratio is the same, we choose the subset with the most free
-        // elements.
-        // TODO(user): What about adding a tolerance for equality, which could
-        // further favor larger columns?
-        if (inv_->num_free_elements()[subset] >
-            inv_->num_free_elements()[best_subset]) {
-          best_subset = subset;
-        }
+        best_subset_cost = costs[subset];
+        best_subset_num_free_elts = num_free_elements;
       }
     }
     if (best_subset.value() == -1) {
@@ -361,44 +353,30 @@ bool LazyElementDegreeSolutionGenerator::NextSolution(
 bool LazyElementDegreeSolutionGenerator::NextSolution(
     const SubsetBoolVector& in_focus, const SubsetCostVector& costs) {
   DVLOG(1) << "Entering LazyElementDegreeSolutionGenerator::NextSolution";
-  DCHECK(inv_->CheckConsistency(
-      SetCoverInvariant::ConsistencyLevel::kCostAndCoverage));
+  DCHECK(inv_->CheckConsistency(CL::kCostAndCoverage));
   // Create the list of all the indices in the problem.
-  const BaseInt num_elements = inv_->model()->num_elements();
-  std::vector<ElementIndex> degree_sorted_elements(num_elements);
-  std::iota(degree_sorted_elements.begin(), degree_sorted_elements.end(),
-            ElementIndex(0));
+  std::vector<ElementIndex> degree_sorted_elements =
+      GetUncoveredElementsSortedByDegree(inv_);
   const SparseRowView& rows = inv_->model()->rows();
-  // Sort indices by degree i.e. the size of the row corresponding to an
-  // element.
-  std::sort(degree_sorted_elements.begin(), degree_sorted_elements.end(),
-            [&rows](const ElementIndex a, const ElementIndex b) {
-              if (rows[a].size() < rows[b].size()) return true;
-              if (rows[a].size() == rows[b].size()) return a < b;
-              return false;
-            });
   ComputationUsefulnessStats stats(inv_, false);
   for (const ElementIndex element : degree_sorted_elements) {
     // No need to cover an element that is already covered.
     if (inv_->coverage()[element] != 0) continue;
-    Cost min_ratio = std::numeric_limits<Cost>::max();
     SubsetIndex best_subset(-1);
+    Cost best_subset_cost = 0.0;  // Cost of the best subset.
+    BaseInt best_subset_num_free_elts = 0;
     for (const SubsetIndex subset : rows[element]) {
       if (!in_focus[subset]) continue;
       const BaseInt num_free_elements = inv_->ComputeNumFreeElements(subset);
-      const Cost ratio = costs[subset] / num_free_elements;
       stats.Update(subset, num_free_elements);
-      if (ratio < min_ratio) {
-        min_ratio = ratio;
+      const Cost det = Determinant(costs[subset], num_free_elements,
+                                   best_subset_cost, best_subset_num_free_elts);
+      // Same as ElementDegreeSolutionGenerator.
+      if (det < 0 ||
+          (det == 0 && num_free_elements > best_subset_num_free_elts)) {
         best_subset = subset;
-      } else if (ratio == min_ratio) {
-        // If the ratio is the same, we choose the subset with the most free
-        // elements.
-        // TODO(user): What about adding a tolerance for equality, which could
-        // further favor larger columns?
-        if (num_free_elements > inv_->num_free_elements()[best_subset]) {
-          best_subset = subset;
-        }
+        best_subset_cost = costs[subset];
+        best_subset_num_free_elts = num_free_elements;
       }
     }
     DCHECK_NE(best_subset, SubsetIndex(-1));
@@ -726,7 +704,7 @@ bool GuidedLocalSearch::NextSolution(absl::Span<const SubsetIndex> focus,
 
 namespace {
 void SampleSubsets(std::vector<SubsetIndex>* list, BaseInt num_subsets) {
-  num_subsets = std::min(num_subsets, static_cast<BaseInt>(list->size()));
+  num_subsets = std::min<BaseInt>(num_subsets, list->size());
   CHECK_GE(num_subsets, 0);
   std::shuffle(list->begin(), list->end(), absl::BitGen());
   list->resize(num_subsets);
@@ -741,7 +719,7 @@ std::vector<SubsetIndex> ClearRandomSubsets(BaseInt num_subsets,
 std::vector<SubsetIndex> ClearRandomSubsets(absl::Span<const SubsetIndex> focus,
                                             BaseInt num_subsets,
                                             SetCoverInvariant* inv) {
-  num_subsets = std::min(num_subsets, static_cast<BaseInt>(focus.size()));
+  num_subsets = std::min<BaseInt>(num_subsets, focus.size());
   CHECK_GE(num_subsets, 0);
   std::vector<SubsetIndex> chosen_indices;
   for (const SubsetIndex subset : focus) {
@@ -807,7 +785,7 @@ std::vector<SubsetIndex> ClearMostCoveredElements(
   // TODO(user): find another algorithm if necessary.
   std::shuffle(sampled_subsets.begin(), sampled_subsets.end(), absl::BitGen());
   sampled_subsets.resize(
-      std::min(static_cast<BaseInt>(sampled_subsets.size()), max_num_subsets));
+      std::min<BaseInt>(sampled_subsets.size(), max_num_subsets));
 
   // Testing has shown that sorting sampled_subsets is not necessary.
   // Now, un-select the subset in sampled_subsets.

diff --git a/ortools/algorithms/set_cover_heuristics.h b/ortools/algorithms/set_cover_heuristics.h
@@ -45,41 +45,8 @@ namespace operations_research {
 // subproblems in different threads.
 //
 
-// The preprocessor finds the elements that can only be covered by one subset.
-// Obviously, such subsets which are the only ones that can cover a given
-// element are chosen.
-
-// The consistency level is maintained up to kFreeAndUncovered.
-class Preprocessor {
- public:
-  explicit Preprocessor(absl::Nonnull<SetCoverInvariant*> inv)
-      : inv_(inv), num_columns_fixed_by_singleton_row_(0) {}
-
-  // Returns true if a solution was found.
-  // TODO(user): Add time-outs and exit with a partial solution. This seems
-  // unlikely, though.
-  bool NextSolution();
-
-  // Computes the next partial solution considering only the subsets whose
-  // indices are in focus.
-  bool NextSolution(absl::Span<const SubsetIndex> focus);
-
-  // Returns the number of columns that are the only one for a given row.
-  int num_columns_fixed_by_singleton_row() const {
-    return num_columns_fixed_by_singleton_row_;
-  }
-
- private:
-  // The data structure that will maintain the invariant for the model.
-  SetCoverInvariant* inv_;
-
-  // The number of columns that are the only one for a given row, i.e.
-  // the subsets that are unique in covering a particular element.
-  BaseInt num_columns_fixed_by_singleton_row_;
-};
-
 // An obvious idea is to take all the S_j's (or equivalently to set all the
-// x_j's to 1). It's a bit silly but fast, and we can improve on it later using
+// x_j's to 1). It's very silly but fast, and we can improve on it later using
 // local search.
 
 // The consistency level is maintained up to kFreeAndUncovered.