ray-project · jjyao · Nov 7, 2024 · Oct 17, 2024 · Oct 17, 2024 · Oct 21, 2024
diff --git a/src/ray/common/bundle_spec.cc b/src/ray/common/bundle_spec.cc
@@ -95,15 +95,15 @@ std::string BundleSpecification::DebugString() const {
 }
 
 std::string FormatPlacementGroupResource(const std::string &original_resource_name,
-                                         const PlacementGroupID &group_id,
+                                         const std::string &group_id_str,
                                          int64_t bundle_index) {
   std::stringstream os;
   if (bundle_index >= 0) {
     os << original_resource_name << kGroupKeyword << std::to_string(bundle_index) << "_"
-       << group_id.Hex();
+       << group_id_str;
   } else {
     RAY_CHECK(bundle_index == -1) << "Invalid index " << bundle_index;
-    os << original_resource_name << kGroupKeyword << group_id.Hex();
+    os << original_resource_name << kGroupKeyword << group_id_str;
   }
   std::string result = os.str();
   RAY_DCHECK(GetOriginalResourceName(result) == original_resource_name)
@@ -112,6 +112,13 @@ std::string FormatPlacementGroupResource(const std::string &original_resource_na
   return result;
 }
 
+std::string FormatPlacementGroupResource(const std::string &original_resource_name,
+                                         const PlacementGroupID &group_id,
+                                         int64_t bundle_index) {
+  return FormatPlacementGroupResource(
+      original_resource_name, group_id.Hex(), bundle_index);
+}
+
 std::string GetOriginalResourceName(const std::string &resource) {
   auto data = ParsePgFormattedResource(
       resource, /*for_wildcard_resource*/ true, /*for_indexed_resource*/ true);
@@ -146,6 +153,7 @@ std::optional<PgFormattedResourceData> ParsePgFormattedResource(
         match_groups.size() == 3) {
       data.original_resource = match_groups[1].str();
       data.bundle_index = -1;
+      data.group_id = match_groups[2].str();
       return data;
     }
   }
@@ -157,6 +165,7 @@ std::optional<PgFormattedResourceData> ParsePgFormattedResource(
         match_groups.size() == 4) {
       data.original_resource = match_groups[1].str();
       data.bundle_index = stoi(match_groups[2].str());
+      data.group_id = match_groups[3].str();
       return data;
     }
   }

diff --git a/src/ray/common/bundle_spec.h b/src/ray/common/bundle_spec.h
@@ -97,8 +97,22 @@ struct PgFormattedResourceData {
   std::string original_resource;
   /// -1 if it is a wildcard resource.
   int64_t bundle_index;
+  std::string group_id;
 };
 
+/// Format a placement group resource with provided parameters.
+///
+/// \param original_resource_name The original resource name of the pg resource.
+/// \param group_id_str The group id in string format.
+/// \param bundle_index The bundle index. If -1, generate the wildcard pg resource.
+///                     E.g., [original_resource_name]_group_[group_id_str].
+///                     If >=0, generate the indexed pg resource. E.g.,
+///                     [original_resource_name]_group_[bundle_index]_[group_id_str]
+/// \return The corresponding formatted placement group resource string.
+std::string FormatPlacementGroupResource(const std::string &original_resource_name,
+                                         const std::string &group_id_str,
+                                         int64_t bundle_index = -1);
+
 /// Format a placement group resource, e.g., CPU -> CPU_group_i
 std::string FormatPlacementGroupResource(const std::string &original_resource_name,
                                          const PlacementGroupID &group_id,

diff --git a/src/ray/common/scheduling/resource_instance_set.cc b/src/ray/common/scheduling/resource_instance_set.cc
@@ -16,7 +16,9 @@
 
 #include <cmath>
 #include <sstream>
+#include <utility>
 
+#include "ray/common/bundle_spec.h"
 #include "ray/util/logging.h"
 
 namespace ray {
@@ -43,6 +45,25 @@ bool NodeResourceInstanceSet::Has(ResourceID resource_id) const {
 
 void NodeResourceInstanceSet::Remove(ResourceID resource_id) {
   resources_.erase(resource_id);
+
+  // Remove from the pg_indexed_resources_ as well
+  auto data = ParsePgFormattedResource(resource_id.Binary(),
+                                       /*for_wildcard_resource=*/false,
+                                       /*for_indexed_resource=*/true);
+  if (data) {
+    ResourceID original_resource_id(data->original_resource);
+    absl::flat_hash_map<std::string, absl::flat_hash_set<ResourceID>> &pg_resource_map =
+        pg_indexed_resources_[original_resource_id];
+    absl::flat_hash_set<ResourceID> &resource_set = pg_resource_map[data->group_id];
+
+    resource_set.erase(resource_id);
+    if (resource_set.empty()) {
+      pg_resource_map.erase(data->group_id);
+    }
+    if (pg_resource_map.empty()) {
+      pg_indexed_resources_.erase(original_resource_id);
+    }
+  }
 }
 
 const std::vector<FixedPoint> &NodeResourceInstanceSet::Get(
@@ -69,6 +90,15 @@ NodeResourceInstanceSet &NodeResourceInstanceSet::Set(ResourceID resource_id,
     resources_.erase(resource_id);
   } else {
     resources_[resource_id] = std::move(instances);
+
+    // Popluate the pg_indexed_resources_map_
+    auto data = ParsePgFormattedResource(resource_id.Binary(),
+                                         /*for_wildcard_resource=*/false,
+                                         /*for_indexed_resource=*/true);
+    if (data) {
+      pg_indexed_resources_[ResourceID(data->original_resource)][data->group_id].emplace(
+          resource_id);
+    }
   }
   return *this;
 }
@@ -93,21 +123,130 @@ bool NodeResourceInstanceSet::operator==(const NodeResourceInstanceSet &other) c
 std::optional<absl::flat_hash_map<ResourceID, std::vector<FixedPoint>>>
 NodeResourceInstanceSet::TryAllocate(const ResourceSet &resource_demands) {
   absl::flat_hash_map<ResourceID, std::vector<FixedPoint>> allocations;
+
+  // During resource allocation with a placement group, no matter whether the allocation
+  // requirement specifies a bundle index, we need to generate the allocation on both
+  // the wildcard resource and the indexed resource. The resource_demand shouldn't be
+  // assigned across bundles. And If no bundle index is specified, we will iterate
+  // through the bundles and find the first bundle that can fit the required resources.
+  // In addition, for unit resources, we need to make sure that the allocation on the
+  // wildcard resource and the indexed resource are consistent, meaning the same
+  // instance ids should be allocated.
+
+  // In the format of:
+  // key: original resource id,
+  // value: [resource id, parsed pg format resource data]
+  absl::flat_hash_map<ResourceID,
+                      std::vector<std::pair<ResourceID, PgFormattedResourceData>>>
+      pg_resource_map;
+
   for (const auto &[resource_id, demand] : resource_demands.Resources()) {
-    auto allocation = TryAllocate(resource_id, demand);
-    if (allocation) {
-      // Even if allocation failed we need to remember partial allocations to correctly
-      // free resources.
-      allocations[resource_id] = std::move(*allocation);
+    auto data = ParsePgFormattedResource(resource_id.Binary(),
+                                         /*for_wildcard_resource*/ true,
+                                         /*for_indexed_resource*/ true);
+
+    if (data) {
+      // Aggregate based on resource type
+      ResourceID original_resource_id{data->original_resource};
+      pg_resource_map[original_resource_id].push_back(
+          std::make_pair(resource_id, data.value()));
     } else {
-      // Allocation failed. Restore partially allocated resources.
-      for (const auto &[resource_id, allocation] : allocations) {
-        Free(resource_id, allocation);
+      // Directly allocate the resources if the resource is not with a placement group
+      auto allocation = TryAllocate(resource_id, demand);
+      if (allocation) {
+        // Even if allocation failed we need to remember partial allocations to
+        // correctly free resources.
+        allocations[resource_id] = std::move(*allocation);
+      } else {
+        // Allocation failed. Restore partially allocated resources.
+        for (const auto &[resource_id, allocation] : allocations) {
+          Free(resource_id, allocation);
+        }
+        return std::nullopt;
       }
-      return std::nullopt;
     }
   }
 
+  // Handle the resource allocation for resources with placement group
+  for (const auto &[original_resource_id, resource_id_vector] : pg_resource_map) {
+    // Assuming exactly 1 placement group and at most 1 bundle index can be specified in
+    // the resource requirement for a single resource type
+    std::vector<FixedPoint> wildcard_allocation;
+    const ResourceID *wildcard_resource_id = nullptr;
+    const std::string *pg_id = nullptr;
+
+    // Allocate indexed resource
+    if (resource_id_vector.size() == 1) {
+      // The case where no bundle index is specified
+      // Iterate through the bundles with the same original resource and pg_id to find
+      // the first one with enough space
+      bool found = false;
+      wildcard_resource_id = &resource_id_vector[0].first;
+      pg_id = &resource_id_vector[0].second.group_id;
+
+      auto pg_index_resources = pg_indexed_resources_.find(original_resource_id);
+      if (pg_index_resources != pg_indexed_resources_.end()) {
+        auto index_resources = pg_index_resources->second.find(*pg_id);
+        if (index_resources != pg_index_resources->second.end()) {
+          for (ResourceID indexed_resource_id : index_resources->second) {
+            if (Has(indexed_resource_id)) {
+              auto allocation = TryAllocate(
+                  indexed_resource_id, resource_demands.Get(resource_id_vector[0].first));
+
+              if (allocation) {
+                // Found the allocation in a bundle
+                wildcard_allocation = *allocation;
+                allocations[indexed_resource_id] = std::move(*allocation);
+                found = true;
+                break;
+              }
+            }
+          }
+        }
+      }
+
+      if (!found) {
+        // No bundle can fit the required resources, allocation failed
+        for (const auto &[resource_id, allocation] : allocations) {
+          Free(resource_id, allocation);
+        }
+        return std::nullopt;
+      }
+    } else {
+      // The case where the bundle index is specified
 // Note that all nodes that have placement group have a special 
 // Note that all nodes that have placement group have a special 
+      // For each resource type, both the wildcard resource and the indexed resource
+      // should be in the resource_demand
+      for (const std::pair<ResourceID, PgFormattedResourceData> &pair :
+           resource_id_vector) {
+        if (pair.second.bundle_index != -1) {
+          // This is the indexed resource
+          auto allocation = TryAllocate(pair.first, resource_demands.Get(pair.first));
+
+          if (allocation) {
+            wildcard_allocation = *allocation;
+            allocations[pair.first] = std::move(*allocation);
+          } else {
+            // The corresponding bundle cannot hold the required resources.
+            // Allocation failed
+            for (const auto &[resource_id, allocation] : allocations) {
+              Free(resource_id, allocation);
+            }
+            return std::nullopt;
+          }
+        } else {
+          // This is the wildcard resource
+          wildcard_resource_id = &pair.first;
+        }
+      }
+    }
+
+    // Allocate wildcard resource, should be consistent with the indexed resource
+    RAY_CHECK(wildcard_resource_id != nullptr);
+    RAY_CHECK(!wildcard_allocation.empty());
+    AllocateWithReference(wildcard_allocation, *wildcard_resource_id);
+    allocations[*wildcard_resource_id] = std::move(wildcard_allocation);
+  }
+
   return std::make_optional<absl::flat_hash_map<ResourceID, std::vector<FixedPoint>>>(
       std::move(allocations));
 }
@@ -139,7 +278,7 @@ std::optional<std::vector<FixedPoint>> NodeResourceInstanceSet::TryAllocate(
   //
   // As long as remaining_demand is greater than 1.,
   // allocate full unit-capacity instances until the remaining_demand becomes fractional.
-  // Then try to find the best fit for the fractional remaining_resources. Best fist means
+  // Then try to find the best fit for the fractional remaining_resources. Best fit means
   // allocating the resource instance with the smallest available capacity greater than
   // remaining_demand
   if (remaining_demand >= 1.) {
@@ -186,6 +325,20 @@ std::optional<std::vector<FixedPoint>> NodeResourceInstanceSet::TryAllocate(
   return std::make_optional<std::vector<FixedPoint>>(std::move(allocation));
 }
 
+void NodeResourceInstanceSet::AllocateWithReference(
+    const std::vector<FixedPoint> &ref_allocation, ResourceID resource_id) {
+  std::vector<FixedPoint> available = Get(resource_id);
+  RAY_CHECK(!available.empty());
+  RAY_CHECK_EQ(available.size(), ref_allocation.size());
+
+  for (size_t i = 0; i < ref_allocation.size(); i++) {
+    RAY_CHECK_GE(available[i], ref_allocation[i]);
+    available[i] -= ref_allocation[i];
+  }
+
+  Set(resource_id, std::move(available));
+}
+
 void NodeResourceInstanceSet::Free(ResourceID resource_id,
                                    const std::vector<FixedPoint> &allocation) {
   std::vector<FixedPoint> available = Get(resource_id);
@@ -244,7 +397,7 @@ std::vector<FixedPoint> NodeResourceInstanceSet::Subtract(
 
 std::string NodeResourceInstanceSet::DebugString() const {
   std::stringstream buffer;
-  buffer << "{";
+  buffer << "{resources_:{";
   bool first = true;
   for (const auto &[id, quantity] : resources_) {
     if (!first) {
@@ -253,7 +406,37 @@ std::string NodeResourceInstanceSet::DebugString() const {
     first = false;
     buffer << id.Binary() << ": " << FixedPointVectorToString(quantity);
   }
-  buffer << "}";
+  buffer << "}, pg_indexed_resources_:{";
+
+  first = true;
+  for (const auto &[original_id, pg_resource_id_map] : pg_indexed_resources_) {
+    if (!first) {
+      buffer << ", ";
+    }
+    first = false;
+
+    buffer << original_id.Binary() << ": {";
+    bool firstInMap = true;
+    for (const auto &[pg_id, indexed_ids] : pg_resource_id_map) {
+      if (!firstInMap) {
+        buffer << ", ";
+      }
+      firstInMap = false;
+
+      buffer << pg_id << ": {";
+      bool firstInSet = true;
+      for (const auto &index_id : indexed_ids) {
+        if (!firstInSet) {
+          buffer << ", ";
+        }
+        firstInSet = false;
+        buffer << index_id.Binary();
+      }
+      buffer << "}";
+    }
+    buffer << "}";
+  }
+  buffer << "}}";
   return buffer.str();
 }
 

diff --git a/src/ray/common/scheduling/resource_instance_set.h b/src/ray/common/scheduling/resource_instance_set.h
@@ -87,6 +87,14 @@ class NodeResourceInstanceSet {
     return resources_;
   }
 
+  /// Only for testing.
+  const absl::flat_hash_map<
+      ResourceID,
+      absl::flat_hash_map<std::string, absl::flat_hash_set<ResourceID>>>
+      &PgIndexedResources() const {
+    return pg_indexed_resources_;
+  }
+
  private:
   /// Allocate enough capacity across the instances of a resource to satisfy "demand".
   ///
@@ -108,8 +116,30 @@ class NodeResourceInstanceSet {
   std::optional<std::vector<FixedPoint>> TryAllocate(ResourceID resource_id,
                                                      FixedPoint demand);
 
+  /// Allocate resource to the resource_id based on a provided reference allocation.
+  /// The function is used for placement group allocation. Making the allocation of
+  /// the wildcard resource be identical to the indexed resource allocation.
+  ///
+  /// The function assumes and also verifies that (1) the resource_id exists in the
+  /// node; (2) the available resources with resource_id on the node can satisfy the
+  /// provided ref_allocation.
+  ///
+  /// \param ref_allocation: The reference allocation used to allocate the resource_id
+  /// \param resource_id: The id of the resource to be allocated
+  void AllocateWithReference(const std::vector<FixedPoint> &ref_allocation,
+                             ResourceID resource_id);
+
   /// Map from the resource IDs to the resource instance values.
   absl::flat_hash_map<ResourceID, std::vector<FixedPoint>> resources_;
+
+  /// This is a derived map from the resources_ map. The map aggregates all the current
+  /// placement group indexed resources in resources_ by their original resource id and
+  /// pd id. The key of the map is the original resource id. The value is a map of pg id
+  /// to the corresponding placement group indexed resource ids. This map should always
+  /// be consistent with the _resource map.
+  absl::flat_hash_map<ResourceID,
+                      absl::flat_hash_map<std::string, absl::flat_hash_set<ResourceID>>>
+      pg_indexed_resources_;
 };
 
 }  // namespace ray