From eb33c15a6516401d2fd3bee5e8b56b79ba2ce0df Mon Sep 17 00:00:00 2001
From: Giovanni Petrantoni <7008900+sinkingsugar@users.noreply.github.com>
Date: Sat, 5 Oct 2024 11:48:54 +0800
Subject: [PATCH] feat(crdt): Add is_tombstoned method and refactor ListCRDT

Adds a public is_tombstoned method to the CRDT class for checking
record deletion status. Reformats ListCRDT (list_crdt.hpp) to
2-space indentation and expands its comments; the ListCRDT changes
are formatting and comments only, with no behavioral change.
---
 crdt.hpp      |  17 ++
 list_crdt.hpp | 579 +++++++++++++++++++++++++-------------------------
 2 files changed, 308 insertions(+), 288 deletions(-)
diff --git a/crdt.hpp b/crdt.hpp
index b70ea22..64fa662 100644
--- a/crdt.hpp
+++ b/crdt.hpp
@@ -674,6 +674,23 @@ template <typename K, typename V> class CRDT : public std::enable_shared_from_th
     }
   }
 
+  // Public entry point for the tombstone check; forwards to is_record_tombstoned.
+  /// Checks if a record is tombstoned.
+  ///
+  /// # Arguments
+  ///
+  /// * `record_id` - The unique identifier for the record.
+  /// * `ignore_parent` - If true, only checks the current CRDT instance, ignoring the parent.
+  ///
+  /// # Returns
+  ///
+  /// True if the record is tombstoned, false otherwise.
+  ///
+  /// Complexity: presumably O(1) average (hash table lookup); set by is_record_tombstoned - TODO confirm
+  bool is_tombstoned(const K &record_id, bool ignore_parent = false) const {
+    return is_record_tombstoned(record_id, ignore_parent);
+  }
+
 private:
   CrdtNodeId node_id_;
   LogicalClock clock_;
diff --git a/list_crdt.hpp b/list_crdt.hpp
index d6405ae..321b4d7 100644
--- a/list_crdt.hpp
+++ b/list_crdt.hpp
@@ -10,340 +10,343 @@
 
 // Represents a unique identifier for a list element
 struct ElementID {
-    std::string replica_id; // Unique identifier for the replica
-    uint64_t sequence;      // Monotonically increasing sequence number
-
-    bool operator<(const ElementID &other) const {
-        if (sequence != other.sequence)
-            return sequence < other.sequence;
-        return replica_id < other.replica_id;
-    }
-
-    bool operator==(const ElementID &other) const { 
-        return sequence == other.sequence && replica_id == other.replica_id; 
+  std::string replica_id; // Unique identifier for the replica
+  uint64_t sequence;      // Monotonically increasing sequence number
+
+  // Comparison operator for ordering elements
+  bool operator<(const ElementID &other) const {
+    if (sequence != other.sequence)
+      return sequence < other.sequence;
+    return replica_id < other.replica_id;
+  }
+
+  // Equality operator for comparing two ElementIDs
+  bool operator==(const ElementID &other) const { return sequence == other.sequence && replica_id == other.replica_id; }
+
+  // For hashing in unordered_map
+  struct Hash {
+    std::size_t operator()(const ElementID &id) const {
+      return std::hash<std::string>()(id.replica_id) ^ std::hash<uint64_t>()(id.sequence);
     }
+  };
 
-    // For hashing in unordered_map
-    struct Hash {
-        std::size_t operator()(const ElementID& id) const {
-            return std::hash<std::string>()(id.replica_id) ^ std::hash<uint64_t>()(id.sequence);
-        }
-    };
-
-    // For printing purposes
-    friend std::ostream &operator<<(std::ostream &os, const ElementID &id) {
-        os << "(" << id.replica_id << ", " << id.sequence << ")";
-        return os;
-    }
+  // For printing purposes
+  friend std::ostream &operator<<(std::ostream &os, const ElementID &id) {
+    os << "(" << id.replica_id << ", " << id.sequence << ")";
+    return os;
+  }
 };
 
 // Represents an element in the list
 struct ListElement {
-    ElementID id;                          // Unique identifier
-    std::optional<std::string> value;      // Value stored (None if tombstoned)
-    std::optional<ElementID> origin_left;  // Left origin at insertion
-    std::optional<ElementID> origin_right; // Right origin at insertion
-
-    bool is_deleted() const { return !value.has_value(); }
-
-    // For printing purposes
-    friend std::ostream &operator<<(std::ostream &os, const ListElement &elem) {
-        os << "ID: " << elem.id << ", ";
-        if (elem.is_deleted()) {
-            os << "[Deleted]";
-        } else {
-            os << "Value: " << elem.value.value();
-        }
-        os << ", Origin Left: ";
-        if (elem.origin_left.has_value()) {
-            os << elem.origin_left.value();
-        } else {
-            os << "None";
-        }
-        os << ", Origin Right: ";
-        if (elem.origin_right.has_value()) {
-            os << elem.origin_right.value();
-        } else {
-            os << "None";
-        }
-        return os;
+  ElementID id;                          // Unique identifier for the element
+  std::optional<std::string> value;      // Value stored (None if tombstoned)
+  std::optional<ElementID> origin_left;  // Left origin at insertion
+  std::optional<ElementID> origin_right; // Right origin at insertion
+
+  // Checks if the element is tombstoned (deleted)
+  bool is_deleted() const { return !value.has_value(); }
+
+  // For printing purposes
+  friend std::ostream &operator<<(std::ostream &os, const ListElement &elem) {
+    os << "ID: " << elem.id << ", ";
+    if (elem.is_deleted()) {
+      os << "[Deleted]";
+    } else {
+      os << "Value: " << elem.value.value();
+    }
+    os << ", Origin Left: ";
+    if (elem.origin_left.has_value()) {
+      os << elem.origin_left.value();
+    } else {
+      os << "None";
+    }
+    os << ", Origin Right: ";
+    if (elem.origin_right.has_value()) {
+      os << elem.origin_right.value();
+    } else {
+      os << "None";
     }
+    return os;
+  }
 };
 
 // Comparator for ListElements to establish a total order
 struct ListElementComparator {
-    bool operator()(const ListElement &a, const ListElement &b) const {
-        // Compare based on the position in the list using origins
-        if (a.origin_left != b.origin_left) {
-            if (!a.origin_left.has_value()) return true;  // Root is first
-            if (!b.origin_left.has_value()) return false;
-            return a.origin_left.value() < b.origin_left.value();
-        }
-
-        if (a.origin_right != b.origin_right) {
-            if (!a.origin_right.has_value()) return false; // a is before
-            if (!b.origin_right.has_value()) return true;  // b is before
-            return a.origin_right.value() < b.origin_right.value();
-        }
+  bool operator()(const ListElement &a, const ListElement &b) const {
+    // Compare based on the position in the list using origins
+    if (a.origin_left != b.origin_left) {
+      if (!a.origin_left.has_value())
+        return true; // Root is first
+      if (!b.origin_left.has_value())
+        return false;
+      return a.origin_left.value() < b.origin_left.value();
+    }
 
-        // If both have the same origins, use ElementID to break the tie
-        return a.id < b.id;
+    if (a.origin_right != b.origin_right) {
+      if (!a.origin_right.has_value())
+        return false; // a has no right origin, so a sorts after b
+      if (!b.origin_right.has_value())
+        return true; // b has no right origin, so a sorts before b
+      return a.origin_right.value() < b.origin_right.value();
     }
+
+    // If both have the same origins, use ElementID to break the tie
+    return a.id < b.id;
+  }
 };
 
 // Represents the List CRDT
 class ListCRDT {
 public:
-    // Constructor
-    ListCRDT(const std::string &replica_id) : replica_id_(replica_id), counter_(0) {
-        // Initialize with a root element to simplify origins
-        ElementID root_id{"root", 0};
-        ListElement root_element{root_id, std::nullopt, std::nullopt, std::nullopt};
-        elements_.push_back(root_element);
-        element_index_.emplace(root_id, 0); // Store index instead of iterator
+  // Constructor to initialize a new CRDT instance with a unique replica ID
+  ListCRDT(const std::string &replica_id) : replica_id_(replica_id), counter_(0) {
+    // Initialize with a root element to simplify origins
+    ElementID root_id{"root", 0};
+    ListElement root_element{root_id, std::nullopt, std::nullopt, std::nullopt};
+    elements_.push_back(root_element);
+    element_index_.emplace(root_id, 0); // Store index instead of iterator
+  }
+
+  // Inserts a value at the given index
+  void insert(uint32_t index, const std::string &value) {
+    ElementID new_id = generate_id();
+    std::optional<ElementID> left_origin;
+    std::optional<ElementID> right_origin;
+
+    // Retrieve visible elements (non-tombstoned)
+    auto visible = get_visible_elements();
+    if (index > visible.size()) {
+      index = visible.size(); // Adjust index if out of bounds
     }
 
-    // Inserts a value at the given index
-    void insert(uint32_t index, const std::string &value) {
-        ElementID new_id = generate_id();
-        std::optional<ElementID> left_origin;
-        std::optional<ElementID> right_origin;
-
-        auto visible = get_visible_elements();
-        if (index > visible.size()) {
-            index = visible.size(); // Adjust index if out of bounds
-        }
-
-        if (index == 0) {
-            // Insert at the beginning, right_origin is the first element
-            if (!visible.empty()) {
-                right_origin = visible[0].id;
-            }
-        } else if (index == visible.size()) {
-            // Insert at the end, left_origin is the last element
-            if (!visible.empty()) {
-                left_origin = visible.back().id;
-            }
-        } else {
-            // Insert in the middle
-            left_origin = visible[index - 1].id;
-            right_origin = visible[index].id;
-        }
-
-        ListElement new_element{new_id, value, left_origin, right_origin};
-        integrate(new_element);
+    if (index == 0) {
+      // Insert at the beginning, right_origin is the first element
+      if (!visible.empty()) {
+        right_origin = visible[0].id;
+      }
+    } else if (index == visible.size()) {
+      // Insert at the end, left_origin is the last element
+      if (!visible.empty()) {
+        left_origin = visible.back().id;
+      }
+    } else {
+      // Insert in the middle
+      left_origin = visible[index - 1].id;
+      right_origin = visible[index].id;
     }
 
-    // Deletes the element at the given index
-    void delete_element(uint32_t index) {
-        const auto &visible = get_visible_elements();
-        if (index >= visible.size())
-            return;
-
-        ElementID target_id = visible[index].id;
-        size_t it = find_element(target_id);
-        if (it != elements_.size()) {
-            elements_[it].value.reset(); // Tombstone the element
-        }
+    // Create a new element with the given value and origins
+    ListElement new_element{new_id, value, left_origin, right_origin};
+    integrate(new_element);
+  }
+
+  // Deletes the element at the given index by tombstoning it
+  void delete_element(uint32_t index) {
+    const auto &visible = get_visible_elements();
+    if (index >= visible.size())
+      return; // Index out of bounds, do nothing
+
+    ElementID target_id = visible[index].id;
+    size_t it = find_element(target_id);
+    if (it != elements_.size()) {
+      elements_[it].value.reset(); // Tombstone the element by resetting its value
     }
-
-    // Merges another ListCRDT into this one
-    void merge(const ListCRDT &other) {
-        // Integrate all elements from the other CRDT
-        for (const auto &elem : other.elements_) {
-            if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
-                continue; // Skip the root element
-            }
-            integrate(elem);
-        }
-
-        // After integration, sort all elements to establish a total order
-        std::sort(elements_.begin(), elements_.end(), ListElementComparator());
-
-        // Remove duplicates while maintaining the first occurrence
-        auto last = std::unique(elements_.begin(), elements_.end(),
-                                [&](const ListElement &a, const ListElement &b) -> bool { return a.id == b.id; });
-        elements_.erase(last, elements_.end());
-
-        // Rebuild the index after sorting and deduplication
-        rebuild_index();
+  }
+
+  // Merges another ListCRDT into this one
+  void merge(const ListCRDT &other) {
+    // Integrate all elements from the other CRDT
+    for (const auto &elem : other.elements_) {
+      if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
+        continue; // Skip the root element
+      }
+      integrate(elem);
     }
 
-    // Generates a delta containing operations not seen by the other replica
-    std::pair<std::vector<ListElement>, std::vector<ElementID>> generate_delta(const ListCRDT &other) const {
-        std::vector<ListElement> new_elements;
-        std::vector<ElementID> tombstones;
+    // After integration, sort all elements to establish a total order
+    std::sort(elements_.begin(), elements_.end(), ListElementComparator());
 
-        // Create a set of ElementIDs present in the other CRDT
-        std::set<ElementID> other_ids;
-        for (const auto &elem : other.elements_) {
-            other_ids.insert(elem.id);
-        }
+    // Remove duplicates while maintaining the first occurrence
+    auto last = std::unique(elements_.begin(), elements_.end(),
+                            [&](const ListElement &a, const ListElement &b) -> bool { return a.id == b.id; });
+    elements_.erase(last, elements_.end());
 
-        // Identify new elements and tombstones
-        for (const auto &elem : elements_) {
-            if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
-                continue; // Skip the root element
-            }
-            if (other_ids.find(elem.id) == other_ids.end()) {
-                new_elements.push_back(elem);
-                if (elem.is_deleted()) {
-                    tombstones.push_back(elem.id);
-                }
-            }
-        }
+    // Rebuild the index after sorting and deduplication
+    rebuild_index();
+  }
 
-        return {new_elements, tombstones};
-    }
+  // Generates a delta containing operations not seen by the other replica
+  std::pair<std::vector<ListElement>, std::vector<ElementID>> generate_delta(const ListCRDT &other) const {
+    std::vector<ListElement> new_elements;
+    std::vector<ElementID> tombstones;
 
-    // Applies a delta to this CRDT
-    void apply_delta(const std::vector<ListElement> &new_elements, const std::vector<ElementID> &tombstones) {
-        // Apply insertions
-        for (const auto &elem : new_elements) {
-            if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
-                continue; // Skip the root element
-            }
-            auto it = find_element(elem.id);
-            if (it == elements_.size()) {
-                integrate(elem);
-            } else {
-                // Element already exists, possibly update tombstone
-                if (elem.is_deleted()) {
-                    elements_[it].value.reset();
-                }
-            }
-        }
+    // Create a set of ElementIDs present in the other CRDT
+    std::set<ElementID> other_ids;
+    for (const auto &elem : other.elements_) {
+      other_ids.insert(elem.id);
+    }
 
-        // Apply tombstones
-        for (const auto &id : tombstones) {
-            size_t it = find_element(id);
-            if (it != elements_.size()) {
-                elements_[it].value.reset();
-            }
+    // Identify new elements and tombstones
+    for (const auto &elem : elements_) {
+      if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
+        continue; // Skip the root element
+      }
+      if (other_ids.find(elem.id) == other_ids.end()) {
+        new_elements.push_back(elem);
+        if (elem.is_deleted()) {
+          tombstones.push_back(elem.id);
         }
-
-        // After applying, sort all elements to maintain order
-        std::sort(elements_.begin(), elements_.end(), ListElementComparator());
-
-        // Remove duplicates while maintaining the first occurrence
-        auto last = std::unique(elements_.begin(), elements_.end(),
-                                [&](const ListElement &a, const ListElement &b) -> bool { return a.id == b.id; });
-        elements_.erase(last, elements_.end());
-
-        // Rebuild the index after sorting and deduplication
-        rebuild_index();
+      }
     }
 
-    // Retrieves the current list as a vector of strings
-    std::vector<std::string> get_values() const {
-        std::vector<std::string> values;
-        for (const auto &elem : elements_) {
-            if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
-                continue; // Skip the root element
-            }
-            if (!elem.is_deleted()) {
-                values.push_back(elem.value.value());
-            }
+    return {new_elements, tombstones};
+  }
+
+  // Applies a delta to this CRDT
+  void apply_delta(const std::vector<ListElement> &new_elements, const std::vector<ElementID> &tombstones) {
+    // Apply insertions
+    for (const auto &elem : new_elements) {
+      if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
+        continue; // Skip the root element
+      }
+      auto it = find_element(elem.id);
+      if (it == elements_.size()) {
+        integrate(elem);
+      } else {
+        // Element already exists, possibly update tombstone
+        if (elem.is_deleted()) {
+          elements_[it].value.reset();
         }
-        return values;
+      }
     }
 
-    // Prints the current visible list for debugging
-    void print_visible() const {
-        for (const auto &elem : elements_) {
-            if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
-                continue; // Skip the root element
-            }
-            if (!elem.is_deleted()) {
-                std::cout << elem.value.value() << " ";
-            }
-        }
-        std::cout << std::endl;
+    // Apply tombstones to existing elements
+    for (const auto &id : tombstones) {
+      size_t it = find_element(id);
+      if (it != elements_.size()) {
+        elements_[it].value.reset();
+      }
     }
 
-    // Prints all elements including tombstones for debugging
-    void print_all_elements() const {
-        for (const auto &elem : elements_) {
-            std::cout << elem << std::endl;
-        }
+    // After applying, sort all elements to maintain order
+    std::sort(elements_.begin(), elements_.end(), ListElementComparator());
+
+    // Remove duplicates while maintaining the first occurrence
+    auto last = std::unique(elements_.begin(), elements_.end(),
+                            [&](const ListElement &a, const ListElement &b) -> bool { return a.id == b.id; });
+    elements_.erase(last, elements_.end());
+
+    // Rebuild the index after sorting and deduplication
+    rebuild_index();
+  }
+
+  // Retrieves the current list as a vector of strings
+  std::vector<std::string> get_values() const {
+    std::vector<std::string> values;
+    for (const auto &elem : elements_) {
+      if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
+        continue; // Skip the root element
+      }
+      if (!elem.is_deleted()) {
+        values.push_back(elem.value.value());
+      }
+    }
+    return values;
+  }
+
+  // Prints the current visible list for debugging
+  void print_visible() const {
+    for (const auto &elem : elements_) {
+      if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
+        continue; // Skip the root element
+      }
+      if (!elem.is_deleted()) {
+        std::cout << elem.value.value() << " ";
+      }
     }
+    std::cout << std::endl;
+  }
 
-    // Performs garbage collection by removing tombstones that are safe to delete
-    // For simplicity, assumes that all replicas have seen all operations
-    // In a real-world scenario, you'd track replica states to ensure safety
-    void garbage_collect() {
-        // Remove tombstoned elements
-        elements_.erase(
-            std::remove_if(elements_.begin(), elements_.end(),
-                           [&](const ListElement &elem) -> bool {
-                               return elem.is_deleted() && elem.id.replica_id != "root";
-                           }),
-            elements_.end()
-        );
-
-        // Rebuild the index after garbage collection
-        rebuild_index();
+  // Prints all elements including tombstones for debugging
+  void print_all_elements() const {
+    for (const auto &elem : elements_) {
+      std::cout << elem << std::endl;
     }
+  }
+
+  // Performs garbage collection by removing tombstones that are safe to delete
+  // For simplicity, assumes that all replicas have seen all operations
+  // In a real-world scenario, you'd track replica states to ensure safety
+  void garbage_collect() {
+    // Remove tombstoned elements
+    elements_.erase(
+        std::remove_if(elements_.begin(), elements_.end(),
+                       [&](const ListElement &elem) -> bool { return elem.is_deleted() && elem.id.replica_id != "root"; }),
+        elements_.end());
+
+    // Rebuild the index after garbage collection
+    rebuild_index();
+  }
 
 private:
-    std::string replica_id_;
-    uint64_t counter_;
-    std::vector<ListElement> elements_;
-    // Changed to map ElementID to index instead of iterator to prevent invalidation
-    std::unordered_map<ElementID, size_t, ElementID::Hash> element_index_;
-
-    // Generates a unique ElementID
-    ElementID generate_id() { return ElementID{replica_id_, ++counter_}; }
-
-    // Finds an element by its ID using the index
-    // Returns the index of the element, or elements_.size() if not found
-    size_t find_element(const ElementID &id) const {
-        auto it = element_index_.find(id);
-        if (it != element_index_.end()) {
-            return it->second;
-        }
-        return elements_.size();
+  std::string replica_id_;            // Unique identifier for the replica
+  uint64_t counter_;                  // Monotonically increasing counter for generating unique IDs
+  std::vector<ListElement> elements_; // List of all elements (including tombstoned)
+  std::unordered_map<ElementID, size_t, ElementID::Hash> element_index_; // Maps ElementID to index for fast lookup
+
+  // Generates a unique ElementID
+  ElementID generate_id() { return ElementID{replica_id_, ++counter_}; }
+
+  // Finds an element by its ID using the index
+  // Returns the index of the element, or elements_.size() if not found
+  size_t find_element(const ElementID &id) const {
+    auto it = element_index_.find(id);
+    if (it != element_index_.end()) {
+      return it->second;
     }
-
-    // Retrieves visible (non-tombstoned) elements
-    std::vector<ListElement> get_visible_elements() const {
-        std::vector<ListElement> visible;
-        for (const auto &elem : elements_) {
-            if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
-                continue; // Skip the root element
-            }
-            if (!elem.is_deleted()) {
-                visible.push_back(elem);
-            }
-        }
-        return visible;
+    return elements_.size();
+  }
+
+  // Retrieves visible (non-tombstoned) elements
+  std::vector<ListElement> get_visible_elements() const {
+    std::vector<ListElement> visible;
+    for (const auto &elem : elements_) {
+      if (elem.id.replica_id == "root" && elem.id.sequence == 0) {
+        continue; // Skip the root element
+      }
+      if (!elem.is_deleted()) {
+        visible.push_back(elem);
+      }
+    }
+    return visible;
+  }
+
+  // Integrates a single element into the CRDT
+  void integrate(const ListElement &new_elem) {
+    // If the element already exists, update tombstone if necessary
+    size_t existing_index = find_element(new_elem.id);
+    if (existing_index != elements_.size()) {
+      if (new_elem.is_deleted()) {
+        elements_[existing_index].value.reset();
+      }
+      return;
     }
 
-    // Integrates a single element into the CRDT
-    void integrate(const ListElement &new_elem) {
-        // If the element already exists, update tombstone if necessary
-        size_t existing_index = find_element(new_elem.id);
-        if (existing_index != elements_.size()) {
-            if (new_elem.is_deleted()) {
-                elements_[existing_index].value.reset();
-            }
-            return;
-        }
-
-        // Find the correct position to insert the new element
-        auto insert_pos = std::lower_bound(elements_.begin(), elements_.end(), new_elem, ListElementComparator());
-        size_t index = std::distance(elements_.begin(), insert_pos);
-        elements_.insert(insert_pos, new_elem);
+    // Find the correct position to insert the new element
+    auto insert_pos = std::lower_bound(elements_.begin(), elements_.end(), new_elem, ListElementComparator());
+    size_t index = std::distance(elements_.begin(), insert_pos);
+    elements_.insert(insert_pos, new_elem);
 
-        // Rebuild the index as inserting elements shifts indices
-        rebuild_index();
-    }
+    // Rebuild the index as inserting elements shifts indices
+    rebuild_index();
+  }
 
-    // Rebuilds the element_index_ mapping
-    void rebuild_index() {
-        element_index_.clear();
-        for (size_t i = 0; i < elements_.size(); ++i) {
-            element_index_.emplace(elements_[i].id, i);
-        }
+  // Rebuilds the element_index_ mapping
+  void rebuild_index() {
+    element_index_.clear();
+    for (size_t i = 0; i < elements_.size(); ++i) {
+      element_index_.emplace(elements_[i].id, i);
     }
-};
+  }
+};
\ No newline at end of file