Skip to content

Commit

Permalink
fix(interactive): Introduce Calcite-Based CBO Optimizer in GIE Compil…
Browse files Browse the repository at this point in the history
…er (#3672)

Co-authored-by: bingqing.lbq <bingqing.lbq@alibaba-inc.com>
Co-authored-by: xiao.zl <xiaolei.zl@alibaba-inc.com>
  • Loading branch information
3 people committed Apr 1, 2024
1 parent 33bc365 commit 0b8e42f
Show file tree
Hide file tree
Showing 72 changed files with 6,458 additions and 270 deletions.
23 changes: 8 additions & 15 deletions flex/engines/hqps_db/core/null_record.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <limits>
#include <tuple>
#include "flex/engines/hqps_db/core/utils/hqps_utils.h"
#include "flex/engines/hqps_db/structures/path.h"

namespace gs {

Expand All @@ -29,13 +28,14 @@ static constexpr const None NONE;
template <typename T>
struct NullRecordCreator {
static inline T GetNull() {
static_assert(
std::numeric_limits<
std::remove_const_t<std::remove_reference_t<T>>>::is_specialized,
"NullRecordCreator only support numeric type");
using type = std::remove_const_t<std::remove_reference_t<T>>;
static type null_value = std::numeric_limits<type>::max();
return null_value;
if constexpr (std::numeric_limits<std::remove_const_t<
std::remove_reference_t<T>>>::is_specialized) {
using type = std::remove_const_t<std::remove_reference_t<T>>;
static type null_value = std::numeric_limits<type>::max();
return null_value;
} else {
return T::GetNull();
}
}
};

Expand Down Expand Up @@ -84,13 +84,6 @@ struct NullRecordCreator<std::tuple<T...>> {
}
};

template <typename VID_T, typename LabelT>
struct NullRecordCreator<Path<VID_T, LabelT>> {
static inline Path<VID_T, LabelT> GetNull() {
return Path<VID_T, LabelT>::Null();
}
};

// Null check for plain bool values: this overload always reports "not null",
// whatever the value of the argument. The parameter is intentionally unnamed
// because it is never read.
static inline bool IsNull(const bool& /*b*/) { return false; }

template <typename A, typename B>
Expand Down
26 changes: 15 additions & 11 deletions flex/engines/hqps_db/core/operator/path_expand.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,9 @@ class PathExpand {

auto cur_label = vertex_set.GetLabel();

std::vector<offset_t> offsets;
CompressedPathSet<vertex_id_t, label_id_t> path_set;
std::tie(path_set, offsets) = path_expand_from_single_label(
graph, cur_label, vertex_set.GetVertices(), range, edge_expand_opt,
get_v_opt);

return std::make_pair(std::move(path_set), std::move(offsets));
return path_expand_from_single_label(graph, cur_label,
vertex_set.GetVertices(), range,
edge_expand_opt, get_v_opt);
}

// PathExpand Path with multiple edge triplet.
Expand Down Expand Up @@ -799,10 +795,18 @@ class PathExpand {

// create a copy of other_offsets.
auto copied_other_offsets(other_offsets);
std::vector<label_id_t> labels_vec(range.limit_, src_label);
auto path_set = CompressedPathSet<vertex_id_t, label_id_t>(
std::move(other_vertices), std::move(other_offsets),
std::move(labels_vec), range.start_);
std::vector<Path<vertex_id_t, label_id_t>> paths;
{
std::vector<label_id_t> labels_vec(range.limit_, src_label);
// Use compressed_path_set to generate all paths. We don't insert the
// CompressedPathSet into the context, since it is hard to resize.
auto compressed_path_set = CompressedPathSet<vertex_id_t, label_id_t>(
std::move(other_vertices), std::move(other_offsets),
std::move(labels_vec), range.start_);
paths = compressed_path_set.get_all_valid_paths();
}

PathSet<vertex_id_t, label_id_t> path_set(std::move(paths));

std::vector<std::vector<offset_t>> offset_amplify(
range.limit_, std::vector<offset_t>(copied_other_offsets[0].size(), 0));
Expand Down
24 changes: 24 additions & 0 deletions flex/engines/hqps_db/core/operator/project.h
Original file line number Diff line number Diff line change
Expand Up @@ -522,6 +522,30 @@ class ProjectOp {
std::move(lengths_vec));
}

// apply project on path set,the type must be lengthKey
template <typename PROP_T, typename VID_T, typename LabelT,
typename std::enable_if<std::is_same_v<PROP_T, LengthKey>>::type* =
nullptr>
static auto apply_single_project_impl(
const GRAPH_INTERFACE& graph, PathSet<VID_T, LabelT>& node,
const std::string& prop_name, const std::vector<size_t>& repeat_array) {
VLOG(10) << "Finish fetching properties";

std::vector<typename LengthKey::length_data_type> lengths_vec;
for (size_t i = 0; i < node.Size(); ++i) {
const auto& path = node.get(i);
if (repeat_array[i] > 0) {
auto length = path.length();
for (size_t j = 0; j < repeat_array[i]; ++j) {
lengths_vec.push_back(length);
}
}
}

return Collection<typename LengthKey::length_data_type>(
std::move(lengths_vec));
}

///////////////////Apply KeyValueMapper to all data structures.
template <typename CTX_T, typename... MAPPER>
static auto apply_single_project(
Expand Down
27 changes: 18 additions & 9 deletions flex/engines/hqps_db/core/utils/keyed.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,24 @@ struct KeyedT<KeyedRowVertexSetImpl<LabelT, KEY_T, VID_T, SET_T...>,
};

// Group by the vertex set's id, for the general vertex set.
// template <typename VID_T, typename LabelT, size_t N>
// struct KeyedT<GeneralVertexSet<VID_T, LabelT, N>,
// PropertySelector<grape::EmptyType>> {
// using keyed_set_t = KeyedRowVertexSet<LabelT, VID_T, VID_T,
// grape::EmptyType>;
// // // The builder type.
// using builder_t =
// KeyedRowVertexSetBuilder<LabelT, VID_T, VID_T, grape::EmptyType>;
// };
// KeyedT specialization used when a GeneralVertexSet is keyed with a
// PropertySelector<grape::EmptyType>. It exposes the builder and result types
// plus factory functions that construct those builders from an existing set.
template <typename VID_T, typename LabelT, typename... T>
struct KeyedT<GeneralVertexSet<VID_T, LabelT, T...>,
              PropertySelector<grape::EmptyType>> {
  // Builder used when the set is keyed; constructed from the source set.
  using keyed_builder_t = GeneralVertexSetKeyedBuilder<VID_T, LabelT, T...>;
  // The keyed result has the same type as the input set.
  using keyed_set_t = GeneralVertexSet<VID_T, LabelT, T...>;
  // Builder used when the set is not keyed. The template arguments must match
  // the set passed to create_unkeyedkeyed_builder; the previous spelling
  // GeneralVertexSet<LabelT, VID_T, LabelT, T...> named a different
  // instantiation (swapped and duplicated arguments).
  // NOTE(review): assumes GeneralVertexSet::CreateBuilder() returns
  // GeneralVertexSet::builder_t -- confirm against the class definition.
  using unkeyed_builder_t =
      typename GeneralVertexSet<VID_T, LabelT, T...>::builder_t;

  // Creates the keyed builder from `set`. The selector is not read.
  static keyed_builder_t create_keyed_builder(
      const GeneralVertexSet<VID_T, LabelT, T...>& set,
      const PropertySelector<grape::EmptyType>& /*selector*/) {
    return keyed_builder_t(set);
  }

  // Creates the unkeyed builder by delegating to set.CreateBuilder(). The
  // selector is not read. The function name is kept exactly as it was so that
  // existing callers continue to resolve it.
  static unkeyed_builder_t create_unkeyedkeyed_builder(
      const GeneralVertexSet<VID_T, LabelT, T...>& set,
      const PropertySelector<grape::EmptyType>& /*selector*/) {
    return set.CreateBuilder();
  }
};

template <typename VID_T, typename LabelT, typename... T>
struct KeyedT<GeneralVertexSet<VID_T, LabelT, T...>,
Expand Down
4 changes: 3 additions & 1 deletion flex/engines/hqps_db/structures/collection.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ class TwoLabelVertexSetImpl;
template <typename VID_T, typename LabelT, typename... T>
class TwoLabelVertexSetImplBuilder;

template <typename VID_T, typename LabelT, typename... T>
class GeneralVertexSetBuilder;

// untypedEdgeSet
template <typename VID_T, typename LabelT, typename SUB_GRAPH_T>
class UnTypedEdgeSet;
Expand Down Expand Up @@ -418,7 +421,6 @@ class CountBuilder {
VLOG(10) << "ele is null";
}
} else {
VLOG(10) << "inc:" << ind << ", " << gs::to_string(tuple);
++vec_[ind];
}
return true;
Expand Down
38 changes: 31 additions & 7 deletions flex/engines/hqps_db/structures/multi_edge_set/flat_edge_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class FlatEdgeSetBuilder {
public:
using ele_tuple_t = std::tuple<VID_T, VID_T, EDATA_T>;
using index_ele_tuple_t = std::tuple<size_t, ele_tuple_t>;
using untyped_ele_tuple_t = std::tuple<size_t, VID_T, VID_T, EDATA_T>;
using result_t = FlatEdgeSet<VID_T, LabelT, EDATA_T>;

static constexpr bool is_flat_edge_set_builder = true;
Expand Down Expand Up @@ -68,6 +69,15 @@ class FlatEdgeSetBuilder {
}
}

// Inserts an element given as (index, src, dst, edata).
// The slice of tuple positions 1..3 is stored in vec_ (presumably
// (src, dst, edata) -- gs::tuple_slice<1, 4> is defined elsewhere). The
// leading index selects the entry of label_triplet_ind_ that is carried over
// to label_triplet_ind_new_. When the element at tuple position 1 is null, the
// LabelT null marker is recorded instead and label_triplet_ind_ is not read.
void Insert(const untyped_ele_tuple_t& tuple) {
  vec_.push_back(gs::tuple_slice<1, 4>(tuple));

  if (IsNull(std::get<1>(tuple))) {
    label_triplet_ind_new_.push_back(NullRecordCreator<LabelT>::GetNull());
    return;
  }

  const auto old_index = std::get<0>(tuple);
  label_triplet_ind_new_.push_back(label_triplet_ind_[old_index]);
}

result_t Build() {
return result_t(std::move(vec_), std::move(label_triplet_), prop_names_,
std::move(label_triplet_ind_new_), std::move(directions_));
Expand Down Expand Up @@ -308,7 +318,8 @@ class FlatEdgeSet {
std::move(copied_directions));
}

void fillBuiltinPropsImpl(std::vector<EDATA_T>& tuples,
template <typename T>
void fillBuiltinPropsImpl(std::vector<T>& tuples,
const std::vector<std::string>& prop_names,
const std::vector<size_t>& repeat_array) {
// Make sure this is correct.
Expand All @@ -333,23 +344,36 @@ class FlatEdgeSet {
} else {
for (size_t j = 0; j < repeat_times; ++j) {
CHECK(cur_ind < tuples.size());
std::get<0>(tuples[cur_ind]) = std::get<0>(std::get<2>(vec_[i]));
if constexpr (std::is_same_v<T, EDATA_T>) {
std::get<0>(tuples[cur_ind]) = std::get<0>(std::get<2>(vec_[i]));
} else if constexpr (std::is_same_v<EDATA_T, Any>) {
std::get<0>(tuples[cur_ind]) =
AnyConverter<std::tuple_element_t<0, T>>::from_any(
std::get<2>(vec_[i]));
} else {
static_assert(
std::is_same_v<T, EDATA_T>,
"EDATA_T should be the same as T, or EDATA_T should be any");
}
cur_ind += 1;
}
}
}
}

void fillBuiltinProps(std::vector<EDATA_T>& tuples,
const PropNameArray<EDATA_T>& prop_names,
// Fills `tuples` with builtin properties, honoring `repeat_array`.
// T is the element type of the output vector; when EDATA_T is Any the values
// need to be converted to the actual type, which is why T is a template
// parameter rather than being fixed to EDATA_T. The property-name array is
// turned into a vector and the work is forwarded to fillBuiltinPropsImpl.
template <typename T>
void fillBuiltinProps(std::vector<T>& tuples,
                      const PropNameArray<T>& prop_names,
                      const std::vector<size_t>& repeat_array) {
  fillBuiltinPropsImpl(tuples, array_to_vec(prop_names), repeat_array);
}

// fill builtin props without repeat array.
void fillBuiltinProps(std::vector<EDATA_T>& tuples,
const PropNameArray<EDATA_T>& prop_names) {
// In case EDATA is any, we need to convert to the actual type.
template <typename T>
void fillBuiltinProps(std::vector<T>& tuples,
const PropNameArray<T>& prop_names) {
std::vector<size_t> repeat_array(vec_.size(), 1);
auto vec = array_to_vec(prop_names);
fillBuiltinPropsImpl(tuples, vec, repeat_array);
Expand Down
107 changes: 72 additions & 35 deletions flex/engines/hqps_db/structures/multi_edge_set/untyped_edge_set.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ class UnTypedEdgeSet {
using self_type_t = UnTypedEdgeSet<vid_t, label_t, sub_graph_t>;
using flat_t = FlatEdgeSet<vid_t, label_t, Any>;
using data_tuple_t = std::tuple<vid_t, vid_t, Any>;
using builder_t = FlatEdgeSetBuilder<vid_t, label_t, Any>;

static constexpr bool is_edge_set = true;

Expand Down Expand Up @@ -245,9 +246,25 @@ class UnTypedEdgeSet {
src_vertices_.size());
}

// Creates a builder_t (FlatEdgeSetBuilder over Any edge data) from this edge
// set's metadata. The per-source-label groups returned by get_edge_triplets()
// are flattened, in order, into one list of 3-element label arrays. That list
// is passed to the builder together with the property names, the per-edge
// label-triplet indices and the directions.
builder_t CreateBuilder() const {
  const auto grouped_triplets = get_edge_triplets();

  std::vector<std::array<LabelT, 3>> flat_triplets;
  for (const auto& group : grouped_triplets) {
    for (const auto& triplet : group) {
      flat_triplets.push_back(
          {std::get<0>(triplet), std::get<1>(triplet), std::get<2>(triplet)});
    }
  }

  return builder_t(flat_triplets, get_prop_names(),
                   get_label_triplet_indices(), get_directions());
}

std::vector<LabelKey> GetLabelVec() const {
std::vector<LabelKey> res;
res.reserve(Size());
auto edge_iters = generate_iters();
VLOG(1) << "GetLabelVec for UntypedEdgeSet, size: " << Size();
for (size_t i = 0; i < src_vertices_.size(); ++i) {
auto label_ind = label_indices_[i];
Expand Down Expand Up @@ -333,22 +350,54 @@ class UnTypedEdgeSet {
}
std::vector<std::vector<std::string>> prop_names = get_prop_names();
CHECK(prop_names.size() == res_label_triplets.size());
if (direction_ != Direction::Both) {
return FlatEdgeSet<vid_t, label_t, Any>(
std::move(dst_eles), std::move(res_label_triplets), prop_names,
std::move(label_triplet_indices), direction_);
} else {
std::vector<Direction> directions;
for (size_t i = 0; i < edge_label_triplets.size(); ++i) {
auto& cur_triplets_vec = edge_label_triplets[i];
for (size_t j = 0; j < cur_triplets_vec.size(); ++j) {
directions.emplace_back(std::get<3>(cur_triplets_vec[j]));
auto directions = get_directions();
return FlatEdgeSet<vid_t, label_t, Any>(
std::move(dst_eles), std::move(res_label_triplets), prop_names,
std::move(label_triplet_indices), std::move(directions));
}

std::vector<Direction> get_directions() const {
std::vector<Direction> res;
auto edge_triplet = get_edge_triplets();
for (auto src_label_ind = 0; src_label_ind < src_labels_.size();
++src_label_ind) {
auto src_label = src_labels_[src_label_ind];
std::vector<std::tuple<LabelT, LabelT, LabelT, Direction>> tmp;
if (adj_lists_.find(src_label) != adj_lists_.end()) {
auto& sub_graphs = adj_lists_.at(src_label);
for (auto& sub_graph : sub_graphs) {
res.emplace_back(sub_graph.GetDirection());
}
}
}
return res;
}

// Computes, for every edge produced by the iterators of generate_iters(), the
// index of its label triplet in the flattened triplet list.
//
// The triplets of get_edge_triplets() are grouped by source label; the
// flattened index of the j-th triplet of group k is
// (number of triplets in groups before k) + j. For source vertex i, the group
// is label_indices_[i] and j is the position of the iterator inside
// edge_iters[i].
// NOTE(review): this assumes the j-th iterator of a vertex corresponds to the
// j-th triplet of its source-label group -- confirm against generate_iters().
//
// Returns one entry per edge, in iteration order. Indices are stored as
// uint8_t, so values above 255 are truncated exactly as before.
std::vector<uint8_t> get_label_triplet_indices() const {
  std::vector<uint8_t> res;
  const auto edge_label_triplets = get_edge_triplets();
  res.reserve(Size());

  // group_offsets[k] is the number of triplets in all groups before group k.
  std::vector<size_t> group_offsets;
  group_offsets.reserve(edge_label_triplets.size() + 1);
  group_offsets.emplace_back(0);
  for (const auto& group : edge_label_triplets) {
    group_offsets.emplace_back(group_offsets.back() + group.size());
  }

  auto edge_iters = generate_iters();
  for (size_t i = 0; i < src_vertices_.size(); ++i) {
    auto& cur_edge_iters = edge_iters[i];
    const auto src_label_ind = label_indices_[i];

    for (size_t j = 0; j < cur_edge_iters.size(); ++j) {
      // Every edge reachable through this iterator shares one triplet index.
      const auto triplet_ind =
          static_cast<uint8_t>(group_offsets[src_label_ind] + j);
      for (auto& cur_iter = cur_edge_iters[j]; cur_iter.IsValid();
           cur_iter.Next()) {
        res.emplace_back(triplet_ind);
      }
    }
  }
  return res;
}

size_t Size() const {
Expand Down Expand Up @@ -607,22 +656,10 @@ class UnTypedEdgeSet {
std::vector<std::vector<std::string>> prop_names = get_prop_names();
CHECK(prop_names.size() == res_label_triplets.size());

if (direction_ != Direction::Both) {
return FlatEdgeSet<vid_t, label_t, Any>(
std::move(dst_eles), std::move(res_label_triplets), prop_names,
std::move(label_triplet_indices), direction_);
} else {
std::vector<Direction> directions;
for (size_t i = 0; i < edge_label_triplets.size(); ++i) {
auto& cur_triplets_vec = edge_label_triplets[i];
for (size_t j = 0; j < cur_triplets_vec.size(); ++j) {
directions.emplace_back(std::get<3>(cur_triplets_vec[j]));
}
}
return FlatEdgeSet<vid_t, label_t, Any>(
std::move(dst_eles), std::move(res_label_triplets), prop_names,
std::move(label_triplet_indices), std::move(directions));
}
auto directions = get_directions();
return FlatEdgeSet<vid_t, label_t, Any>(
std::move(dst_eles), std::move(res_label_triplets), prop_names,
std::move(label_triplet_indices), std::move(directions));
}

private:
Expand Down Expand Up @@ -676,19 +713,19 @@ class UnTypedEdgeSet {
return edge_iter_vecs;
}

std::vector<std::vector<std::tuple<LabelT, LabelT, LabelT, Direction>>>
std::vector<std::vector<std::tuple<LabelT, LabelT, LabelT>>>
get_edge_triplets() const {
std::vector<std::vector<std::tuple<LabelT, LabelT, LabelT, Direction>>> ret;
std::vector<std::vector<std::tuple<LabelT, LabelT, LabelT>>> ret;
for (auto src_label_ind = 0; src_label_ind < src_labels_.size();
++src_label_ind) {
auto src_label = src_labels_[src_label_ind];
std::vector<std::tuple<LabelT, LabelT, LabelT, Direction>> tmp;
std::vector<std::tuple<LabelT, LabelT, LabelT>> tmp;
if (adj_lists_.find(src_label) != adj_lists_.end()) {
auto& sub_graphs = adj_lists_.at(src_label);
for (auto& sub_graph : sub_graphs) {
tmp.emplace_back(std::make_tuple(
sub_graph.GetSrcLabel(), sub_graph.GetDstLabel(),
sub_graph.GetEdgeLabel(), sub_graph.GetDirection()));
tmp.emplace_back(std::make_tuple(sub_graph.GetSrcLabel(),
sub_graph.GetDstLabel(),
sub_graph.GetEdgeLabel()));
}
}
ret.emplace_back(std::move(tmp));
Expand Down
Loading

0 comments on commit 0b8e42f

Please sign in to comment.