From d8cf90a1658fa1aedfa3c91e6cddfdb55226ce82 Mon Sep 17 00:00:00 2001 From: Guodong Jin Date: Thu, 8 Aug 2024 17:36:41 -0400 Subject: [PATCH 1/4] enable semi mask on node table --- src/function/gds/shortest_paths.cpp | 10 ++--- .../{processor/operator => common}/mask.h | 25 ++++++++---- src/include/processor/operator/gds_call.h | 9 +++-- .../recursive_extend/recursive_join.h | 8 ++-- .../processor/operator/scan/scan_node_table.h | 10 ++--- src/include/processor/operator/semi_masker.h | 4 +- src/include/storage/store/node_table.h | 13 ++----- src/include/storage/store/table.h | 13 ++++--- src/processor/map/map_recursive_extend.cpp | 4 +- src/processor/operator/gds_call.cpp | 4 +- .../recursive_extend/recursive_join.cpp | 2 +- src/storage/store/node_group.cpp | 38 +++++++------------ src/storage/store/node_table.cpp | 2 +- 13 files changed, 69 insertions(+), 73 deletions(-) rename src/include/{processor/operator => common}/mask.h (82%) diff --git a/src/function/gds/shortest_paths.cpp b/src/function/gds/shortest_paths.cpp index fcc2bf179e5..6c1a091a55b 100644 --- a/src/function/gds/shortest_paths.cpp +++ b/src/function/gds/shortest_paths.cpp @@ -83,7 +83,7 @@ class PathLengths : public GDSFrontier { explicit PathLengths( std::vector> nodeTableIDAndNumNodes) { for (const auto& [tableID, numNodes] : nodeTableIDAndNumNodes) { - masks.insert({tableID, std::make_unique(numNodes, UNVISITED)}); + masks.insert({tableID, std::make_unique(numNodes, UNVISITED)}); } } @@ -131,10 +131,10 @@ class PathLengths : public GDSFrontier { private: uint8_t curIter = 255; - common::table_id_map_t> masks; + common::table_id_map_t> masks; common::table_id_t curFrontierFixedTableID; - processor::MaskData* curFrontierFixedMask; - processor::MaskData* nextFrontierFixedMask; + MaskData* curFrontierFixedMask; + MaskData* nextFrontierFixedMask; }; class PathLengthsFrontiers : public Frontiers { @@ -419,7 +419,7 @@ class ShortestPathsAlgorithm final : public GDSAlgorithm { } auto mask = sharedState->inputNodeOffsetMasks.at(tableID).get(); for (auto offset = 0u; offset < sharedState->graph->getNumNodes(tableID); ++offset) { - if (!mask->isMasked(offset)) { + if (!mask->isMasked(offset, offset)) { continue; } auto sourceNodeID = nodeID_t{offset, tableID}; diff --git a/src/include/processor/operator/mask.h b/src/include/common/mask.h similarity index 82% rename from src/include/processor/operator/mask.h rename to src/include/common/mask.h index df99c2eaaed..529a0fd749e 100644 --- a/src/include/processor/operator/mask.h +++ b/src/include/common/mask.h @@ -7,7 +7,7 @@ #include "common/types/internal_id_t.h" namespace kuzu { -namespace processor { +namespace common { // Note: Classes in this file are NOT thread-safe. struct MaskUtil { @@ -52,7 +52,15 @@ class MaskCollection { maskData = std::make_unique(maxOffset + 1); } - bool isMasked(common::offset_t offset) { return maskData->isMasked(offset, numMasks); } + // Return true if any offset between [startOffset, endOffset] is masked. Otherwise return false. + bool isMasked(common::offset_t startOffset, common::offset_t endOffset) const { + auto offset = startOffset; + auto numMasked = 0u; + while (offset <= endOffset) { + numMasked += maskData->isMasked(offset++, numMasks); + } + return numMasked > 0; + } // Increment mask value for the given nodeOffset if its current mask value is equal to // the specified `currentMaskValue`. // Note: blindly update mask does not parallelize well, so we minimize write by first checking @@ -87,7 +95,7 @@ class NodeSemiMask { virtual void init() = 0; virtual void incrementMaskValue(common::offset_t nodeOffset, uint8_t currentMaskValue) = 0; - virtual bool isMasked(common::offset_t nodeOffset) = 0; + virtual bool isMasked(common::offset_t startNodeOffset, common::offset_t endNodeOffset) = 0; bool isEnabled() const { return getNumMasks() > 0; } uint8_t getNumMasks() const { return maskCollection.getNumMasks(); } @@ -115,8 +123,8 @@ class NodeOffsetLevelSemiMask final : public NodeSemiMask { maskCollection.incrementMaskValue(nodeOffset, currentMaskValue); } - bool isMasked(common::offset_t nodeOffset) override { - return maskCollection.isMasked(nodeOffset); + bool isMasked(common::offset_t startNodeOffset, common::offset_t endNodeOffset) override { + return maskCollection.isMasked(startNodeOffset, endNodeOffset); } }; @@ -136,10 +144,11 @@ class NodeVectorLevelSemiMask final : public NodeSemiMask { maskCollection.incrementMaskValue(MaskUtil::getVectorIdx(nodeOffset), currentMaskValue); } - bool isMasked(common::offset_t nodeOffset) override { - return maskCollection.isMasked(MaskUtil::getVectorIdx(nodeOffset)); + bool isMasked(common::offset_t startNodeOffset, common::offset_t endNodeOffset) override { + return maskCollection.isMasked(MaskUtil::getVectorIdx(startNodeOffset), + MaskUtil::getVectorIdx(endNodeOffset)); } }; -} // namespace processor +} // namespace common } // namespace kuzu diff --git a/src/include/processor/operator/gds_call.h b/src/include/processor/operator/gds_call.h index 04a9af045cf..4470cc13589 100644 --- a/src/include/processor/operator/gds_call.h +++ b/src/include/processor/operator/gds_call.h @@ -1,9 +1,9 @@ #pragma once +#include "common/mask.h" #include "function/gds/gds.h" #include "function/gds/gds_utils.h" #include "graph/graph.h" -#include "processor/operator/mask.h" #include "processor/operator/sink.h" namespace kuzu { @@ -13,10 +13,11 @@ struct GDSCallSharedState { std::mutex mtx; std::shared_ptr fTable; std::unique_ptr graph; - common::table_id_map_t> inputNodeOffsetMasks; + common::table_id_map_t> inputNodeOffsetMasks; GDSCallSharedState(std::shared_ptr fTable, std::unique_ptr graph, - common::table_id_map_t> inputNodeOffsetMasks) + common::table_id_map_t> + inputNodeOffsetMasks) : fTable{std::move(fTable)}, graph{std::move(graph)}, inputNodeOffsetMasks{std::move(inputNodeOffsetMasks)} {} DELETE_COPY_AND_MOVE(GDSCallSharedState); @@ -59,7 +60,7 @@ class GDSCall : public Sink { info{std::move(info)}, sharedState{std::move(sharedState)} {} bool hasSemiMask() const { return !sharedState->inputNodeOffsetMasks.empty(); } - std::vector getSemiMasks() const; + std::vector getSemiMasks() const; bool isSource() const override { return true; } diff --git a/src/include/processor/operator/recursive_extend/recursive_join.h b/src/include/processor/operator/recursive_extend/recursive_join.h index 5f9d06e8aad..c5e2def14b3 100644 --- a/src/include/processor/operator/recursive_extend/recursive_join.h +++ b/src/include/processor/operator/recursive_extend/recursive_join.h @@ -3,9 +3,9 @@ #include "bfs_state.h" #include "common/enums/extend_direction.h" #include "common/enums/query_rel_type.h" +#include "common/mask.h" #include "frontier_scanner.h" #include "planner/operator/extend/recursive_join_type.h" -#include "processor/operator/mask.h" #include "processor/operator/physical_operator.h" namespace kuzu { @@ -14,10 +14,10 @@ namespace processor { class OffsetScanNodeTable; struct RecursiveJoinSharedState { - std::vector> semiMasks; + std::vector> semiMasks; explicit RecursiveJoinSharedState( - std::vector> semiMasks) + std::vector> semiMasks) : semiMasks{std::move(semiMasks)} {} }; @@ -116,7 +116,7 @@ class RecursiveJoin : public PhysicalOperator { info{std::move(info)}, sharedState{std::move(sharedState)}, recursiveRoot{std::move(recursiveRoot)} {} - std::vector getSemiMask() const; + std::vector getSemiMask() const; void initLocalStateInternal(ResultSet* resultSet_, ExecutionContext* context) final; diff --git a/src/include/processor/operator/scan/scan_node_table.h b/src/include/processor/operator/scan/scan_node_table.h index 6e24d32fe90..f12713346e5 100644 --- a/src/include/processor/operator/scan/scan_node_table.h +++ b/src/include/processor/operator/scan/scan_node_table.h @@ -16,7 +16,7 @@ struct ScanNodeTableProgressSharedState { class ScanNodeTableSharedState { public: - explicit ScanNodeTableSharedState(std::unique_ptr semiMask) + explicit ScanNodeTableSharedState(std::unique_ptr semiMask) : table{nullptr}, currentCommittedGroupIdx{common::INVALID_NODE_GROUP_IDX}, currentUnCommittedGroupIdx{common::INVALID_NODE_GROUP_IDX}, numCommittedNodeGroups{0}, numUnCommittedNodeGroups{0}, semiMask{std::move(semiMask)} {}; @@ -27,7 +27,7 @@ class ScanNodeTableSharedState { void nextMorsel(storage::NodeTableScanState& scanState, std::shared_ptr progressSharedState); - NodeSemiMask* getSemiMask() const { return semiMask.get(); } + common::NodeSemiMask* getSemiMask() const { return semiMask.get(); } private: std::mutex mtx; @@ -36,7 +36,7 @@ class ScanNodeTableSharedState { common::node_group_idx_t currentUnCommittedGroupIdx; common::node_group_idx_t numCommittedNodeGroups; common::node_group_idx_t numUnCommittedNodeGroups; - std::unique_ptr semiMask; + std::unique_ptr semiMask; }; struct ScanNodeTableInfo { @@ -52,7 +52,7 @@ struct ScanNodeTableInfo { columnPredicates{std::move(columnPredicates)} {} EXPLICIT_COPY_DEFAULT_MOVE(ScanNodeTableInfo); - void initScanState(NodeSemiMask* semiMask); + void initScanState(common::NodeSemiMask* semiMask); private: ScanNodeTableInfo(const ScanNodeTableInfo& other) @@ -93,7 +93,7 @@ class ScanNodeTable final : public ScanTable { KU_ASSERT(this->nodeInfos.size() == this->sharedStates.size()); } - std::vector getSemiMasks() const; + std::vector getSemiMasks() const; bool isSource() const override { return true; } diff --git a/src/include/processor/operator/semi_masker.h b/src/include/processor/operator/semi_masker.h index f396cf83e4a..69901998947 100644 --- a/src/include/processor/operator/semi_masker.h +++ b/src/include/processor/operator/semi_masker.h @@ -1,6 +1,6 @@ #pragma once -#include "processor/operator/mask.h" +#include "common/mask.h" #include "processor/operator/physical_operator.h" namespace kuzu { @@ -12,7 +12,7 @@ class BaseSemiMasker; // to indicate the execution sequence of its pipeline. Also, the maskerIdx is used as a flag to // indicate if a value in the mask is masked or not, as each masker will increment the selected // value in the mask by 1. More details are described in NodeTableSemiMask. -using mask_with_idx = std::pair; +using mask_with_idx = std::pair; class SemiMaskerInfo { friend class BaseSemiMasker; diff --git a/src/include/storage/store/node_table.h b/src/include/storage/store/node_table.h index 191e6c4b05d..a5cb0c59df9 100644 --- a/src/include/storage/store/node_table.h +++ b/src/include/storage/store/node_table.h @@ -2,12 +2,11 @@ #include +#include "common/exception/not_implemented.h" #include "common/types/types.h" -#include "processor/operator/mask.h" #include "storage/index/hash_index.h" #include "storage/store/node_group_collection.h" #include "storage/store/table.h" -#include namespace kuzu { namespace evaluator { @@ -26,25 +25,21 @@ class Transaction; namespace storage { struct NodeTableScanState final : TableScanState { - processor::NodeSemiMask* semiMask; - // Scan state for un-committed data. // Ideally we shouldn't need columns to scan un-checkpointed but committed data. explicit NodeTableScanState(std::vector columnIDs) - : TableScanState{std::move(columnIDs), {}}, semiMask{nullptr} { + : TableScanState{std::move(columnIDs), {}} { nodeGroupScanState = std::make_unique(this->columnIDs.size()); } NodeTableScanState(std::vector columnIDs, std::vector columns) : TableScanState{std::move(columnIDs), std::move(columns), - std::vector{}}, - semiMask{nullptr} { + std::vector{}} { nodeGroupScanState = std::make_unique(this->columnIDs.size()); } NodeTableScanState(std::vector columnIDs, std::vector columns, std::vector columnPredicateSets) - : TableScanState{std::move(columnIDs), std::move(columns), std::move(columnPredicateSets)}, - semiMask{nullptr} { + : TableScanState{std::move(columnIDs), std::move(columns), std::move(columnPredicateSets)} { nodeGroupScanState = std::make_unique(this->columnIDs.size()); } }; diff --git a/src/include/storage/store/table.h b/src/include/storage/store/table.h index 199b20b5d71..8f239657d4d 100644 --- a/src/include/storage/store/table.h +++ b/src/include/storage/store/table.h @@ -2,6 +2,7 @@ #include "catalog/catalog_entry/table_catalog_entry.h" #include "common/enums/zone_map_check_result.h" +#include "common/mask.h" #include "storage/predicate/column_predicate.h" #include "storage/store/column.h" #include "storage/store/node_group.h" @@ -20,8 +21,9 @@ struct TableScanState { common::ValueVector* IDVector; std::vector outputVectors; std::vector columnIDs; + common::NodeSemiMask* semiMask; - // Only used when scan from checkpointed data. + // Only used when scan from persistent data. std::vector columns; TableScanSource source = TableScanSource::NONE; @@ -33,18 +35,19 @@ struct TableScanState { common::ZoneMapCheckResult zoneMapResult = common::ZoneMapCheckResult::ALWAYS_SCAN; explicit TableScanState(std::vector columnIDs) - : IDVector(nullptr), columnIDs{std::move(columnIDs)} { + : IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr} { rowIdxVector = std::make_unique(common::LogicalType::INT64()); } TableScanState(std::vector columnIDs, std::vector columns, std::vector columnPredicateSets) - : IDVector(nullptr), columnIDs{std::move(columnIDs)}, columns{std::move(columns)}, - columnPredicateSets{std::move(columnPredicateSets)} { + : IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr}, + columns{std::move(columns)}, columnPredicateSets{std::move(columnPredicateSets)} { rowIdxVector = std::make_unique(common::LogicalType::INT64()); } explicit TableScanState(std::vector columnIDs, std::vector columns) - : IDVector(nullptr), columnIDs{std::move(columnIDs)}, columns{std::move(columns)} { + : IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr}, + columns{std::move(columns)} { rowIdxVector = std::make_unique(common::LogicalType::INT64()); } virtual ~TableScanState() = default; diff --git a/src/processor/map/map_recursive_extend.cpp b/src/processor/map/map_recursive_extend.cpp index 8437dc52412..f76fd140e73 100644 --- a/src/processor/map/map_recursive_extend.cpp +++ b/src/processor/map/map_recursive_extend.cpp @@ -12,11 +12,11 @@ namespace processor { static std::shared_ptr createSharedState( const binder::NodeExpression& nbrNode, const main::ClientContext& context) { - std::vector> semiMasks; + std::vector> semiMasks; for (auto tableID : nbrNode.getTableIDs()) { auto table = context.getStorageManager()->getTable(tableID)->ptrCast(); semiMasks.push_back( - std::make_unique(tableID, table->getNumRows())); + std::make_unique(tableID, table->getNumRows())); } return std::make_shared(std::move(semiMasks)); } diff --git a/src/processor/operator/gds_call.cpp b/src/processor/operator/gds_call.cpp index df68b20736e..9d75e4d1ba6 100644 --- a/src/processor/operator/gds_call.cpp +++ b/src/processor/operator/gds_call.cpp @@ -10,8 +10,8 @@ std::string GDSCallPrintInfo::toString() const { return "Algorithm: " + funcName; } -std::vector GDSCall::getSemiMasks() const { - std::vector masks; +std::vector GDSCall::getSemiMasks() const { + std::vector masks; for (auto& [_, mask] : sharedState->inputNodeOffsetMasks) { masks.push_back(mask.get()); } diff --git a/src/processor/operator/recursive_extend/recursive_join.cpp b/src/processor/operator/recursive_extend/recursive_join.cpp index 6898a296ea0..8a74d4a5f17 100644 --- a/src/processor/operator/recursive_extend/recursive_join.cpp +++ b/src/processor/operator/recursive_extend/recursive_join.cpp @@ -280,7 +280,7 @@ void RecursiveJoin::populateTargetDstNodes(ExecutionContext*) { auto numNodes = mask->getMaxOffset() + 1; if (mask->isEnabled()) { for (auto offset = 0u; offset < numNodes; ++offset) { - if (mask->isMasked(offset)) { + if (mask->isMasked(offset, offset)) { targetNodeIDs.insert(nodeID_t{offset, mask->getTableID()}); numTargetNodes++; } diff --git a/src/storage/store/node_group.cpp b/src/storage/store/node_group.cpp index bd2486ef88a..ca6835430d4 100644 --- a/src/storage/store/node_group.cpp +++ b/src/storage/store/node_group.cpp @@ -92,30 +92,9 @@ void NodeGroup::merge(Transaction*, std::unique_ptr chunkedGro chunkedGroups.appendGroup(lock, std::move(chunkedGroup)); } -void NodeGroup::initializeScanState(Transaction*, TableScanState& state) { - auto& nodeGroupScanState = *state.nodeGroupScanState; - nodeGroupScanState.chunkedGroupIdx = 0; - nodeGroupScanState.nextRowToScan = 0; - ChunkedNodeGroup* firstChunkedGroup; - { - const auto lock = chunkedGroups.lock(); - firstChunkedGroup = chunkedGroups.getFirstGroup(lock); - } - if (firstChunkedGroup != nullptr && - firstChunkedGroup->getResidencyState() == ResidencyState::ON_DISK) { - for (auto i = 0u; i < state.columnIDs.size(); i++) { - KU_ASSERT(i < state.columnIDs.size()); - KU_ASSERT(i < nodeGroupScanState.chunkStates.size()); - const auto columnID = state.columnIDs[i]; - if (columnID == INVALID_COLUMN_ID || columnID == ROW_IDX_COLUMN_ID) { - continue; - } - auto& chunk = firstChunkedGroup->getColumnChunk(columnID); - chunk.initializeScanState(nodeGroupScanState.chunkStates[i]); - // TODO: Not a good way to initialize column for chunkState here. - nodeGroupScanState.chunkStates[i].column = state.columns[i]; - } - } +void NodeGroup::initializeScanState(Transaction* transaction, TableScanState& state) { + const auto lock = chunkedGroups.lock(); + initializeScanState(transaction, lock, state); } void NodeGroup::initializeScanState(Transaction*, const UniqLock& lock, TableScanState& state) { @@ -161,11 +140,20 @@ NodeGroupScanResult NodeGroup::scan(Transaction* transaction, TableScanState& st KU_ASSERT(rowIdxInChunkToScan < chunkedGroupToScan.getNumRows()); const auto numRowsToScan = std::min(chunkedGroupToScan.getNumRows() - rowIdxInChunkToScan, DEFAULT_VECTOR_CAPACITY); + if (state.source == TableScanSource::COMMITTED && state.semiMask && + state.semiMask->isEnabled()) { + const auto startNodeOffset = nodeGroupScanState.nextRowToScan + + StorageUtils::getStartOffsetOfNodeGroup(state.nodeGroupIdx); + if (!state.semiMask->isMasked(startNodeOffset, startNodeOffset + numRowsToScan - 1)) { + state.IDVector->state->getSelVectorUnsafe().setSelSize(0); + nodeGroupScanState.nextRowToScan += numRowsToScan; + return NodeGroupScanResult{nodeGroupScanState.nextRowToScan, 0}; + } + } chunkedGroupToScan.scan(transaction, state, nodeGroupScanState, rowIdxInChunkToScan, numRowsToScan); const auto startRow = nodeGroupScanState.nextRowToScan; nodeGroupScanState.nextRowToScan += numRowsToScan; - // TODO(Guodong): numRowsToScan should be changed to selVector.size return NodeGroupScanResult{startRow, numRowsToScan}; } diff --git a/src/storage/store/node_table.cpp b/src/storage/store/node_table.cpp index a187495b600..912d78adb01 100644 --- a/src/storage/store/node_table.cpp +++ b/src/storage/store/node_table.cpp @@ -100,8 +100,8 @@ bool NodeTable::scanInternal(Transaction* transaction, TableScanState& scanState KU_ASSERT(scanState.source != TableScanSource::NONE && scanState.columns.size() == scanState.outputVectors.size()); for (const auto& outputVector : scanState.outputVectors) { - (void)outputVector; KU_ASSERT(outputVector->state == scanState.IDVector->state); + KU_UNUSED(outputVector); } const auto scanResult = scanState.nodeGroup->scan(transaction, scanState); if (scanResult == NODE_GROUP_SCAN_EMMPTY_RESULT) { From 0a7afcc41017ec4a4cbfa876acff8f516862994e Mon Sep 17 00:00:00 2001 From: Guodong Jin Date: Thu, 8 Aug 2024 21:57:47 -0400 Subject: [PATCH 2/4] add todo --- src/include/planner/operator/sip/side_way_info_passing.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/include/planner/operator/sip/side_way_info_passing.h b/src/include/planner/operator/sip/side_way_info_passing.h index 658f1b2e1e6..3f9fe63c4b9 100644 --- a/src/include/planner/operator/sip/side_way_info_passing.h +++ b/src/include/planner/operator/sip/side_way_info_passing.h @@ -9,6 +9,7 @@ enum class SemiMaskPosition : uint8_t { NONE = 0, ON_BUILD = 1, ON_PROBE = 2, + // TODO(Xiyang): we should differentiate build to probe and probe to build. PROHIBIT = 3, }; From 039aa6026f6ff803381c75222e6af30947ef7d65 Mon Sep 17 00:00:00 2001 From: xiyang Date: Thu, 8 Aug 2024 22:40:06 -0400 Subject: [PATCH 3/4] Add fine-grained-semi-mask optimizer --- .../operator/sip/side_way_info_passing.h | 4 ++-- src/optimizer/acc_hash_join_optimizer.cpp | 19 +++++++++++++++---- src/planner/plan/append_extend.cpp | 2 +- src/planner/plan/append_join.cpp | 2 +- 4 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/include/planner/operator/sip/side_way_info_passing.h b/src/include/planner/operator/sip/side_way_info_passing.h index 3f9fe63c4b9..ba606c686a2 100644 --- a/src/include/planner/operator/sip/side_way_info_passing.h +++ b/src/include/planner/operator/sip/side_way_info_passing.h @@ -9,8 +9,8 @@ enum class SemiMaskPosition : uint8_t { NONE = 0, ON_BUILD = 1, ON_PROBE = 2, - // TODO(Xiyang): we should differentiate build to probe and probe to build. - PROHIBIT = 3, + PROHIBIT_PROBE_TO_BUILD = 3, + PROHIBIT = 4, }; enum class SIPDependency : uint8_t { diff --git a/src/optimizer/acc_hash_join_optimizer.cpp b/src/optimizer/acc_hash_join_optimizer.cpp index 0b1984b66c8..e9a7c33d221 100644 --- a/src/optimizer/acc_hash_join_optimizer.cpp +++ b/src/optimizer/acc_hash_join_optimizer.cpp @@ -134,6 +134,9 @@ void HashJoinSIPOptimizer::visitHashJoin(LogicalOperator* op) { if (tryBuildToProbeHJSIP(op)) { // Try build to probe SIP first. return; } + if (hashJoin.getSIPInfo().position == SemiMaskPosition::PROHIBIT_PROBE_TO_BUILD) { + return; + } tryProbeToBuildHJSIP(op); } @@ -213,8 +216,12 @@ bool HashJoinSIPOptimizer::tryBuildToProbeHJSIP(LogicalOperator* op) { // TODO(Xiyang): we don't apply SIP from build to probe. void HashJoinSIPOptimizer::visitIntersect(LogicalOperator* op) { auto& intersect = op->cast(); - if (intersect.getSIPInfo().position == SemiMaskPosition::PROHIBIT) { - return; + switch (intersect.getSIPInfo().position) { + case SemiMaskPosition::PROHIBIT_PROBE_TO_BUILD: + case SemiMaskPosition::PROHIBIT: + return ; + default: + break; } if (!isProbeSideQualified(op->getChild(0).get())) { return; @@ -246,8 +253,12 @@ void HashJoinSIPOptimizer::visitIntersect(LogicalOperator* op) { void HashJoinSIPOptimizer::visitPathPropertyProbe(LogicalOperator* op) { auto& pathPropertyProbe = op->cast(); - if (pathPropertyProbe.getSIPInfo().position == SemiMaskPosition::PROHIBIT) { - return; + switch (pathPropertyProbe.getSIPInfo().position) { + case SemiMaskPosition::PROHIBIT_PROBE_TO_BUILD: + case SemiMaskPosition::PROHIBIT: + return ; + default: + break; } if (pathPropertyProbe.getJoinType() == RecursiveJoinType::TRACK_NONE) { return; diff --git a/src/planner/plan/append_extend.cpp b/src/planner/plan/append_extend.cpp index 6c155ebf02e..170550d2677 100644 --- a/src/planner/plan/append_extend.cpp +++ b/src/planner/plan/append_extend.cpp @@ -196,7 +196,7 @@ void Planner::appendRecursiveExtend(const std::shared_ptr& bound // Check for sip auto ratio = plan.getCardinality() / relScanCardinality; if (ratio > PlannerKnobs::SIP_RATIO) { - pathPropertyProbe->getSIPInfoUnsafe().position = SemiMaskPosition::PROHIBIT; + pathPropertyProbe->getSIPInfoUnsafe().position = SemiMaskPosition::PROHIBIT_PROBE_TO_BUILD; } plan.setLastOperator(std::move(pathPropertyProbe)); // Update cost diff --git a/src/planner/plan/append_join.cpp b/src/planner/plan/append_join.cpp index 6ca66924f8d..dba69e415df 100644 --- a/src/planner/plan/append_join.cpp +++ b/src/planner/plan/append_join.cpp @@ -34,7 +34,7 @@ void Planner::appendHashJoin(const expression_vector& joinNodeIDs, JoinType join // Check for sip auto ratio = probePlan.getCardinality() / buildPlan.getCardinality(); if (ratio > PlannerKnobs::SIP_RATIO) { - hashJoin->getSIPInfoUnsafe().position = SemiMaskPosition::PROHIBIT; + hashJoin->getSIPInfoUnsafe().position = SemiMaskPosition::PROHIBIT_PROBE_TO_BUILD; } // Update cost resultPlan.setCost(CostModel::computeHashJoinCost(joinNodeIDs, probePlan, buildPlan)); From 948e1447058c31845c664cd81795a653dcec04cf Mon Sep 17 00:00:00 2001 From: CI Bot Date: Fri, 9 Aug 2024 02:48:35 +0000 Subject: [PATCH 4/4] Run clang-format --- src/optimizer/acc_hash_join_optimizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/optimizer/acc_hash_join_optimizer.cpp b/src/optimizer/acc_hash_join_optimizer.cpp index e9a7c33d221..98c55e600b7 100644 --- a/src/optimizer/acc_hash_join_optimizer.cpp +++ b/src/optimizer/acc_hash_join_optimizer.cpp @@ -216,10 +216,10 @@ bool HashJoinSIPOptimizer::tryBuildToProbeHJSIP(LogicalOperator* op) { // TODO(Xiyang): we don't apply SIP from build to probe. void HashJoinSIPOptimizer::visitIntersect(LogicalOperator* op) { auto& intersect = op->cast(); - switch (intersect.getSIPInfo().position) { + switch (intersect.getSIPInfo().position) { case SemiMaskPosition::PROHIBIT_PROBE_TO_BUILD: case SemiMaskPosition::PROHIBIT: - return ; + return; default: break; } @@ -256,7 +256,7 @@ void HashJoinSIPOptimizer::visitPathPropertyProbe(LogicalOperator* op) { switch (pathPropertyProbe.getSIPInfo().position) { case SemiMaskPosition::PROHIBIT_PROBE_TO_BUILD: case SemiMaskPosition::PROHIBIT: - return ; + return; default: break; }