Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix semi mask on scan node table #4050

Merged
merged 4 commits into from
Aug 9, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/function/gds/shortest_paths.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
explicit PathLengths(
std::vector<std::tuple<common::table_id_t, uint64_t>> nodeTableIDAndNumNodes) {
for (const auto& [tableID, numNodes] : nodeTableIDAndNumNodes) {
masks.insert({tableID, std::make_unique<processor::MaskData>(numNodes, UNVISITED)});
masks.insert({tableID, std::make_unique<MaskData>(numNodes, UNVISITED)});

Check warning on line 86 in src/function/gds/shortest_paths.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/gds/shortest_paths.cpp#L86

Added line #L86 was not covered by tests
}
}

Expand Down Expand Up @@ -131,10 +131,10 @@

private:
uint8_t curIter = 255;
common::table_id_map_t<std::unique_ptr<processor::MaskData>> masks;
common::table_id_map_t<std::unique_ptr<MaskData>> masks;
common::table_id_t curFrontierFixedTableID;
processor::MaskData* curFrontierFixedMask;
processor::MaskData* nextFrontierFixedMask;
MaskData* curFrontierFixedMask;
MaskData* nextFrontierFixedMask;
};

class PathLengthsFrontiers : public Frontiers {
Expand Down Expand Up @@ -419,7 +419,7 @@
}
auto mask = sharedState->inputNodeOffsetMasks.at(tableID).get();
for (auto offset = 0u; offset < sharedState->graph->getNumNodes(tableID); ++offset) {
if (!mask->isMasked(offset)) {
if (!mask->isMasked(offset, offset)) {
continue;
}
auto sourceNodeID = nodeID_t{offset, tableID};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#include "common/types/internal_id_t.h"

namespace kuzu {
namespace processor {
namespace common {

// Note: Classes in this file are NOT thread-safe.
struct MaskUtil {
Expand Down Expand Up @@ -52,7 +52,15 @@
maskData = std::make_unique<MaskData>(maxOffset + 1);
}

bool isMasked(common::offset_t offset) { return maskData->isMasked(offset, numMasks); }
// Return true if any offset between [startOffset, endOffset] is masked. Otherwise return false.
bool isMasked(common::offset_t startOffset, common::offset_t endOffset) const {
auto offset = startOffset;
auto numMasked = 0u;
while (offset <= endOffset) {
numMasked += maskData->isMasked(offset++, numMasks);
}
return numMasked > 0;
}
// Increment mask value for the given nodeOffset if its current mask value is equal to
// the specified `currentMaskValue`.
// Note: blindly update mask does not parallelize well, so we minimize write by first checking
Expand Down Expand Up @@ -87,7 +95,7 @@
virtual void init() = 0;

virtual void incrementMaskValue(common::offset_t nodeOffset, uint8_t currentMaskValue) = 0;
virtual bool isMasked(common::offset_t nodeOffset) = 0;
virtual bool isMasked(common::offset_t startNodeOffset, common::offset_t endNodeOffset) = 0;

bool isEnabled() const { return getNumMasks() > 0; }
uint8_t getNumMasks() const { return maskCollection.getNumMasks(); }
Expand Down Expand Up @@ -115,8 +123,8 @@
maskCollection.incrementMaskValue(nodeOffset, currentMaskValue);
}

bool isMasked(common::offset_t nodeOffset) override {
return maskCollection.isMasked(nodeOffset);
bool isMasked(common::offset_t startNodeOffset, common::offset_t endNodeOffset) override {
return maskCollection.isMasked(startNodeOffset, endNodeOffset);

Check warning on line 127 in src/include/common/mask.h

View check run for this annotation

Codecov / codecov/patch

src/include/common/mask.h#L126-L127

Added lines #L126 - L127 were not covered by tests
}
};

Expand All @@ -136,10 +144,11 @@
maskCollection.incrementMaskValue(MaskUtil::getVectorIdx(nodeOffset), currentMaskValue);
}

bool isMasked(common::offset_t nodeOffset) override {
return maskCollection.isMasked(MaskUtil::getVectorIdx(nodeOffset));
bool isMasked(common::offset_t startNodeOffset, common::offset_t endNodeOffset) override {
return maskCollection.isMasked(MaskUtil::getVectorIdx(startNodeOffset),
MaskUtil::getVectorIdx(endNodeOffset));
}
};

} // namespace processor
} // namespace common
} // namespace kuzu
1 change: 1 addition & 0 deletions src/include/planner/operator/sip/side_way_info_passing.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ enum class SemiMaskPosition : uint8_t {
NONE = 0,
ON_BUILD = 1,
ON_PROBE = 2,
// TODO(Xiyang): we should differentiate build to probe and probe to build.
PROHIBIT = 3,
};

Expand Down
9 changes: 5 additions & 4 deletions src/include/processor/operator/gds_call.h
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#pragma once

#include "common/mask.h"
#include "function/gds/gds.h"
#include "function/gds/gds_utils.h"
#include "graph/graph.h"
#include "processor/operator/mask.h"
#include "processor/operator/sink.h"

namespace kuzu {
Expand All @@ -13,10 +13,11 @@ struct GDSCallSharedState {
std::mutex mtx;
std::shared_ptr<FactorizedTable> fTable;
std::unique_ptr<graph::Graph> graph;
common::table_id_map_t<std::unique_ptr<NodeOffsetLevelSemiMask>> inputNodeOffsetMasks;
common::table_id_map_t<std::unique_ptr<common::NodeOffsetLevelSemiMask>> inputNodeOffsetMasks;

GDSCallSharedState(std::shared_ptr<FactorizedTable> fTable, std::unique_ptr<graph::Graph> graph,
common::table_id_map_t<std::unique_ptr<NodeOffsetLevelSemiMask>> inputNodeOffsetMasks)
common::table_id_map_t<std::unique_ptr<common::NodeOffsetLevelSemiMask>>
inputNodeOffsetMasks)
: fTable{std::move(fTable)}, graph{std::move(graph)},
inputNodeOffsetMasks{std::move(inputNodeOffsetMasks)} {}
DELETE_COPY_AND_MOVE(GDSCallSharedState);
Expand Down Expand Up @@ -59,7 +60,7 @@ class GDSCall : public Sink {
info{std::move(info)}, sharedState{std::move(sharedState)} {}

bool hasSemiMask() const { return !sharedState->inputNodeOffsetMasks.empty(); }
std::vector<NodeSemiMask*> getSemiMasks() const;
std::vector<common::NodeSemiMask*> getSemiMasks() const;

bool isSource() const override { return true; }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
#include "bfs_state.h"
#include "common/enums/extend_direction.h"
#include "common/enums/query_rel_type.h"
#include "common/mask.h"
#include "frontier_scanner.h"
#include "planner/operator/extend/recursive_join_type.h"
#include "processor/operator/mask.h"
#include "processor/operator/physical_operator.h"

namespace kuzu {
Expand All @@ -14,10 +14,10 @@ namespace processor {
class OffsetScanNodeTable;

struct RecursiveJoinSharedState {
std::vector<std::unique_ptr<NodeOffsetLevelSemiMask>> semiMasks;
std::vector<std::unique_ptr<common::NodeOffsetLevelSemiMask>> semiMasks;

explicit RecursiveJoinSharedState(
std::vector<std::unique_ptr<NodeOffsetLevelSemiMask>> semiMasks)
std::vector<std::unique_ptr<common::NodeOffsetLevelSemiMask>> semiMasks)
: semiMasks{std::move(semiMasks)} {}
};

Expand Down Expand Up @@ -116,7 +116,7 @@ class RecursiveJoin : public PhysicalOperator {
info{std::move(info)}, sharedState{std::move(sharedState)},
recursiveRoot{std::move(recursiveRoot)} {}

std::vector<NodeSemiMask*> getSemiMask() const;
std::vector<common::NodeSemiMask*> getSemiMask() const;

void initLocalStateInternal(ResultSet* resultSet_, ExecutionContext* context) final;

Expand Down
10 changes: 5 additions & 5 deletions src/include/processor/operator/scan/scan_node_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct ScanNodeTableProgressSharedState {

class ScanNodeTableSharedState {
public:
explicit ScanNodeTableSharedState(std::unique_ptr<NodeVectorLevelSemiMask> semiMask)
explicit ScanNodeTableSharedState(std::unique_ptr<common::NodeVectorLevelSemiMask> semiMask)
: table{nullptr}, currentCommittedGroupIdx{common::INVALID_NODE_GROUP_IDX},
currentUnCommittedGroupIdx{common::INVALID_NODE_GROUP_IDX}, numCommittedNodeGroups{0},
numUnCommittedNodeGroups{0}, semiMask{std::move(semiMask)} {};
Expand All @@ -27,7 +27,7 @@ class ScanNodeTableSharedState {
void nextMorsel(storage::NodeTableScanState& scanState,
std::shared_ptr<ScanNodeTableProgressSharedState> progressSharedState);

NodeSemiMask* getSemiMask() const { return semiMask.get(); }
common::NodeSemiMask* getSemiMask() const { return semiMask.get(); }

private:
std::mutex mtx;
Expand All @@ -36,7 +36,7 @@ class ScanNodeTableSharedState {
common::node_group_idx_t currentUnCommittedGroupIdx;
common::node_group_idx_t numCommittedNodeGroups;
common::node_group_idx_t numUnCommittedNodeGroups;
std::unique_ptr<NodeVectorLevelSemiMask> semiMask;
std::unique_ptr<common::NodeVectorLevelSemiMask> semiMask;
};

struct ScanNodeTableInfo {
Expand All @@ -52,7 +52,7 @@ struct ScanNodeTableInfo {
columnPredicates{std::move(columnPredicates)} {}
EXPLICIT_COPY_DEFAULT_MOVE(ScanNodeTableInfo);

void initScanState(NodeSemiMask* semiMask);
void initScanState(common::NodeSemiMask* semiMask);

private:
ScanNodeTableInfo(const ScanNodeTableInfo& other)
Expand Down Expand Up @@ -93,7 +93,7 @@ class ScanNodeTable final : public ScanTable {
KU_ASSERT(this->nodeInfos.size() == this->sharedStates.size());
}

std::vector<NodeSemiMask*> getSemiMasks() const;
std::vector<common::NodeSemiMask*> getSemiMasks() const;

bool isSource() const override { return true; }

Expand Down
4 changes: 2 additions & 2 deletions src/include/processor/operator/semi_masker.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once

#include "processor/operator/mask.h"
#include "common/mask.h"
#include "processor/operator/physical_operator.h"

namespace kuzu {
Expand All @@ -12,7 +12,7 @@ class BaseSemiMasker;
// to indicate the execution sequence of its pipeline. Also, the maskerIdx is used as a flag to
// indicate if a value in the mask is masked or not, as each masker will increment the selected
// value in the mask by 1. More details are described in NodeTableSemiMask.
using mask_with_idx = std::pair<NodeSemiMask*, uint8_t>;
using mask_with_idx = std::pair<common::NodeSemiMask*, uint8_t>;

class SemiMaskerInfo {
friend class BaseSemiMasker;
Expand Down
13 changes: 4 additions & 9 deletions src/include/storage/store/node_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@

#include <cstdint>

#include "common/exception/not_implemented.h"
#include "common/types/types.h"
#include "processor/operator/mask.h"
#include "storage/index/hash_index.h"
#include "storage/store/node_group_collection.h"
#include "storage/store/table.h"
#include <common/exception/not_implemented.h>

namespace kuzu {
namespace evaluator {
Expand All @@ -26,25 +25,21 @@ class Transaction;
namespace storage {

struct NodeTableScanState final : TableScanState {
processor::NodeSemiMask* semiMask;

// Scan state for un-committed data.
// Ideally we shouldn't need columns to scan un-checkpointed but committed data.
explicit NodeTableScanState(std::vector<common::column_id_t> columnIDs)
: TableScanState{std::move(columnIDs), {}}, semiMask{nullptr} {
: TableScanState{std::move(columnIDs), {}} {
nodeGroupScanState = std::make_unique<NodeGroupScanState>(this->columnIDs.size());
}

NodeTableScanState(std::vector<common::column_id_t> columnIDs, std::vector<Column*> columns)
: TableScanState{std::move(columnIDs), std::move(columns),
std::vector<ColumnPredicateSet>{}},
semiMask{nullptr} {
std::vector<ColumnPredicateSet>{}} {
nodeGroupScanState = std::make_unique<NodeGroupScanState>(this->columnIDs.size());
}
NodeTableScanState(std::vector<common::column_id_t> columnIDs, std::vector<Column*> columns,
std::vector<ColumnPredicateSet> columnPredicateSets)
: TableScanState{std::move(columnIDs), std::move(columns), std::move(columnPredicateSets)},
semiMask{nullptr} {
: TableScanState{std::move(columnIDs), std::move(columns), std::move(columnPredicateSets)} {
nodeGroupScanState = std::make_unique<NodeGroupScanState>(this->columnIDs.size());
}
};
Expand Down
13 changes: 8 additions & 5 deletions src/include/storage/store/table.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "catalog/catalog_entry/table_catalog_entry.h"
#include "common/enums/zone_map_check_result.h"
#include "common/mask.h"
#include "storage/predicate/column_predicate.h"
#include "storage/store/column.h"
#include "storage/store/node_group.h"
Expand All @@ -20,8 +21,9 @@ struct TableScanState {
common::ValueVector* IDVector;
std::vector<common::ValueVector*> outputVectors;
std::vector<common::column_id_t> columnIDs;
common::NodeSemiMask* semiMask;

// Only used when scan from checkpointed data.
// Only used when scan from persistent data.
std::vector<Column*> columns;

TableScanSource source = TableScanSource::NONE;
Expand All @@ -33,18 +35,19 @@ struct TableScanState {
common::ZoneMapCheckResult zoneMapResult = common::ZoneMapCheckResult::ALWAYS_SCAN;

explicit TableScanState(std::vector<common::column_id_t> columnIDs)
: IDVector(nullptr), columnIDs{std::move(columnIDs)} {
: IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr} {
rowIdxVector = std::make_unique<common::ValueVector>(common::LogicalType::INT64());
}
TableScanState(std::vector<common::column_id_t> columnIDs, std::vector<Column*> columns,
std::vector<ColumnPredicateSet> columnPredicateSets)
: IDVector(nullptr), columnIDs{std::move(columnIDs)}, columns{std::move(columns)},
columnPredicateSets{std::move(columnPredicateSets)} {
: IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr},
columns{std::move(columns)}, columnPredicateSets{std::move(columnPredicateSets)} {
rowIdxVector = std::make_unique<common::ValueVector>(common::LogicalType::INT64());
}
explicit TableScanState(std::vector<common::column_id_t> columnIDs,
std::vector<Column*> columns)
: IDVector(nullptr), columnIDs{std::move(columnIDs)}, columns{std::move(columns)} {
: IDVector(nullptr), columnIDs{std::move(columnIDs)}, semiMask{nullptr},
columns{std::move(columns)} {
rowIdxVector = std::make_unique<common::ValueVector>(common::LogicalType::INT64());
}
virtual ~TableScanState() = default;
Expand Down
4 changes: 2 additions & 2 deletions src/processor/map/map_recursive_extend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ namespace processor {

static std::shared_ptr<RecursiveJoinSharedState> createSharedState(
const binder::NodeExpression& nbrNode, const main::ClientContext& context) {
std::vector<std::unique_ptr<NodeOffsetLevelSemiMask>> semiMasks;
std::vector<std::unique_ptr<common::NodeOffsetLevelSemiMask>> semiMasks;
for (auto tableID : nbrNode.getTableIDs()) {
auto table = context.getStorageManager()->getTable(tableID)->ptrCast<storage::NodeTable>();
semiMasks.push_back(
std::make_unique<NodeOffsetLevelSemiMask>(tableID, table->getNumRows()));
std::make_unique<common::NodeOffsetLevelSemiMask>(tableID, table->getNumRows()));
}
return std::make_shared<RecursiveJoinSharedState>(std::move(semiMasks));
}
Expand Down
4 changes: 2 additions & 2 deletions src/processor/operator/gds_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
return "Algorithm: " + funcName;
}

std::vector<NodeSemiMask*> GDSCall::getSemiMasks() const {
std::vector<NodeSemiMask*> masks;
std::vector<common::NodeSemiMask*> GDSCall::getSemiMasks() const {

Check warning on line 13 in src/processor/operator/gds_call.cpp

View check run for this annotation

Codecov / codecov/patch

src/processor/operator/gds_call.cpp#L13

Added line #L13 was not covered by tests
std::vector<common::NodeSemiMask*> masks;
for (auto& [_, mask] : sharedState->inputNodeOffsetMasks) {
masks.push_back(mask.get());
}
Expand Down
2 changes: 1 addition & 1 deletion src/processor/operator/recursive_extend/recursive_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ void RecursiveJoin::populateTargetDstNodes(ExecutionContext*) {
auto numNodes = mask->getMaxOffset() + 1;
if (mask->isEnabled()) {
for (auto offset = 0u; offset < numNodes; ++offset) {
if (mask->isMasked(offset)) {
if (mask->isMasked(offset, offset)) {
targetNodeIDs.insert(nodeID_t{offset, mask->getTableID()});
numTargetNodes++;
}
Expand Down
Loading
Loading