Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(search_family): Fix indexes loading in the FT.SEARCH and Ft.AGGREGATE commands #3955

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/core/search/search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,15 @@ vector<pair<string, SortableValue>> FieldIndices::ExtractStoredValues(DocId doc)
return out;
}

absl::flat_hash_set<std::string_view> FieldIndices::GetSortIndiciesFields() const {
absl::flat_hash_set<std::string_view> fields_idents;
fields_idents.reserve(sort_indices_.size());
for (const auto& [ident, _] : sort_indices_) {
fields_idents.insert(ident);
}
return fields_idents;
}

SearchAlgorithm::SearchAlgorithm() = default;
SearchAlgorithm::~SearchAlgorithm() = default;

Expand Down
2 changes: 2 additions & 0 deletions src/core/search/search.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class FieldIndices {
// Extract values stored in sort indices
std::vector<std::pair<std::string, SortableValue>> ExtractStoredValues(DocId doc) const;

absl::flat_hash_set<std::string_view> GetSortIndiciesFields() const;

private:
void CreateIndices(PMR_NS::memory_resource* mr);
void CreateSortIndices(PMR_NS::memory_resource* mr);
Expand Down
10 changes: 5 additions & 5 deletions src/server/search/doc_accessors.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ search::SortableValue ExtractSortableValueFromJson(const search::Schema& schema,

} // namespace

SearchDocData BaseAccessor::Serialize(const search::Schema& schema,
const FieldsList& fields) const {
SearchDocData BaseAccessor::Serialize(
const search::Schema& schema, absl::Span<const SearchField<std::string_view>> fields) const {
SearchDocData out{};
for (const auto& [fident, fname] : fields) {
out[fname] = ExtractSortableValue(schema, fident, absl::StrJoin(GetStrings(fident), ","));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why this turned into "sortable". What is sortable about it? 🙂

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It returns sortable value?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, but it's not exclusively used for sorting - that's the confusing part. Its used for all document access. It just returns a document value in one of the value types

Expand Down Expand Up @@ -248,14 +248,14 @@ JsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) c
}

SearchDocData JsonAccessor::Serialize(const search::Schema& schema) const {
FieldsList fields{};
SearchFieldsList fields{};
for (const auto& [fname, fident] : schema.field_names)
fields.emplace_back(fident, fname);
return Serialize(schema, fields);
}

SearchDocData JsonAccessor::Serialize(const search::Schema& schema,
const FieldsList& fields) const {
SearchDocData JsonAccessor::Serialize(
const search::Schema& schema, absl::Span<const SearchField<std::string_view>> fields) const {
SearchDocData out{};
for (const auto& [ident, name] : fields) {
if (auto* path = GetPath(ident); path) {
Expand Down
6 changes: 4 additions & 2 deletions src/server/search/doc_accessors.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ struct BaseAccessor : public search::DocumentAccessor {
virtual SearchDocData Serialize(const search::Schema& schema) const = 0;

// Serialize selected fields
virtual SearchDocData Serialize(const search::Schema& schema, const FieldsList& fields) const;
virtual SearchDocData Serialize(const search::Schema& schema,
absl::Span<const SearchField<std::string_view>> fields) const;

/*
Serialize the whole type, the default implementation is to serialize all fields.
Expand Down Expand Up @@ -78,7 +79,8 @@ struct JsonAccessor : public BaseAccessor {
VectorInfo GetVector(std::string_view field) const override;

// The JsonAccessor works with structured types and not plain strings, so an overload is needed
SearchDocData Serialize(const search::Schema& schema, const FieldsList& fields) const override;
SearchDocData Serialize(const search::Schema& schema,
absl::Span<const SearchField<std::string_view>> fields) const override;
SearchDocData Serialize(const search::Schema& schema) const override;
SearchDocData SerializeDocument(const search::Schema& schema) const override;

Expand Down
64 changes: 51 additions & 13 deletions src/server/search/doc_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ bool SerializedSearchDoc::operator>=(const SerializedSearchDoc& other) const {

bool SearchParams::ShouldReturnField(std::string_view field) const {
auto cb = [field](const auto& entry) { return entry.first == field; };
return !return_fields.fields || any_of(return_fields->begin(), return_fields->end(), cb);
return !return_fields || any_of(return_fields->begin(), return_fields->end(), cb);
}

string_view SearchFieldTypeToString(search::SchemaField::FieldType type) {
Expand Down Expand Up @@ -211,6 +211,17 @@ bool ShardDocIndex::Matches(string_view key, unsigned obj_code) const {
return base_->Matches(key, obj_code);
}

SearchFieldsList ToSV(const std::optional<OwnedSearchFieldsList>& fields) {
SearchFieldsList sv_fields;
if (fields) {
sv_fields.reserve(fields->size());
for (const auto& [fident, fname] : fields.value()) {
sv_fields.emplace_back(fident, fname);
}
}
return sv_fields;
}

SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& params,
search::SearchAlgorithm* search_algo) const {
auto& db_slice = op_args.GetDbSlice();
Expand All @@ -219,6 +230,9 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
if (!search_results.error.empty())
return SearchResult{facade::ErrorReply{std::move(search_results.error)}};

SearchFieldsList fields_to_load =
ToSV(params.ShouldReturnAllFields() ? params.load_fields : params.return_fields);

vector<SerializedSearchDoc> out;
out.reserve(search_results.ids.size());

Expand All @@ -235,15 +249,19 @@ SearchResult ShardDocIndex::Search(const OpArgs& op_args, const SearchParams& pa
auto accessor = GetAccessor(op_args.db_cntx, (*it)->second);

SearchDocData doc_data;
if (params.return_fields.ShouldReturnAllFields()) {
if (params.ShouldReturnAllFields()) {
/*
In this case we need to load the whole document.
In this case we need to load the whole document or loaded fields.
For JSON indexes it would be {"$", <the whole document as string>}
*/
doc_data = accessor->SerializeDocument(base_->schema);

SearchDocData loaded_fields = accessor->Serialize(base_->schema, fields_to_load);
doc_data.insert(std::make_move_iterator(loaded_fields.begin()),
std::make_move_iterator(loaded_fields.end()));
} else {
/* Load only selected fields */
doc_data = accessor->Serialize(base_->schema, params.return_fields.GetFields());
/* Load only specific fields */
doc_data = accessor->Serialize(base_->schema, fields_to_load);
}

auto score = search_results.scores.empty() ? monostate{} : std::move(search_results.scores[i]);
Expand All @@ -263,6 +281,9 @@ vector<SearchDocData> ShardDocIndex::SearchForAggregator(
if (!search_results.error.empty())
return {};

SearchFieldsList fields_to_load =
GetFieldsToLoad(params.load_fields, indices_->GetSortIndiciesFields());

vector<absl::flat_hash_map<string, search::SortableValue>> out;
for (DocId doc : search_results.ids) {
auto key = key_index_.Get(doc);
Expand All @@ -274,14 +295,7 @@ vector<SearchDocData> ShardDocIndex::SearchForAggregator(
auto accessor = GetAccessor(op_args.db_cntx, (*it)->second);
auto extracted = indices_->ExtractStoredValues(doc);

SearchDocData loaded;
if (params.load_fields.ShouldReturnAllFields()) {
// Load all fields
loaded = accessor->Serialize(base_->schema);
} else {
// Load only selected fields
loaded = accessor->Serialize(base_->schema, params.load_fields.GetFields());
}
SearchDocData loaded = accessor->Serialize(base_->schema, fields_to_load);

out.emplace_back(make_move_iterator(extracted.begin()), make_move_iterator(extracted.end()));
out.back().insert(make_move_iterator(loaded.begin()), make_move_iterator(loaded.end()));
Expand All @@ -290,6 +304,30 @@ vector<SearchDocData> ShardDocIndex::SearchForAggregator(
return out;
}

SearchFieldsList ShardDocIndex::GetFieldsToLoad(
const std::optional<OwnedSearchFieldsList>& load_fields,
const absl::flat_hash_set<std::string_view>& skip_fields) const {
// identifier to short name
absl::flat_hash_map<std::string_view, std::string_view> unique_fields;
unique_fields.reserve(base_->schema.field_names.size());

for (const auto& [fname, fident] : base_->schema.field_names) {
if (!skip_fields.contains(fident)) {
unique_fields[fident] = fname;
}
}

if (load_fields) {
for (const auto& [fident, fname] : load_fields.value()) {
if (!skip_fields.contains(fident)) {
unique_fields[fident] = fname;
}
}
}

return {unique_fields.begin(), unique_fields.end()};
}

DocIndexInfo ShardDocIndex::GetInfo() const {
return {*base_, key_index_.Size()};
}
Expand Down
59 changes: 27 additions & 32 deletions src/server/search/doc_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,50 +52,40 @@ struct SearchResult {
std::optional<facade::ErrorReply> error;
};

using FieldsList = std::vector<std::pair<std::string /*identifier*/, std::string /*short name*/>>;
template <typename T> using SearchField = std::pair<T /*identifier*/, T /*short name*/>;

using SearchFieldsList = std::vector<SearchField<std::string_view>>;
using OwnedSearchFieldsList = std::vector<SearchField<std::string>>;

struct SearchParams {
// Parameters for "LIMIT offset total": select total amount documents with a specific offset from
// the whole result set
size_t limit_offset = 0;
size_t limit_total = 10;

struct SelectedFields {
/*
1. If not set -> return all fields
2. If set but empty -> no fields should be returned
3. If set and not empty -> return only these fields
*/
std::optional<FieldsList> fields;
std::optional<OwnedSearchFieldsList> return_fields;

bool ShouldReturnAllFields() const {
return !fields.has_value();
}

bool ShouldReturnNoFields() const {
return fields && fields->empty();
}

FieldsList* operator->() {
return &fields.value();
}

const FieldsList* operator->() const {
return &fields.value();
}

const FieldsList& GetFields() const {
return fields.value();
}
};
/*
Fields that should be also loaded from the document.

struct SearchParams {
// Parameters for "LIMIT offset total": select total amount documents with a specific offset from
// the whole result set
size_t limit_offset = 0;
size_t limit_total = 10;
Only one of load_fields and return_fields should be set.
*/
std::optional<OwnedSearchFieldsList> load_fields;

// Set but empty means no fields should be returned
SelectedFields return_fields;
std::optional<search::SortOption> sort_option;
search::QueryParams query_params;

bool ShouldReturnAllFields() const {
return !return_fields.has_value();
}

bool IdsOnly() const {
return return_fields.ShouldReturnNoFields();
return return_fields && return_fields->empty();
}

bool ShouldReturnField(std::string_view field) const;
Expand All @@ -105,7 +95,7 @@ struct AggregateParams {
std::string_view index, query;
search::QueryParams params;

SelectedFields load_fields;
std::optional<OwnedSearchFieldsList> load_fields;
std::vector<aggregate::PipelineStep> steps;
};

Expand Down Expand Up @@ -179,6 +169,11 @@ class ShardDocIndex {
io::Result<StringVec, facade::ErrorReply> GetTagVals(std::string_view field) const;

private:
// Returns the fields that are the union of the already indexed fields and load_fields, excluding
// skip_fields Load_fields should not be destroyed while the result of this function is being used
SearchFieldsList GetFieldsToLoad(const std::optional<OwnedSearchFieldsList>& load_fields,
const absl::flat_hash_set<std::string_view>& skip_fields) const;

// Clears internal data. Traverses all matching documents and assigns ids.
void Rebuild(const OpArgs& op_args, PMR_NS::memory_resource* mr);

Expand Down
Loading
Loading