Skip to content

Commit

Permalink
Refactor from static utils into abstract class and add support for us…
Browse files Browse the repository at this point in the history
…ing search inference IDs from field
  • Loading branch information
kderusso committed Dec 13, 2024
1 parent 2181d86 commit 8c82d00
Show file tree
Hide file tree
Showing 8 changed files with 269 additions and 170 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,9 @@ public SparseVectorQueryBuilder(
: (this.shouldPruneTokens ? new TokenPruningConfig() : null));
this.weightedTokensSupplier = null;

if (queryVectors == null ^ inferenceId == null == false) {
if (queryVectors == null ^ query == null == false) {
throw new IllegalArgumentException(
"["
+ NAME
+ "] requires one of ["
+ QUERY_VECTOR_FIELD.getPreferredName()
+ "] or ["
+ INFERENCE_ID_FIELD.getPreferredName()
+ "]"
"[" + NAME + "] requires one of [" + QUERY_VECTOR_FIELD.getPreferredName() + "] or [" + QUERY_FIELD.getPreferredName() + "]"
);
}
if (inferenceId != null && query == null) {
Expand Down Expand Up @@ -184,7 +178,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep
}
builder.endObject();
} else {
builder.field(INFERENCE_ID_FIELD.getPreferredName(), inferenceId);
if (inferenceId != null) {
builder.field(INFERENCE_ID_FIELD.getPreferredName(), inferenceId);
}
builder.field(QUERY_FIELD.getPreferredName(), query);
}
builder.field(PRUNE_FIELD.getPreferredName(), shouldPruneTokens);
Expand Down Expand Up @@ -236,6 +232,11 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) {
shouldPruneTokens,
tokenPruningConfig
);
} else if (inferenceId == null) {
// Edge case, where inference_id was not specified in the request,
// but we did not intercept this and rewrite to a query on a field with
// pre-configured inference. So we trap here and output a nicer error message.
throw new IllegalArgumentException("inference_id required to perform vector search on query string");
}

// TODO move this to xpack core and use inference APIs
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,11 @@ public record SemanticTextField(String fieldName, List<String> originalValues, I
ToXContentObject {

static final String TEXT_FIELD = "text";
public static final String INFERENCE_FIELD = "inference";
static final String INFERENCE_FIELD = "inference";
static final String INFERENCE_ID_FIELD = "inference_id";
static final String SEARCH_INFERENCE_ID_FIELD = "search_inference_id";
public static final String CHUNKS_FIELD = "chunks";
public static final String CHUNKED_EMBEDDINGS_FIELD = "embeddings";
static final String CHUNKS_FIELD = "chunks";
static final String CHUNKED_EMBEDDINGS_FIELD = "embeddings";
public static final String CHUNKED_TEXT_FIELD = "text";
static final String MODEL_SETTINGS_FIELD = "model_settings";
static final String TASK_TYPE_FIELD = "task_type";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,11 @@
package org.elasticsearch.xpack.inference.queries;

import org.elasticsearch.features.NodeFeature;
import org.elasticsearch.index.mapper.IndexFieldMapper;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.TermsQueryBuilder;
import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor;

import java.util.List;

import static org.elasticsearch.xpack.inference.queries.SemanticQueryInterceptionUtils.InferenceIndexInformationForField;

public class SemanticMatchQueryRewriteInterceptor implements QueryRewriteInterceptor {
public class SemanticMatchQueryRewriteInterceptor extends SemanticQueryRewriteInterceptor {

public static final NodeFeature SEMANTIC_MATCH_QUERY_REWRITE_INTERCEPTION_SUPPORTED = new NodeFeature(
"search.semantic_match_query_rewrite_interception_supported"
Expand All @@ -29,52 +21,45 @@ public class SemanticMatchQueryRewriteInterceptor implements QueryRewriteInterce
public SemanticMatchQueryRewriteInterceptor() {}

@Override
public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilder queryBuilder) {
protected String getFieldName(QueryBuilder queryBuilder) {
assert (queryBuilder instanceof MatchQueryBuilder);
MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder;
QueryBuilder rewritten = queryBuilder;
InferenceIndexInformationForField inferenceIndexInformationForField = SemanticQueryInterceptionUtils.resolveIndicesForField(
matchQueryBuilder.fieldName(),
context.getResolvedIndices()
);

if (inferenceIndexInformationForField == null || inferenceIndexInformationForField.inferenceIndices().isEmpty()) {
// No inference fields, return original query
return rewritten;
} else if (inferenceIndexInformationForField.nonInferenceIndices().isEmpty() == false) {
// Combined inference and non inference fields
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder.should(
createSemanticSubQuery(
inferenceIndexInformationForField.inferenceIndices(),
matchQueryBuilder.fieldName(),
(String) matchQueryBuilder.value()
)
);
boolQueryBuilder.should(
SemanticQueryInterceptionUtils.createSubQueryForIndices(
inferenceIndexInformationForField.nonInferenceIndices(),
matchQueryBuilder
)
);
rewritten = boolQueryBuilder;
} else {
// Only inference fields
rewritten = new SemanticQueryBuilder(matchQueryBuilder.fieldName(), (String) matchQueryBuilder.value(), false);
}
return matchQueryBuilder.fieldName();
}

return rewritten;
@Override
protected String getQuery(QueryBuilder queryBuilder) {
assert (queryBuilder instanceof MatchQueryBuilder);
MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder;
return (String) matchQueryBuilder.value();
}

@Override
public String getQueryName() {
return MatchQueryBuilder.NAME;
protected QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation) {
return new SemanticQueryBuilder(indexInformation.fieldName(), getQuery(queryBuilder), false);
}

private QueryBuilder createSemanticSubQuery(List<String> indices, String fieldName, String value) {
@Override
protected QueryBuilder buildCombinedInferenceAndNonInferenceQuery(
QueryBuilder queryBuilder,
InferenceIndexInformationForField indexInformation
) {
assert (queryBuilder instanceof MatchQueryBuilder);
MatchQueryBuilder matchQueryBuilder = (MatchQueryBuilder) queryBuilder;
BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
boolQueryBuilder.must(new SemanticQueryBuilder(fieldName, value, true));
boolQueryBuilder.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, indices));
boolQueryBuilder.should(
createSemanticSubQuery(
indexInformation.getInferenceIndices(),
matchQueryBuilder.fieldName(),
(String) matchQueryBuilder.value()
)
);
boolQueryBuilder.should(createSubQueryForIndices(indexInformation.nonInferenceIndices(), matchQueryBuilder));
return boolQueryBuilder;
}

@Override
public String getQueryName() {
return MatchQueryBuilder.NAME;
}
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.inference.queries;

import org.elasticsearch.action.ResolvedIndices;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.InferenceFieldMetadata;
import org.elasticsearch.index.mapper.IndexFieldMapper;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
import org.elasticsearch.index.query.TermsQueryBuilder;
import org.elasticsearch.plugins.internal.rewriter.QueryRewriteInterceptor;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Intercepts a query that targets a single field and adapts it so that it works seamlessly on a
 * semantic_text field.
 *
 * <p>The field requested by the intercepted query is looked up in the inference-field metadata of every
 * concrete local index in the rewrite context. Depending on what is found, the query is returned untouched,
 * rewritten entirely into an inference query, or rewritten into a combination of inference and
 * non-inference sub-queries. Subclasses provide the query-specific pieces through the abstract methods.
 */
public abstract class SemanticQueryRewriteInterceptor implements QueryRewriteInterceptor {

    public SemanticQueryRewriteInterceptor() {}

    /**
     * Rewrites the given query based on which resolved indices define the requested field as an inference field.
     *
     * @param context      the rewrite context supplying the resolved indices
     * @param queryBuilder the query to intercept
     * @return the original query when no index defines the field as an inference field (or no indices were
     *         resolved); otherwise the query built by {@link #buildInferenceQuery} or
     *         {@link #buildCombinedInferenceAndNonInferenceQuery}
     */
    @Override
    public QueryBuilder interceptAndRewrite(QueryRewriteContext context, QueryBuilder queryBuilder) {
        String fieldName = getFieldName(queryBuilder);
        InferenceIndexInformationForField indexInformation = resolveIndicesForField(fieldName, context.getResolvedIndices());

        if (indexInformation == null || indexInformation.getInferenceIndices().isEmpty()) {
            // No inference fields were identified, so return the original query.
            return queryBuilder;
        }
        if (indexInformation.nonInferenceIndices().isEmpty()) {
            // The only fields we've identified are inference fields (e.g. semantic_text),
            // so rewrite the entire query to work on a semantic_text field.
            return buildInferenceQuery(queryBuilder, indexInformation);
        }
        // Combined case where the field name requested by this query resolves to both
        // inference and non-inference fields, so we have to combine queries per index
        // containing each field type.
        return buildCombinedInferenceAndNonInferenceQuery(queryBuilder, indexInformation);
    }

    /**
     * @param queryBuilder {@link QueryBuilder}
     * @return The singular field name requested by the provided query builder.
     */
    protected abstract String getFieldName(QueryBuilder queryBuilder);

    /**
     * @param queryBuilder {@link QueryBuilder}
     * @return The text/query string requested by the provided query builder.
     */
    protected abstract String getQuery(QueryBuilder queryBuilder);

    /**
     * Builds the inference query used when every resolved index defines the field as an inference field.
     *
     * @param queryBuilder {@link QueryBuilder}
     * @param indexInformation {@link InferenceIndexInformationForField}
     * @return {@link QueryBuilder}
     */
    protected abstract QueryBuilder buildInferenceQuery(QueryBuilder queryBuilder, InferenceIndexInformationForField indexInformation);

    /**
     * Builds a combined inference and non-inference query,
     * which separates the different queries into appropriate indices based on field type.
     *
     * @param queryBuilder {@link QueryBuilder}
     * @param indexInformation {@link InferenceIndexInformationForField}
     * @return {@link QueryBuilder}
     */
    protected abstract QueryBuilder buildCombinedInferenceAndNonInferenceQuery(
        QueryBuilder queryBuilder,
        InferenceIndexInformationForField indexInformation
    );

    /**
     * Splits the concrete local indices into those that define {@code fieldName} as an inference field
     * and those that do not.
     *
     * @param fieldName       the field requested by the intercepted query
     * @param resolvedIndices the indices resolved for the request; may be {@code null}
     * @return the per-index inference information, or {@code null} when {@code resolvedIndices} is {@code null}
     */
    private InferenceIndexInformationForField resolveIndicesForField(String fieldName, ResolvedIndices resolvedIndices) {
        if (resolvedIndices == null) {
            return null;
        }

        Map<String, InferenceFieldMetadata> inferenceIndicesMetadata = new HashMap<>();
        List<String> nonInferenceIndices = new ArrayList<>();
        for (IndexMetadata indexMetadata : resolvedIndices.getConcreteLocalIndicesMetadata().values()) {
            String indexName = indexMetadata.getIndex().getName();
            InferenceFieldMetadata inferenceFieldMetadata = indexMetadata.getInferenceFields().get(fieldName);
            if (inferenceFieldMetadata != null) {
                inferenceIndicesMetadata.put(indexName, inferenceFieldMetadata);
            } else {
                nonInferenceIndices.add(indexName);
            }
        }
        return new InferenceIndexInformationForField(fieldName, inferenceIndicesMetadata, nonInferenceIndices);
    }

    /**
     * Wraps the given query in a bool query that is filtered to the given indices.
     *
     * @param indices      the names of the indices the query should apply to
     * @param queryBuilder the query to restrict
     * @return a bool query with {@code queryBuilder} as a {@code must} clause and a terms filter on the index field
     */
    protected QueryBuilder createSubQueryForIndices(Collection<String> indices, QueryBuilder queryBuilder) {
        return restrictToIndices(queryBuilder, indices);
    }

    /**
     * Creates a {@link SemanticQueryBuilder} for the given field and query text, filtered to the given indices.
     *
     * @param indices   the names of the indices the semantic query should apply to
     * @param fieldName the field to query
     * @param value     the query text
     * @return a bool query with the semantic query as a {@code must} clause and a terms filter on the index field
     */
    protected QueryBuilder createSemanticSubQuery(Collection<String> indices, String fieldName, String value) {
        return restrictToIndices(new SemanticQueryBuilder(fieldName, value, true), indices);
    }

    // Shared construction for both sub-query factories: must(query) + filter(terms on the index field).
    // Kept private and static so that overriding one protected factory never alters the other.
    private static QueryBuilder restrictToIndices(QueryBuilder query, Collection<String> indices) {
        BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
        boolQueryBuilder.must(query);
        boolQueryBuilder.filter(new TermsQueryBuilder(IndexFieldMapper.NAME, indices));
        return boolQueryBuilder;
    }

    /**
     * Represents the indices and associated inference information for a field.
     *
     * @param fieldName                the field the information was resolved for
     * @param inferenceIndicesMetadata inference field metadata keyed by the name of each index that defines
     *                                 the field as an inference field
     * @param nonInferenceIndices      names of the indices that do not define the field as an inference field
     */
    public record InferenceIndexInformationForField(
        String fieldName,
        Map<String, InferenceFieldMetadata> inferenceIndicesMetadata,
        List<String> nonInferenceIndices
    ) {

        /**
         * @return the names of the indices that define the field as an inference field
         *         (a view of the metadata map's key set)
         */
        public Collection<String> getInferenceIndices() {
            return inferenceIndicesMetadata.keySet();
        }

        /**
         * @param index the name of an index that defines the field as an inference field
         * @return the search inference ID configured for the field in that index
         * @throws IllegalArgumentException if no inference metadata is known for {@code index}
         */
        public String getSearchInferenceIdForIndex(String index) {
            InferenceFieldMetadata metadata = inferenceIndicesMetadata.get(index);
            if (metadata == null) {
                // Fail with context rather than a bare NullPointerException on the dereference below.
                throw new IllegalArgumentException("No inference metadata for field [" + fieldName + "] in index [" + index + "]");
            }
            return metadata.getSearchInferenceId();
        }

        /**
         * @return the single search inference ID shared by every inference index for this field
         * @throws IllegalStateException if there are no inference indices, or if the inference indices
         *                               disagree on the search inference ID
         */
        public String getSearchInferenceId() {
            List<String> searchInferenceIds = inferenceIndicesMetadata.values()
                .stream()
                .map(InferenceFieldMetadata::getSearchInferenceId)
                .distinct()
                .toList();
            if (searchInferenceIds.isEmpty()) {
                // getFirst() would otherwise throw a NoSuchElementException that names neither the field nor the cause.
                throw new IllegalStateException("No searchInferenceIds found for field [" + fieldName + "]");
            }
            if (searchInferenceIds.size() > 1) {
                throw new IllegalStateException(
                    "Conflicting searchInferenceIds for field [" + fieldName + "]: Found [" + searchInferenceIds + "]"
                );
            }
            return searchInferenceIds.getFirst();
        }
    }
}
Loading

0 comments on commit 8c82d00

Please sign in to comment.