Skip to content

Commit

Permalink
Fix bug in rule_query where text_expansion errored because it was not…
Browse files Browse the repository at this point in the history
… rewritten (elastic#105365)
  • Loading branch information
kderusso committed Feb 15, 2024
1 parent afd2dc6 commit 54f6fba
Show file tree
Hide file tree
Showing 5 changed files with 190 additions and 37 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/105365.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 105365
summary: Fix bug in `rule_query` where `text_expansion` errored because it was not
rewritten
area: Application
type: bug
issues: []
2 changes: 1 addition & 1 deletion x-pack/plugin/ent-search/qa/rest/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies {

restResources {
restApi {
include '_common', 'bulk', 'cluster', 'connector', 'nodes', 'indices', 'index', 'query_ruleset', 'search_application', 'xpack', 'security', 'search'
include '_common', 'bulk', 'cluster', 'connector', 'nodes', 'indices', 'index', 'query_ruleset', 'search_application', 'xpack', 'security', 'search', 'ml'
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,4 +236,154 @@ setup:
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: 'doc1' }

---
"Perform a rule query with an organic query that must be rewritten to another query type":
- skip:
version: " - 8.13.99"
reason: Bugfix that was broken in previous versions

- do:
indices.create:
index: test-index-with-sparse-vector
body:
mappings:
properties:
source_text:
type: keyword
ml.tokens:
type: sparse_vector

- do:
ml.put_trained_model:
model_id: "text_expansion_model"
body: >
{
"description": "simple model for testing",
"model_type": "pytorch",
"inference_config": {
"text_expansion": {
"tokenization": {
"bert": {
"with_special_tokens": false
}
}
}
}
}
- do:
ml.put_trained_model_vocabulary:
model_id: "text_expansion_model"
body: >
{ "vocabulary": ["[PAD]", "[UNK]", "these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"] }
- do:
ml.put_trained_model_definition_part:
model_id: "text_expansion_model"
part: 0
body: >
{
"total_definition_length":2078,
"definition": "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VGV4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVctJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdFZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26QuZy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/eUuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNLFgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vLqbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLktgNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjghg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0YgbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSRrhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRglXIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAABkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgIAAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAAAACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLAQIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAAAAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAAAAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAAAAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA=",
"total_parts": 1
}
- do:
bulk:
index: test-index-with-sparse-vector
refresh: true
body: |
{"index": {}}
{"source_text": "my words comforter", "ml.tokens":{"my":1.0, "words":1.0,"comforter":1.0}}
{"index": {}}
{"source_text": "the machine is leaking", "ml.tokens":{"the":1.0,"machine":1.0,"is":1.0,"leaking":1.0}}
{"index": {}}
{"source_text": "these are my words", "ml.tokens":{"these":1.0,"are":1.0,"my":1.0,"words":1.0}}
{"index": {}}
{"source_text": "the octopus comforter smells", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"smells":1.0}}
{"index": {}}
{"source_text": "the octopus comforter is leaking", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"is":1.0,"leaking":1.0}}
{"index": {}}
{"source_text": "washing machine smells", "ml.tokens":{"washing":1.0,"machine":1.0,"smells":1.0}}
{"index": { "_id": "pinned_doc1" }}
{"source_text": "unrelated pinned doc", "ml.tokens":{"unrelated":1.0,"pinned":1.0,"doc":1.0}}
{"index": { "_id": "pinned_doc2" }}
{"source_text": "another unrelated pinned doc", "ml.tokens":{"another":1.0, "unrelated":1.0,"pinned":1.0,"doc":1.0}}
- do:
ml.start_trained_model_deployment:
model_id: text_expansion_model
wait_for: started

- do:
query_ruleset.put:
ruleset_id: combined-ruleset
body:
rules:
- rule_id: rule1
type: pinned
criteria:
- type: exact
metadata: foo
values: [ bar ]
actions:
ids:
- 'pinned_doc1'
- rule_id: rule2
type: pinned
criteria:
- type: exact
metadata: foo
values: [ baz ]
actions:
docs:
- '_index': 'test-index-with-sparse-vector'
'_id': 'pinned_doc2'
- do:
search:
body:
query:
rule_query:
organic:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
match_criteria:
foo: bar
ruleset_id: combined-ruleset

- match: { hits.total.value: 5 }
- match: { hits.hits.0._id: 'pinned_doc1' }

- do:
search:
body:
query:
rule_query:
organic:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
match_criteria:
foo: baz
ruleset_id: combined-ruleset

- match: { hits.total.value: 5 }
- match: { hits.hits.0._id: 'pinned_doc2' }

- do:
search:
body:
query:
rule_query:
organic:
text_expansion:
ml.tokens:
model_id: text_expansion_model
model_text: "octopus comforter smells"
match_criteria:
foo: puggle
ruleset_id: combined-ruleset

- match: { hits.total.value: 4 }


Original file line number Diff line number Diff line change
Expand Up @@ -111,18 +111,6 @@ private RuleQueryBuilder(
throw new IllegalArgumentException("rulesetId must not be null or empty");
}

// PinnedQueryBuilder will return an error if we attempt to return more than the maximum number of
// pinned hits. Here, we truncate matching rules rather than return an error.
if (pinnedIds != null && pinnedIds.size() > MAX_NUM_PINNED_HITS) {
HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents");
pinnedIds = pinnedIds.subList(0, MAX_NUM_PINNED_HITS);
}

if (pinnedDocs != null && pinnedDocs.size() > MAX_NUM_PINNED_HITS) {
HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents");
pinnedDocs = pinnedDocs.subList(0, MAX_NUM_PINNED_HITS);
}

this.organicQuery = organicQuery;
this.matchCriteria = matchCriteria;
this.rulesetId = rulesetId;
Expand Down Expand Up @@ -174,6 +162,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep

@Override
protected Query doToQuery(SearchExecutionContext context) throws IOException {
// NOTE: this is old query logic, as in 8.12.2+ and 8.13.0+ we will always rewrite this query
// into a pinned query or the organic query. This logic remains here for backwards compatibility
// with coordinator nodes running versions 8.10.0 - 8.12.1.
if ((pinnedIds != null && pinnedIds.isEmpty() == false) && (pinnedDocs != null && pinnedDocs.isEmpty() == false)) {
throw new IllegalArgumentException("applied rules contain both pinned ids and pinned docs, only one of ids or docs is allowed");
}
Expand All @@ -187,21 +178,25 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException {
} else {
return organicQuery.toQuery(context);
}

}

@SuppressWarnings("unchecked")
@Override
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException {
if (pinnedIds != null || pinnedDocs != null) {
return this;
} else if (pinnedIdsSupplier != null || pinnedDocsSupplier != null) {
List<String> identifiedPinnedIds = pinnedIdsSupplier != null ? pinnedIdsSupplier.get() : null;
List<Item> identifiedPinnedDocs = pinnedDocsSupplier != null ? pinnedDocsSupplier.get() : null;
if (identifiedPinnedIds == null && identifiedPinnedDocs == null) {
return this; // not executed yet
protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) {
if (pinnedIdsSupplier != null && pinnedDocsSupplier != null) {
List<String> identifiedPinnedIds = pinnedIdsSupplier.get();
List<Item> identifiedPinnedDocs = pinnedDocsSupplier.get();
if (identifiedPinnedIds == null || identifiedPinnedDocs == null) {
return this; // Not executed yet
} else if (identifiedPinnedIds.isEmpty() && identifiedPinnedDocs.isEmpty()) {
return organicQuery; // Nothing to pin here
} else if (identifiedPinnedIds.isEmpty() == false && identifiedPinnedDocs.isEmpty() == false) {
throw new IllegalArgumentException(
"applied rules contain both pinned ids and pinned docs, only one of ids or docs is allowed"
);
} else if (identifiedPinnedIds.isEmpty() == false) {
return new PinnedQueryBuilder(organicQuery, truncateList(identifiedPinnedIds).toArray(new String[0]));
} else {
return new RuleQueryBuilder(organicQuery, matchCriteria, rulesetId, identifiedPinnedIds, identifiedPinnedDocs, null, null);
return new PinnedQueryBuilder(organicQuery, truncateList(identifiedPinnedDocs).toArray(new Item[0]));
}
}

Expand Down Expand Up @@ -244,18 +239,19 @@ public void onFailure(Exception e) {
});
});

QueryBuilder newOrganicQuery = organicQuery.rewrite(queryRewriteContext);
RuleQueryBuilder rewritten = new RuleQueryBuilder(
newOrganicQuery,
matchCriteria,
this.rulesetId,
null,
null,
pinnedIdsSetOnce::get,
pinnedDocsSetOnce::get
);
rewritten.boost(this.boost);
return rewritten;
return new RuleQueryBuilder(organicQuery, matchCriteria, this.rulesetId, null, null, pinnedIdsSetOnce::get, pinnedDocsSetOnce::get)
.boost(this.boost)
.queryName(this.queryName);
}

private List<?> truncateList(List<?> input) {
// PinnedQueryBuilder will return an error if we attempt to return more than the maximum number of
// pinned hits. Here, we truncate matching rules rather than return an error.
if (input.size() > MAX_NUM_PINNED_HITS) {
HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents");
return input.subList(0, MAX_NUM_PINNED_HITS);
}
return input;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentFactory;
import org.elasticsearch.xpack.application.LocalStateEnterpriseSearch;
import org.elasticsearch.xpack.searchbusinessrules.SearchBusinessRules;
import org.hamcrest.Matchers;

import java.io.IOException;
Expand Down Expand Up @@ -62,7 +63,7 @@ protected void doAssertLuceneQuery(RuleQueryBuilder queryBuilder, Query query, S

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
return Collections.singletonList(LocalStateEnterpriseSearch.class);
return List.of(LocalStateEnterpriseSearch.class, SearchBusinessRules.class);
}

public void testIllegalArguments() {
Expand Down

0 comments on commit 54f6fba

Please sign in to comment.