From 54f6fbaccf5da58c61acb876115f37a5f73f0077 Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Thu, 15 Feb 2024 10:25:24 -0500 Subject: [PATCH] Fix bug in rule_query where text_expansion errored because it was not rewritten (#105365) --- docs/changelog/105365.yaml | 6 + x-pack/plugin/ent-search/qa/rest/build.gradle | 2 +- .../test/entsearch/260_rule_query_search.yml | 150 ++++++++++++++++++ .../application/rules/RuleQueryBuilder.java | 66 ++++---- .../rules/RuleQueryBuilderTests.java | 3 +- 5 files changed, 190 insertions(+), 37 deletions(-) create mode 100644 docs/changelog/105365.yaml diff --git a/docs/changelog/105365.yaml b/docs/changelog/105365.yaml new file mode 100644 index 0000000000000..265e6dccc3915 --- /dev/null +++ b/docs/changelog/105365.yaml @@ -0,0 +1,6 @@ +pr: 105365 +summary: Fix bug in `rule_query` where `text_expansion` errored because it was not + rewritten +area: Application +type: bug +issues: [] diff --git a/x-pack/plugin/ent-search/qa/rest/build.gradle b/x-pack/plugin/ent-search/qa/rest/build.gradle index c9b1557d74a9c..37f1d8f13c850 100644 --- a/x-pack/plugin/ent-search/qa/rest/build.gradle +++ b/x-pack/plugin/ent-search/qa/rest/build.gradle @@ -7,7 +7,7 @@ dependencies { restResources { restApi { - include '_common', 'bulk', 'cluster', 'connector', 'nodes', 'indices', 'index', 'query_ruleset', 'search_application', 'xpack', 'security', 'search' + include '_common', 'bulk', 'cluster', 'connector', 'nodes', 'indices', 'index', 'query_ruleset', 'search_application', 'xpack', 'security', 'search', 'ml' } } diff --git a/x-pack/plugin/ent-search/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/entsearch/260_rule_query_search.yml b/x-pack/plugin/ent-search/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/entsearch/260_rule_query_search.yml index c287209da5bed..40cdb7839c9ed 100644 --- a/x-pack/plugin/ent-search/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/entsearch/260_rule_query_search.yml +++ b/x-pack/plugin/ent-search/qa/rest/src/yamlRestTest/resources/rest-api-spec/test/entsearch/260_rule_query_search.yml @@ -236,4 +236,154 @@ setup: - match: { hits.total.value: 1 } - match: { hits.hits.0._id: 'doc1' } +--- +"Perform a rule query with an organic query that must be rewritten to another query type": + - skip: + version: " - 8.13.99" + reason: Bugfix that was broken in previous versions + + - do: + indices.create: + index: test-index-with-sparse-vector + body: + mappings: + properties: + source_text: + type: keyword + ml.tokens: + type: sparse_vector + + - do: + ml.put_trained_model: + model_id: "text_expansion_model" + body: > + { + "description": "simple model for testing", + "model_type": "pytorch", + "inference_config": { + "text_expansion": { + "tokenization": { + "bert": { + "with_special_tokens": false + } + } + } + } + } + - do: + ml.put_trained_model_vocabulary: + model_id: "text_expansion_model" + body: > + { "vocabulary": ["[PAD]", "[UNK]", "these", "are", "my", "words", "the", "washing", "machine", "is", "leaking", "octopus", "comforter", "smells"] } + - do: + ml.put_trained_model_definition_part: + model_id: "text_expansion_model" + part: 0 + body: > + { + "total_definition_length":2078, + "definition": "UEsDBAAACAgAAAAAAAAAAAAAAAAAAAAAAAAUAA4Ac2ltcGxlbW9kZWwvZGF0YS5wa2xGQgoAWlpaWlpaWlpaWoACY19fdG9yY2hfXwpUaW55VGV4dEV4cGFuc2lvbgpxACmBfShYCAAAAHRyYWluaW5ncQGJWBYAAABfaXNfZnVsbF9iYWNrd2FyZF9ob29rcQJOdWJxAy5QSwcIITmbsFgAAABYAAAAUEsDBBQACAgIAAAAAAAAAAAAAAAAAAAAAAAdAB0Ac2ltcGxlbW9kZWwvY29kZS9fX3RvcmNoX18ucHlGQhkAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWoWRT4+cMAzF7/spfASJomF3e0Ga3nrrn8vcELIyxAzRhAQlpjvbT19DWDrdquqBA/bvPT87nVUxwsm41xPd+PNtUi4a77KvXs+W8voBAHFSQY3EFCIiHKFp1+p57vs/ShyUccZdoIaz93aBTMR+thbPqru+qKBx8P4q/e8TyxRlmwVctJp66H1YmCyS7WsZwD50A2L5V7pCBADGTTOj0bGGE7noQyqzv5JDfp0o9fZRCWqP37yjhE4+mqX5X3AdFZHGM/2TzOHDpy1IvQWR+OWo3KwsRiKdpcqg4pBFDtm+QJ7nqwIPckrlnGfFJG0uNhOl38Sjut3pCqg26QuZy8BR9In7ScHHrKkKMW0TIucFrGQXCMpdaDO05O6DpOiy8e4kr0Ed/2YKOIhplW8gPr4ntygrd9ixpx3j9UZZVRagl2c6+imWUzBjuf5m+Ch7afphuvvW+r/0dsfn+2N9MZGb9+/SFtCYdhd83CMYp+mGy0LiKNs8y/eUuEA8B/d2z4dfUEsHCFSE3IaCAQAAIAMAAFBLAwQUAAgICAAAAAAAAAAAAAAAAAAAAAAAJwApAHNpbXBsZW1vZGVsL2NvZGUvX190b3JjaF9fLnB5LmRlYnVnX3BrbEZCJQBaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpahZHLbtNAFIZtp03rSVIuLRKXjdk5ojitKJsiFq24lem0KKSqpRIZt55gE9/GM+lNLFgx4i1Ys2aHhIBXgAVICNggHgNm6rqJN2BZGv36/v/MOWeea/Z5RVHurLfRUsfZXOnccx522itrd53O0vLqbaKYtsAKUe1pcege7hm9JNtzM8+kOOzNApIX0A3xBXE6YE7g0UWjg2OaZAJXbKvALOnj2GEHKc496ykLktgNt3Jz17hprCUxFqExe7YIpQkNpO1/kfHhPUdtUAdH2/gfmeYiIFW7IkM6IBP2wrDNbMe3Mjf2ksiK3Hjghg7F2DN9l/omZZl5Mmez2QRk0q4WUUB0+1oh9nDwxGdUXJdXPMRZQs352eGaRPV9s2lcMeZFGWBfKJJiw0YgbCMLBaRmXyy4flx6a667Fch55q05QOq2Jg2ANOyZwplhNsjiohVApo7aa21QnNGW5+4GXv8gxK1beBeHSRrhmLXWVh+0aBhErZ7bx1ejxMOhlR6QU4ycNqGyk8/yNGCWkwY7/RCD7UEQek4QszCgDJAzZtfErA0VqHBy9ugQP9pUfUmgCjVYgWNwHFbhBJyEOgSwBuuwARWZmoI6J9PwLfzEocpRpPrT8DP8wqHG0b4UX+E3DiscvRglXIoi81KKPwioHI5x9EooNKWiy0KOc/T6WF4SssrRuzJ9L2VNRXUhJzj6UKYfS4W/q/5wuh/l4M9R9qsU+y2dpoo2hJzkaEET8r6KRONicnRdK9EbUi6raFVIwNGjsrlbpk6ZPi7TbS3fv3LyNjPiEKzG0aG0tvNb6xw90/whe6ONjnJcUxobHDUqQ8bIOW79BVBLBwhfSmPKdAIAAE4EAABQSwMEAAAICAAAAAAAAAAAAAAAAAAAAAAAABkABQBzaW1wbGVtb2RlbC9jb25zdGFudHMucGtsRkIBAFqAAikuUEsHCG0vCVcEAAAABAAAAFBLAwQAAAgIAAAAAAAAAAAAAAAAAAAAAAAAEwA7AHNpbXBsZW1vZGVsL3ZlcnNpb25GQjcAWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWlpaWjMKUEsHCNGeZ1UCAAAAAgAAAFBLAQIAAAAACAgAAAAAAAAhOZuwWAAAAFgAAAAUAAAAAAAAAAAAAAAAAAAAAABzaW1wbGVtb2RlbC9kYXRhLnBrbFBLAQIAABQACAgIAAAAAABUhNyGggEAACADAAAdAAAAAAAAAAAAAAAAAKgAAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weVBLAQIAABQACAgIAAAAAABfSmPKdAIAAE4EAAAnAAAAAAAAAAAAAAAAAJICAABzaW1wbGVtb2RlbC9jb2RlL19fdG9yY2hfXy5weS5kZWJ1Z19wa2xQSwECAAAAAAgIAAAAAAAAbS8JVwQAAAAEAAAAGQAAAAAAAAAAAAAAAACEBQAAc2ltcGxlbW9kZWwvY29uc3RhbnRzLnBrbFBLAQIAAAAACAgAAAAAAADRnmdVAgAAAAIAAAATAAAAAAAAAAAAAAAAANQFAABzaW1wbGVtb2RlbC92ZXJzaW9uUEsGBiwAAAAAAAAAHgMtAAAAAAAAAAAABQAAAAAAAAAFAAAAAAAAAGoBAAAAAAAAUgYAAAAAAABQSwYHAAAAALwHAAAAAAAAAQAAAFBLBQYAAAAABQAFAGoBAABSBgAAAAA=", + "total_parts": 1 + } + - do: + bulk: + index: test-index-with-sparse-vector + refresh: true + body: | + {"index": {}} + {"source_text": "my words comforter", "ml.tokens":{"my":1.0, "words":1.0,"comforter":1.0}} + {"index": {}} + {"source_text": "the machine is leaking", "ml.tokens":{"the":1.0,"machine":1.0,"is":1.0,"leaking":1.0}} + {"index": {}} + {"source_text": "these are my words", "ml.tokens":{"these":1.0,"are":1.0,"my":1.0,"words":1.0}} + {"index": {}} + {"source_text": "the octopus comforter smells", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"smells":1.0}} + {"index": {}} + {"source_text": "the octopus comforter is leaking", "ml.tokens":{"the":1.0,"octopus":1.0,"comforter":1.0,"is":1.0,"leaking":1.0}} + {"index": {}} + {"source_text": "washing machine smells", "ml.tokens":{"washing":1.0,"machine":1.0,"smells":1.0}} + {"index": { "_id": "pinned_doc1" }} + {"source_text": "unrelated pinned doc", "ml.tokens":{"unrelated":1.0,"pinned":1.0,"doc":1.0}} + {"index": { "_id": "pinned_doc2" }} + {"source_text": "another unrelated pinned doc", "ml.tokens":{"another":1.0, "unrelated":1.0,"pinned":1.0,"doc":1.0}} + + - do: + ml.start_trained_model_deployment: + model_id: text_expansion_model + wait_for: started + + - do: + query_ruleset.put: + ruleset_id: combined-ruleset + body: + rules: + - rule_id: rule1 + type: pinned + criteria: + - type: exact + metadata: foo + values: [ bar ] + actions: + ids: + - 'pinned_doc1' + - rule_id: rule2 + type: pinned + criteria: + - type: exact + metadata: foo + values: [ baz ] + actions: + docs: + - '_index': 'test-index-with-sparse-vector' + '_id': 'pinned_doc2' + - do: + search: + body: + query: + rule_query: + organic: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + match_criteria: + foo: bar + ruleset_id: combined-ruleset + + - match: { hits.total.value: 5 } + - match: { hits.hits.0._id: 'pinned_doc1' } + + - do: + search: + body: + query: + rule_query: + organic: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + match_criteria: + foo: baz + ruleset_id: combined-ruleset + + - match: { hits.total.value: 5 } + - match: { hits.hits.0._id: 'pinned_doc2' } + + - do: + search: + body: + query: + rule_query: + organic: + text_expansion: + ml.tokens: + model_id: text_expansion_model + model_text: "octopus comforter smells" + match_criteria: + foo: puggle + ruleset_id: combined-ruleset + + - match: { hits.total.value: 4 } + diff --git a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/RuleQueryBuilder.java b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/RuleQueryBuilder.java index 3882b6c61bb2c..11d2945a97354 100644 --- a/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/RuleQueryBuilder.java +++ b/x-pack/plugin/ent-search/src/main/java/org/elasticsearch/xpack/application/rules/RuleQueryBuilder.java @@ -111,18 +111,6 @@ private RuleQueryBuilder( throw new IllegalArgumentException("rulesetId must not be null or empty"); } - // PinnedQueryBuilder will return an error if we attempt to return more than the maximum number of - // pinned hits. Here, we truncate matching rules rather than return an error. - if (pinnedIds != null && pinnedIds.size() > MAX_NUM_PINNED_HITS) { - HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents"); - pinnedIds = pinnedIds.subList(0, MAX_NUM_PINNED_HITS); - } - - if (pinnedDocs != null && pinnedDocs.size() > MAX_NUM_PINNED_HITS) { - HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents"); - pinnedDocs = pinnedDocs.subList(0, MAX_NUM_PINNED_HITS); - } - this.organicQuery = organicQuery; this.matchCriteria = matchCriteria; this.rulesetId = rulesetId; @@ -174,6 +162,9 @@ protected void doXContent(XContentBuilder builder, Params params) throws IOExcep @Override protected Query doToQuery(SearchExecutionContext context) throws IOException { + // NOTE: this is old query logic, as in 8.12.2+ and 8.13.0+ we will always rewrite this query + // into a pinned query or the organic query. This logic remains here for backwards compatibility + // with coordinator nodes running versions 8.10.0 - 8.12.1. if ((pinnedIds != null && pinnedIds.isEmpty() == false) && (pinnedDocs != null && pinnedDocs.isEmpty() == false)) { throw new IllegalArgumentException("applied rules contain both pinned ids and pinned docs, only one of ids or docs is allowed"); } @@ -187,21 +178,25 @@ protected Query doToQuery(SearchExecutionContext context) throws IOException { } else { return organicQuery.toQuery(context); } - } - @SuppressWarnings("unchecked") @Override - protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws IOException { - if (pinnedIds != null || pinnedDocs != null) { - return this; - } else if (pinnedIdsSupplier != null || pinnedDocsSupplier != null) { - List identifiedPinnedIds = pinnedIdsSupplier != null ? pinnedIdsSupplier.get() : null; - List identifiedPinnedDocs = pinnedDocsSupplier != null ? pinnedDocsSupplier.get() : null; - if (identifiedPinnedIds == null && identifiedPinnedDocs == null) { - return this; // not executed yet + protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) { + if (pinnedIdsSupplier != null && pinnedDocsSupplier != null) { + List identifiedPinnedIds = pinnedIdsSupplier.get(); + List identifiedPinnedDocs = pinnedDocsSupplier.get(); + if (identifiedPinnedIds == null || identifiedPinnedDocs == null) { + return this; // Not executed yet + } else if (identifiedPinnedIds.isEmpty() && identifiedPinnedDocs.isEmpty()) { + return organicQuery; // Nothing to pin here + } else if (identifiedPinnedIds.isEmpty() == false && identifiedPinnedDocs.isEmpty() == false) { + throw new IllegalArgumentException( + "applied rules contain both pinned ids and pinned docs, only one of ids or docs is allowed" + ); + } else if (identifiedPinnedIds.isEmpty() == false) { + return new PinnedQueryBuilder(organicQuery, truncateList(identifiedPinnedIds).toArray(new String[0])); } else { - return new RuleQueryBuilder(organicQuery, matchCriteria, rulesetId, identifiedPinnedIds, identifiedPinnedDocs, null, null); + return new PinnedQueryBuilder(organicQuery, truncateList(identifiedPinnedDocs).toArray(new Item[0])); } } @@ -244,18 +239,19 @@ public void onFailure(Exception e) { }); }); - QueryBuilder newOrganicQuery = organicQuery.rewrite(queryRewriteContext); - RuleQueryBuilder rewritten = new RuleQueryBuilder( - newOrganicQuery, - matchCriteria, - this.rulesetId, - null, - null, - pinnedIdsSetOnce::get, - pinnedDocsSetOnce::get - ); - rewritten.boost(this.boost); - return rewritten; + return new RuleQueryBuilder(organicQuery, matchCriteria, this.rulesetId, null, null, pinnedIdsSetOnce::get, pinnedDocsSetOnce::get) + .boost(this.boost) + .queryName(this.queryName); + } + + private List truncateList(List input) { + // PinnedQueryBuilder will return an error if we attempt to return more than the maximum number of + // pinned hits. Here, we truncate matching rules rather than return an error. + if (input.size() > MAX_NUM_PINNED_HITS) { + HeaderWarning.addWarning("Truncating query rule pinned hits to " + MAX_NUM_PINNED_HITS + " documents"); + return input.subList(0, MAX_NUM_PINNED_HITS); + } + return input; } @Override diff --git a/x-pack/plugin/ent-search/src/test/java/org/elasticsearch/xpack/application/rules/RuleQueryBuilderTests.java b/x-pack/plugin/ent-search/src/test/java/org/elasticsearch/xpack/application/rules/RuleQueryBuilderTests.java index 1d19011fdd4a9..d4ab8d8f8e6e8 100644 --- a/x-pack/plugin/ent-search/src/test/java/org/elasticsearch/xpack/application/rules/RuleQueryBuilderTests.java +++ b/x-pack/plugin/ent-search/src/test/java/org/elasticsearch/xpack/application/rules/RuleQueryBuilderTests.java @@ -31,6 +31,7 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xpack.application.LocalStateEnterpriseSearch; +import org.elasticsearch.xpack.searchbusinessrules.SearchBusinessRules; import org.hamcrest.Matchers; import java.io.IOException; @@ -62,7 +63,7 @@ protected void doAssertLuceneQuery(RuleQueryBuilder queryBuilder, Query query, S @Override protected Collection> getPlugins() { - return Collections.singletonList(LocalStateEnterpriseSearch.class); + return List.of(LocalStateEnterpriseSearch.class, SearchBusinessRules.class); } public void testIllegalArguments() {