Skip to content

Commit

Permalink
Fix index prefixes to work with span_multi (#31066)
Browse files Browse the repository at this point in the history
* Fix index prefixes to work with span_multi

Text fields that use `index_prefixes` can rewrite `prefix` queries into
`term` queries internally. This commit fix the handling of this rewriting
in the `span_multi` query.
This change also copies the index options of the text field into the
prefix field in order to be able to run positional queries. This is mandatory
for `span_multi` to work but this could also be useful to optimize `match_phrase_prefix`
queries in a follow up. Note that this change can only be done on indices created
after 6.3 since we set the index options to doc only in this version.

Fixes #31056
  • Loading branch information
jimczi committed Jun 4, 2018
1 parent 024400b commit f94a757
Show file tree
Hide file tree
Showing 6 changed files with 277 additions and 73 deletions.
8 changes: 8 additions & 0 deletions docs/reference/query-dsl/span-multi-term-query.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,11 @@ GET /_search
}
--------------------------------------------------
// CONSOLE

WARNING: By default `span_multi queries are rewritten to a `span_or` query
containing **all** the expanded terms. This can be expensive if the number of expanded
terms is large. To avoid an unbounded expansion you can set the
<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*`
rewrite. Or, if you use `span_multi` on `prefix` query only, you can
activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will
rewrite any prefix query on the field to a a single term query that matches the indexed prefix.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
"search with index prefixes":
setup:
- skip:
version: " - 6.99.99"
version: " - 6.2.99"
reason: index_prefixes is only available as of 6.3.0

- do:
indices.create:
index: test
Expand All @@ -27,6 +27,11 @@
indices.refresh:
index: [test]

---
"search with index prefixes":
- skip:
version: " - 6.2.99"
reason: index_prefixes is only available as of 6.3.0
- do:
search:
index: test
Expand Down Expand Up @@ -57,3 +62,23 @@

- match: {hits.total: 1}
- match: {hits.hits.0._score: 1}

---
"search index prefixes with span_multi":
- skip:
version: " - 6.99.99"
reason: span_multi throws an exception with prefix fields on < versions

- do:
search:
index: test
body:
query:
span_near:
clauses: [
{ "span_term": { "text": "short" } },
{ "span_multi": { "match": { "prefix": { "text": "word" } } } }
]

- match: {hits.total: 1}

Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.settings.Settings;
Expand Down Expand Up @@ -175,7 +176,16 @@ public TextFieldMapper build(BuilderContext context) {
if (fieldType().isSearchable() == false) {
throw new IllegalArgumentException("Cannot set index_prefixes on unindexed field [" + name() + "]");
}
if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
// Copy the index options of the main field to allow phrase queries on
// the prefix field.
if (context.indexCreatedVersion().onOrAfter(Version.V_6_4_0)) {
if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS) {
// frequencies are not needed because prefix queries always use a constant score
prefixFieldType.setIndexOptions(IndexOptions.DOCS);
} else {
prefixFieldType.setIndexOptions(fieldType.indexOptions());
}
} else if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) {
prefixFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
}
if (fieldType.storeTermVectorOffsets()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,28 @@
*/
package org.elasticsearch.index.query;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.FieldMaskingSpanQuery;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.query.support.QueryParsers;

import java.io.IOException;
import java.util.Objects;
Expand Down Expand Up @@ -124,22 +134,67 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro
protected Query doToQuery(QueryShardContext context) throws IOException {
Query subQuery = multiTermQueryBuilder.toQuery(context);
float boost = AbstractQueryBuilder.DEFAULT_BOOST;
if (subQuery instanceof BoostQuery) {
BoostQuery boostQuery = (BoostQuery) subQuery;
subQuery = boostQuery.getQuery();
boost = boostQuery.getBoost();
while (true) {
if (subQuery instanceof ConstantScoreQuery) {
subQuery = ((ConstantScoreQuery) subQuery).getQuery();
boost = 1;
} else if (subQuery instanceof BoostQuery) {
BoostQuery boostQuery = (BoostQuery) subQuery;
subQuery = boostQuery.getQuery();
boost *= boostQuery.getBoost();
} else {
break;
}
}
//no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here
final SpanQuery spanQuery;
// no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here
assert subQuery instanceof SpanBoostQuery == false;
if (subQuery instanceof MultiTermQuery == false) {
throw new UnsupportedOperationException("unsupported inner query, should be " + MultiTermQuery.class.getName() +" but was "
+ subQuery.getClass().getName());
if (subQuery instanceof TermQuery) {
/**
* Text fields that index prefixes can rewrite prefix queries
* into term queries. See {@link TextFieldMapper.TextFieldType#prefixQuery}.
*/
if (multiTermQueryBuilder.getClass() != PrefixQueryBuilder.class) {
throw new UnsupportedOperationException("unsupported inner query generated by " +
multiTermQueryBuilder.getClass().getName() + ", should be " + MultiTermQuery.class.getName()
+ " but was " + subQuery.getClass().getName());
}
if (context.getIndexSettings().getIndexVersionCreated().before(Version.V_6_4_0)) {
/**
* Indices created in this version do not index positions on the prefix field
* so we cannot use it to match positional queries. Instead, we explicitly create the prefix
* query on the main field to avoid the rewrite.
*/
PrefixQueryBuilder prefixBuilder = (PrefixQueryBuilder) multiTermQueryBuilder;
PrefixQuery prefixQuery = new PrefixQuery(new Term(prefixBuilder.fieldName(), prefixBuilder.value()));
if (prefixBuilder.rewrite() != null) {
MultiTermQuery.RewriteMethod rewriteMethod =
QueryParsers.parseRewriteMethod(prefixBuilder.rewrite(), null, LoggingDeprecationHandler.INSTANCE);
prefixQuery.setRewriteMethod(rewriteMethod);
}
spanQuery = new SpanMultiTermQueryWrapper<>(prefixQuery);
} else {
String origFieldName = ((PrefixQueryBuilder) multiTermQueryBuilder).fieldName();
SpanTermQuery spanTermQuery = new SpanTermQuery(((TermQuery) subQuery).getTerm());
/**
* Prefixes are indexed in a different field so we mask the term query with the original field
* name. This is required because span_near and span_or queries don't work across different field.
* The masking is safe because the prefix field is indexed using the same content than the original field
* and the prefix analyzer preserves positions.
*/
spanQuery = new FieldMaskingSpanQuery(spanTermQuery, origFieldName);
}
} else {
if (subQuery instanceof MultiTermQuery == false) {
throw new UnsupportedOperationException("unsupported inner query, should be "
+ MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName());
}
spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
}
SpanQuery wrapper = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery);
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
wrapper = new SpanBoostQuery(wrapper, boost);
return new SpanBoostQuery(spanQuery, boost);
}
return wrapper;
return spanQuery;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
Expand Down Expand Up @@ -638,7 +639,7 @@ public void testIndexPrefixIndexTypes() throws IOException {
.field("type", "text")
.field("analyzer", "english")
.startObject("index_prefixes").endObject()
.field("index_options", "positions")
.field("index_options", "freqs")
.endObject().endObject().endObject().endObject());

DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));
Expand All @@ -649,6 +650,27 @@ public void testIndexPrefixIndexTypes() throws IOException {
assertFalse(ft.storeTermVectors());
}

{
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
.field("type", "text")
.field("analyzer", "english")
.startObject("index_prefixes").endObject()
.field("index_options", "positions")
.endObject().endObject().endObject().endObject());

DocumentMapper mapper = parser.parse("type", new CompressedXContent(mapping));

FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix");
FieldType ft = prefix.fieldType;
if (indexService.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) {
assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, ft.indexOptions());
} else {
assertEquals(IndexOptions.DOCS, ft.indexOptions());
}
assertFalse(ft.storeTermVectors());
}

{
String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type")
.startObject("properties").startObject("field")
Expand All @@ -662,7 +684,11 @@ public void testIndexPrefixIndexTypes() throws IOException {

FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix");
FieldType ft = prefix.fieldType;
assertEquals(IndexOptions.DOCS, ft.indexOptions());
if (indexService.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) {
assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, ft.indexOptions());
} else {
assertEquals(IndexOptions.DOCS, ft.indexOptions());
}
assertTrue(ft.storeTermVectorOffsets());
}

Expand All @@ -679,7 +705,11 @@ public void testIndexPrefixIndexTypes() throws IOException {

FieldMapper prefix = mapper.mappers().getMapper("field._index_prefix");
FieldType ft = prefix.fieldType;
assertEquals(IndexOptions.DOCS, ft.indexOptions());
if (indexService.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) {
assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, ft.indexOptions());
} else {
assertEquals(IndexOptions.DOCS, ft.indexOptions());
}
assertFalse(ft.storeTermVectorOffsets());
}
}
Expand Down
Loading

0 comments on commit f94a757

Please sign in to comment.