Skip to content

Commit

Permalink
Improve similarity integration.
Browse files Browse the repository at this point in the history
This improves the way similarities are plugged in in order to:
 - reject the classic similarity on 7.x indices and emit a deprecation
   warning otherwise
 - reject unkwown parameters on 7.x indices and emit a deprecation
   warning otherwise

Even though this breaks the plugin API, I'd like to backport to 7.x so
that users can get deprecation warnings when they are doing something
that will become unsupported in the future.

Closes #23208
Closes #29035
  • Loading branch information
jpountz committed Mar 21, 2018
1 parent 2f6c773 commit 6737bf5
Show file tree
Hide file tree
Showing 26 changed files with 502 additions and 806 deletions.
18 changes: 2 additions & 16 deletions docs/reference/index-modules/similarity.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,6 @@ This similarity has the following options:

Type name: `BM25`

[float]
[[classic-similarity]]
==== Classic similarity

The classic similarity that is based on the TF/IDF model. This
similarity has the following option:

`discount_overlaps`::
Determines whether overlap tokens (Tokens with
0 position increment) are ignored when computing norm. By default this
is true, meaning overlap tokens do not count when computing norms.

Type name: `classic`

[float]
[[dfr]]
==== DFR similarity
Expand Down Expand Up @@ -541,7 +527,7 @@ PUT /index
"index": {
"similarity": {
"default": {
"type": "classic"
"type": "boolean"
}
}
}
Expand All @@ -563,7 +549,7 @@ PUT /index/_settings
"index": {
"similarity": {
"default": {
"type": "classic"
"type": "boolean"
}
}
}
Expand Down
9 changes: 2 additions & 7 deletions docs/reference/mapping/params/similarity.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,9 @@ PUT my_index
"default_field": { <1>
"type": "text"
},
"classic_field": {
"type": "text",
"similarity": "classic" <2>
},
"boolean_sim_field": {
"type": "text",
"similarity": "boolean" <3>
"similarity": "boolean" <2>
}
}
}
Expand All @@ -59,5 +55,4 @@ PUT my_index
--------------------------------------------------
// CONSOLE
<1> The `default_field` uses the `BM25` similarity.
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
<3> The `boolean_sim_field` uses the `boolean` similarity.
<2> The `boolean_sim_field` uses the `boolean` similarity.
13 changes: 13 additions & 0 deletions docs/reference/migration/migrate_7_0/mappings.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,16 @@ the index setting `index.mapping.nested_objects.limit`.
==== The `update_all_types` option has been removed

This option is useless now that all indices have at most one type.

=== The `classic` similarity has been removed

The `classic` similarity relied on coordination factors for scoring to be good
in presence of stopwords in the query. This feature has been removed from
Lucene, which means that the `classic` similarity now produces scores of lower
quality. It is advised to switch to `BM25` instead, which is widely accepted
as a better alternative.

=== Similarities fail when unsupported options are provided

An error will now be thrown when unknown configuration options are provided
to similarities. Such unknown parameters were ignored before.
Original file line number Diff line number Diff line change
Expand Up @@ -337,9 +337,7 @@ public void testNonDefaultSimilarity() throws Exception {
hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
.create(similarity, Settings.EMPTY,
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
.get();
.apply(Settings.EMPTY, Version.CURRENT, null);
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -323,9 +323,7 @@ public void testNonDefaultSimilarity() throws Exception {
hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
.create(similarity, Settings.EMPTY,
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
.get();
.apply(Settings.EMPTY, Version.CURRENT, null);
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Supplier;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;
Expand All @@ -32,8 +34,8 @@
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.indices.mapper.MapperRegistry;
import org.elasticsearch.script.ScriptService;

import java.util.AbstractMap;
import java.util.Collection;
Expand Down Expand Up @@ -143,22 +145,23 @@ private void checkMappingsCompatibility(IndexMetaData indexMetaData) {

IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);

final Map<String, SimilarityProvider.Factory> similarityMap = new AbstractMap<String, SimilarityProvider.Factory>() {
final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarityMap
= new AbstractMap<String, TriFunction<Settings, Version, ScriptService, Similarity>>() {
@Override
public boolean containsKey(Object key) {
return true;
}

@Override
public SimilarityProvider.Factory get(Object key) {
public TriFunction<Settings, Version, ScriptService, Similarity> get(Object key) {
assert key instanceof String : "key must be a string but was: " + key.getClass();
return SimilarityService.BUILT_IN.get(SimilarityService.DEFAULT_SIMILARITY);
}

// this entrySet impl isn't fully correct but necessary as SimilarityService will iterate
// over all similarities
@Override
public Set<Entry<String, SimilarityProvider.Factory>> entrySet() {
public Set<Entry<String, TriFunction<Settings, Version, ScriptService, Similarity>>> entrySet() {
return Collections.emptySet();
}
};
Expand Down
8 changes: 5 additions & 3 deletions server/src/main/java/org/elasticsearch/index/IndexModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@

package org.elasticsearch.index;

import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.Version;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
Expand All @@ -40,7 +43,6 @@
import org.elasticsearch.index.shard.IndexingOperationListener;
import org.elasticsearch.index.shard.SearchOperationListener;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.store.IndexStore;
Expand Down Expand Up @@ -107,7 +109,7 @@ public final class IndexModule {
final SetOnce<EngineFactory> engineFactory = new SetOnce<>();
private SetOnce<IndexSearcherWrapperFactory> indexSearcherWrapper = new SetOnce<>();
private final Set<IndexEventListener> indexEventListeners = new HashSet<>();
private final Map<String, SimilarityProvider.Factory> similarities = new HashMap<>();
private final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarities = new HashMap<>();
private final Map<String, Function<IndexSettings, IndexStore>> storeTypes = new HashMap<>();
private final SetOnce<BiFunction<IndexSettings, IndicesQueryCache, QueryCache>> forceQueryCacheProvider = new SetOnce<>();
private final List<SearchOperationListener> searchOperationListeners = new ArrayList<>();
Expand Down Expand Up @@ -251,7 +253,7 @@ public void addIndexStore(String type, Function<IndexSettings, IndexStore> provi
* @param name Name of the SimilarityProvider
* @param similarity SimilarityProvider to register
*/
public void addSimilarity(String name, SimilarityProvider.Factory similarity) {
public void addSimilarity(String name, TriFunction<Settings, Version, ScriptService, Similarity> similarity) {
ensureNotFrozen();
if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) {
throw new IllegalArgumentException("similarity for name: [" + name + " is already registered");
Expand Down

This file was deleted.

This file was deleted.

This file was deleted.

Loading

0 comments on commit 6737bf5

Please sign in to comment.