Skip to content

Commit

Permalink
Improve similarity integration. (elastic#29187)
Browse files Browse the repository at this point in the history
This improves the way similarities are plugged in in order to:
 - reject the classic similarity on 7.x indices and emit a deprecation
   warning otherwise
 - reject unkwown parameters on 7.x indices and emit a deprecation
   warning otherwise

Even though this breaks the plugin API, I'd like to backport to 7.x so
that users can get deprecation warnings when they are doing something
that will become unsupported in the future.

Closes elastic#23208
Closes elastic#29035
  • Loading branch information
jpountz committed Apr 4, 2018
1 parent 1fd448b commit 9e61f6d
Show file tree
Hide file tree
Showing 25 changed files with 484 additions and 819 deletions.
8 changes: 5 additions & 3 deletions docs/reference/index-modules/similarity.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ Type name: `BM25`
[[classic-similarity]]
==== Classic similarity

deprecated[6.3.0, The quality of the produced scores used to rely on coordination factors, which have been removed. It is advised to use BM25 instead.]

The classic similarity that is based on the TF/IDF model. This
similarity has the following option:

Expand All @@ -97,7 +99,7 @@ similarity has the following option:
Type name: `classic`

[float]
[[drf]]
[[dfr]]
==== DFR similarity

Similarity that implements the
Expand Down Expand Up @@ -510,7 +512,7 @@ PUT /index
"index": {
"similarity": {
"default": {
"type": "classic"
"type": "boolean"
}
}
}
Expand All @@ -532,7 +534,7 @@ PUT /index/_settings
"index": {
"similarity": {
"default": {
"type": "classic"
"type": "boolean"
}
}
}
Expand Down
9 changes: 2 additions & 7 deletions docs/reference/mapping/params/similarity.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,9 @@ PUT my_index
"default_field": { <1>
"type": "text"
},
"classic_field": {
"type": "text",
"similarity": "classic" <2>
},
"boolean_sim_field": {
"type": "text",
"similarity": "boolean" <3>
"similarity": "boolean" <2>
}
}
}
Expand All @@ -59,5 +55,4 @@ PUT my_index
--------------------------------------------------
// CONSOLE
<1> The `default_field` uses the `BM25` similarity.
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
<3> The `boolean_sim_field` uses the `boolean` similarity.
<2> The `boolean_sim_field` uses the `boolean` similarity.
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ protected Settings indexSettings() {

@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
similarity = randomFrom("classic", "BM25");
similarity = randomFrom("boolean", "BM25");
XContentBuilder mapping = jsonBuilder().startObject().startObject("_doc").startObject("properties")
.startObject("join_field")
.field("type", "join")
Expand Down Expand Up @@ -336,9 +336,7 @@ public void testNonDefaultSimilarity() throws Exception {
hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
.create(similarity, Settings.EMPTY,
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
.get();
.apply(Settings.EMPTY, Version.CURRENT, null);
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {

@Override
protected void initializeAdditionalMappings(MapperService mapperService) throws IOException {
similarity = randomFrom("classic", "BM25");
similarity = randomFrom("boolean", "BM25");
// TODO: use a single type when inner hits have been changed to work with join field,
// this test randomly generates queries with inner hits
mapperService.merge(PARENT_TYPE, new CompressedXContent(Strings.toString(PutMappingRequest.buildFromSimplifiedDef(PARENT_TYPE,
Expand Down Expand Up @@ -323,9 +323,7 @@ public void testNonDefaultSimilarity() throws Exception {
hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None);
HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext);
Similarity expected = SimilarityService.BUILT_IN.get(similarity)
.create(similarity, Settings.EMPTY,
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null)
.get();
.apply(Settings.EMPTY, Version.CURRENT, null);
assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass()));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@

import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.Version;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;
Expand All @@ -31,8 +33,8 @@
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.indices.mapper.MapperRegistry;
import org.elasticsearch.script.ScriptService;

import java.util.AbstractMap;
import java.util.Collection;
Expand Down Expand Up @@ -141,22 +143,24 @@ private void checkMappingsCompatibility(IndexMetaData indexMetaData) {
// actual upgrade.

IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings);
final Map<String, SimilarityProvider.Factory> similarityMap = new AbstractMap<String, SimilarityProvider.Factory>() {

final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarityMap
= new AbstractMap<String, TriFunction<Settings, Version, ScriptService, Similarity>>() {
@Override
public boolean containsKey(Object key) {
return true;
}

@Override
public SimilarityProvider.Factory get(Object key) {
public TriFunction<Settings, Version, ScriptService, Similarity> get(Object key) {
assert key instanceof String : "key must be a string but was: " + key.getClass();
return SimilarityService.BUILT_IN.get(SimilarityService.DEFAULT_SIMILARITY);
}

// this entrySet impl isn't fully correct but necessary as SimilarityService will iterate
// over all similarities
@Override
public Set<Entry<String, SimilarityProvider.Factory>> entrySet() {
public Set<Entry<String, TriFunction<Settings, Version, ScriptService, Similarity>>> entrySet() {
return Collections.emptySet();
}
};
Expand Down
24 changes: 15 additions & 9 deletions server/src/main/java/org/elasticsearch/index/IndexModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,13 @@

package org.elasticsearch.index;

import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.util.SetOnce;
import org.elasticsearch.Version;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.TriFunction;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
Expand All @@ -39,9 +43,6 @@
import org.elasticsearch.index.shard.IndexSearcherWrapper;
import org.elasticsearch.index.shard.IndexingOperationListener;
import org.elasticsearch.index.shard.SearchOperationListener;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.similarity.BM25SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.store.IndexStore;
import org.elasticsearch.indices.IndicesQueryCache;
Expand All @@ -68,10 +69,10 @@
/**
* IndexModule represents the central extension point for index level custom implementations like:
* <ul>
* <li>{@link SimilarityProvider} - New {@link SimilarityProvider} implementations can be registered through
* {@link #addSimilarity(String, SimilarityProvider.Factory)} while existing Providers can be referenced through Settings under the
* <li>{@link Similarity} - New {@link Similarity} implementations can be registered through
* {@link #addSimilarity(String, TriFunction)} while existing Providers can be referenced through Settings under the
* {@link IndexModule#SIMILARITY_SETTINGS_PREFIX} prefix along with the "type" value. For example, to reference the
* {@link BM25SimilarityProvider}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
* {@link BM25Similarity}, the configuration <tt>"index.similarity.my_similarity.type : "BM25"</tt> can be used.</li>
* <li>{@link IndexStore} - Custom {@link IndexStore} instances can be registered via {@link #addIndexStore(String, Function)}</li>
* <li>{@link IndexEventListener} - Custom {@link IndexEventListener} instances can be registered via
* {@link #addIndexEventListener(IndexEventListener)}</li>
Expand Down Expand Up @@ -107,7 +108,7 @@ public final class IndexModule {
final SetOnce<EngineFactory> engineFactory = new SetOnce<>();
private SetOnce<IndexSearcherWrapperFactory> indexSearcherWrapper = new SetOnce<>();
private final Set<IndexEventListener> indexEventListeners = new HashSet<>();
private final Map<String, SimilarityProvider.Factory> similarities = new HashMap<>();
private final Map<String, TriFunction<Settings, Version, ScriptService, Similarity>> similarities = new HashMap<>();
private final Map<String, Function<IndexSettings, IndexStore>> storeTypes = new HashMap<>();
private final SetOnce<BiFunction<IndexSettings, IndicesQueryCache, QueryCache>> forceQueryCacheProvider = new SetOnce<>();
private final List<SearchOperationListener> searchOperationListeners = new ArrayList<>();
Expand Down Expand Up @@ -246,12 +247,17 @@ public void addIndexStore(String type, Function<IndexSettings, IndexStore> provi


/**
* Registers the given {@link SimilarityProvider} with the given name
* Registers the given {@link Similarity} with the given name.
* The function takes as parameters:<ul>
* <li>settings for this similarity
* <li>version of Elasticsearch when the index was created
* <li>ScriptService, for script-based similarities
* </ul>
*
* @param name Name of the SimilarityProvider
* @param similarity SimilarityProvider to register
*/
public void addSimilarity(String name, SimilarityProvider.Factory similarity) {
public void addSimilarity(String name, TriFunction<Settings, Version, ScriptService, Similarity> similarity) {
ensureNotFrozen();
if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) {
throw new IllegalArgumentException("similarity for name: [" + name + " is already registered");
Expand Down

This file was deleted.

This file was deleted.

Loading

0 comments on commit 9e61f6d

Please sign in to comment.