From 6737bf55c8832d0c57da424acbcbb344ad7e422c Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 20 Mar 2018 18:50:59 +0100 Subject: [PATCH] Improve similarity integration. This improves the way similarities are plugged in in order to: - reject the classic similarity on 7.x indices and emit a deprecation warning otherwise - reject unknown parameters on 7.x indices and emit a deprecation warning otherwise Even though this breaks the plugin API, I'd like to backport to 7.x so that users can get deprecation warnings when they are doing something that will become unsupported in the future. Closes #23208 Closes #29035 --- .../index-modules/similarity.asciidoc | 18 +- .../mapping/params/similarity.asciidoc | 9 +- .../migration/migrate_7_0/mappings.asciidoc | 13 + .../join/query/HasChildQueryBuilderTests.java | 4 +- .../LegacyHasChildQueryBuilderTests.java | 4 +- .../metadata/MetaDataIndexUpgradeService.java | 11 +- .../org/elasticsearch/index/IndexModule.java | 8 +- .../AbstractSimilarityProvider.java | 82 ----- .../similarity/BM25SimilarityProvider.java | 59 ---- .../similarity/BooleanSimilarityProvider.java | 48 --- .../similarity/ClassicSimilarityProvider.java | 52 --- .../similarity/DFISimilarityProvider.java | 79 ----- .../similarity/DFRSimilarityProvider.java | 123 ------- .../similarity/IBSimilarityProvider.java | 113 ------- .../LMDirichletSimilarityProvider.java | 52 --- .../LMJelinekMercerSimilarityProvider.java | 52 --- .../ScriptedSimilarityProvider.java | 19 +- .../index/similarity/SimilarityProvider.java | 36 +-- .../index/similarity/SimilarityProviders.java | 300 ++++++++++++++++++ .../index/similarity/SimilarityService.java | 122 ++++--- .../elasticsearch/index/IndexModuleTests.java | 14 +- .../similarity/SimilarityServiceTests.java | 6 +- .../index/similarity/SimilarityTests.java | 59 +++- .../indices/IndicesServiceTests.java | 8 +- .../similarity/SimilarityIT.java | 8 +- .../index/mapper/FieldTypeTestCase.java | 9 +- 26 files changed, 502 
insertions(+), 806 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/BooleanSimilarityProvider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/ClassicSimilarityProvider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/DFRSimilarityProvider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/IBSimilarityProvider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/LMDirichletSimilarityProvider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/similarity/LMJelinekMercerSimilarityProvider.java create mode 100644 server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java diff --git a/docs/reference/index-modules/similarity.asciidoc b/docs/reference/index-modules/similarity.asciidoc index d0fd5dd399867..40f7de90c0ab2 100644 --- a/docs/reference/index-modules/similarity.asciidoc +++ b/docs/reference/index-modules/similarity.asciidoc @@ -82,20 +82,6 @@ This similarity has the following options: Type name: `BM25` -[float] -[[classic-similarity]] -==== Classic similarity - -The classic similarity that is based on the TF/IDF model. This -similarity has the following option: - -`discount_overlaps`:: - Determines whether overlap tokens (Tokens with - 0 position increment) are ignored when computing norm. By default this - is true, meaning overlap tokens do not count when computing norms. 
- -Type name: `classic` - [float] [[dfr]] ==== DFR similarity @@ -541,7 +527,7 @@ PUT /index "index": { "similarity": { "default": { - "type": "classic" + "type": "boolean" } } } @@ -563,7 +549,7 @@ PUT /index/_settings "index": { "similarity": { "default": { - "type": "classic" + "type": "boolean" } } } diff --git a/docs/reference/mapping/params/similarity.asciidoc b/docs/reference/mapping/params/similarity.asciidoc index 3509cd0cf8eb5..a0be0fb3ccbeb 100644 --- a/docs/reference/mapping/params/similarity.asciidoc +++ b/docs/reference/mapping/params/similarity.asciidoc @@ -44,13 +44,9 @@ PUT my_index "default_field": { <1> "type": "text" }, - "classic_field": { - "type": "text", - "similarity": "classic" <2> - }, "boolean_sim_field": { "type": "text", - "similarity": "boolean" <3> + "similarity": "boolean" <2> } } } @@ -59,5 +55,4 @@ PUT my_index -------------------------------------------------- // CONSOLE <1> The `default_field` uses the `BM25` similarity. -<2> The `classic_field` uses the `classic` similarity (ie TF/IDF). -<3> The `boolean_sim_field` uses the `boolean` similarity. +<2> The `boolean_sim_field` uses the `boolean` similarity. diff --git a/docs/reference/migration/migrate_7_0/mappings.asciidoc b/docs/reference/migration/migrate_7_0/mappings.asciidoc index 8f1474aa57cbe..b0ab90546c3a8 100644 --- a/docs/reference/migration/migrate_7_0/mappings.asciidoc +++ b/docs/reference/migration/migrate_7_0/mappings.asciidoc @@ -24,3 +24,16 @@ the index setting `index.mapping.nested_objects.limit`. ==== The `update_all_types` option has been removed This option is useless now that all indices have at most one type. + +=== The `classic` similarity has been removed + +The `classic` similarity relied on coordination factors for scoring to be good +in presence of stopwords in the query. This feature has been removed from +Lucene, which means that the `classic` similarity now produces scores of lower +quality. 
It is advised to switch to `BM25` instead, which is widely accepted +as a better alternative. + +=== Similarities fail when unsupported options are provided + +An error will now be thrown when unknown configuration options are provided +to similarities. Such unknown parameters were ignored before. diff --git a/modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java b/modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java index 0dcf5933f4f23..8172aaf759fda 100644 --- a/modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java +++ b/modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java @@ -337,9 +337,7 @@ public void testNonDefaultSimilarity() throws Exception { hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None); HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext); Similarity expected = SimilarityService.BUILT_IN.get(similarity) - .create(similarity, Settings.EMPTY, - Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null) - .get(); + .apply(Settings.EMPTY, Version.CURRENT, null); assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass())); } diff --git a/modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java b/modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java index a52cc1db3d088..2c4338b707bad 100644 --- a/modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java +++ b/modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java @@ -323,9 +323,7 @@ public void testNonDefaultSimilarity() throws Exception { hasChildQuery(CHILD_TYPE, new 
TermQueryBuilder("custom_string", "value"), ScoreMode.None); HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext); Similarity expected = SimilarityService.BUILT_IN.get(similarity) - .create(similarity, Settings.EMPTY, - Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null) - .get(); + .apply(Settings.EMPTY, Version.CURRENT, null); assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass())); } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index a9301056f5ae0..a4d7cfadf2730 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -21,7 +21,9 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.logging.log4j.util.Supplier; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.Version; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.settings.IndexScopedSettings; import org.elasticsearch.common.settings.Settings; @@ -32,8 +34,8 @@ import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.similarity.SimilarityService; -import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.indices.mapper.MapperRegistry; +import org.elasticsearch.script.ScriptService; import java.util.AbstractMap; import java.util.Collection; @@ -143,14 +145,15 @@ private void checkMappingsCompatibility(IndexMetaData indexMetaData) { IndexSettings 
indexSettings = new IndexSettings(indexMetaData, this.settings); - final Map similarityMap = new AbstractMap() { + final Map> similarityMap + = new AbstractMap>() { @Override public boolean containsKey(Object key) { return true; } @Override - public SimilarityProvider.Factory get(Object key) { + public TriFunction get(Object key) { assert key instanceof String : "key must be a string but was: " + key.getClass(); return SimilarityService.BUILT_IN.get(SimilarityService.DEFAULT_SIMILARITY); } @@ -158,7 +161,7 @@ public SimilarityProvider.Factory get(Object key) { // this entrySet impl isn't fully correct but necessary as SimilarityService will iterate // over all similarities @Override - public Set> entrySet() { + public Set>> entrySet() { return Collections.emptySet(); } }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexModule.java b/server/src/main/java/org/elasticsearch/index/IndexModule.java index 869f8c9ca72db..9947a0c686dbe 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexModule.java +++ b/server/src/main/java/org/elasticsearch/index/IndexModule.java @@ -19,9 +19,12 @@ package org.elasticsearch.index; +import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.SetOnce; +import org.elasticsearch.Version; import org.elasticsearch.client.Client; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; @@ -40,7 +43,6 @@ import org.elasticsearch.index.shard.IndexingOperationListener; import org.elasticsearch.index.shard.SearchOperationListener; import org.elasticsearch.index.shard.ShardId; -import org.elasticsearch.index.similarity.BM25SimilarityProvider; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.index.similarity.SimilarityService; import 
org.elasticsearch.index.store.IndexStore; @@ -107,7 +109,7 @@ public final class IndexModule { final SetOnce engineFactory = new SetOnce<>(); private SetOnce indexSearcherWrapper = new SetOnce<>(); private final Set indexEventListeners = new HashSet<>(); - private final Map similarities = new HashMap<>(); + private final Map> similarities = new HashMap<>(); private final Map> storeTypes = new HashMap<>(); private final SetOnce> forceQueryCacheProvider = new SetOnce<>(); private final List searchOperationListeners = new ArrayList<>(); @@ -251,7 +253,7 @@ public void addIndexStore(String type, Function provi * @param name Name of the SimilarityProvider * @param similarity SimilarityProvider to register */ - public void addSimilarity(String name, SimilarityProvider.Factory similarity) { + public void addSimilarity(String name, TriFunction similarity) { ensureNotFrozen(); if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) { throw new IllegalArgumentException("similarity for name: [" + name + " is already registered"); diff --git a/server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java deleted file mode 100644 index fef43d6f5deaf..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.Normalization; -import org.apache.lucene.search.similarities.NormalizationH1; -import org.apache.lucene.search.similarities.NormalizationH2; -import org.apache.lucene.search.similarities.NormalizationH3; -import org.apache.lucene.search.similarities.NormalizationZ; -import org.elasticsearch.common.settings.Settings; - -/** - * Abstract implementation of {@link SimilarityProvider} providing common behaviour - */ -public abstract class AbstractSimilarityProvider implements SimilarityProvider { - - protected static final Normalization NO_NORMALIZATION = new Normalization.NoNormalization(); - - private final String name; - - /** - * Creates a new AbstractSimilarityProvider with the given name - * - * @param name Name of the Provider - */ - protected AbstractSimilarityProvider(String name) { - this.name = name; - } - - /** - * {@inheritDoc} - */ - @Override - public String name() { - return this.name; - } - - /** - * Parses the given Settings and creates the appropriate {@link Normalization} - * - * @param settings Settings to parse - * @return {@link Normalization} referred to in the Settings - */ - protected Normalization parseNormalization(Settings settings) { - String normalization = settings.get("normalization"); - - if ("no".equals(normalization)) { - return NO_NORMALIZATION; - } else if ("h1".equals(normalization)) { - float c = settings.getAsFloat("normalization.h1.c", 1f); - return new NormalizationH1(c); - } else if 
("h2".equals(normalization)) { - float c = settings.getAsFloat("normalization.h2.c", 1f); - return new NormalizationH2(c); - } else if ("h3".equals(normalization)) { - float c = settings.getAsFloat("normalization.h3.c", 800f); - return new NormalizationH3(c); - } else if ("z".equals(normalization)) { - float z = settings.getAsFloat("normalization.z.z", 0.30f); - return new NormalizationZ(z); - } else { - throw new IllegalArgumentException("Unsupported Normalization [" + normalization + "]"); - } - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java deleted file mode 100644 index ad49e7e9cc901..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.BM25Similarity; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for the {@link BM25Similarity}. - *

- * Configuration options available: - *

    - *
  • k1
  • - *
  • b
  • - *
  • discount_overlaps
  • - *
- * @see BM25Similarity For more information about configuration - */ -public class BM25SimilarityProvider extends AbstractSimilarityProvider { - - private final BM25Similarity similarity; - - public BM25SimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - float k1 = settings.getAsFloat("k1", 1.2f); - float b = settings.getAsFloat("b", 0.75f); - boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); - - this.similarity = new BM25Similarity(k1, b); - this.similarity.setDiscountOverlaps(discountOverlaps); - } - - /** - * {@inheritDoc} - */ - @Override - public Similarity get() { - return similarity; - } - -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/BooleanSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/BooleanSimilarityProvider.java deleted file mode 100644 index e5db045f3716f..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/BooleanSimilarityProvider.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.BooleanSimilarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for the {@link BooleanSimilarity}, - * which is a simple similarity that gives terms a score equal - * to their query boost only. This is useful in situations where - * a field does not need to be scored by a full-text ranking - * algorithm, but rather all that matters is whether the query - * terms matched or not. - */ -public class BooleanSimilarityProvider extends AbstractSimilarityProvider { - - private final BooleanSimilarity similarity = new BooleanSimilarity(); - - public BooleanSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - } - - /** - * {@inheritDoc} - */ - @Override - public BooleanSimilarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/ClassicSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/ClassicSimilarityProvider.java deleted file mode 100644 index 419321996a301..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/ClassicSimilarityProvider.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.ClassicSimilarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for {@link ClassicSimilarity}. - *

- * Configuration options available: - *

    - *
  • discount_overlaps
  • - *
- * @see ClassicSimilarity For more information about configuration - */ -public class ClassicSimilarityProvider extends AbstractSimilarityProvider { - - private final ClassicSimilarity similarity = new ClassicSimilarity(); - - public ClassicSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); - this.similarity.setDiscountOverlaps(discountOverlaps); - } - - /** - * {@inheritDoc} - */ - @Override - public ClassicSimilarity get() { - return similarity; - } - -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java deleted file mode 100644 index 324314b2669b2..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.DFISimilarity; -import org.apache.lucene.search.similarities.Independence; -import org.apache.lucene.search.similarities.IndependenceChiSquared; -import org.apache.lucene.search.similarities.IndependenceSaturated; -import org.apache.lucene.search.similarities.IndependenceStandardized; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.unmodifiableMap; - -/** - * {@link SimilarityProvider} for the {@link DFISimilarity}. - *

- * Configuration options available: - *

    - *
  • independence_measure
  • - *
  • discount_overlaps
  • - *
- * @see DFISimilarity For more information about configuration - */ -public class DFISimilarityProvider extends AbstractSimilarityProvider { - // the "basic models" of divergence from independence - private static final Map INDEPENDENCE_MEASURES; - static { - Map measures = new HashMap<>(); - measures.put("standardized", new IndependenceStandardized()); - measures.put("saturated", new IndependenceSaturated()); - measures.put("chisquared", new IndependenceChiSquared()); - INDEPENDENCE_MEASURES = unmodifiableMap(measures); - } - - private final DFISimilarity similarity; - - public DFISimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); - Independence measure = parseIndependence(settings); - this.similarity = new DFISimilarity(measure); - this.similarity.setDiscountOverlaps(discountOverlaps); - } - - private Independence parseIndependence(Settings settings) { - String name = settings.get("independence_measure"); - Independence measure = INDEPENDENCE_MEASURES.get(name); - if (measure == null) { - throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "]"); - } - return measure; - } - - @Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/DFRSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/DFRSimilarityProvider.java deleted file mode 100644 index 0d47e86da0182..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/DFRSimilarityProvider.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.AfterEffect; -import org.apache.lucene.search.similarities.AfterEffectB; -import org.apache.lucene.search.similarities.AfterEffectL; -import org.apache.lucene.search.similarities.BasicModel; -import org.apache.lucene.search.similarities.BasicModelBE; -import org.apache.lucene.search.similarities.BasicModelD; -import org.apache.lucene.search.similarities.BasicModelG; -import org.apache.lucene.search.similarities.BasicModelIF; -import org.apache.lucene.search.similarities.BasicModelIn; -import org.apache.lucene.search.similarities.BasicModelIne; -import org.apache.lucene.search.similarities.BasicModelP; -import org.apache.lucene.search.similarities.DFRSimilarity; -import org.apache.lucene.search.similarities.Normalization; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.unmodifiableMap; - -/** - * {@link SimilarityProvider} for {@link DFRSimilarity}. - *

- * Configuration options available: - *

    - *
  • basic_model
  • - *
  • after_effect
  • - *
  • normalization
  • - *
- * @see DFRSimilarity For more information about configuration - */ -public class DFRSimilarityProvider extends AbstractSimilarityProvider { - private static final Map BASIC_MODELS; - private static final Map AFTER_EFFECTS; - - static { - Map models = new HashMap<>(); - models.put("be", new BasicModelBE()); - models.put("d", new BasicModelD()); - models.put("g", new BasicModelG()); - models.put("if", new BasicModelIF()); - models.put("in", new BasicModelIn()); - models.put("ine", new BasicModelIne()); - models.put("p", new BasicModelP()); - BASIC_MODELS = unmodifiableMap(models); - - Map effects = new HashMap<>(); - effects.put("no", new AfterEffect.NoAfterEffect()); - effects.put("b", new AfterEffectB()); - effects.put("l", new AfterEffectL()); - AFTER_EFFECTS = unmodifiableMap(effects); - } - - private final DFRSimilarity similarity; - - public DFRSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - BasicModel basicModel = parseBasicModel(settings); - AfterEffect afterEffect = parseAfterEffect(settings); - Normalization normalization = parseNormalization(settings); - this.similarity = new DFRSimilarity(basicModel, afterEffect, normalization); - } - - /** - * Parses the given Settings and creates the appropriate {@link BasicModel} - * - * @param settings Settings to parse - * @return {@link BasicModel} referred to in the Settings - */ - protected BasicModel parseBasicModel(Settings settings) { - String basicModel = settings.get("basic_model"); - BasicModel model = BASIC_MODELS.get(basicModel); - if (model == null) { - throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "]"); - } - return model; - } - - /** - * Parses the given Settings and creates the appropriate {@link AfterEffect} - * - * @param settings Settings to parse - * @return {@link AfterEffect} referred to in the Settings - */ - protected AfterEffect parseAfterEffect(Settings settings) { - String afterEffect = 
settings.get("after_effect"); - AfterEffect effect = AFTER_EFFECTS.get(afterEffect); - if (effect == null) { - throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "]"); - } - return effect; - } - - /** - * {@inheritDoc} - */ - @Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/IBSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/IBSimilarityProvider.java deleted file mode 100644 index a43276bbfaa82..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/IBSimilarityProvider.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.Distribution; -import org.apache.lucene.search.similarities.DistributionLL; -import org.apache.lucene.search.similarities.DistributionSPL; -import org.apache.lucene.search.similarities.IBSimilarity; -import org.apache.lucene.search.similarities.Lambda; -import org.apache.lucene.search.similarities.LambdaDF; -import org.apache.lucene.search.similarities.LambdaTTF; -import org.apache.lucene.search.similarities.Normalization; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.unmodifiableMap; - -/** - * {@link SimilarityProvider} for {@link IBSimilarity}. - *

- * Configuration options available: - * <ul> - * <li>distribution</li> - * <li>lambda</li> - * <li>normalization</li> - * </ul>
- * @see IBSimilarity For more information about configuration - */ -public class IBSimilarityProvider extends AbstractSimilarityProvider { - - private static final Map DISTRIBUTIONS; - private static final Map LAMBDAS; - - static { - Map distributions = new HashMap<>(); - distributions.put("ll", new DistributionLL()); - distributions.put("spl", new DistributionSPL()); - DISTRIBUTIONS = unmodifiableMap(distributions); - - Map lamdas = new HashMap<>(); - lamdas.put("df", new LambdaDF()); - lamdas.put("ttf", new LambdaTTF()); - LAMBDAS = unmodifiableMap(lamdas); - } - - private final IBSimilarity similarity; - - public IBSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - Distribution distribution = parseDistribution(settings); - Lambda lambda = parseLambda(settings); - Normalization normalization = parseNormalization(settings); - this.similarity = new IBSimilarity(distribution, lambda, normalization); - } - - /** - * Parses the given Settings and creates the appropriate {@link Distribution} - * - * @param settings Settings to parse - * @return {@link Normalization} referred to in the Settings - */ - protected Distribution parseDistribution(Settings settings) { - String rawDistribution = settings.get("distribution"); - Distribution distribution = DISTRIBUTIONS.get(rawDistribution); - if (distribution == null) { - throw new IllegalArgumentException("Unsupported Distribution [" + rawDistribution + "]"); - } - return distribution; - } - - /** - * Parses the given Settings and creates the appropriate {@link Lambda} - * - * @param settings Settings to parse - * @return {@link Normalization} referred to in the Settings - */ - protected Lambda parseLambda(Settings settings) { - String rawLambda = settings.get("lambda"); - Lambda lambda = LAMBDAS.get(rawLambda); - if (lambda == null) { - throw new IllegalArgumentException("Unsupported Lambda [" + rawLambda + "]"); - } - return lambda; - } - - /** - * {@inheritDoc} - */ - @Override - 
public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/LMDirichletSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/LMDirichletSimilarityProvider.java deleted file mode 100644 index 170a7e42133c9..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/LMDirichletSimilarityProvider.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.LMDirichletSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for {@link LMDirichletSimilarity}. - *

- * Configuration options available: - * <ul> - * <li>mu</li> - * </ul>
- * @see LMDirichletSimilarity For more information about configuration - */ -public class LMDirichletSimilarityProvider extends AbstractSimilarityProvider { - - private final LMDirichletSimilarity similarity; - - public LMDirichletSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - float mu = settings.getAsFloat("mu", 2000f); - this.similarity = new LMDirichletSimilarity(mu); - } - - /** - * {@inheritDoc} - */ - @Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/LMJelinekMercerSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/LMJelinekMercerSimilarityProvider.java deleted file mode 100644 index 2ee04b78ec2ef..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/LMJelinekMercerSimilarityProvider.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for {@link LMJelinekMercerSimilarity}. - *

- * Configuration options available: - * <ul> - * <li>lambda</li> - * </ul>
- * @see LMJelinekMercerSimilarity For more information about configuration - */ -public class LMJelinekMercerSimilarityProvider extends AbstractSimilarityProvider { - - private final LMJelinekMercerSimilarity similarity; - - public LMJelinekMercerSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - float lambda = settings.getAsFloat("lambda", 0.1f); - this.similarity = new LMJelinekMercerSimilarity(lambda); - } - - /** - * {@inheritDoc} - */ - @Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java index e290fd3457aeb..190f861f26216 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java @@ -20,6 +20,8 @@ package org.elasticsearch.index.similarity; import org.apache.lucene.search.similarities.Similarity; +import org.elasticsearch.Version; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptService; @@ -27,13 +29,11 @@ import org.elasticsearch.script.SimilarityWeightScript; /** Provider of scripted similarities. 
*/ -public class ScriptedSimilarityProvider extends AbstractSimilarityProvider { +final class ScriptedSimilarityProvider implements TriFunction { - private final ScriptedSimilarity scriptedSimilarity; - - public ScriptedSimilarityProvider(String name, Settings settings, Settings indexSettings, ScriptService scriptService) { - super(name); - boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); + @Override + public Similarity apply(Settings settings, Version indexCreatedVersion, ScriptService scriptService) { + boolean discountOverlaps = settings.getAsBoolean(SimilarityProviders.DISCOUNT_OVERLAPS, true); Settings scriptSettings = settings.getAsSettings("script"); Script script = Script.parse(scriptSettings); SimilarityScript.Factory scriptFactory = scriptService.compile(script, SimilarityScript.CONTEXT); @@ -44,15 +44,10 @@ public ScriptedSimilarityProvider(String name, Settings settings, Settings index weightScript = Script.parse(weightScriptSettings); weightScriptFactory = scriptService.compile(weightScript, SimilarityWeightScript.CONTEXT); } - scriptedSimilarity = new ScriptedSimilarity( + return new ScriptedSimilarity( weightScript == null ? null : weightScript.toString(), weightScriptFactory == null ? 
null : weightScriptFactory::newInstance, script.toString(), scriptFactory::newInstance, discountOverlaps); } - @Override - public Similarity get() { - return scriptedSimilarity; - } - } diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java index 666e70c406937..fed15b3058360 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java @@ -20,32 +20,32 @@ package org.elasticsearch.index.similarity; import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.script.ScriptService; /** - * Provider for {@link Similarity} instances + * Wrapper around a {@link Similarity} and its name. */ -public interface SimilarityProvider { +public final class SimilarityProvider { + + private final String name; + private final Similarity similarity; + + public SimilarityProvider(String name, Similarity similarity) { + this.name = name; + this.similarity = similarity; + } /** - * Returns the name associated with the Provider - * - * @return Name of the Provider + * Return the name of this {@link Similarity}. */ - String name(); + public String name() { + return name; + } /** - * Returns the {@link Similarity} the Provider is for - * - * @return Provided {@link Similarity} + * Return the wrapped {@link Similarity}. */ - Similarity get(); - - /** Factory of {@link SimilarityProvider} */ - @FunctionalInterface - interface Factory { - /** Create a new {@link SimilarityProvider}. 
*/ - SimilarityProvider create(String name, Settings settings, Settings indexSettings, ScriptService scriptService); + public Similarity get() { + return similarity; } + } diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java new file mode 100644 index 0000000000000..18c6d6a3fc063 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java @@ -0,0 +1,300 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.similarity; + +import org.apache.lucene.search.similarities.AfterEffect; +import org.apache.lucene.search.similarities.AfterEffectB; +import org.apache.lucene.search.similarities.AfterEffectL; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.BasicModel; +import org.apache.lucene.search.similarities.BasicModelBE; +import org.apache.lucene.search.similarities.BasicModelD; +import org.apache.lucene.search.similarities.BasicModelG; +import org.apache.lucene.search.similarities.BasicModelIF; +import org.apache.lucene.search.similarities.BasicModelIn; +import org.apache.lucene.search.similarities.BasicModelIne; +import org.apache.lucene.search.similarities.BasicModelP; +import org.apache.lucene.search.similarities.BooleanSimilarity; +import org.apache.lucene.search.similarities.ClassicSimilarity; +import org.apache.lucene.search.similarities.DFISimilarity; +import org.apache.lucene.search.similarities.DFRSimilarity; +import org.apache.lucene.search.similarities.Distribution; +import org.apache.lucene.search.similarities.DistributionLL; +import org.apache.lucene.search.similarities.DistributionSPL; +import org.apache.lucene.search.similarities.IBSimilarity; +import org.apache.lucene.search.similarities.Independence; +import org.apache.lucene.search.similarities.IndependenceChiSquared; +import org.apache.lucene.search.similarities.IndependenceSaturated; +import org.apache.lucene.search.similarities.IndependenceStandardized; +import org.apache.lucene.search.similarities.LMDirichletSimilarity; +import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; +import org.apache.lucene.search.similarities.Lambda; +import org.apache.lucene.search.similarities.LambdaDF; +import org.apache.lucene.search.similarities.LambdaTTF; +import org.apache.lucene.search.similarities.Normalization; +import org.apache.lucene.search.similarities.NormalizationH1; +import 
org.apache.lucene.search.similarities.NormalizationH2; +import org.apache.lucene.search.similarities.NormalizationH3; +import org.apache.lucene.search.similarities.NormalizationZ; +import org.elasticsearch.Version; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.settings.Settings; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import static java.util.Collections.unmodifiableMap; + +final class SimilarityProviders { + + private SimilarityProviders() {} // no instantiation + + private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(SimilarityProviders.class)); + static final String DISCOUNT_OVERLAPS = "discount_overlaps"; + + private static final Map BASIC_MODELS; + private static final Map AFTER_EFFECTS; + + static { + Map models = new HashMap<>(); + models.put("be", new BasicModelBE()); + models.put("d", new BasicModelD()); + models.put("g", new BasicModelG()); + models.put("if", new BasicModelIF()); + models.put("in", new BasicModelIn()); + models.put("ine", new BasicModelIne()); + models.put("p", new BasicModelP()); + BASIC_MODELS = unmodifiableMap(models); + + Map effects = new HashMap<>(); + effects.put("no", new AfterEffect.NoAfterEffect()); + effects.put("b", new AfterEffectB()); + effects.put("l", new AfterEffectL()); + AFTER_EFFECTS = unmodifiableMap(effects); + } + + private static final Map INDEPENDENCE_MEASURES; + static { + Map measures = new HashMap<>(); + measures.put("standardized", new IndependenceStandardized()); + measures.put("saturated", new IndependenceSaturated()); + measures.put("chisquared", new IndependenceChiSquared()); + INDEPENDENCE_MEASURES = unmodifiableMap(measures); + } + + private static final Map DISTRIBUTIONS; + private static final Map LAMBDAS; + + static { + Map distributions = new HashMap<>(); + 
distributions.put("ll", new DistributionLL()); + distributions.put("spl", new DistributionSPL()); + DISTRIBUTIONS = unmodifiableMap(distributions); + + Map lamdas = new HashMap<>(); + lamdas.put("df", new LambdaDF()); + lamdas.put("ttf", new LambdaTTF()); + LAMBDAS = unmodifiableMap(lamdas); + } + + /** + * Parses the given Settings and creates the appropriate {@link BasicModel} + * + * @param settings Settings to parse + * @return {@link BasicModel} referred to in the Settings + */ + private static BasicModel parseBasicModel(Settings settings) { + String basicModel = settings.get("basic_model"); + BasicModel model = BASIC_MODELS.get(basicModel); + if (model == null) { + throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "], expected one of " + BASIC_MODELS.keySet()); + } + return model; + } + + /** + * Parses the given Settings and creates the appropriate {@link AfterEffect} + * + * @param settings Settings to parse + * @return {@link AfterEffect} referred to in the Settings + */ + private static AfterEffect parseAfterEffect(Settings settings) { + String afterEffect = settings.get("after_effect"); + AfterEffect effect = AFTER_EFFECTS.get(afterEffect); + if (effect == null) { + throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "], expected one of " + AFTER_EFFECTS.keySet()); + } + return effect; + } + + /** + * Parses the given Settings and creates the appropriate {@link Normalization} + * + * @param settings Settings to parse + * @return {@link Normalization} referred to in the Settings + */ + private static Normalization parseNormalization(Settings settings) { + String normalization = settings.get("normalization"); + + if ("no".equals(normalization)) { + return new Normalization.NoNormalization(); + } else if ("h1".equals(normalization)) { + float c = settings.getAsFloat("normalization.h1.c", 1f); + return new NormalizationH1(c); + } else if ("h2".equals(normalization)) { + float c = 
settings.getAsFloat("normalization.h2.c", 1f); + return new NormalizationH2(c); + } else if ("h3".equals(normalization)) { + float c = settings.getAsFloat("normalization.h3.c", 800f); + return new NormalizationH3(c); + } else if ("z".equals(normalization)) { + float z = settings.getAsFloat("normalization.z.z", 0.30f); + return new NormalizationZ(z); + } else { + throw new IllegalArgumentException("Unsupported Normalization [" + normalization + "]"); + } + } + + private static Independence parseIndependence(Settings settings) { + String name = settings.get("independence_measure"); + Independence measure = INDEPENDENCE_MEASURES.get(name); + if (measure == null) { + throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "], expected one of " + + INDEPENDENCE_MEASURES.keySet()); + } + return measure; + } + + /** + * Parses the given Settings and creates the appropriate {@link Distribution} + * + * @param settings Settings to parse + * @return {@link Distribution} referred to in the Settings + */ + private static Distribution parseDistribution(Settings settings) { + String rawDistribution = settings.get("distribution"); + Distribution distribution = DISTRIBUTIONS.get(rawDistribution); + if (distribution == null) { + throw new IllegalArgumentException("Unsupported Distribution [" + rawDistribution + "]"); + } + return distribution; + } + + /** + * Parses the given Settings and creates the appropriate {@link Lambda} + * + * @param settings Settings to parse + * @return {@link Lambda} referred to in the Settings + */ + private static Lambda parseLambda(Settings settings) { + String rawLambda = settings.get("lambda"); + Lambda lambda = LAMBDAS.get(rawLambda); + if (lambda == null) { + throw new IllegalArgumentException("Unsupported Lambda [" + rawLambda + "]"); + } + return lambda; + } + + static void assertSettingsIsSubsetOf(String type, Version version, Settings settings, String...
supportedSettings) { + Set unknownSettings = new HashSet<>(settings.keySet()); + unknownSettings.removeAll(Arrays.asList(supportedSettings)); + unknownSettings.remove("type"); // used to figure out which sim this is + if (unknownSettings.isEmpty() == false) { + if (version.onOrAfter(Version.V_7_0_0_alpha1)) { + throw new IllegalArgumentException("Unknown settings for similarity of type [" + type + "]: " + unknownSettings); + } else { + DEPRECATION_LOGGER.deprecated("Unknown settings for similarity of type [" + type + "]: " + unknownSettings); + } + } + } + + public static BM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); + + float k1 = settings.getAsFloat("k1", 1.2f); + float b = settings.getAsFloat("b", 0.75f); + boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); + + BM25Similarity similarity = new BM25Similarity(k1, b); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } + + public static BooleanSimilarity createBooleanSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("boolean", indexCreatedVersion, settings); + return new BooleanSimilarity(); + } + + public static ClassicSimilarity createClassicSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("classic", indexCreatedVersion, settings, DISCOUNT_OVERLAPS); + + boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); + + ClassicSimilarity similarity = new ClassicSimilarity(); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } + + public static DFRSimilarity createDfrSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("DFR", indexCreatedVersion, settings, + "basic_model", "after_effect", "normalization", + "normalization.h1.c", "normalization.h2.c", "normalization.h3.c", 
"normalization.z.z"); + + + return new DFRSimilarity( + parseBasicModel(settings), + parseAfterEffect(settings), + parseNormalization(settings)); + } + + public static DFISimilarity createDfiSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("DFI", indexCreatedVersion, settings, "independence_measure"); + + return new DFISimilarity(parseIndependence(settings)); + } + + public static IBSimilarity createIBSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("IB", indexCreatedVersion, settings, "distribution", "lambda", "normalization", + "normalization.h1.c", "normalization.h2.c", "normalization.h3.c", "normalization.z.z"); + + return new IBSimilarity( + parseDistribution(settings), + parseLambda(settings), + parseNormalization(settings)); + } + + public static LMDirichletSimilarity createLMDirichletSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("LMDirichlet", indexCreatedVersion, settings, "mu"); + + float mu = settings.getAsFloat("mu", 2000f); + return new LMDirichletSimilarity(mu); + } + + public static LMJelinekMercerSimilarity createLMJelinekMercerSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("LMJelinekMercer", indexCreatedVersion, settings, "lambda"); + + float lambda = settings.getAsFloat("lambda", 0.1f); + return new LMJelinekMercerSimilarity(lambda); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 16afb55599d49..eaed2169f11c0 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -19,8 +19,13 @@ package org.elasticsearch.index.similarity; +import org.apache.lucene.search.similarities.BM25Similarity; +import 
org.apache.lucene.search.similarities.BooleanSimilarity; +import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; +import org.elasticsearch.Version; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; @@ -34,45 +39,84 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.function.Function; +import java.util.function.Supplier; public final class SimilarityService extends AbstractIndexComponent { private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(SimilarityService.class)); public static final String DEFAULT_SIMILARITY = "BM25"; - private final Similarity defaultSimilarity; - private final Map similarities; - private static final Map DEFAULTS; - public static final Map BUILT_IN; + private static final String CLASSIC_SIMILARITY = "classic"; + private static final Map>> DEFAULTS; + public static final Map> BUILT_IN; static { - Map defaults = new HashMap<>(); - defaults.put("classic", - (name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings)); - defaults.put("BM25", - (name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings)); - defaults.put("boolean", - (name, settings, indexSettings, scriptService) -> new BooleanSimilarityProvider(name, settings, indexSettings)); - - Map builtIn = new HashMap<>(defaults); + Map>> defaults = new HashMap<>(); + defaults.put(CLASSIC_SIMILARITY, version -> { + if (version.onOrAfter(Version.V_7_0_0_alpha1)) { + return () -> { + throw new IllegalArgumentException("The [classic] similarity may not be used anymore. 
Please use the [BM25] " + + "similarity or build a custom [scripted] similarity instead."); + }; + } else { + final ClassicSimilarity similarity = SimilarityProviders.createClassicSimilarity(Settings.EMPTY, version); + return () -> { + DEPRECATION_LOGGER.deprecated("The [classic] similarity is now deprecated in favour of BM25, which is generally " + + "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity " + + "instead."); + return similarity; + }; + } + }); + defaults.put("BM25", version -> { + final BM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); + return () -> similarity; + }); + defaults.put("boolean", version -> { + final Similarity similarity = new BooleanSimilarity(); + return () -> similarity; + }); + + Map> builtIn = new HashMap<>(); + builtIn.put(CLASSIC_SIMILARITY, + (settings, version, script) -> { + if (version.onOrAfter(Version.V_7_0_0_alpha1)) { + throw new IllegalArgumentException("The [classic] similarity may not be used anymore. Please use the [BM25] " + + "similarity or build a custom [scripted] similarity instead."); + } else { + DEPRECATION_LOGGER.deprecated("The [classic] similarity is now deprecated in favour of BM25, which is generally " + + "accepted as a better alternative. 
Use the [BM25] similarity or build a custom [scripted] similarity " + + "instead."); + return SimilarityProviders.createClassicSimilarity(settings, version); + } + }); + builtIn.put("BM25", + (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version)); + builtIn.put("boolean", + (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version)); builtIn.put("DFR", - (name, settings, indexSettings, scriptService) -> new DFRSimilarityProvider(name, settings, indexSettings)); + (settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version)); builtIn.put("IB", - (name, settings, indexSettings, scriptService) -> new IBSimilarityProvider(name, settings, indexSettings)); + (settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version)); builtIn.put("LMDirichlet", - (name, settings, indexSettings, scriptService) -> new LMDirichletSimilarityProvider(name, settings, indexSettings)); + (settings, version, scriptService) -> SimilarityProviders.createLMDirichletSimilarity(settings, version)); builtIn.put("LMJelinekMercer", - (name, settings, indexSettings, scriptService) -> new LMJelinekMercerSimilarityProvider(name, settings, indexSettings)); + (settings, version, scriptService) -> SimilarityProviders.createLMJelinekMercerSimilarity(settings, version)); builtIn.put("DFI", - (name, settings, indexSettings, scriptService) -> new DFISimilarityProvider(name, settings, indexSettings)); - builtIn.put("scripted", ScriptedSimilarityProvider::new); + (settings, version, scriptService) -> SimilarityProviders.createDfiSimilarity(settings, version)); + builtIn.put("scripted", new ScriptedSimilarityProvider()); DEFAULTS = Collections.unmodifiableMap(defaults); BUILT_IN = Collections.unmodifiableMap(builtIn); } + private final Similarity defaultSimilarity; + private final Map> similarities; + public SimilarityService(IndexSettings indexSettings, 
ScriptService scriptService, - Map similarities) { + Map> similarities) { super(indexSettings); - Map providers = new HashMap<>(similarities.size()); + Map> providers = new HashMap<>(similarities.size()); Map similaritySettings = this.indexSettings.getSettings().getGroups(IndexModule.SIMILARITY_SETTINGS_PREFIX); + for (Map.Entry entry : similaritySettings.entrySet()) { String name = entry.getKey(); if (BUILT_IN.containsKey(name)) { @@ -85,14 +129,13 @@ public SimilarityService(IndexSettings indexSettings, ScriptService scriptServic } else if ((similarities.containsKey(typeName) || BUILT_IN.containsKey(typeName)) == false) { throw new IllegalArgumentException("Unknown Similarity type [" + typeName + "] for [" + name + "]"); } - SimilarityProvider.Factory defaultFactory = BUILT_IN.get(typeName); - SimilarityProvider.Factory factory = similarities.getOrDefault(typeName, defaultFactory); - providers.put(name, factory.create(name, providerSettings, indexSettings.getSettings(), scriptService)); + TriFunction defaultFactory = BUILT_IN.get(typeName); + TriFunction factory = similarities.getOrDefault(typeName, defaultFactory); + final Similarity similarity = factory.apply(providerSettings, indexSettings.getIndexVersionCreated(), scriptService); + providers.put(name, () -> similarity); } - Map providerMapping = addSimilarities(similaritySettings, indexSettings.getSettings(), scriptService, - DEFAULTS); - for (Map.Entry entry : providerMapping.entrySet()) { - providers.put(entry.getKey(), entry.getValue()); + for (Map.Entry>> entry : DEFAULTS.entrySet()) { + providers.put(entry.getKey(), entry.getValue().apply(indexSettings.getIndexVersionCreated())); } this.similarities = providers; defaultSimilarity = (providers.get("default") != null) ? 
providers.get("default").get() @@ -108,25 +151,16 @@ public Similarity similarity(MapperService mapperService) { defaultSimilarity; } - private Map addSimilarities(Map similaritySettings, Settings indexSettings, - ScriptService scriptService, Map similarities) { - Map providers = new HashMap<>(similarities.size()); - for (Map.Entry entry : similarities.entrySet()) { - String name = entry.getKey(); - SimilarityProvider.Factory factory = entry.getValue(); - Settings providerSettings = similaritySettings.get(name); - if (providerSettings == null) { - providerSettings = Settings.Builder.EMPTY_SETTINGS; - } - providers.put(name, factory.create(name, providerSettings, indexSettings, scriptService)); - } - return providers; - } - + public SimilarityProvider getSimilarity(String name) { - return similarities.get(name); + Supplier sim = similarities.get(name); + if (sim == null) { + return null; + } + return new SimilarityProvider(name, sim.get()); } + // for testing Similarity getDefaultSimilarity() { return defaultSimilarity; } diff --git a/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java b/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java index 706421c5ce73a..dde9c1ca3bdb6 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java @@ -59,7 +59,6 @@ import org.elasticsearch.index.shard.IndexingOperationListener; import org.elasticsearch.index.shard.SearchOperationListener; import org.elasticsearch.index.shard.ShardId; -import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.store.IndexStore; import org.elasticsearch.indices.IndicesModule; @@ -287,17 +286,8 @@ public void testAddSimilarity() throws IOException { .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IndexModule module = new 
IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry); - module.addSimilarity("test_similarity", (string, providerSettings, indexLevelSettings, scriptService) -> new SimilarityProvider() { - @Override - public String name() { - return string; - } - - @Override - public Similarity get() { - return new TestSimilarity(providerSettings.get("key")); - } - }); + module.addSimilarity("test_similarity", + (providerSettings, indexCreatedVersion, scriptService) -> new TestSimilarity(providerSettings.get("key"))); IndexService indexService = newIndexService(module); SimilarityService similarityService = indexService.similarityService(); diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java index ed219c972b614..5d18a595e9687 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.similarity; import org.apache.lucene.search.similarities.BM25Similarity; -import org.apache.lucene.search.similarities.ClassicSimilarity; +import org.apache.lucene.search.similarities.BooleanSimilarity; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.test.ESTestCase; @@ -50,10 +50,10 @@ public void testOverrideBuiltInSimilarity() { } public void testOverrideDefaultSimilarity() { - Settings settings = Settings.builder().put("index.similarity.default.type", "classic") + Settings settings = Settings.builder().put("index.similarity.default.type", "boolean") .build(); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); - assertTrue(service.getDefaultSimilarity() 
instanceof ClassicSimilarity); + assertTrue(service.getDefaultSimilarity() instanceof BooleanSimilarity); } } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java index 2ab905a2dd526..3de02f6831837 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java @@ -33,6 +33,8 @@ import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; import org.apache.lucene.search.similarities.LambdaTTF; import org.apache.lucene.search.similarities.NormalizationH2; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; @@ -60,7 +62,24 @@ protected Collection> getPlugins() { public void testResolveDefaultSimilarities() { SimilarityService similarityService = createIndex("foo").similarityService(); + assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); + assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); + assertThat(similarityService.getSimilarity("default"), equalTo(null)); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> similarityService.getSimilarity("classic")); + assertEquals("The [classic] similarity may not be used anymore. 
Please use the [BM25] similarity or build a custom [scripted] " + + "similarity instead.", e.getMessage()); + } + + public void testResolveDefaultSimilaritiesOn6xIndex() { + Settings indexSettings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden + .build(); + SimilarityService similarityService = createIndex("foo", indexSettings).similarityService(); assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class)); + assertWarnings("The [classic] similarity is now deprecated in favour of BM25, which is generally " + + "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity " + + "instead."); assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); assertThat(similarityService.getSimilarity("default"), equalTo(null)); @@ -76,15 +95,27 @@ public void testResolveSimilaritiesFromMapping_classic() throws IOException { Settings indexSettings = Settings.builder() .put("index.similarity.my_similarity.type", "classic") .put("index.similarity.my_similarity.discount_overlaps", false) + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(ClassicSimilarity.class)); ClassicSimilarity similarity = (ClassicSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); 
assertThat(similarity.getDiscountOverlaps(), equalTo(false)); } + public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException { + Settings indexSettings = Settings.builder() + .put("index.similarity.my_similarity.type", "classic") + .put("index.similarity.my_similarity.discount_overlaps", false) + .build(); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> createIndex("foo", indexSettings)); + assertEquals("The [classic] similarity may not be used anymore. Please use the [BM25] similarity or build a custom [scripted] " + + "similarity instead.", e.getMessage()); + } + public void testResolveSimilaritiesFromMapping_bm25() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties") @@ -100,7 +131,7 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException { .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(BM25SimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(BM25Similarity.class)); BM25Similarity similarity = (BM25Similarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getK1(), equalTo(2.0f)); @@ -119,8 +150,8 @@ public void testResolveSimilaritiesFromMapping_boolean() throws IOException { DocumentMapper documentMapper = indexService.mapperService() .documentMapperParser() .parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), - instanceOf(BooleanSimilarityProvider.class)); + 
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), + instanceOf(BooleanSimilarity.class)); } public void testResolveSimilaritiesFromMapping_DFR() throws IOException { @@ -139,7 +170,7 @@ public void testResolveSimilaritiesFromMapping_DFR() throws IOException { .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DFRSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(DFRSimilarity.class)); DFRSimilarity similarity = (DFRSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getBasicModel(), instanceOf(BasicModelG.class)); @@ -164,7 +195,7 @@ public void testResolveSimilaritiesFromMapping_IB() throws IOException { .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(IBSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(IBSimilarity.class)); IBSimilarity similarity = (IBSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getDistribution(), instanceOf(DistributionSPL.class)); @@ -187,7 +218,7 @@ public void testResolveSimilaritiesFromMapping_DFI() throws IOException { IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); MappedFieldType fieldType 
= documentMapper.mappers().getMapper("field1").fieldType(); - assertThat(fieldType.similarity(), instanceOf(DFISimilarityProvider.class)); + assertThat(fieldType.similarity().get(), instanceOf(DFISimilarity.class)); DFISimilarity similarity = (DFISimilarity) fieldType.similarity().get(); assertThat(similarity.getIndependence(), instanceOf(IndependenceChiSquared.class)); } @@ -205,7 +236,7 @@ public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMDirichletSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(LMDirichletSimilarity.class)); LMDirichletSimilarity similarity = (LMDirichletSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getMu(), equalTo(3000f)); @@ -224,7 +255,7 @@ public void testResolveSimilaritiesFromMapping_LMJelinekMercer() throws IOExcept .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(LMJelinekMercerSimilarity.class)); LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getLambda(), equalTo(0.7f)); @@ -245,4 +276,14 @@ public void testResolveSimilaritiesFromMapping_Unknown() throws 
IOException { assertThat(e.getMessage(), equalTo("Unknown Similarity type [unknown_similarity] for field [field1]")); } } + + public void testUnknownParameters() throws IOException { + Settings indexSettings = Settings.builder() + .put("index.similarity.my_similarity.type", "BM25") + .put("index.similarity.my_similarity.z", 2.0f) + .build(); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> createIndex("foo", indexSettings)); + assertEquals("Unknown settings for similarity of type [BM25]: [z]", e.getMessage()); + } } diff --git a/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java b/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java index 7cef608850e11..46d7311a90e23 100644 --- a/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.indices; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; @@ -49,7 +50,6 @@ import org.elasticsearch.index.shard.IndexShardState; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.ShardPath; -import org.elasticsearch.index.similarity.BM25SimilarityProvider; import org.elasticsearch.indices.IndicesService.ShardDeletionCheckResult; import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; @@ -106,7 +106,7 @@ public Map getMappers() { public void onIndexModule(IndexModule indexModule) { super.onIndexModule(indexModule); indexModule.addSimilarity("fake-similarity", - (name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings)); + (settings, indexCreatedVersion, scriptService) -> new BM25Similarity()); } } @@ -375,8 +375,8 @@ public void 
testStandAloneMapperServiceWithPlugins() throws IOException { .build(); MapperService mapperService = indicesService.createIndexMapperService(indexMetaData); assertNotNull(mapperService.documentMapperParser().parserContext("type").typeParser("fake-mapper")); - assertThat(mapperService.documentMapperParser().parserContext("type").getSimilarity("test"), - instanceOf(BM25SimilarityProvider.class)); + assertThat(mapperService.documentMapperParser().parserContext("type").getSimilarity("test").get(), + instanceOf(BM25Similarity.class)); } public void testStatsByShardDoesNotDieFromExpectedExceptions() { diff --git a/server/src/test/java/org/elasticsearch/similarity/SimilarityIT.java b/server/src/test/java/org/elasticsearch/similarity/SimilarityIT.java index c925e46cfa048..35e5b7071872b 100644 --- a/server/src/test/java/org/elasticsearch/similarity/SimilarityIT.java +++ b/server/src/test/java/org/elasticsearch/similarity/SimilarityIT.java @@ -46,7 +46,7 @@ public void testCustomBM25Similarity() throws Exception { .field("type", "text") .endObject() .startObject("field2") - .field("similarity", "classic") + .field("similarity", "boolean") .field("type", "text") .endObject() .endObject() @@ -68,9 +68,9 @@ public void testCustomBM25Similarity() throws Exception { assertThat(bm25SearchResponse.getHits().getTotalHits(), equalTo(1L)); float bm25Score = bm25SearchResponse.getHits().getHits()[0].getScore(); - SearchResponse defaultSearchResponse = client().prepareSearch().setQuery(matchQuery("field2", "quick brown fox")).execute().actionGet(); - assertThat(defaultSearchResponse.getHits().getTotalHits(), equalTo(1L)); - float defaultScore = defaultSearchResponse.getHits().getHits()[0].getScore(); + SearchResponse booleanSearchResponse = client().prepareSearch().setQuery(matchQuery("field2", "quick brown fox")).execute().actionGet(); + assertThat(booleanSearchResponse.getHits().getTotalHits(), equalTo(1L)); + float defaultScore = 
booleanSearchResponse.getHits().getHits()[0].getScore(); assertThat(bm25Score, not(equalTo(defaultScore))); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java index 818594d3bf7fd..28767cb34d73b 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java @@ -20,13 +20,14 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.search.Query; +import org.apache.lucene.search.similarities.BM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.index.similarity.BM25SimilarityProvider; +import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.test.ESTestCase; import java.util.ArrayList; @@ -123,17 +124,17 @@ public void normalizeOther(MappedFieldType other) { new Modifier("similarity", false) { @Override public void modify(MappedFieldType ft) { - ft.setSimilarity(new BM25SimilarityProvider("foo", Settings.EMPTY, INDEX_SETTINGS)); + ft.setSimilarity(new SimilarityProvider("foo", new BM25Similarity())); } }, new Modifier("similarity", false) { @Override public void modify(MappedFieldType ft) { - ft.setSimilarity(new BM25SimilarityProvider("foo", Settings.EMPTY, INDEX_SETTINGS)); + ft.setSimilarity(new SimilarityProvider("foo", new BM25Similarity())); } @Override public void normalizeOther(MappedFieldType other) { - other.setSimilarity(new BM25SimilarityProvider("bar", Settings.EMPTY, INDEX_SETTINGS)); + other.setSimilarity(new SimilarityProvider("bar", new 
BM25Similarity())); } }, new Modifier("eager_global_ordinals", true) {