diff --git a/docs/reference/index-modules/similarity.asciidoc b/docs/reference/index-modules/similarity.asciidoc index d0fd5dd399867..40f7de90c0ab2 100644 --- a/docs/reference/index-modules/similarity.asciidoc +++ b/docs/reference/index-modules/similarity.asciidoc @@ -82,20 +82,6 @@ This similarity has the following options: Type name: `BM25` -[float] -[[classic-similarity]] -==== Classic similarity - -The classic similarity that is based on the TF/IDF model. This -similarity has the following option: - -`discount_overlaps`:: - Determines whether overlap tokens (Tokens with - 0 position increment) are ignored when computing norm. By default this - is true, meaning overlap tokens do not count when computing norms. - -Type name: `classic` - [float] [[dfr]] ==== DFR similarity @@ -541,7 +527,7 @@ PUT /index "index": { "similarity": { "default": { - "type": "classic" + "type": "boolean" } } } @@ -563,7 +549,7 @@ PUT /index/_settings "index": { "similarity": { "default": { - "type": "classic" + "type": "boolean" } } } diff --git a/docs/reference/mapping/params/similarity.asciidoc b/docs/reference/mapping/params/similarity.asciidoc index 3509cd0cf8eb5..a0be0fb3ccbeb 100644 --- a/docs/reference/mapping/params/similarity.asciidoc +++ b/docs/reference/mapping/params/similarity.asciidoc @@ -44,13 +44,9 @@ PUT my_index "default_field": { <1> "type": "text" }, - "classic_field": { - "type": "text", - "similarity": "classic" <2> - }, "boolean_sim_field": { "type": "text", - "similarity": "boolean" <3> + "similarity": "boolean" <2> } } } @@ -59,5 +55,4 @@ PUT my_index -------------------------------------------------- // CONSOLE <1> The `default_field` uses the `BM25` similarity. -<2> The `classic_field` uses the `classic` similarity (ie TF/IDF). -<3> The `boolean_sim_field` uses the `boolean` similarity. +<2> The `boolean_sim_field` uses the `boolean` similarity. 
diff --git a/docs/reference/migration/migrate_7_0/mappings.asciidoc b/docs/reference/migration/migrate_7_0/mappings.asciidoc index 8f1474aa57cbe..b0ab90546c3a8 100644 --- a/docs/reference/migration/migrate_7_0/mappings.asciidoc +++ b/docs/reference/migration/migrate_7_0/mappings.asciidoc @@ -24,3 +24,16 @@ the index setting `index.mapping.nested_objects.limit`. ==== The `update_all_types` option has been removed This option is useless now that all indices have at most one type. + +==== The `classic` similarity has been removed + +The `classic` similarity relied on coordination factors for scoring to be good +in the presence of stopwords in the query. This feature has been removed from +Lucene, which means that the `classic` similarity now produces scores of lower +quality. It is advised to switch to `BM25` instead, which is widely accepted +as a better alternative. + +==== Similarities fail when unsupported options are provided + +An error will now be thrown when unknown configuration options are provided +to similarities. Such unknown parameters were ignored before. 
diff --git a/modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java b/modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java index 4f4d965d59433..2d7215c239821 100644 --- a/modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java +++ b/modules/parent-join/src/test/java/org/elasticsearch/join/query/HasChildQueryBuilderTests.java @@ -336,9 +336,7 @@ public void testNonDefaultSimilarity() throws Exception { hasChildQuery(CHILD_DOC, new TermQueryBuilder("custom_string", "value"), ScoreMode.None); HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext); Similarity expected = SimilarityService.BUILT_IN.get(similarity) - .create(similarity, Settings.EMPTY, - Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null) - .get(); + .apply(Settings.EMPTY, Version.CURRENT, null); assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass())); } diff --git a/modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java b/modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java index a52cc1db3d088..3eb16a925676c 100644 --- a/modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java +++ b/modules/parent-join/src/test/java/org/elasticsearch/join/query/LegacyHasChildQueryBuilderTests.java @@ -87,7 +87,7 @@ protected Collection> getPlugins() { @Override protected void initializeAdditionalMappings(MapperService mapperService) throws IOException { - similarity = randomFrom("classic", "BM25"); + similarity = randomFrom("boolean", "BM25"); // TODO: use a single type when inner hits have been changed to work with join field, // this test randomly generates queries with inner hits 
mapperService.merge(PARENT_TYPE, new CompressedXContent(Strings.toString(PutMappingRequest.buildFromSimplifiedDef(PARENT_TYPE, @@ -323,9 +323,7 @@ public void testNonDefaultSimilarity() throws Exception { hasChildQuery(CHILD_TYPE, new TermQueryBuilder("custom_string", "value"), ScoreMode.None); HasChildQueryBuilder.LateParsingQuery query = (HasChildQueryBuilder.LateParsingQuery) hasChildQueryBuilder.toQuery(shardContext); Similarity expected = SimilarityService.BUILT_IN.get(similarity) - .create(similarity, Settings.EMPTY, - Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(), null) - .get(); + .apply(Settings.EMPTY, Version.CURRENT, null); assertThat(((PerFieldSimilarityWrapper) query.getSimilarity()).get("custom_string"), instanceOf(expected.getClass())); } diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java index 4b6a898a3a9f1..6d18f5e01b5d3 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/MetaDataIndexUpgradeService.java @@ -20,7 +20,9 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.Version; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.settings.IndexScopedSettings; import org.elasticsearch.common.settings.Settings; @@ -31,8 +33,8 @@ import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.similarity.SimilarityService; -import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.indices.mapper.MapperRegistry; +import 
org.elasticsearch.script.ScriptService; import java.util.AbstractMap; import java.util.Collection; @@ -142,14 +144,15 @@ private void checkMappingsCompatibility(IndexMetaData indexMetaData) { IndexSettings indexSettings = new IndexSettings(indexMetaData, this.settings); - final Map similarityMap = new AbstractMap() { + final Map> similarityMap + = new AbstractMap>() { @Override public boolean containsKey(Object key) { return true; } @Override - public SimilarityProvider.Factory get(Object key) { + public TriFunction get(Object key) { assert key instanceof String : "key must be a string but was: " + key.getClass(); return SimilarityService.BUILT_IN.get(SimilarityService.DEFAULT_SIMILARITY); } @@ -157,7 +160,7 @@ public SimilarityProvider.Factory get(Object key) { // this entrySet impl isn't fully correct but necessary as SimilarityService will iterate // over all similarities @Override - public Set> entrySet() { + public Set>> entrySet() { return Collections.emptySet(); } }; diff --git a/server/src/main/java/org/elasticsearch/index/IndexModule.java b/server/src/main/java/org/elasticsearch/index/IndexModule.java index 869f8c9ca72db..767ef48733937 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexModule.java +++ b/server/src/main/java/org/elasticsearch/index/IndexModule.java @@ -19,9 +19,13 @@ package org.elasticsearch.index; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.util.SetOnce; +import org.elasticsearch.Version; import org.elasticsearch.client.Client; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; @@ -39,9 +43,6 @@ import org.elasticsearch.index.shard.IndexSearcherWrapper; import org.elasticsearch.index.shard.IndexingOperationListener; 
import org.elasticsearch.index.shard.SearchOperationListener; -import org.elasticsearch.index.shard.ShardId; -import org.elasticsearch.index.similarity.BM25SimilarityProvider; -import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.store.IndexStore; import org.elasticsearch.indices.IndicesQueryCache; @@ -68,10 +69,10 @@ /** * IndexModule represents the central extension point for index level custom implementations like: *
    - *
  • {@link SimilarityProvider} - New {@link SimilarityProvider} implementations can be registered through - * {@link #addSimilarity(String, SimilarityProvider.Factory)} while existing Providers can be referenced through Settings under the + *
  • {@link Similarity} - New {@link Similarity} implementations can be registered through + * {@link #addSimilarity(String, TriFunction)} while existing Providers can be referenced through Settings under the * {@link IndexModule#SIMILARITY_SETTINGS_PREFIX} prefix along with the "type" value. For example, to reference the - * {@link BM25SimilarityProvider}, the configuration "index.similarity.my_similarity.type : "BM25" can be used.
  • + * {@link BM25Similarity}, the configuration "index.similarity.my_similarity.type : "BM25" can be used. *
  • {@link IndexStore} - Custom {@link IndexStore} instances can be registered via {@link #addIndexStore(String, Function)}
  • *
  • {@link IndexEventListener} - Custom {@link IndexEventListener} instances can be registered via * {@link #addIndexEventListener(IndexEventListener)}
  • @@ -107,7 +108,7 @@ public final class IndexModule { final SetOnce engineFactory = new SetOnce<>(); private SetOnce indexSearcherWrapper = new SetOnce<>(); private final Set indexEventListeners = new HashSet<>(); - private final Map similarities = new HashMap<>(); + private final Map> similarities = new HashMap<>(); private final Map> storeTypes = new HashMap<>(); private final SetOnce> forceQueryCacheProvider = new SetOnce<>(); private final List searchOperationListeners = new ArrayList<>(); @@ -246,12 +247,17 @@ public void addIndexStore(String type, Function provi /** - * Registers the given {@link SimilarityProvider} with the given name + * Registers the given {@link Similarity} with the given name. + * The function takes as parameters:
      + *
    • settings for this similarity + *
    • version of Elasticsearch when the index was created + *
    • ScriptService, for script-based similarities + *
    * * @param name Name of the SimilarityProvider * @param similarity SimilarityProvider to register */ - public void addSimilarity(String name, SimilarityProvider.Factory similarity) { + public void addSimilarity(String name, TriFunction similarity) { ensureNotFrozen(); if (similarities.containsKey(name) || SimilarityService.BUILT_IN.containsKey(name)) { throw new IllegalArgumentException("similarity for name: [" + name + " is already registered"); diff --git a/server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java deleted file mode 100644 index fef43d6f5deaf..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.Normalization; -import org.apache.lucene.search.similarities.NormalizationH1; -import org.apache.lucene.search.similarities.NormalizationH2; -import org.apache.lucene.search.similarities.NormalizationH3; -import org.apache.lucene.search.similarities.NormalizationZ; -import org.elasticsearch.common.settings.Settings; - -/** - * Abstract implementation of {@link SimilarityProvider} providing common behaviour - */ -public abstract class AbstractSimilarityProvider implements SimilarityProvider { - - protected static final Normalization NO_NORMALIZATION = new Normalization.NoNormalization(); - - private final String name; - - /** - * Creates a new AbstractSimilarityProvider with the given name - * - * @param name Name of the Provider - */ - protected AbstractSimilarityProvider(String name) { - this.name = name; - } - - /** - * {@inheritDoc} - */ - @Override - public String name() { - return this.name; - } - - /** - * Parses the given Settings and creates the appropriate {@link Normalization} - * - * @param settings Settings to parse - * @return {@link Normalization} referred to in the Settings - */ - protected Normalization parseNormalization(Settings settings) { - String normalization = settings.get("normalization"); - - if ("no".equals(normalization)) { - return NO_NORMALIZATION; - } else if ("h1".equals(normalization)) { - float c = settings.getAsFloat("normalization.h1.c", 1f); - return new NormalizationH1(c); - } else if ("h2".equals(normalization)) { - float c = settings.getAsFloat("normalization.h2.c", 1f); - return new NormalizationH2(c); - } else if ("h3".equals(normalization)) { - float c = settings.getAsFloat("normalization.h3.c", 800f); - return new NormalizationH3(c); - } else if ("z".equals(normalization)) { - float z = settings.getAsFloat("normalization.z.z", 0.30f); - return new NormalizationZ(z); - } else { - throw new 
IllegalArgumentException("Unsupported Normalization [" + normalization + "]"); - } - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java deleted file mode 100644 index ad49e7e9cc901..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/BM25SimilarityProvider.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.BM25Similarity; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for the {@link BM25Similarity}. - *

    - * Configuration options available: - *

      - *
    • k1
    • - *
    • b
    • - *
    • discount_overlaps
    • - *
    - * @see BM25Similarity For more information about configuration - */ -public class BM25SimilarityProvider extends AbstractSimilarityProvider { - - private final BM25Similarity similarity; - - public BM25SimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - float k1 = settings.getAsFloat("k1", 1.2f); - float b = settings.getAsFloat("b", 0.75f); - boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); - - this.similarity = new BM25Similarity(k1, b); - this.similarity.setDiscountOverlaps(discountOverlaps); - } - - /** - * {@inheritDoc} - */ - @Override - public Similarity get() { - return similarity; - } - -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/BooleanSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/BooleanSimilarityProvider.java deleted file mode 100644 index e5db045f3716f..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/BooleanSimilarityProvider.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.BooleanSimilarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for the {@link BooleanSimilarity}, - * which is a simple similarity that gives terms a score equal - * to their query boost only. This is useful in situations where - * a field does not need to be scored by a full-text ranking - * algorithm, but rather all that matters is whether the query - * terms matched or not. - */ -public class BooleanSimilarityProvider extends AbstractSimilarityProvider { - - private final BooleanSimilarity similarity = new BooleanSimilarity(); - - public BooleanSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - } - - /** - * {@inheritDoc} - */ - @Override - public BooleanSimilarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/ClassicSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/ClassicSimilarityProvider.java deleted file mode 100644 index 419321996a301..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/ClassicSimilarityProvider.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.ClassicSimilarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for {@link ClassicSimilarity}. - *

    - * Configuration options available: - *

      - *
    • discount_overlaps
    • - *
    - * @see ClassicSimilarity For more information about configuration - */ -public class ClassicSimilarityProvider extends AbstractSimilarityProvider { - - private final ClassicSimilarity similarity = new ClassicSimilarity(); - - public ClassicSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); - this.similarity.setDiscountOverlaps(discountOverlaps); - } - - /** - * {@inheritDoc} - */ - @Override - public ClassicSimilarity get() { - return similarity; - } - -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java deleted file mode 100644 index 324314b2669b2..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/DFISimilarityProvider.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.DFISimilarity; -import org.apache.lucene.search.similarities.Independence; -import org.apache.lucene.search.similarities.IndependenceChiSquared; -import org.apache.lucene.search.similarities.IndependenceSaturated; -import org.apache.lucene.search.similarities.IndependenceStandardized; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.unmodifiableMap; - -/** - * {@link SimilarityProvider} for the {@link DFISimilarity}. - *

    - * Configuration options available: - *

      - *
    • independence_measure
    • - *
    • discount_overlaps
    • - *
    - * @see DFISimilarity For more information about configuration - */ -public class DFISimilarityProvider extends AbstractSimilarityProvider { - // the "basic models" of divergence from independence - private static final Map INDEPENDENCE_MEASURES; - static { - Map measures = new HashMap<>(); - measures.put("standardized", new IndependenceStandardized()); - measures.put("saturated", new IndependenceSaturated()); - measures.put("chisquared", new IndependenceChiSquared()); - INDEPENDENCE_MEASURES = unmodifiableMap(measures); - } - - private final DFISimilarity similarity; - - public DFISimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); - Independence measure = parseIndependence(settings); - this.similarity = new DFISimilarity(measure); - this.similarity.setDiscountOverlaps(discountOverlaps); - } - - private Independence parseIndependence(Settings settings) { - String name = settings.get("independence_measure"); - Independence measure = INDEPENDENCE_MEASURES.get(name); - if (measure == null) { - throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "]"); - } - return measure; - } - - @Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/DFRSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/DFRSimilarityProvider.java deleted file mode 100644 index 0d47e86da0182..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/DFRSimilarityProvider.java +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. 
Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.AfterEffect; -import org.apache.lucene.search.similarities.AfterEffectB; -import org.apache.lucene.search.similarities.AfterEffectL; -import org.apache.lucene.search.similarities.BasicModel; -import org.apache.lucene.search.similarities.BasicModelBE; -import org.apache.lucene.search.similarities.BasicModelD; -import org.apache.lucene.search.similarities.BasicModelG; -import org.apache.lucene.search.similarities.BasicModelIF; -import org.apache.lucene.search.similarities.BasicModelIn; -import org.apache.lucene.search.similarities.BasicModelIne; -import org.apache.lucene.search.similarities.BasicModelP; -import org.apache.lucene.search.similarities.DFRSimilarity; -import org.apache.lucene.search.similarities.Normalization; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.unmodifiableMap; - -/** - * {@link SimilarityProvider} for {@link DFRSimilarity}. - *

    - * Configuration options available: - *

      - *
    • basic_model
    • - *
    • after_effect
    • - *
    • normalization
    • - *
    - * @see DFRSimilarity For more information about configuration - */ -public class DFRSimilarityProvider extends AbstractSimilarityProvider { - private static final Map BASIC_MODELS; - private static final Map AFTER_EFFECTS; - - static { - Map models = new HashMap<>(); - models.put("be", new BasicModelBE()); - models.put("d", new BasicModelD()); - models.put("g", new BasicModelG()); - models.put("if", new BasicModelIF()); - models.put("in", new BasicModelIn()); - models.put("ine", new BasicModelIne()); - models.put("p", new BasicModelP()); - BASIC_MODELS = unmodifiableMap(models); - - Map effects = new HashMap<>(); - effects.put("no", new AfterEffect.NoAfterEffect()); - effects.put("b", new AfterEffectB()); - effects.put("l", new AfterEffectL()); - AFTER_EFFECTS = unmodifiableMap(effects); - } - - private final DFRSimilarity similarity; - - public DFRSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - BasicModel basicModel = parseBasicModel(settings); - AfterEffect afterEffect = parseAfterEffect(settings); - Normalization normalization = parseNormalization(settings); - this.similarity = new DFRSimilarity(basicModel, afterEffect, normalization); - } - - /** - * Parses the given Settings and creates the appropriate {@link BasicModel} - * - * @param settings Settings to parse - * @return {@link BasicModel} referred to in the Settings - */ - protected BasicModel parseBasicModel(Settings settings) { - String basicModel = settings.get("basic_model"); - BasicModel model = BASIC_MODELS.get(basicModel); - if (model == null) { - throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "]"); - } - return model; - } - - /** - * Parses the given Settings and creates the appropriate {@link AfterEffect} - * - * @param settings Settings to parse - * @return {@link AfterEffect} referred to in the Settings - */ - protected AfterEffect parseAfterEffect(Settings settings) { - String afterEffect = 
settings.get("after_effect"); - AfterEffect effect = AFTER_EFFECTS.get(afterEffect); - if (effect == null) { - throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "]"); - } - return effect; - } - - /** - * {@inheritDoc} - */ - @Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/IBSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/IBSimilarityProvider.java deleted file mode 100644 index a43276bbfaa82..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/IBSimilarityProvider.java +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.Distribution; -import org.apache.lucene.search.similarities.DistributionLL; -import org.apache.lucene.search.similarities.DistributionSPL; -import org.apache.lucene.search.similarities.IBSimilarity; -import org.apache.lucene.search.similarities.Lambda; -import org.apache.lucene.search.similarities.LambdaDF; -import org.apache.lucene.search.similarities.LambdaTTF; -import org.apache.lucene.search.similarities.Normalization; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -import java.util.HashMap; -import java.util.Map; - -import static java.util.Collections.unmodifiableMap; - -/** - * {@link SimilarityProvider} for {@link IBSimilarity}. - *

    - * Configuration options available: - *

      - *
    • distribution
    • - *
    • lambda
    • - *
    • normalization
    • - *
    - * @see IBSimilarity For more information about configuration - */ -public class IBSimilarityProvider extends AbstractSimilarityProvider { - - private static final Map DISTRIBUTIONS; - private static final Map LAMBDAS; - - static { - Map distributions = new HashMap<>(); - distributions.put("ll", new DistributionLL()); - distributions.put("spl", new DistributionSPL()); - DISTRIBUTIONS = unmodifiableMap(distributions); - - Map lamdas = new HashMap<>(); - lamdas.put("df", new LambdaDF()); - lamdas.put("ttf", new LambdaTTF()); - LAMBDAS = unmodifiableMap(lamdas); - } - - private final IBSimilarity similarity; - - public IBSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - Distribution distribution = parseDistribution(settings); - Lambda lambda = parseLambda(settings); - Normalization normalization = parseNormalization(settings); - this.similarity = new IBSimilarity(distribution, lambda, normalization); - } - - /** - * Parses the given Settings and creates the appropriate {@link Distribution} - * - * @param settings Settings to parse - * @return {@link Normalization} referred to in the Settings - */ - protected Distribution parseDistribution(Settings settings) { - String rawDistribution = settings.get("distribution"); - Distribution distribution = DISTRIBUTIONS.get(rawDistribution); - if (distribution == null) { - throw new IllegalArgumentException("Unsupported Distribution [" + rawDistribution + "]"); - } - return distribution; - } - - /** - * Parses the given Settings and creates the appropriate {@link Lambda} - * - * @param settings Settings to parse - * @return {@link Normalization} referred to in the Settings - */ - protected Lambda parseLambda(Settings settings) { - String rawLambda = settings.get("lambda"); - Lambda lambda = LAMBDAS.get(rawLambda); - if (lambda == null) { - throw new IllegalArgumentException("Unsupported Lambda [" + rawLambda + "]"); - } - return lambda; - } - - /** - * {@inheritDoc} - */ - 
@Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/LMDirichletSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/LMDirichletSimilarityProvider.java deleted file mode 100644 index 170a7e42133c9..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/LMDirichletSimilarityProvider.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.LMDirichletSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for {@link LMDirichletSimilarity}. - *

    - * Configuration options available: - *

      - *
    • mu
    • - *
    - * @see LMDirichletSimilarity For more information about configuration - */ -public class LMDirichletSimilarityProvider extends AbstractSimilarityProvider { - - private final LMDirichletSimilarity similarity; - - public LMDirichletSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - float mu = settings.getAsFloat("mu", 2000f); - this.similarity = new LMDirichletSimilarity(mu); - } - - /** - * {@inheritDoc} - */ - @Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/LMJelinekMercerSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/LMJelinekMercerSimilarityProvider.java deleted file mode 100644 index 2ee04b78ec2ef..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/similarity/LMJelinekMercerSimilarityProvider.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.similarity; - -import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; -import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; - -/** - * {@link SimilarityProvider} for {@link LMJelinekMercerSimilarity}. - *

    - * Configuration options available: - *

      - *
    • lambda
    • - *
    - * @see LMJelinekMercerSimilarity For more information about configuration - */ -public class LMJelinekMercerSimilarityProvider extends AbstractSimilarityProvider { - - private final LMJelinekMercerSimilarity similarity; - - public LMJelinekMercerSimilarityProvider(String name, Settings settings, Settings indexSettings) { - super(name); - float lambda = settings.getAsFloat("lambda", 0.1f); - this.similarity = new LMJelinekMercerSimilarity(lambda); - } - - /** - * {@inheritDoc} - */ - @Override - public Similarity get() { - return similarity; - } -} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java index e290fd3457aeb..190f861f26216 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/ScriptedSimilarityProvider.java @@ -20,6 +20,8 @@ package org.elasticsearch.index.similarity; import org.apache.lucene.search.similarities.Similarity; +import org.elasticsearch.Version; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptService; @@ -27,13 +29,11 @@ import org.elasticsearch.script.SimilarityWeightScript; /** Provider of scripted similarities. 
*/ -public class ScriptedSimilarityProvider extends AbstractSimilarityProvider { +final class ScriptedSimilarityProvider implements TriFunction { - private final ScriptedSimilarity scriptedSimilarity; - - public ScriptedSimilarityProvider(String name, Settings settings, Settings indexSettings, ScriptService scriptService) { - super(name); - boolean discountOverlaps = settings.getAsBoolean("discount_overlaps", true); + @Override + public Similarity apply(Settings settings, Version indexCreatedVersion, ScriptService scriptService) { + boolean discountOverlaps = settings.getAsBoolean(SimilarityProviders.DISCOUNT_OVERLAPS, true); Settings scriptSettings = settings.getAsSettings("script"); Script script = Script.parse(scriptSettings); SimilarityScript.Factory scriptFactory = scriptService.compile(script, SimilarityScript.CONTEXT); @@ -44,15 +44,10 @@ public ScriptedSimilarityProvider(String name, Settings settings, Settings index weightScript = Script.parse(weightScriptSettings); weightScriptFactory = scriptService.compile(weightScript, SimilarityWeightScript.CONTEXT); } - scriptedSimilarity = new ScriptedSimilarity( + return new ScriptedSimilarity( weightScript == null ? null : weightScript.toString(), weightScriptFactory == null ? 
null : weightScriptFactory::newInstance, script.toString(), scriptFactory::newInstance, discountOverlaps); } - @Override - public Similarity get() { - return scriptedSimilarity; - } - } diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java index 666e70c406937..fed15b3058360 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java @@ -20,32 +20,32 @@ package org.elasticsearch.index.similarity; import org.apache.lucene.search.similarities.Similarity; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.script.ScriptService; /** - * Provider for {@link Similarity} instances + * Wrapper around a {@link Similarity} and its name. */ -public interface SimilarityProvider { +public final class SimilarityProvider { + + private final String name; + private final Similarity similarity; + + public SimilarityProvider(String name, Similarity similarity) { + this.name = name; + this.similarity = similarity; + } /** - * Returns the name associated with the Provider - * - * @return Name of the Provider + * Return the name of this {@link Similarity}. */ - String name(); + public String name() { + return name; + } /** - * Returns the {@link Similarity} the Provider is for - * - * @return Provided {@link Similarity} + * Return the wrapped {@link Similarity}. */ - Similarity get(); - - /** Factory of {@link SimilarityProvider} */ - @FunctionalInterface - interface Factory { - /** Create a new {@link SimilarityProvider}. 
*/ - SimilarityProvider create(String name, Settings settings, Settings indexSettings, ScriptService scriptService); + public Similarity get() { + return similarity; } + } diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java new file mode 100644 index 0000000000000..18c6d6a3fc063 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityProviders.java @@ -0,0 +1,300 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.similarity; + +import org.apache.lucene.search.similarities.AfterEffect; +import org.apache.lucene.search.similarities.AfterEffectB; +import org.apache.lucene.search.similarities.AfterEffectL; +import org.apache.lucene.search.similarities.BM25Similarity; +import org.apache.lucene.search.similarities.BasicModel; +import org.apache.lucene.search.similarities.BasicModelBE; +import org.apache.lucene.search.similarities.BasicModelD; +import org.apache.lucene.search.similarities.BasicModelG; +import org.apache.lucene.search.similarities.BasicModelIF; +import org.apache.lucene.search.similarities.BasicModelIn; +import org.apache.lucene.search.similarities.BasicModelIne; +import org.apache.lucene.search.similarities.BasicModelP; +import org.apache.lucene.search.similarities.BooleanSimilarity; +import org.apache.lucene.search.similarities.ClassicSimilarity; +import org.apache.lucene.search.similarities.DFISimilarity; +import org.apache.lucene.search.similarities.DFRSimilarity; +import org.apache.lucene.search.similarities.Distribution; +import org.apache.lucene.search.similarities.DistributionLL; +import org.apache.lucene.search.similarities.DistributionSPL; +import org.apache.lucene.search.similarities.IBSimilarity; +import org.apache.lucene.search.similarities.Independence; +import org.apache.lucene.search.similarities.IndependenceChiSquared; +import org.apache.lucene.search.similarities.IndependenceSaturated; +import org.apache.lucene.search.similarities.IndependenceStandardized; +import org.apache.lucene.search.similarities.LMDirichletSimilarity; +import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; +import org.apache.lucene.search.similarities.Lambda; +import org.apache.lucene.search.similarities.LambdaDF; +import org.apache.lucene.search.similarities.LambdaTTF; +import org.apache.lucene.search.similarities.Normalization; +import org.apache.lucene.search.similarities.NormalizationH1; +import 
org.apache.lucene.search.similarities.NormalizationH2; +import org.apache.lucene.search.similarities.NormalizationH3; +import org.apache.lucene.search.similarities.NormalizationZ; +import org.elasticsearch.Version; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.settings.Settings; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import static java.util.Collections.unmodifiableMap; + +final class SimilarityProviders { + + private SimilarityProviders() {} // no instantiation + + private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(SimilarityProviders.class)); + static final String DISCOUNT_OVERLAPS = "discount_overlaps"; + + private static final Map BASIC_MODELS; + private static final Map AFTER_EFFECTS; + + static { + Map models = new HashMap<>(); + models.put("be", new BasicModelBE()); + models.put("d", new BasicModelD()); + models.put("g", new BasicModelG()); + models.put("if", new BasicModelIF()); + models.put("in", new BasicModelIn()); + models.put("ine", new BasicModelIne()); + models.put("p", new BasicModelP()); + BASIC_MODELS = unmodifiableMap(models); + + Map effects = new HashMap<>(); + effects.put("no", new AfterEffect.NoAfterEffect()); + effects.put("b", new AfterEffectB()); + effects.put("l", new AfterEffectL()); + AFTER_EFFECTS = unmodifiableMap(effects); + } + + private static final Map INDEPENDENCE_MEASURES; + static { + Map measures = new HashMap<>(); + measures.put("standardized", new IndependenceStandardized()); + measures.put("saturated", new IndependenceSaturated()); + measures.put("chisquared", new IndependenceChiSquared()); + INDEPENDENCE_MEASURES = unmodifiableMap(measures); + } + + private static final Map DISTRIBUTIONS; + private static final Map LAMBDAS; + + static { + Map distributions = new HashMap<>(); + 
distributions.put("ll", new DistributionLL()); + distributions.put("spl", new DistributionSPL()); + DISTRIBUTIONS = unmodifiableMap(distributions); + + Map lamdas = new HashMap<>(); + lamdas.put("df", new LambdaDF()); + lamdas.put("ttf", new LambdaTTF()); + LAMBDAS = unmodifiableMap(lamdas); + } + + /** + * Parses the given Settings and creates the appropriate {@link BasicModel} + * + * @param settings Settings to parse + * @return {@link BasicModel} referred to in the Settings + */ + private static BasicModel parseBasicModel(Settings settings) { + String basicModel = settings.get("basic_model"); + BasicModel model = BASIC_MODELS.get(basicModel); + if (model == null) { + throw new IllegalArgumentException("Unsupported BasicModel [" + basicModel + "], expected one of " + BASIC_MODELS.keySet()); + } + return model; + } + + /** + * Parses the given Settings and creates the appropriate {@link AfterEffect} + * + * @param settings Settings to parse + * @return {@link AfterEffect} referred to in the Settings + */ + private static AfterEffect parseAfterEffect(Settings settings) { + String afterEffect = settings.get("after_effect"); + AfterEffect effect = AFTER_EFFECTS.get(afterEffect); + if (effect == null) { + throw new IllegalArgumentException("Unsupported AfterEffect [" + afterEffect + "], expected one of " + AFTER_EFFECTS.keySet()); + } + return effect; + } + + /** + * Parses the given Settings and creates the appropriate {@link Normalization} + * + * @param settings Settings to parse + * @return {@link Normalization} referred to in the Settings + */ + private static Normalization parseNormalization(Settings settings) { + String normalization = settings.get("normalization"); + + if ("no".equals(normalization)) { + return new Normalization.NoNormalization(); + } else if ("h1".equals(normalization)) { + float c = settings.getAsFloat("normalization.h1.c", 1f); + return new NormalizationH1(c); + } else if ("h2".equals(normalization)) { + float c = 
settings.getAsFloat("normalization.h2.c", 1f); + return new NormalizationH2(c); + } else if ("h3".equals(normalization)) { + float c = settings.getAsFloat("normalization.h3.c", 800f); + return new NormalizationH3(c); + } else if ("z".equals(normalization)) { + float z = settings.getAsFloat("normalization.z.z", 0.30f); + return new NormalizationZ(z); + } else { + throw new IllegalArgumentException("Unsupported Normalization [" + normalization + "]"); + } + } + + private static Independence parseIndependence(Settings settings) { + String name = settings.get("independence_measure"); + Independence measure = INDEPENDENCE_MEASURES.get(name); + if (measure == null) { + throw new IllegalArgumentException("Unsupported IndependenceMeasure [" + name + "], expected one of " + + INDEPENDENCE_MEASURES.keySet()); + } + return measure; + } + + /** + * Parses the given Settings and creates the appropriate {@link Distribution} + * + * @param settings Settings to parse + * @return {@link Normalization} referred to in the Settings + */ + private static Distribution parseDistribution(Settings settings) { + String rawDistribution = settings.get("distribution"); + Distribution distribution = DISTRIBUTIONS.get(rawDistribution); + if (distribution == null) { + throw new IllegalArgumentException("Unsupported Distribution [" + rawDistribution + "]"); + } + return distribution; + } + + /** + * Parses the given Settings and creates the appropriate {@link Lambda} + * + * @param settings Settings to parse + * @return {@link Normalization} referred to in the Settings + */ + private static Lambda parseLambda(Settings settings) { + String rawLambda = settings.get("lambda"); + Lambda lambda = LAMBDAS.get(rawLambda); + if (lambda == null) { + throw new IllegalArgumentException("Unsupported Lambda [" + rawLambda + "]"); + } + return lambda; + } + + static void assertSettingsIsSubsetOf(String type, Version version, Settings settings, String... 
supportedSettings) { + Set unknownSettings = new HashSet<>(settings.keySet()); + unknownSettings.removeAll(Arrays.asList(supportedSettings)); + unknownSettings.remove("type"); // used to figure out which sim this is + if (unknownSettings.isEmpty() == false) { + if (version.onOrAfter(Version.V_7_0_0_alpha1)) { + throw new IllegalArgumentException("Unknown settings for similarity of type [" + type + "]: " + unknownSettings); + } else { + DEPRECATION_LOGGER.deprecated("Unknown settings for similarity of type [" + type + "]: " + unknownSettings); + } + } + } + + public static BM25Similarity createBM25Similarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("BM25", indexCreatedVersion, settings, "k1", "b", DISCOUNT_OVERLAPS); + + float k1 = settings.getAsFloat("k1", 1.2f); + float b = settings.getAsFloat("b", 0.75f); + boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); + + BM25Similarity similarity = new BM25Similarity(k1, b); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } + + public static BooleanSimilarity createBooleanSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("boolean", indexCreatedVersion, settings); + return new BooleanSimilarity(); + } + + public static ClassicSimilarity createClassicSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("classic", indexCreatedVersion, settings, DISCOUNT_OVERLAPS); + + boolean discountOverlaps = settings.getAsBoolean(DISCOUNT_OVERLAPS, true); + + ClassicSimilarity similarity = new ClassicSimilarity(); + similarity.setDiscountOverlaps(discountOverlaps); + return similarity; + } + + public static DFRSimilarity createDfrSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("DFR", indexCreatedVersion, settings, + "basic_model", "after_effect", "normalization", + "normalization.h1.c", "normalization.h2.c", "normalization.h3.c", 
"normalization.z.z"); + + + return new DFRSimilarity( + parseBasicModel(settings), + parseAfterEffect(settings), + parseNormalization(settings)); + } + + public static DFISimilarity createDfiSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("DFI", indexCreatedVersion, settings, "independence_measure"); + + return new DFISimilarity(parseIndependence(settings)); + } + + public static IBSimilarity createIBSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("IB", indexCreatedVersion, settings, "distribution", "lambda", "normalization", + "normalization.h1.c", "normalization.h2.c", "normalization.h3.c", "normalization.z.z"); + + return new IBSimilarity( + parseDistribution(settings), + parseLambda(settings), + parseNormalization(settings)); + } + + public static LMDirichletSimilarity createLMDirichletSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("LMDirichlet", indexCreatedVersion, settings, "mu"); + + float mu = settings.getAsFloat("mu", 2000f); + return new LMDirichletSimilarity(mu); + } + + public static LMJelinekMercerSimilarity createLMJelinekMercerSimilarity(Settings settings, Version indexCreatedVersion) { + assertSettingsIsSubsetOf("LMJelinekMercer", indexCreatedVersion, settings, "lambda"); + + float lambda = settings.getAsFloat("lambda", 0.1f); + return new LMJelinekMercerSimilarity(lambda); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index 16afb55599d49..eaed2169f11c0 100644 --- a/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/server/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -19,8 +19,13 @@ package org.elasticsearch.index.similarity; +import org.apache.lucene.search.similarities.BM25Similarity; +import 
org.apache.lucene.search.similarities.BooleanSimilarity; +import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; +import org.elasticsearch.Version; +import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; @@ -34,45 +39,84 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.function.Function; +import java.util.function.Supplier; public final class SimilarityService extends AbstractIndexComponent { private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(SimilarityService.class)); public static final String DEFAULT_SIMILARITY = "BM25"; - private final Similarity defaultSimilarity; - private final Map similarities; - private static final Map DEFAULTS; - public static final Map BUILT_IN; + private static final String CLASSIC_SIMILARITY = "classic"; + private static final Map>> DEFAULTS; + public static final Map> BUILT_IN; static { - Map defaults = new HashMap<>(); - defaults.put("classic", - (name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings)); - defaults.put("BM25", - (name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings)); - defaults.put("boolean", - (name, settings, indexSettings, scriptService) -> new BooleanSimilarityProvider(name, settings, indexSettings)); - - Map builtIn = new HashMap<>(defaults); + Map>> defaults = new HashMap<>(); + defaults.put(CLASSIC_SIMILARITY, version -> { + if (version.onOrAfter(Version.V_7_0_0_alpha1)) { + return () -> { + throw new IllegalArgumentException("The [classic] similarity may not be used anymore. 
Please use the [BM25] " + + "similarity or build a custom [scripted] similarity instead."); + }; + } else { + final ClassicSimilarity similarity = SimilarityProviders.createClassicSimilarity(Settings.EMPTY, version); + return () -> { + DEPRECATION_LOGGER.deprecated("The [classic] similarity is now deprecated in favour of BM25, which is generally " + + "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity " + + "instead."); + return similarity; + }; + } + }); + defaults.put("BM25", version -> { + final BM25Similarity similarity = SimilarityProviders.createBM25Similarity(Settings.EMPTY, version); + return () -> similarity; + }); + defaults.put("boolean", version -> { + final Similarity similarity = new BooleanSimilarity(); + return () -> similarity; + }); + + Map> builtIn = new HashMap<>(); + builtIn.put(CLASSIC_SIMILARITY, + (settings, version, script) -> { + if (version.onOrAfter(Version.V_7_0_0_alpha1)) { + throw new IllegalArgumentException("The [classic] similarity may not be used anymore. Please use the [BM25] " + + "similarity or build a custom [scripted] similarity instead."); + } else { + DEPRECATION_LOGGER.deprecated("The [classic] similarity is now deprecated in favour of BM25, which is generally " + + "accepted as a better alternative. 
Use the [BM25] similarity or build a custom [scripted] similarity " + + "instead."); + return SimilarityProviders.createClassicSimilarity(settings, version); + } + }); + builtIn.put("BM25", + (settings, version, scriptService) -> SimilarityProviders.createBM25Similarity(settings, version)); + builtIn.put("boolean", + (settings, version, scriptService) -> SimilarityProviders.createBooleanSimilarity(settings, version)); builtIn.put("DFR", - (name, settings, indexSettings, scriptService) -> new DFRSimilarityProvider(name, settings, indexSettings)); + (settings, version, scriptService) -> SimilarityProviders.createDfrSimilarity(settings, version)); builtIn.put("IB", - (name, settings, indexSettings, scriptService) -> new IBSimilarityProvider(name, settings, indexSettings)); + (settings, version, scriptService) -> SimilarityProviders.createIBSimilarity(settings, version)); builtIn.put("LMDirichlet", - (name, settings, indexSettings, scriptService) -> new LMDirichletSimilarityProvider(name, settings, indexSettings)); + (settings, version, scriptService) -> SimilarityProviders.createLMDirichletSimilarity(settings, version)); builtIn.put("LMJelinekMercer", - (name, settings, indexSettings, scriptService) -> new LMJelinekMercerSimilarityProvider(name, settings, indexSettings)); + (settings, version, scriptService) -> SimilarityProviders.createLMJelinekMercerSimilarity(settings, version)); builtIn.put("DFI", - (name, settings, indexSettings, scriptService) -> new DFISimilarityProvider(name, settings, indexSettings)); - builtIn.put("scripted", ScriptedSimilarityProvider::new); + (settings, version, scriptService) -> SimilarityProviders.createDfiSimilarity(settings, version)); + builtIn.put("scripted", new ScriptedSimilarityProvider()); DEFAULTS = Collections.unmodifiableMap(defaults); BUILT_IN = Collections.unmodifiableMap(builtIn); } + private final Similarity defaultSimilarity; + private final Map> similarities; + public SimilarityService(IndexSettings indexSettings, 
ScriptService scriptService, - Map similarities) { + Map> similarities) { super(indexSettings); - Map providers = new HashMap<>(similarities.size()); + Map> providers = new HashMap<>(similarities.size()); Map similaritySettings = this.indexSettings.getSettings().getGroups(IndexModule.SIMILARITY_SETTINGS_PREFIX); + for (Map.Entry entry : similaritySettings.entrySet()) { String name = entry.getKey(); if (BUILT_IN.containsKey(name)) { @@ -85,14 +129,13 @@ public SimilarityService(IndexSettings indexSettings, ScriptService scriptServic } else if ((similarities.containsKey(typeName) || BUILT_IN.containsKey(typeName)) == false) { throw new IllegalArgumentException("Unknown Similarity type [" + typeName + "] for [" + name + "]"); } - SimilarityProvider.Factory defaultFactory = BUILT_IN.get(typeName); - SimilarityProvider.Factory factory = similarities.getOrDefault(typeName, defaultFactory); - providers.put(name, factory.create(name, providerSettings, indexSettings.getSettings(), scriptService)); + TriFunction defaultFactory = BUILT_IN.get(typeName); + TriFunction factory = similarities.getOrDefault(typeName, defaultFactory); + final Similarity similarity = factory.apply(providerSettings, indexSettings.getIndexVersionCreated(), scriptService); + providers.put(name, () -> similarity); } - Map providerMapping = addSimilarities(similaritySettings, indexSettings.getSettings(), scriptService, - DEFAULTS); - for (Map.Entry entry : providerMapping.entrySet()) { - providers.put(entry.getKey(), entry.getValue()); + for (Map.Entry>> entry : DEFAULTS.entrySet()) { + providers.put(entry.getKey(), entry.getValue().apply(indexSettings.getIndexVersionCreated())); } this.similarities = providers; defaultSimilarity = (providers.get("default") != null) ? 
providers.get("default").get() @@ -108,25 +151,16 @@ public Similarity similarity(MapperService mapperService) { defaultSimilarity; } - private Map addSimilarities(Map similaritySettings, Settings indexSettings, - ScriptService scriptService, Map similarities) { - Map providers = new HashMap<>(similarities.size()); - for (Map.Entry entry : similarities.entrySet()) { - String name = entry.getKey(); - SimilarityProvider.Factory factory = entry.getValue(); - Settings providerSettings = similaritySettings.get(name); - if (providerSettings == null) { - providerSettings = Settings.Builder.EMPTY_SETTINGS; - } - providers.put(name, factory.create(name, providerSettings, indexSettings, scriptService)); - } - return providers; - } - + public SimilarityProvider getSimilarity(String name) { - return similarities.get(name); + Supplier sim = similarities.get(name); + if (sim == null) { + return null; + } + return new SimilarityProvider(name, sim.get()); } + // for testing Similarity getDefaultSimilarity() { return defaultSimilarity; } diff --git a/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java b/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java index 706421c5ce73a..dde9c1ca3bdb6 100644 --- a/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/elasticsearch/index/IndexModuleTests.java @@ -59,7 +59,6 @@ import org.elasticsearch.index.shard.IndexingOperationListener; import org.elasticsearch.index.shard.SearchOperationListener; import org.elasticsearch.index.shard.ShardId; -import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.index.similarity.SimilarityService; import org.elasticsearch.index.store.IndexStore; import org.elasticsearch.indices.IndicesModule; @@ -287,17 +286,8 @@ public void testAddSimilarity() throws IOException { .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IndexModule module = new 
IndexModule(IndexSettingsModule.newIndexSettings("foo", indexSettings), emptyAnalysisRegistry); - module.addSimilarity("test_similarity", (string, providerSettings, indexLevelSettings, scriptService) -> new SimilarityProvider() { - @Override - public String name() { - return string; - } - - @Override - public Similarity get() { - return new TestSimilarity(providerSettings.get("key")); - } - }); + module.addSimilarity("test_similarity", + (providerSettings, indexCreatedVersion, scriptService) -> new TestSimilarity(providerSettings.get("key"))); IndexService indexService = newIndexService(module); SimilarityService similarityService = indexService.similarityService(); diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java index ed219c972b614..5d18a595e9687 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityServiceTests.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.similarity; import org.apache.lucene.search.similarities.BM25Similarity; -import org.apache.lucene.search.similarities.ClassicSimilarity; +import org.apache.lucene.search.similarities.BooleanSimilarity; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.test.ESTestCase; @@ -50,10 +50,10 @@ public void testOverrideBuiltInSimilarity() { } public void testOverrideDefaultSimilarity() { - Settings settings = Settings.builder().put("index.similarity.default.type", "classic") + Settings settings = Settings.builder().put("index.similarity.default.type", "boolean") .build(); IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings); SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap()); - assertTrue(service.getDefaultSimilarity() 
instanceof ClassicSimilarity); + assertTrue(service.getDefaultSimilarity() instanceof BooleanSimilarity); } } diff --git a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java index 2ab905a2dd526..3de02f6831837 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/SimilarityTests.java @@ -33,6 +33,8 @@ import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity; import org.apache.lucene.search.similarities.LambdaTTF; import org.apache.lucene.search.similarities.NormalizationH2; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; @@ -60,7 +62,24 @@ protected Collection> getPlugins() { public void testResolveDefaultSimilarities() { SimilarityService similarityService = createIndex("foo").similarityService(); + assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); + assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); + assertThat(similarityService.getSimilarity("default"), equalTo(null)); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> similarityService.getSimilarity("classic")); + assertEquals("The [classic] similarity may not be used anymore. 
Please use the [BM25] similarity or build a custom [scripted] " + + "similarity instead.", e.getMessage()); + } + + public void testResolveDefaultSimilaritiesOn6xIndex() { + Settings indexSettings = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden + .build(); + SimilarityService similarityService = createIndex("foo", indexSettings).similarityService(); assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class)); + assertWarnings("The [classic] similarity is now deprecated in favour of BM25, which is generally " + + "accepted as a better alternative. Use the [BM25] similarity or build a custom [scripted] similarity " + + "instead."); assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class)); assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class)); assertThat(similarityService.getSimilarity("default"), equalTo(null)); @@ -76,15 +95,27 @@ public void testResolveSimilaritiesFromMapping_classic() throws IOException { Settings indexSettings = Settings.builder() .put("index.similarity.my_similarity.type", "classic") .put("index.similarity.my_similarity.discount_overlaps", false) + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_6_3_0) // otherwise classic is forbidden .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(ClassicSimilarity.class)); ClassicSimilarity similarity = (ClassicSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); 
assertThat(similarity.getDiscountOverlaps(), equalTo(false)); } + public void testResolveSimilaritiesFromMapping_classicIsForbidden() throws IOException { + Settings indexSettings = Settings.builder() + .put("index.similarity.my_similarity.type", "classic") + .put("index.similarity.my_similarity.discount_overlaps", false) + .build(); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> createIndex("foo", indexSettings)); + assertEquals("The [classic] similarity may not be used anymore. Please use the [BM25] similarity or build a custom [scripted] " + + "similarity instead.", e.getMessage()); + } + public void testResolveSimilaritiesFromMapping_bm25() throws IOException { String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("properties") @@ -100,7 +131,7 @@ public void testResolveSimilaritiesFromMapping_bm25() throws IOException { .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(BM25SimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(BM25Similarity.class)); BM25Similarity similarity = (BM25Similarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getK1(), equalTo(2.0f)); @@ -119,8 +150,8 @@ public void testResolveSimilaritiesFromMapping_boolean() throws IOException { DocumentMapper documentMapper = indexService.mapperService() .documentMapperParser() .parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), - instanceOf(BooleanSimilarityProvider.class)); + 
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), + instanceOf(BooleanSimilarity.class)); } public void testResolveSimilaritiesFromMapping_DFR() throws IOException { @@ -139,7 +170,7 @@ public void testResolveSimilaritiesFromMapping_DFR() throws IOException { .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(DFRSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(DFRSimilarity.class)); DFRSimilarity similarity = (DFRSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getBasicModel(), instanceOf(BasicModelG.class)); @@ -164,7 +195,7 @@ public void testResolveSimilaritiesFromMapping_IB() throws IOException { .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(IBSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(IBSimilarity.class)); IBSimilarity similarity = (IBSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getDistribution(), instanceOf(DistributionSPL.class)); @@ -187,7 +218,7 @@ public void testResolveSimilaritiesFromMapping_DFI() throws IOException { IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); MappedFieldType fieldType 
= documentMapper.mappers().getMapper("field1").fieldType(); - assertThat(fieldType.similarity(), instanceOf(DFISimilarityProvider.class)); + assertThat(fieldType.similarity().get(), instanceOf(DFISimilarity.class)); DFISimilarity similarity = (DFISimilarity) fieldType.similarity().get(); assertThat(similarity.getIndependence(), instanceOf(IndependenceChiSquared.class)); } @@ -205,7 +236,7 @@ public void testResolveSimilaritiesFromMapping_LMDirichlet() throws IOException .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMDirichletSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(LMDirichletSimilarity.class)); LMDirichletSimilarity similarity = (LMDirichletSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getMu(), equalTo(3000f)); @@ -224,7 +255,7 @@ public void testResolveSimilaritiesFromMapping_LMJelinekMercer() throws IOExcept .build(); IndexService indexService = createIndex("foo", indexSettings); DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping)); - assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(LMJelinekMercerSimilarityProvider.class)); + assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity().get(), instanceOf(LMJelinekMercerSimilarity.class)); LMJelinekMercerSimilarity similarity = (LMJelinekMercerSimilarity) documentMapper.mappers().getMapper("field1").fieldType().similarity().get(); assertThat(similarity.getLambda(), equalTo(0.7f)); @@ -245,4 +276,14 @@ public void testResolveSimilaritiesFromMapping_Unknown() throws 
IOException { assertThat(e.getMessage(), equalTo("Unknown Similarity type [unknown_similarity] for field [field1]")); } } + + public void testUnknownParameters() throws IOException { + Settings indexSettings = Settings.builder() + .put("index.similarity.my_similarity.type", "BM25") + .put("index.similarity.my_similarity.z", 2.0f) + .build(); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> createIndex("foo", indexSettings)); + assertEquals("Unknown settings for similarity of type [BM25]: [z]", e.getMessage()); + } } diff --git a/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java b/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java index 7cef608850e11..46d7311a90e23 100644 --- a/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java +++ b/server/src/test/java/org/elasticsearch/indices/IndicesServiceTests.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.indices; +import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.store.AlreadyClosedException; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.stats.CommonStatsFlags; @@ -49,7 +50,6 @@ import org.elasticsearch.index.shard.IndexShardState; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.ShardPath; -import org.elasticsearch.index.similarity.BM25SimilarityProvider; import org.elasticsearch.indices.IndicesService.ShardDeletionCheckResult; import org.elasticsearch.plugins.MapperPlugin; import org.elasticsearch.plugins.Plugin; @@ -106,7 +106,7 @@ public Map getMappers() { public void onIndexModule(IndexModule indexModule) { super.onIndexModule(indexModule); indexModule.addSimilarity("fake-similarity", - (name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings)); + (settings, indexCreatedVersion, scriptService) -> new BM25Similarity()); } } @@ -375,8 +375,8 @@ public void 
testStandAloneMapperServiceWithPlugins() throws IOException { .build(); MapperService mapperService = indicesService.createIndexMapperService(indexMetaData); assertNotNull(mapperService.documentMapperParser().parserContext("type").typeParser("fake-mapper")); - assertThat(mapperService.documentMapperParser().parserContext("type").getSimilarity("test"), - instanceOf(BM25SimilarityProvider.class)); + assertThat(mapperService.documentMapperParser().parserContext("type").getSimilarity("test").get(), + instanceOf(BM25Similarity.class)); } public void testStatsByShardDoesNotDieFromExpectedExceptions() { diff --git a/server/src/test/java/org/elasticsearch/similarity/SimilarityIT.java b/server/src/test/java/org/elasticsearch/similarity/SimilarityIT.java index c925e46cfa048..35e5b7071872b 100644 --- a/server/src/test/java/org/elasticsearch/similarity/SimilarityIT.java +++ b/server/src/test/java/org/elasticsearch/similarity/SimilarityIT.java @@ -46,7 +46,7 @@ public void testCustomBM25Similarity() throws Exception { .field("type", "text") .endObject() .startObject("field2") - .field("similarity", "classic") + .field("similarity", "boolean") .field("type", "text") .endObject() .endObject() @@ -68,9 +68,9 @@ public void testCustomBM25Similarity() throws Exception { assertThat(bm25SearchResponse.getHits().getTotalHits(), equalTo(1L)); float bm25Score = bm25SearchResponse.getHits().getHits()[0].getScore(); - SearchResponse defaultSearchResponse = client().prepareSearch().setQuery(matchQuery("field2", "quick brown fox")).execute().actionGet(); - assertThat(defaultSearchResponse.getHits().getTotalHits(), equalTo(1L)); - float defaultScore = defaultSearchResponse.getHits().getHits()[0].getScore(); + SearchResponse booleanSearchResponse = client().prepareSearch().setQuery(matchQuery("field2", "quick brown fox")).execute().actionGet(); + assertThat(booleanSearchResponse.getHits().getTotalHits(), equalTo(1L)); + float defaultScore = 
booleanSearchResponse.getHits().getHits()[0].getScore(); assertThat(bm25Score, not(equalTo(defaultScore))); } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java index 818594d3bf7fd..28767cb34d73b 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/FieldTypeTestCase.java @@ -20,13 +20,14 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.search.Query; +import org.apache.lucene.search.similarities.BM25Similarity; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.analysis.AnalyzerScope; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.index.similarity.BM25SimilarityProvider; +import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.test.ESTestCase; import java.util.ArrayList; @@ -123,17 +124,17 @@ public void normalizeOther(MappedFieldType other) { new Modifier("similarity", false) { @Override public void modify(MappedFieldType ft) { - ft.setSimilarity(new BM25SimilarityProvider("foo", Settings.EMPTY, INDEX_SETTINGS)); + ft.setSimilarity(new SimilarityProvider("foo", new BM25Similarity())); } }, new Modifier("similarity", false) { @Override public void modify(MappedFieldType ft) { - ft.setSimilarity(new BM25SimilarityProvider("foo", Settings.EMPTY, INDEX_SETTINGS)); + ft.setSimilarity(new SimilarityProvider("foo", new BM25Similarity())); } @Override public void normalizeOther(MappedFieldType other) { - other.setSimilarity(new BM25SimilarityProvider("bar", Settings.EMPTY, INDEX_SETTINGS)); + other.setSimilarity(new SimilarityProvider("bar", new 
BM25Similarity())); } }, new Modifier("eager_global_ordinals", true) {