From 5402c0e4fda99f660cc4b25c432e20af6c99f2b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Sun, 8 Jul 2018 19:47:16 +0200 Subject: [PATCH 1/4] Add Expected Reciprocal Rank metric This change adds Expected Reciprocal Rank (ERR) as a ranking evaluation metric as described in: Chapelle, O., Metzler, D., Zhang, Y., & Grinspan, P. (2009). Expected reciprocal rank for graded relevance. Proceedings of the 18th ACM Conference on Information and Knowledge Management. https://doi.org/10.1145/1645953.1646033 ERR is an extension of the classical reciprocal rank to the graded relevance case and assumes a cascade browsing model. It quantifies the usefulness of a document at rank `i` conditioned on the degree of relevance of the items at ranks less than `i`. ERR seems to be gaining traction as an alternative to (n)DCG, so it seems like a good metric to support. Also ERR seems to be the default optimization metric used for training in RankLib, a widely used learning to rank library. 
Relates to #29653 --- .../xcontent/ConstructingObjectParser.java | 1 + .../rankeval/DiscountedCumulativeGain.java | 6 +- .../rankeval/ExpectedReciprocalRank.java | 292 ++++++++++++++++++ .../DiscountedCumulativeGainTests.java | 12 +- .../rankeval/ExpectedReciprocalRankTests.java | 230 ++++++++++++++ 5 files changed, 532 insertions(+), 9 deletions(-) create mode 100644 modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java create mode 100644 modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java diff --git a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java index d61bd8a5dbbdb..e880781fad781 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/common/xcontent/ConstructingObjectParser.java @@ -294,6 +294,7 @@ public void declareNamedObjects(BiConsumer> consumer, NamedOb } } + @Override public String getName() { return objectParser.getName(); } diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java index 01a6e35299b29..cab3237732301 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGain.java @@ -326,9 +326,9 @@ public boolean equals(Object obj) { return false; } DiscountedCumulativeGain.Detail other = (DiscountedCumulativeGain.Detail) obj; - return (this.dcg == other.dcg && - this.idcg == other.idcg && - this.unratedDocs == other.unratedDocs); + return Double.compare(this.dcg, other.dcg) == 0 && + Double.compare(this.idcg, other.idcg) == 0 && + 
this.unratedDocs == other.unratedDocs; } @Override diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java new file mode 100644 index 0000000000000..0b966add48937 --- /dev/null +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java @@ -0,0 +1,292 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.search.SearchHit; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Optional; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; +import static org.elasticsearch.index.rankeval.EvaluationMetric.joinHitsWithRatings; + +/** + * Implemention of the Expected Reciprocal Rank metric described in:

+ * + * Chapelle, O., Metlzer, D., Zhang, Y., & Grinspan, P. (2009).
+ * Expected reciprocal rank for graded relevance.
+ * Proceeding of the 18th ACM Conference on Information and Knowledge Management - CIKM ’09, 621.
+ * https://doi.org/10.1145/1645953.1646033 + */ +public class ExpectedReciprocalRank implements EvaluationMetric { + + /** the default search window size */ + private static final int DEFAULT_K = 10; + + /** the search window size */ + private final int k; + + /** + * Optional. If set, this will be the rating for docs that are unrated in the ranking evaluation request + */ + private final Integer unknownDocRating; + + private final int maxRelevance; + + private final double two_pow_maxRelevance; + + public static final String NAME = "err"; + + public ExpectedReciprocalRank(int maxRelevance) { + this(maxRelevance, null, DEFAULT_K); + } + + /** + * @param maxRelevance the maximal relevance judgment in the evaluation dataset + * @param unknownDocRating + * the rating for documents the user hasn't supplied an explicit + * rating for. Can be null, in which case document will be skipped. + * @param k the search window size all request use. + */ + public ExpectedReciprocalRank(int maxRelevance, @Nullable Integer unknownDocRating, int k) { + this.maxRelevance = maxRelevance; + this.unknownDocRating = unknownDocRating; + this.k = k; + // we can pre-calculate the constant used in metric calculation + this.two_pow_maxRelevance = Math.pow(2, this.maxRelevance); + } + + ExpectedReciprocalRank(StreamInput in) throws IOException { + this.maxRelevance = in.readVInt(); + this.unknownDocRating = in.readOptionalVInt(); + this.k = in.readVInt(); + this.two_pow_maxRelevance = Math.pow(2, this.maxRelevance); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(maxRelevance); + out.writeOptionalVInt(unknownDocRating); + out.writeVInt(k); + } + + @Override + public String getWriteableName() { + return NAME; + } + + int getK() { + return this.k; + } + + int getMaxRelevance() { + return this.maxRelevance; + } + + /** + * get the rating used for unrated documents + */ + public Integer getUnknownDocRating() { + return this.unknownDocRating; + } + + + 
@Override + public Optional forcedSearchSize() { + return Optional.of(k); + } + + @Override + public EvalQueryQuality evaluate(String taskId, SearchHit[] hits, List ratedDocs) { + List ratedHits = joinHitsWithRatings(hits, ratedDocs); + if (ratedHits.size() > this.k) { + ratedHits = ratedHits.subList(0, k); + } + List ratingsInSearchHits = new ArrayList<>(ratedHits.size()); + int unratedResults = 0; + for (RatedSearchHit hit : ratedHits) { + // unknownDocRating might be null, in which case unrated will be ignored in the calculation. + // we still need to add them as a placeholder so the rank of the subsequent ratings is correct + ratingsInSearchHits.add(hit.getRating().orElse(unknownDocRating)); + if (hit.getRating().isPresent() == false) { + unratedResults++; + } + } + + double p = 1; + double err = 0; + int rank = 1; + for (Integer rating : ratingsInSearchHits) { + if (rating != null) { + double probR = probabilityOfRelevance(rating); + err = err + (p * probR / rank); + p = p * (1 - probR); + } + rank++; + } + + EvalQueryQuality evalQueryQuality = new EvalQueryQuality(taskId, err); + evalQueryQuality.addHitsAndRatings(ratedHits); + evalQueryQuality.setMetricDetails(new Detail(unratedResults)); + return evalQueryQuality; + } + + double probabilityOfRelevance(Integer rating) { + return (Math.pow(2, rating) - 1) / this.two_pow_maxRelevance; + } + + private static final ParseField K_FIELD = new ParseField("k"); + private static final ParseField UNKNOWN_DOC_RATING_FIELD = new ParseField("unknown_doc_rating"); + private static final ParseField MAX_RELEVANCE_FIELD = new ParseField("maximum_relevance"); + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("dcg", false, + args -> { + int maxRelevance = (Integer) args[0]; + Integer optK = (Integer) args[2]; + return new ExpectedReciprocalRank(maxRelevance, (Integer) args[1], + optK == null ? 
DEFAULT_K : optK); + }); + + + static { + PARSER.declareInt(constructorArg(), MAX_RELEVANCE_FIELD); + PARSER.declareInt(optionalConstructorArg(), UNKNOWN_DOC_RATING_FIELD); + PARSER.declareInt(optionalConstructorArg(), K_FIELD); + } + + public static ExpectedReciprocalRank fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startObject(NAME); + builder.field(MAX_RELEVANCE_FIELD.getPreferredName(), this.maxRelevance); + if (unknownDocRating != null) { + builder.field(UNKNOWN_DOC_RATING_FIELD.getPreferredName(), this.unknownDocRating); + } + builder.field(K_FIELD.getPreferredName(), this.k); + builder.endObject(); + builder.endObject(); + return builder; + } + + @Override + public final boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ExpectedReciprocalRank other = (ExpectedReciprocalRank) obj; + return this.k == other.k && + this.maxRelevance == other.maxRelevance + && Objects.equals(unknownDocRating, other.unknownDocRating); + } + + @Override + public final int hashCode() { + return Objects.hash(unknownDocRating, k, maxRelevance); + } + + public static final class Detail implements MetricDetail { + + private static ParseField UNRATED_FIELD = new ParseField("unrated_docs"); + private final int unratedDocs; + + Detail(int unratedDocs) { + this.unratedDocs = unratedDocs; + } + + Detail(StreamInput in) throws IOException { + this.unratedDocs = in.readVInt(); + } + + @Override + public + String getMetricName() { + return NAME; + } + + @Override + public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException { + return builder.field(UNRATED_FIELD.getPreferredName(), this.unratedDocs); + } + + private static final ConstructingObjectParser PARSER = new 
ConstructingObjectParser<>(NAME, true, args -> { + return new Detail((Integer) args[0]); + }); + + static { + PARSER.declareInt(constructorArg(), UNRATED_FIELD); + } + + public static Detail fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(this.unratedDocs); + } + + @Override + public String getWriteableName() { + return NAME; + } + + /** + * @return the number of unrated documents in the search results + */ + public Object getUnratedDocs() { + return this.unratedDocs; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null || getClass() != obj.getClass()) { + return false; + } + ExpectedReciprocalRank.Detail other = (ExpectedReciprocalRank.Detail) obj; + return this.unratedDocs == other.unratedDocs; + } + + @Override + public int hashCode() { + return Objects.hash(this.unratedDocs); + } + } +} + diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java index 24ac600a11398..56b0c692c411a 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/DiscountedCumulativeGainTests.java @@ -55,7 +55,7 @@ public class DiscountedCumulativeGainTests extends ESTestCase { /** * Assuming the docs are ranked in the following order: * - * rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1) + * rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1) * ------------------------------------------------------------------------------------------- * 1 | 3 | 7.0 | 1.0 | 7.0 | 7.0 |  * 2 | 2 | 3.0 | 1.5849625007211563 | 1.8927892607143721 @@ -82,7 +82,7 @@ 
public void testDCGAt() { * Check with normalization: to get the maximal possible dcg, sort documents by * relevance in descending order * - * rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1) + * rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1) * --------------------------------------------------------------------------------------- * 1 | 3 | 7.0 | 1.0  | 7.0 * 2 | 3 | 7.0 | 1.5849625007211563 | 4.416508275000202 @@ -101,7 +101,7 @@ public void testDCGAt() { * This tests metric when some documents in the search result don't have a * rating provided by the user. * - * rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1) + * rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1) * ------------------------------------------------------------------------------------------- * 1 | 3 | 7.0 | 1.0 | 7.0 2 |  * 2 | 3.0 | 1.5849625007211563 | 1.8927892607143721 @@ -134,7 +134,7 @@ public void testDCGAtSixMissingRatings() { * Check with normalization: to get the maximal possible dcg, sort documents by * relevance in descending order * - * rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1) + * rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1) * ---------------------------------------------------------------------------------------- * 1 | 3 | 7.0 | 1.0  | 7.0 * 2 | 3 | 7.0 | 1.5849625007211563 | 4.416508275000202 @@ -154,7 +154,7 @@ public void testDCGAtSixMissingRatings() { * documents than search hits because we restrict DCG to be calculated at the * fourth position * - * rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1) + * rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1) * 
------------------------------------------------------------------------------------------- * 1 | 3 | 7.0 | 1.0 | 7.0 2 |  * 2 | 3.0 | 1.5849625007211563 | 1.8927892607143721 @@ -191,7 +191,7 @@ public void testDCGAtFourMoreRatings() { * Check with normalization: to get the maximal possible dcg, sort documents by * relevance in descending order * - * rank | rel_rank | 2^(rel_rank) - 1 | log_2(rank + 1) | (2^(rel_rank) - 1) / log_2(rank + 1) + * rank | relevance | 2^(relevance) - 1 | log_2(rank + 1) | (2^(relevance) - 1) / log_2(rank + 1) * --------------------------------------------------------------------------------------- * 1 | 3 | 7.0 | 1.0  | 7.0 * 2 | 3 | 7.0 | 1.5849625007211563 | 4.416508275000202 diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java new file mode 100644 index 0000000000000..a765b32adcacb --- /dev/null +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java @@ -0,0 +1,230 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.rankeval; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.text.Text; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentParseException; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.common.xcontent.json.JsonXContent; +import org.elasticsearch.index.Index; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.SearchShardTarget; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode; +import static org.elasticsearch.test.XContentTestUtils.insertRandomFields; +import static org.hamcrest.CoreMatchers.containsString; + +public class ExpectedReciprocalRankTests extends ESTestCase { + + private static final double DELTA = 10E-14; + + public void testProbabilityOfRelevance() { + ExpectedReciprocalRank err = new ExpectedReciprocalRank(5); + assertEquals(0.0, err.probabilityOfRelevance(0), 0.0); + assertEquals(1d/32d, err.probabilityOfRelevance(1), 0.0); + assertEquals(3d/32d, err.probabilityOfRelevance(2), 0.0); + assertEquals(7d/32d, err.probabilityOfRelevance(3), 0.0); + assertEquals(15d/32d, err.probabilityOfRelevance(4), 0.0); + assertEquals(31d/32d, err.probabilityOfRelevance(5), 0.0); + } + + /** + * Assuming the result ranking is + * + *

{@code
+     * rank | relevance | probR / r | p        | p * probR / r
+     * -------------------------------------------------------
+     * 1    | 3         | 0.875     | 1        | 0.875       |
+     * 2    | 2         | 0.1875    | 0.125    | 0.0234375   |
+     * 3    | 0         | 0         | 0.078125 | 0           |
+     * 4    | 1         | 0.03125   | 0.078125 | 0.00244140625 |
+     * }
+ * + * err => sum of last column + */ + public void testERRAt() { + List rated = new ArrayList<>(); + int[] relevanceRatings = new int[] { 3, 2, 0, 1}; + SearchHit[] hits = new SearchHit[relevanceRatings.length]; + for (int i = 0; i < relevanceRatings.length; i++) { + rated.add(new RatedDocument("index", Integer.toString(i), relevanceRatings[i])); + hits[i] = new SearchHit(i, Integer.toString(i), new Text("type"), Collections.emptyMap()); + hits[i].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null)); + } + ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, 0, 3); + assertEquals(0.8984375, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); + // take 4th rank into window + err = new ExpectedReciprocalRank(3, 0, 4); + assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); + } + + /** + * Assuming the result ranking is + * + *
{@code
+     * rank | relevance | probR / r | p        | p * probR / r
+     * -------------------------------------------------------
+     * 1    | 3         | 0.875     | 1        | 0.875       |
+     * 2    | n/a       | n/a       | 0.125    | n/a   |
+     * 3    | 0         | 0         | 0.125    | 0           |
+     * 4    | 1         | 0.03125   | 0.125    | 0.00390625 |
+     * }
+ * + * err => sum of last column + */ + public void testERRMissingRatings() { + List rated = new ArrayList<>(); + Integer[] relevanceRatings = new Integer[] { 3, null, 0, 1}; + SearchHit[] hits = new SearchHit[relevanceRatings.length]; + for (int i = 0; i < relevanceRatings.length; i++) { + if (relevanceRatings[i] != null) { + rated.add(new RatedDocument("index", Integer.toString(i), relevanceRatings[i])); + } + hits[i] = new SearchHit(i, Integer.toString(i), new Text("type"), Collections.emptyMap()); + hits[i].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null)); + } + ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, null, 4); + EvalQueryQuality evaluation = err.evaluate("id", hits, rated); + assertEquals(0.875 + 0.00390625, evaluation.getQualityLevel(), DELTA); + assertEquals(1, ((ExpectedReciprocalRank.Detail) evaluation.getMetricDetails()).getUnratedDocs()); + // if we supply e.g. 2 as unknown docs rating, it should be the same as in the other test above + err = new ExpectedReciprocalRank(3, 2, 4); + assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); + } + + /** + * test that metric returns 0.0 when there are no search results + */ + public void testNoResults() throws Exception { + ExpectedReciprocalRank err = new ExpectedReciprocalRank(5, 0, 10); + assertEquals(0.0, err.evaluate("id", new SearchHit[0], Collections.emptyList()).getQualityLevel(), DELTA); + } + + public void testParseFromXContent() throws IOException { + assertParsedCorrect("{ \"unknown_doc_rating\": 2, \"maximum_relevance\": 5, \"k\" : 15 }", 2, 5, 15); + assertParsedCorrect("{ \"unknown_doc_rating\": 2, \"maximum_relevance\": 4 }", 2, 4, 10); + assertParsedCorrect("{ \"maximum_relevance\": 4, \"k\": 23 }", null, 4, 23); + } + + private void assertParsedCorrect(String xContent, Integer expectedUnknownDocRating, int expectedMaxRelevance, int expectedK) + throws IOException { + try (XContentParser parser = 
createParser(JsonXContent.jsonXContent, xContent)) { + ExpectedReciprocalRank errAt = ExpectedReciprocalRank.fromXContent(parser); + assertEquals(expectedUnknownDocRating, errAt.getUnknownDocRating()); + assertEquals(expectedK, errAt.getK()); + assertEquals(expectedMaxRelevance, errAt.getMaxRelevance()); + } + } + + public static ExpectedReciprocalRank createTestItem() { + Integer unknownDocRating = frequently() ? Integer.valueOf(randomIntBetween(0, 10)) : null; + int maxRelevance = randomIntBetween(1, 10); + return new ExpectedReciprocalRank(maxRelevance, unknownDocRating, randomIntBetween(1, 10)); + } + + public void testXContentRoundtrip() throws IOException { + ExpectedReciprocalRank testItem = createTestItem(); + XContentBuilder builder = XContentFactory.contentBuilder(randomFrom(XContentType.values())); + XContentBuilder shuffled = shuffleXContent(testItem.toXContent(builder, ToXContent.EMPTY_PARAMS)); + try (XContentParser itemParser = createParser(shuffled)) { + itemParser.nextToken(); + itemParser.nextToken(); + ExpectedReciprocalRank parsedItem = ExpectedReciprocalRank.fromXContent(itemParser); + assertNotSame(testItem, parsedItem); + assertEquals(testItem, parsedItem); + assertEquals(testItem.hashCode(), parsedItem.hashCode()); + } + } + + public void testXContentParsingIsNotLenient() throws IOException { + ExpectedReciprocalRank testItem = createTestItem(); + XContentType xContentType = randomFrom(XContentType.values()); + BytesReference originalBytes = toShuffledXContent(testItem, xContentType, ToXContent.EMPTY_PARAMS, randomBoolean()); + BytesReference withRandomFields = insertRandomFields(xContentType, originalBytes, null, random()); + try (XContentParser parser = createParser(xContentType.xContent(), withRandomFields)) { + parser.nextToken(); + parser.nextToken(); + XContentParseException exception = expectThrows(XContentParseException.class, + () -> DiscountedCumulativeGain.fromXContent(parser)); + assertThat(exception.getMessage(), 
containsString("[dcg] unknown field")); + } + } + + public void testMetricDetails() { + double dcg = randomDoubleBetween(0, 1, true); + double idcg = randomBoolean() ? 0.0 : randomDoubleBetween(0, 1, true); + double expectedNdcg = idcg != 0 ? dcg / idcg : 0.0; + int unratedDocs = randomIntBetween(0, 100); + DiscountedCumulativeGain.Detail detail = new DiscountedCumulativeGain.Detail(dcg, idcg, unratedDocs); + assertEquals(dcg, detail.getDCG(), 0.0); + assertEquals(idcg, detail.getIDCG(), 0.0); + assertEquals(expectedNdcg, detail.getNDCG(), 0.0); + assertEquals(unratedDocs, detail.getUnratedDocs()); + if (idcg != 0) { + assertEquals("{\"dcg\":{\"dcg\":" + dcg + ",\"ideal_dcg\":" + idcg + ",\"normalized_dcg\":" + expectedNdcg + + ",\"unrated_docs\":" + unratedDocs + "}}", Strings.toString(detail)); + } else { + assertEquals("{\"dcg\":{\"dcg\":" + dcg + ",\"unrated_docs\":" + unratedDocs + "}}", Strings.toString(detail)); + } + } + + public void testSerialization() throws IOException { + ExpectedReciprocalRank original = createTestItem(); + ExpectedReciprocalRank deserialized = ESTestCase.copyWriteable(original, new NamedWriteableRegistry(Collections.emptyList()), + ExpectedReciprocalRank::new); + assertEquals(deserialized, original); + assertEquals(deserialized.hashCode(), original.hashCode()); + assertNotSame(deserialized, original); + } + + public void testEqualsAndHash() throws IOException { + checkEqualsAndHashCode(createTestItem(), original -> { + return new ExpectedReciprocalRank(original.getMaxRelevance(), original.getUnknownDocRating(), original.getK()); + }, ExpectedReciprocalRankTests::mutateTestItem); + } + + private static ExpectedReciprocalRank mutateTestItem(ExpectedReciprocalRank original) { + switch (randomIntBetween(0, 2)) { + case 0: + return new ExpectedReciprocalRank(original.getMaxRelevance() + 1, original.getUnknownDocRating(), original.getK()); + case 1: + return new ExpectedReciprocalRank(original.getMaxRelevance(), + 
randomValueOtherThan(original.getUnknownDocRating(), () -> randomIntBetween(0, 10)), original.getK()); + case 2: + return new ExpectedReciprocalRank(original.getMaxRelevance(), original.getUnknownDocRating(), + randomValueOtherThan(original.getK(), () -> randomIntBetween(1, 10))); + default: + throw new IllegalArgumentException("mutation variant not allowed"); + } + } +} From 4e8101bb7c3dfd20b8bb43125e7b8b04ad9ec692 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Tue, 10 Jul 2018 11:17:11 +0200 Subject: [PATCH 2/4] iter --- .../index/rankeval/ExpectedReciprocalRank.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java index 0b966add48937..85ed4ef4da211 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java @@ -70,11 +70,14 @@ public ExpectedReciprocalRank(int maxRelevance) { } /** - * @param maxRelevance the maximal relevance judgment in the evaluation dataset + * @param maxRelevance + * the maximal relevance judgment in the evaluation dataset * @param unknownDocRating * the rating for documents the user hasn't supplied an explicit - * rating for. Can be null, in which case document will be skipped. - * @param k the search window size all request use. + * rating for. Can be {@code null}, in which case document is + * skipped. + * @param k + * the search window size all request use. 
*/ public ExpectedReciprocalRank(int maxRelevance, @Nullable Integer unknownDocRating, int k) { this.maxRelevance = maxRelevance; From 8ee4b0fd41486922c0b58d1350f7fa2ffa9e2ca4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Wed, 11 Jul 2018 18:05:44 +0200 Subject: [PATCH 3/4] iter --- .../rankeval/ExpectedReciprocalRank.java | 2 +- .../rankeval/ExpectedReciprocalRankTests.java | 43 +++++++------------ 2 files changed, 16 insertions(+), 29 deletions(-) diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java index 85ed4ef4da211..a5bac7a65d454 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java @@ -39,7 +39,7 @@ import static org.elasticsearch.index.rankeval.EvaluationMetric.joinHitsWithRatings; /** - * Implemention of the Expected Reciprocal Rank metric described in:

+ * Implementation of the Expected Reciprocal Rank metric described in:

* * Chapelle, O., Metlzer, D., Zhang, Y., & Grinspan, P. (2009).
* Expected reciprocal rank for graded relevance.
diff --git a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java index a765b32adcacb..a888bf010f9e2 100644 --- a/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java +++ b/modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRankTests.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.rankeval; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.text.Text; @@ -74,13 +73,8 @@ public void testProbabilityOfRelevance() { */ public void testERRAt() { List rated = new ArrayList<>(); - int[] relevanceRatings = new int[] { 3, 2, 0, 1}; - SearchHit[] hits = new SearchHit[relevanceRatings.length]; - for (int i = 0; i < relevanceRatings.length; i++) { - rated.add(new RatedDocument("index", Integer.toString(i), relevanceRatings[i])); - hits[i] = new SearchHit(i, Integer.toString(i), new Text("type"), Collections.emptyMap()); - hits[i].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null)); - } + Integer[] relevanceRatings = new Integer[] { 3, 2, 0, 1}; + SearchHit[] hits = createSearchHits(rated, relevanceRatings); ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, 0, 3); assertEquals(0.8984375, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); // take 4th rank into window @@ -105,6 +99,17 @@ public void testERRAt() { public void testERRMissingRatings() { List rated = new ArrayList<>(); Integer[] relevanceRatings = new Integer[] { 3, null, 0, 1}; + SearchHit[] hits = createSearchHits(rated, relevanceRatings); + ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, null, 4); + EvalQueryQuality evaluation = err.evaluate("id", hits, rated); + assertEquals(0.875 + 0.00390625, 
evaluation.getQualityLevel(), DELTA); + assertEquals(1, ((ExpectedReciprocalRank.Detail) evaluation.getMetricDetails()).getUnratedDocs()); + // if we supply e.g. 2 as unknown docs rating, it should be the same as in the other test above + err = new ExpectedReciprocalRank(3, 2, 4); + assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); + } + + private SearchHit[] createSearchHits(List rated, Integer[] relevanceRatings) { SearchHit[] hits = new SearchHit[relevanceRatings.length]; for (int i = 0; i < relevanceRatings.length; i++) { if (relevanceRatings[i] != null) { @@ -113,13 +118,7 @@ public void testERRMissingRatings() { hits[i] = new SearchHit(i, Integer.toString(i), new Text("type"), Collections.emptyMap()); hits[i].shard(new SearchShardTarget("testnode", new Index("index", "uuid"), 0, null)); } - ExpectedReciprocalRank err = new ExpectedReciprocalRank(3, null, 4); - EvalQueryQuality evaluation = err.evaluate("id", hits, rated); - assertEquals(0.875 + 0.00390625, evaluation.getQualityLevel(), DELTA); - assertEquals(1, ((ExpectedReciprocalRank.Detail) evaluation.getMetricDetails()).getUnratedDocs()); - // if we supply e.g. 2 as unknown docs rating, it should be the same as in the other test above - err = new ExpectedReciprocalRank(3, 2, 4); - assertEquals(0.8984375 + 0.00244140625, err.evaluate("id", hits, rated).getQualityLevel(), DELTA); + return hits; } /** @@ -181,21 +180,9 @@ public void testXContentParsingIsNotLenient() throws IOException { } public void testMetricDetails() { - double dcg = randomDoubleBetween(0, 1, true); - double idcg = randomBoolean() ? 0.0 : randomDoubleBetween(0, 1, true); - double expectedNdcg = idcg != 0 ? 
dcg / idcg : 0.0; int unratedDocs = randomIntBetween(0, 100); - DiscountedCumulativeGain.Detail detail = new DiscountedCumulativeGain.Detail(dcg, idcg, unratedDocs); - assertEquals(dcg, detail.getDCG(), 0.0); - assertEquals(idcg, detail.getIDCG(), 0.0); - assertEquals(expectedNdcg, detail.getNDCG(), 0.0); + ExpectedReciprocalRank.Detail detail = new ExpectedReciprocalRank.Detail(unratedDocs); assertEquals(unratedDocs, detail.getUnratedDocs()); - if (idcg != 0) { - assertEquals("{\"dcg\":{\"dcg\":" + dcg + ",\"ideal_dcg\":" + idcg + ",\"normalized_dcg\":" + expectedNdcg - + ",\"unrated_docs\":" + unratedDocs + "}}", Strings.toString(detail)); - } else { - assertEquals("{\"dcg\":{\"dcg\":" + dcg + ",\"unrated_docs\":" + unratedDocs + "}}", Strings.toString(detail)); - } } public void testSerialization() throws IOException { From cd348a39fa7c440cf205b219963e3fd9a84e0264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christoph=20B=C3=BCscher?= Date: Thu, 12 Jul 2018 12:31:02 +0200 Subject: [PATCH 4/4] Change metric id --- .../elasticsearch/index/rankeval/ExpectedReciprocalRank.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java index a5bac7a65d454..4aac29f299d67 100644 --- a/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java +++ b/modules/rank-eval/src/main/java/org/elasticsearch/index/rankeval/ExpectedReciprocalRank.java @@ -63,7 +63,7 @@ public class ExpectedReciprocalRank implements EvaluationMetric { private final double two_pow_maxRelevance; - public static final String NAME = "err"; + public static final String NAME = "expected_reciprocal_rank"; public ExpectedReciprocalRank(int maxRelevance) { this(maxRelevance, null, DEFAULT_K);