From 7141cb35a761eea1f0e6f3ad6abf84a4c972fedc Mon Sep 17 00:00:00 2001
From: James Dyer
Date: Mon, 6 May 2013 16:49:46 +0000
Subject: [PATCH] SOLR-3240: add "spellcheck.collateMaxCollectDocs" for
estimating collation hit-counts.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1479638 13f79535-47bb-0310-9956-ffa450edef68
---
solr/CHANGES.txt | 6 ++
.../component/SpellCheckComponent.java | 16 +++-
.../search/EarlyTerminatingCollector.java | 85 ++++++++++++++++++
.../EarlyTerminatingCollectorException.java | 45 ++++++++++
.../apache/solr/search/SolrIndexSearcher.java | 23 +++--
.../solr/spelling/SpellCheckCollator.java | 85 ++++++++++++++++--
.../solr/spelling/SpellCheckCollatorTest.java | 90 ++++++++++++++++---
.../solr/common/params/SpellingParams.java | 21 ++++-
8 files changed, 345 insertions(+), 26 deletions(-)
create mode 100644 solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
create mode 100644 solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index ce423f707db..17967f7f9c7 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -66,6 +66,12 @@ New Features
* SOLR-4761: Add option to plugin a merged segment warmer into solrconfig.xml
(Mark Miller, Mike McCandless, Robert Muir)
+* SOLR-3240: Add "spellcheck.collateMaxCollectDocs" option so that when testing
+ potential Collations against the index, SpellCheckComponent will only collect
+ n documents, thereby estimating the hit-count. This is a performance optimization
+ in cases where exact hit-counts are unnecessary. Also, when "collateExtendedResults"
+ is false, this optimization is always made (James Dyer).
+
Bug Fixes
----------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
index 491733c0316..9a19522b037 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@@ -214,10 +214,20 @@ protected void addCollationsToResponse(SolrParams params, SpellingResult spellin
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
int maxCollationEvaluations = params.getInt(SPELLCHECK_MAX_COLLATION_EVALUATIONS, 10000);
boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false);
+ int maxCollationCollectDocs = params.getInt(SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, 0);
+ // If not reporting hits counts, don't bother collecting more than 1 document per try.
+ if (!collationExtendedResults) {
+ maxCollationCollectDocs = 1;
+ }
boolean shard = params.getBool(ShardParams.IS_SHARD, false);
-
- SpellCheckCollator collator = new SpellCheckCollator();
- List collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations, suggestionsMayOverlap);
+ SpellCheckCollator collator = new SpellCheckCollator()
+ .setMaxCollations(maxCollations)
+ .setMaxCollationTries(maxCollationTries)
+ .setMaxCollationEvaluations(maxCollationEvaluations)
+ .setSuggestionsMayOverlap(suggestionsMayOverlap)
+ .setDocCollectionLimit(maxCollationCollectDocs)
+ .setReportHits(collationExtendedResults);
+ List collations = collator.collate(spellingResult, q, rb);
//by sorting here we guarantee a non-distributed request returns all
//results in the same order as a distributed request would,
//even in cases when the internal rank is the same.
diff --git a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
new file mode 100644
index 00000000000..e0dae354657
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java
@@ -0,0 +1,85 @@
+package org.apache.solr.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+/**
+ *
+ * A wrapper {@link Collector} that throws {@link EarlyTerminatingCollectorException})
+ * once a specified maximum number of documents are collected.
+ *
+ */
+public class EarlyTerminatingCollector extends Collector {
+ private int numCollected;
+ private int lastDocId = -1;
+ private int maxDocsToCollect;
+ private Collector delegate;
+
+ /**
+ *
+ * Wraps a {@link Collector}, throwing {@link EarlyTerminatingCollectorException}
+ * once the specified maximum is reached.
+ *
+ * @param delegate - the Collector to wrap.
+ * @param maxDocsToCollect - the maximum number of documents to Collect
+ *
+ */
+ public EarlyTerminatingCollector(Collector delegate, int maxDocsToCollect) {
+ this.delegate = delegate;
+ this.maxDocsToCollect = maxDocsToCollect;
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return delegate.acceptsDocsOutOfOrder();
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ delegate.collect(doc);
+ lastDocId = doc;
+ numCollected++;
+ if(numCollected==maxDocsToCollect) {
+ throw new EarlyTerminatingCollectorException(numCollected, lastDocId);
+ }
+ }
+ @Override
+ public void setNextReader(AtomicReaderContext context) throws IOException {
+ delegate.setNextReader(context);
+ }
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ delegate.setScorer(scorer);
+ }
+ public int getNumCollected() {
+ return numCollected;
+ }
+ public void setNumCollected(int numCollected) {
+ this.numCollected = numCollected;
+ }
+ public int getLastDocId() {
+ return lastDocId;
+ }
+ public void setLastDocId(int lastDocId) {
+ this.lastDocId = lastDocId;
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
new file mode 100644
index 00000000000..914abc590fb
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java
@@ -0,0 +1,45 @@
+package org.apache.solr.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Thrown by {@link EarlyTerminatingCollector} when the maximum to abort
+ * the scoring / collection process early, when the specified maximum number
+ * of documents were collected.
+ */
+public class EarlyTerminatingCollectorException extends RuntimeException {
+ private static final long serialVersionUID = 5939241340763428118L;
+ private int lastDocId = -1;
+ private int numberCollected;
+
+ public EarlyTerminatingCollectorException(int numberCollected, int lastDocId) {
+ this.numberCollected = numberCollected;
+ this.lastDocId = lastDocId;
+ }
+ public int getLastDocId() {
+ return lastDocId;
+ }
+ public void setLastDocId(int lastDocId) {
+ this.lastDocId = lastDocId;
+ }
+ public int getNumberCollected() {
+ return numberCollected;
+ }
+ public void setNumberCollected(int numberCollected) {
+ this.numberCollected = numberCollected;
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 7fb19078bd7..ac25388e87c 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -99,6 +99,7 @@
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.spelling.QueryConverter;
import org.apache.solr.update.SolrIndexConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -1235,7 +1236,8 @@ public DocList getDocList(Query query, List filterList, Sort lsort, int o
public static final int GET_DOCSET = 0x40000000;
static final int NO_CHECK_FILTERCACHE = 0x20000000;
static final int NO_SET_QCACHE = 0x10000000;
-
+ public static final int TERMINATE_EARLY = 0x04;
+ public static final int FORCE_INORDER_COLLECTION = 0x08;
public static final int GET_DOCLIST = 0x02; // get the documents actually returned in a response
public static final int GET_SCORES = 0x01;
@@ -1394,7 +1396,8 @@ private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException {
float[] scores;
boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
-
+ boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY;
+
Query query = QueryUtils.makeQueryable(cmd.getQuery());
ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList());
@@ -1446,7 +1449,9 @@ public boolean acceptsDocsOutOfOrder() {
}
};
}
-
+ if (terminateEarly) {
+ collector = new EarlyTerminatingCollector(collector, cmd.len);
+ }
if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
}
@@ -1481,6 +1486,9 @@ public boolean acceptsDocsOutOfOrder() {
topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true);
}
Collector collector = topCollector;
+ if (terminateEarly) {
+ collector = new EarlyTerminatingCollector(collector, cmd.len);
+ }
if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
}
@@ -1529,6 +1537,7 @@ private DocSet getDocListAndSetNC(QueryResult qr,QueryCommand cmd) throws IOExce
DocSet set;
boolean needScores = (cmd.getFlags() & GET_SCORES) != 0;
+ boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY;
int maxDoc = maxDoc();
int smallSetSize = maxDoc>>6;
@@ -1568,7 +1577,9 @@ public boolean acceptsDocsOutOfOrder() {
}
});
}
-
+ if (terminateEarly) {
+ collector = new EarlyTerminatingCollector(collector, cmd.len);
+ }
if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed);
}
@@ -1604,7 +1615,9 @@ public boolean acceptsDocsOutOfOrder() {
DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, topCollector);
Collector collector = setCollector;
-
+ if (terminateEarly) {
+ collector = new EarlyTerminatingCollector(collector, cmd.len);
+ }
if( timeAllowed > 0 ) {
collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed );
}
diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
index b859fd053c0..bd324495fb5 100644
--- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
+++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
@@ -22,7 +22,9 @@
import java.util.List;
import org.apache.lucene.analysis.Token;
+import org.apache.lucene.index.IndexReader;
import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
@@ -33,15 +35,23 @@
import org.apache.solr.handler.component.SearchComponent;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.search.EarlyTerminatingCollectorException;
+import org.apache.solr.search.SolrIndexSearcher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class SpellCheckCollator {
private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class);
+ private int maxCollations = 1;
+ private int maxCollationTries = 0;
+ private int maxCollationEvaluations = 10000;
+ private boolean suggestionsMayOverlap = false;
+ private int docCollectionLimit = 0;
+ private boolean reportHits = true;
- public List collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse,
- int maxCollations, int maxTries, int maxEvaluations, boolean suggestionsMayOverlap) {
- List collations = new ArrayList();
+ public List collate(SpellingResult result,
+ String originalQuery, ResponseBuilder ultimateResponse) {
+ List collations = new ArrayList();
QueryComponent queryComponent = null;
if (ultimateResponse.components != null) {
@@ -54,6 +64,7 @@ public List collate(SpellingResult result, String originalQ
}
boolean verifyCandidateWithQuery = true;
+ int maxTries = maxCollationTries;
int maxNumberToIterate = maxTries;
if (maxTries < 1) {
maxTries = 1;
@@ -65,10 +76,17 @@ public List collate(SpellingResult result, String originalQ
maxTries = 1;
verifyCandidateWithQuery = false;
}
+ docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
+ int maxDocId = -1;
+ if (verifyCandidateWithQuery && docCollectionLimit > 0) {
+ IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
+ maxDocId = reader.maxDoc();
+ }
int tryNo = 0;
int collNo = 0;
- PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate, maxEvaluations, suggestionsMayOverlap);
+ PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(),
+ maxNumberToIterate, maxCollationEvaluations, suggestionsMayOverlap);
while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
@@ -96,12 +114,25 @@ public List collate(SpellingResult result, String originalQ
}
params.set(CommonParams.Q, collationQueryStr);
params.remove(CommonParams.START);
+ params.set(CommonParams.ROWS, "" + docCollectionLimit);
+ // we don't want any stored fields
params.set(CommonParams.FL, "id");
- params.set(CommonParams.ROWS, "0");
+ // we'll sort by doc id to ensure no scoring is done.
+ params.set(CommonParams.SORT, "_docid_ asc");
+ // If a dismax query, don't add unnecessary clauses for scoring
+ params.remove(DisMaxParams.TIE);
+ params.remove(DisMaxParams.PF);
+ params.remove(DisMaxParams.PF2);
+ params.remove(DisMaxParams.PF3);
+ params.remove(DisMaxParams.BQ);
+ params.remove(DisMaxParams.BF);
+ // Collate testing does not support Grouping (see SOLR-2577)
params.remove(GroupParams.GROUP);
// creating a request here... make sure to close it!
- ResponseBuilder checkResponse = new ResponseBuilder(new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),new SolrQueryResponse(), Arrays.asList(queryComponent));
+ ResponseBuilder checkResponse = new ResponseBuilder(
+ new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),
+ new SolrQueryResponse(), Arrays. asList(queryComponent));
checkResponse.setQparser(ultimateResponse.getQparser());
checkResponse.setFilters(ultimateResponse.getFilters());
checkResponse.setQueryString(collationQueryStr);
@@ -109,8 +140,23 @@ public List collate(SpellingResult result, String originalQ
try {
queryComponent.prepare(checkResponse);
+ if (docCollectionLimit > 0) {
+ int f = checkResponse.getFieldFlags();
+ checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
+ if (reportHits) {
+ f = checkResponse.getFieldFlags();
+ checkResponse.setFieldFlags(f |= SolrIndexSearcher.FORCE_INORDER_COLLECTION);
+ }
+ }
queryComponent.process(checkResponse);
hits = (Integer) checkResponse.rsp.getToLog().get("hits");
+ } catch (EarlyTerminatingCollectorException etce) {
+ assert (docCollectionLimit > 0);
+ if (etce.getLastDocId() + 1 == maxDocId) {
+ hits = docCollectionLimit;
+ } else {
+ hits = maxDocId / ((etce.getLastDocId() + 1) / docCollectionLimit);
+ }
} catch (Exception e) {
LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits.", e);
} finally {
@@ -191,6 +237,31 @@ private String getCollation(String origQuery,
offset += corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset());
}
return collation.toString();
+ }
+ public SpellCheckCollator setMaxCollations(int maxCollations) {
+ this.maxCollations = maxCollations;
+ return this;
+ }
+ public SpellCheckCollator setMaxCollationTries(int maxCollationTries) {
+ this.maxCollationTries = maxCollationTries;
+ return this;
+ }
+ public SpellCheckCollator setMaxCollationEvaluations(
+ int maxCollationEvaluations) {
+ this.maxCollationEvaluations = maxCollationEvaluations;
+ return this;
+ }
+ public SpellCheckCollator setSuggestionsMayOverlap(
+ boolean suggestionsMayOverlap) {
+ this.suggestionsMayOverlap = suggestionsMayOverlap;
+ return this;
+ }
+ public SpellCheckCollator setDocCollectionLimit(int docCollectionLimit) {
+ this.docCollectionLimit = docCollectionLimit;
+ return this;
+ }
+ public SpellCheckCollator setReportHits(boolean reportHits) {
+ this.reportHits = reportHits;
+ return this;
}
-
}
diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java
index 940a5dcd4d3..2ba19fe66e0 100644
--- a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java
+++ b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java
@@ -42,21 +42,24 @@
public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeClass() throws Exception {
- initCore("solrconfig-spellcheckcomponent.xml", "schema.xml");
- assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love")));
+ initCore("solrconfig-spellcheckcomponent.xml", "schema.xml");
+ assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love", "teststop", "metanoia")));
assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "faith hope and loaves")));
assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "fat hops and loaves")));
- assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer")));
+ assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer", "teststop", "metanoia")));
assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer")));
assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace")));
assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "hyphenated word")));
- assertNull(h.validateUpdate(adoc("id", "7", "teststop", "Jane filled out a form at Charles De Gaulle")));
- assertNull(h.validateUpdate(adoc("id", "8", "teststop", "Dick flew from Heathrow")));
- assertNull(h.validateUpdate(adoc("id", "9", "teststop", "Jane is stuck in customs because Spot chewed up the form")));
- assertNull(h.validateUpdate(adoc("id", "10", "teststop", "Once in Paris Dick built a fire on the hearth")));
- assertNull(h.validateUpdate(adoc("id", "11", "teststop", "Dick waited for Jane as he watched the sparks flow upward")));
- assertNull(h.validateUpdate(adoc("id", "12", "teststop", "This June parisian rendez-vous is ruined because of a customs snafu")));
- assertNull(h.validateUpdate(adoc("id", "13", "teststop", "partisan political machine")));
+ assertNull(h.validateUpdate(adoc("id", "7", "teststop", "Jane filled out a form at Charles De Gaulle")));
+ assertNull(h.validateUpdate(adoc("id", "8", "teststop", "Dick flew from Heathrow")));
+ assertNull(h.validateUpdate(adoc("id", "9", "teststop", "Jane is stuck in customs because Spot chewed up the form")));
+ assertNull(h.validateUpdate(adoc("id", "10", "teststop", "Once in Paris Dick built a fire on the hearth")));
+ assertNull(h.validateUpdate(adoc("id", "11", "teststop", "Dick waited for Jane as he watched the sparks flow upward")));
+ assertNull(h.validateUpdate(adoc("id", "12", "teststop", "This June parisian rendez-vous is ruined because of a customs snafu")));
+ assertNull(h.validateUpdate(adoc("id", "13", "teststop", "partisan political machine", "teststop", "metanoia")));
+ assertNull(h.validateUpdate(adoc("id", "14", "teststop", "metanoia")));
+ assertNull(h.validateUpdate(adoc("id", "15", "teststop", "metanoia")));
+ assertNull(h.validateUpdate(adoc("id", "16", "teststop", "metanoia")));
assertNull(h.validateUpdate(commit()));
}
@@ -430,4 +433,71 @@ public void testContextSensitiveCollate() throws Exception {
);
}
}
+ @Test
+ public void testEstimatedHitCounts() throws Exception {
+ assertQ(
+ req(
+ SpellCheckComponent.COMPONENT_NAME, "true",
+ SpellCheckComponent.SPELLCHECK_DICT, "direct",
+ SpellingParams.SPELLCHECK_COUNT, "1",
+ SpellingParams.SPELLCHECK_COLLATE, "true",
+ SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
+ SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
+ SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
+ "qt", "spellCheckCompRH",
+ CommonParams.Q, "teststop:metnoia"
+ ),
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=6"
+ );
+ assertQ(
+ req(
+ SpellCheckComponent.COMPONENT_NAME, "true",
+ SpellCheckComponent.SPELLCHECK_DICT, "direct",
+ SpellingParams.SPELLCHECK_COUNT, "1",
+ SpellingParams.SPELLCHECK_COLLATE, "true",
+ SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
+ SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
+ SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
+ SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "1",
+ "qt", "spellCheckCompRH",
+ CommonParams.Q, "teststop:metnoia"
+ ),
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=17"
+ );
+ assertQ(
+ req(
+ SpellCheckComponent.COMPONENT_NAME, "true",
+ SpellCheckComponent.SPELLCHECK_DICT, "direct",
+ SpellingParams.SPELLCHECK_COUNT, "1",
+ SpellingParams.SPELLCHECK_COLLATE, "true",
+ SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
+ SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
+ SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
+ SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "3",
+ "qt", "spellCheckCompRH",
+ CommonParams.Q, "teststop:metnoia"
+ ),
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=4"
+ );
+ assertQ(
+ req(
+ SpellCheckComponent.COMPONENT_NAME, "true",
+ SpellCheckComponent.SPELLCHECK_DICT, "direct",
+ SpellingParams.SPELLCHECK_COUNT, "1",
+ SpellingParams.SPELLCHECK_COLLATE, "true",
+ SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1",
+ SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1",
+ SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
+ SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "100",
+ "qt", "spellCheckCompRH",
+ CommonParams.Q, "teststop:metnoia"
+ ),
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'",
+ "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=6"
+ );
+ }
+
}
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java b/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java
index 7ad21dc74dd..27fb4e10fc6 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java
@@ -129,13 +129,32 @@ public interface SpellingParams {
*
*/
public static final String SPELLCHECK_MAX_COLLATION_EVALUATIONS = SPELLCHECK_PREFIX + "maxCollationEvaluations";
-
+ /**
+ *
+ * For use with {@link SpellingParams#SPELLCHECK_MAX_COLLATION_TRIES} and
+ * {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS}.
+ * A performance optimization in cases where the exact number of hits a collation would return is not needed.
+ * Specify "0" to return the exact # of hits, otherwise give the maximum documents Lucene should collect
+ * with which to base an estimate. The higher the value the more likely the estimates will be accurate
+ * (at expense of performance).
+ *
+ *
+ *
+ * The default is 0 (report exact hit-counts) when {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS} is TRUE.
+ * When {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS} is FALSE, this optimization is always performed.
+ *
+ */
+ public static final String SPELLCHECK_COLLATE_MAX_COLLECT_DOCS = SPELLCHECK_PREFIX + "collateMaxCollectDocs";
/**
*
* Whether to use the Extended Results Format for collations.
* Includes "before>after" pairs to easily allow clients to generate messages like "no results for PORK. did you mean POLK?"
* Also indicates the # of hits each collation will return on re-query. Default=false, which retains 1.4-compatible output.
*
+ *
+ * Note: that if {@link SpellingParams#SPELLCHECK_COLLATE_MAX_COLLECT_DOCS} is set to a value greater than 0,
+ * then the hit counts returned by this will be estimated.
+ *
*/
public static final String SPELLCHECK_COLLATE_EXTENDED_RESULTS = SPELLCHECK_PREFIX + "collateExtendedResults";