From 7141cb35a761eea1f0e6f3ad6abf84a4c972fedc Mon Sep 17 00:00:00 2001 From: James Dyer Date: Mon, 6 May 2013 16:49:46 +0000 Subject: [PATCH] SOLR-3240: add "spellcheck.collateMaxCollectDocs" for estimating collation hit-counts. git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1479638 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 6 ++ .../component/SpellCheckComponent.java | 16 +++- .../search/EarlyTerminatingCollector.java | 85 ++++++++++++++++++ .../EarlyTerminatingCollectorException.java | 45 ++++++++++ .../apache/solr/search/SolrIndexSearcher.java | 23 +++-- .../solr/spelling/SpellCheckCollator.java | 85 ++++++++++++++++-- .../solr/spelling/SpellCheckCollatorTest.java | 90 ++++++++++++++++--- .../solr/common/params/SpellingParams.java | 21 ++++- 8 files changed, 345 insertions(+), 26 deletions(-) create mode 100644 solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java create mode 100644 solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index ce423f707db..17967f7f9c7 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -66,6 +66,12 @@ New Features * SOLR-4761: Add option to plugin a merged segment warmer into solrconfig.xml (Mark Miller, Mike McCandless, Robert Muir) +* SOLR-3240: Add "spellcheck.collateMaxCollectDocs" option so that when testing + potential Collations against the index, SpellCheckComponent will only collect + n documents, thereby estimating the hit-count. This is a performance optimization + in cases where exact hit-counts are unnecessary. Also, when "collateExtendedResults" + is false, this optimization is always made (James Dyer). 
+ Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 491733c0316..9a19522b037 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -214,10 +214,20 @@ protected void addCollationsToResponse(SolrParams params, SpellingResult spellin int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0); int maxCollationEvaluations = params.getInt(SPELLCHECK_MAX_COLLATION_EVALUATIONS, 10000); boolean collationExtendedResults = params.getBool(SPELLCHECK_COLLATE_EXTENDED_RESULTS, false); + int maxCollationCollectDocs = params.getInt(SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, 0); + // If not reporting hits counts, don't bother collecting more than 1 document per try. + if (!collationExtendedResults) { + maxCollationCollectDocs = 1; + } boolean shard = params.getBool(ShardParams.IS_SHARD, false); - - SpellCheckCollator collator = new SpellCheckCollator(); - List collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations, suggestionsMayOverlap); + SpellCheckCollator collator = new SpellCheckCollator() + .setMaxCollations(maxCollations) + .setMaxCollationTries(maxCollationTries) + .setMaxCollationEvaluations(maxCollationEvaluations) + .setSuggestionsMayOverlap(suggestionsMayOverlap) + .setDocCollectionLimit(maxCollationCollectDocs) + .setReportHits(collationExtendedResults); + List collations = collator.collate(spellingResult, q, rb); //by sorting here we guarantee a non-distributed request returns all //results in the same order as a distributed request would, //even in cases when the internal rank is the same. 
diff --git a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java new file mode 100644 index 00000000000..e0dae354657 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollector.java @@ -0,0 +1,85 @@ +package org.apache.solr.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; +/** + *

+ * A wrapper {@link Collector} that throws {@link EarlyTerminatingCollectorException} + * once a specified maximum number of documents are collected. + *

+ */ +public class EarlyTerminatingCollector extends Collector { + private int numCollected; + private int lastDocId = -1; + private int maxDocsToCollect; + private Collector delegate; + + /** + *

+ * Wraps a {@link Collector}, throwing {@link EarlyTerminatingCollectorException} + * once the specified maximum is reached. + *

+ * @param delegate - the Collector to wrap. + * @param maxDocsToCollect - the maximum number of documents to Collect + * + */ + public EarlyTerminatingCollector(Collector delegate, int maxDocsToCollect) { + this.delegate = delegate; + this.maxDocsToCollect = maxDocsToCollect; + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return delegate.acceptsDocsOutOfOrder(); + } + + @Override + public void collect(int doc) throws IOException { + delegate.collect(doc); + lastDocId = doc; + numCollected++; + if(numCollected==maxDocsToCollect) { + throw new EarlyTerminatingCollectorException(numCollected, lastDocId); + } + } + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + delegate.setNextReader(context); + } + @Override + public void setScorer(Scorer scorer) throws IOException { + delegate.setScorer(scorer); + } + public int getNumCollected() { + return numCollected; + } + public void setNumCollected(int numCollected) { + this.numCollected = numCollected; + } + public int getLastDocId() { + return lastDocId; + } + public void setLastDocId(int lastDocId) { + this.lastDocId = lastDocId; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java new file mode 100644 index 00000000000..914abc590fb --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/EarlyTerminatingCollectorException.java @@ -0,0 +1,45 @@ +package org.apache.solr.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Thrown by {@link EarlyTerminatingCollector} when the maximum to abort + * the scoring / collection process early, when the specified maximum number + * of documents were collected. + */ +public class EarlyTerminatingCollectorException extends RuntimeException { + private static final long serialVersionUID = 5939241340763428118L; + private int lastDocId = -1; + private int numberCollected; + + public EarlyTerminatingCollectorException(int numberCollected, int lastDocId) { + this.numberCollected = numberCollected; + this.lastDocId = lastDocId; + } + public int getLastDocId() { + return lastDocId; + } + public void setLastDocId(int lastDocId) { + this.lastDocId = lastDocId; + } + public int getNumberCollected() { + return numberCollected; + } + public void setNumberCollected(int numberCollected) { + this.numberCollected = numberCollected; + } +} diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java index 7fb19078bd7..ac25388e87c 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java @@ -99,6 +99,7 @@ import org.apache.solr.response.SolrQueryResponse; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; +import org.apache.solr.spelling.QueryConverter; import org.apache.solr.update.SolrIndexConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1235,7 +1236,8 @@ public DocList getDocList(Query query, List 
filterList, Sort lsort, int o public static final int GET_DOCSET = 0x40000000; static final int NO_CHECK_FILTERCACHE = 0x20000000; static final int NO_SET_QCACHE = 0x10000000; - + public static final int TERMINATE_EARLY = 0x04; + public static final int FORCE_INORDER_COLLECTION = 0x08; public static final int GET_DOCLIST = 0x02; // get the documents actually returned in a response public static final int GET_SCORES = 0x01; @@ -1394,7 +1396,8 @@ private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException { float[] scores; boolean needScores = (cmd.getFlags() & GET_SCORES) != 0; - + boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY; + Query query = QueryUtils.makeQueryable(cmd.getQuery()); ProcessedFilter pf = getProcessedFilter(cmd.getFilter(), cmd.getFilterList()); @@ -1446,7 +1449,9 @@ public boolean acceptsDocsOutOfOrder() { } }; } - + if (terminateEarly) { + collector = new EarlyTerminatingCollector(collector, cmd.len); + } if( timeAllowed > 0 ) { collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed); } @@ -1481,6 +1486,9 @@ public boolean acceptsDocsOutOfOrder() { topCollector = TopFieldCollector.create(weightSort(cmd.getSort()), len, false, needScores, needScores, true); } Collector collector = topCollector; + if (terminateEarly) { + collector = new EarlyTerminatingCollector(collector, cmd.len); + } if( timeAllowed > 0 ) { collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed); } @@ -1529,6 +1537,7 @@ private DocSet getDocListAndSetNC(QueryResult qr,QueryCommand cmd) throws IOExce DocSet set; boolean needScores = (cmd.getFlags() & GET_SCORES) != 0; + boolean terminateEarly = (cmd.getFlags() & TERMINATE_EARLY) == TERMINATE_EARLY; int maxDoc = maxDoc(); int smallSetSize = maxDoc>>6; @@ -1568,7 +1577,9 @@ public boolean acceptsDocsOutOfOrder() { } }); } - + if (terminateEarly) { + collector = new 
EarlyTerminatingCollector(collector, cmd.len); + } if( timeAllowed > 0 ) { collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed); } @@ -1604,7 +1615,9 @@ public boolean acceptsDocsOutOfOrder() { DocSetCollector setCollector = new DocSetDelegateCollector(maxDoc>>6, maxDoc, topCollector); Collector collector = setCollector; - + if (terminateEarly) { + collector = new EarlyTerminatingCollector(collector, cmd.len); + } if( timeAllowed > 0 ) { collector = new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeAllowed ); } diff --git a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java index b859fd053c0..bd324495fb5 100644 --- a/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java +++ b/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java @@ -22,7 +22,9 @@ import java.util.List; import org.apache.lucene.analysis.Token; +import org.apache.lucene.index.IndexReader; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.DisMaxParams; import org.apache.solr.common.params.GroupParams; import org.apache.solr.common.params.ModifiableSolrParams; import org.apache.solr.common.params.SolrParams; @@ -33,15 +35,23 @@ import org.apache.solr.handler.component.SearchComponent; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.search.EarlyTerminatingCollectorException; +import org.apache.solr.search.SolrIndexSearcher; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class SpellCheckCollator { private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class); + private int maxCollations = 1; + private int maxCollationTries = 0; + private int maxCollationEvaluations = 10000; + private boolean suggestionsMayOverlap = false; + private int 
docCollectionLimit = 0; + private boolean reportHits = true; - public List collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse, - int maxCollations, int maxTries, int maxEvaluations, boolean suggestionsMayOverlap) { - List collations = new ArrayList(); + public List collate(SpellingResult result, + String originalQuery, ResponseBuilder ultimateResponse) { + List collations = new ArrayList(); QueryComponent queryComponent = null; if (ultimateResponse.components != null) { @@ -54,6 +64,7 @@ public List collate(SpellingResult result, String originalQ } boolean verifyCandidateWithQuery = true; + int maxTries = maxCollationTries; int maxNumberToIterate = maxTries; if (maxTries < 1) { maxTries = 1; @@ -65,10 +76,17 @@ public List collate(SpellingResult result, String originalQ maxTries = 1; verifyCandidateWithQuery = false; } + docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0; + int maxDocId = -1; + if (verifyCandidateWithQuery && docCollectionLimit > 0) { + IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader(); + maxDocId = reader.maxDoc(); + } int tryNo = 0; int collNo = 0; - PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate, maxEvaluations, suggestionsMayOverlap); + PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), + maxNumberToIterate, maxCollationEvaluations, suggestionsMayOverlap); while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) { PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next(); @@ -96,12 +114,25 @@ public List collate(SpellingResult result, String originalQ } params.set(CommonParams.Q, collationQueryStr); params.remove(CommonParams.START); + params.set(CommonParams.ROWS, "" + docCollectionLimit); + // we don't want any stored fields params.set(CommonParams.FL, "id"); - params.set(CommonParams.ROWS, "0"); + // we'll sort by doc id to 
ensure no scoring is done. + params.set(CommonParams.SORT, "_docid_ asc"); + // If a dismax query, don't add unnecessary clauses for scoring + params.remove(DisMaxParams.TIE); + params.remove(DisMaxParams.PF); + params.remove(DisMaxParams.PF2); + params.remove(DisMaxParams.PF3); + params.remove(DisMaxParams.BQ); + params.remove(DisMaxParams.BF); + // Collate testing does not support Grouping (see SOLR-2577) params.remove(GroupParams.GROUP); // creating a request here... make sure to close it! - ResponseBuilder checkResponse = new ResponseBuilder(new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),new SolrQueryResponse(), Arrays.asList(queryComponent)); + ResponseBuilder checkResponse = new ResponseBuilder( + new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params), + new SolrQueryResponse(), Arrays. asList(queryComponent)); checkResponse.setQparser(ultimateResponse.getQparser()); checkResponse.setFilters(ultimateResponse.getFilters()); checkResponse.setQueryString(collationQueryStr); @@ -109,8 +140,23 @@ public List collate(SpellingResult result, String originalQ try { queryComponent.prepare(checkResponse); + if (docCollectionLimit > 0) { + int f = checkResponse.getFieldFlags(); + checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY); + if (reportHits) { + f = checkResponse.getFieldFlags(); + checkResponse.setFieldFlags(f |= SolrIndexSearcher.FORCE_INORDER_COLLECTION); + } + } queryComponent.process(checkResponse); hits = (Integer) checkResponse.rsp.getToLog().get("hits"); + } catch (EarlyTerminatingCollectorException etce) { + assert (docCollectionLimit > 0); + if (etce.getLastDocId() + 1 == maxDocId) { + hits = docCollectionLimit; + } else { + hits = maxDocId / ((etce.getLastDocId() + 1) / docCollectionLimit); + } } catch (Exception e) { LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits.", e); } finally { @@ -191,6 +237,31 @@ private String getCollation(String origQuery, offset 
+= corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset()); } return collation.toString(); + } + public SpellCheckCollator setMaxCollations(int maxCollations) { + this.maxCollations = maxCollations; + return this; + } + public SpellCheckCollator setMaxCollationTries(int maxCollationTries) { + this.maxCollationTries = maxCollationTries; + return this; + } + public SpellCheckCollator setMaxCollationEvaluations( + int maxCollationEvaluations) { + this.maxCollationEvaluations = maxCollationEvaluations; + return this; + } + public SpellCheckCollator setSuggestionsMayOverlap( + boolean suggestionsMayOverlap) { + this.suggestionsMayOverlap = suggestionsMayOverlap; + return this; + } + public SpellCheckCollator setDocCollectionLimit(int docCollectionLimit) { + this.docCollectionLimit = docCollectionLimit; + return this; + } + public SpellCheckCollator setReportHits(boolean reportHits) { + this.reportHits = reportHits; + return this; } - } diff --git a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java index 940a5dcd4d3..2ba19fe66e0 100644 --- a/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java +++ b/solr/core/src/test/org/apache/solr/spelling/SpellCheckCollatorTest.java @@ -42,21 +42,24 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { - initCore("solrconfig-spellcheckcomponent.xml", "schema.xml"); - assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love"))); + initCore("solrconfig-spellcheckcomponent.xml", "schema.xml"); + assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "faith hope and love", "teststop", "metanoia"))); assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "faith hope and loaves"))); assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "fat hops and loaves"))); - assertNull(h.validateUpdate(adoc("id", "3", 
"lowerfilt", "faith of homer"))); + assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer", "teststop", "metanoia"))); assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer"))); assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace"))); assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "hyphenated word"))); - assertNull(h.validateUpdate(adoc("id", "7", "teststop", "Jane filled out a form at Charles De Gaulle"))); - assertNull(h.validateUpdate(adoc("id", "8", "teststop", "Dick flew from Heathrow"))); - assertNull(h.validateUpdate(adoc("id", "9", "teststop", "Jane is stuck in customs because Spot chewed up the form"))); - assertNull(h.validateUpdate(adoc("id", "10", "teststop", "Once in Paris Dick built a fire on the hearth"))); - assertNull(h.validateUpdate(adoc("id", "11", "teststop", "Dick waited for Jane as he watched the sparks flow upward"))); - assertNull(h.validateUpdate(adoc("id", "12", "teststop", "This June parisian rendez-vous is ruined because of a customs snafu"))); - assertNull(h.validateUpdate(adoc("id", "13", "teststop", "partisan political machine"))); + assertNull(h.validateUpdate(adoc("id", "7", "teststop", "Jane filled out a form at Charles De Gaulle"))); + assertNull(h.validateUpdate(adoc("id", "8", "teststop", "Dick flew from Heathrow"))); + assertNull(h.validateUpdate(adoc("id", "9", "teststop", "Jane is stuck in customs because Spot chewed up the form"))); + assertNull(h.validateUpdate(adoc("id", "10", "teststop", "Once in Paris Dick built a fire on the hearth"))); + assertNull(h.validateUpdate(adoc("id", "11", "teststop", "Dick waited for Jane as he watched the sparks flow upward"))); + assertNull(h.validateUpdate(adoc("id", "12", "teststop", "This June parisian rendez-vous is ruined because of a customs snafu"))); + assertNull(h.validateUpdate(adoc("id", "13", "teststop", "partisan political machine", "teststop", "metanoia"))); + assertNull(h.validateUpdate(adoc("id", "14", "teststop", 
"metanoia"))); + assertNull(h.validateUpdate(adoc("id", "15", "teststop", "metanoia"))); + assertNull(h.validateUpdate(adoc("id", "16", "teststop", "metanoia"))); assertNull(h.validateUpdate(commit())); } @@ -430,4 +433,71 @@ public void testContextSensitiveCollate() throws Exception { ); } } + @Test + public void testEstimatedHitCounts() throws Exception { + assertQ( + req( + SpellCheckComponent.COMPONENT_NAME, "true", + SpellCheckComponent.SPELLCHECK_DICT, "direct", + SpellingParams.SPELLCHECK_COUNT, "1", + SpellingParams.SPELLCHECK_COLLATE, "true", + SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1", + SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1", + SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", + "qt", "spellCheckCompRH", + CommonParams.Q, "teststop:metnoia" + ), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=6" + ); + assertQ( + req( + SpellCheckComponent.COMPONENT_NAME, "true", + SpellCheckComponent.SPELLCHECK_DICT, "direct", + SpellingParams.SPELLCHECK_COUNT, "1", + SpellingParams.SPELLCHECK_COLLATE, "true", + SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1", + SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1", + SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", + SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "1", + "qt", "spellCheckCompRH", + CommonParams.Q, "teststop:metnoia" + ), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=17" + ); + assertQ( + req( + SpellCheckComponent.COMPONENT_NAME, "true", + SpellCheckComponent.SPELLCHECK_DICT, "direct", + SpellingParams.SPELLCHECK_COUNT, "1", + SpellingParams.SPELLCHECK_COLLATE, "true", + SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1", + 
SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1", + SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", + SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "3", + "qt", "spellCheckCompRH", + CommonParams.Q, "teststop:metnoia" + ), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=4" + ); + assertQ( + req( + SpellCheckComponent.COMPONENT_NAME, "true", + SpellCheckComponent.SPELLCHECK_DICT, "direct", + SpellingParams.SPELLCHECK_COUNT, "1", + SpellingParams.SPELLCHECK_COLLATE, "true", + SpellingParams.SPELLCHECK_MAX_COLLATION_TRIES, "1", + SpellingParams.SPELLCHECK_MAX_COLLATIONS, "1", + SpellingParams.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", + SpellingParams.SPELLCHECK_COLLATE_MAX_COLLECT_DOCS, "100", + "qt", "spellCheckCompRH", + CommonParams.Q, "teststop:metnoia" + ), + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/str[@name='collationQuery']='teststop:metanoia'", + "//lst[@name='spellcheck']/lst[@name='suggestions']/lst[@name='collation']/int[@name='hits']=6" + ); + } + } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java b/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java index 7ad21dc74dd..27fb4e10fc6 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/SpellingParams.java @@ -129,13 +129,32 @@ public interface SpellingParams { *

*/ public static final String SPELLCHECK_MAX_COLLATION_EVALUATIONS = SPELLCHECK_PREFIX + "maxCollationEvaluations"; - + /** + *

+ * For use with {@link SpellingParams#SPELLCHECK_MAX_COLLATION_TRIES} and + * {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS}. + * A performance optimization in cases where the exact number of hits a collation would return is not needed. + * Specify "0" to return the exact # of hits, otherwise give the maximum number of documents Lucene should collect + * on which to base an estimate. The higher the value the more likely the estimates will be accurate + * (at expense of performance). + *

+ * + *

+ * The default is 0 (report exact hit-counts) when {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS} is TRUE. + * When {@link SpellingParams#SPELLCHECK_COLLATE_EXTENDED_RESULTS} is FALSE, this optimization is always performed. + *

+ */ + public static final String SPELLCHECK_COLLATE_MAX_COLLECT_DOCS = SPELLCHECK_PREFIX + "collateMaxCollectDocs"; /** *

* Whether to use the Extended Results Format for collations. * Includes "before>after" pairs to easily allow clients to generate messages like "no results for PORK. did you mean POLK?" * Also indicates the # of hits each collation will return on re-query. Default=false, which retains 1.4-compatible output. *

+ *

+ * Note that if {@link SpellingParams#SPELLCHECK_COLLATE_MAX_COLLECT_DOCS} is set to a value greater than 0, + * then the hit counts returned with the extended results will be estimated. + *

*/ public static final String SPELLCHECK_COLLATE_EXTENDED_RESULTS = SPELLCHECK_PREFIX + "collateExtendedResults";