Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for efficient deep paging in lucene.net #320

Merged
merged 10 commits into from
Apr 14, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/sorting.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ With the combination of `ISearchResult.Skip` and `maxResults`, we can tell Lucen
* Skip over a certain number of results without allocating them and tell Lucene
* only allocate a certain number of results after skipping

### Deep Paging
When using Lucene.NET as the Examine provider it is possible to more efficiently perform deep paging.
Steps:
1. Build and execute your query as normal.
2. Cast the `ISearchResults` returned by `IQueryExecutor.Execute` to `ILuceneSearchResults`.
3. Store `ILuceneSearchResults.SearchAfter` (a `SearchAfterOptions` instance) for the next page. In a web application, where this value must be made available to the next request, consider serializing it and protecting it with a keyed cryptographic hash (for example an HMAC) so that tampering can be detected.
nzdev marked this conversation as resolved.
Show resolved Hide resolved
4. Create the same query as the previous request.
5. When calling `IQueryExecutor.Execute`, pass in `new LuceneQueryOptions(skip, take, searchAfterOptions)`. `skip` will be ignored; the next `take` documents after the `SearchAfterOptions` document will be retrieved.
6. Repeat Steps 2-5 for each page.

### Example

```cs
Expand Down
6 changes: 4 additions & 2 deletions src/Examine.Core/EmptySearchResults.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ IEnumerator IEnumerable.GetEnumerator()

public long TotalItemCount => 0;

public IEnumerable<ISearchResult> Skip(int skip)
public string ContinueWith => default;
nzdev marked this conversation as resolved.
Show resolved Hide resolved

public IEnumerable<ISearchResult> Skip(int skip)
{
return Enumerable.Empty<ISearchResult>();
}
Expand All @@ -34,4 +36,4 @@ public IEnumerable<ISearchResult> SkipTake(int skip, int? take = null)
return Enumerable.Empty<ISearchResult>();
}
}
}
}
7 changes: 7 additions & 0 deletions src/Examine.Core/Search/QueryOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@ public QueryOptions(int skip, int? take = null)
Take = take ?? DefaultMaxResults;
}

/// <summary>
/// The number of documents to skip in the result set.
/// </summary>
public int Skip { get; }

/// <summary>
/// The number of documents to take in the result set.
/// </summary>
public int Take { get; }
}
}
19 changes: 19 additions & 0 deletions src/Examine.Lucene/Search/ILuceneSearchResults.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
namespace Examine.Lucene.Search
{
/// <summary>
/// Lucene.NET Search Results. Extends <see cref="ISearchResults"/> with
/// values that are specific to the Lucene.NET provider.
/// </summary>
public interface ILuceneSearchResults : ISearchResults
{
/// <summary>
/// Options for Searching After. Used for efficient deep paging: pass this
/// value to the next query to retrieve the results that follow this result set.
/// May be null, for example when the search produced no hits.
/// </summary>
SearchAfterOptions SearchAfter { get; }

/// <summary>
/// Returns the maximum score value encountered. Note that in case
/// scores are not tracked, this returns <see cref="float.NaN"/>.
/// </summary>
float MaxScore { get; }
}
}
39 changes: 39 additions & 0 deletions src/Examine.Lucene/Search/LuceneQueryOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using Examine.Search;

namespace Examine.Lucene.Search
{
/// <summary>
/// Query options for the Lucene.NET provider. Adds score tracking switches and
/// search-after paging on top of the base <see cref="QueryOptions"/>.
/// </summary>
public class LuceneQueryOptions : QueryOptions
{
    /// <summary>
    /// Options for Searching After. Used for efficient deep paging.
    /// Null when the query should not continue from a previous result set.
    /// </summary>
    public SearchAfterOptions SearchAfter { get; }

    /// <summary>
    /// Whether to track the maximum document score. For best performance, if not needed, leave false.
    /// </summary>
    public bool TrackDocumentMaxScore { get; }

    /// <summary>
    /// Whether to track document scores. For best performance, if not needed, leave false.
    /// </summary>
    public bool TrackDocumentScores { get; }

    /// <summary>
    /// Creates a new set of Lucene.NET query options.
    /// </summary>
    /// <param name="skip">Number of result documents to skip.</param>
    /// <param name="take">Optional number of result documents to take.</param>
    /// <param name="searchAfter">Optionally skip to results after the results from the previous search execution. Used for efficient deep paging.</param>
    /// <param name="trackDocumentScores">Whether to track document scores. Defaults to false.</param>
    /// <param name="trackDocumentMaxScore">Whether to track the maximum document score. Defaults to false.</param>
    public LuceneQueryOptions(int skip, int? take = null, SearchAfterOptions searchAfter = null, bool trackDocumentScores = false, bool trackDocumentMaxScore = false)
        : base(skip, take)
    {
        SearchAfter = searchAfter;
        TrackDocumentMaxScore = trackDocumentMaxScore;
        TrackDocumentScores = trackDocumentScores;
    }
}
}
116 changes: 95 additions & 21 deletions src/Examine.Lucene/Search/LuceneSearchExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ namespace Examine.Lucene.Search
public class LuceneSearchExecutor
{
private readonly QueryOptions _options;
private readonly LuceneQueryOptions _luceneQueryOptions;
private readonly IEnumerable<SortField> _sortField;
private readonly ISearchContext _searchContext;
private readonly Query _luceneQuery;
Expand All @@ -24,6 +25,7 @@ public class LuceneSearchExecutor
internal LuceneSearchExecutor(QueryOptions options, Query query, IEnumerable<SortField> sortField, ISearchContext searchContext, ISet<string> fieldsToLoad)
{
_options = options ?? QueryOptions.Default;
_luceneQueryOptions = _options as LuceneQueryOptions;
_luceneQuery = query ?? throw new ArgumentNullException(nameof(query));
_fieldsToLoad = fieldsToLoad;
_sortField = sortField ?? throw new ArgumentNullException(nameof(sortField));
Expand Down Expand Up @@ -78,47 +80,119 @@ public ISearchResults Execute()

var maxResults = Math.Min((_options.Skip + 1) * _options.Take, MaxDoc);
maxResults = maxResults >= 1 ? maxResults : QueryOptions.DefaultMaxResults;
int numHits = maxResults;

ICollector topDocsCollector;
SortField[] sortFields = _sortField as SortField[] ?? _sortField.ToArray();
if (sortFields.Length > 0)
{
topDocsCollector = TopFieldCollector.Create(
new Sort(sortFields), maxResults, false, false, false, false);
}
else
{
topDocsCollector = TopScoreDocCollector.Create(maxResults, true);
}
Sort sort = null;
FieldDoc scoreDocAfter = null;
Filter filter = null;

using (ISearcherReference searcher = _searchContext.GetSearcher())
{
searcher.IndexSearcher.Search(_luceneQuery, topDocsCollector);
if (sortFields.Length > 0)
{
sort = new Sort(sortFields);
sort.Rewrite(searcher.IndexSearcher);
}
if (_luceneQueryOptions != null && _luceneQueryOptions.SearchAfter != null)
{
//The document to find results after.
scoreDocAfter = GetScoreDocAfter(_luceneQueryOptions);

// We only want to collect the actual number of hits we want to take after the last document. We don't need to collect all previous/next docs.
numHits = _options.Take >= 1 ? _options.Take : QueryOptions.DefaultMaxResults;
}

TopDocs topDocs;
ICollector topDocsCollector;
bool trackMaxScore = _luceneQueryOptions == null ? false : _luceneQueryOptions.TrackDocumentMaxScore;
bool trackDocScores = _luceneQueryOptions == null ? false : _luceneQueryOptions.TrackDocumentScores;

if (sortFields.Length > 0)
{
topDocs = ((TopFieldCollector)topDocsCollector).GetTopDocs(_options.Skip, _options.Take);
bool fillFields = true;
topDocsCollector = TopFieldCollector.Create(sort, numHits, scoreDocAfter, fillFields, trackDocScores, trackMaxScore, false);
}
else
{
topDocs = ((TopScoreDocCollector)topDocsCollector).GetTopDocs(_options.Skip, _options.Take);
topDocsCollector = TopScoreDocCollector.Create(numHits, scoreDocAfter, true);
}

if (scoreDocAfter != null && sort != null)
{
topDocs = searcher.IndexSearcher.SearchAfter(scoreDocAfter, _luceneQuery, filter, _options.Take, sort, trackDocScores, trackMaxScore);
}
else if (scoreDocAfter != null && sort == null)
{
topDocs = searcher.IndexSearcher.SearchAfter(scoreDocAfter, _luceneQuery, _options.Take);
}
else
{
searcher.IndexSearcher.Search(_luceneQuery, topDocsCollector);
if (sortFields.Length > 0)
{
topDocs = ((TopFieldCollector)topDocsCollector).GetTopDocs(_options.Skip, _options.Take);
}
else
{
topDocs = ((TopScoreDocCollector)topDocsCollector).GetTopDocs(_options.Skip, _options.Take);
}
}

var totalItemCount = topDocs.TotalHits;

var results = new List<ISearchResult>();
var results = new List<ISearchResult>(topDocs.ScoreDocs.Length);
for (int i = 0; i < topDocs.ScoreDocs.Length; i++)
{
var result = GetSearchResult(i, topDocs, searcher.IndexSearcher);
results.Add(result);
}
var searchAfterOptions = GetSearchAfterOptions(topDocs);
float maxScore = topDocs.MaxScore;

return new LuceneSearchResults(results, totalItemCount);
return new LuceneSearchResults(results, totalItemCount, maxScore, searchAfterOptions);
}
}

private ISearchResult GetSearchResult(int index, TopDocs topDocs, IndexSearcher luceneSearcher)
/// <summary>
/// Converts the search-after options carried by <paramref name="luceneQueryOptions"/>
/// into the Lucene <see cref="FieldDoc"/> that the next search continues after.
/// </summary>
/// <param name="luceneQueryOptions">Query options whose SearchAfter value is non-null.</param>
/// <returns>A <see cref="FieldDoc"/> describing the last document of the previous result set.</returns>
private static FieldDoc GetScoreDocAfter(LuceneQueryOptions luceneQueryOptions)
{
    var after = luceneQueryOptions.SearchAfter;

    // Fall back to an empty array when no sort field values were supplied.
    object[] sortValues = after.Fields != null && after.Fields.Length > 0
        ? after.Fields
        : new object[0];

    // Only pass a shard index through when one was recorded.
    return after.ShardIndex != null
        ? new FieldDoc(after.DocumentId, after.DocumentScore, sortValues, after.ShardIndex.Value)
        : new FieldDoc(after.DocumentId, after.DocumentScore, sortValues);
}

/// <summary>
/// Builds the search-after options for the next page from the last hit in
/// <paramref name="topDocs"/>.
/// </summary>
/// <param name="topDocs">The hits returned by the search that was just executed.</param>
/// <returns>
/// Options describing the last returned document, or null when there were no
/// hits or no documents were returned.
/// </returns>
private static SearchAfterOptions GetSearchAfterOptions(TopDocs topDocs)
{
    if (topDocs.TotalHits <= 0)
    {
        return null;
    }

    // Look up the last hit once; it is null when ScoreDocs is empty.
    var lastDoc = topDocs.ScoreDocs.LastOrDefault();

    // A type pattern never matches null, so no additional null check is needed.
    if (lastDoc is FieldDoc lastFieldDoc)
    {
        // Copy the sort field values so the options do not share the array with the hit.
        return new SearchAfterOptions(lastFieldDoc.Doc, lastFieldDoc.Score, lastFieldDoc.Fields?.ToArray(), lastFieldDoc.ShardIndex);
    }
    if (lastDoc is ScoreDoc scoreDoc)
    {
        return new SearchAfterOptions(scoreDoc.Doc, scoreDoc.Score, new object[0], scoreDoc.ShardIndex);
    }
    return null;
}

private LuceneSearchResult GetSearchResult(int index, TopDocs topDocs, IndexSearcher luceneSearcher)
{
// I have seen IndexOutOfRangeException here which is strange as this is only called in one place
// and from that one place "i" is always less than the size of this collection.
Expand All @@ -141,8 +215,8 @@ private ISearchResult GetSearchResult(int index, TopDocs topDocs, IndexSearcher
doc = luceneSearcher.Doc(docId);
}
var score = scoreDoc.Score;
var result = CreateSearchResult(doc, score);

var shardIndex = scoreDoc.ShardIndex;
var result = CreateSearchResult(doc, score, shardIndex);
return result;
}

Expand All @@ -152,7 +226,7 @@ private ISearchResult GetSearchResult(int index, TopDocs topDocs, IndexSearcher
/// <param name="doc">The doc to convert.</param>
/// <param name="score">The score.</param>
/// <returns>A populated search result object</returns>
private ISearchResult CreateSearchResult(Document doc, float score)
private LuceneSearchResult CreateSearchResult(Document doc, float score, int shardIndex)
{
var id = doc.Get("id");

Expand All @@ -161,7 +235,7 @@ private ISearchResult CreateSearchResult(Document doc, float score)
id = doc.Get(ExamineFieldNames.ItemIdFieldName);
}

var searchResult = new SearchResult(id, score, () =>
var searchResult = new LuceneSearchResult(id, score, () =>
{
//we can use lucene to find out the fields which have been stored for this particular document
var fields = doc.Fields;
Expand Down Expand Up @@ -190,7 +264,7 @@ private ISearchResult CreateSearchResult(Document doc, float score)
}

return resultVals;
});
}, shardIndex);

return searchResult;
}
Expand Down
19 changes: 19 additions & 0 deletions src/Examine.Lucene/Search/LuceneSearchResult.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Examine.Lucene.Search
{
/// <summary>
/// A <see cref="SearchResult"/> that also carries the shard index of the matching document.
/// </summary>
public class LuceneSearchResult : SearchResult, ISearchResult
{
    /// <summary>
    /// The index of the shard the document belongs to.
    /// </summary>
    public int ShardIndex { get; }

    /// <summary>
    /// Creates a new Lucene.NET search result.
    /// </summary>
    /// <param name="id">The id of the result item.</param>
    /// <param name="score">The score of the result item.</param>
    /// <param name="lazyFieldVals">Callback that produces the field values of the result item.</param>
    /// <param name="shardId">The value exposed as <see cref="ShardIndex"/>.</param>
    public LuceneSearchResult(string id, float score, Func<IDictionary<string, List<string>>> lazyFieldVals, int shardId)
        : base(id, score, lazyFieldVals) => ShardIndex = shardId;
}
}
18 changes: 14 additions & 4 deletions src/Examine.Lucene/Search/LuceneSearchResults.cs
Original file line number Diff line number Diff line change
@@ -1,23 +1,33 @@
using System;
using System;
using System.Collections;
using System.Collections.Generic;

namespace Examine.Lucene.Search
{
public class LuceneSearchResults : ISearchResults
public class LuceneSearchResults : ILuceneSearchResults
{
public static LuceneSearchResults Empty { get; } = new LuceneSearchResults(Array.Empty<ISearchResult>(), 0);
public static LuceneSearchResults Empty { get; } = new LuceneSearchResults(Array.Empty<ISearchResult>(), 0,float.NaN, default);

private readonly IReadOnlyCollection<ISearchResult> _results;

public LuceneSearchResults(IReadOnlyCollection<ISearchResult> results, int totalItemCount)
public LuceneSearchResults(IReadOnlyCollection<ISearchResult> results, int totalItemCount,float maxScore, SearchAfterOptions searchAfterOptions)
{
_results = results;
TotalItemCount = totalItemCount;
MaxScore = maxScore;
SearchAfter = searchAfterOptions;
}

public long TotalItemCount { get; }

/// <summary>
/// Returns the maximum score value encountered. Note that in case
/// scores are not tracked, this returns <see cref="float.NaN"/>.
/// </summary>
public float MaxScore { get; }

public SearchAfterOptions SearchAfter { get; }

public IEnumerator<ISearchResult> GetEnumerator() => _results.GetEnumerator();
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
Expand Down
39 changes: 39 additions & 0 deletions src/Examine.Lucene/Search/SearchAfterOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
namespace Examine.Lucene.Search
{
/// <summary>
/// Options for Searching After. Used for efficient deep paging.
/// Describes the last document of a previous result set so that the next
/// search can continue after it.
/// </summary>
public class SearchAfterOptions
{
    /// <summary>
    /// Search fields. Should contain null or J2N.Int
    /// </summary>
    public object[] Fields { get; }

    /// <summary>
    /// The index of the shard the doc belongs to
    /// </summary>
    public int? ShardIndex { get; }

    /// <summary>
    /// The Score of the last document in the previous result set.
    /// The search will search after this document
    /// </summary>
    public float DocumentScore { get; }

    /// <summary>
    /// The Id of the last document in the previous result set.
    /// The search will search after this document
    /// </summary>
    public int DocumentId { get; }

    /// <summary>
    /// Creates the options from the values of the last document in a result set.
    /// </summary>
    /// <param name="documentId">The Id of the last document in the previous result set.</param>
    /// <param name="documentScore">The Score of the last document in the previous result set.</param>
    /// <param name="fields">Search fields of that document. May be null.</param>
    /// <param name="shardIndex">The index of the shard the document belongs to.</param>
    public SearchAfterOptions(int documentId, float documentScore, object[] fields, int shardIndex)
    {
        ShardIndex = shardIndex;
        Fields = fields;
        DocumentScore = documentScore;
        DocumentId = documentId;
    }
}
}
Loading