Skip to content

Commit

Permalink
Use soft-update to maintain document history (#29458)
Browse files Browse the repository at this point in the history
Today we can use the soft-update feature from Lucene to maintain a
history of document. This change simply replaces hard-update in the
Engine by soft-update methods. Stale operations, delete, and no-ops will
be handled in subsequent changes. This change is just a cut-over from
hard-update to soft-update, no new functionality has been introduced.

Relates #29530
  • Loading branch information
dnhatn committed May 10, 2018
1 parent 9fdc4a8 commit 6762afd
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 11 deletions.
10 changes: 10 additions & 0 deletions server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.document.LatLonDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
Expand Down Expand Up @@ -96,6 +97,8 @@ public class Lucene {
assert annotation == null : "DocValuesFormat " + LATEST_DOC_VALUES_FORMAT + " is deprecated" ;
}

public static final String SOFT_DELETE_FIELD = "__soft_delete";

public static final NamedAnalyzer STANDARD_ANALYZER = new NamedAnalyzer("_standard", AnalyzerScope.GLOBAL, new StandardAnalyzer());
public static final NamedAnalyzer KEYWORD_ANALYZER = new NamedAnalyzer("_keyword", AnalyzerScope.GLOBAL, new KeywordAnalyzer());

Expand Down Expand Up @@ -829,4 +832,11 @@ public int length() {
}
};
}

/**
* Returns a numeric docvalues which can be used to soft-delete documents.
*/
public static NumericDocValuesField newSoftDeleteField() {
return new NumericDocValuesField(SOFT_DELETE_FIELD, 1);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
IndexSettings.MAX_SLICES_PER_SCROLL,
ShardsLimitAllocationDecider.INDEX_TOTAL_SHARDS_PER_NODE_SETTING,
IndexSettings.INDEX_GC_DELETES_SETTING,
IndexSettings.INDEX_SOFT_DELETES_SETTING,
IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING,
UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING,
EnableAllocationDecider.INDEX_ROUTING_REBALANCE_ENABLE_SETTING,
Expand Down
16 changes: 16 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,13 @@ public final class IndexSettings {
public static final Setting<TimeValue> INDEX_GC_DELETES_SETTING =
Setting.timeSetting("index.gc_deletes", DEFAULT_GC_DELETES, new TimeValue(-1, TimeUnit.MILLISECONDS), Property.Dynamic,
Property.IndexScope);

/**
* Specifies if the index should use soft-delete instead of hard-delete for update/delete operations.
*/
public static final Setting<Boolean> INDEX_SOFT_DELETES_SETTING =
Setting.boolSetting("index.soft_deletes.enabled", true, Property.IndexScope);

/**
* The maximum number of refresh listeners allows on this shard.
*/
Expand Down Expand Up @@ -294,6 +301,7 @@ public final class IndexSettings {
private final IndexSortConfig indexSortConfig;
private final IndexScopedSettings scopedSettings;
private long gcDeletesInMillis = DEFAULT_GC_DELETES.millis();
private final boolean softDeleteEnabled;
private volatile boolean warmerEnabled;
private volatile int maxResultWindow;
private volatile int maxInnerResultWindow;
Expand Down Expand Up @@ -401,6 +409,7 @@ public IndexSettings(final IndexMetaData indexMetaData, final Settings nodeSetti
generationThresholdSize = scopedSettings.get(INDEX_TRANSLOG_GENERATION_THRESHOLD_SIZE_SETTING);
mergeSchedulerConfig = new MergeSchedulerConfig(this);
gcDeletesInMillis = scopedSettings.get(INDEX_GC_DELETES_SETTING).getMillis();
softDeleteEnabled = version.onOrAfter(Version.V_6_4_0) && scopedSettings.get(INDEX_SOFT_DELETES_SETTING);
warmerEnabled = scopedSettings.get(INDEX_WARMER_ENABLED_SETTING);
maxResultWindow = scopedSettings.get(MAX_RESULT_WINDOW_SETTING);
maxInnerResultWindow = scopedSettings.get(MAX_INNER_RESULT_WINDOW_SETTING);
Expand Down Expand Up @@ -806,4 +815,11 @@ public IndexSortConfig getIndexSortConfig() {
}

public IndexScopedSettings getScopedSettings() { return scopedSettings;}

/**
* Returns <code>true</code> if soft-delete is enabled.
*/
public boolean isSoftDeleteEnabled() {
return softDeleteEnabled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ public final class CommitStats implements Streamable, ToXContentFragment {
private String id; // lucene commit id in base 64;
private int numDocs;

public CommitStats(SegmentInfos segmentInfos) {
public CommitStats(SegmentInfos segmentInfos, int numDocs) {
// clone the map to protect against concurrent changes
userData = MapBuilder.<String, String>newMapBuilder().putAll(segmentInfos.getUserData()).immutableMap();
// lucene calls the current generation, last generation.
generation = segmentInfos.getLastGeneration();
id = Base64.getEncoder().encodeToString(segmentInfos.getId());
numDocs = Lucene.getNumDocs(segmentInfos);
this.numDocs = numDocs;
}

private CommitStats() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,9 @@ protected final void ensureOpen() {

/** get commits stats for the last commit */
public CommitStats commitStats() {
return new CommitStats(getLastCommittedSegmentInfos());
try (Engine.Searcher searcher = acquireSearcher("commit_stats", Engine.SearcherScope.INTERNAL)) {
return new CommitStats(getLastCommittedSegmentInfos(), searcher.reader().numDocs());
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexReader;
Expand Down Expand Up @@ -69,7 +70,6 @@
import org.elasticsearch.index.seqno.LocalCheckpointTracker;
import org.elasticsearch.index.seqno.SequenceNumbers;
import org.elasticsearch.index.shard.ElasticsearchMergePolicy;
import org.elasticsearch.index.shard.IndexingStats;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.index.translog.TranslogConfig;
Expand Down Expand Up @@ -142,6 +142,8 @@ public class InternalEngine extends Engine {
private final CounterMetric numDocDeletes = new CounterMetric();
private final CounterMetric numDocAppends = new CounterMetric();
private final CounterMetric numDocUpdates = new CounterMetric();
private final NumericDocValuesField softDeleteField = Lucene.newSoftDeleteField();
private final boolean softDeleteEnabled;

/**
* How many bytes we are currently moving to disk, via either IndexWriter.flush or refresh. IndexingMemoryController polls this
Expand All @@ -166,6 +168,7 @@ public InternalEngine(EngineConfig engineConfig) {
maxUnsafeAutoIdTimestamp.set(Long.MAX_VALUE);
}
this.uidField = engineConfig.getIndexSettings().isSingleType() ? IdFieldMapper.NAME : UidFieldMapper.NAME;
this.softDeleteEnabled = engineConfig.getIndexSettings().isSoftDeleteEnabled();
final TranslogDeletionPolicy translogDeletionPolicy = new TranslogDeletionPolicy(
engineConfig.getIndexSettings().getTranslogRetentionSize().getBytes(),
engineConfig.getIndexSettings().getTranslogRetentionAge().getMillis()
Expand Down Expand Up @@ -807,6 +810,7 @@ public IndexResult index(Index index) throws IOException {
} else if (plan.indexIntoLucene) {
indexResult = indexIntoLucene(index, plan);
} else {
// TODO: We need to index stale documents to have a full history in Lucene.
indexResult = new IndexResult(
plan.versionForIndexing, plan.seqNoForIndexing, plan.currentNotFoundOrDeleted);
}
Expand Down Expand Up @@ -1105,10 +1109,18 @@ private boolean assertDocDoesNotExist(final Index index, final boolean allowDele
}

private void updateDocs(final Term uid, final List<ParseContext.Document> docs, final IndexWriter indexWriter) throws IOException {
if (docs.size() > 1) {
indexWriter.updateDocuments(uid, docs);
if (softDeleteEnabled) {
if (docs.size() > 1) {
indexWriter.softUpdateDocuments(uid, docs, softDeleteField);
} else {
indexWriter.softUpdateDocument(uid, docs.get(0), softDeleteField);
}
} else {
indexWriter.updateDocument(uid, docs.get(0));
if (docs.size() > 1) {
indexWriter.updateDocuments(uid, docs);
} else {
indexWriter.updateDocument(uid, docs.get(0));
}
}
numDocUpdates.inc(docs.size());
}
Expand Down Expand Up @@ -1972,11 +1984,14 @@ private IndexWriterConfig getIndexWriterConfig() {
}
iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
iwc.setMergeScheduler(mergeScheduler);
MergePolicy mergePolicy = config().getMergePolicy();
// Give us the opportunity to upgrade old segments while performing
// background merges
mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
iwc.setMergePolicy(mergePolicy);
MergePolicy mergePolicy = config().getMergePolicy();
if (softDeleteEnabled) {
// TODO: soft-delete retention policy
iwc.setSoftDeletesField(Lucene.SOFT_DELETE_FIELD);
}
iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
iwc.setSimilarity(engineConfig.getSimilarity());
iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
iwc.setCodec(engineConfig.getCodec());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1007,7 +1007,6 @@ public RecoveryDiff recoveryDiff(MetadataSnapshot recoveryTargetSnapshot) {
}
final String segmentId = IndexFileNames.parseSegmentName(meta.name());
final String extension = IndexFileNames.getExtension(meta.name());
assert FIELD_INFOS_FILE_EXTENSION.equals(extension) == false || IndexFileNames.stripExtension(IndexFileNames.stripSegmentName(meta.name())).isEmpty() : "FieldInfos are generational but updateable DV are not supported in elasticsearch";
if (IndexFileNames.SEGMENTS.equals(segmentId) || DEL_FILE_EXTENSION.equals(extension) || LIV_FILE_EXTENSION.equals(extension)) {
// only treat del files as per-commit files fnm files are generational but only for upgradable DV
perCommitStoreFiles.add(meta);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2804,6 +2804,12 @@ private void maybeThrowFailure() throws IOException {
}
}

@Override
public long softUpdateDocument(Term term, Iterable<? extends IndexableField> doc, Field... softDeletes) throws IOException {
maybeThrowFailure();
return super.softUpdateDocument(term, doc, softDeletes);
}

@Override
public long deleteDocuments(Term... terms) throws IOException {
maybeThrowFailure();
Expand Down

0 comments on commit 6762afd

Please sign in to comment.