Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow ingest processors to execute in a non blocking manner. #46241

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.BiConsumer;

import org.elasticsearch.ingest.WrappingProcessor;
import org.elasticsearch.script.ScriptService;
Expand Down Expand Up @@ -65,29 +67,46 @@ boolean isIgnoreMissing() {
}

@Override
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
List<?> values = ingestDocument.getFieldValue(field, List.class, ignoreMissing);
if (values == null) {
if (ignoreMissing) {
return ingestDocument;
handler.accept(ingestDocument, null);
} else {
handler.accept(null, new IllegalArgumentException("field [" + field + "] is null, cannot loop over its elements."));
}
throw new IllegalArgumentException("field [" + field + "] is null, cannot loop over its elements.");
} else {
List<Object> newValues = new CopyOnWriteArrayList<>();
innerExecute(0, values, newValues, ingestDocument, handler);
}
List<Object> newValues = new ArrayList<>(values.size());
IngestDocument document = ingestDocument;
for (Object value : values) {
Object previousValue = ingestDocument.getIngestMetadata().put("_value", value);
try {
document = processor.execute(document);
if (document == null) {
return null;
}
} finally {
newValues.add(ingestDocument.getIngestMetadata().put("_value", previousValue));
}
}

void innerExecute(int index, List<?> values, List<Object> newValues, IngestDocument document,
BiConsumer<IngestDocument, Exception> handler) {
if (index == values.size()) {
document.setFieldValue(field, new ArrayList<>(newValues));
handler.accept(document, null);
return;
}
document.setFieldValue(field, newValues);
return document;

Object value = values.get(index);
Object previousValue = document.getIngestMetadata().put("_value", value);
processor.execute(document, (result, e) -> {
if (e != null) {
newValues.add(document.getIngestMetadata().put("_value", previousValue));
handler.accept(null, e);
} else if (result == null) {
handler.accept(null, null);
} else {
newValues.add(document.getIngestMetadata().put("_value", previousValue));
innerExecute(index + 1, values, newValues, document, handler);
}
});
}

@Override
public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
throw new UnsupportedOperationException("this method should not get executed");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public void testExecute() throws Exception {
"_tag", "values", new UppercaseProcessor("_tag", "_ingest._value", false, "_ingest._value"),
false
);
processor.execute(ingestDocument);
processor.execute(ingestDocument, (result, e) -> {});

@SuppressWarnings("unchecked")
List<String> result = ingestDocument.getFieldValue("values", List.class);
Expand All @@ -73,12 +73,9 @@ public void testExecuteWithFailure() throws Exception {
}
});
ForEachProcessor processor = new ForEachProcessor("_tag", "values", testProcessor, false);
try {
processor.execute(ingestDocument);
fail("exception expected");
} catch (RuntimeException e) {
assertThat(e.getMessage(), equalTo("failure"));
}
Exception[] exceptions = new Exception[1];
processor.execute(ingestDocument, (result, e) -> {exceptions[0] = e;});
assertThat(exceptions[0].getMessage(), equalTo("failure"));
assertThat(testProcessor.getInvokedCounter(), equalTo(3));
assertThat(ingestDocument.getFieldValue("values", List.class), equalTo(Arrays.asList("a", "b", "c")));

Expand All @@ -95,7 +92,7 @@ public void testExecuteWithFailure() throws Exception {
"_tag", "values", new CompoundProcessor(false, Arrays.asList(testProcessor), Arrays.asList(onFailureProcessor)),
false
);
processor.execute(ingestDocument);
processor.execute(ingestDocument, (result, e) -> {});
assertThat(testProcessor.getInvokedCounter(), equalTo(3));
assertThat(ingestDocument.getFieldValue("values", List.class), equalTo(Arrays.asList("A", "B", "c")));
}
Expand All @@ -114,7 +111,7 @@ public void testMetaDataAvailable() throws Exception {
id.setFieldValue("_ingest._value.id", id.getSourceAndMetadata().get("_id"));
});
ForEachProcessor processor = new ForEachProcessor("_tag", "values", innerProcessor, false);
processor.execute(ingestDocument);
processor.execute(ingestDocument, (result, e) -> {});

assertThat(innerProcessor.getInvokedCounter(), equalTo(2));
assertThat(ingestDocument.getFieldValue("values.0.index", String.class), equalTo("_index"));
Expand Down Expand Up @@ -142,7 +139,7 @@ public void testRestOfTheDocumentIsAvailable() throws Exception {
"_tag", "values", new SetProcessor("_tag",
new TestTemplateService.MockTemplateScript.Factory("_ingest._value.new_field"),
(model) -> model.get("other")), false);
processor.execute(ingestDocument);
processor.execute(ingestDocument, (result, e) -> {});

assertThat(ingestDocument.getFieldValue("values.0.new_field", String.class), equalTo("value"));
assertThat(ingestDocument.getFieldValue("values.1.new_field", String.class), equalTo("value"));
Expand Down Expand Up @@ -180,7 +177,7 @@ public String getTag() {
);

ForEachProcessor processor = new ForEachProcessor("_tag", "values", innerProcessor, false);
processor.execute(ingestDocument);
processor.execute(ingestDocument, (result, e) -> {});
@SuppressWarnings("unchecked")
List<String> result = ingestDocument.getFieldValue("values", List.class);
assertThat(result.size(), equalTo(numValues));
Expand All @@ -205,7 +202,7 @@ public void testModifyFieldsOutsideArray() throws Exception {
Collections.singletonList(new UppercaseProcessor("_tag_upper", "_ingest._value", false, "_ingest._value")),
Collections.singletonList(new AppendProcessor("_tag", template, (model) -> (Collections.singletonList("added"))))
), false);
processor.execute(ingestDocument);
processor.execute(ingestDocument, (result, e) -> {});

List<?> result = ingestDocument.getFieldValue("values", List.class);
assertThat(result.get(0), equalTo("STRING"));
Expand All @@ -231,7 +228,7 @@ public void testScalarValueAllowsUnderscoreValueFieldToRemainAccessible() throws
TestProcessor processor = new TestProcessor(doc -> doc.setFieldValue("_ingest._value",
doc.getFieldValue("_source._value", String.class)));
ForEachProcessor forEachProcessor = new ForEachProcessor("_tag", "values", processor, false);
forEachProcessor.execute(ingestDocument);
forEachProcessor.execute(ingestDocument, (result, e) -> {});

List<?> result = ingestDocument.getFieldValue("values", List.class);
assertThat(result.get(0), equalTo("new_value"));
Expand Down Expand Up @@ -264,7 +261,7 @@ public void testNestedForEach() throws Exception {
);
ForEachProcessor processor = new ForEachProcessor(
"_tag", "values1", new ForEachProcessor("_tag", "_ingest._value.values2", testProcessor, false), false);
processor.execute(ingestDocument);
processor.execute(ingestDocument, (result, e) -> {});

List<?> result = ingestDocument.getFieldValue("values1.0.values2", List.class);
assertThat(result.get(0), equalTo("ABC"));
Expand All @@ -282,7 +279,7 @@ public void testIgnoreMissing() throws Exception {
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
TestProcessor testProcessor = new TestProcessor(doc -> {});
ForEachProcessor processor = new ForEachProcessor("_tag", "_ingest._value", testProcessor, true);
processor.execute(ingestDocument);
processor.execute(ingestDocument, (result, e) -> {});
assertIngestDocument(originalIngestDocument, ingestDocument);
assertThat(testProcessor.getInvokedCounter(), equalTo(0));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.elasticsearch.action.bulk;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.util.SparseFixedBitSet;
import org.elasticsearch.Assertions;
Expand Down Expand Up @@ -57,6 +59,7 @@
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.util.concurrent.AtomicArray;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNotFoundException;
Expand All @@ -82,6 +85,7 @@
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicIntegerArray;
import java.util.function.LongSupplier;
import java.util.stream.Collectors;

Expand Down Expand Up @@ -644,14 +648,13 @@ private long relativeTime() {
}

void processBulkIndexIngestRequest(Task task, BulkRequest original, ActionListener<BulkResponse> listener) {
long ingestStartTimeInNanos = System.nanoTime();
BulkRequestModifier bulkRequestModifier = new BulkRequestModifier(original);
ingestService.executeBulkRequest(() -> bulkRequestModifier,
(indexRequest, exception) -> {
logger.debug(() -> new ParameterizedMessage("failed to execute pipeline [{}] for document [{}/{}/{}]",
indexRequest.getPipeline(), indexRequest.index(), indexRequest.type(), indexRequest.id()), exception);
bulkRequestModifier.markCurrentItemAsFailed(exception);
}, (exception) -> {
final long ingestStartTimeInNanos = System.nanoTime();
final BulkRequestModifier bulkRequestModifier = new BulkRequestModifier(original);
ingestService.executeBulkRequest(
original.numberOfActions(),
() -> bulkRequestModifier,
bulkRequestModifier::markItemAsFailed,
(originalThread, exception) -> {
if (exception != null) {
logger.error("failed to execute pipeline for a bulk request", exception);
listener.onFailure(exception);
Expand All @@ -666,26 +669,56 @@ void processBulkIndexIngestRequest(Task task, BulkRequest original, ActionListen
// (this will happen if pre-processing all items in the bulk failed)
actionListener.onResponse(new BulkResponse(new BulkItemResponse[0], 0));
} else {
doExecute(task, bulkRequest, actionListener);
// If a processor went async and returned a response on a different thread then
// before we continue the bulk request we should fork back on a write thread:
if (originalThread == Thread.currentThread()) {
assert Thread.currentThread().getName().contains(ThreadPool.Names.WRITE);
doExecute(task, bulkRequest, actionListener);
} else {
threadPool.executor(ThreadPool.Names.WRITE).execute(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
listener.onFailure(e);
}

@Override
protected void doRun() throws Exception {
doExecute(task, bulkRequest, actionListener);
}

@Override
public boolean isForceExecution() {
// If we fork back to a write thread we **not** should fail, because tp queue is full.
// (Otherwise the work done during ingest will be lost)
// It is okay to force execution here. Throttling of write requests happens prior to
// ingest when a node receives a bulk request.
return true;
}
});
}
}
}
},
indexRequest -> bulkRequestModifier.markCurrentItemAsDropped());
bulkRequestModifier::markItemAsDropped
);
}

static final class BulkRequestModifier implements Iterator<DocWriteRequest<?>> {

private static final Logger LOGGER = LogManager.getLogger(BulkRequestModifier.class);

final BulkRequest bulkRequest;
final SparseFixedBitSet failedSlots;
final List<BulkItemResponse> itemResponses;
final AtomicIntegerArray originalSlots;

int currentSlot = -1;
int[] originalSlots;
volatile int currentSlot = -1;

BulkRequestModifier(BulkRequest bulkRequest) {
this.bulkRequest = bulkRequest;
this.failedSlots = new SparseFixedBitSet(bulkRequest.requests().size());
this.itemResponses = new ArrayList<>(bulkRequest.requests().size());
this.originalSlots = new AtomicIntegerArray(bulkRequest.requests().size()); // oversize, but that's ok
}

@Override
Expand All @@ -709,12 +742,11 @@ BulkRequest getBulkRequest() {

int slot = 0;
List<DocWriteRequest<?>> requests = bulkRequest.requests();
originalSlots = new int[requests.size()]; // oversize, but that's ok
for (int i = 0; i < requests.size(); i++) {
DocWriteRequest<?> request = requests.get(i);
if (failedSlots.get(i) == false) {
modifiedBulkRequest.add(request);
originalSlots[slot++] = i;
originalSlots.set(slot++, i);
}
}
return modifiedBulkRequest;
Expand All @@ -729,7 +761,7 @@ ActionListener<BulkResponse> wrapActionListenerIfNeeded(long ingestTookInMillis,
return ActionListener.delegateFailure(actionListener, (delegatedListener, response) -> {
BulkItemResponse[] items = response.getItems();
for (int i = 0; i < items.length; i++) {
itemResponses.add(originalSlots[i], response.getItems()[i]);
itemResponses.add(originalSlots.get(i), response.getItems()[i]);
}
delegatedListener.onResponse(
new BulkResponse(
Expand All @@ -738,12 +770,12 @@ ActionListener<BulkResponse> wrapActionListenerIfNeeded(long ingestTookInMillis,
}
}

void markCurrentItemAsDropped() {
IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(currentSlot));
failedSlots.set(currentSlot);
synchronized void markItemAsDropped(int slot) {
IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(slot));
failedSlots.set(slot);
final String id = indexRequest.id() == null ? DROPPED_ITEM_WITH_AUTO_GENERATED_ID : indexRequest.id();
itemResponses.add(
new BulkItemResponse(currentSlot, indexRequest.opType(),
new BulkItemResponse(slot, indexRequest.opType(),
new UpdateResponse(
new ShardId(indexRequest.index(), IndexMetaData.INDEX_UUID_NA_VALUE, 0),
indexRequest.type(), id, SequenceNumbers.UNASSIGNED_SEQ_NO, SequenceNumbers.UNASSIGNED_PRIMARY_TERM,
Expand All @@ -753,16 +785,19 @@ void markCurrentItemAsDropped() {
);
}

void markCurrentItemAsFailed(Exception e) {
IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(currentSlot));
synchronized void markItemAsFailed(int slot, Exception e) {
IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(slot));
LOGGER.debug(() -> new ParameterizedMessage("failed to execute pipeline [{}] for document [{}/{}/{}]",
indexRequest.getPipeline(), indexRequest.index(), indexRequest.type(), indexRequest.id()), e);

// We hit a error during preprocessing a request, so we:
// 1) Remember the request item slot from the bulk, so that we're done processing all requests we know what failed
// 2) Add a bulk item failure for this request
// 3) Continue with the next request in the bulk.
failedSlots.set(currentSlot);
failedSlots.set(slot);
BulkItemResponse.Failure failure = new BulkItemResponse.Failure(indexRequest.index(), indexRequest.type(),
indexRequest.id(), e);
itemResponses.add(new BulkItemResponse(currentSlot, indexRequest.opType(), failure));
itemResponses.add(new BulkItemResponse(slot, indexRequest.opType(), failure));
}

}
Expand Down
Loading