Make ingest executing non blocking #43361

Merged 19 commits on Jul 1, 2019
Changes from 5 commits
ForEachProcessor.java
@@ -28,6 +28,9 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.function.BiConsumer;

import org.elasticsearch.script.ScriptService;

import static org.elasticsearch.ingest.ConfigurationUtils.newConfigurationException;
@@ -63,29 +66,48 @@ boolean isIgnoreMissing() {
     }

     @Override
-    public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
+    public void execute(IngestDocument ingestDocument, BiConsumer<IngestDocument, Exception> handler) {
         List<?> values = ingestDocument.getFieldValue(field, List.class, ignoreMissing);
         if (values == null) {
             if (ignoreMissing) {
-                return ingestDocument;
+                handler.accept(ingestDocument, null);
+            } else {
+                handler.accept(null, new IllegalArgumentException("field [" + field + "] is null, cannot loop over its elements."));
             }
-            throw new IllegalArgumentException("field [" + field + "] is null, cannot loop over its elements.");
-        }
-        List<Object> newValues = new ArrayList<>(values.size());
-        IngestDocument document = ingestDocument;
-        for (Object value : values) {
-            Object previousValue = ingestDocument.getIngestMetadata().put("_value", value);
-            try {
-                document = processor.execute(document);
-                if (document == null) {
-                    return null;
-                }
-            } finally {
-                newValues.add(ingestDocument.getIngestMetadata().put("_value", previousValue));
-            }
-        }
-        document.setFieldValue(field, newValues);
-        return document;
+        } else {
+            List<Object> newValues = new CopyOnWriteArrayList<>();
+            innerExecute(0, values, newValues, ingestDocument, handler);
+        }
     }

+    void innerExecute(int index, List<?> values, List<Object> newValues, IngestDocument document,
+                      BiConsumer<IngestDocument, Exception> handler) {
+        if (index == values.size()) {
+            document.setFieldValue(field, new ArrayList<>(newValues));
+            handler.accept(document, null);
+            return;
+        }
+
+        Object value = values.get(index);
+        Object previousValue = document.getIngestMetadata().put("_value", value);
+        processor.execute(document, (result, e) -> {
+            if (e != null) {
+                newValues.add(document.getIngestMetadata().put("_value", previousValue));
+                handler.accept(null, e);
+            } else {
+                if (result == null) {

Review comment here (Contributor): merge both into an else if?
Reply (Member, author): pushed: 0103761

+                    handler.accept(null, null);
+                } else {
+                    newValues.add(document.getIngestMetadata().put("_value", previousValue));
+                    innerExecute(index + 1, values, newValues, document, handler);
+                }
+            }
+        });
+    }
+
+    @Override
+    public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
+        throw new UnsupportedOperationException("this method should not get executed");
+    }

@Override
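A note on the pattern above: the synchronous for-loop has become recursion through a completion callback. Each element's handler kicks off the next index, so no thread blocks while an element is in flight, and newValues is written back to the document only once the index reaches the end of the list; the CopyOnWriteArrayList keeps the appends safe when callbacks arrive on different threads. A minimal self-contained sketch of the same pattern, with an illustrative AsyncStep interface standing in for the Elasticsearch Processor:

import java.util.ArrayList;
import java.util.List;
import java.util.function.BiConsumer;

public class AsyncForEach {

    // Stand-in for an async processor: consumes a value plus a completion callback.
    interface AsyncStep {
        void apply(String value, BiConsumer<String, Exception> callback);
    }

    // Processes values[index], then recurses to index + 1 from inside the callback.
    static void innerExecute(int index, List<String> values, List<String> results,
                             AsyncStep step, BiConsumer<List<String>, Exception> done) {
        if (index == values.size()) {
            done.accept(results, null);   // every element processed: report success once
            return;
        }
        step.apply(values.get(index), (result, e) -> {
            if (e != null) {
                done.accept(null, e);     // abort the "loop" on the first failure
            } else {
                results.add(result);
                // Note: a step that completes synchronously adds one stack frame per
                // element, so very large lists deepen the recursion accordingly.
                innerExecute(index + 1, values, results, step, done);
            }
        });
    }

    public static void main(String[] args) {
        AsyncStep upper = (value, callback) -> callback.accept(value.toUpperCase(), null);
        innerExecute(0, List.of("a", "b", "c"), new ArrayList<>(), upper,
            (results, e) -> System.out.println(results));   // prints [A, B, C]
    }
}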
ForEachProcessorTests.java
@@ -53,7 +53,7 @@ public void testExecute() throws Exception {
"_tag", "values", new UppercaseProcessor("_tag", "_ingest._value", false, "_ingest._value"),
false
);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});

@SuppressWarnings("unchecked")
List<String> result = ingestDocument.getFieldValue("values", List.class);
@@ -73,12 +73,9 @@ public void testExecuteWithFailure() throws Exception {
}
});
ForEachProcessor processor = new ForEachProcessor("_tag", "values", testProcessor, false);
-        try {
-            processor.execute(ingestDocument);
-            fail("exception expected");
-        } catch (RuntimeException e) {
-            assertThat(e.getMessage(), equalTo("failure"));
-        }
+        Exception[] exceptions = new Exception[1];
+        processor.execute(ingestDocument, (result, e) -> {exceptions[0] = e;});
+        assertThat(exceptions[0].getMessage(), equalTo("failure"));
assertThat(testProcessor.getInvokedCounter(), equalTo(3));
assertThat(ingestDocument.getFieldValue("values", List.class), equalTo(Arrays.asList("a", "b", "c")));

@@ -95,7 +92,7 @@ public void testExecuteWithFailure() throws Exception {
"_tag", "values", new CompoundProcessor(false, Arrays.asList(testProcessor), Arrays.asList(onFailureProcessor)),
false
);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
assertThat(testProcessor.getInvokedCounter(), equalTo(3));
assertThat(ingestDocument.getFieldValue("values", List.class), equalTo(Arrays.asList("A", "B", "c")));
}
@@ -114,7 +111,7 @@ public void testMetaDataAvailable() throws Exception {
id.setFieldValue("_ingest._value.id", id.getSourceAndMetadata().get("_id"));
});
ForEachProcessor processor = new ForEachProcessor("_tag", "values", innerProcessor, false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});

assertThat(innerProcessor.getInvokedCounter(), equalTo(2));
assertThat(ingestDocument.getFieldValue("values.0.index", String.class), equalTo("_index"));
@@ -142,7 +139,7 @@ public void testRestOfTheDocumentIsAvailable() throws Exception {
"_tag", "values", new SetProcessor("_tag",
new TestTemplateService.MockTemplateScript.Factory("_ingest._value.new_field"),
(model) -> model.get("other")), false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});

assertThat(ingestDocument.getFieldValue("values.0.new_field", String.class), equalTo("value"));
assertThat(ingestDocument.getFieldValue("values.1.new_field", String.class), equalTo("value"));
@@ -180,7 +177,7 @@ public String getTag() {
);

ForEachProcessor processor = new ForEachProcessor("_tag", "values", innerProcessor, false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
@SuppressWarnings("unchecked")
List<String> result = ingestDocument.getFieldValue("values", List.class);
assertThat(result.size(), equalTo(numValues));
@@ -205,7 +202,7 @@ public void testModifyFieldsOutsideArray() throws Exception {
Collections.singletonList(new UppercaseProcessor("_tag_upper", "_ingest._value", false, "_ingest._value")),
Collections.singletonList(new AppendProcessor("_tag", template, (model) -> (Collections.singletonList("added"))))
), false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});

List<?> result = ingestDocument.getFieldValue("values", List.class);
assertThat(result.get(0), equalTo("STRING"));
@@ -231,7 +228,7 @@ public void testScalarValueAllowsUnderscoreValueFieldToRemainAccessible() throws Exception {
TestProcessor processor = new TestProcessor(doc -> doc.setFieldValue("_ingest._value",
doc.getFieldValue("_source._value", String.class)));
ForEachProcessor forEachProcessor = new ForEachProcessor("_tag", "values", processor, false);
-        forEachProcessor.execute(ingestDocument);
+        forEachProcessor.execute(ingestDocument, (result, e) -> {});

List<?> result = ingestDocument.getFieldValue("values", List.class);
assertThat(result.get(0), equalTo("new_value"));
@@ -264,7 +261,7 @@ public void testNestedForEach() throws Exception {
);
ForEachProcessor processor = new ForEachProcessor(
"_tag", "values1", new ForEachProcessor("_tag", "_ingest._value.values2", testProcessor, false), false);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});

List<?> result = ingestDocument.getFieldValue("values1.0.values2", List.class);
assertThat(result.get(0), equalTo("ABC"));
@@ -282,7 +279,7 @@ public void testIgnoreMissing() throws Exception {
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
TestProcessor testProcessor = new TestProcessor(doc -> {});
ForEachProcessor processor = new ForEachProcessor("_tag", "_ingest._value", testProcessor, true);
-        processor.execute(ingestDocument);
+        processor.execute(ingestDocument, (result, e) -> {});
assertIngestDocument(originalIngestDocument, ingestDocument);
assertThat(testProcessor.getInvokedCounter(), equalTo(0));
}
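All of the tests above can assert immediately after execute because the inner processors used here invoke their handlers on the calling thread. A processor that completes on another thread would require the test to wait for the callback before asserting; a sketch of that variant (JUnit-style, with processor and ingestDocument assumed to be set up as in the tests above):

import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicReference;

public void testTrulyAsyncProcessor() throws Exception {
    CountDownLatch latch = new CountDownLatch(1);
    AtomicReference<Exception> error = new AtomicReference<>();

    processor.execute(ingestDocument, (result, e) -> {
        error.set(e);        // capture the outcome on the callback thread
        latch.countDown();   // release the waiting test thread
    });

    // Bound the wait so a missing callback fails the test instead of hanging it.
    assertTrue("handler was never invoked", latch.await(10, TimeUnit.SECONDS));
    assertNull(error.get());
}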
TransportBulkAction.java
@@ -19,6 +19,8 @@

package org.elasticsearch.action.bulk;

+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.lucene.util.SparseFixedBitSet;
import org.elasticsearch.ElasticsearchParseException;
@@ -585,14 +587,13 @@ private long relativeTime() {
}

void processBulkIndexIngestRequest(Task task, BulkRequest original, ActionListener<BulkResponse> listener) {
-        long ingestStartTimeInNanos = System.nanoTime();
-        BulkRequestModifier bulkRequestModifier = new BulkRequestModifier(original);
-        ingestService.executeBulkRequest(() -> bulkRequestModifier,
-            (indexRequest, exception) -> {
-                logger.debug(() -> new ParameterizedMessage("failed to execute pipeline [{}] for document [{}/{}/{}]",
-                    indexRequest.getPipeline(), indexRequest.index(), indexRequest.type(), indexRequest.id()), exception);
-                bulkRequestModifier.markCurrentItemAsFailed(exception);
-            }, (exception) -> {
+        final long ingestStartTimeInNanos = System.nanoTime();
+        final BulkRequestModifier bulkRequestModifier = new BulkRequestModifier(original);
+        ingestService.executeBulkRequest(
+            original.numberOfActions(),
+            () -> bulkRequestModifier,
+            bulkRequestModifier::markItemAsFailed,
+            (exception) -> {
if (exception != null) {
logger.error("failed to execute pipeline for a bulk request", exception);
listener.onFailure(exception);
@@ -611,17 +612,20 @@ void processBulkIndexIngestRequest(Task task, BulkRequest original, ActionListener<BulkResponse> listener) {
}
}
},
-            indexRequest -> bulkRequestModifier.markCurrentItemAsDropped());
+            bulkRequestModifier::markItemAsDropped
+        );
}
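The call site above implies the shape of the new executeBulkRequest contract: the caller supplies the expected item count up front, per-slot failure and drop callbacks, and a single completion callback, all of which may now fire on whichever threads run the pipelines. An interface with that shape, inferred from this call site rather than copied from IngestService:

import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.IntConsumer;

interface BulkIngestExecutor {
    void executeBulkRequest(
        int numberOfActions,                          // total items, so the last completion is detectable
        Iterable<DocWriteRequest<?>> requests,        // the bulk items to run through their pipelines
        BiConsumer<Integer, Exception> onItemFailure, // invoked with the failing item's slot
        Consumer<Exception> onCompletion,             // invoked exactly once when everything is done
        IntConsumer onItemDropped);                   // invoked with the slot of a dropped item
}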

static final class BulkRequestModifier implements Iterator<DocWriteRequest<?>> {

+        private static final Logger LOGGER = LogManager.getLogger(BulkRequestModifier.class);
+
final BulkRequest bulkRequest;
final SparseFixedBitSet failedSlots;
final List<BulkItemResponse> itemResponses;

-        int currentSlot = -1;
-        int[] originalSlots;
+        volatile int currentSlot = -1;
+        volatile int[] originalSlots;
Review comment on `volatile int[] originalSlots` (Contributor):
I'm always a bit on the fence when I see a volatile reference to a mutable object, because volatile then suggests that changes would be visible across threads, while it only applies to the int[] reference, not its contents. I had a quick look and I think the logic is correct right now, but an AtomicIntegerArray instead of an int[] would make it look less tricky, in my opinion. Alternatively, we should set the contents of the array before setting the reference a couple of lines below, and add comments about why the concurrency is correct, so that this doesn't get broken by an apparently harmless refactoring.

Reply (Member, author): I pushed: f404843
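For context on this exchange: a volatile write publishes the reference, and the Java memory model guarantees that all writes made before the volatile write are visible to any thread that subsequently reads the field, so filling the array completely before assigning it is safe, while mutating elements after publication is not. A sketch of both options the reviewer mentions (illustrative code, not the change actually pushed in f404843):

import java.util.concurrent.atomic.AtomicIntegerArray;

class SlotPublication {
    private volatile int[] slotsPlain;                // fill first, publish last
    private volatile AtomicIntegerArray slotsAtomic;  // per-element visibility

    void publishPlain(int size) {
        int[] slots = new int[size];
        for (int i = 0; i < size; i++) {
            slots[i] = computeSlot(i);  // complete every element write first...
        }
        slotsPlain = slots;             // ...then publish; the volatile write makes the
                                        // earlier element writes visible to readers
    }

    void publishAtomic(int size) {
        AtomicIntegerArray slots = new AtomicIntegerArray(size);
        for (int i = 0; i < size; i++) {
            slots.set(i, computeSlot(i));  // each set() is individually visible
        }
        slotsAtomic = slots;
    }

    private int computeSlot(int i) {
        return i;  // placeholder mapping
    }
}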


BulkRequestModifier(BulkRequest bulkRequest) {
this.bulkRequest = bulkRequest;
@@ -679,11 +683,11 @@ ActionListener<BulkResponse> wrapActionListenerIfNeeded(long ingestTookInMillis,
}
}

-        void markCurrentItemAsDropped() {
-            IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(currentSlot));
-            failedSlots.set(currentSlot);
+        synchronized void markItemAsDropped(int slot) {
+            IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(slot));
+            failedSlots.set(slot);
             itemResponses.add(
-                new BulkItemResponse(currentSlot, indexRequest.opType(),
+                new BulkItemResponse(slot, indexRequest.opType(),
new UpdateResponse(
new ShardId(indexRequest.index(), IndexMetaData.INDEX_UUID_NA_VALUE, 0),
indexRequest.type(), indexRequest.id(), indexRequest.version(), DocWriteResponse.Result.NOOP
@@ -692,16 +696,19 @@ void markCurrentItemAsDropped() {
);
}

-        void markCurrentItemAsFailed(Exception e) {
-            IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(currentSlot));
+        synchronized void markItemAsFailed(int slot, Exception e) {
+            IndexRequest indexRequest = getIndexWriteRequest(bulkRequest.requests().get(slot));
+            LOGGER.debug(() -> new ParameterizedMessage("failed to execute pipeline [{}] for document [{}/{}/{}]",
+                indexRequest.getPipeline(), indexRequest.index(), indexRequest.type(), indexRequest.id()), e);

Review comment on the new debug logging (Contributor): :elasticheart:

            // We hit an error during preprocessing a request, so we:
            // 1) Remember the request item slot from the bulk, so that when we're done processing all requests we know what failed
// 2) Add a bulk item failure for this request
// 3) Continue with the next request in the bulk.
-            failedSlots.set(currentSlot);
+            failedSlots.set(slot);
BulkItemResponse.Failure failure = new BulkItemResponse.Failure(indexRequest.index(), indexRequest.type(),
indexRequest.id(), e);
-            itemResponses.add(new BulkItemResponse(currentSlot, indexRequest.opType(), failure));
+            itemResponses.add(new BulkItemResponse(slot, indexRequest.opType(), failure));
}

}
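Why these methods became synchronized and slot-based: once ingest runs asynchronously, markItemAsFailed and markItemAsDropped can be invoked concurrently from different pipeline threads, and neither SparseFixedBitSet nor ArrayList tolerates unsynchronized concurrent mutation; the explicit slot argument likewise replaces the shared currentSlot cursor, which only made sense while items were processed one at a time. A stripped-down illustration of the hazard (hypothetical ItemTracker, with java.util.BitSet standing in for SparseFixedBitSet):

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

class ItemTracker {
    private final BitSet failedSlots = new BitSet();               // not thread-safe
    private final List<String> itemResponses = new ArrayList<>();  // not thread-safe

    // Without synchronized, two callback threads marking different slots could
    // race on the bit set's internal words or on the list's size bookkeeping.
    synchronized void markItemAsFailed(int slot, Exception e) {
        failedSlots.set(slot);
        itemResponses.add("slot " + slot + " failed: " + e.getMessage());
    }
}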
SimulateExecutionService.java
@@ -26,8 +26,10 @@
import org.elasticsearch.ingest.Pipeline;
import org.elasticsearch.threadpool.ThreadPool;

-import java.util.ArrayList;
 import java.util.List;
+import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.function.BiConsumer;

import static org.elasticsearch.ingest.TrackingResultProcessor.decorate;

@@ -41,40 +43,44 @@ class SimulateExecutionService {
this.threadPool = threadPool;
}

-    SimulateDocumentResult executeDocument(Pipeline pipeline, IngestDocument ingestDocument, boolean verbose) {
+    void executeDocument(Pipeline pipeline, IngestDocument ingestDocument, boolean verbose,
+                         BiConsumer<SimulateDocumentResult, Exception> handler) {
         if (verbose) {
-            List<SimulateProcessorResult> processorResultList = new ArrayList<>();
+            List<SimulateProcessorResult> processorResultList = new CopyOnWriteArrayList<>();
             CompoundProcessor verbosePipelineProcessor = decorate(pipeline.getCompoundProcessor(), processorResultList);
-            try {
-                Pipeline verbosePipeline = new Pipeline(pipeline.getId(), pipeline.getDescription(), pipeline.getVersion(),
-                    verbosePipelineProcessor);
-                ingestDocument.executePipeline(verbosePipeline);
-                return new SimulateDocumentVerboseResult(processorResultList);
-            } catch (Exception e) {
-                return new SimulateDocumentVerboseResult(processorResultList);
-            }
+            Pipeline verbosePipeline = new Pipeline(pipeline.getId(), pipeline.getDescription(), pipeline.getVersion(),
+                verbosePipelineProcessor);
+            ingestDocument.executePipeline(verbosePipeline, (result, e) -> {
+                handler.accept(new SimulateDocumentVerboseResult(processorResultList), e);
+            });
         } else {
-            try {
-                pipeline.execute(ingestDocument);
-                return new SimulateDocumentBaseResult(ingestDocument);
-            } catch (Exception e) {
-                return new SimulateDocumentBaseResult(e);
-            }
+            pipeline.execute(ingestDocument, (result, e) -> {
+                if (e == null) {
+                    handler.accept(new SimulateDocumentBaseResult(ingestDocument), null);
+                } else {
+                    handler.accept(new SimulateDocumentBaseResult(e), null);
+                }
+            });
}
}

public void execute(SimulatePipelineRequest.Parsed request, ActionListener<SimulatePipelineResponse> listener) {
-        threadPool.executor(THREAD_POOL_NAME).execute(new ActionRunnable<SimulatePipelineResponse>(listener) {
+        threadPool.executor(THREAD_POOL_NAME).execute(new ActionRunnable<>(listener) {
             @Override
-            protected void doRun() throws Exception {
-                List<SimulateDocumentResult> responses = new ArrayList<>();
+            protected void doRun() {
+                final AtomicInteger counter = new AtomicInteger();
+                final List<SimulateDocumentResult> responses = new CopyOnWriteArrayList<>();
                 for (IngestDocument ingestDocument : request.getDocuments()) {
-                    SimulateDocumentResult response = executeDocument(request.getPipeline(), ingestDocument, request.isVerbose());
-                    if (response != null) {
-                        responses.add(response);
-                    }
+                    executeDocument(request.getPipeline(), ingestDocument, request.isVerbose(), (response, e) -> {
+                        if (response != null) {
+                            responses.add(response);
+                        }
+                        if (counter.incrementAndGet() == request.getDocuments().size()) {
+                            listener.onResponse(new SimulatePipelineResponse(request.getPipeline().getId(),
+                                request.isVerbose(), responses));
+                        }
+                    });
                 }
-                listener.onResponse(new SimulatePipelineResponse(request.getPipeline().getId(), request.isVerbose(), responses));
             }
         });
}
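The doRun rewrite uses a common aggregation pattern for fan-out callbacks: start every document, collect results in a concurrent list, and let whichever callback increments the AtomicInteger to the expected count send the single response. The same pattern in isolation (a minimal sketch with a hypothetical runAsync standing in for executeDocument):

import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;

class FanOutAggregator {

    // Runs an async task per input and invokes `done` exactly once, after the last
    // callback arrives, regardless of the order in which the callbacks complete.
    static void forAll(List<String> inputs, Consumer<List<String>> done) {
        AtomicInteger counter = new AtomicInteger();
        List<String> results = new CopyOnWriteArrayList<>();  // callbacks may be concurrent

        for (String input : inputs) {
            runAsync(input, result -> {
                results.add(result);
                // incrementAndGet is atomic, so exactly one callback observes the final count.
                if (counter.incrementAndGet() == inputs.size()) {
                    done.accept(results);
                }
            });
        }
    }

    static void runAsync(String input, Consumer<String> callback) {
        new Thread(() -> callback.accept(input.toUpperCase())).start();  // stand-in async work
    }
}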