-
Notifications
You must be signed in to change notification settings - Fork 207
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Internal and external latency to OpenSearch and S3 sinks. #3583
Changes from 1 commit
16cfa3f
72eda21
a31d9bb
fa8e91e
4c2af3b
123109a
c2bbbfb
07a0896
b22a682
e014114
2d32e77
7709f0e
39c6160
e794577
ed07c9d
b7a574c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,6 +30,7 @@ | |
import org.opensearch.dataprepper.model.configuration.PluginModel; | ||
import org.opensearch.dataprepper.model.configuration.PluginSetting; | ||
import org.opensearch.dataprepper.model.event.Event; | ||
import org.opensearch.dataprepper.model.event.EventHandle; | ||
import org.opensearch.dataprepper.model.event.exceptions.EventKeyNotFoundException; | ||
import org.opensearch.dataprepper.model.failures.DlqObject; | ||
import org.opensearch.dataprepper.model.opensearch.OpenSearchBulkActions; | ||
|
@@ -59,6 +60,7 @@ | |
import org.opensearch.dataprepper.plugins.sink.opensearch.index.IndexTemplateAPIWrapperFactory; | ||
import org.opensearch.dataprepper.plugins.sink.opensearch.index.IndexType; | ||
import org.opensearch.dataprepper.plugins.sink.opensearch.index.TemplateStrategy; | ||
import org.opensearch.dataprepper.plugins.sink.LatencyMetrics; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import software.amazon.awssdk.services.opensearchserverless.OpenSearchServerlessClient; | ||
|
@@ -91,6 +93,7 @@ public class OpenSearchSink extends AbstractSink<Record<Event>> { | |
private static final Logger LOG = LoggerFactory.getLogger(OpenSearchSink.class); | ||
private static final int INITIALIZE_RETRY_WAIT_TIME_MS = 5000; | ||
private final AwsCredentialsSupplier awsCredentialsSupplier; | ||
private final LatencyMetrics latencyMetrics; | ||
|
||
private DlqWriter dlqWriter; | ||
private BufferedWriter dlqFileWriter; | ||
|
@@ -141,6 +144,7 @@ public OpenSearchSink(final PluginSetting pluginSetting, | |
this.awsCredentialsSupplier = awsCredentialsSupplier; | ||
this.sinkContext = sinkContext != null ? sinkContext : new SinkContext(null, Collections.emptyList(), Collections.emptyList(), Collections.emptyList()); | ||
this.expressionEvaluator = expressionEvaluator; | ||
this.latencyMetrics = new LatencyMetrics(pluginMetrics); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Is there really no way to give the … [comment truncated in source]

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

How do we do that? Metrics are sink specific, right?

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Yes, but we could "fake" that the metrics are owned by the sink by adding the name of the sink here (since you can name the metrics anything, and that way it can still follow our metric naming pattern).

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

I do not think that's the clean way, because an event handle can only have one name for metrics. How can it have multiple names? I think this is way cleaner.

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Every sink will need to provide this latency, and we want to make it as easy as possible for sinks to use this feature. I think that data-prepper-core could hold a … [comment truncated in source]. One thing we would need to be sure to do here is ensure that sinks always call … [comment truncated in source]
||
bulkRequestTimer = pluginMetrics.timer(BULKREQUEST_LATENCY); | ||
bulkRequestErrorsCounter = pluginMetrics.counter(BULKREQUEST_ERRORS); | ||
invalidActionErrorsCounter = pluginMetrics.counter(INVALID_ACTION_ERRORS); | ||
|
@@ -316,6 +320,10 @@ private BulkOperation getBulkOperationForAction(final String action, final Seria | |
return bulkOperation; | ||
} | ||
|
||
public void updateLatencyMetrics(final EventHandle eventHandle) { | ||
latencyMetrics.update(eventHandle); | ||
} | ||
|
||
@Override | ||
public void doOutput(final Collection<Record<Event>> records) { | ||
final long threadId = Thread.currentThread().getId(); | ||
|
@@ -376,7 +384,7 @@ public void doOutput(final Collection<Record<Event>> records) { | |
} | ||
BulkOperation bulkOperation = getBulkOperationForAction(eventAction, document, indexName, event.getJsonNode()); | ||
|
||
BulkOperationWrapper bulkOperationWrapper = new BulkOperationWrapper(bulkOperation, event.getEventHandle(), serializedJsonNode); | ||
BulkOperationWrapper bulkOperationWrapper = new BulkOperationWrapper(this, bulkOperation, event.getEventHandle(), serializedJsonNode); | ||
final long estimatedBytesBeforeAdd = bulkRequest.estimateSizeInBytesWithDocument(bulkOperationWrapper); | ||
if (bulkSize >= 0 && estimatedBytesBeforeAdd >= bulkSize && bulkRequest.getOperationsCount() > 0) { | ||
flushBatch(bulkRequest); | ||
|
@@ -449,6 +457,7 @@ private void logFailureForDlqObjects(final List<DlqObject> dlqObjects, final Thr | |
try { | ||
dlqFileWriter.write(String.format("{\"Document\": [%s], \"failure\": %s}\n", | ||
BulkOperationWriter.dlqObjectToString(dlqObject), message)); | ||
updateLatencyMetrics(dlqObject.getEventHandle()); | ||
dlqObject.releaseEventHandle(true); | ||
} catch (final IOException e) { | ||
LOG.error("Failed to write a document to the DLQ", e); | ||
|
@@ -459,6 +468,7 @@ private void logFailureForDlqObjects(final List<DlqObject> dlqObjects, final Thr | |
try { | ||
dlqWriter.write(dlqObjects, pluginSetting.getPipelineName(), pluginSetting.getName()); | ||
dlqObjects.forEach((dlqObject) -> { | ||
updateLatencyMetrics(dlqObject.getEventHandle()); | ||
dlqObject.releaseEventHandle(true); | ||
}); | ||
} catch (final IOException e) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Ultimately we want this called because `release` is called. Instead of doubling up calls, how about we configure the `EventHandle` to provide this type of callback? Then, in `OpenSearchSink`, be sure to make this call. The current design results in too many classes having to be aware of these interactions.