Added BufferedInputStream to allow mark and reset ops during IO errors (

opensearch-project#10690) Signed-off-by: vikasvb90 <vikasvb@amazon.com> Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com>
deshsidd · Oct 18, 2023 · d79a6cc · d79a6cc
1 parent 042ae20
commit d79a6cc
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 6 deletions.
diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java
@@ -38,6 +38,7 @@
 import org.opensearch.cluster.service.ClusterService;
 import org.opensearch.common.settings.Setting;
 import org.opensearch.common.settings.Settings;
+import org.opensearch.common.unit.TimeValue;
 import org.opensearch.common.util.concurrent.OpenSearchExecutors;
 import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
 import org.opensearch.core.xcontent.NamedXContentRegistry;
@@ -55,6 +56,7 @@
 import org.opensearch.script.ScriptService;
 import org.opensearch.threadpool.ExecutorBuilder;
 import org.opensearch.threadpool.FixedExecutorBuilder;
+import org.opensearch.threadpool.ScalingExecutorBuilder;
 import org.opensearch.threadpool.ThreadPool;
 import org.opensearch.watcher.ResourceWatcherService;
 
@@ -93,17 +95,21 @@ public S3RepositoryPlugin(final Settings settings, final Path configPath) {
     @Override
     public List<ExecutorBuilder<?>> getExecutorBuilders(Settings settings) {
         List<ExecutorBuilder<?>> executorBuilders = new ArrayList<>();
+        int halfProcMaxAt5 = halfAllocatedProcessorsMaxFive(allocatedProcessors(settings));
         executorBuilders.add(
             new FixedExecutorBuilder(settings, PRIORITY_FUTURE_COMPLETION, priorityPoolCount(settings), 10_000, PRIORITY_FUTURE_COMPLETION)
         );
-        executorBuilders.add(
-            new FixedExecutorBuilder(settings, PRIORITY_STREAM_READER, priorityPoolCount(settings), 10_000, PRIORITY_STREAM_READER)
-        );
+        executorBuilders.add(new ScalingExecutorBuilder(PRIORITY_STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5)));
+
         executorBuilders.add(new FixedExecutorBuilder(settings, FUTURE_COMPLETION, normalPoolCount(settings), 10_000, FUTURE_COMPLETION));
-        executorBuilders.add(new FixedExecutorBuilder(settings, STREAM_READER, normalPoolCount(settings), 10_000, STREAM_READER));
+        executorBuilders.add(new ScalingExecutorBuilder(STREAM_READER, 1, halfProcMaxAt5, TimeValue.timeValueMinutes(5)));
         return executorBuilders;
     }
 
+    static int halfAllocatedProcessorsMaxFive(final int allocatedProcessors) {
+        return boundedBy((allocatedProcessors + 1) / 2, 1, 5);
+    }
+
     S3RepositoryPlugin(final Settings settings, final Path configPath, final S3Service service, final S3AsyncService s3AsyncService) {
         this.service = Objects.requireNonNull(service, "S3 service must not be null");
         this.configPath = configPath;

diff --git a/...s/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java b/...s/repository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncPartsHandler.java
@@ -23,9 +23,11 @@
 import org.opensearch.common.StreamContext;
 import org.opensearch.common.blobstore.stream.write.WritePriority;
 import org.opensearch.common.io.InputStreamContainer;
+import org.opensearch.core.common.unit.ByteSizeUnit;
 import org.opensearch.repositories.s3.SocketAccess;
 import org.opensearch.repositories.s3.io.CheckedContainer;
 
+import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -142,7 +144,9 @@ private static void uploadPart(
             () -> s3AsyncClient.uploadPart(
                 uploadPartRequest,
                 AsyncRequestBody.fromInputStream(
-                    inputStreamContainer.getInputStream(),
+                    // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered
+                    // data can be retried instead of retrying whole file by the application.
+                    new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)),
                     inputStreamContainer.getContentLength(),
                     streamReadExecutor
                 )

diff --git a/...epository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java b/...epository-s3/src/main/java/org/opensearch/repositories/s3/async/AsyncTransferManager.java
@@ -37,6 +37,7 @@
 import org.opensearch.repositories.s3.SocketAccess;
 import org.opensearch.repositories.s3.io.CheckedContainer;
 
+import java.io.BufferedInputStream;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Base64;
@@ -303,7 +304,9 @@ private void uploadInOneChunk(
             () -> s3AsyncClient.putObject(
                 putObjectRequestBuilder.build(),
                 AsyncRequestBody.fromInputStream(
-                    inputStreamContainer.getInputStream(),
+                    // Buffered stream is needed to allow mark and reset ops during IO errors so that only buffered
+                    // data can be retried instead of retrying whole file by the application.
+                    new BufferedInputStream(inputStreamContainer.getInputStream(), (int) (ByteSizeUnit.MB.toBytes(1) + 1)),
                     inputStreamContainer.getContentLength(),
                     streamReadExecutor
                 )