From c76b08d90ff1f38c0f487e8b7ab871e4b330db79 Mon Sep 17 00:00:00 2001 From: anand kumar rai Date: Tue, 3 Sep 2024 09:53:01 +0530 Subject: [PATCH] Add limit on number of processors in Ingest pipelines (#15465) * Add limit on number of processors in Ingest pipelines Signed-off-by: Rai --- CHANGELOG.md | 1 + .../ingest/SimulateExecutionService.java | 8 +- .../SimulatePipelineTransportAction.java | 2 +- .../common/settings/ClusterSettings.java | 2 + .../org/opensearch/ingest/IngestService.java | 37 +++++ .../ingest/SimulateExecutionServiceTests.java | 23 ++- .../opensearch/ingest/IngestServiceTests.java | 155 +++++++++++++++++- 7 files changed, 221 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 442677287daf3..f17741c232b1a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add support for centralize snapshot creation with pinned timestamp ([#15124](https://github.com/opensearch-project/OpenSearch/pull/15124)) - Add concurrent search support for Derived Fields ([#15326](https://github.com/opensearch-project/OpenSearch/pull/15326)) - [Workload Management] Add query group stats constructs ([#15343](https://github.com/opensearch-project/OpenSearch/pull/15343))) +- Add limit on number of processors for Ingest pipeline([#15460](https://github.com/opensearch-project/OpenSearch/pull/15465)). - Add runAs to Subject interface and introduce IdentityAwarePlugin extension point ([#14630](https://github.com/opensearch-project/OpenSearch/pull/14630)) - Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454)) - [Workload Management] Add rejection logic for co-ordinator and shard level requests ([#15428](https://github.com/opensearch-project/OpenSearch/pull/15428))) diff --git a/server/src/main/java/org/opensearch/action/ingest/SimulateExecutionService.java b/server/src/main/java/org/opensearch/action/ingest/SimulateExecutionService.java index c7c0f21eb0876..459466f8c8ab6 100644 --- a/server/src/main/java/org/opensearch/action/ingest/SimulateExecutionService.java +++ b/server/src/main/java/org/opensearch/action/ingest/SimulateExecutionService.java @@ -36,6 +36,7 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.ingest.CompoundProcessor; import org.opensearch.ingest.IngestDocument; +import org.opensearch.ingest.IngestService; import org.opensearch.ingest.Pipeline; import org.opensearch.threadpool.ThreadPool; @@ -56,9 +57,11 @@ class SimulateExecutionService { private static final String THREAD_POOL_NAME = ThreadPool.Names.MANAGEMENT; private final ThreadPool threadPool; + private final IngestService ingestService; - SimulateExecutionService(ThreadPool threadPool) { + SimulateExecutionService(ThreadPool threadPool, IngestService ingestService) { this.threadPool = threadPool; + this.ingestService = ingestService; } void executeDocument( @@ -91,6 +94,9 @@ void executeDocument( } public void execute(SimulatePipelineRequest.Parsed request, ActionListener listener) { + + ingestService.validateProcessorCountForIngestPipeline(request.getPipeline()); + threadPool.executor(THREAD_POOL_NAME).execute(ActionRunnable.wrap(listener, l -> { final AtomicInteger counter = new AtomicInteger(); final List responses = new CopyOnWriteArrayList<>( diff --git a/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineTransportAction.java b/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineTransportAction.java index 4753679d370af..5eeb09c4d50c0 100644 --- a/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineTransportAction.java +++ b/server/src/main/java/org/opensearch/action/ingest/SimulatePipelineTransportAction.java @@ -69,7 +69,7 @@ public SimulatePipelineTransportAction( (Writeable.Reader) SimulatePipelineRequest::new ); this.ingestService = ingestService; - this.executionService = new SimulateExecutionService(threadPool); + this.executionService = new SimulateExecutionService(threadPool, ingestService); } @Override diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 9a6b3f1118709..31e47dc14db23 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -132,6 +132,7 @@ import org.opensearch.indices.fielddata.cache.IndicesFieldDataCache; import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.store.IndicesStore; +import org.opensearch.ingest.IngestService; import org.opensearch.monitor.fs.FsHealthService; import org.opensearch.monitor.fs.FsService; import org.opensearch.monitor.jvm.JvmGcMonitorService; @@ -406,6 +407,7 @@ public void apply(Settings value, Settings current, Settings previous) { ClusterService.USER_DEFINED_METADATA, ClusterManagerService.MASTER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING, // deprecated ClusterManagerService.CLUSTER_MANAGER_SERVICE_SLOW_TASK_LOGGING_THRESHOLD_SETTING, + IngestService.MAX_NUMBER_OF_INGEST_PROCESSORS, SearchService.DEFAULT_SEARCH_TIMEOUT_SETTING, SearchService.DEFAULT_ALLOW_PARTIAL_SEARCH_RESULTS, TransportSearchAction.SHARD_COUNT_LIMIT_SETTING, diff --git a/server/src/main/java/org/opensearch/ingest/IngestService.java b/server/src/main/java/org/opensearch/ingest/IngestService.java index 938ca7493926e..0315a960dae92 100644 --- a/server/src/main/java/org/opensearch/ingest/IngestService.java +++ b/server/src/main/java/org/opensearch/ingest/IngestService.java @@ -62,6 +62,7 @@ import org.opensearch.common.collect.Tuple; import org.opensearch.common.metrics.OperationMetrics; import org.opensearch.common.regex.Regex; +import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.concurrent.AbstractRunnable; @@ -107,6 +108,18 @@ public class IngestService implements ClusterStateApplier, ReportingService MAX_NUMBER_OF_INGEST_PROCESSORS = Setting.intSetting( + "cluster.ingest.max_number_processors", + Integer.MAX_VALUE, + 1, + Integer.MAX_VALUE, + Setting.Property.NodeScope, + Setting.Property.Dynamic + ); + private static final Logger logger = LogManager.getLogger(IngestService.class); private final ClusterService clusterService; @@ -123,6 +136,7 @@ public class IngestService implements ClusterStateApplier, ReportingService processorFactories(List ingestPlugins, Processor.Parameters parameters) { @@ -494,6 +514,9 @@ void validatePipeline(Map ingestInfos, PutPipelineReq Map pipelineConfig = XContentHelper.convertToMap(request.getSource(), false, request.getMediaType()).v2(); Pipeline pipeline = Pipeline.create(request.getId(), pipelineConfig, processorFactories, scriptService); + + validateProcessorCountForIngestPipeline(pipeline); + List exceptions = new ArrayList<>(); for (Processor processor : pipeline.flattenAllProcessors()) { for (Map.Entry entry : ingestInfos.entrySet()) { @@ -507,6 +530,20 @@ void validatePipeline(Map ingestInfos, PutPipelineReq ExceptionsHelper.rethrowAndSuppress(exceptions); } + public void validateProcessorCountForIngestPipeline(Pipeline pipeline) { + List processors = pipeline.flattenAllProcessors(); + + if (processors.size() > maxIngestProcessorCount) { + throw new IllegalStateException( + "Cannot use more than the maximum processors allowed. Number of processors being configured is [" + + processors.size() + + "] which exceeds the maximum allowed configuration of [" + + maxIngestProcessorCount + + "] processors." + ); + } + } + public void executeBulkRequest( int numberOfActionRequests, Iterable> actionRequests, diff --git a/server/src/test/java/org/opensearch/action/ingest/SimulateExecutionServiceTests.java b/server/src/test/java/org/opensearch/action/ingest/SimulateExecutionServiceTests.java index a5a082286f123..a26afeee0f912 100644 --- a/server/src/test/java/org/opensearch/action/ingest/SimulateExecutionServiceTests.java +++ b/server/src/test/java/org/opensearch/action/ingest/SimulateExecutionServiceTests.java @@ -39,6 +39,7 @@ import org.opensearch.ingest.DropProcessor; import org.opensearch.ingest.IngestDocument; import org.opensearch.ingest.IngestProcessorException; +import org.opensearch.ingest.IngestService; import org.opensearch.ingest.Pipeline; import org.opensearch.ingest.Processor; import org.opensearch.ingest.RandomDocumentPicks; @@ -67,6 +68,8 @@ import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.sameInstance; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; public class SimulateExecutionServiceTests extends OpenSearchTestCase { @@ -75,11 +78,13 @@ public class SimulateExecutionServiceTests extends OpenSearchTestCase { private TestThreadPool threadPool; private SimulateExecutionService executionService; private IngestDocument ingestDocument; + private IngestService ingestService; @Before public void setup() { + ingestService = mock(IngestService.class); threadPool = new TestThreadPool(SimulateExecutionServiceTests.class.getSimpleName()); - executionService = new SimulateExecutionService(threadPool); + executionService = new SimulateExecutionService(threadPool, ingestService); ingestDocument = RandomDocumentPicks.randomIngestDocument(random()); } @@ -400,6 +405,22 @@ public String getType() { } } + public void testValidateProcessorCountForIngestPipelineThrowsException() { + + int numDocs = randomIntBetween(1, 64); + List documents = new ArrayList<>(numDocs); + for (int id = 0; id < numDocs; id++) { + documents.add(new IngestDocument("_index", Integer.toString(id), null, 0L, VersionType.INTERNAL, new HashMap<>())); + } + + Pipeline pipeline = new Pipeline("_id", "_description", version, new CompoundProcessor()); + SimulatePipelineRequest.Parsed request = new SimulatePipelineRequest.Parsed(pipeline, documents, false); + + doThrow(new IllegalStateException()).when(ingestService).validateProcessorCountForIngestPipeline(pipeline); + + expectThrows(IllegalStateException.class, () -> executionService.execute(request, ActionListener.wrap(response -> {}, e -> {}))); + } + private static void assertVerboseResult( SimulateProcessorResult result, String expectedPipelineId, diff --git a/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java b/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java index 1f4b1d635d438..e1e1ea561284b 100644 --- a/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java +++ b/server/src/test/java/org/opensearch/ingest/IngestServiceTests.java @@ -58,6 +58,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.SetOnce; import org.opensearch.common.metrics.OperationStats; +import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.concurrent.OpenSearchExecutors; import org.opensearch.common.xcontent.XContentType; @@ -151,8 +152,12 @@ public void setup() { public void testIngestPlugin() { Client client = mock(Client.class); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); IngestService ingestService = new IngestService( - mock(ClusterService.class), + clusterService, threadPool, null, null, @@ -186,8 +191,12 @@ public void testIngestPluginDuplicate() { public void testExecuteIndexPipelineDoesNotExist() { Client client = mock(Client.class); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); IngestService ingestService = new IngestService( - mock(ClusterService.class), + clusterService, threadPool, null, null, @@ -720,6 +729,124 @@ public void testValidate() throws Exception { ingestService.validatePipeline(ingestInfos, putRequest); } + public void testValidateProcessorCountForIngestPipelineThrowsException() { + IngestService ingestService = createWithProcessors(); + PutPipelineRequest putRequest = new PutPipelineRequest( + "_id", + new BytesArray( + "{\"processors\": [{\"set\" : {\"field\": \"_field\", \"value\": \"_value\", \"tag\": \"tag1\"}}," + + "{\"remove\" : {\"field\": \"_field\", \"tag\": \"tag2\"}}]}" + ), + MediaTypeRegistry.JSON + ); + + DiscoveryNode node1 = new DiscoveryNode("_node_id1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + DiscoveryNode node2 = new DiscoveryNode("_node_id2", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + Map ingestInfos = new HashMap<>(); + ingestInfos.put(node1, new IngestInfo(Arrays.asList(new ProcessorInfo("set"), new ProcessorInfo("remove")))); + ingestInfos.put(node2, new IngestInfo(Arrays.asList(new ProcessorInfo("set")))); + + Settings newSettings = Settings.builder().put("cluster.ingest.max_number_processors", 1).build(); + ingestService.getClusterService().getClusterSettings().applySettings(newSettings); + + expectThrows(IllegalStateException.class, () -> ingestService.validatePipeline(ingestInfos, putRequest)); + } + + public void testValidateProcessorCountForWithNestedOnFailureProcessorThrowsException() { + IngestService ingestService = createWithProcessors(); + PutPipelineRequest putRequest = new PutPipelineRequest( + "_id", + new BytesArray( + "{\n" + + " \"processors\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"timestamp_field_1\",\n" + + " \"value\": \"value\",\n" + + " \"on_failure\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error1\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\",\n" + + " \"on_failure\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error1\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\",\n" + + " \"on_failure\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error1\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\",\n" + + " \"on_failure\": [\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error1\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " },\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error2\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error2\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error2\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"set\": {\n" + + " \"field\": \"ingest_error2\",\n" + + " \"value\": \"failed\",\n" + + " \"tag\": \"tagggg\"\n" + + " }\n" + + " }\n" + + " ]\n" + + " }\n" + + " }\n" + + " ]\n" + + "}" + ), + MediaTypeRegistry.JSON + ); + + DiscoveryNode node1 = new DiscoveryNode("_node_id1", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + DiscoveryNode node2 = new DiscoveryNode("_node_id2", buildNewFakeTransportAddress(), emptyMap(), emptySet(), Version.CURRENT); + Map ingestInfos = new HashMap<>(); + ingestInfos.put(node1, new IngestInfo(Arrays.asList(new ProcessorInfo("set"), new ProcessorInfo("remove")))); + ingestInfos.put(node2, new IngestInfo(Arrays.asList(new ProcessorInfo("set")))); + + Settings newSettings = Settings.builder().put("cluster.ingest.max_number_processors", 7).build(); + ingestService.getClusterService().getClusterSettings().applySettings(newSettings); + + expectThrows(IllegalStateException.class, () -> ingestService.validatePipeline(ingestInfos, putRequest)); + } + public void testExecuteIndexPipelineExistsButFailedParsing() { IngestService ingestService = createWithProcessors( Collections.singletonMap("mock", (factories, tag, description, config) -> new AbstractProcessor("mock", "description") { @@ -1506,8 +1633,12 @@ public Map getProcessors(Processor.Parameters paramet // Create ingest service: Client client = mock(Client.class); + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); IngestService ingestService = new IngestService( - mock(ClusterService.class), + clusterService, threadPool, null, null, @@ -2058,6 +2189,18 @@ public void testPrepareBatches_different_index_pipeline() { assertEquals(4, batches.size()); } + public void testUpdateMaxIngestProcessorCountSetting() { + ClusterSettings clusterSettings = new ClusterSettings(Settings.builder().build(), ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); + + // verify defaults + assertEquals(Integer.MAX_VALUE, clusterSettings.get(IngestService.MAX_NUMBER_OF_INGEST_PROCESSORS).intValue()); + + // verify update max processor + Settings newSettings = Settings.builder().put("cluster.ingest.max_number_processors", 3).build(); + clusterSettings.applySettings(newSettings); + assertEquals(3, clusterSettings.get(IngestService.MAX_NUMBER_OF_INGEST_PROCESSORS).intValue()); + } + private IngestService.IndexRequestWrapper createIndexRequestWrapper(String index, List pipelines) { IndexRequest indexRequest = new IndexRequest(index); return new IngestService.IndexRequestWrapper(0, indexRequest, pipelines, true); @@ -2093,7 +2236,11 @@ private static IngestService createWithProcessors(Map ExecutorService executorService = OpenSearchExecutors.newDirectExecutorService(); when(threadPool.generic()).thenReturn(executorService); when(threadPool.executor(anyString())).thenReturn(executorService); - return new IngestService(mock(ClusterService.class), threadPool, null, null, null, Collections.singletonList(new IngestPlugin() { + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + return new IngestService(clusterService, threadPool, null, null, null, Collections.singletonList(new IngestPlugin() { @Override public Map getProcessors(final Processor.Parameters parameters) { return processors;