From 79b8f9af1afb230d588e3e43130846039aef126c Mon Sep 17 00:00:00 2001 From: Adi Suresh Date: Wed, 7 Aug 2024 18:30:40 +0000 Subject: [PATCH] Add Security Lake data source type. This changes adds Security Lake as a data source type. Security Lake as a data source is simply specific options set on top of the base S3Glue data source. Signed-off-by: Adi Suresh --- .../config/AsyncExecutorServiceModule.java | 3 + .../AsyncQueryExecutorServiceSpec.java | 4 + .../sql/datasource/model/DataSourceType.java | 3 +- .../glue/SecurityLakeDataSourceFactory.java | 61 +++++++++ .../glue/SecurityLakeSourceFactoryTest.java | 124 ++++++++++++++++++ .../org/opensearch/sql/plugin/SQLPlugin.java | 2 + 6 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 datasources/src/main/java/org/opensearch/sql/datasources/glue/SecurityLakeDataSourceFactory.java create mode 100644 datasources/src/test/java/org/opensearch/sql/datasources/glue/SecurityLakeSourceFactoryTest.java diff --git a/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java b/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java index 7dfcad3db5..2b0444142e 100644 --- a/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java +++ b/async-query/src/main/java/org/opensearch/sql/spark/transport/config/AsyncExecutorServiceModule.java @@ -15,6 +15,7 @@ import org.opensearch.common.inject.Singleton; import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.datasource.DataSourceService; +import org.opensearch.sql.datasource.model.DataSource; import org.opensearch.sql.datasource.model.DataSourceType; import org.opensearch.sql.legacy.metrics.GaugeMetric; import org.opensearch.sql.legacy.metrics.Metrics; @@ -161,6 +162,8 @@ public SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider SparkParameterComposerCollection collection = new SparkParameterComposerCollection(); collection.register( DataSourceType.S3GLUE, new S3GlueDataSourceSparkParameterComposer(clusterSettingLoader)); + collection.register( + DataSourceType.SECURITY_LAKE, new S3GlueDataSourceSparkParameterComposer(clusterSettingLoader)); collection.register(new OpenSearchExtraParameterComposer(clusterSettingLoader)); return new SparkSubmitParametersBuilderProvider(collection); } diff --git a/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java index 1f22be7961..641b083d53 100644 --- a/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java +++ b/async-query/src/test/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceSpec.java @@ -276,6 +276,10 @@ protected AsyncQueryExecutorService createAsyncQueryExecutorService( DataSourceType.S3GLUE, new S3GlueDataSourceSparkParameterComposer( getSparkExecutionEngineConfigClusterSettingLoader())); + sparkParameterComposerCollection.register( + DataSourceType.SECURITY_LAKE, + new S3GlueDataSourceSparkParameterComposer( + getSparkExecutionEngineConfigClusterSettingLoader())); SparkSubmitParametersBuilderProvider sparkSubmitParametersBuilderProvider = new SparkSubmitParametersBuilderProvider(sparkParameterComposerCollection); QueryHandlerFactory queryHandlerFactory = diff --git a/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceType.java b/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceType.java index c727c3c531..c74964fc00 100644 --- a/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceType.java +++ b/core/src/main/java/org/opensearch/sql/datasource/model/DataSourceType.java @@ -15,12 +15,13 @@ public class DataSourceType { public static DataSourceType OPENSEARCH = new DataSourceType("OPENSEARCH"); public static DataSourceType SPARK = new DataSourceType("SPARK"); public static DataSourceType S3GLUE = new DataSourceType("S3GLUE"); + public static DataSourceType SECURITY_LAKE = new DataSourceType("SECURITY_LAKE"); // Map from uppercase DataSourceType name to DataSourceType object private static Map knownValues = new HashMap<>(); static { - register(PROMETHEUS, OPENSEARCH, SPARK, S3GLUE); + register(PROMETHEUS, OPENSEARCH, SPARK, S3GLUE, SECURITY_LAKE); } private final String name; diff --git a/datasources/src/main/java/org/opensearch/sql/datasources/glue/SecurityLakeDataSourceFactory.java b/datasources/src/main/java/org/opensearch/sql/datasources/glue/SecurityLakeDataSourceFactory.java new file mode 100644 index 0000000000..eecb9d12f8 --- /dev/null +++ b/datasources/src/main/java/org/opensearch/sql/datasources/glue/SecurityLakeDataSourceFactory.java @@ -0,0 +1,61 @@ +package org.opensearch.sql.datasources.glue; + +import java.net.URISyntaxException; +import java.net.UnknownHostException; +import java.util.Map; +import java.util.Set; +import lombok.RequiredArgsConstructor; +import org.apache.commons.lang3.BooleanUtils; +import org.apache.commons.lang3.StringUtils; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.datasource.model.DataSource; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; +import org.opensearch.sql.datasources.auth.AuthenticationType; +import org.opensearch.sql.datasources.utils.DatasourceValidationUtils; +import org.opensearch.sql.storage.DataSourceFactory; + +public class SecurityLakeDataSourceFactory extends GlueDataSourceFactory { + + private final Settings pluginSettings; + + public static final String TRUE = "true"; + + public SecurityLakeDataSourceFactory(final Settings pluginSettings) { + super(pluginSettings); + this.pluginSettings = pluginSettings; + } + + @Override + public DataSourceType getDataSourceType() { + return DataSourceType.SECURITY_LAKE; + } + + @Override + public DataSource createDataSource(DataSourceMetadata metadata) { + validateProperties(metadata.getProperties()); + metadata.getProperties().put(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED, TRUE); + metadata.getProperties().put(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED, TRUE); + return super.createDataSource(metadata); + } + + private void validateProperties(Map properties) { + // validate Lake Formation config + if (properties.get(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED) != null && + !BooleanUtils.toBoolean(properties.get(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED))) { + throw new IllegalArgumentException(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED + + " cannot be false when using Security Lake data source."); + } + + if (properties.get(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED) != null && + !BooleanUtils.toBoolean(properties.get(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED))) { + throw new IllegalArgumentException(GLUE_LAKEFORMATION_ENABLED + + " cannot be false when using Security Lake data source."); + } + + if (StringUtils.isBlank(properties.get(GLUE_LAKEFORMATION_SESSION_TAG))) { + throw new IllegalArgumentException(GlueDataSourceFactory.GLUE_LAKEFORMATION_SESSION_TAG + + " must be specified when using Security Lake data source"); + } + } +} diff --git a/datasources/src/test/java/org/opensearch/sql/datasources/glue/SecurityLakeSourceFactoryTest.java b/datasources/src/test/java/org/opensearch/sql/datasources/glue/SecurityLakeSourceFactoryTest.java new file mode 100644 index 0000000000..a40400d948 --- /dev/null +++ b/datasources/src/test/java/org/opensearch/sql/datasources/glue/SecurityLakeSourceFactoryTest.java @@ -0,0 +1,124 @@ +package org.opensearch.sql.datasources.glue; + +import static org.mockito.Mockito.when; + +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import lombok.SneakyThrows; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; +import org.opensearch.sql.DataSourceSchemaName; +import org.opensearch.sql.common.setting.Settings; +import org.opensearch.sql.datasource.model.DataSource; +import org.opensearch.sql.datasource.model.DataSourceMetadata; +import org.opensearch.sql.datasource.model.DataSourceType; + +@ExtendWith(MockitoExtension.class) +public class SecurityLakeSourceFactoryTest { + + @Mock private Settings settings; + + @Test + void testGetConnectorType() { + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = new SecurityLakeDataSourceFactory(settings); + Assertions.assertEquals(DataSourceType.SECURITY_LAKE, securityLakeDataSourceFactory.getDataSourceType()); + } + + @Test + @SneakyThrows + void testCreateSecurityLakeDataSource() { + when(settings.getSettingValue(Settings.Key.DATASOURCES_URI_HOSTS_DENY_LIST)) + .thenReturn(Collections.emptyList()); + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = new SecurityLakeDataSourceFactory(settings); + + Map properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.lakeformation.session_tag", "session_tag"); + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_sl") + .setConnector(DataSourceType.SECURITY_LAKE) + .setProperties(properties) + .build(); + DataSource dataSource = securityLakeDataSourceFactory.createDataSource(metadata); + Assertions.assertEquals(DataSourceType.SECURITY_LAKE, dataSource.getConnectorType()); + + Assertions.assertEquals(properties.get(GlueDataSourceFactory.GLUE_ICEBERG_ENABLED), SecurityLakeDataSourceFactory.TRUE); + Assertions.assertEquals(properties.get(GlueDataSourceFactory.GLUE_LAKEFORMATION_ENABLED), SecurityLakeDataSourceFactory.TRUE); + } + + @Test + @SneakyThrows + void testCreateSecurityLakeDataSourceIcebergCannotBeDisabled() { + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = new SecurityLakeDataSourceFactory(settings); + + Map properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.iceberg.enabled", "false"); + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_sl") + .setConnector(DataSourceType.SECURITY_LAKE) + .setProperties(properties) + .build(); + + Assertions.assertThrows(IllegalArgumentException.class, () -> securityLakeDataSourceFactory.createDataSource(metadata)); + } + + @Test + @SneakyThrows + void testCreateSecurityLakeDataSourceLakeFormationCannotBeDisabled() { + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = new SecurityLakeDataSourceFactory(settings); + + Map properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + properties.put("glue.lakeformation.enabled", "false"); + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_sl") + .setConnector(DataSourceType.SECURITY_LAKE) + .setProperties(properties) + .build(); + + Assertions.assertThrows(IllegalArgumentException.class, () -> securityLakeDataSourceFactory.createDataSource(metadata)); + } + + @Test + @SneakyThrows + void testCreateGlueDataSourceWithLakeFormationNoSessionTags() { + SecurityLakeDataSourceFactory securityLakeDataSourceFactory = new SecurityLakeDataSourceFactory(settings); + + HashMap properties = new HashMap<>(); + properties.put("glue.auth.type", "iam_role"); + properties.put("glue.auth.role_arn", "role_arn"); + properties.put("glue.indexstore.opensearch.uri", "http://localhost:9200"); + properties.put("glue.indexstore.opensearch.auth", "noauth"); + properties.put("glue.indexstore.opensearch.region", "us-west-2"); + + DataSourceMetadata metadata = + new DataSourceMetadata.Builder() + .setName("my_sl") + .setConnector(DataSourceType.SECURITY_LAKE) + .setProperties(properties) + .build(); + + Assertions.assertThrows(IllegalArgumentException.class, () -> securityLakeDataSourceFactory.createDataSource(metadata)); + } +} diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java index a1b1e32955..971ef5e928 100644 --- a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java +++ b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java @@ -60,6 +60,7 @@ import org.opensearch.sql.datasources.auth.DataSourceUserAuthorizationHelperImpl; import org.opensearch.sql.datasources.encryptor.EncryptorImpl; import org.opensearch.sql.datasources.glue.GlueDataSourceFactory; +import org.opensearch.sql.datasources.glue.SecurityLakeDataSourceFactory; import org.opensearch.sql.datasources.model.transport.*; import org.opensearch.sql.datasources.rest.RestDataSourceQueryAction; import org.opensearch.sql.datasources.service.DataSourceMetadataStorage; @@ -326,6 +327,7 @@ private DataSourceServiceImpl createDataSourceService() { .add(new PrometheusStorageFactory(pluginSettings)) .add(new SparkStorageFactory(this.client, pluginSettings)) .add(new GlueDataSourceFactory(pluginSettings)) + .add(new SecurityLakeDataSourceFactory(pluginSettings)) .build(), dataSourceMetadataStorage, dataSourceUserAuthorizationHelper);