Add Delta Lake product tests
All of the Delta Lake product tests can be executed with the
SuiteDeltaLake suite class.
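
For orientation, suite classes in the launcher follow a small
convention: extend Suite and return environment/test-group pairs from
getTestRuns. The sketch below is a minimal illustration of that shape,
assuming the launcher's Suite and SuiteTestRun API; the actual wiring
of SuiteDeltaLake is not shown in this excerpt, so the environment and
group names here are placeholders.

package io.trino.tests.product.launcher.suite.suites;

import com.google.common.collect.ImmutableList;
import io.trino.tests.product.launcher.env.EnvironmentConfig;
import io.trino.tests.product.launcher.env.environment.EnvMultinodeMinioDataLake;
import io.trino.tests.product.launcher.suite.Suite;
import io.trino.tests.product.launcher.suite.SuiteTestRun;

import java.util.List;

import static io.trino.tests.product.launcher.suite.SuiteTestRun.testOnEnvironment;

public class SuiteDeltaLake
        extends Suite
{
    @Override
    public List<SuiteTestRun> getTestRuns(EnvironmentConfig config)
    {
        // Placeholder wiring: run a Delta Lake test group on the MinIO
        // data lake environment added in this commit.
        return ImmutableList.of(
                testOnEnvironment(EnvMultinodeMinioDataLake.class)
                        .withGroups("delta-lake-minio")
                        .build());
    }
}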

The following product test environments are exposed (example launcher
invocations follow the list):

- singlenode-delta-lake-oss: used to test the compatibility of the
  Trino Delta Lake connector with Apache Spark and open source
  Delta Lake (Delta OSS)
- singlenode-delta-lake-databricks: used to test the compatibility
  of the Trino Delta Lake connector with Databricks Delta Lake
- singlenode-delta-lake-kerberized-hdfs: used to test the Delta Lake
  connector on top of a kerberized Hadoop environment
- multinode-minio-data-lake: a lightweight environment that can be
  used to test the lakehouse connectors (Hive, Delta Lake, Iceberg)
  with a Hive Metastore (HMS) and MinIO
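
Assuming the launcher's usual entry point (testing/bin/ptl; the
wrapper itself is not part of this diff), the suite and the individual
environments can be exercised locally along these lines:

  # run all Delta Lake product tests
  testing/bin/ptl suite run --suite suite-delta-lake

  # bring up a single environment for manual testing
  testing/bin/ptl env up --environment multinode-minio-data-lake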

The aim of the Delta Lake product tests is to ensure compatibility
with both implementations of Delta Lake:

- Delta OSS
- Databricks Delta

These product tests were originally written for the Starburst Enterprise
Delta Lake connector.

Co-authored by various engineers at Starburst Data:

Co-authored-by: Piotr Findeisen <piotr.findeisen@gmail.com>
Co-authored-by: Alex Jo <alex.jo@starburstdata.com>
Co-authored-by: Łukasz Osipiuk <lukasz@osipiuk.net>
Co-authored-by: Konrad Dziedzic <konraddziedzic@gmail.com>
Co-authored-by: Adam J. Shook <shook@datacatessen.com>
Co-authored-by: Mateusz Gajewski <mateusz.gajewski@gmail.com>
Co-authored-by: Gaurav Sehgal <gaurav.sehgal8297@gmail.com>
Co-authored-by: Raunaq Morarka <raunaqmorarka@gmail.com>
Co-authored-by: Ashhar Hasan <ashhar.hasan@starburstdata.com>
Co-authored-by: Michał Ślizak <michal.slizak+github@gmail.com>
Co-authored-by: Grzegorz Kokosiński <grzegorz@starburstdata.com>
Co-authored-by: Arkadiusz Czajkowski <arek@starburstdata.com>
Co-authored-by: Jacob I. Komissar <jacob.komissar@starburstdata.com>
Co-authored-by: Krzysztof Sobolewski <krzysztof.sobolewski@starburstdata.com>
Co-authored-by: Krzysztof Skrzypczynski <krzysztof.skrzypczynski@starburstdata.com>
Co-authored-by: Yuya Ebihara <yuya.ebihara@starburstdata.com>
Co-authored-by: Praveen Krishna <praveenkrishna@tutanota.com>
Co-authored-by: Karol Sobczak <napewnotrafi@gmail.com>
Co-authored-by: Sasha Sheikin <myminitrue@gmail.com>
Co-authored-by: Szymon Homa <szymon.homa@starburstdata.com>
21 people committed May 24, 2022
1 parent 9532493 commit 4338264
Showing 47 changed files with 4,642 additions and 1 deletion.
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
@@ -669,6 +669,10 @@ jobs:
           - config: default
             suite: suite-all
             jdk: 11
+          # this suite is not meant to be run with different configs
+          - config: default
+            suite: suite-delta-lake
+            jdk: 11
     # PT Launcher's timeout defaults to 2h, add some margin
     timeout-minutes: 130
     needs: build-pt
8 changes: 7 additions & 1 deletion pom.xml
@@ -68,7 +68,7 @@
         <dep.casandra.version>4.14.0</dep.casandra.version>
         <dep.minio.version>7.1.4</dep.minio.version>

-        <dep.docker.images.version>56</dep.docker.images.version>
+        <dep.docker.images.version>57</dep.docker.images.version>

         <!--
           America/Bahia_Banderas has:
@@ -1010,6 +1010,12 @@
                 </exclusions>
             </dependency>

+            <dependency>
+                <groupId>com.databricks</groupId>
+                <artifactId>databricks-jdbc</artifactId>
+                <version>2.6.25-1</version>
+            </dependency>
+
             <dependency>
                 <groupId>com.datastax.oss</groupId>
                 <artifactId>java-driver-core</artifactId>
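
The new databricks-jdbc dependency is what lets the product tests talk
to a Databricks cluster directly over JDBC. A minimal sketch of how a
test could open such a connection, assuming the DATABRICKS_TEST_*
variables exported by the environments below (the helper class itself
is hypothetical, not part of this commit):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

final class DatabricksConnections
{
    private DatabricksConnections() {}

    static Connection open()
            throws SQLException, ReflectiveOperationException
    {
        // Load the driver class named by the environment, then connect
        // with the login/token pair the tests are configured with.
        Class.forName(System.getenv("DATABRICKS_TEST_JDBC_DRIVER_CLASS"));
        return DriverManager.getConnection(
                System.getenv("DATABRICKS_TEST_JDBC_URL"),
                System.getenv("DATABRICKS_TEST_LOGIN"),
                System.getenv("DATABRICKS_TEST_TOKEN"));
    }
}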

AbstractSinglenodeDeltaLakeDatabricks.java (new file)
@@ -0,0 +1,94 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.tests.product.launcher.env.environment;

import io.trino.tests.product.launcher.docker.DockerFiles;
import io.trino.tests.product.launcher.env.DockerContainer;
import io.trino.tests.product.launcher.env.Environment;
import io.trino.tests.product.launcher.env.EnvironmentProvider;
import io.trino.tests.product.launcher.env.common.Standard;

import static io.trino.tests.product.launcher.env.EnvironmentContainers.COORDINATOR;
import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
import static io.trino.tests.product.launcher.env.EnvironmentContainers.configureTempto;
import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_PRESTO_ETC;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static org.testcontainers.utility.MountableFile.forHostPath;

/**
 * Trino with Delta Lake connector and real S3 storage
 */
public abstract class AbstractSinglenodeDeltaLakeDatabricks
        extends EnvironmentProvider
{
    private final DockerFiles dockerFiles;

    abstract String databricksTestJdbcUrl();

    public AbstractSinglenodeDeltaLakeDatabricks(Standard standard, DockerFiles dockerFiles)
    {
        super(standard);
        this.dockerFiles = requireNonNull(dockerFiles, "dockerFiles is null");
    }

    @Override
    public void extendEnvironment(Environment.Builder builder)
    {
        String databricksTestJdbcUrl = databricksTestJdbcUrl();
        String databricksTestJdbcDriverClass = requireNonNull(System.getenv("DATABRICKS_TEST_JDBC_DRIVER_CLASS"), "Environment DATABRICKS_TEST_JDBC_DRIVER_CLASS was not set");
        String databricksTestLogin = requireNonNull(System.getenv("DATABRICKS_TEST_LOGIN"), "Environment DATABRICKS_TEST_LOGIN was not set");
        String databricksTestToken = requireNonNull(System.getenv("DATABRICKS_TEST_TOKEN"), "Environment DATABRICKS_TEST_TOKEN was not set");
        String hiveMetastoreUri = requireNonNull(System.getenv("HIVE_METASTORE_URI"), "Environment HIVE_METASTORE_URI was not set");
        String s3Bucket = requireNonNull(System.getenv("S3_BUCKET"), "Environment S3_BUCKET was not set");
        DockerFiles.ResourceProvider configDir = dockerFiles.getDockerFilesHostDirectory("conf/environment/singlenode-delta-lake-databricks");

        builder.configureContainer(COORDINATOR, dockerContainer -> exportAWSCredentials(dockerContainer)
                .withEnv("HIVE_METASTORE_URI", hiveMetastoreUri)
                .withEnv("DATABRICKS_TEST_JDBC_URL", databricksTestJdbcUrl)
                .withEnv("DATABRICKS_TEST_LOGIN", databricksTestLogin)
                .withEnv("DATABRICKS_TEST_TOKEN", databricksTestToken)
                .withCopyFileToContainer(forHostPath(configDir.getPath("hive.properties")), CONTAINER_PRESTO_ETC + "/catalog/hive.properties")
                .withCopyFileToContainer(forHostPath(configDir.getPath("delta.properties")), CONTAINER_PRESTO_ETC + "/catalog/delta.properties"));

        builder.configureContainer(TESTS, container -> exportAWSCredentials(container)
                .withEnv("S3_BUCKET", s3Bucket)
                .withEnv("DATABRICKS_TEST_JDBC_DRIVER_CLASS", databricksTestJdbcDriverClass)
                .withEnv("DATABRICKS_TEST_JDBC_URL", databricksTestJdbcUrl)
                .withEnv("DATABRICKS_TEST_LOGIN", databricksTestLogin)
                .withEnv("DATABRICKS_TEST_TOKEN", databricksTestToken)
                .withEnv("HIVE_METASTORE_URI", hiveMetastoreUri));

        configureTempto(builder, configDir);
    }

    private DockerContainer exportAWSCredentials(DockerContainer container)
    {
        container = exportAWSCredential(container, "AWS_ACCESS_KEY_ID", true);
        container = exportAWSCredential(container, "AWS_SECRET_ACCESS_KEY", true);
        return exportAWSCredential(container, "AWS_SESSION_TOKEN", false);
    }

    private DockerContainer exportAWSCredential(DockerContainer container, String credentialEnvVariable, boolean required)
    {
        String credentialValue = System.getenv(credentialEnvVariable);
        if (credentialValue == null) {
            if (required) {
                throw new IllegalStateException(format("Environment variable %s not set", credentialEnvVariable));
            }
            return container;
        }
        return container.withEnv(credentialEnvVariable, credentialValue);
    }
}

EnvMultinodeMinioDataLake.java (new file)
@@ -0,0 +1,73 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.tests.product.launcher.env.environment;

import io.trino.tests.product.launcher.docker.DockerFiles;
import io.trino.tests.product.launcher.env.DockerContainer;
import io.trino.tests.product.launcher.env.Environment;
import io.trino.tests.product.launcher.env.EnvironmentProvider;
import io.trino.tests.product.launcher.env.common.Hadoop;
import io.trino.tests.product.launcher.env.common.Minio;
import io.trino.tests.product.launcher.env.common.StandardMultinode;
import io.trino.tests.product.launcher.env.common.TestsEnvironment;

import javax.inject.Inject;

import static io.trino.tests.product.launcher.env.EnvironmentContainers.COORDINATOR;
import static io.trino.tests.product.launcher.env.EnvironmentContainers.WORKER;
import static io.trino.tests.product.launcher.env.common.Hadoop.CONTAINER_PRESTO_HIVE_PROPERTIES;
import static io.trino.tests.product.launcher.env.common.Hadoop.CONTAINER_PRESTO_ICEBERG_PROPERTIES;
import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_PRESTO_ETC;
import static java.util.Objects.requireNonNull;
import static org.testcontainers.utility.MountableFile.forHostPath;

/**
 * Trino with S3-compatible Data Lake setup based on MinIO
 */
@TestsEnvironment
public class EnvMultinodeMinioDataLake
        extends EnvironmentProvider
{
    private final DockerFiles dockerFiles;

    @Inject
    public EnvMultinodeMinioDataLake(StandardMultinode standardMultinode, Hadoop hadoop, Minio minio, DockerFiles dockerFiles)
    {
        super(standardMultinode, hadoop, minio);
        this.dockerFiles = requireNonNull(dockerFiles, "dockerFiles is null");
    }

    @Override
    public void extendEnvironment(Environment.Builder builder)
    {
        builder.configureContainer(COORDINATOR, this::configureTrinoContainer);
        builder.configureContainer(WORKER, this::configureTrinoContainer);
    }

    private void configureTrinoContainer(DockerContainer container)
    {
        container.withCopyFileToContainer(
                forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-minio-data-lake/hive.properties")),
                CONTAINER_PRESTO_HIVE_PROPERTIES);
        container.withCopyFileToContainer(
                forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-minio-data-lake/delta.properties")),
                CONTAINER_PRESTO_ETC + "/catalog/delta.properties");
        container.withCopyFileToContainer(
                forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-minio-data-lake/iceberg.properties")),
                CONTAINER_PRESTO_ICEBERG_PROPERTIES);
        container.withCopyFileToContainer(
                forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-minio-data-lake/memory.properties")),
                CONTAINER_PRESTO_ETC + "/catalog/memory.properties");
    }
}
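
The four catalog files copied above live under
conf/environment/singlenode-minio-data-lake/ and are not reproduced in
this excerpt. For orientation, a plausible shape for the
delta.properties mounted here, assuming the legacy hive.s3.* S3
configuration of the time; the endpoint and credential values are
illustrative, not the committed ones:

connector.name=delta-lake
hive.metastore.uri=thrift://hadoop-master:9083
hive.s3.endpoint=http://minio:9080
hive.s3.aws-access-key=minio-access-key
hive.s3.aws-secret-key=minio-secret-key
hive.s3.path-style-access=true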

EnvSinglenodeDeltaLakeDatabricks.java (new file)
@@ -0,0 +1,39 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.tests.product.launcher.env.environment;

import com.google.inject.Inject;
import io.trino.tests.product.launcher.docker.DockerFiles;
import io.trino.tests.product.launcher.env.common.Standard;
import io.trino.tests.product.launcher.env.common.TestsEnvironment;

import static java.util.Objects.requireNonNull;

@TestsEnvironment
public class EnvSinglenodeDeltaLakeDatabricks
        extends AbstractSinglenodeDeltaLakeDatabricks
{
    @Inject
    public EnvSinglenodeDeltaLakeDatabricks(Standard standard, DockerFiles dockerFiles)
    {
        super(standard, dockerFiles);
    }

    @Override
    String databricksTestJdbcUrl()
    {
        return requireNonNull(System.getenv("DATABRICKS_TEST_JDBC_URL"), "Environment DATABRICKS_TEST_JDBC_URL was not set");
    }
}

EnvSinglenodeDeltaLakeDatabricks91.java (new file)
@@ -0,0 +1,38 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.tests.product.launcher.env.environment;

import com.google.inject.Inject;
import io.trino.tests.product.launcher.docker.DockerFiles;
import io.trino.tests.product.launcher.env.common.Standard;
import io.trino.tests.product.launcher.env.common.TestsEnvironment;

import static java.util.Objects.requireNonNull;

@TestsEnvironment
public class EnvSinglenodeDeltaLakeDatabricks91
        extends AbstractSinglenodeDeltaLakeDatabricks
{
    @Inject
    public EnvSinglenodeDeltaLakeDatabricks91(Standard standard, DockerFiles dockerFiles)
    {
        super(standard, dockerFiles);
    }

    @Override
    String databricksTestJdbcUrl()
    {
        return requireNonNull(System.getenv("DATABRICKS_91_TEST_JDBC_URL"), "Environment DATABRICKS_91_TEST_JDBC_URL was not set");
    }
}

EnvSinglenodeDeltaLakeKerberizedHdfs.java (new file)
@@ -0,0 +1,53 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.tests.product.launcher.env.environment;

import io.trino.tests.product.launcher.docker.DockerFiles;
import io.trino.tests.product.launcher.env.Environment;
import io.trino.tests.product.launcher.env.EnvironmentProvider;
import io.trino.tests.product.launcher.env.common.Hadoop;
import io.trino.tests.product.launcher.env.common.HadoopKerberos;
import io.trino.tests.product.launcher.env.common.Standard;
import io.trino.tests.product.launcher.env.common.TestsEnvironment;

import javax.inject.Inject;

import static io.trino.tests.product.launcher.env.EnvironmentContainers.COORDINATOR;
import static io.trino.tests.product.launcher.env.common.Standard.CONTAINER_PRESTO_ETC;
import static java.util.Objects.requireNonNull;
import static org.testcontainers.utility.MountableFile.forHostPath;

@TestsEnvironment
public class EnvSinglenodeDeltaLakeKerberizedHdfs
        extends EnvironmentProvider
{
    private final DockerFiles dockerFiles;

    @Inject
    public EnvSinglenodeDeltaLakeKerberizedHdfs(Standard standard, Hadoop hadoop, HadoopKerberos hadoopKerberos, DockerFiles dockerFiles)
    {
        super(standard, hadoop, hadoopKerberos);
        this.dockerFiles = requireNonNull(dockerFiles, "dockerFiles is null");
    }

    @Override
    public void extendEnvironment(Environment.Builder builder)
    {
        builder.configureContainer(COORDINATOR, dockerContainer -> {
            dockerContainer.withCopyFileToContainer(
                    forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-delta-lake-kerberized-hdfs/delta.properties")),
                    CONTAINER_PRESTO_ETC + "/catalog/delta.properties");
        });
    }
}