Update docker-images version to 89 #19564

Merged — 1 commit, Feb 16, 2024
pom.xml — 2 changes: 1 addition & 1 deletion

@@ -183,7 +183,7 @@
         <dep.aws-sdk.version>1.12.657</dep.aws-sdk.version>
         <dep.cassandra.version>4.17.0</dep.cassandra.version>
         <dep.confluent.version>7.5.1</dep.confluent.version>
-        <dep.docker.images.version>87</dep.docker.images.version>
+        <dep.docker.images.version>89</dep.docker.images.version>
         <dep.drift.version>1.21</dep.drift.version>
         <dep.duct-tape.version>1.0.8</dep.duct-tape.version>
         <dep.errorprone.version>2.24.1</dep.errorprone.version>
EnvSinglenodeHiveHudiRedirections.java

@@ -26,7 +26,9 @@
 import io.trino.tests.product.launcher.env.common.Standard;
 import io.trino.tests.product.launcher.env.common.TestsEnvironment;
 import io.trino.tests.product.launcher.testcontainers.PortBinder;
+import org.testcontainers.containers.BindMode;

+import java.io.File;
 import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.nio.file.Files;

@@ -47,6 +49,8 @@
 public class EnvSinglenodeHiveHudiRedirections
         extends EnvironmentProvider
 {
+    private static final File HIVE_JDBC_PROVIDER = new File("testing/trino-product-tests-launcher/target/hive-jdbc.jar");
+
     private final ResourceProvider configDir;

     private static final int SPARK_THRIFT_PORT = 10213;

@@ -78,7 +82,10 @@ public void extendEnvironment(Environment.Builder builder)
         builder.addConnector("hive", forHostPath(configDir.getPath("hive.properties")));
         builder.addConnector("hudi", forHostPath(configDir.getPath("hudi.properties")));

-        builder.configureContainer(TESTS, dockerContainer -> dockerContainer.withEnv("S3_BUCKET", S3_BUCKET_NAME));
+        builder.configureContainer(TESTS, dockerContainer -> dockerContainer
+                .withEnv("S3_BUCKET", S3_BUCKET_NAME)
+                // Binding instead of copying for avoiding OutOfMemoryError https://github.com/testcontainers/testcontainers-java/issues/2863
+                .withFileSystemBind(HIVE_JDBC_PROVIDER.getParent(), "/docker/jdbc", BindMode.READ_ONLY));

         builder.addContainer(createSparkContainer())
                 // Ensure Hive metastore is up; Spark needs to access it during startup
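The same two-line change recurs in every launcher environment below: instead of copying hive-jdbc.jar into the tests container, its target directory is bind-mounted read-only at /docker/jdbc, because Testcontainers' file copy buffers the archive in memory and can exhaust the heap for large artifacts (the linked testcontainers-java issue 2863). Below is a minimal sketch contrasting the two approaches; it is not taken from this PR, and the image name, paths, and class name are placeholders. It assumes a local Docker daemon is available.

import org.testcontainers.containers.BindMode;
import org.testcontainers.containers.GenericContainer;
import org.testcontainers.utility.DockerImageName;
import org.testcontainers.utility.MountableFile;

public class JarMountSketch
{
    public static void main(String[] args)
    {
        String hostJarDir = "testing/trino-product-tests-launcher/target"; // placeholder path

        // Copying: the file content is streamed through the Docker API and buffered,
        // which is what triggered the OutOfMemoryError tracked in testcontainers-java issue 2863.
        try (GenericContainer<?> copying = new GenericContainer<>(DockerImageName.parse("eclipse-temurin:17-jre"))
                .withCopyFileToContainer(MountableFile.forHostPath(hostJarDir + "/hive-jdbc.jar"), "/docker/jdbc/hive-jdbc.jar")
                .withCommand("sleep", "30")) {
            copying.start();
        }

        // Binding: the host directory is mounted directly into the container,
        // so nothing is buffered in the JVM. This is the approach the PR switches to.
        try (GenericContainer<?> binding = new GenericContainer<>(DockerImageName.parse("eclipse-temurin:17-jre"))
                .withFileSystemBind(hostJarDir, "/docker/jdbc", BindMode.READ_ONLY)
                .withCommand("sleep", "30")) {
            binding.start();
        }
    }
}

With the bind mount, whatever currently sits in the host directory is visible inside the container at /docker/jdbc without any copy step.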
EnvSinglenodeSparkHive.java

@@ -24,10 +24,14 @@
 import io.trino.tests.product.launcher.env.common.Standard;
 import io.trino.tests.product.launcher.env.common.TestsEnvironment;
 import io.trino.tests.product.launcher.testcontainers.PortBinder;
+import org.testcontainers.containers.BindMode;
 import org.testcontainers.containers.startupcheck.IsRunningStartupCheckStrategy;

+import java.io.File;
+
 import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts;
 import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP;
+import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
 import static io.trino.tests.product.launcher.env.EnvironmentDefaults.HADOOP_BASE_IMAGE;
 import static io.trino.tests.product.launcher.env.common.Hadoop.CONTAINER_HADOOP_INIT_D;
 import static java.util.Objects.requireNonNull;

@@ -37,6 +41,8 @@
 public class EnvSinglenodeSparkHive
         extends EnvironmentProvider
 {
+    private static final File HIVE_JDBC_PROVIDER = new File("testing/trino-product-tests-launcher/target/hive-jdbc.jar");
+
     private static final int SPARK_THRIFT_PORT = 10213;

     private final DockerFiles dockerFiles;

@@ -66,6 +72,10 @@ public void extendEnvironment(Environment.Builder builder)

         builder.addContainer(createSpark())
                 .containerDependsOn("spark", HADOOP);
+
+        builder.configureContainer(TESTS, dockerContainer -> dockerContainer
+                // Binding instead of copying for avoiding OutOfMemoryError https://github.com/testcontainers/testcontainers-java/issues/2863
+                .withFileSystemBind(HIVE_JDBC_PROVIDER.getParent(), "/docker/jdbc", BindMode.READ_ONLY));
     }

     @SuppressWarnings("resource")
EnvSinglenodeSparkHiveNoStatsFallback.java

@@ -25,17 +25,23 @@
 import io.trino.tests.product.launcher.env.common.Standard;
 import io.trino.tests.product.launcher.env.common.TestsEnvironment;
 import io.trino.tests.product.launcher.testcontainers.PortBinder;
+import org.testcontainers.containers.BindMode;
 import org.testcontainers.containers.startupcheck.IsRunningStartupCheckStrategy;

+import java.io.File;
+
 import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts;
 import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP;
+import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
 import static java.util.Objects.requireNonNull;
 import static org.testcontainers.utility.MountableFile.forHostPath;

 @TestsEnvironment
 public class EnvSinglenodeSparkHiveNoStatsFallback
         extends EnvironmentProvider
 {
+    private static final File HIVE_JDBC_PROVIDER = new File("testing/trino-product-tests-launcher/target/hive-jdbc.jar");
+
     private static final int SPARK_THRIFT_PORT = 10213;

     private final PortBinder portBinder;

@@ -57,6 +63,10 @@ public void extendEnvironment(Environment.Builder builder)
     {
         builder.addConnector("hive", forHostPath(configDir.getPath("hive.properties")));
         builder.addContainer(createSpark()).containerDependsOn("spark", HADOOP);
+
+        builder.configureContainer(TESTS, dockerContainer -> dockerContainer
+                // Binding instead of copying for avoiding OutOfMemoryError https://github.com/testcontainers/testcontainers-java/issues/2863
+                .withFileSystemBind(HIVE_JDBC_PROVIDER.getParent(), "/docker/jdbc", BindMode.READ_ONLY));
     }

     @SuppressWarnings("resource")
EnvSinglenodeSparkIceberg.java

@@ -24,10 +24,14 @@
 import io.trino.tests.product.launcher.env.common.Standard;
 import io.trino.tests.product.launcher.env.common.TestsEnvironment;
 import io.trino.tests.product.launcher.testcontainers.PortBinder;
+import org.testcontainers.containers.BindMode;
 import org.testcontainers.containers.startupcheck.IsRunningStartupCheckStrategy;

+import java.io.File;
+
 import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts;
 import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP;
+import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
 import static io.trino.tests.product.launcher.env.EnvironmentDefaults.HADOOP_BASE_IMAGE;
 import static io.trino.tests.product.launcher.env.common.Hadoop.CONTAINER_HADOOP_INIT_D;
 import static java.util.Objects.requireNonNull;

@@ -37,6 +41,8 @@
 public class EnvSinglenodeSparkIceberg
         extends EnvironmentProvider
 {
+    private static final File HIVE_JDBC_PROVIDER = new File("testing/trino-product-tests-launcher/target/hive-jdbc.jar");
+
     private static final int SPARK_THRIFT_PORT = 10213;

     private final DockerFiles dockerFiles;

@@ -66,6 +72,10 @@ public void extendEnvironment(Environment.Builder builder)

         builder.addContainer(createSpark())
                 .containerDependsOn("spark", HADOOP);
+
+        builder.configureContainer(TESTS, dockerContainer -> dockerContainer
+                // Binding instead of copying for avoiding OutOfMemoryError https://github.com/testcontainers/testcontainers-java/issues/2863
+                .withFileSystemBind(HIVE_JDBC_PROVIDER.getParent(), "/docker/jdbc", BindMode.READ_ONLY));
     }

     @SuppressWarnings("resource")
EnvSinglenodeSparkIcebergJdbcCatalog.java

@@ -24,10 +24,14 @@
 import io.trino.tests.product.launcher.env.common.Standard;
 import io.trino.tests.product.launcher.env.common.TestsEnvironment;
 import io.trino.tests.product.launcher.testcontainers.PortBinder;
+import org.testcontainers.containers.BindMode;
 import org.testcontainers.containers.startupcheck.IsRunningStartupCheckStrategy;

+import java.io.File;
+
 import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts;
 import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP;
+import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
 import static io.trino.tests.product.launcher.env.EnvironmentDefaults.HADOOP_BASE_IMAGE;
 import static io.trino.tests.product.launcher.env.common.Hadoop.CONTAINER_HADOOP_INIT_D;
 import static java.util.Objects.requireNonNull;

@@ -37,6 +41,8 @@
 public class EnvSinglenodeSparkIcebergJdbcCatalog
         extends EnvironmentProvider
 {
+    private static final File HIVE_JDBC_PROVIDER = new File("testing/trino-product-tests-launcher/target/hive-jdbc.jar");
+
     private static final int SPARK_THRIFT_PORT = 10213;
     // Use non-default PostgreSQL port to avoid conflicts with locally installed PostgreSQL if any.
     public static final int POSTGRESQL_PORT = 25432;

@@ -70,6 +76,10 @@ public void extendEnvironment(Environment.Builder builder)

         builder.addContainer(createSpark())
                 .containerDependsOn("spark", HADOOP);
+
+        builder.configureContainer(TESTS, dockerContainer -> dockerContainer
+                // Binding instead of copying for avoiding OutOfMemoryError https://github.com/testcontainers/testcontainers-java/issues/2863
+                .withFileSystemBind(HIVE_JDBC_PROVIDER.getParent(), "/docker/jdbc", BindMode.READ_ONLY));
     }

     @SuppressWarnings("resource")
EnvSinglenodeSparkIcebergNessie.java

@@ -24,17 +24,23 @@
 import io.trino.tests.product.launcher.env.common.Standard;
 import io.trino.tests.product.launcher.env.common.TestsEnvironment;
 import io.trino.tests.product.launcher.testcontainers.PortBinder;
+import org.testcontainers.containers.BindMode;
 import org.testcontainers.containers.startupcheck.IsRunningStartupCheckStrategy;

+import java.io.File;
+
 import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts;
 import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP;
+import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
 import static java.util.Objects.requireNonNull;
 import static org.testcontainers.utility.MountableFile.forHostPath;

 @TestsEnvironment
 public class EnvSinglenodeSparkIcebergNessie
         extends EnvironmentProvider
 {
+    private static final File HIVE_JDBC_PROVIDER = new File("testing/trino-product-tests-launcher/target/hive-jdbc.jar");
+
     private static final int SPARK_THRIFT_PORT = 10213;
     private static final int NESSIE_PORT = 19120;
     private static final String NESSIE_VERSION = "0.71.0";

@@ -60,6 +66,10 @@ public void extendEnvironment(Environment.Builder builder)
         builder.addConnector("iceberg", forHostPath(dockerFiles.getDockerFilesHostPath("conf/environment/singlenode-spark-iceberg-nessie/iceberg.properties")));

         builder.addContainer(createSparkContainer()).containerDependsOn(SPARK, HADOOP);
+
+        builder.configureContainer(TESTS, dockerContainer -> dockerContainer
+                // Binding instead of copying for avoiding OutOfMemoryError https://github.com/testcontainers/testcontainers-java/issues/2863
+                .withFileSystemBind(HIVE_JDBC_PROVIDER.getParent(), "/docker/jdbc", BindMode.READ_ONLY));
     }

     @SuppressWarnings("resource")
EnvSinglenodeSparkIcebergRest.java

@@ -24,10 +24,14 @@
 import io.trino.tests.product.launcher.env.common.Standard;
 import io.trino.tests.product.launcher.env.common.TestsEnvironment;
 import io.trino.tests.product.launcher.testcontainers.PortBinder;
+import org.testcontainers.containers.BindMode;
 import org.testcontainers.containers.startupcheck.IsRunningStartupCheckStrategy;

+import java.io.File;
+
 import static io.trino.tests.product.launcher.docker.ContainerUtil.forSelectedPorts;
 import static io.trino.tests.product.launcher.env.EnvironmentContainers.HADOOP;
+import static io.trino.tests.product.launcher.env.EnvironmentContainers.TESTS;
 import static java.util.Objects.requireNonNull;
 import static org.testcontainers.utility.MountableFile.forHostPath;

@@ -38,6 +42,8 @@
 public class EnvSinglenodeSparkIcebergRest
         extends EnvironmentProvider
 {
+    private static final File HIVE_JDBC_PROVIDER = new File("testing/trino-product-tests-launcher/target/hive-jdbc.jar");
+
     private static final int SPARK_THRIFT_PORT = 10213;
     private static final int REST_SERVER_PORT = 8181;
     private static final String SPARK_CONTAINER_NAME = "spark";

@@ -65,6 +71,10 @@ public void extendEnvironment(Environment.Builder builder)
         builder.addConnector("iceberg", forHostPath(dockerFiles.getDockerFilesHostPath(
                 "conf/environment/singlenode-spark-iceberg-rest/iceberg.properties")));
         builder.addContainer(createSparkContainer()).containerDependsOn(SPARK_CONTAINER_NAME, HADOOP);
+
+        builder.configureContainer(TESTS, dockerContainer -> dockerContainer
+                // Binding instead of copying for avoiding OutOfMemoryError https://github.com/testcontainers/testcontainers-java/issues/2863
+                .withFileSystemBind(HIVE_JDBC_PROVIDER.getParent(), "/docker/jdbc", BindMode.READ_ONLY));
     }

     @SuppressWarnings("resource")
@@ -17,6 +17,7 @@
 import com.google.inject.Inject;
 import io.trino.tempto.ProductTest;
 import io.trino.tempto.hadoop.hdfs.HdfsClient;
+import org.testng.SkipException;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;

@@ -530,17 +531,18 @@ public void testReadSparkdDateAndTimePartitionName()
         onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='2022-04-13 00:00:00') VALUES (2)", sparkTableName));
         onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='2022-04-13 00:00') VALUES (3)", sparkTableName));
         onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='12345-06-07') VALUES (4)", sparkTableName));
-        onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='123-04-05') VALUES (5)", sparkTableName));
+        // This INSERT statement was supported in older Spark versions
+        assertQueryFailure(() -> onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='123-04-05') VALUES (5)", sparkTableName)))
+                .hasMessageContaining("cannot be cast to \"DATE\" because it is malformed");
         onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='-0001-01-01') VALUES (6)", sparkTableName));

-        assertThat(onTrino().executeQuery("SELECT \"$partition\" FROM " + trinoTableName))
+        assertThat(onTrino().executeQuery("SELECT value, \"$partition\" FROM " + trinoTableName))
                 .containsOnly(List.of(
-                        row("dt=2022-04-13 00%3A00%3A00.000000000"),
-                        row("dt=2022-04-13 00%3A00%3A00"),
-                        row("dt=2022-04-13 00%3A00"),
-                        row("dt=12345-06-07"),
-                        row("dt=123-04-05"),
-                        row("dt=-0001-01-01")));
+                        row(1, "dt=2022-04-13"),
+                        row(2, "dt=2022-04-13"),
+                        row(3, "dt=2022-04-13"),
+                        row(4, "dt=+12345-06-07"),
+                        row(6, "dt=-0001-01-01")));

         // Use date_format function to avoid exception due to java.sql.Date.valueOf() with 5 digit year
         assertThat(onSpark().executeQuery("SELECT value, date_format(dt, 'yyyy-MM-dd') FROM " + sparkTableName))

Review comment (Member Author), on the changed "SELECT value, \"$partition\"" line: Added value column to distinguish the partition values.

@@ -549,7 +551,6 @@ public void testReadSparkdDateAndTimePartitionName()
                         row(2, "2022-04-13"),
                         row(3, "2022-04-13"),
                         row(4, "+12345-06-07"),
-                        row(5, null),
                         row(6, "-0001-01-01")));

         // Use date_format function to avoid exception due to java.sql.Date.valueOf() with 5 digit year

@@ -558,8 +559,7 @@
                         row(1, "2022-04-13"),
                         row(2, "2022-04-13"),
                         row(3, "2022-04-13"),
-                        row(4, "12345-06-07"),
-                        row(5, "0123-04-06"),
+                        row(4, null),
                         row(6, "0002-01-03")));

         // Cast to varchar so that we can compare with Spark & Hive easily

@@ -569,7 +569,6 @@
                         row(2, "2022-04-13"),
                         row(3, "2022-04-13"),
                         row(4, "12345-06-07"),
-                        row(5, "0123-04-05"),
                         row(6, "-0001-01-01")));

         onTrino().executeQuery("DROP TABLE " + trinoTableName);
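An aside on the comment repeated in the hunks above: java.sql.Date.valueOf accepts only yyyy-[m]m-[d]d strings with an exactly four-digit year, so materializing a DATE value with a five-digit year through that path throws, which is why the assertions go through date_format instead. A small standalone illustration, not part of the PR (the class name is a placeholder):

import java.sql.Date;

public class DateValueOfSketch
{
    public static void main(String[] args)
    {
        // A four-digit year parses fine
        System.out.println(Date.valueOf("2022-04-13"));

        try {
            // java.sql.Date.valueOf requires the year field to be exactly four digits,
            // so a partition value like 12345-06-07 cannot be converted this way
            Date.valueOf("12345-06-07");
        }
        catch (IllegalArgumentException e) {
            System.out.println("five-digit year rejected");
        }
    }
}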
Expand Down Expand Up @@ -609,17 +608,24 @@ public void testReadSparkInvalidDatePartitionName(String inputDate, java.sql.Dat

onSpark().executeQuery(format("CREATE TABLE default.%s (value integer) PARTITIONED BY (dt date)", sparkTableName));

// Spark allows creating partition with invalid date format
// The old Spark allowed creating partition with invalid date format
// Hive denies creating such partitions, but allows reading
onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='%s') VALUES (1)", sparkTableName, inputDate));
if (inputDate.equals("2021-02-30") || inputDate.equals("invalid date")) {
assertQueryFailure(() -> onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='%s') VALUES (1)", sparkTableName, inputDate)))
.hasMessageContaining("cannot be cast to \"DATE\" because it is malformed");
onSpark().executeQuery("DROP TABLE " + sparkTableName);
throw new SkipException("TODO");
}
else {
// Spark removes the following string after the date, e.g. 23:59:59 and invalid
onSpark().executeQuery(format("INSERT INTO %s PARTITION(dt='%s') VALUES (1)", sparkTableName, inputDate));
}

// Hive ignores time unit, and return null for invalid dates
Row expected = row(1, outputDate);
assertThat(onHive().executeQuery("SELECT value, dt FROM " + sparkTableName))
.containsOnly(List.of(row(1, outputDate)));

// Trino throws an exception if the date is invalid format or not a whole round date
assertQueryFailure(() -> onTrino().executeQuery("SELECT value, dt FROM " + trinoTableName))
.hasMessageContaining("Invalid partition value");
.containsOnly(expected);
assertThat(onTrino().executeQuery("SELECT value, dt FROM " + trinoTableName))
.containsOnly(expected);

onTrino().executeQuery("DROP TABLE " + trinoTableName);
}
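A note on the SkipException introduced above: throwing org.testng.SkipException from a TestNG test body marks that invocation as skipped rather than failed, which is how the new branch bows out for data-provider inputs that the current Spark refuses to insert. A minimal sketch, not from the PR (class, method, and flag are placeholders):

import org.testng.SkipException;
import org.testng.annotations.Test;

public class SkipExceptionSketch
{
    @Test
    public void skipsWhenScenarioNotApplicable()
    {
        boolean sparkAcceptsMalformedDates = false; // placeholder for a real capability check

        if (!sparkAcceptsMalformedDates) {
            // TestNG reports this test as skipped, not failed
            throw new SkipException("scenario not applicable to the current Spark version");
        }

        // assertions for the supported case would go here
    }
}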