diff --git a/plugin/trino-delta-lake/pom.xml b/plugin/trino-delta-lake/pom.xml
index a3325091311b..632020452934 100644
--- a/plugin/trino-delta-lake/pom.xml
+++ b/plugin/trino-delta-lake/pom.xml
@@ -303,6 +303,12 @@
test
+
+ io.trino
+ trino-tpcds
+ test
+
+
io.trino
trino-tpch
diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogParser.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogParser.java
index 82a7bd23001b..03de2ce67019 100644
--- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogParser.java
+++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogParser.java
@@ -86,8 +86,8 @@ public final class TransactionLogParser
private static final Logger log = Logger.get(TransactionLogParser.class);
// Before 1900, Java Time and Joda Time are not consistent with java.sql.Date and java.util.Calendar
- // Since January 1, 1900 UTC is still December 31, 1899 in other zones, we are adding a 1 year margin.
- public static final LocalDate START_OF_MODERN_ERA = LocalDate.of(1901, 1, 1);
+ // Since January 1, 1900 UTC is still December 31, 1899 in time zones behind UTC, we are adding a 1 day margin.
+ public static final LocalDate START_OF_MODERN_ERA = LocalDate.of(1900, 1, 2);
public static final String LAST_CHECKPOINT_FILENAME = "_last_checkpoint";
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/DeltaLakeQueryRunner.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/DeltaLakeQueryRunner.java
index ba985cdc6e6f..509543dc6f61 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/DeltaLakeQueryRunner.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/DeltaLakeQueryRunner.java
@@ -19,6 +19,7 @@
import io.trino.Session;
import io.trino.plugin.hive.containers.HiveHadoop;
import io.trino.plugin.hive.containers.HiveMinioDataLake;
+import io.trino.plugin.tpcds.TpcdsPlugin;
import io.trino.plugin.tpch.TpchPlugin;
import io.trino.testing.DistributedQueryRunner;
import io.trino.testing.QueryRunner;
@@ -99,6 +100,9 @@ public DistributedQueryRunner build()
queryRunner.installPlugin(new TpchPlugin());
queryRunner.createCatalog("tpch", "tpch");
+ queryRunner.installPlugin(new TpcdsPlugin());
+ queryRunner.createCatalog("tpcds", "tpcds");
+
queryRunner.installPlugin(new TestingDeltaLakePlugin());
Map deltaProperties = new HashMap<>(this.deltaProperties.buildOrThrow());
if (!deltaProperties.containsKey("hive.metastore") && !deltaProperties.containsKey("hive.metastore.uri")) {
diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAnalyze.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAnalyze.java
index 409a8d7b5498..d83b600209cd 100644
--- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAnalyze.java
+++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeAnalyze.java
@@ -458,6 +458,27 @@ public void testDropStatsAccessControl()
}
}
+ /**
+ * Verifies that ANALYZE yields accurate column statistics for data copied from TPC-DS date_dim, whose d_date
+ * values go back to 1900-01-02; dates that old may be problematic (presumably the START_OF_MODERN_ERA cutoff in TransactionLogParser - confirm).
+ */
+ @Test
+ public void testStatsOnTpcDsData()
+ {
+ try (TestTable table = new TestTable(
+ getQueryRunner()::execute,
+ "test_old_date_stats",
+ "AS SELECT d_date FROM tpcds.tiny.date_dim")) {
+ runAnalyzeVerifySplitCount(table.getName(), 1); // runs ANALYZE on the table; 1 is the expected split count
+ // Accurate column stats on d_date are important for producing efficient query plans, e.g. for TPC-DS q72
+ assertQuery(
+ "SHOW STATS FOR " + table.getName(),
+ "VALUES"
+ + "('d_date', null, 72713.0, 0.0, null, '1900-01-02', '2100-01-01'),"
+ + "(null, null, null, null, 73049.0, null, null)");
+ }
+ }
+
private void runAnalyzeVerifySplitCount(String tableName, long expectedSplitCount)
{
MaterializedResultWithQueryId analyzeResult = getDistributedQueryRunner().executeWithQueryId(getSession(), "ANALYZE " + tableName);