From 91d9d7efcd4358dc78242587164f8db972d27baa Mon Sep 17 00:00:00 2001 From: Mateusz Gajewski Date: Wed, 23 Mar 2022 15:14:24 +0100 Subject: [PATCH 1/3] Improve testing MySQL's stats quality --- .../test/java/io/trino/plugin/mysql/TestingMySqlServer.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestingMySqlServer.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestingMySqlServer.java index d8b788174e74..99346482dade 100644 --- a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestingMySqlServer.java +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestingMySqlServer.java @@ -59,7 +59,11 @@ public TestingMySqlServer(String dockerImageName, boolean globalTransactionEnabl execute(format("GRANT ALL PRIVILEGES ON *.* TO '%s'", container.getUsername()), "root", container.getPassword()); } - protected void configureContainer(MySQLContainer container) {} + private void configureContainer(MySQLContainer container) + { + // MySQL configuration provided by default by testcontainers causes MySQL to produce poor estimates in CARDINALITY column of INFORMATION_SCHEMA.STATISTICS table. + container.addParameter("TC_MY_CNF", null); + } public Connection createConnection() throws SQLException From 1db242848ff9f5cd39cf936e6f6bd78d7779ceef Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Wed, 23 Mar 2022 15:12:11 +0100 Subject: [PATCH 2/3] Implement reading table statistics for MySQL --- plugin/trino-mysql/pom.xml | 16 + .../io/trino/plugin/mysql/MySqlClient.java | 307 +++++++++++- .../trino/plugin/mysql/MySqlClientModule.java | 2 + ...SqlTableStatisticsIndexStatisticsTest.java | 114 +++++ .../BaseTestMySqlTableStatisticsTest.java | 464 ++++++++++++++++++ .../trino/plugin/mysql/TestMySqlClient.java | 2 + ...lTableStatisticsMySql5IndexStatistics.java | 23 + ...tMySqlTableStatisticsMySql8Histograms.java | 95 ++++ ...lTableStatisticsMySql8IndexStatistics.java | 23 + 9 files changed, 1045 insertions(+), 1 deletion(-) create mode 100644 plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseMySqlTableStatisticsIndexStatisticsTest.java create mode 100644 plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseTestMySqlTableStatisticsTest.java create mode 100644 plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql5IndexStatistics.java create mode 100644 plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql8Histograms.java create mode 100644 plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql8IndexStatistics.java diff --git a/plugin/trino-mysql/pom.xml b/plugin/trino-mysql/pom.xml index 51b46ddbd37f..ae94f107dad8 100644 --- a/plugin/trino-mysql/pom.xml +++ b/plugin/trino-mysql/pom.xml @@ -33,6 +33,11 @@ configuration + + io.airlift + json + + io.airlift log @@ -73,6 +78,11 @@ mysql-connector-java + + org.jdbi + jdbi3-core + + io.trino @@ -131,6 +141,12 @@ test + + io.trino + trino-testing-services + test + + io.trino trino-tpch diff --git a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java index 7c6e9d322180..48f253e52213 100644 --- a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java +++ b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java @@ -13,8 +13,13 @@ */ package io.trino.plugin.mysql; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.mysql.cj.jdbc.JdbcStatement; +import io.airlift.json.JsonCodec; +import io.airlift.log.Logger; import io.trino.plugin.base.aggregation.AggregateFunctionRewriter; import io.trino.plugin.base.aggregation.AggregateFunctionRule; import io.trino.plugin.base.expression.ConnectorExpressionRewriter; @@ -26,6 +31,7 @@ import io.trino.plugin.jdbc.JdbcExpression; import io.trino.plugin.jdbc.JdbcJoinCondition; import io.trino.plugin.jdbc.JdbcSortItem; +import io.trino.plugin.jdbc.JdbcStatisticsConfig; import io.trino.plugin.jdbc.JdbcTableHandle; import io.trino.plugin.jdbc.JdbcTypeHandle; import io.trino.plugin.jdbc.PreparedQuery; @@ -53,6 +59,10 @@ import io.trino.spi.connector.JoinStatistics; import io.trino.spi.connector.JoinType; import io.trino.spi.connector.SchemaTableName; +import io.trino.spi.predicate.TupleDomain; +import io.trino.spi.statistics.ColumnStatistics; +import io.trino.spi.statistics.Estimate; +import io.trino.spi.statistics.TableStatistics; import io.trino.spi.type.CharType; import io.trino.spi.type.DecimalType; import io.trino.spi.type.Decimals; @@ -63,6 +73,9 @@ import io.trino.spi.type.TypeManager; import io.trino.spi.type.TypeSignature; import io.trino.spi.type.VarcharType; +import org.jdbi.v3.core.Handle; +import org.jdbi.v3.core.Jdbi; +import org.jdbi.v3.core.statement.UnableToExecuteStatementException; import javax.inject.Inject; @@ -72,6 +85,7 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Types; +import java.util.AbstractMap.SimpleEntry; import java.util.Collection; import java.util.List; import java.util.Map; @@ -79,12 +93,18 @@ import java.util.function.BiFunction; import java.util.stream.Stream; +import static com.google.common.base.MoreObjects.toStringHelper; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; import static com.google.common.base.Strings.emptyToNull; +import static com.google.common.base.Strings.nullToEmpty; +import static com.google.common.base.Throwables.throwIfInstanceOf; import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.util.concurrent.MoreExecutors.directExecutor; import static com.mysql.cj.exceptions.MysqlErrorNumbers.SQL_STATE_ER_TABLE_EXISTS_ERROR; import static com.mysql.cj.exceptions.MysqlErrorNumbers.SQL_STATE_SYNTAX_ERROR; +import static io.airlift.json.JsonCodec.jsonCodec; import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.base.util.JsonTypeUtil.jsonParse; import static io.trino.plugin.jdbc.DecimalConfig.DecimalMapping.ALLOW_OVERFLOW; @@ -147,11 +167,14 @@ import static java.lang.String.format; import static java.lang.String.join; import static java.util.Locale.ENGLISH; +import static java.util.Objects.requireNonNull; import static java.util.stream.Collectors.joining; public class MySqlClient extends BaseJdbcClient { + private static final Logger log = Logger.get(MySqlClient.class); + private static final int MAX_SUPPORTED_DATE_TIME_PRECISION = 6; // MySQL driver returns width of timestamp types instead of precision. // 19 characters are used for zero-precision timestamps while others @@ -164,15 +187,28 @@ public class MySqlClient // An empty character means that the table doesn't have a comment in MySQL private static final String NO_COMMENT = ""; + private static final JsonCodec HISTOGRAM_CODEC = jsonCodec(ColumnHistogram.class); + + // We don't know null fraction, but having no null fraction will make CBO useless. Assume some arbitrary value. + private static final Estimate UNKNOWN_NULL_FRACTION_REPLACEMENT = Estimate.of(0.1); + private final Type jsonType; + private final boolean statisticsEnabled; private final ConnectorExpressionRewriter connectorExpressionRewriter; private final AggregateFunctionRewriter aggregateFunctionRewriter; @Inject - public MySqlClient(BaseJdbcConfig config, ConnectionFactory connectionFactory, QueryBuilder queryBuilder, TypeManager typeManager, IdentifierMapping identifierMapping) + public MySqlClient( + BaseJdbcConfig config, + JdbcStatisticsConfig statisticsConfig, + ConnectionFactory connectionFactory, + QueryBuilder queryBuilder, + TypeManager typeManager, + IdentifierMapping identifierMapping) { super(config, "`", connectionFactory, queryBuilder, identifierMapping); this.jsonType = typeManager.getType(new TypeSignature(StandardTypes.JSON)); + this.statisticsEnabled = requireNonNull(statisticsConfig, "statisticsConfig is null").isEnabled(); this.connectorExpressionRewriter = JdbcConnectorExpressionRewriterBuilder.newBuilder() .addStandardRules(this::quoted) @@ -685,6 +721,119 @@ protected boolean isSupportedJoinCondition(ConnectorSession session, JdbcJoinCon .noneMatch(type -> type instanceof CharType || type instanceof VarcharType); } + @Override + public TableStatistics getTableStatistics(ConnectorSession session, JdbcTableHandle handle, TupleDomain tupleDomain) + { + if (!statisticsEnabled) { + return TableStatistics.empty(); + } + if (!handle.isNamedRelation()) { + return TableStatistics.empty(); + } + try { + return readTableStatistics(session, handle); + } + catch (SQLException | RuntimeException e) { + throwIfInstanceOf(e, TrinoException.class); + throw new TrinoException(JDBC_ERROR, "Failed fetching statistics for table: " + handle, e); + } + } + + private TableStatistics readTableStatistics(ConnectorSession session, JdbcTableHandle table) + throws SQLException + { + checkArgument(table.isNamedRelation(), "Relation is not a table: %s", table); + + log.debug("Reading statistics for %s", table); + try (Connection connection = connectionFactory.openConnection(session); + Handle handle = Jdbi.open(connection)) { + StatisticsDao statisticsDao = new StatisticsDao(handle); + + Long rowCount = statisticsDao.getRowCount(table); + log.debug("Estimated row count of table %s is %s", table, rowCount); + + if (rowCount == null) { + // Table not found, or is a view. + return TableStatistics.empty(); + } + + TableStatistics.Builder tableStatistics = TableStatistics.builder(); + tableStatistics.setRowCount(Estimate.of(rowCount)); + + Map columnHistograms = statisticsDao.getColumnHistograms(table); + Map columnStatisticsFromIndexes = statisticsDao.getColumnIndexStatistics(table); + + if (columnHistograms.isEmpty() && columnStatisticsFromIndexes.isEmpty()) { + log.debug("No column histograms and index statistics read"); + // No more information to work on + return tableStatistics.build(); + } + + for (JdbcColumnHandle column : this.getColumns(session, table)) { + ColumnStatistics.Builder columnStatisticsBuilder = ColumnStatistics.builder(); + + String columnName = column.getColumnName(); + Optional histogram = getColumnHistogram(columnHistograms, columnName); + if (histogram.isPresent()) { + log.debug("Reading column statistics for %s, %s from histogram: %s", table, columnName, columnHistograms.get(columnName)); + histogram.get().updateColumnStatistics(columnStatisticsBuilder); + + // row count from INFORMATION_SCHEMA.TABLES is very inaccurate + rowCount = histogram.get().getUpdateRowCount(rowCount); + } + + ColumnIndexStatistics columnIndexStatistics = columnStatisticsFromIndexes.get(columnName); + if (columnIndexStatistics != null) { + log.debug("Reading column statistics for %s, %s from index statistics: %s", table, columnName, columnIndexStatistics); + updateColumnStatisticsFromIndexStatistics(table, columnName, columnStatisticsBuilder, columnIndexStatistics); + + // row count from INFORMATION_SCHEMA.TABLES is very inaccurate + rowCount = max(rowCount, columnIndexStatistics.getCardinality()); + } + + ColumnStatistics columnStatistics = columnStatisticsBuilder.build(); + if (!columnStatistics.getDistinctValuesCount().isUnknown() && columnStatistics.getNullsFraction().isUnknown()) { + columnStatisticsBuilder.setNullsFraction(UNKNOWN_NULL_FRACTION_REPLACEMENT); + columnStatistics = columnStatisticsBuilder.build(); + } + + tableStatistics.setColumnStatistics(column, columnStatistics); + } + + tableStatistics.setRowCount(Estimate.of(rowCount)); + return tableStatistics.build(); + } + } + + private static Optional getColumnHistogram(Map columnHistograms, String columnName) + { + return Optional.ofNullable(columnHistograms.get(columnName)) + .flatMap(histogramJson -> { + try { + return Optional.of(HISTOGRAM_CODEC.fromJson(histogramJson)); + } + catch (RuntimeException e) { + log.warn(e, "Failed to parse column statistics histogram: %s", histogramJson); + return Optional.empty(); + } + }); + } + + private static void updateColumnStatisticsFromIndexStatistics(JdbcTableHandle table, String columnName, ColumnStatistics.Builder columnStatistics, ColumnIndexStatistics columnIndexStatistics) + { + // Prefer CARDINALITY from index statistics over NDV from a histogram. + // Index column might be NULLABLE. Then CARDINALITY includes all + columnStatistics.setDistinctValuesCount(Estimate.of(columnIndexStatistics.getCardinality())); + + if (!columnIndexStatistics.nullable) { + double knownNullFraction = columnStatistics.build().getNullsFraction().getValue(); + if (knownNullFraction > 0) { + log.warn("Inconsistent statistics, null fraction for a column %s, %s, that is not nullable according to index statistics: %s", table, columnName, knownNullFraction); + } + columnStatistics.setNullsFraction(Estimate.zero()); + } + } + private ColumnMapping jsonColumnMapping() { return ColumnMapping.sliceMapping( @@ -708,4 +857,160 @@ private static boolean isGtidMode(Connection connection) throw new TrinoException(JDBC_ERROR, e); } } + + private static class StatisticsDao + { + private final Handle handle; + + public StatisticsDao(Handle handle) + { + this.handle = requireNonNull(handle, "handle is null"); + } + + Long getRowCount(JdbcTableHandle table) + { + return handle.createQuery("" + + "SELECT TABLE_ROWS FROM INFORMATION_SCHEMA.TABLES " + + "WHERE TABLE_SCHEMA = :schema AND TABLE_NAME = :table_name " + + "AND TABLE_TYPE = 'BASE TABLE' ") + .bind("schema", table.getCatalogName()) + .bind("table_name", table.getTableName()) + .mapTo(Long.class) + .findOne() + .orElse(null); + } + + Map getColumnIndexStatistics(JdbcTableHandle table) + { + return handle.createQuery("" + + "SELECT " + + " COLUMN_NAME, " + + " MAX(NULLABLE) AS NULLABLE, " + + " MAX(CARDINALITY) AS CARDINALITY " + + "FROM INFORMATION_SCHEMA.STATISTICS " + + "WHERE TABLE_SCHEMA = :schema AND TABLE_NAME = :table_name " + + "AND SEQ_IN_INDEX = 1 " + // first column in the index + "AND SUB_PART IS NULL " + // ignore cases where only a column prefix is indexed + "AND CARDINALITY IS NOT NULL " + // CARDINALITY might be null (https://stackoverflow.com/a/42242729/65458) + "GROUP BY COLUMN_NAME") // there might be multiple indexes on a column + .bind("schema", table.getCatalogName()) + .bind("table_name", table.getTableName()) + .map((rs, ctx) -> { + String columnName = rs.getString("COLUMN_NAME"); + + boolean nullable = rs.getString("NULLABLE").equalsIgnoreCase("YES"); + checkState(!rs.wasNull(), "NULLABLE is null"); + + long cardinality = rs.getLong("CARDINALITY"); + checkState(!rs.wasNull(), "CARDINALITY is null"); + + return new SimpleEntry<>(columnName, new ColumnIndexStatistics(nullable, cardinality)); + }) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + Map getColumnHistograms(JdbcTableHandle table) + { + try { + handle.execute("SELECT 1 FROM INFORMATION_SCHEMA.COLUMN_STATISTICS WHERE 0=1"); + } + catch (UnableToExecuteStatementException e) { + if (nullToEmpty(e.getMessage()).contains("Unknown table 'COLUMN_STATISTICS'")) { + // The table is available since MySQL 8 + log.debug("INFORMATION_SCHEMA.COLUMN_STATISTICS table is not available: %s", e); + return ImmutableMap.of(); + } + } + + return handle.createQuery("" + + "SELECT COLUMN_NAME, HISTOGRAM FROM INFORMATION_SCHEMA.COLUMN_STATISTICS " + + "WHERE SCHEMA_NAME = :schema AND TABLE_NAME = :table_name") + .bind("schema", table.getCatalogName()) + .bind("table_name", table.getTableName()) + .map((rs, ctx) -> new SimpleEntry<>(rs.getString("COLUMN_NAME"), rs.getString("HISTOGRAM"))) + .collect(toImmutableMap(Map.Entry::getKey, Map.Entry::getValue)); + } + } + + private static class ColumnIndexStatistics + { + private final boolean nullable; + private final long cardinality; + + public ColumnIndexStatistics(boolean nullable, long cardinality) + { + this.cardinality = cardinality; + this.nullable = nullable; + } + + public long getCardinality() + { + return cardinality; + } + + @Override + public String toString() + { + return toStringHelper(this) + .add("cardinality", getCardinality()) + .add("nullable", nullable) + .toString(); + } + } + + // See https://dev.mysql.com/doc/refman/8.0/en/optimizer-statistics.html + public static class ColumnHistogram + { + private final Optional nullFraction; + private final Optional histogramType; + private final Optional>> buckets; + + @JsonCreator + public ColumnHistogram( + @JsonProperty("null-values") Optional nullFraction, + @JsonProperty("histogram-type") Optional histogramType, + @JsonProperty("buckets") Optional>> buckets) + { + this.nullFraction = nullFraction; + this.histogramType = histogramType; + this.buckets = buckets; + } + + public void updateColumnStatistics(ColumnStatistics.Builder columnStatistics) + { + nullFraction.map(Estimate::of).ifPresent(columnStatistics::setNullsFraction); + getDistinctValuesCount().map(Estimate::of).ifPresent(columnStatistics::setDistinctValuesCount); + } + + private Optional getDistinctValuesCount() + { + if (histogramType.isPresent() && buckets.isPresent()) { + switch (histogramType.get()) { + case "singleton": + return Optional.of((long) buckets.get().size()); + + case "equi-height": + long distinctValues = 0; + for (List bucket : buckets.get()) { + distinctValues += ((Number) bucket.get(3)).longValue(); + } + return Optional.of(distinctValues); + + default: + log.debug("Unsupported histogram type: %s", histogramType.get()); + } + } + else { + log.debug("Unsupported histogram: type: %s, bucket count: %s", histogramType, buckets.map(List::size)); + } + return Optional.empty(); + } + + public long getUpdateRowCount(long rowCount) + { + return getDistinctValuesCount() + .map(distinctValuesCount -> max(rowCount, distinctValuesCount)) + .orElse(rowCount); + } + } } diff --git a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClientModule.java b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClientModule.java index 188fc389b045..69c3238ef069 100644 --- a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClientModule.java +++ b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClientModule.java @@ -25,6 +25,7 @@ import io.trino.plugin.jdbc.DriverConnectionFactory; import io.trino.plugin.jdbc.ForBaseJdbc; import io.trino.plugin.jdbc.JdbcClient; +import io.trino.plugin.jdbc.JdbcStatisticsConfig; import io.trino.plugin.jdbc.credential.CredentialProvider; import java.sql.SQLException; @@ -41,6 +42,7 @@ public void configure(Binder binder) binder.bind(JdbcClient.class).annotatedWith(ForBaseJdbc.class).to(MySqlClient.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(MySqlJdbcConfig.class); configBinder(binder).bindConfig(MySqlConfig.class); + configBinder(binder).bindConfig(JdbcStatisticsConfig.class); binder.install(new DecimalModule()); } diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseMySqlTableStatisticsIndexStatisticsTest.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseMySqlTableStatisticsIndexStatisticsTest.java new file mode 100644 index 000000000000..81662c8d7a58 --- /dev/null +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseMySqlTableStatisticsIndexStatisticsTest.java @@ -0,0 +1,114 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.mysql; + +import io.trino.testing.MaterializedRow; +import org.testng.SkipException; + +import static java.lang.String.format; + +public abstract class BaseMySqlTableStatisticsIndexStatisticsTest + extends BaseTestMySqlTableStatisticsTest +{ + protected BaseMySqlTableStatisticsIndexStatisticsTest(String dockerImageName) + { + super(dockerImageName, + nullFraction -> 0.1, // Without histograms we have no way of knowing real null fraction, 10% is just a "wild guess" + varcharNdv -> null); // Without histograms we don't know cardinality for varchar columns + } + + @Override + protected void gatherStats(String tableName) + { + for (MaterializedRow row : computeActual("SHOW COLUMNS FROM " + tableName)) { + String columnName = (String) row.getField(0); + String columnType = (String) row.getField(1); + if (columnType.startsWith("varchar")) { + continue; + } + executeInMysql(format("CREATE INDEX %2$s ON %1$s (%2$s)", tableName, columnName).replace("\"", "`")); + } + executeInMysql("ANALYZE TABLE " + tableName.replace("\"", "`")); + } + + @Override + public void testStatsWithPredicatePushdownWithStatsPrecalculationDisabled() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithPredicatePushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithVarcharPredicatePushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithLimitPushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithTopNPushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithDistinctPushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithDistinctLimitPushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithAggregationPushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithSimpleJoinPushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } + + @Override + public void testStatsWithJoinPushdown() + { + // TODO (https://github.com/trinodb/trino/issues/11664) implement the test for MySQL, with permissive approximate assertions + throw new SkipException("Test to be implemented"); + } +} diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseTestMySqlTableStatisticsTest.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseTestMySqlTableStatisticsTest.java new file mode 100644 index 000000000000..b1c10821adce --- /dev/null +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseTestMySqlTableStatisticsTest.java @@ -0,0 +1,464 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.mysql; + +import com.google.common.collect.ImmutableMap; +import io.trino.plugin.jdbc.BaseJdbcTableStatisticsTest; +import io.trino.testing.MaterializedResult; +import io.trino.testing.MaterializedRow; +import io.trino.testing.QueryRunner; +import io.trino.testing.sql.TestTable; +import io.trino.testng.services.Flaky; +import org.assertj.core.api.AbstractDoubleAssert; +import org.jdbi.v3.core.Handle; +import org.jdbi.v3.core.Jdbi; +import org.testng.SkipException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Verify.verify; +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.Streams.stream; +import static io.trino.plugin.mysql.MySqlQueryRunner.createMySqlQueryRunner; +import static io.trino.testing.sql.TestTable.fromColumns; +import static io.trino.tpch.TpchTable.ORDERS; +import static java.lang.Math.min; +import static java.lang.String.format; +import static java.util.Objects.requireNonNull; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.withinPercentage; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertNull; + +public abstract class BaseTestMySqlTableStatisticsTest + extends BaseJdbcTableStatisticsTest +{ + protected final String dockerImageName; + protected final Function nullFractionToExpected; + protected final Function varcharNdvToExpected; + protected TestingMySqlServer mysqlServer; + + protected BaseTestMySqlTableStatisticsTest( + String dockerImageName, + Function nullFractionToExpected, + Function varcharNdvToExpected) + { + this.dockerImageName = requireNonNull(dockerImageName, "dockerImageName is null"); + this.nullFractionToExpected = requireNonNull(nullFractionToExpected, "nullFractionToExpected is null"); + this.varcharNdvToExpected = requireNonNull(varcharNdvToExpected, "varcharNdvToExpected is null"); + } + + @Override + protected QueryRunner createQueryRunner() + throws Exception + { + mysqlServer = closeAfterClass(new TestingMySqlServer(dockerImageName, false)); + + return createMySqlQueryRunner( + mysqlServer, + Map.of(), + Map.of( + "case-insensitive-name-matching", "true", + "join-pushdown.enabled", "true"), + List.of(ORDERS)); + } + + @Override + @Test + @Flaky( + // TODO replace @Flaky with assertEventually or @Flaky-like annotation for same purpose + issue = "TODO", + match = "Expecting.*to be close to|ComparisonFailure.*NDV for") + public void testNotAnalyzed() + { + String tableName = "test_not_analyzed"; + assertUpdate("DROP TABLE IF EXISTS " + tableName); + computeActual(format("CREATE TABLE %s AS SELECT * FROM tpch.tiny.orders", tableName)); + try { + MaterializedResult statsResult = computeActual("SHOW STATS FOR " + tableName); + assertColumnStats(statsResult, new MapBuilder() + .put("orderkey", null) + .put("custkey", null) + .put("orderstatus", null) + .put("totalprice", null) + .put("orderdate", null) + .put("orderpriority", null) + .put("clerk", null) + .put("shippriority", null) + .put("comment", null) + .build()); + + double cardinality = getTableCardinalityFromStats(statsResult); + // sometimes MySQL will return INFORMATION_SCHEMA.TABLES.TABLE_ROWS as 2 even after loading data + if (cardinality > 7) { + assertThat(cardinality).isCloseTo(15000, withinPercentage(50)); + } + } + finally { + assertUpdate("DROP TABLE " + tableName); + } + } + + @Override + @Test + public void testBasic() + { + String tableName = "test_stats_orders"; + assertUpdate("DROP TABLE IF EXISTS " + tableName); + computeActual(format("CREATE TABLE %s AS SELECT * FROM tpch.tiny.orders", tableName)); + try { + gatherStats(tableName); + MaterializedResult statsResult = computeActual("SHOW STATS FOR " + tableName); + assertColumnStats(statsResult, new MapBuilder() + .put("orderkey", 15000) + .put("custkey", 1000) + .put("orderstatus", varcharNdvToExpected.apply(3)) + .put("totalprice", 14996) + .put("orderdate", 2401) + .put("orderpriority", varcharNdvToExpected.apply(5)) + .put("clerk", varcharNdvToExpected.apply(1000)) + .put("shippriority", 1) + .put("comment", varcharNdvToExpected.apply(14995)) + .build()); + assertThat(getTableCardinalityFromStats(statsResult)).isCloseTo(15000, withinPercentage(20)); + } + finally { + assertUpdate("DROP TABLE " + tableName); + } + } + + @Override + @Test + public void testAllNulls() + { + String tableName = "test_stats_table_all_nulls"; + assertUpdate("DROP TABLE IF EXISTS " + tableName); + computeActual(format("CREATE TABLE %s AS SELECT orderkey, custkey, orderpriority, comment FROM tpch.tiny.orders WHERE false", tableName)); + try { + computeActual(format("INSERT INTO %s (orderkey) VALUES NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL", tableName)); + gatherStats(tableName); + MaterializedResult statsResult = computeActual("SHOW STATS FOR " + tableName); + for (MaterializedRow row : statsResult) { + String columnName = (String) row.getField(0); + if (columnName == null) { + // table summary row + return; + } + assertThat(columnName).isIn("orderkey", "custkey", "orderpriority", "comment"); + + Double dataSize = (Double) row.getField(1); + if (dataSize != null) { + assertThat(dataSize).as("Data size for " + columnName) + .isEqualTo(0); + } + + if ((columnName.equals("orderpriority") || columnName.equals("comment")) && varcharNdvToExpected.apply(2) == null) { + assertNull(row.getField(2), "NDV for " + columnName); + assertNull(row.getField(3), "null fraction for " + columnName); + } + else { + assertNotNull(row.getField(2), "NDV for " + columnName); + assertThat(((Number) row.getField(2)).doubleValue()).as("NDV for " + columnName).isBetween(0.0, 2.0); + assertEquals(row.getField(3), nullFractionToExpected.apply(1.0), "null fraction for " + columnName); + } + + assertNull(row.getField(4), "min"); + assertNull(row.getField(5), "max"); + } + double cardinality = getTableCardinalityFromStats(statsResult); + if (cardinality != 15.0) { + // sometimes all-NULLs tables are reported as containing 0-2 rows + assertThat(cardinality).isBetween(0.0, 2.0); + } + } + finally { + assertUpdate("DROP TABLE " + tableName); + } + } + + @Override + @Test + public void testNullsFraction() + { + String tableName = "test_stats_table_with_nulls"; + assertUpdate("DROP TABLE IF EXISTS " + tableName); + assertUpdate("" + + "CREATE TABLE " + tableName + " AS " + + "SELECT " + + " orderkey, " + + " if(orderkey % 3 = 0, NULL, custkey) custkey, " + + " if(orderkey % 5 = 0, NULL, orderpriority) orderpriority " + + "FROM tpch.tiny.orders", + 15000); + try { + gatherStats(tableName); + MaterializedResult statsResult = computeActual("SHOW STATS FOR " + tableName); + assertColumnStats( + statsResult, + new MapBuilder() + .put("orderkey", 15000) + .put("custkey", 1000) + .put("orderpriority", varcharNdvToExpected.apply(5)) + .build(), + new MapBuilder() + .put("orderkey", nullFractionToExpected.apply(0.0)) + .put("custkey", nullFractionToExpected.apply(1.0 / 3)) + .put("orderpriority", nullFractionToExpected.apply(1.0 / 5)) + .build()); + assertThat(getTableCardinalityFromStats(statsResult)).isCloseTo(15000, withinPercentage(20)); + } + finally { + assertUpdate("DROP TABLE " + tableName); + } + } + + @Override + @Test + public void testAverageColumnLength() + { + throw new SkipException("MySQL connector does not report average column length"); + } + + @Override + @Test + public void testPartitionedTable() + { + throw new SkipException("Not implemented"); // TODO + } + + @Override + @Test + public void testView() + { + String tableName = "test_stats_view"; + executeInMysql("CREATE OR REPLACE VIEW " + tableName + " AS SELECT orderkey, custkey, orderpriority, comment FROM orders"); + try { + assertQuery( + "SHOW STATS FOR " + tableName, + "VALUES " + + "('orderkey', null, null, null, null, null, null)," + + "('custkey', null, null, null, null, null, null)," + + "('orderpriority', null, null, null, null, null, null)," + + "('comment', null, null, null, null, null, null)," + + "(null, null, null, null, null, null, null)"); + // It's not possible to ANALYZE a VIEW in MySQL + } + finally { + executeInMysql("DROP VIEW " + tableName); + } + } + + @Override + @Test + public void testMaterializedView() + { + throw new SkipException(""); // TODO is there a concept like materialized view in MySQL? + } + + @Override + @Test(dataProvider = "testCaseColumnNamesDataProvider") + public void testCaseColumnNames(String tableName) + { + executeInMysql(("" + + "CREATE TABLE " + tableName + " " + + "AS SELECT " + + " orderkey AS CASE_UNQUOTED_UPPER, " + + " custkey AS case_unquoted_lower, " + + " orderstatus AS cASe_uNQuoTeD_miXED, " + + " totalprice AS \"CASE_QUOTED_UPPER\", " + + " orderdate AS \"case_quoted_lower\"," + + " orderpriority AS \"CasE_QuoTeD_miXED\" " + + "FROM orders") + .replace("\"", "`")); + try { + gatherStats(tableName); + MaterializedResult statsResult = computeActual("SHOW STATS FOR " + tableName); + assertColumnStats(statsResult, new MapBuilder() + .put("case_unquoted_upper", 15000) + .put("case_unquoted_lower", 1000) + .put("case_unquoted_mixed", varcharNdvToExpected.apply(3)) + .put("case_quoted_upper", 14996) + .put("case_quoted_lower", 2401) + .put("case_quoted_mixed", varcharNdvToExpected.apply(5)) + .build()); + assertThat(getTableCardinalityFromStats(statsResult)).isCloseTo(15000, withinPercentage(20)); + } + finally { + executeInMysql("DROP TABLE " + tableName.replace("\"", "`")); + } + } + + @Override + @DataProvider + public Object[][] testCaseColumnNamesDataProvider() + { + return new Object[][] { + {"TEST_STATS_MIXED_UNQUOTED_UPPER"}, + {"test_stats_mixed_unquoted_lower"}, + {"test_stats_mixed_uNQuoTeD_miXED"}, + {"\"TEST_STATS_MIXED_QUOTED_UPPER\""}, + {"\"test_stats_mixed_quoted_lower\""}, + {"\"test_stats_mixed_QuoTeD_miXED\""}, + }; + } + + @Override + @Test + public void testNumericCornerCases() + { + try (TestTable table = fromColumns( + getQueryRunner()::execute, + "test_numeric_corner_cases_", + ImmutableMap.>builder() + // TODO Infinity and NaNs not supported by MySQL +// .put("only_negative_infinity double", List.of("-infinity()", "-infinity()", "-infinity()", "-infinity()")) +// .put("only_positive_infinity double", List.of("infinity()", "infinity()", "infinity()", "infinity()")) +// .put("mixed_infinities double", List.of("-infinity()", "infinity()", "-infinity()", "infinity()")) +// .put("mixed_infinities_and_numbers double", List.of("-infinity()", "infinity()", "-5.0", "7.0")) +// .put("nans_only double", List.of("nan()", "nan()")) +// .put("nans_and_numbers double", List.of("nan()", "nan()", "-5.0", "7.0")) + .put("large_doubles double", List.of("CAST(-50371909150609548946090.0 AS DOUBLE)", "CAST(50371909150609548946090.0 AS DOUBLE)")) // 2^77 DIV 3 + .put("short_decimals_big_fraction decimal(16,15)", List.of("-1.234567890123456", "1.234567890123456")) + .put("short_decimals_big_integral decimal(16,1)", List.of("-123456789012345.6", "123456789012345.6")) + // DECIMALS up to precision 30 are supported + .put("long_decimals_big_fraction decimal(30,29)", List.of("-1.23456789012345678901234567890", "1.23456789012345678901234567890")) + .put("long_decimals_middle decimal(30,16)", List.of("-12345678901234.5678901234567890", "12345678901234.5678901234567890")) + .put("long_decimals_big_integral decimal(30,1)", List.of("-12345678901234567890123456789.0", "12345678901234567890123456789.0")) + .buildOrThrow(), + "null")) { + gatherStats(table.getName()); + assertQuery( + "SHOW STATS FOR " + table.getName(), + "VALUES " + + // TODO Infinity and NaNs not supported by MySQL +// "('only_negative_infinity', null, 1, 0, null, null, null)," + +// "('only_positive_infinity', null, 1, 0, null, null, null)," + +// "('mixed_infinities', null, 2, 0, null, null, null)," + +// "('mixed_infinities_and_numbers', null, 4.0, 0.0, null, null, null)," + +// "('nans_only', null, 1.0, 0.5, null, null, null)," + +// "('nans_and_numbers', null, 3.0, 0.0, null, null, null)," + + "('large_doubles', null, 1.9, 0.050000000000000044, null, null, null)," + + "('short_decimals_big_fraction', null, 1.9, 0.050000000000000044, null, null, null)," + + "('short_decimals_big_integral', null, 1.9, 0.050000000000000044, null, null, null)," + + "('long_decimals_big_fraction', null, 1.9, 0.050000000000000044, null, null, null)," + + "('long_decimals_middle', null, 1.9, 0.050000000000000044, null, null, null)," + + "('long_decimals_big_integral', null, 1.9, 0.050000000000000044, null, null, null)," + + "(null, null, null, null, 2, null, null)"); + } + } + + protected void executeInMysql(String sql) + { + try (Handle handle = Jdbi.open(() -> mysqlServer.createConnection())) { + handle.execute("USE tpch"); + handle.execute(sql); + } + } + + protected void assertColumnStats(MaterializedResult statsResult, Map columnNdvs) + { + assertColumnStats(statsResult, columnNdvs, nullFractionToExpected.apply(0.0)); + } + + protected void assertColumnStats(MaterializedResult statsResult, Map columnNdvs, double nullFraction) + { + Map columnNullFractions = new HashMap<>(); + columnNdvs.forEach((columnName, ndv) -> columnNullFractions.put(columnName, ndv == null ? null : nullFraction)); + + assertColumnStats(statsResult, columnNdvs, columnNullFractions); + } + + protected void assertColumnStats(MaterializedResult statsResult, Map columnNdvs, Map columnNullFractions) + { + assertEquals(columnNdvs.keySet(), columnNullFractions.keySet()); + List reportedColumns = stream(statsResult) + .map(row -> row.getField(0)) // column name + .filter(Objects::nonNull) + .map(String.class::cast) + .collect(toImmutableList()); + assertThat(reportedColumns) + .containsOnlyOnce(columnNdvs.keySet().toArray(new String[0])); + + double tableCardinality = getTableCardinalityFromStats(statsResult); + for (MaterializedRow row : statsResult) { + if (row.getField(0) == null) { + continue; + } + String columnName = (String) row.getField(0); + verify(columnNdvs.containsKey(columnName)); + Integer expectedNdv = columnNdvs.get(columnName); + verify(columnNullFractions.containsKey(columnName)); + Double expectedNullFraction = columnNullFractions.get(columnName); + + Double dataSize = (Double) row.getField(1); + if (dataSize != null) { + assertThat(dataSize).as("Data size for " + columnName) + .isEqualTo(0); + } + + AbstractDoubleAssert ndvAssertion = assertThat((Double) row.getField(2)).as("NDV for " + columnName); + if (expectedNdv == null) { + ndvAssertion.isNull(); + assertNull(row.getField(3), "null fraction for " + columnName); + } + else { + ndvAssertion.isBetween(expectedNdv * 0.5, min(expectedNdv * 4.0, tableCardinality)); // [-50%, +300%] but no more than row count + assertThat((Double) row.getField(3)).as("Null fraction for " + columnName) + .isBetween(expectedNullFraction * 0.4, min(expectedNullFraction * 1.1, 1.0)); + } + + assertNull(row.getField(4), "min"); + assertNull(row.getField(5), "max"); + } + } + + protected static double getTableCardinalityFromStats(MaterializedResult statsResult) + { + MaterializedRow lastRow = statsResult.getMaterializedRows().get(statsResult.getRowCount() - 1); + assertNull(lastRow.getField(0)); + assertNull(lastRow.getField(1)); + assertNull(lastRow.getField(2)); + assertNull(lastRow.getField(3)); + assertNull(lastRow.getField(5)); + assertNull(lastRow.getField(6)); + assertEquals(lastRow.getFieldCount(), 7); + assertNotNull(lastRow.getField(4)); + return ((Number) lastRow.getField(4)).doubleValue(); + } + + protected static class MapBuilder + { + private final Map map = new HashMap<>(); + + public MapBuilder put(K key, V value) + { + checkArgument(!map.containsKey(key), "Key already present: %s", key); + map.put(requireNonNull(key, "key is null"), value); + return this; + } + + public Map build() + { + return new HashMap<>(map); + } + } +} diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlClient.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlClient.java index cc737e34b87e..f271bbf9fdc2 100644 --- a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlClient.java +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlClient.java @@ -19,6 +19,7 @@ import io.trino.plugin.jdbc.JdbcClient; import io.trino.plugin.jdbc.JdbcColumnHandle; import io.trino.plugin.jdbc.JdbcExpression; +import io.trino.plugin.jdbc.JdbcStatisticsConfig; import io.trino.plugin.jdbc.JdbcTypeHandle; import io.trino.plugin.jdbc.mapping.DefaultIdentifierMapping; import io.trino.spi.connector.AggregateFunction; @@ -59,6 +60,7 @@ public class TestMySqlClient private static final JdbcClient JDBC_CLIENT = new MySqlClient( new BaseJdbcConfig(), + new JdbcStatisticsConfig(), session -> { throw new UnsupportedOperationException(); }, diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql5IndexStatistics.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql5IndexStatistics.java new file mode 100644 index 000000000000..294f1de459fb --- /dev/null +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql5IndexStatistics.java @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.mysql; + +public class TestMySqlTableStatisticsMySql5IndexStatistics + extends BaseMySqlTableStatisticsIndexStatisticsTest +{ + public TestMySqlTableStatisticsMySql5IndexStatistics() + { + super("mysql:5.5.46"); // oldest available on RDS + } +} diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql8Histograms.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql8Histograms.java new file mode 100644 index 000000000000..e6c2da58678a --- /dev/null +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql8Histograms.java @@ -0,0 +1,95 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.mysql; + +import com.google.common.collect.ImmutableMap; +import io.trino.testing.sql.TestTable; +import org.testng.annotations.Test; + +import java.util.List; +import java.util.function.Function; + +import static com.google.common.collect.ImmutableList.toImmutableList; +import static com.google.common.collect.Streams.stream; +import static io.trino.testing.sql.TestTable.fromColumns; +import static java.lang.String.format; +import static java.lang.String.join; + +public class TestMySqlTableStatisticsMySql8Histograms + extends BaseTestMySqlTableStatisticsTest +{ + public TestMySqlTableStatisticsMySql8Histograms() + { + super("mysql:8.0.15", + Function.identity(), + Function.identity()); + } + + @Test + @Override + public void testNumericCornerCases() + { + try (TestTable table = fromColumns( + getQueryRunner()::execute, + "test_numeric_corner_cases_", + ImmutableMap.>builder() + // TODO Infinity and NaNs not supported by MySQL +// .put("only_negative_infinity double", List.of("-infinity()", "-infinity()", "-infinity()", "-infinity()")) +// .put("only_positive_infinity double", List.of("infinity()", "infinity()", "infinity()", "infinity()")) +// .put("mixed_infinities double", List.of("-infinity()", "infinity()", "-infinity()", "infinity()")) +// .put("mixed_infinities_and_numbers double", List.of("-infinity()", "infinity()", "-5.0", "7.0")) +// .put("nans_only double", List.of("nan()", "nan()")) +// .put("nans_and_numbers double", List.of("nan()", "nan()", "-5.0", "7.0")) + .put("large_doubles double", List.of("CAST(-50371909150609548946090.0 AS DOUBLE)", "CAST(50371909150609548946090.0 AS DOUBLE)")) // 2^77 DIV 3 + .put("short_decimals_big_fraction decimal(16,15)", List.of("-1.234567890123456", "1.234567890123456")) + .put("short_decimals_big_integral decimal(16,1)", List.of("-123456789012345.6", "123456789012345.6")) + // DECIMALS up to precision 30 are supported + .put("long_decimals_big_fraction decimal(30,29)", List.of("-1.23456789012345678901234567890", "1.23456789012345678901234567890")) + .put("long_decimals_middle decimal(30,16)", List.of("-12345678901234.5678901234567890", "12345678901234.5678901234567890")) + .put("long_decimals_big_integral decimal(30,1)", List.of("-12345678901234567890123456789.0", "12345678901234567890123456789.0")) + .buildOrThrow(), + "null")) { + gatherStats(table.getName()); + assertQuery( + "SHOW STATS FOR " + table.getName(), + "VALUES " + + // TODO Infinity and NaNs not supported by MySQL +// "('only_negative_infinity', null, 1, 0, null, null, null)," + +// "('only_positive_infinity', null, 1, 0, null, null, null)," + +// "('mixed_infinities', null, 2, 0, null, null, null)," + +// "('mixed_infinities_and_numbers', null, 4.0, 0.0, null, null, null)," + +// "('nans_only', null, 1.0, 0.5, null, null, null)," + +// "('nans_and_numbers', null, 3.0, 0.0, null, null, null)," + + "('large_doubles', null, 2.0, 0.0, null, null, null)," + + "('short_decimals_big_fraction', null, 2.0, 0.0, null, null, null)," + + "('short_decimals_big_integral', null, 2.0, 0.0, null, null, null)," + + "('long_decimals_big_fraction', null, 2.0, 0.0, null, null, null)," + + "('long_decimals_middle', null, 2.0, 0.0, null, null, null)," + + "('long_decimals_big_integral', null, 2.0, 0.0, null, null, null)," + + "(null, null, null, null, 2, null, null)"); + } + } + + @Override + protected void gatherStats(String tableName) + { + List columnNames = stream(computeActual("SHOW COLUMNS FROM " + tableName)) + .map(row -> (String) row.getField(0)) + .collect(toImmutableList()); + executeInMysql(format( + "ANALYZE TABLE %s UPDATE HISTOGRAM ON %s ", + tableName, + join(", ", columnNames)).replace("\"", "`")); + } +} diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql8IndexStatistics.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql8IndexStatistics.java new file mode 100644 index 000000000000..6441c2aff7ae --- /dev/null +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/TestMySqlTableStatisticsMySql8IndexStatistics.java @@ -0,0 +1,23 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.plugin.mysql; + +public class TestMySqlTableStatisticsMySql8IndexStatistics + extends BaseMySqlTableStatisticsIndexStatisticsTest +{ + public TestMySqlTableStatisticsMySql8IndexStatistics() + { + super("mysql:8.0.15"); + } +} From 9a74550ce55fc4885c7a30cd190744581c1c6b88 Mon Sep 17 00:00:00 2001 From: Mateusz Gajewski Date: Wed, 23 Mar 2022 15:21:23 +0100 Subject: [PATCH 3/3] Add MySQL automatic cost based JOIN pushdown support --- .../main/java/io/trino/plugin/mysql/MySqlClient.java | 9 ++++++++- .../java/io/trino/plugin/mysql/MySqlClientModule.java | 10 ++++++---- .../io/trino/plugin/mysql/BaseMySqlConnectorTest.java | 9 +++++++++ .../plugin/mysql/BaseTestMySqlTableStatisticsTest.java | 4 +--- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java index 48f253e52213..d0a5ce0e981f 100644 --- a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java +++ b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClient.java @@ -112,6 +112,7 @@ import static io.trino.plugin.jdbc.DecimalSessionSessionProperties.getDecimalRounding; import static io.trino.plugin.jdbc.DecimalSessionSessionProperties.getDecimalRoundingMode; import static io.trino.plugin.jdbc.JdbcErrorCode.JDBC_ERROR; +import static io.trino.plugin.jdbc.JdbcJoinPushdownUtil.implementJoinCostAware; import static io.trino.plugin.jdbc.PredicatePushdownController.DISABLE_PUSHDOWN; import static io.trino.plugin.jdbc.PredicatePushdownController.FULL_PUSHDOWN; import static io.trino.plugin.jdbc.StandardColumnMappings.bigintColumnMapping; @@ -704,7 +705,13 @@ public Optional implementJoin( // Not supported in MySQL return Optional.empty(); } - return super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics); + return implementJoinCostAware( + session, + joinType, + leftSource, + rightSource, + statistics, + () -> super.implementJoin(session, joinType, leftSource, rightSource, joinConditions, rightAssignments, leftAssignments, statistics)); } @Override diff --git a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClientModule.java b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClientModule.java index 69c3238ef069..d237a43536a9 100644 --- a/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClientModule.java +++ b/plugin/trino-mysql/src/main/java/io/trino/plugin/mysql/MySqlClientModule.java @@ -14,17 +14,18 @@ package io.trino.plugin.mysql; import com.google.inject.Binder; -import com.google.inject.Module; import com.google.inject.Provides; import com.google.inject.Scopes; import com.google.inject.Singleton; import com.mysql.jdbc.Driver; +import io.airlift.configuration.AbstractConfigurationAwareModule; import io.trino.plugin.jdbc.BaseJdbcConfig; import io.trino.plugin.jdbc.ConnectionFactory; import io.trino.plugin.jdbc.DecimalModule; import io.trino.plugin.jdbc.DriverConnectionFactory; import io.trino.plugin.jdbc.ForBaseJdbc; import io.trino.plugin.jdbc.JdbcClient; +import io.trino.plugin.jdbc.JdbcJoinPushdownSupportModule; import io.trino.plugin.jdbc.JdbcStatisticsConfig; import io.trino.plugin.jdbc.credential.CredentialProvider; @@ -34,16 +35,17 @@ import static io.airlift.configuration.ConfigBinder.configBinder; public class MySqlClientModule - implements Module + extends AbstractConfigurationAwareModule { @Override - public void configure(Binder binder) + protected void setup(Binder binder) { binder.bind(JdbcClient.class).annotatedWith(ForBaseJdbc.class).to(MySqlClient.class).in(Scopes.SINGLETON); configBinder(binder).bindConfig(MySqlJdbcConfig.class); configBinder(binder).bindConfig(MySqlConfig.class); configBinder(binder).bindConfig(JdbcStatisticsConfig.class); - binder.install(new DecimalModule()); + install(new DecimalModule()); + install(new JdbcJoinPushdownSupportModule()); } @Provides diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseMySqlConnectorTest.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseMySqlConnectorTest.java index 5b6595380476..790d1e25cdb5 100644 --- a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseMySqlConnectorTest.java +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseMySqlConnectorTest.java @@ -392,4 +392,13 @@ protected SqlExecutor onRemoteDatabase() { return mySqlServer::execute; } + + @Override + protected Session joinPushdownEnabled(Session session) + { + return Session.builder(super.joinPushdownEnabled(session)) + // strategy is AUTOMATIC by default and would not work for certain test cases (even if statistics are collected) + .setCatalogSessionProperty(session.getCatalog().orElseThrow(), "join_pushdown_strategy", "EAGER") + .build(); + } } diff --git a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseTestMySqlTableStatisticsTest.java b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseTestMySqlTableStatisticsTest.java index b1c10821adce..a0877948ca64 100644 --- a/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseTestMySqlTableStatisticsTest.java +++ b/plugin/trino-mysql/src/test/java/io/trino/plugin/mysql/BaseTestMySqlTableStatisticsTest.java @@ -76,9 +76,7 @@ protected QueryRunner createQueryRunner() return createMySqlQueryRunner( mysqlServer, Map.of(), - Map.of( - "case-insensitive-name-matching", "true", - "join-pushdown.enabled", "true"), + Map.of("case-insensitive-name-matching", "true"), List.of(ORDERS)); }