Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use pre-existing files in AbstractTestParquetPageSkipping #18407

Merged
Merged 1 commit on Jul 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.plugin.hive;

import com.google.common.io.Resources;
import io.trino.Session;
import io.trino.execution.QueryStats;
import io.trino.operator.OperatorStats;
Expand All @@ -27,6 +28,8 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.net.URISyntaxException;
import java.util.Map;

import static com.google.common.collect.MoreCollectors.onlyElement;
Expand Down Expand Up @@ -77,10 +80,29 @@ private void buildSortedTables(String tableName, String sortByColumnName, String
}

@Test
public void testAndPredicates()
public void testRowGroupPruningFromPageIndexes()
throws Exception
{
String tableName = "test_and_predicate_" + randomNameSuffix();
buildSortedTables(tableName, "totalprice", "double");
String tableName = "test_row_group_pruning_" + randomNameSuffix();
File parquetFile = new File(Resources.getResource("parquet_page_skipping/orders_sorted_by_totalprice").toURI());
assertUpdate(
"""
CREATE TABLE %s (
orderkey bigint,
custkey bigint,
orderstatus varchar(1),
totalprice double,
orderdate date,
orderpriority varchar(15),
clerk varchar(15),
shippriority integer,
comment varchar(79),
rvalues double array)
WITH (
format = 'PARQUET',
external_location = '%s')
""".formatted(tableName, parquetFile.getAbsolutePath()));

int rowCount = assertColumnIndexResults("SELECT * FROM " + tableName + " WHERE totalprice BETWEEN 100000 AND 131280 AND clerk = 'Clerk#000000624'");
assertThat(rowCount).isGreaterThan(0);

Expand All @@ -92,19 +114,14 @@ public void testAndPredicates()

@Test
public void testPageSkippingWithNonSequentialOffsets()
throws URISyntaxException
{
String tableName = "test_random_" + randomNameSuffix();
int updateCount = 8192;
assertUpdate(
"CREATE TABLE " + tableName + " (col) WITH (format = 'PARQUET') AS " +
"SELECT * FROM unnest(transform(repeat(1, 8192), x -> rand()))",
updateCount);
for (int i = 0; i < 8; i++) {
assertUpdate(
"INSERT INTO " + tableName + " SELECT rand() FROM " + tableName,
updateCount);
updateCount += updateCount;
}
File parquetFile = new File(Resources.getResource("parquet_page_skipping/random").toURI());
assertUpdate(format(
"CREATE TABLE %s (col double) WITH (format = 'PARQUET', external_location = '%s')",
tableName,
parquetFile.getAbsolutePath()));
// These queries select a subset of pages which are stored at non-sequential offsets
// This reproduces the issue identified in https://github.com/trinodb/trino/issues/9097
for (double i = 0; i < 1; i += 0.1) {
Expand All @@ -115,12 +132,17 @@ public void testPageSkippingWithNonSequentialOffsets()

@Test
public void testFilteringOnColumnNameWithDot()
throws URISyntaxException
{
String nameInSql = "\"a.dot\"";
String tableName = "test_column_name_with_dot_" + randomNameSuffix();

assertUpdate("CREATE TABLE " + tableName + "(key varchar(50), " + nameInSql + " varchar(50)) WITH (format = 'PARQUET')");
assertUpdate("INSERT INTO " + tableName + " VALUES ('null value', NULL), ('sample value', 'abc'), ('other value', 'xyz')", 3);
File parquetFile = new File(Resources.getResource("parquet_page_skipping/column_name_with_dot").toURI());
assertUpdate(format(
"CREATE TABLE %s (key varchar(50), %s varchar(50)) WITH (format = 'PARQUET', external_location = '%s')",
tableName,
nameInSql,
parquetFile.getAbsolutePath()));

assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " IS NULL", "VALUES ('null value')");
assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " = 'abc'", "VALUES ('sample value')");
Expand Down Expand Up @@ -157,17 +179,15 @@ public void testPageSkipping(String sortByColumn, String sortByColumnType, Objec

@Test
public void testFilteringWithColumnIndex()
throws URISyntaxException
{
String tableName = "test_page_filtering_" + randomNameSuffix();
String catalog = getSession().getCatalog().orElseThrow();
assertUpdate(
Session.builder(getSession())
.setCatalogSessionProperty(catalog, "parquet_writer_page_size", "32kB")
.build(),
"CREATE TABLE " + tableName + " " +
"WITH (format = 'PARQUET', bucket_count = 1, bucketed_by = ARRAY['suppkey'], sorted_by = ARRAY['suppkey']) AS " +
"SELECT suppkey, extendedprice, shipmode, comment FROM tpch.tiny.lineitem",
60175);
File parquetFile = new File(Resources.getResource("parquet_page_skipping/lineitem_sorted_by_suppkey").toURI());
assertUpdate(format(
"CREATE TABLE %s (suppkey bigint, extendedprice decimal(12, 2), shipmode varchar(10), comment varchar(44)) " +
"WITH (format = 'PARQUET', external_location = '%s')",
tableName,
parquetFile.getAbsolutePath()));

verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey = 10");
verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey BETWEEN 25 AND 35");
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.