Skip to content

Commit

Permalink
Use pre-existing files in AbstractTestParquetPageSkipping
Browse files Browse the repository at this point in the history
Currently, the optimized Parquet writer does not support writing
page indexes. Using files prepared with the legacy writer for
these tests preserves test coverage for page indexes after the
legacy writer is removed.
  • Loading branch information
raunaqmorarka authored and electrum committed Jul 26, 2023
1 parent 844e8c6 commit 6e51945
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
*/
package io.trino.plugin.hive;

import com.google.common.io.Resources;
import io.trino.Session;
import io.trino.execution.QueryStats;
import io.trino.operator.OperatorStats;
Expand All @@ -27,6 +28,8 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.net.URISyntaxException;
import java.util.Map;

import static com.google.common.collect.MoreCollectors.onlyElement;
Expand Down Expand Up @@ -77,10 +80,29 @@ private void buildSortedTables(String tableName, String sortByColumnName, String
}

@Test
public void testAndPredicates()
public void testRowGroupPruningFromPageIndexes()
throws Exception
{
String tableName = "test_and_predicate_" + randomNameSuffix();
buildSortedTables(tableName, "totalprice", "double");
String tableName = "test_row_group_pruning_" + randomNameSuffix();
File parquetFile = new File(Resources.getResource("parquet_page_skipping/orders_sorted_by_totalprice").toURI());
assertUpdate(
"""
CREATE TABLE %s (
orderkey bigint,
custkey bigint,
orderstatus varchar(1),
totalprice double,
orderdate date,
orderpriority varchar(15),
clerk varchar(15),
shippriority integer,
comment varchar(79),
rvalues double array)
WITH (
format = 'PARQUET',
external_location = '%s')
""".formatted(tableName, parquetFile.getAbsolutePath()));

int rowCount = assertColumnIndexResults("SELECT * FROM " + tableName + " WHERE totalprice BETWEEN 100000 AND 131280 AND clerk = 'Clerk#000000624'");
assertThat(rowCount).isGreaterThan(0);

Expand All @@ -92,19 +114,14 @@ public void testAndPredicates()

@Test
public void testPageSkippingWithNonSequentialOffsets()
throws URISyntaxException
{
String tableName = "test_random_" + randomNameSuffix();
int updateCount = 8192;
assertUpdate(
"CREATE TABLE " + tableName + " (col) WITH (format = 'PARQUET') AS " +
"SELECT * FROM unnest(transform(repeat(1, 8192), x -> rand()))",
updateCount);
for (int i = 0; i < 8; i++) {
assertUpdate(
"INSERT INTO " + tableName + " SELECT rand() FROM " + tableName,
updateCount);
updateCount += updateCount;
}
File parquetFile = new File(Resources.getResource("parquet_page_skipping/random").toURI());
assertUpdate(format(
"CREATE TABLE %s (col double) WITH (format = 'PARQUET', external_location = '%s')",
tableName,
parquetFile.getAbsolutePath()));
// These queries select a subset of pages which are stored at non-sequential offsets
// This reproduces the issue identified in https://github.com/trinodb/trino/issues/9097
for (double i = 0; i < 1; i += 0.1) {
Expand All @@ -115,12 +132,17 @@ public void testPageSkippingWithNonSequentialOffsets()

@Test
public void testFilteringOnColumnNameWithDot()
throws URISyntaxException
{
String nameInSql = "\"a.dot\"";
String tableName = "test_column_name_with_dot_" + randomNameSuffix();

assertUpdate("CREATE TABLE " + tableName + "(key varchar(50), " + nameInSql + " varchar(50)) WITH (format = 'PARQUET')");
assertUpdate("INSERT INTO " + tableName + " VALUES ('null value', NULL), ('sample value', 'abc'), ('other value', 'xyz')", 3);
File parquetFile = new File(Resources.getResource("parquet_page_skipping/column_name_with_dot").toURI());
assertUpdate(format(
"CREATE TABLE %s (key varchar(50), %s varchar(50)) WITH (format = 'PARQUET', external_location = '%s')",
tableName,
nameInSql,
parquetFile.getAbsolutePath()));

assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " IS NULL", "VALUES ('null value')");
assertQuery("SELECT key FROM " + tableName + " WHERE " + nameInSql + " = 'abc'", "VALUES ('sample value')");
Expand Down Expand Up @@ -157,17 +179,15 @@ public void testPageSkipping(String sortByColumn, String sortByColumnType, Objec

@Test
public void testFilteringWithColumnIndex()
throws URISyntaxException
{
String tableName = "test_page_filtering_" + randomNameSuffix();
String catalog = getSession().getCatalog().orElseThrow();
assertUpdate(
Session.builder(getSession())
.setCatalogSessionProperty(catalog, "parquet_writer_page_size", "32kB")
.build(),
"CREATE TABLE " + tableName + " " +
"WITH (format = 'PARQUET', bucket_count = 1, bucketed_by = ARRAY['suppkey'], sorted_by = ARRAY['suppkey']) AS " +
"SELECT suppkey, extendedprice, shipmode, comment FROM tpch.tiny.lineitem",
60175);
File parquetFile = new File(Resources.getResource("parquet_page_skipping/lineitem_sorted_by_suppkey").toURI());
assertUpdate(format(
"CREATE TABLE %s (suppkey bigint, extendedprice decimal(12, 2), shipmode varchar(10), comment varchar(44)) " +
"WITH (format = 'PARQUET', external_location = '%s')",
tableName,
parquetFile.getAbsolutePath()));

verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey = 10");
verifyFilteringWithColumnIndex("SELECT * FROM " + tableName + " WHERE suppkey BETWEEN 25 AND 35");
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 6e51945

Please sign in to comment.