Added ability to have unique table location for each iceberg table #6063

Merged 1 commit on Aug 3, 2021
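Summary: this change adds an opt-in `iceberg.unique-table-location` catalog property. When it is enabled and no explicit location is given, `CREATE TABLE` appends a random UUID-derived suffix to the directory name backing a new Iceberg table, so re-creating a table under a previously used name (including after a rename) gets a fresh storage path that cannot collide with leftover data.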
IcebergConfig.java
@@ -30,6 +30,7 @@ public class IcebergConfig
     private HiveCompressionCodec compressionCodec = GZIP;
     private boolean useFileSizeFromMetadata = true;
     private int maxPartitionsPerWriter = 100;
+    private boolean uniqueTableLocation;
 
     @NotNull
     public FileFormat getFileFormat()
@@ -91,4 +92,17 @@ public IcebergConfig setMaxPartitionsPerWriter(int maxPartitionsPerWriter)
         this.maxPartitionsPerWriter = maxPartitionsPerWriter;
         return this;
     }
+
+    public boolean isUniqueTableLocation()
+    {
+        return uniqueTableLocation;
+    }
+
+    @Config("iceberg.unique-table-location")
+    @ConfigDescription("Use randomized, unique table locations")
+    public IcebergConfig setUniqueTableLocation(boolean uniqueTableLocation)
+    {
+        this.uniqueTableLocation = uniqueTableLocation;
+        return this;
+    }
 }
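The property is bound through Airlift's `@Config`/`@ConfigDescription` annotations and defaults to false. A minimal sketch of enabling it for a catalog (the file name and `connector.name` line reflect the usual Trino catalog setup, not anything in this diff):

    # etc/catalog/iceberg.properties (hypothetical catalog file)
    connector.name=iceberg
    iceberg.unique-table-location=true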
IcebergMetadata.java
@@ -108,7 +108,6 @@
 import java.util.Optional;
 import java.util.OptionalLong;
 import java.util.Set;
-import java.util.UUID;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.function.BiPredicate;
@@ -166,6 +165,7 @@
 import static io.trino.spi.type.BigintType.BIGINT;
 import static java.util.Collections.singletonList;
 import static java.util.Objects.requireNonNull;
+import static java.util.UUID.randomUUID;
 import static java.util.function.Function.identity;
 import static java.util.stream.Collectors.joining;
 import static org.apache.hadoop.hive.metastore.TableType.VIRTUAL_VIEW;
@@ -198,6 +198,7 @@ public class IcebergMetadata
     private final JsonCodec<CommitTaskData> commitTaskCodec;
     private final HiveTableOperationsProvider tableOperationsProvider;
     private final String trinoVersion;
+    private final boolean useUniqueTableLocation;
 
     private final Map<String, Optional<Long>> snapshotIds = new ConcurrentHashMap<>();
     private final Map<SchemaTableName, TableMetadata> tableMetadataCache = new ConcurrentHashMap<>();
@@ -212,7 +213,8 @@ public IcebergMetadata(
             TypeManager typeManager,
             JsonCodec<CommitTaskData> commitTaskCodec,
             HiveTableOperationsProvider tableOperationsProvider,
-            String trinoVersion)
+            String trinoVersion,
+            boolean useUniqueTableLocation)
     {
         this.catalogName = requireNonNull(catalogName, "catalogName is null");
         this.metastore = requireNonNull(metastore, "metastore is null");
@@ -221,6 +223,7 @@
         this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null");
         this.tableOperationsProvider = requireNonNull(tableOperationsProvider, "tableOperationsProvider is null");
         this.trinoVersion = requireNonNull(trinoVersion, "trinoVersion is null");
+        this.useUniqueTableLocation = useUniqueTableLocation;
     }
 
     @Override
@@ -575,7 +578,11 @@ public ConnectorOutputTableHandle beginCreateTable(ConnectorSession session, Con
         HiveIdentity identity = new HiveIdentity(session);
         String targetPath = getTableLocation(tableMetadata.getProperties());
         if (targetPath == null) {
-            targetPath = getTableDefaultLocation(database, hdfsContext, hdfsEnvironment, schemaName, tableName).toString();
+            String tableNameForLocation = tableName;
+            if (useUniqueTableLocation) {
+                tableNameForLocation += "-" + randomUUID().toString().replace("-", "");
+            }
+            targetPath = getTableDefaultLocation(database, hdfsContext, hdfsEnvironment, schemaName, tableNameForLocation).toString();
         }
 
         TableOperations operations = tableOperationsProvider.createTableOperations(
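For illustration, a self-contained sketch of the suffix logic above; the table name is made up, and only the `randomUUID().toString().replace("-", "")` transformation mirrors the actual change:

    import static java.util.UUID.randomUUID;

    public class UniqueLocationExample
    {
        public static void main(String[] args)
        {
            String tableName = "orders"; // hypothetical table name
            // Same transformation as beginCreateTable: a random UUID with its dashes stripped
            String tableNameForLocation = tableName + "-" + randomUUID().toString().replace("-", "");
            System.out.println(tableNameForLocation); // e.g. orders-1f0e4a2b6c8d4e719a3b5c7d9e0f2a4b
        }
    }

Note that the suffix only affects the default location on disk; the table keeps its logical name in the metastore, which is what makes drop-and-recreate safe against leftover data at the old path.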
@@ -1044,7 +1051,7 @@ public void createMaterializedView(ConnectorSession session, SchemaTableName vie
 
         // Generate a storage table name and create a storage table. The properties in the definition are table properties for the
         // storage table as indicated in the materialized view definition.
-        String storageTableName = "st_" + UUID.randomUUID().toString().replace("-", "");
+        String storageTableName = "st_" + randomUUID().toString().replace("-", "");
         Map<String, Object> storageTableProperties = new HashMap<>(definition.getProperties());
         storageTableProperties.putIfAbsent(FILE_FORMAT_PROPERTY, DEFAULT_FILE_FORMAT_DEFAULT);
 
IcebergMetadataFactory.java
@@ -33,6 +33,7 @@ public class IcebergMetadataFactory
     private final JsonCodec<CommitTaskData> commitTaskCodec;
     private final HiveTableOperationsProvider tableOperationsProvider;
     private final String trinoVersion;
+    private final boolean useUniqueTableLocation;
 
     @Inject
     public IcebergMetadataFactory(
@@ -45,7 +46,7 @@ public IcebergMetadataFactory(
             HiveTableOperationsProvider tableOperationsProvider,
             NodeVersion nodeVersion)
     {
-        this(catalogName, metastore, hdfsEnvironment, typeManager, commitTaskDataJsonCodec, tableOperationsProvider, nodeVersion);
+        this(catalogName, metastore, hdfsEnvironment, typeManager, commitTaskDataJsonCodec, tableOperationsProvider, nodeVersion, config.isUniqueTableLocation());
     }
 
     public IcebergMetadataFactory(
@@ -55,7 +56,8 @@ public IcebergMetadataFactory(
             TypeManager typeManager,
             JsonCodec<CommitTaskData> commitTaskCodec,
             HiveTableOperationsProvider tableOperationsProvider,
-            NodeVersion nodeVersion)
+            NodeVersion nodeVersion,
+            boolean useUniqueTableLocation)
     {
         this.catalogName = requireNonNull(catalogName, "catalogName is null");
         this.metastore = requireNonNull(metastore, "metastore is null");
@@ -64,10 +66,11 @@
         this.commitTaskCodec = requireNonNull(commitTaskCodec, "commitTaskCodec is null");
         this.tableOperationsProvider = requireNonNull(tableOperationsProvider, "tableOperationsProvider is null");
         this.trinoVersion = requireNonNull(nodeVersion, "nodeVersion is null").toString();
+        this.useUniqueTableLocation = useUniqueTableLocation;
     }
 
     public IcebergMetadata create()
     {
-        return new IcebergMetadata(catalogName, metastore, hdfsEnvironment, typeManager, commitTaskCodec, tableOperationsProvider, trinoVersion);
+        return new IcebergMetadata(catalogName, metastore, hdfsEnvironment, typeManager, commitTaskCodec, tableOperationsProvider, trinoVersion, useUniqueTableLocation);
     }
 }
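The factory change just threads the flag from `IcebergConfig` into every `IcebergMetadata` it creates; since the flag is a primitive boolean, no `requireNonNull` check is needed. The `config` value referenced in the delegating constructor comes from an `IcebergConfig` parameter that sits in a collapsed portion of the diff above.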
IcebergQueryRunner.java
@@ -72,6 +72,17 @@ public static DistributedQueryRunner createIcebergQueryRunner(
             List<TpchTable<?>> tables,
             Optional<File> metastoreDirectory)
             throws Exception
+    {
+        return createIcebergQueryRunner(extraProperties, ImmutableMap.of(), format, tables, metastoreDirectory);
+    }
+
+    public static DistributedQueryRunner createIcebergQueryRunner(
+            Map<String, String> extraProperties,
+            Map<String, String> connectorProperties,
+            FileFormat format,
+            List<TpchTable<?>> tables,
+            Optional<File> metastoreDirectory)
+            throws Exception
 
     {
         Session session = testSessionBuilder()
@@ -93,6 +104,7 @@
                 .put("hive.metastore", "file")
                 .put("hive.metastore.catalog.dir", dataDir.toString())
                 .put("iceberg.file-format", format.name())
+                .putAll(connectorProperties)
                 .build();
 
         queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties);
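The new overload keeps existing call sites working while letting a test inject per-catalog connector properties, which flow into the catalog via `putAll`. A sketch of a caller, mirroring the new test below (`metastoreDir` is a placeholder temp directory, and the final print is illustrative only):

    import com.google.common.collect.ImmutableList;
    import com.google.common.collect.ImmutableMap;
    import io.trino.plugin.iceberg.IcebergConfig;
    import io.trino.testing.DistributedQueryRunner;

    import java.io.File;
    import java.nio.file.Files;
    import java.util.Optional;

    import static io.trino.plugin.iceberg.IcebergQueryRunner.createIcebergQueryRunner;
    import static io.trino.tpch.TpchTable.NATION;

    public class UniqueLocationRunnerExample
    {
        public static void main(String[] args)
                throws Exception
        {
            // Placeholder directory for the file-based metastore
            File metastoreDir = Files.createTempDirectory("example_metastore").toFile();
            DistributedQueryRunner queryRunner = createIcebergQueryRunner(
                    ImmutableMap.of(),                                        // extra engine properties
                    ImmutableMap.of("iceberg.unique-table-location", "true"), // connector properties
                    new IcebergConfig().getFileFormat(),                      // connector default format
                    ImmutableList.of(NATION),
                    Optional.of(metastoreDir));
            System.out.println(queryRunner.execute("SHOW TABLES"));
        }
    }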
TestIcebergConfig.java
@@ -35,7 +35,8 @@ public void testDefaults()
                 .setFileFormat(ORC)
                 .setCompressionCodec(GZIP)
                 .setUseFileSizeFromMetadata(true)
-                .setMaxPartitionsPerWriter(100));
+                .setMaxPartitionsPerWriter(100)
+                .setUniqueTableLocation(false));
     }
 
     @Test
@@ -46,13 +47,15 @@ public void testExplicitPropertyMappings()
                 .put("iceberg.compression-codec", "NONE")
                 .put("iceberg.use-file-size-from-metadata", "false")
                 .put("iceberg.max-partitions-per-writer", "222")
+                .put("iceberg.unique-table-location", "true")
                 .build();
 
         IcebergConfig expected = new IcebergConfig()
                 .setFileFormat(PARQUET)
                 .setCompressionCodec(HiveCompressionCodec.NONE)
                 .setUseFileSizeFromMetadata(false)
-                .setMaxPartitionsPerWriter(222);
+                .setMaxPartitionsPerWriter(222)
+                .setUniqueTableLocation(true);
 
         assertFullMapping(properties, expected);
     }
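Airlift's config test helpers keep these two methods in lockstep: `testDefaults` records the new default (`false`), while `assertFullMapping` verifies every bound property round-trips through its setter, so leaving `iceberg.unique-table-location` out of either mapping would fail the test.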
TestIcebergTableWithCustomLocation.java (new file)
@@ -0,0 +1,136 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.iceberg;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.plugin.hive.metastore.Table;
import io.trino.plugin.hive.metastore.file.FileHiveMetastore;
import io.trino.testing.AbstractTestQueryFramework;
import io.trino.testing.DistributedQueryRunner;
import org.testng.annotations.AfterClass;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.Optional;

import static com.google.common.io.MoreFiles.deleteRecursively;
import static com.google.common.io.RecursiveDeleteOption.ALLOW_INSECURE;
import static io.trino.plugin.hive.metastore.file.FileHiveMetastore.createTestingFileHiveMetastore;
import static io.trino.plugin.iceberg.IcebergQueryRunner.createIcebergQueryRunner;
import static io.trino.tpch.TpchTable.NATION;
import static java.lang.String.format;
import static org.assertj.core.api.Assertions.assertThat;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertTrue;

public class TestIcebergTableWithCustomLocation
        extends AbstractTestQueryFramework
{
    private FileHiveMetastore metastore;
    private File metastoreDir;

    @Override
    protected DistributedQueryRunner createQueryRunner()
            throws Exception
    {
        metastoreDir = Files.createTempDirectory("test_iceberg").toFile();
        metastore = createTestingFileHiveMetastore(metastoreDir);

        return createIcebergQueryRunner(
                ImmutableMap.of(),
                ImmutableMap.of("iceberg.unique-table-location", "true"),
                new IcebergConfig().getFileFormat(),
                ImmutableList.of(NATION),
                Optional.of(metastoreDir));
    }

    @AfterClass(alwaysRun = true)
    public void tearDown()
            throws IOException
    {
        deleteRecursively(metastoreDir.toPath(), ALLOW_INSECURE);
    }

    @Test
    public void testTableHasUuidSuffixInLocation()
    {
        String tableName = "table_with_uuid";
        assertQuerySucceeds(format("CREATE TABLE %s as select 1 as val", tableName));
        Optional<Table> table = metastore.getTable(null, "tpch", tableName);
        assertTrue(table.isPresent(), "Table should exist");
        String location = table.get().getStorage().getLocation();
        assertThat(location).matches(format(".*%s-[0-9a-f]{32}", tableName));
    }
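Note: the `[0-9a-f]{32}` suffix asserted here is exactly a `randomUUID()` rendered without its four dashes (36 characters minus 4), matching the suffix generation in `IcebergMetadata.beginCreateTable`.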

    @Test
    public void testCreateAndDrop()
    {
        String tableName = "test_create_and_drop";
        assertQuerySucceeds(format("CREATE TABLE %s as select 1 as val", tableName));
        Optional<Table> table = metastore.getTable(null, "tpch", tableName);
        assertTrue(table.isPresent(), "Table should exist");

        assertQuerySucceeds(format("DROP TABLE %s", tableName));
        assertFalse(metastore.getTable(null, "tpch", tableName).isPresent(), "Table should be dropped");
    }

    @Test
    public void testCreateRenameDrop()
    {
        String tableName = "test_create_rename_drop";
        String renamedName = "test_create_rename_drop_renamed";
        assertQuerySucceeds(format("CREATE TABLE %s as select 1 as val", tableName));
        Optional<Table> table = metastore.getTable(null, "tpch", tableName);
        assertTrue(table.isPresent(), "Table should exist");
        String tableInitialLocation = table.get().getStorage().getLocation();

        assertQuerySucceeds(format("ALTER TABLE %s RENAME TO %s", tableName, renamedName));
        Optional<Table> renamedTable = metastore.getTable(null, "tpch", renamedName);
        assertTrue(renamedTable.isPresent(), "Table should exist");
        String renamedTableLocation = renamedTable.get().getStorage().getLocation();
        assertEquals(renamedTableLocation, tableInitialLocation, "Location should not be changed");

        assertQuerySucceeds(format("DROP TABLE %s", renamedName));
        assertFalse(metastore.getTable(null, "tpch", tableName).isPresent(), "Initial table should not exist");
        assertFalse(metastore.getTable(null, "tpch", renamedName).isPresent(), "Renamed table should be dropped");
    }

    @Test
    public void testCreateRenameCreate()
    {
        String tableName = "test_create_rename_create";
        String renamedName = "test_create_rename_create_renamed";
        assertQuerySucceeds(format("CREATE TABLE %s as select 1 as val", tableName));
        Optional<Table> table = metastore.getTable(null, "tpch", tableName);
        assertTrue(table.isPresent(), "Table should exist");
        String tableInitialLocation = table.get().getStorage().getLocation();

        assertQuerySucceeds(format("ALTER TABLE %s RENAME TO %s", tableName, renamedName));
        Optional<Table> renamedTable = metastore.getTable(null, "tpch", renamedName);
        assertTrue(renamedTable.isPresent(), "Table should exist");
        String renamedTableLocation = renamedTable.get().getStorage().getLocation();
        assertEquals(renamedTableLocation, tableInitialLocation, "Location should not be changed");

        assertQuerySucceeds(format("CREATE TABLE %s as select 1 as val", tableName));
        Optional<Table> recreatedTableWithInitialName = metastore.getTable(null, "tpch", tableName);
        assertTrue(recreatedTableWithInitialName.isPresent(), "Table should exist");
        String recreatedTableLocation = recreatedTableWithInitialName.get().getStorage().getLocation();
        assertNotEquals(tableInitialLocation, recreatedTableLocation, "Location should be different");
    }
}