Skip to content

Commit

Permalink
[ToBeSquashed]Address ZacBlanco's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
hantangwangd committed Feb 5, 2025
1 parent 1960dc1 commit f3c462f
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 146 deletions.
7 changes: 7 additions & 0 deletions presto-docs/src/main/sphinx/connector/iceberg.rst
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,13 @@ Property Name Description
Otherwise, it will be ignored.
======================================================= ============================================================= ============

.. note::

Hadoop catalog requires a file system that supports atomic rename operations, such as HDFS,
to maintain metadata files in order to implement atomic transaction commits. It is unsafe to
set ``iceberg.catalog.warehouse`` to a path in an object store or local file system. However,
it is safe to set ``iceberg.catalog.hadoop.warehouse.datadir`` to a path in an object store.

Configure the `Amazon S3 <https://prestodb.io/docs/current/connector/hive.html#amazon-s3-configuration>`_
properties to specify an S3 location as the warehouse data directory for the Hadoop catalog. This way,
the data and delete files of Iceberg tables are stored in S3. An example configuration includes:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1547,7 +1547,7 @@ public void testRegisterTableWithInvalidLocation()
assertUpdate("CREATE TABLE " + tableName + " (id integer, value integer)");
assertUpdate("INSERT INTO " + tableName + " VALUES(1, 1)", 1);

String metadataLocation = getLocation(schemaName, tableName).replace("//", "/") + "_invalid";
String metadataLocation = getLocation(schemaName, tableName) + "_invalid";

@Language("RegExp") String errorMessage = format("Unable to find metadata at location %s/%s", metadataLocation, METADATA_FOLDER_NAME);
assertQueryFails("CALL system.register_table ('" + schemaName + "', '" + tableName + "', '" + metadataLocation + "')", errorMessage);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@
import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix;
import static com.facebook.presto.type.DecimalParametricType.DECIMAL;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static com.google.common.io.Files.createTempDir;
import static java.lang.String.format;
import static java.nio.file.Files.createTempDirectory;
import static java.util.Objects.requireNonNull;
import static java.util.UUID.randomUUID;
import static java.util.function.Function.identity;
Expand Down Expand Up @@ -605,9 +605,10 @@ private void testPartitionedByTimestampTypeForFormat(Session session, FileFormat

@Test
public void testCreateTableWithCustomLocation()
throws IOException
{
String tableName = "test_table_with_custom_location";
URI tableTargetURI = createTempDir().toURI();
URI tableTargetURI = createTempDirectory(tableName).toUri();
try {
assertQuerySucceeds(format("create table %s (a int, b varchar)" +
" with (location = '%s')", tableName, tableTargetURI.toString()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
import org.testng.annotations.Ignore;
import org.testng.annotations.Test;

import java.io.IOException;
import java.net.URI;

import static com.facebook.presto.iceberg.CatalogType.HADOOP;
import static com.google.common.io.Files.createTempDir;
import static java.lang.String.format;
import static java.nio.file.Files.createTempDirectory;

@Ignore
public class TestIcebergDistributedHadoop
Expand All @@ -34,9 +35,10 @@ public TestIcebergDistributedHadoop()

@Test
public void testCreateTableWithCustomLocation()
throws IOException
{
String tableName = "test_hadoop_table_with_custom_location";
URI tableTargetURI = createTempDir().toURI();
URI tableTargetURI = createTempDirectory(tableName).toUri();
assertQueryFails(format("create table %s (a int, b varchar)" + " with (location = '%s')", tableName, tableTargetURI.toString()),
"Cannot set a custom location for a path-based table.*");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,16 @@
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;

import java.io.IOException;
import java.net.URI;

import static com.facebook.presto.iceberg.CatalogType.HADOOP;
import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.ACCESS_KEY;
import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.SECRET_KEY;
import static com.facebook.presto.testing.TestingConnectorSession.SESSION;
import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix;
import static com.google.common.io.Files.createTempDir;
import static java.lang.String.format;
import static java.nio.file.Files.createTempDirectory;

public class TestIcebergDistributedOnS3Hadoop
extends IcebergDistributedTestBase
Expand All @@ -53,10 +54,11 @@ public class TestIcebergDistributedOnS3Hadoop
HostAndPort hostAndPort;

public TestIcebergDistributedOnS3Hadoop()
throws IOException
{
super(HADOOP);
bucketName = "forhadoop-" + randomTableSuffix();
catalogWarehouseDir = createTempDir().toURI().toString();
catalogWarehouseDir = createTempDirectory(bucketName).toUri().toString();
}

protected QueryRunner createQueryRunner()
Expand All @@ -66,7 +68,6 @@ protected QueryRunner createQueryRunner()
ImmutableMap.of(
"iceberg.catalog.warehouse", catalogWarehouseDir,
"iceberg.catalog.hadoop.warehouse.datadir", getCatalogDataDirectory().toString(),
"hive.s3.use-instance-credentials", "false",
"hive.s3.aws-access-key", ACCESS_KEY,
"hive.s3.aws-secret-key", SECRET_KEY,
"hive.s3.endpoint", format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort()),
Expand Down Expand Up @@ -94,9 +95,10 @@ public void tearDown()

@Override
public void testCreateTableWithCustomLocation()
throws IOException
{
String tableName = "test_hadoop_table_with_custom_location";
URI tableTargetURI = createTempDir().toURI();
URI tableTargetURI = createTempDirectory(tableName).toUri();
assertQueryFails(format("create table %s (a int, b varchar)" + " with (location = '%s')", tableName, tableTargetURI.toString()),
"Cannot set a custom location for a path-based table.*");
}
Expand All @@ -119,8 +121,7 @@ protected HdfsEnvironment getHdfsEnvironment()
.setS3AwsAccessKey(ACCESS_KEY)
.setS3AwsSecretKey(SECRET_KEY)
.setS3PathStyleAccess(true)
.setS3Endpoint(format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort()))
.setS3UseInstanceCredentials(false);
.setS3Endpoint(format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort()));
return getHdfsEnvironment(hiveClientConfig, metastoreClientConfig, hiveS3Config);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;

import java.io.IOException;

import static com.facebook.presto.iceberg.CatalogType.HADOOP;
import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.ACCESS_KEY;
import static com.facebook.presto.iceberg.container.IcebergMinIODataLake.SECRET_KEY;
import static com.facebook.presto.tests.sql.TestTable.randomTableSuffix;
import static com.google.common.io.Files.createTempDir;
import static java.lang.String.format;
import static java.nio.file.Files.createTempDirectory;

public class TestIcebergHadoopCatalogOnS3DistributedQueries
extends TestIcebergDistributedQueries
Expand All @@ -39,10 +41,11 @@ public class TestIcebergHadoopCatalogOnS3DistributedQueries
HostAndPort hostAndPort;

public TestIcebergHadoopCatalogOnS3DistributedQueries()
throws IOException
{
super(HADOOP);
bucketName = "forhadoop-" + randomTableSuffix();
catalogWarehouseDir = createTempDir().toURI().toString();
catalogWarehouseDir = createTempDirectory(bucketName).toUri().toString();
}

protected QueryRunner createQueryRunner()
Expand All @@ -52,7 +55,6 @@ protected QueryRunner createQueryRunner()
ImmutableMap.of(
"iceberg.catalog.warehouse", catalogWarehouseDir,
"iceberg.catalog.hadoop.warehouse.datadir", format("s3://%s/%s", bucketName, WAREHOUSE_DATA_DIR),
"hive.s3.use-instance-credentials", "false",
"hive.s3.aws-access-key", ACCESS_KEY,
"hive.s3.aws-secret-key", SECRET_KEY,
"hive.s3.endpoint", format("http://%s:%s", hostAndPort.getHost(), hostAndPort.getPort()),
Expand Down
Loading

0 comments on commit f3c462f

Please sign in to comment.