diff --git a/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java b/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java index 2d363cbc5231..a7f9885a4726 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java +++ b/azure/src/main/java/org/apache/iceberg/azure/AzureProperties.java @@ -77,6 +77,17 @@ public Optional adlsWriteBlockSize() { return Optional.ofNullable(adlsWriteBlockSize); } + /** + * Applies configuration to the {@link DataLakeFileSystemClientBuilder} to provide the endpoint + * and credentials required to create an instance of the client. + * + *

The default endpoint is constructed in the form {@code + * https://{account}.dfs.core.windows.net} and default credentials are provided via the {@link + * com.azure.identity.DefaultAzureCredential}. + * + * @param account the service account name + * @param builder the builder instance + */ public void applyClientConfiguration(String account, DataLakeFileSystemClientBuilder builder) { String sasToken = adlsSasTokens.get(account); if (sasToken != null && !sasToken.isEmpty()) { @@ -93,7 +104,7 @@ public void applyClientConfiguration(String account, DataLakeFileSystemClientBui if (connectionString != null && !connectionString.isEmpty()) { builder.endpoint(connectionString); } else { - builder.endpoint("https://" + account); + builder.endpoint("https://" + account + ".dfs.core.windows.net"); } } } diff --git a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java index e73093512b82..d9b2112565c3 100644 --- a/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java +++ b/azure/src/main/java/org/apache/iceberg/azure/adlsv2/ADLSLocation.java @@ -29,13 +29,20 @@ * *

Locations follow the conventions used by Hadoop's Azure support, i.e. * - *

{@code abfs[s]://[@]/}
+ *
{@code abfs[s]://[@].dfs.core.windows.net/}
+ * + * or + * + *
{@code wasb[s]://@.blob.core.windows.net/}
+ * + * For compatibility, locations using the wasb scheme are also accepted but will use the Azure Data + * Lake Storage Gen2 REST APIs instead of the Blob Storage REST APIs. * *

See Hadoop Azure * Support */ class ADLSLocation { - private static final Pattern URI_PATTERN = Pattern.compile("^abfss?://([^/?#]+)(.*)?$"); + private static final Pattern URI_PATTERN = Pattern.compile("^(abfss?|wasbs?)://([^/?#]+)(.*)?$"); private final String storageAccount; private final String container; @@ -53,19 +60,19 @@ class ADLSLocation { ValidationException.check(matcher.matches(), "Invalid ADLS URI: %s", location); - String authority = matcher.group(1); + String authority = matcher.group(2); String[] parts = authority.split("@", -1); if (parts.length > 1) { this.container = parts[0]; - this.storageAccount = parts[1]; + String host = parts[1]; + this.storageAccount = host.split("\\.", -1)[0]; } else { this.container = null; - this.storageAccount = authority; + this.storageAccount = authority.split("\\.", -1)[0]; } - String uriPath = matcher.group(2); - uriPath = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath; - this.path = uriPath.split("\\?", -1)[0].split("#", -1)[0]; + String uriPath = matcher.group(3); + this.path = uriPath == null ? "" : uriPath.startsWith("/") ? uriPath.substring(1) : uriPath; } /** Returns Azure storage account. */ diff --git a/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java b/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java index 6b8287c44e58..4f032d7ab125 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/AzurePropertiesTest.java @@ -97,11 +97,13 @@ public void testNoSasToken() { @Test public void testWithConnectionString() { AzureProperties props = - new AzureProperties(ImmutableMap.of("adls.connection-string.account1", "http://endpoint")); + new AzureProperties( + ImmutableMap.of( + "adls.connection-string.account1", "https://account1.dfs.core.usgovcloudapi.net")); DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account1", clientBuilder); - verify(clientBuilder).endpoint("http://endpoint"); + verify(clientBuilder).endpoint("https://account1.dfs.core.usgovcloudapi.net"); } @Test @@ -111,7 +113,7 @@ public void testNoMatchingConnectionString() { DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account1", clientBuilder); - verify(clientBuilder).endpoint("https://account1"); + verify(clientBuilder).endpoint("https://account1.dfs.core.windows.net"); } @Test @@ -120,7 +122,7 @@ public void testNoConnectionString() { DataLakeFileSystemClientBuilder clientBuilder = mock(DataLakeFileSystemClientBuilder.class); props.applyClientConfiguration("account", clientBuilder); - verify(clientBuilder).endpoint("https://account"); + verify(clientBuilder).endpoint("https://account.dfs.core.windows.net"); } @Test diff --git a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java index 867b54b4c7e3..e07fea74397e 100644 --- a/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java +++ b/azure/src/test/java/org/apache/iceberg/azure/adlsv2/ADLSLocationTest.java @@ -33,7 +33,18 @@ public void testLocationParsing(String scheme) { String p1 = scheme + "://container@account.dfs.core.windows.net/path/to/file"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); + assertThat(location.storageAccount()).isEqualTo("account"); + assertThat(location.container().get()).isEqualTo("container"); + assertThat(location.path()).isEqualTo("path/to/file"); + } + + @ParameterizedTest + @ValueSource(strings = {"wasb", "wasbs"}) + public void testWasbLocatonParsing(String scheme) { + String p1 = scheme + "://container@account.blob.core.windows.net/path/to/file"; + ADLSLocation location = new ADLSLocation(p1); + + assertThat(location.storageAccount()).isEqualTo("account"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo("path/to/file"); } @@ -43,7 +54,7 @@ public void testEncodedString() { String p1 = "abfs://container@account.dfs.core.windows.net/path%20to%20file"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); + assertThat(location.storageAccount()).isEqualTo("account"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo("path%20to%20file"); } @@ -67,7 +78,7 @@ public void testNoContainer() { String p1 = "abfs://account.dfs.core.windows.net/path/to/file"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); + assertThat(location.storageAccount()).isEqualTo("account"); assertThat(location.container().isPresent()).isFalse(); assertThat(location.path()).isEqualTo("path/to/file"); } @@ -77,7 +88,7 @@ public void testNoPath() { String p1 = "abfs://container@account.dfs.core.windows.net"; ADLSLocation location = new ADLSLocation(p1); - assertThat(location.storageAccount()).isEqualTo("account.dfs.core.windows.net"); + assertThat(location.storageAccount()).isEqualTo("account"); assertThat(location.container().get()).isEqualTo("container"); assertThat(location.path()).isEqualTo(""); } diff --git a/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java b/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java index a858045aab8b..a8adf979f85a 100644 --- a/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java +++ b/core/src/main/java/org/apache/iceberg/io/ResolvingFileIO.java @@ -62,7 +62,9 @@ public class ResolvingFileIO implements HadoopConfigurable, DelegateFileIO { "s3n", S3_FILE_IO_IMPL, "gs", GCS_FILE_IO_IMPL, "abfs", ADLS_FILE_IO_IMPL, - "abfss", ADLS_FILE_IO_IMPL); + "abfss", ADLS_FILE_IO_IMPL, + "wasb", ADLS_FILE_IO_IMPL, + "wasbs", ADLS_FILE_IO_IMPL); private final Map ioInstances = Maps.newConcurrentMap(); private final AtomicBoolean isClosed = new AtomicBoolean(false); diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java index cc42604f700d..b89595635314 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTSessionCatalog.java @@ -615,7 +615,7 @@ public List listNamespaces(SessionContext context, Namespace namespac Map queryParams = Maps.newHashMap(); if (!namespace.isEmpty()) { - queryParams.put("parent", RESTUtil.encodeNamespace(namespace)); + queryParams.put("parent", RESTUtil.NAMESPACE_JOINER.join(namespace.levels())); } ImmutableList.Builder namespaces = ImmutableList.builder(); diff --git a/core/src/main/java/org/apache/iceberg/rest/RESTUtil.java b/core/src/main/java/org/apache/iceberg/rest/RESTUtil.java index 45422b8ae8b5..fab01162cad7 100644 --- a/core/src/main/java/org/apache/iceberg/rest/RESTUtil.java +++ b/core/src/main/java/org/apache/iceberg/rest/RESTUtil.java @@ -33,24 +33,14 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; public class RESTUtil { + private static final char NAMESPACE_SEPARATOR = '\u001f'; + public static final Joiner NAMESPACE_JOINER = Joiner.on(NAMESPACE_SEPARATOR); + public static final Splitter NAMESPACE_SPLITTER = Splitter.on(NAMESPACE_SEPARATOR); private static final String NAMESPACE_ESCAPED_SEPARATOR = "%1F"; private static final Joiner NAMESPACE_ESCAPED_JOINER = Joiner.on(NAMESPACE_ESCAPED_SEPARATOR); private static final Splitter NAMESPACE_ESCAPED_SPLITTER = Splitter.on(NAMESPACE_ESCAPED_SEPARATOR); - /** - * @deprecated since 1.7.0, will be made private in 1.8.0; use {@link - * RESTUtil#encodeNamespace(Namespace)} instead. - */ - @Deprecated public static final Joiner NAMESPACE_JOINER = Joiner.on(NAMESPACE_ESCAPED_SEPARATOR); - - /** - * @deprecated since 1.7.0, will be made private in 1.8.0; use {@link - * RESTUtil#decodeNamespace(String)} instead. - */ - @Deprecated - public static final Splitter NAMESPACE_SPLITTER = Splitter.on(NAMESPACE_ESCAPED_SEPARATOR); - private RESTUtil() {} public static String stripTrailingSlash(String path) { diff --git a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java index 6477dfcd00eb..aa77b5ad10b6 100644 --- a/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java +++ b/core/src/test/java/org/apache/iceberg/rest/RESTCatalogAdapter.java @@ -298,7 +298,11 @@ public T handleRequest( if (asNamespaceCatalog != null) { Namespace ns; if (vars.containsKey("parent")) { - ns = RESTUtil.decodeNamespace(vars.get("parent")); + ns = + Namespace.of( + RESTUtil.NAMESPACE_SPLITTER + .splitToStream(vars.get("parent")) + .toArray(String[]::new)); } else { ns = Namespace.empty(); } diff --git a/kafka-connect/build.gradle b/kafka-connect/build.gradle index d38d01768e66..15bf013f28b2 100644 --- a/kafka-connect/build.gradle +++ b/kafka-connect/build.gradle @@ -73,6 +73,7 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect-runtime') { force 'org.xerial.snappy:snappy-java:1.1.10.7' force 'org.apache.commons:commons-compress:1.27.1' force 'org.apache.hadoop.thirdparty:hadoop-shaded-guava:1.3.0' + force 'com.fasterxml.woodstox:woodstox-core:6.7.0' } } } @@ -96,7 +97,6 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect-runtime') { exclude group: 'org.slf4j' exclude group: 'ch.qos.reload4j' exclude group: 'org.apache.avro', module: 'avro' - exclude group: 'com.fasterxml.woodstox' exclude group: 'com.google.guava' exclude group: 'com.google.protobuf' exclude group: 'org.apache.curator' @@ -105,7 +105,6 @@ project(':iceberg-kafka-connect:iceberg-kafka-connect-runtime') { exclude group: 'org.apache.hadoop', module: 'hadoop-auth' exclude group: 'org.apache.commons', module: 'commons-configuration2' exclude group: 'org.apache.hadoop.thirdparty', module: 'hadoop-shaded-protobuf_3_7' - exclude group: 'org.codehaus.woodstox' exclude group: 'org.eclipse.jetty' } implementation project(':iceberg-orc')