From f6870627b9ed0c866a8c7be98adb3a17faebc705 Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Tue, 30 Jul 2024 10:24:17 +0200 Subject: [PATCH 1/2] Add additional S3 FileIO Attributes --- crates/iceberg/src/io/storage.rs | 2 +- crates/iceberg/src/io/storage_s3.rs | 73 ++++++++++++++++++++++++++++- 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/crates/iceberg/src/io/storage.rs b/crates/iceberg/src/io/storage.rs index d13ac04db..870e61ec6 100644 --- a/crates/iceberg/src/io/storage.rs +++ b/crates/iceberg/src/io/storage.rs @@ -54,7 +54,7 @@ impl Storage { #[cfg(feature = "storage-s3")] Scheme::S3 => Ok(Self::S3 { scheme_str, - config: super::s3_config_parse(props).into(), + config: super::s3_config_parse(props)?.into(), }), _ => Err(Error::new( ErrorKind::FeatureUnsupported, diff --git a/crates/iceberg/src/io/storage_s3.rs b/crates/iceberg/src/io/storage_s3.rs index acce18eed..a954aa203 100644 --- a/crates/iceberg/src/io/storage_s3.rs +++ b/crates/iceberg/src/io/storage_s3.rs @@ -16,6 +16,7 @@ // under the License. use std::collections::HashMap; +use std::str::FromStr; use opendal::services::S3Config; use opendal::Operator; @@ -32,9 +33,53 @@ pub const S3_ACCESS_KEY_ID: &str = "s3.access-key-id"; pub const S3_SECRET_ACCESS_KEY: &str = "s3.secret-access-key"; /// S3 region. pub const S3_REGION: &str = "s3.region"; +/// S3 Path Style Access. +pub const S3_PATH_STYLE_ACCESS: &str = "s3.path-style-access"; +/// S3 Server Side Encryption Type. +pub const S3_SSE_TYPE: &str = "s3.sse.type"; +/// S3 Server Side Encryption Key. +/// If S3 encryption type is kms, input is a KMS Key ID. +/// In case this property is not set, default key "aws/s3" is used. +/// If encryption type is custom, input is a custom base-64 AES256 symmetric key. +pub const S3_SSE_KEY: &str = "s3.sse.key"; +/// S3 Server Side Encryption MD5. +pub const S3_SSE_MD5: &str = "s3.sse.md5"; + +/// S3 Server Side Encryption types +#[derive(Debug, Clone, PartialEq, Hash)] +pub enum S3SSEType { + /// S3 SSE-C, using customer managed keys. https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html + Custom, + /// S3 SSE KMS, either using default or custom KMS key. https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html + KMS, + /// S3 SSE-S3 encryption (S3 managed keys). https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html + S3, + /// No Server Side Encryption + None, +} + +impl FromStr for S3SSEType { + type Err = Error; + + fn from_str(s: &str) -> Result { + match s { + "custom" => Ok(Self::Custom), + "kms" => Ok(Self::KMS), + "s3" => Ok(Self::S3), + "none" => Ok(Self::None), + _ => Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid {}: {}. Expected one of (custom, kms, s3, none)", + S3_SSE_TYPE, s + ), + )), + } + } +} /// Parse iceberg props to s3 config. -pub(crate) fn s3_config_parse(mut m: HashMap) -> S3Config { +pub(crate) fn s3_config_parse(mut m: HashMap) -> Result { let mut cfg = S3Config::default(); if let Some(endpoint) = m.remove(S3_ENDPOINT) { cfg.endpoint = Some(endpoint); @@ -48,8 +93,32 @@ pub(crate) fn s3_config_parse(mut m: HashMap) -> S3Config { if let Some(region) = m.remove(S3_REGION) { cfg.region = Some(region); }; + if let Some(path_style_access) = m.remove(S3_PATH_STYLE_ACCESS) { + if ["true", "True", "1"].contains(&path_style_access.as_str()) { + cfg.enable_virtual_host_style = true; + } + }; + let s3_sse_key = m.remove(S3_SSE_KEY); + if let Some(sse_type) = m.remove(S3_SSE_TYPE) { + let sse_type = sse_type.parse()?; + match sse_type { + S3SSEType::None => {} + S3SSEType::S3 => { + cfg.server_side_encryption = Some("AES256".to_string()); + } + S3SSEType::KMS => { + cfg.server_side_encryption = Some("aws:kms".to_string()); + cfg.server_side_encryption_aws_kms_key_id = s3_sse_key; + } + S3SSEType::Custom => { + cfg.server_side_encryption_customer_algorithm = Some("AES256".to_string()); + cfg.server_side_encryption_customer_key = s3_sse_key; + cfg.server_side_encryption_customer_key_md5 = m.remove(S3_SSE_MD5); + } + } + }; - cfg + Ok(cfg) } /// Build new opendal operator from give path. From 47a35426b2d47fe2cf907cad7bfa9e3554bcca15 Mon Sep 17 00:00:00 2001 From: Christian Thiel Date: Tue, 30 Jul 2024 10:49:06 +0200 Subject: [PATCH 2/2] Remove custom S3SSEType --- crates/iceberg/src/io/storage_s3.rs | 58 +++++++++-------------------- 1 file changed, 18 insertions(+), 40 deletions(-) diff --git a/crates/iceberg/src/io/storage_s3.rs b/crates/iceberg/src/io/storage_s3.rs index a954aa203..6e92b902a 100644 --- a/crates/iceberg/src/io/storage_s3.rs +++ b/crates/iceberg/src/io/storage_s3.rs @@ -16,7 +16,6 @@ // under the License. use std::collections::HashMap; -use std::str::FromStr; use opendal::services::S3Config; use opendal::Operator; @@ -45,39 +44,6 @@ pub const S3_SSE_KEY: &str = "s3.sse.key"; /// S3 Server Side Encryption MD5. pub const S3_SSE_MD5: &str = "s3.sse.md5"; -/// S3 Server Side Encryption types -#[derive(Debug, Clone, PartialEq, Hash)] -pub enum S3SSEType { - /// S3 SSE-C, using customer managed keys. https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html - Custom, - /// S3 SSE KMS, either using default or custom KMS key. https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html - KMS, - /// S3 SSE-S3 encryption (S3 managed keys). https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html - S3, - /// No Server Side Encryption - None, -} - -impl FromStr for S3SSEType { - type Err = Error; - - fn from_str(s: &str) -> Result { - match s { - "custom" => Ok(Self::Custom), - "kms" => Ok(Self::KMS), - "s3" => Ok(Self::S3), - "none" => Ok(Self::None), - _ => Err(Error::new( - ErrorKind::DataInvalid, - format!( - "Invalid {}: {}. Expected one of (custom, kms, s3, none)", - S3_SSE_TYPE, s - ), - )), - } - } -} - /// Parse iceberg props to s3 config. pub(crate) fn s3_config_parse(mut m: HashMap) -> Result { let mut cfg = S3Config::default(); @@ -100,21 +66,33 @@ pub(crate) fn s3_config_parse(mut m: HashMap) -> Result {} - S3SSEType::S3 => { + match sse_type.to_lowercase().as_str() { + // No Server Side Encryption + "none" => {} + // S3 SSE-S3 encryption (S3 managed keys). https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html + "s3" => { cfg.server_side_encryption = Some("AES256".to_string()); } - S3SSEType::KMS => { + // S3 SSE KMS, either using default or custom KMS key. https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html + "kms" => { cfg.server_side_encryption = Some("aws:kms".to_string()); cfg.server_side_encryption_aws_kms_key_id = s3_sse_key; } - S3SSEType::Custom => { + // S3 SSE-C, using customer managed keys. https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html + "custom" => { cfg.server_side_encryption_customer_algorithm = Some("AES256".to_string()); cfg.server_side_encryption_customer_key = s3_sse_key; cfg.server_side_encryption_customer_key_md5 = m.remove(S3_SSE_MD5); } + _ => { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid {}: {}. Expected one of (custom, kms, s3, none)", + S3_SSE_TYPE, sse_type + ), + )); + } } };