From 59a1114665590f785a94b9be125e90aac3c74ef6 Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Thu, 7 Jul 2022 14:00:31 +0300 Subject: [PATCH 01/15] airbyte-4184: Make S3 output filename configurable --- .../DatabricksDestinationConfig.java | 13 +- .../databricks/DatabricksStreamCopier.java | 4 + .../src/main/resources/spec.json | 9 ++ .../src/main/resources/spec.json | 9 ++ .../destination/s3/S3DestinationConfig.java | 91 +++++++++---- .../s3/S3DestinationConstants.java | 2 +- .../destination/s3/S3StorageOperations.java | 24 +++- .../destination/s3/avro/S3AvroWriter.java | 3 +- .../destination/s3/constant/S3Constants.java | 17 +++ .../destination/s3/csv/S3CsvWriter.java | 2 +- .../destination/s3/jsonl/S3JsonlWriter.java | 4 +- .../s3/parquet/S3ParquetWriter.java | 3 +- .../template/S3FilenameTemplateManager.java | 108 +++++++++++++++ .../S3FilenameTemplateParameterObject.java | 127 ++++++++++++++++++ .../destination/s3/writer/BaseS3Writer.java | 36 +++++ .../src/main/resources/spec.json | 9 ++ .../S3FilenameTemplateManagerTest.java | 77 +++++++++++ .../src/main/resources/spec.json | 9 ++ 18 files changed, 510 insertions(+), 37 deletions(-) create mode 100644 airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java create mode 100644 airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java create mode 100644 airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java create mode 100644 airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java 
b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java index f84024acfb49..a6f4c21ed120 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java @@ -8,6 +8,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.base.Preconditions; import io.airbyte.integrations.destination.s3.S3DestinationConfig; +import io.airbyte.integrations.destination.s3.constant.S3Constants; import io.airbyte.integrations.destination.s3.parquet.S3ParquetFormatConfig; /** @@ -60,12 +61,14 @@ public static DatabricksDestinationConfig get(final JsonNode config) { public static S3DestinationConfig getDataSource(final JsonNode dataSource) { return S3DestinationConfig.create( - dataSource.get("s3_bucket_name").asText(), - dataSource.get("s3_bucket_path").asText(), - dataSource.get("s3_bucket_region").asText()) + dataSource.get(S3Constants.S_3_BUCKET_NAME).asText(), + dataSource.get(S3Constants.S_3_BUCKET_PATH).asText(), + dataSource.get(S3Constants.S_3_BUCKET_REGION).asText()) + .withFileNamePattern( /* file_name_pattern is optional: a missing or null node yields no pattern instead of a NullPointerException */ + dataSource.hasNonNull(S3Constants.FILE_NAME_PATTERN) ? dataSource.get(S3Constants.FILE_NAME_PATTERN).asText() : null) .withAccessKeyCredential( - dataSource.get("s3_access_key_id").asText(), - dataSource.get("s3_secret_access_key").asText()) + dataSource.get("s3_access_key_id").asText(), /* the Databricks data-source key keeps its s3_ prefix; S3Constants.ACCESS_KEY_ID is the unprefixed "access_key_id" */ + dataSource.get(S3Constants.S_3_SECRET_ACCESS_KEY).asText()) .withFormatConfig(new S3ParquetFormatConfig(new ObjectMapper().createObjectNode())) .get(); } diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java 
b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java index cc0b3a619637..95370e331e08 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java +++ b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksStreamCopier.java @@ -4,6 +4,8 @@ package io.airbyte.integrations.destination.databricks; +import static org.apache.logging.log4j.util.Strings.EMPTY; + import com.amazonaws.services.s3.AmazonS3; import com.fasterxml.jackson.databind.ObjectMapper; import io.airbyte.db.jdbc.JdbcDatabase; @@ -18,6 +20,7 @@ import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.DestinationSyncMode; import java.sql.Timestamp; +import java.util.Optional; import java.util.UUID; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -209,6 +212,7 @@ static S3DestinationConfig getStagingS3DestinationConfig(final S3DestinationConf return S3DestinationConfig.create(config) .withBucketPath(String.join("/", config.getBucketPath(), stagingFolder)) .withFormatConfig(new S3ParquetFormatConfig(MAPPER.createObjectNode())) + .withFileNamePattern(Optional.ofNullable(config.getFileNamePattern()).orElse(EMPTY)) .get(); } diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json index c7fc3a259393..e08d6d1a52ca 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-databricks/src/main/resources/spec.json @@ -148,6 +148,15 @@ "examples": ["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"], "airbyte_secret": true, "order": 6 + }, + "file_name_pattern": { + "type": "string", + "description": "The 
pattern allows you to set the file-name format for the S3 staging file(s)", + "title": "S3 Filename pattern (Optional)", + "examples": [ + "{date}", "{date:yyyy_MM}", "{timestamp}", "{part_number}", "{sync_id}" + ], + "order": 7 } } } diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-redshift/src/main/resources/spec.json index 85ff89946bb7..881fd57a0f2d 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-redshift/src/main/resources/spec.json @@ -121,6 +121,15 @@ "me-south-1" ] }, + "file_name_pattern": { + "type": "string", + "description": "The pattern allows you to set the file-name format for the S3 staging file(s)", + "title": "S3 Filename pattern (Optional)", + "examples": [ + "{date}", "{date:yyyy_MM}", "{timestamp}", "{part_number}", "{sync_id}" + ], + "order": 8 + }, "access_key_id": { "type": "string", "description": "This ID grants access to the above S3 staging bucket. Airbyte requires Read and Write permissions to the given bucket. 
See AWS docs on how to generate an access key ID and secret access key.", diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java index 7dd2950dd2b6..fd3b28362293 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java @@ -4,6 +4,15 @@ package io.airbyte.integrations.destination.s3; +import static io.airbyte.integrations.destination.s3.constant.S3Constants.ACCESS_KEY_ID; +import static io.airbyte.integrations.destination.s3.constant.S3Constants.FILE_NAME_PATTERN; +import static io.airbyte.integrations.destination.s3.constant.S3Constants.SECRET_ACCESS_KEY; +import static io.airbyte.integrations.destination.s3.constant.S3Constants.S_3_BUCKET_NAME; +import static io.airbyte.integrations.destination.s3.constant.S3Constants.S_3_BUCKET_PATH; +import static io.airbyte.integrations.destination.s3.constant.S3Constants.S_3_BUCKET_REGION; +import static io.airbyte.integrations.destination.s3.constant.S3Constants.S_3_ENDPOINT; +import static io.airbyte.integrations.destination.s3.constant.S3Constants.S_3_PATH_FORMAT; + import com.amazonaws.ClientConfiguration; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.client.builder.AwsClientBuilder; @@ -19,8 +28,7 @@ import org.slf4j.LoggerFactory; /** - * An S3 configuration. Typical usage sets at most one of {@code bucketPath} (necessary for more - * delicate data syncing to S3) + * An S3 configuration. 
Typical usage sets at most one of {@code bucketPath} (necessary for more delicate data syncing to S3) */ public class S3DestinationConfig { @@ -33,18 +41,37 @@ public class S3DestinationConfig { private final String pathFormat; private final S3CredentialConfig credentialConfig; private final S3FormatConfig formatConfig; + private String fileNamePattern; private final Object lock = new Object(); private AmazonS3 s3Client; public S3DestinationConfig(final String endpoint, - final String bucketName, - final String bucketPath, - final String bucketRegion, - final String pathFormat, - final S3CredentialConfig credentialConfig, - final S3FormatConfig formatConfig, - final AmazonS3 s3Client) { + final String bucketName, + final String bucketPath, + final String bucketRegion, + final String pathFormat, + final S3CredentialConfig credentialConfig, + final S3FormatConfig formatConfig, + final AmazonS3 s3Client) { + this.endpoint = endpoint; + this.bucketName = bucketName; + this.bucketPath = bucketPath; + this.bucketRegion = bucketRegion; + this.pathFormat = pathFormat; + this.credentialConfig = credentialConfig; + this.formatConfig = formatConfig; + this.s3Client = s3Client; + } + public S3DestinationConfig(final String endpoint, + final String bucketName, + final String bucketPath, + final String bucketRegion, + final String pathFormat, + final S3CredentialConfig credentialConfig, + final S3FormatConfig formatConfig, + final AmazonS3 s3Client, + final String fileNamePattern) { this.endpoint = endpoint; this.bucketName = bucketName; this.bucketPath = bucketPath; @@ -53,6 +80,7 @@ public S3DestinationConfig(final String endpoint, this.credentialConfig = credentialConfig; this.formatConfig = formatConfig; this.s3Client = s3Client; + this.fileNamePattern = fileNamePattern; } public static Builder create(final String bucketName, final String bucketPath, final String bucketRegion) { @@ -67,26 +95,30 @@ public static Builder create(final S3DestinationConfig config) { } public 
static S3DestinationConfig getS3DestinationConfig(final JsonNode config) { - Builder builder = S3DestinationConfig.create( - config.get("s3_bucket_name").asText(), + Builder builder = create( + config.get(S_3_BUCKET_NAME).asText(), "", - config.get("s3_bucket_region").asText()); + config.get(S_3_BUCKET_REGION).asText()); - if (config.has("s3_bucket_path")) { - builder = builder.withBucketPath(config.get("s3_bucket_path").asText()); + if (config.has(S_3_BUCKET_PATH)) { + builder = builder.withBucketPath(config.get(S_3_BUCKET_PATH).asText()); } - if (config.has("s3_path_format")) { - builder = builder.withPathFormat(config.get("s3_path_format").asText()); + if (config.has(FILE_NAME_PATTERN)) { + builder = builder.withFileNamePattern(config.get(FILE_NAME_PATTERN).asText()); } - if (config.has("s3_endpoint")) { - builder = builder.withEndpoint(config.get("s3_endpoint").asText()); + if (config.has(S_3_PATH_FORMAT)) { + builder = builder.withPathFormat(config.get(S_3_PATH_FORMAT).asText()); + } + + if (config.has(S_3_ENDPOINT)) { + builder = builder.withEndpoint(config.get(S_3_ENDPOINT).asText()); } final S3CredentialConfig credentialConfig; - if (config.has("access_key_id")) { - credentialConfig = new S3AccessKeyCredentialConfig(config.get("access_key_id").asText(), config.get("secret_access_key").asText()); + if (config.has(ACCESS_KEY_ID)) { + credentialConfig = new S3AccessKeyCredentialConfig(config.get(ACCESS_KEY_ID).asText(), config.get(SECRET_ACCESS_KEY).asText()); } else { credentialConfig = new S3AWSDefaultProfileCredentialConfig(); } @@ -121,6 +153,10 @@ public String getBucketRegion() { return bucketRegion; } + public String getFileNamePattern() { + return fileNamePattern; + } + public S3CredentialConfig getS3CredentialConfig() { return credentialConfig; } @@ -138,7 +174,7 @@ public AmazonS3 getS3Client() { } } - public AmazonS3 resetS3Client() { + AmazonS3 resetS3Client() { synchronized (lock) { if (s3Client != null) { s3Client.shutdown(); @@ -154,14 +190,14 
@@ protected AmazonS3 createS3Client() { final AWSCredentialsProvider credentialsProvider = credentialConfig.getS3CredentialsProvider(); final S3CredentialType credentialType = credentialConfig.getCredentialType(); - if (credentialType == S3CredentialType.DEFAULT_PROFILE) { + if (S3CredentialType.DEFAULT_PROFILE == credentialType) { return AmazonS3ClientBuilder.standard() .withRegion(bucketRegion) .withCredentials(credentialsProvider) .build(); } - if (endpoint == null || endpoint.isEmpty()) { + if (null == endpoint || endpoint.isEmpty()) { return AmazonS3ClientBuilder.standard() .withCredentials(credentialsProvider) .withRegion(bucketRegion) @@ -210,6 +246,7 @@ public static class Builder { private S3CredentialConfig credentialConfig; private S3FormatConfig formatConfig; private AmazonS3 s3Client; + private String fileNamePattern; private Builder(final String bucketName, final String bucketPath, final String bucketRegion) { this.bucketName = bucketName; @@ -222,6 +259,11 @@ public Builder withBucketName(final String bucketName) { return this; } + public Builder withFileNamePattern(final String fileNamePattern) { + this.fileNamePattern = fileNamePattern; + return this; + } + public Builder withBucketPath(final String bucketPath) { this.bucketPath = bucketPath; return this; @@ -271,7 +313,8 @@ public S3DestinationConfig get() { pathFormat, credentialConfig, formatConfig, - s3Client); + s3Client, + fileNamePattern); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java index 89641d9357ad..cb8610e055fc 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java +++ 
b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java @@ -11,7 +11,7 @@ public final class S3DestinationConstants { public static final String YYYY_MM_DD_FORMAT_STRING = "yyyy_MM_dd"; public static final S3NameTransformer NAME_TRANSFORMER = new S3NameTransformer(); - public static final String DEFAULT_PATH_FORMAT = "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_"; + public static final String DEFAULT_PATH_FORMAT = "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_/"; // gzip compression for CSV and JSONL public static final String COMPRESSION_ARG_NAME = "compression"; diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java index 59a9fa92a9ea..fb21ed73dd22 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java @@ -18,6 +18,8 @@ import io.airbyte.commons.string.Strings; import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateManager; import io.airbyte.integrations.destination.s3.util.StreamTransferManagerFactory; import java.io.IOException; import java.io.InputStream; @@ -29,6 +31,7 @@ import java.util.UUID; import java.util.regex.Pattern; import org.apache.commons.io.FilenameUtils; +import org.apache.commons.lang3.StringUtils; import org.joda.time.DateTime; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @@ -37,6 +40,8 @@ public class S3StorageOperations extends BlobStorageOperations { private static final Logger LOGGER = LoggerFactory.getLogger(S3StorageOperations.class); + private final S3FilenameTemplateManager s3FilenameTemplateManager = new S3FilenameTemplateManager(); + private static final int DEFAULT_UPLOAD_THREADS = 10; // The S3 cli uses 10 threads by default. private static final int DEFAULT_QUEUE_CAPACITY = DEFAULT_UPLOAD_THREADS; private static final int DEFAULT_PART_SIZE = 10; @@ -135,8 +140,23 @@ public String uploadRecordsToBucket(final SerializableBuffer recordsData, private String loadDataIntoBucket(final String objectPath, final SerializableBuffer recordsData) throws IOException { final long partSize = DEFAULT_PART_SIZE; final String bucket = s3Config.getBucketName(); - final String fullObjectKey = objectPath + getPartId(objectPath) + getExtension(recordsData.getFilename()); - + final String partId = getPartId(objectPath); + final String fileExtension = getExtension(recordsData.getFilename()); + final String fullObjectKey; + if (StringUtils.isNotBlank(s3Config.getFileNamePattern())) { + fullObjectKey = s3FilenameTemplateManager + .adaptFilenameAccordingSpecificationPatternWithDefaultConfig( + S3FilenameTemplateParameterObject + .builder() + .partId(partId) + .recordsData(recordsData) + .objectPath(objectPath) + .fileExtension(fileExtension) + .fileNamePattern(s3Config.getFileNamePattern()) + .build()); + } else { + fullObjectKey = objectPath + partId + fileExtension; + } final Map metadata = new HashMap<>(); for (final BlobDecorator blobDecorator : blobDecorators) { blobDecorator.updateMetadata(metadata, getMetadataMapping()); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java index 9eece89ed9f8..0f3e8db8368e 100644 
--- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java @@ -47,7 +47,8 @@ public S3AvroWriter(final S3DestinationConfig config, throws IOException { super(config, s3Client, configuredStream); - final String outputFilename = BaseS3Writer.getOutputFilename(uploadTimestamp, S3Format.AVRO); + final String outputFilename = determineOutputFilename(config, S3Format.AVRO, uploadTimestamp); + objectKey = String.join("/", outputPrefix, outputFilename); LOGGER.info("Full S3 path for stream '{}': s3://{}/{}", stream.getName(), config.getBucketName(), objectKey); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java new file mode 100644 index 000000000000..c6f8ca541dba --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java @@ -0,0 +1,17 @@ +package io.airbyte.integrations.destination.s3.constant; +/** JSON property names used when reading connector configuration. Constants named S_3_... hold keys that start with "s3_"; the others hold the bare key. */ +public final class S3Constants { + public static final String S_3_BUCKET_PATH = "s3_bucket_path"; + public static final String FILE_NAME_PATTERN = "file_name_pattern"; + public static final String S_3_PATH_FORMAT = "s3_path_format"; + public static final String S_3_ENDPOINT = "s3_endpoint"; + public static final String ACCESS_KEY_ID = "access_key_id"; + public static final String S_3_SECRET_ACCESS_KEY = "s3_secret_access_key"; + public static final String SECRET_ACCESS_KEY = "secret_access_key"; + public static final String S_3_BUCKET_NAME = "s3_bucket_name"; + public static final String S_3_BUCKET_REGION = "s3_bucket_region"; + /* NOTE(review): SECRET_ACCESS_KEY has an "s3_"-prefixed sibling (S_3_SECRET_ACCESS_KEY) but ACCESS_KEY_ID does not; check which spelling a given connector spec uses before picking a constant. */ + + private S3Constants() { + } +} diff --git 
a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java index cce2da71e33f..ec6ccbdcd3e6 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java @@ -52,7 +52,7 @@ private S3CsvWriter(final S3DestinationConfig config, this.csvSheetGenerator = csvSheetGenerator; final String fileSuffix = "_" + UUID.randomUUID(); - final String outputFilename = BaseS3Writer.getOutputFilename(uploadTimestamp, fileSuffix, S3Format.CSV); + final String outputFilename = determineOutputFilename(config, S3Format.CSV, fileSuffix, uploadTimestamp); this.objectKey = String.join("/", outputPrefix, outputFilename); LOGGER.info("Full S3 path for stream '{}': s3://{}/{}", stream.getName(), config.getBucketName(), diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java index b415100a4e77..1b79291f3365 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java @@ -43,10 +43,10 @@ public class S3JsonlWriter extends BaseS3Writer implements DestinationFileWriter public S3JsonlWriter(final S3DestinationConfig config, final AmazonS3 s3Client, final ConfiguredAirbyteStream configuredStream, - final Timestamp uploadTimestamp) { + final Timestamp uploadTimestamp) throws IOException { super(config, s3Client, 
configuredStream); - final String outputFilename = BaseS3Writer.getOutputFilename(uploadTimestamp, S3Format.JSONL); + final String outputFilename = determineOutputFilename(config, S3Format.JSONL, uploadTimestamp); objectKey = String.join("/", outputPrefix, outputFilename); LOGGER.info("Full S3 path for stream '{}': s3://{}/{}", stream.getName(), config.getBucketName(), objectKey); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java index 8f86f4041e04..370ca726cd4f 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java @@ -52,7 +52,8 @@ public S3ParquetWriter(final S3DestinationConfig config, throws URISyntaxException, IOException { super(config, s3Client, configuredStream); - this.outputFilename = BaseS3Writer.getOutputFilename(uploadTimestamp, S3Format.PARQUET); + outputFilename = determineOutputFilename(config, S3Format.PARQUET, uploadTimestamp); + objectKey = String.join("/", outputPrefix, outputFilename); LOGGER.info("Full S3 path for stream '{}': s3://{}/{}", stream.getName(), config.getBucketName(), objectKey); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java new file mode 100644 index 000000000000..f93292440ebd --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java @@ -0,0 +1,108 
@@ +package io.airbyte.integrations.destination.s3.template; + +import static java.util.Optional.ofNullable; +import static org.apache.commons.lang3.StringUtils.EMPTY; + +import io.airbyte.integrations.destination.s3.S3DestinationConstants; +import java.io.IOException; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.time.Instant; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.TimeZone; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import org.apache.commons.text.StringSubstitutor; +import org.apache.commons.text.lookup.StringLookupFactory; + +/** + * This class is responsible for building the filename template based on user input, + * see file_name_pattern in the specification of connector + * currently supported only S3 staging. + */ +public class S3FilenameTemplateManager { + + private static final String UTC = "UTC"; + private final StringSubstitutor stringSubstitutor; + + public S3FilenameTemplateManager() { + stringSubstitutor = new StringSubstitutor(); + } + + public String adaptFilenameAccordingSpecificationPatternWithDefaultConfig(final S3FilenameTemplateParameterObject parameterObject) throws IOException { + //sanitize fileFormat + final String sanitizedFileFormat = parameterObject + .getFileNamePattern() + .trim() + .replaceAll(" ", "_"); + + stringSubstitutor.setVariableResolver( + StringLookupFactory.INSTANCE.mapStringLookup(fillTheMapWithDefaultPlaceHolders(sanitizedFileFormat, parameterObject)) + ); + stringSubstitutor.setVariablePrefix("{"); + stringSubstitutor.setVariableSuffix("}"); + return ofNullable(parameterObject.getObjectPath()).orElse(EMPTY) + stringSubstitutor.replace(sanitizedFileFormat); + } + + private Map fillTheMapWithDefaultPlaceHolders(final String stringToReplaceWithPlaceholder, final S3FilenameTemplateParameterObject parameterObject) { + + final long currentTimeMillis = Instant.now().toEpochMilli(); + + final Map valuesMap = 
processExtendedPlaceholder(currentTimeMillis, stringToReplaceWithPlaceholder); + + final DateFormat defaultDateFormat = new SimpleDateFormat(S3DestinationConstants.YYYY_MM_DD_FORMAT_STRING); + defaultDateFormat.setTimeZone(TimeZone.getTimeZone(UTC)); + + // here we set default values for supported placeholders. + valuesMap.put("date", ofNullable(defaultDateFormat.format(currentTimeMillis)).orElse(EMPTY)); + valuesMap.put("timestamp", ofNullable(String.valueOf(currentTimeMillis)).orElse(EMPTY)); + valuesMap.put("sync_id", ofNullable(System.getenv("WORKER_JOB_ID")).orElse(EMPTY)); + valuesMap.put("format_extension", ofNullable(parameterObject.getFileExtension()).orElse(EMPTY)); + valuesMap.put("part_number", ofNullable(parameterObject.getPartId()).orElse(EMPTY)); + + return valuesMap; + } + + /** + * By extended placeholders we assume next types: {date:yyyy_MM}, {timestamp:millis}, {timestamp:micro}, etc Limited combinations are supported by the method see the method body. + * + * @param stringToReplaceWithPlaceholder - string where the method will search for extended placeholders + * @return map with prepared placeholders. 
+ */ + private Map processExtendedPlaceholder(final long currentTimeMillis, final String stringToReplaceWithPlaceholder) { + final Map valuesMap = new HashMap<>(); + + final Pattern pattern = Pattern.compile("\\{(date:.+?|timestamp:.+?)\\}"); + final Matcher matcher = pattern.matcher(stringToReplaceWithPlaceholder); + + /* Each match has the form kind:argument. Split on the first colon only, so an argument that itself contains ':' (e.g. {date:HH:mm}) is kept whole and index 1 always exists. */ + while (matcher.find()) { + final String[] splitByColon = matcher.group(1).split(":", 2); + switch (splitByColon[0].toLowerCase(Locale.ROOT)) { + case "date" -> { + final DateFormat dateFormat = new SimpleDateFormat(splitByColon[1]); + dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); + valuesMap.put(matcher.group(1), dateFormat.format(currentTimeMillis)); + } + case "timestamp" -> { + switch (splitByColon[1]) { + case "millis" -> { + valuesMap.put(matcher.group(1), String.valueOf(currentTimeMillis)); + } + case "micro" -> { + valuesMap.put(matcher.group(1), String.valueOf(convertToMicrosecondsRepresentation(currentTimeMillis))); + } + } + } + } + } + return valuesMap; + } + + private long convertToMicrosecondsRepresentation(final long milliSeconds) { + // The time representation in microseconds is equal to the milliseconds multiplied by 1,000. 
+ return milliSeconds * 1000; + } +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java new file mode 100644 index 000000000000..c21ca4717308 --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java @@ -0,0 +1,127 @@ +package io.airbyte.integrations.destination.s3.template; + +import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; +import io.airbyte.integrations.destination.s3.S3Format; +import java.util.Objects; + +/** + * This class is used as argument holder S3FilenameTemplateManager.class + * @see S3FilenameTemplateManager#adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject) + */ +public class S3FilenameTemplateParameterObject { + + private final String objectPath; + private final SerializableBuffer recordsData; + private final String fileNamePattern; + private final String fileExtension; + private final String partId; + private final S3Format s3Format; + + S3FilenameTemplateParameterObject(final String objectPath, final SerializableBuffer recordsData, final String fileNamePattern, final String fileExtension, final String partId, final S3Format s3Format) { + this.objectPath = objectPath; + this.recordsData = recordsData; + this.fileNamePattern = fileNamePattern; + this.fileExtension = fileExtension; + this.partId = partId; + this.s3Format = s3Format; + } + + public static FilenameTemplateParameterObjectBuilder builder() { + return new FilenameTemplateParameterObjectBuilder(); + } + + + public String getObjectPath() { + return objectPath; + } + + public SerializableBuffer getRecordsData() { + return recordsData; + } + + public String 
getFileNamePattern() { + return fileNamePattern; + } + + public String getFileExtension() { + return fileExtension; + } + + public String getPartId() { + return partId; + } + + public S3Format getS3Format() { + return s3Format; + } + + public static class FilenameTemplateParameterObjectBuilder { + + private String objectPath; + private SerializableBuffer recordsData; + private String fileNamePattern; + private String fileExtension; + private String partId; + private S3Format s3Format; + + FilenameTemplateParameterObjectBuilder() { + } + + public FilenameTemplateParameterObjectBuilder objectPath(final String objectPath) { + this.objectPath = objectPath; + return this; + } + + public FilenameTemplateParameterObjectBuilder recordsData(final SerializableBuffer recordsData) { + this.recordsData = recordsData; + return this; + } + + public FilenameTemplateParameterObjectBuilder fileNamePattern(final String fileNamePattern) { + this.fileNamePattern = fileNamePattern; + return this; + } + + public FilenameTemplateParameterObjectBuilder fileExtension(final String fileExtension) { + this.fileExtension = fileExtension; + return this; + } + + public FilenameTemplateParameterObjectBuilder partId(final String partId) { + this.partId = partId; + return this; + } + + public FilenameTemplateParameterObjectBuilder s3Format(final S3Format s3Format) { + this.s3Format = s3Format; + return this; + } + + public S3FilenameTemplateParameterObject build() { + return new S3FilenameTemplateParameterObject(objectPath, recordsData, fileNamePattern, fileExtension, partId, s3Format); + } + + public String toString() { + return "FilenameTemplateParameterObject.FilenameTemplateParameterObjectBuilder(objectPath=" + objectPath + ", recordsData=" + recordsData + ", fileNamePattern=" + fileNamePattern + + ", fileExtension=" + fileExtension + ", partId=" + partId + ", s3Format=" + s3Format + ")"; + } + } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if 
(o == null || getClass() != o.getClass()) { + return false; + } + final S3FilenameTemplateParameterObject that = (S3FilenameTemplateParameterObject) o; + return Objects.equals(objectPath, that.objectPath) && Objects.equals(recordsData, that.recordsData) && Objects.equals(fileNamePattern, that.fileNamePattern) + && Objects.equals(fileExtension, that.fileExtension) && Objects.equals(partId, that.partId) && s3Format == that.s3Format; + } + + @Override + public int hashCode() { + return Objects.hash(objectPath, recordsData, fileNamePattern, fileExtension, partId, s3Format); + } +} \ No newline at end of file diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java index 7d0fcda79bd4..57dcf35a0024 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java @@ -12,6 +12,8 @@ import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3DestinationConstants; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateManager; import io.airbyte.integrations.destination.s3.util.S3OutputPathHelper; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.ConfiguredAirbyteStream; @@ -23,6 +25,7 @@ import java.util.LinkedList; import java.util.List; import java.util.TimeZone; +import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -37,6 +40,8 @@ public abstract class BaseS3Writer 
implements DestinationFileWriter { private static final Logger LOGGER = LoggerFactory.getLogger(BaseS3Writer.class); + + private static final S3FilenameTemplateManager s3FilenameTemplateManager = new S3FilenameTemplateManager(); private static final String DEFAULT_SUFFIX = "_0"; protected final S3DestinationConfig config; @@ -130,6 +135,34 @@ protected void closeWhenFail() throws IOException { // Do nothing by default } + protected String determineOutputFilename(final S3DestinationConfig config, final S3Format s3Format, final Timestamp uploadTimestamp) throws IOException { + final String outputFilename; + if (StringUtils.isNotBlank(config.getFileNamePattern())){ + outputFilename = getOutputFilename(S3FilenameTemplateParameterObject + .builder() + .fileExtension(s3Format.getFileExtension()) + .fileNamePattern(config.getFileNamePattern()) + .build()); + } else { + outputFilename = getOutputFilename(uploadTimestamp, s3Format); + } + return outputFilename; + } + + protected String determineOutputFilename(final S3DestinationConfig config, final S3Format s3Format, final String suffix, final Timestamp uploadTimestamp) throws IOException { + final String outputFilename; + if (StringUtils.isNotBlank(config.getFileNamePattern())){ + outputFilename = getOutputFilename(S3FilenameTemplateParameterObject + .builder() + .fileExtension(s3Format.getFileExtension()) + .fileNamePattern(config.getFileNamePattern()) + .build()); + } else { + outputFilename = getOutputFilename(uploadTimestamp, suffix, s3Format); + } + return outputFilename; + } + /** * @return A string in the format "{upload-date}_{upload-millis}_0.{format-extension}". 
For example, * "2021_12_09_1639077474000_0.csv" @@ -156,4 +189,7 @@ public static String getOutputFilename(final Timestamp timestamp, final String c format.getFileExtension()); } + public static String getOutputFilename(final S3FilenameTemplateParameterObject parameterObject) throws IOException { + return s3FilenameTemplateManager.adaptFilenameAccordingSpecificationPatternWithDefaultConfig(parameterObject); + } } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json index 01c8f64a932a..7282ec6557c5 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-s3/src/main/resources/spec.json @@ -371,6 +371,15 @@ "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_" ], "order": 7 + }, + "file_name_pattern": { + "type": "string", + "description": "The pattern allows you to set the file-name format for the S3 staging file(s)", + "title": "S3 Filename pattern (Optional)", + "examples": [ + "{date}", "{date:yyyy_MM}", "{timestamp}", "{part_number}", "{sync_id}" + ], + "order": 8 } } } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java new file mode 100644 index 000000000000..61c1eea1a7ac --- /dev/null +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java @@ -0,0 +1,77 @@ +package io.airbyte.integrations.destination.s3.tamplate; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.mockito.Mockito.mockStatic; + +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; 
+import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateManager; +import java.io.IOException; +import java.time.Clock; +import java.time.Instant; +import java.time.LocalDate; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; + +class S3FilenameTemplateManagerTest { + + private final S3FilenameTemplateManager s3FilenameTemplateManager = new S3FilenameTemplateManager(); + + @Test + @DisplayName("Should replace the date placeholder with the current date in the format YYYY-MM-DD") + void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceDatePlaceholderWithCurrentDateInTheFormatYYYY_MM_DD() throws IOException { + final String fileNamePattern = "test-{date}"; + final String fileExtension = "csv"; + final String partId = "1"; + + final String actual = s3FilenameTemplateManager + .adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject + .builder() + .objectPath("") + .fileNamePattern(fileNamePattern) + .fileExtension(fileExtension) + .partId(partId).build()); + + final String expected = "test-" + LocalDate.now().format(DateTimeFormatter.ofPattern("yyyy_MM_dd")); + assertEquals(expected, actual); + } + + @Test + @DisplayName("Should replace the timestamp placeholder with the current timestamp in milliseconds") + void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceTimestampPlaceholderWithCurrentTimestampInMilliseconds() throws IOException { + final String fileNamePattern = "test-{timestamp}.csv"; + + final Clock clock = Clock.fixed(Instant.ofEpochMilli(1657110148000L), ZoneId.of("UTC")); + final Instant instant = Instant.now(clock); + + try (final MockedStatic mocked = mockStatic(Instant.class)) { + mocked.when(Instant::now).thenReturn(instant); + final String actual = s3FilenameTemplateManager + 
.adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject.builder() + .objectPath("") + .fileNamePattern(fileNamePattern) + .fileExtension("csv") + .partId("1") + .build()); + + assertEquals("test-1657110148000.csv", actual); + } + } + + @Test + @DisplayName("Should sanitize the string and adapt it to applicable S3 format") + void testIfFilenameTemplateStringWasSanitized() throws IOException { + final String fileNamePattern = " te st.csv "; + final String actual = s3FilenameTemplateManager + .adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject.builder() + .objectPath("") + .fileNamePattern(fileNamePattern) + .fileExtension("csv") + .partId("1") + .build()); + + assertEquals("te__st.csv", actual); + } +} diff --git a/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json b/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json index 9603ae207ace..898a62e64936 100644 --- a/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json +++ b/airbyte-integrations/connectors/destination-snowflake/src/main/resources/spec.json @@ -279,6 +279,15 @@ } } ] + }, + "file_name_pattern": { + "type": "string", + "description": "The pattern allows you to set the file-name format for the S3 staging file(s)", + "title": "S3 Filename pattern (Optional)", + "examples": [ + "{date}", "{date:yyyy_MM}", "{timestamp}", "{part_number}", "{sync_id}" + ], + "order": 7 } } }, From f6624b4ecc6b9f6eb86713e6ad0e608f35a4053e Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Thu, 7 Jul 2022 17:31:33 +0300 Subject: [PATCH 02/15] airbyte-4184: Make S3 output filename configurable --- .../databricks/DatabricksDestinationConfig.java | 17 +++++++++-------- .../destination/s3/S3DestinationConstants.java | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git 
a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java index a6f4c21ed120..0bdd76992030 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java @@ -60,17 +60,18 @@ public static DatabricksDestinationConfig get(final JsonNode config) { } public static S3DestinationConfig getDataSource(final JsonNode dataSource) { - return S3DestinationConfig.create( - dataSource.get(S3Constants.S_3_BUCKET_NAME).asText(), - dataSource.get(S3Constants.S_3_BUCKET_PATH).asText(), - dataSource.get(S3Constants.S_3_BUCKET_REGION).asText()) - .withFileNamePattern( - dataSource.get(S3Constants.FILE_NAME_PATTERN).asText()) + final S3DestinationConfig.Builder builder = S3DestinationConfig.create( + dataSource.get(S3Constants.S_3_BUCKET_NAME).asText(), + dataSource.get(S3Constants.S_3_BUCKET_PATH).asText(), + dataSource.get(S3Constants.S_3_BUCKET_REGION).asText()) .withAccessKeyCredential( dataSource.get(S3Constants.ACCESS_KEY_ID).asText(), dataSource.get(S3Constants.S_3_SECRET_ACCESS_KEY).asText()) - .withFormatConfig(new S3ParquetFormatConfig(new ObjectMapper().createObjectNode())) - .get(); + .withFormatConfig(new S3ParquetFormatConfig(new ObjectMapper().createObjectNode())); + if (dataSource.has(S3Constants.FILE_NAME_PATTERN)) { + builder.withFileNamePattern(dataSource.get(S3Constants.FILE_NAME_PATTERN).asText()); + } + return builder.get(); } public String getDatabricksServerHostname() { diff --git 
a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java index cb8610e055fc..89641d9357ad 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConstants.java @@ -11,7 +11,7 @@ public final class S3DestinationConstants { public static final String YYYY_MM_DD_FORMAT_STRING = "yyyy_MM_dd"; public static final S3NameTransformer NAME_TRANSFORMER = new S3NameTransformer(); - public static final String DEFAULT_PATH_FORMAT = "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_/"; + public static final String DEFAULT_PATH_FORMAT = "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_"; // gzip compression for CSV and JSONL public static final String COMPRESSION_ARG_NAME = "compression"; From 844779c265f036a99db878003fadd049b07ef777 Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Thu, 7 Jul 2022 18:29:10 +0300 Subject: [PATCH 03/15] airbyte-4184: Make S3 output filename configurable --- .../destination/databricks/DatabricksDestinationConfig.java | 2 +- .../integrations/destination/s3/constant/S3Constants.java | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java index 0bdd76992030..9ad34cc06e15 100644 --- 
a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java @@ -65,7 +65,7 @@ public static S3DestinationConfig getDataSource(final JsonNode dataSource) { dataSource.get(S3Constants.S_3_BUCKET_PATH).asText(), dataSource.get(S3Constants.S_3_BUCKET_REGION).asText()) .withAccessKeyCredential( - dataSource.get(S3Constants.ACCESS_KEY_ID).asText(), + dataSource.get(S3Constants.S_3_ACCESS_KEY_ID).asText(), dataSource.get(S3Constants.S_3_SECRET_ACCESS_KEY).asText()) .withFormatConfig(new S3ParquetFormatConfig(new ObjectMapper().createObjectNode())); if (dataSource.has(S3Constants.FILE_NAME_PATTERN)) { diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java index c6f8ca541dba..32263c48778e 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java @@ -6,6 +6,7 @@ public final class S3Constants { public static final String S_3_PATH_FORMAT = "s3_path_format"; public static final String S_3_ENDPOINT = "s3_endpoint"; public static final String ACCESS_KEY_ID = "access_key_id"; + public static final String S_3_ACCESS_KEY_ID = "s3_access_key_id"; public static final String S_3_SECRET_ACCESS_KEY = "s3_secret_access_key"; public static final String SECRET_ACCESS_KEY = "secret_access_key"; public static final String S_3_BUCKET_NAME = "s3_bucket_name"; From b4b4b0c1f1e720574c238fe76376013376480955 Mon Sep 17 00:00:00 2001 From: 
Oleksandr Tsukanov Date: Mon, 11 Jul 2022 12:41:52 +0300 Subject: [PATCH 04/15] airbyte-4184: Run reformat. --- .../DatabricksDestinationConfig.java | 6 ++-- .../RedshiftStagingS3Destination.java | 11 +++--- ...dshiftInsertDestinationAcceptanceTest.java | 1 - .../redshift/util/RedshiftUtilTest.java | 7 +++- .../destination/s3/S3DestinationConfig.java | 36 ++++++++++--------- .../destination/s3/S3StorageOperations.java | 16 ++++----- .../destination/s3/constant/S3Constants.java | 8 +++-- .../destination/s3/jsonl/S3JsonlWriter.java | 3 +- .../template/S3FilenameTemplateManager.java | 28 +++++++++------ .../S3FilenameTemplateParameterObject.java | 26 ++++++++++---- .../destination/s3/writer/BaseS3Writer.java | 18 ++++++---- .../S3FilenameTemplateManagerTest.java | 13 +++++-- 12 files changed, 108 insertions(+), 65 deletions(-) diff --git a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java index 9ad34cc06e15..5e7446392f04 100644 --- a/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java +++ b/airbyte-integrations/connectors/destination-databricks/src/main/java/io/airbyte/integrations/destination/databricks/DatabricksDestinationConfig.java @@ -61,9 +61,9 @@ public static DatabricksDestinationConfig get(final JsonNode config) { public static S3DestinationConfig getDataSource(final JsonNode dataSource) { final S3DestinationConfig.Builder builder = S3DestinationConfig.create( - dataSource.get(S3Constants.S_3_BUCKET_NAME).asText(), - dataSource.get(S3Constants.S_3_BUCKET_PATH).asText(), - dataSource.get(S3Constants.S_3_BUCKET_REGION).asText()) + dataSource.get(S3Constants.S_3_BUCKET_NAME).asText(), + 
dataSource.get(S3Constants.S_3_BUCKET_PATH).asText(), + dataSource.get(S3Constants.S_3_BUCKET_REGION).asText()) .withAccessKeyCredential( dataSource.get(S3Constants.S_3_ACCESS_KEY_ID).asText(), dataSource.get(S3Constants.S_3_SECRET_ACCESS_KEY).asText()) diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java index 5dd6d2313adb..bdd76264994a 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java @@ -59,15 +59,16 @@ private boolean isEphemeralKeysAndPurgingStagingData(JsonNode config, Encryption @Override public AirbyteConnectionStatus check(final JsonNode config) { final S3DestinationConfig s3Config = getS3DestinationConfig(findS3Options(config)); - final EncryptionConfig encryptionConfig = config.has("uploading_method") ? - EncryptionConfig.fromJson(config.get("uploading_method").get("encryption")) : new NoEncryption(); + final EncryptionConfig encryptionConfig = + config.has("uploading_method") ? EncryptionConfig.fromJson(config.get("uploading_method").get("encryption")) : new NoEncryption(); if (isEphemeralKeysAndPurgingStagingData(config, encryptionConfig)) { return new AirbyteConnectionStatus() .withStatus(Status.FAILED) .withMessage( "You cannot use ephemeral keys and disable purging your staging data. 
This would produce S3 objects that you cannot decrypt."); } - S3Destination.attemptS3WriteAndDelete(new S3StorageOperations(new RedshiftSQLNameTransformer(), s3Config.getS3Client(), s3Config), s3Config, s3Config.getBucketPath()); + S3Destination.attemptS3WriteAndDelete(new S3StorageOperations(new RedshiftSQLNameTransformer(), s3Config.getS3Client(), s3Config), s3Config, + s3Config.getBucketPath()); final NamingConventionTransformer nameTransformer = getNamingResolver(); final RedshiftS3StagingSqlOperations redshiftS3StagingSqlOperations = @@ -124,8 +125,8 @@ public JsonNode toJdbcConfig(final JsonNode config) { public AirbyteMessageConsumer getConsumer(final JsonNode config, final ConfiguredAirbyteCatalog catalog, final Consumer outputRecordCollector) { - final EncryptionConfig encryptionConfig = config.has("uploading_method") ? - EncryptionConfig.fromJson(config.get("uploading_method").get("encryption")) : new NoEncryption(); + final EncryptionConfig encryptionConfig = + config.has("uploading_method") ? 
EncryptionConfig.fromJson(config.get("uploading_method").get("encryption")) : new NoEncryption(); final JsonNode s3Options = findS3Options(config); final S3DestinationConfig s3Config = getS3DestinationConfig(s3Options); return new StagingConsumerFactory().create( diff --git a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftInsertDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftInsertDestinationAcceptanceTest.java index 4e2268540f53..8035bb854721 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftInsertDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftInsertDestinationAcceptanceTest.java @@ -78,7 +78,6 @@ void setup() { .withDestinationSyncMode(DestinationSyncMode.APPEND))); } - @Test void testIfSuperTmpTableWasCreatedAfterVarcharTmpTable() throws Exception { setup(); diff --git a/airbyte-integrations/connectors/destination-redshift/src/test/java/io/airbyte/integrations/destination/redshift/util/RedshiftUtilTest.java b/airbyte-integrations/connectors/destination-redshift/src/test/java/io/airbyte/integrations/destination/redshift/util/RedshiftUtilTest.java index f5e167cc7668..163b7d249b8f 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test/java/io/airbyte/integrations/destination/redshift/util/RedshiftUtilTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test/java/io/airbyte/integrations/destination/redshift/util/RedshiftUtilTest.java @@ -1,3 +1,7 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + package io.airbyte.integrations.destination.redshift.util; import static io.airbyte.integrations.destination.redshift.constants.RedshiftDestinationConstants.UPLOADING_METHOD; @@ -64,4 +68,5 @@ public void testAllS3RequiredAreNotNullOrEmptyThenReturnFalse() { assertFalse(RedshiftUtil.anyOfS3FieldsAreNullOrEmpty(jsonNode)); } -} \ No newline at end of file + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java index fd3b28362293..b8536b5596d4 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3DestinationConfig.java @@ -19,16 +19,17 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.fasterxml.jackson.databind.JsonNode; +import io.airbyte.integrations.destination.s3.credential.S3AWSDefaultProfileCredentialConfig; import io.airbyte.integrations.destination.s3.credential.S3AccessKeyCredentialConfig; import io.airbyte.integrations.destination.s3.credential.S3CredentialConfig; import io.airbyte.integrations.destination.s3.credential.S3CredentialType; -import io.airbyte.integrations.destination.s3.credential.S3AWSDefaultProfileCredentialConfig; import java.util.Objects; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** - * An S3 configuration. Typical usage sets at most one of {@code bucketPath} (necessary for more delicate data syncing to S3) + * An S3 configuration. 
Typical usage sets at most one of {@code bucketPath} (necessary for more + * delicate data syncing to S3) */ public class S3DestinationConfig { @@ -47,13 +48,13 @@ public class S3DestinationConfig { private AmazonS3 s3Client; public S3DestinationConfig(final String endpoint, - final String bucketName, - final String bucketPath, - final String bucketRegion, - final String pathFormat, - final S3CredentialConfig credentialConfig, - final S3FormatConfig formatConfig, - final AmazonS3 s3Client) { + final String bucketName, + final String bucketPath, + final String bucketRegion, + final String pathFormat, + final S3CredentialConfig credentialConfig, + final S3FormatConfig formatConfig, + final AmazonS3 s3Client) { this.endpoint = endpoint; this.bucketName = bucketName; this.bucketPath = bucketPath; @@ -63,15 +64,16 @@ public S3DestinationConfig(final String endpoint, this.formatConfig = formatConfig; this.s3Client = s3Client; } + public S3DestinationConfig(final String endpoint, - final String bucketName, - final String bucketPath, - final String bucketRegion, - final String pathFormat, - final S3CredentialConfig credentialConfig, - final S3FormatConfig formatConfig, - final AmazonS3 s3Client, - final String fileNamePattern) { + final String bucketName, + final String bucketPath, + final String bucketRegion, + final String pathFormat, + final S3CredentialConfig credentialConfig, + final S3FormatConfig formatConfig, + final AmazonS3 s3Client, + final String fileNamePattern) { this.endpoint = endpoint; this.bucketName = bucketName; this.bucketPath = bucketPath; diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java index fb21ed73dd22..417ea558598c 100644 --- 
a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java @@ -18,8 +18,8 @@ import io.airbyte.commons.string.Strings; import io.airbyte.integrations.destination.NamingConventionTransformer; import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; -import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateManager; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.util.StreamTransferManagerFactory; import java.io.IOException; import java.io.InputStream; @@ -147,13 +147,13 @@ private String loadDataIntoBucket(final String objectPath, final SerializableBuf fullObjectKey = s3FilenameTemplateManager .adaptFilenameAccordingSpecificationPatternWithDefaultConfig( S3FilenameTemplateParameterObject - .builder() - .partId(partId) - .recordsData(recordsData) - .objectPath(objectPath) - .fileExtension(fileExtension) - .fileNamePattern(s3Config.getFileNamePattern()) - .build()); + .builder() + .partId(partId) + .recordsData(recordsData) + .objectPath(objectPath) + .fileExtension(fileExtension) + .fileNamePattern(s3Config.getFileNamePattern()) + .build()); } else { fullObjectKey = objectPath + partId + fileExtension; } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java index 32263c48778e..69020576bf75 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java +++ 
b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/constant/S3Constants.java @@ -1,6 +1,11 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.destination.s3.constant; public final class S3Constants { + public static final String S_3_BUCKET_PATH = "s3_bucket_path"; public static final String FILE_NAME_PATTERN = "file_name_pattern"; public static final String S_3_PATH_FORMAT = "s3_path_format"; @@ -12,7 +17,6 @@ public final class S3Constants { public static final String S_3_BUCKET_NAME = "s3_bucket_name"; public static final String S_3_BUCKET_REGION = "s3_bucket_region"; + private S3Constants() {} - private S3Constants() { - } } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java index 1b79291f3365..7f1738fc646a 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java @@ -43,7 +43,8 @@ public class S3JsonlWriter extends BaseS3Writer implements DestinationFileWriter public S3JsonlWriter(final S3DestinationConfig config, final AmazonS3 s3Client, final ConfiguredAirbyteStream configuredStream, - final Timestamp uploadTimestamp) throws IOException { + final Timestamp uploadTimestamp) + throws IOException { super(config, s3Client, configuredStream); final String outputFilename = determineOutputFilename(config, S3Format.JSONL, uploadTimestamp); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java 
b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java index f93292440ebd..3afb86b8753e 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java @@ -1,3 +1,7 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. + */ + package io.airbyte.integrations.destination.s3.template; import static java.util.Optional.ofNullable; @@ -18,9 +22,8 @@ import org.apache.commons.text.lookup.StringLookupFactory; /** - * This class is responsible for building the filename template based on user input, - * see file_name_pattern in the specification of connector - * currently supported only S3 staging. + * This class is responsible for building the filename template based on user input, see + * file_name_pattern in the specification of connector currently supported only S3 staging. 
*/ public class S3FilenameTemplateManager { @@ -31,22 +34,23 @@ public S3FilenameTemplateManager() { stringSubstitutor = new StringSubstitutor(); } - public String adaptFilenameAccordingSpecificationPatternWithDefaultConfig(final S3FilenameTemplateParameterObject parameterObject) throws IOException { - //sanitize fileFormat + public String adaptFilenameAccordingSpecificationPatternWithDefaultConfig(final S3FilenameTemplateParameterObject parameterObject) + throws IOException { + // sanitize fileFormat final String sanitizedFileFormat = parameterObject .getFileNamePattern() .trim() .replaceAll(" ", "_"); stringSubstitutor.setVariableResolver( - StringLookupFactory.INSTANCE.mapStringLookup(fillTheMapWithDefaultPlaceHolders(sanitizedFileFormat, parameterObject)) - ); + StringLookupFactory.INSTANCE.mapStringLookup(fillTheMapWithDefaultPlaceHolders(sanitizedFileFormat, parameterObject))); stringSubstitutor.setVariablePrefix("{"); stringSubstitutor.setVariableSuffix("}"); return ofNullable(parameterObject.getObjectPath()).orElse(EMPTY) + stringSubstitutor.replace(sanitizedFileFormat); } - private Map fillTheMapWithDefaultPlaceHolders(final String stringToReplaceWithPlaceholder, final S3FilenameTemplateParameterObject parameterObject) { + private Map fillTheMapWithDefaultPlaceHolders(final String stringToReplaceWithPlaceholder, + final S3FilenameTemplateParameterObject parameterObject) { final long currentTimeMillis = Instant.now().toEpochMilli(); @@ -66,9 +70,11 @@ private Map fillTheMapWithDefaultPlaceHolders(final String strin } /** - * By extended placeholders we assume next types: {date:yyyy_MM}, {timestamp:millis}, {timestamp:micro}, etc Limited combinations are supported by the method see the method body. + * By extended placeholders we assume next types: {date:yyyy_MM}, {timestamp:millis}, + * {timestamp:micro}, etc Limited combinations are supported by the method see the method body. 
* - * @param stringToReplaceWithPlaceholder - string where the method will search for extended placeholders + * @param stringToReplaceWithPlaceholder - string where the method will search for extended + * placeholders * @return map with prepared placeholders. */ private Map processExtendedPlaceholder(final long currentTimeMillis, final String stringToReplaceWithPlaceholder) { @@ -77,7 +83,6 @@ private Map processExtendedPlaceholder(final long currentTimeMil final Pattern pattern = Pattern.compile("\\{(date:.+?|timestamp:.+?)\\}"); final Matcher matcher = pattern.matcher(stringToReplaceWithPlaceholder); - while (matcher.find()) { final String[] splitByColon = matcher.group(1).split(":"); switch (splitByColon[0].toLowerCase(Locale.ROOT)) { @@ -105,4 +110,5 @@ private long convertToMicrosecondsRepresentation(final long milliSeconds) { // The time representation in microseconds is equal to the milliseconds multiplied by 1,000. return milliSeconds * 1000; } + } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java index c21ca4717308..dd0fb102ddbc 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java @@ -1,3 +1,7 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + package io.airbyte.integrations.destination.s3.template; import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; @@ -6,6 +10,7 @@ /** * This class is used as argument holder S3FilenameTemplateManager.class + * * @see S3FilenameTemplateManager#adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject) */ public class S3FilenameTemplateParameterObject { @@ -17,7 +22,12 @@ public class S3FilenameTemplateParameterObject { private final String partId; private final S3Format s3Format; - S3FilenameTemplateParameterObject(final String objectPath, final SerializableBuffer recordsData, final String fileNamePattern, final String fileExtension, final String partId, final S3Format s3Format) { + S3FilenameTemplateParameterObject(final String objectPath, + final SerializableBuffer recordsData, + final String fileNamePattern, + final String fileExtension, + final String partId, + final S3Format s3Format) { this.objectPath = objectPath; this.recordsData = recordsData; this.fileNamePattern = fileNamePattern; @@ -30,7 +40,6 @@ public static FilenameTemplateParameterObjectBuilder builder() { return new FilenameTemplateParameterObjectBuilder(); } - public String getObjectPath() { return objectPath; } @@ -64,8 +73,7 @@ public static class FilenameTemplateParameterObjectBuilder { private String partId; private S3Format s3Format; - FilenameTemplateParameterObjectBuilder() { - } + FilenameTemplateParameterObjectBuilder() {} public FilenameTemplateParameterObjectBuilder objectPath(final String objectPath) { this.objectPath = objectPath; @@ -102,9 +110,11 @@ public S3FilenameTemplateParameterObject build() { } public String toString() { - return "FilenameTemplateParameterObject.FilenameTemplateParameterObjectBuilder(objectPath=" + objectPath + ", recordsData=" + recordsData + ", fileNamePattern=" + fileNamePattern + return "FilenameTemplateParameterObject.FilenameTemplateParameterObjectBuilder(objectPath=" + objectPath + ", 
recordsData=" + recordsData + + ", fileNamePattern=" + fileNamePattern + ", fileExtension=" + fileExtension + ", partId=" + partId + ", s3Format=" + s3Format + ")"; } + } @Override @@ -116,7 +126,8 @@ public boolean equals(final Object o) { return false; } final S3FilenameTemplateParameterObject that = (S3FilenameTemplateParameterObject) o; - return Objects.equals(objectPath, that.objectPath) && Objects.equals(recordsData, that.recordsData) && Objects.equals(fileNamePattern, that.fileNamePattern) + return Objects.equals(objectPath, that.objectPath) && Objects.equals(recordsData, that.recordsData) + && Objects.equals(fileNamePattern, that.fileNamePattern) && Objects.equals(fileExtension, that.fileExtension) && Objects.equals(partId, that.partId) && s3Format == that.s3Format; } @@ -124,4 +135,5 @@ public boolean equals(final Object o) { public int hashCode() { return Objects.hash(objectPath, recordsData, fileNamePattern, fileExtension, partId, s3Format); } -} \ No newline at end of file + +} diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java index 57dcf35a0024..4a839b824c02 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java @@ -12,8 +12,8 @@ import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3DestinationConstants; import io.airbyte.integrations.destination.s3.S3Format; -import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateManager; +import 
io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.util.S3OutputPathHelper; import io.airbyte.protocol.models.AirbyteStream; import io.airbyte.protocol.models.ConfiguredAirbyteStream; @@ -41,7 +41,7 @@ public abstract class BaseS3Writer implements DestinationFileWriter { private static final Logger LOGGER = LoggerFactory.getLogger(BaseS3Writer.class); - private static final S3FilenameTemplateManager s3FilenameTemplateManager = new S3FilenameTemplateManager(); + private static final S3FilenameTemplateManager s3FilenameTemplateManager = new S3FilenameTemplateManager(); private static final String DEFAULT_SUFFIX = "_0"; protected final S3DestinationConfig config; @@ -135,9 +135,10 @@ protected void closeWhenFail() throws IOException { // Do nothing by default } - protected String determineOutputFilename(final S3DestinationConfig config, final S3Format s3Format, final Timestamp uploadTimestamp) throws IOException { + protected String determineOutputFilename(final S3DestinationConfig config, final S3Format s3Format, final Timestamp uploadTimestamp) + throws IOException { final String outputFilename; - if (StringUtils.isNotBlank(config.getFileNamePattern())){ + if (StringUtils.isNotBlank(config.getFileNamePattern())) { outputFilename = getOutputFilename(S3FilenameTemplateParameterObject .builder() .fileExtension(s3Format.getFileExtension()) @@ -149,9 +150,13 @@ protected String determineOutputFilename(final S3DestinationConfig config, final return outputFilename; } - protected String determineOutputFilename(final S3DestinationConfig config, final S3Format s3Format, final String suffix, final Timestamp uploadTimestamp) throws IOException { + protected String determineOutputFilename(final S3DestinationConfig config, + final S3Format s3Format, + final String suffix, + final Timestamp uploadTimestamp) + throws IOException { final String outputFilename; - if 
(StringUtils.isNotBlank(config.getFileNamePattern())){ + if (StringUtils.isNotBlank(config.getFileNamePattern())) { outputFilename = getOutputFilename(S3FilenameTemplateParameterObject .builder() .fileExtension(s3Format.getFileExtension()) @@ -192,4 +197,5 @@ public static String getOutputFilename(final Timestamp timestamp, final String c public static String getOutputFilename(final S3FilenameTemplateParameterObject parameterObject) throws IOException { return s3FilenameTemplateManager.adaptFilenameAccordingSpecificationPatternWithDefaultConfig(parameterObject); } + } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java index 61c1eea1a7ac..ef22d9575ca1 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java @@ -1,10 +1,14 @@ +/* + * Copyright (c) 2022 Airbyte, Inc., all rights reserved. 
+ */ + package io.airbyte.integrations.destination.s3.tamplate; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.mockStatic; -import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateManager; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import java.io.IOException; import java.time.Clock; import java.time.Instant; @@ -21,7 +25,8 @@ class S3FilenameTemplateManagerTest { @Test @DisplayName("Should replace the date placeholder with the current date in the format YYYY-MM-DD") - void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceDatePlaceholderWithCurrentDateInTheFormatYYYY_MM_DD() throws IOException { + void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceDatePlaceholderWithCurrentDateInTheFormatYYYY_MM_DD() + throws IOException { final String fileNamePattern = "test-{date}"; final String fileExtension = "csv"; final String partId = "1"; @@ -40,7 +45,8 @@ void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceDat @Test @DisplayName("Should replace the timestamp placeholder with the current timestamp in milliseconds") - void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceTimestampPlaceholderWithCurrentTimestampInMilliseconds() throws IOException { + void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceTimestampPlaceholderWithCurrentTimestampInMilliseconds() + throws IOException { final String fileNamePattern = "test-{timestamp}.csv"; final Clock clock = Clock.fixed(Instant.ofEpochMilli(1657110148000L), ZoneId.of("UTC")); @@ -74,4 +80,5 @@ void testIfFilenameTemplateStringWasSanitized() throws IOException { assertEquals("te__st.csv", actual); } + } From f74acc7b305b20bf325822f87b9c7b085a9847bd Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Thu, 14 
Jul 2022 11:20:42 +0300 Subject: [PATCH 05/15] airbyte-4184 Fixed PR comments. --- .../destination/s3/avro/S3AvroWriter.java | 7 +- .../destination/s3/csv/S3CsvWriter.java | 9 +- .../destination/s3/jsonl/S3JsonlWriter.java | 7 +- .../s3/parquet/S3ParquetWriter.java | 7 +- .../S3FilenameTemplateParameterObject.java | 86 +++++++++---------- .../destination/s3/writer/BaseS3Writer.java | 57 +++--------- .../s3/writer/BaseS3WriterTest.java | 10 ++- 7 files changed, 89 insertions(+), 94 deletions(-) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java index 0f3e8db8368e..7d479861a51d 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.util.StreamTransferManagerFactory; import io.airbyte.integrations.destination.s3.writer.BaseS3Writer; import io.airbyte.integrations.destination.s3.writer.DestinationFileWriter; @@ -47,7 +48,11 @@ public S3AvroWriter(final S3DestinationConfig config, throws IOException { super(config, s3Client, configuredStream); - final String outputFilename = determineOutputFilename(config, S3Format.AVRO, uploadTimestamp); + final String outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject + .builder() + .fileExtension(S3Format.AVRO.getFileExtension()) + .fileNamePattern(config.getFileNamePattern()) + 
.build()); objectKey = String.join("/", outputPrefix, outputFilename); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java index ec6ccbdcd3e6..f7a3d514b50e 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java @@ -10,6 +10,7 @@ import com.fasterxml.jackson.databind.JsonNode; import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.util.StreamTransferManagerFactory; import io.airbyte.integrations.destination.s3.writer.BaseS3Writer; import io.airbyte.integrations.destination.s3.writer.DestinationFileWriter; @@ -52,7 +53,13 @@ private S3CsvWriter(final S3DestinationConfig config, this.csvSheetGenerator = csvSheetGenerator; final String fileSuffix = "_" + UUID.randomUUID(); - final String outputFilename = determineOutputFilename(config, S3Format.CSV, fileSuffix, uploadTimestamp); + final String outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject + .builder() + .customSuffix(fileSuffix) + .fileExtension(S3Format.CSV.getFileExtension()) + .fileNamePattern(config.getFileNamePattern()) + .timestamp(uploadTimestamp) + .build()); this.objectKey = String.join("/", outputPrefix, outputFilename); LOGGER.info("Full S3 path for stream '{}': s3://{}/{}", stream.getName(), config.getBucketName(), diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java 
b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java index 7f1738fc646a..ca8da12026ef 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java @@ -15,6 +15,7 @@ import io.airbyte.integrations.base.JavaBaseConstants; import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.util.StreamTransferManagerFactory; import io.airbyte.integrations.destination.s3.writer.BaseS3Writer; import io.airbyte.integrations.destination.s3.writer.DestinationFileWriter; @@ -47,7 +48,11 @@ public S3JsonlWriter(final S3DestinationConfig config, throws IOException { super(config, s3Client, configuredStream); - final String outputFilename = determineOutputFilename(config, S3Format.JSONL, uploadTimestamp); + final String outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject + .builder() + .fileExtension(S3Format.JSONL.getFileExtension()) + .fileNamePattern(config.getFileNamePattern()) + .build()); objectKey = String.join("/", outputPrefix, outputFilename); LOGGER.info("Full S3 path for stream '{}': s3://{}/{}", stream.getName(), config.getBucketName(), objectKey); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java index 370ca726cd4f..3cf16fbe8d01 100644 --- 
a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java @@ -10,6 +10,7 @@ import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.avro.AvroRecordFactory; import io.airbyte.integrations.destination.s3.credential.S3AccessKeyCredentialConfig; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.writer.BaseS3Writer; import io.airbyte.integrations.destination.s3.writer.DestinationFileWriter; import io.airbyte.protocol.models.AirbyteRecordMessage; @@ -52,7 +53,11 @@ public S3ParquetWriter(final S3DestinationConfig config, throws URISyntaxException, IOException { super(config, s3Client, configuredStream); - outputFilename = determineOutputFilename(config, S3Format.PARQUET, uploadTimestamp); + outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject + .builder() + .fileExtension(S3Format.PARQUET.getFileExtension()) + .fileNamePattern(config.getFileNamePattern()) + .build()); objectKey = String.join("/", outputPrefix, outputFilename); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java index dd0fb102ddbc..b96c7477baf2 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java @@ -6,7 +6,7 @@ import 
io.airbyte.integrations.destination.record_buffer.SerializableBuffer; import io.airbyte.integrations.destination.s3.S3Format; -import java.util.Objects; +import java.sql.Timestamp; /** * This class is used as argument holder S3FilenameTemplateManager.class @@ -21,23 +21,19 @@ public class S3FilenameTemplateParameterObject { private final String fileExtension; private final String partId; private final S3Format s3Format; + private final Timestamp timestamp; + private final String customSuffix; - S3FilenameTemplateParameterObject(final String objectPath, - final SerializableBuffer recordsData, - final String fileNamePattern, - final String fileExtension, - final String partId, - final S3Format s3Format) { + S3FilenameTemplateParameterObject(String objectPath, SerializableBuffer recordsData, String fileNamePattern, String fileExtension, String partId, S3Format s3Format, Timestamp timestamp, + String customSuffix) { this.objectPath = objectPath; this.recordsData = recordsData; this.fileNamePattern = fileNamePattern; this.fileExtension = fileExtension; this.partId = partId; this.s3Format = s3Format; - } - - public static FilenameTemplateParameterObjectBuilder builder() { - return new FilenameTemplateParameterObjectBuilder(); + this.timestamp = timestamp; + this.customSuffix = customSuffix; } public String getObjectPath() { @@ -64,7 +60,19 @@ public S3Format getS3Format() { return s3Format; } - public static class FilenameTemplateParameterObjectBuilder { + public Timestamp getTimestamp() { + return timestamp; + } + + public String getCustomSuffix() { + return customSuffix; + } + + public static S3FilenameTemplateParameterObjectBuilder builder() { + return new S3FilenameTemplateParameterObjectBuilder(); + } + + public static class S3FilenameTemplateParameterObjectBuilder { private String objectPath; private SerializableBuffer recordsData; @@ -72,68 +80,60 @@ public static class FilenameTemplateParameterObjectBuilder { private String fileExtension; private String partId; 
private S3Format s3Format; + private Timestamp timestamp; + private String customSuffix; - FilenameTemplateParameterObjectBuilder() {} + S3FilenameTemplateParameterObjectBuilder() { + } - public FilenameTemplateParameterObjectBuilder objectPath(final String objectPath) { + public S3FilenameTemplateParameterObjectBuilder objectPath(String objectPath) { this.objectPath = objectPath; return this; } - public FilenameTemplateParameterObjectBuilder recordsData(final SerializableBuffer recordsData) { + public S3FilenameTemplateParameterObjectBuilder recordsData(SerializableBuffer recordsData) { this.recordsData = recordsData; return this; } - public FilenameTemplateParameterObjectBuilder fileNamePattern(final String fileNamePattern) { + public S3FilenameTemplateParameterObjectBuilder fileNamePattern(String fileNamePattern) { this.fileNamePattern = fileNamePattern; return this; } - public FilenameTemplateParameterObjectBuilder fileExtension(final String fileExtension) { + public S3FilenameTemplateParameterObjectBuilder fileExtension(String fileExtension) { this.fileExtension = fileExtension; return this; } - public FilenameTemplateParameterObjectBuilder partId(final String partId) { + public S3FilenameTemplateParameterObjectBuilder partId(String partId) { this.partId = partId; return this; } - public FilenameTemplateParameterObjectBuilder s3Format(final S3Format s3Format) { + public S3FilenameTemplateParameterObjectBuilder s3Format(S3Format s3Format) { this.s3Format = s3Format; return this; } - public S3FilenameTemplateParameterObject build() { - return new S3FilenameTemplateParameterObject(objectPath, recordsData, fileNamePattern, fileExtension, partId, s3Format); + public S3FilenameTemplateParameterObjectBuilder timestamp(Timestamp timestamp) { + this.timestamp = timestamp; + return this; } - public String toString() { - return "FilenameTemplateParameterObject.FilenameTemplateParameterObjectBuilder(objectPath=" + objectPath + ", recordsData=" + recordsData - + ", 
fileNamePattern=" + fileNamePattern - + ", fileExtension=" + fileExtension + ", partId=" + partId + ", s3Format=" + s3Format + ")"; + public S3FilenameTemplateParameterObjectBuilder customSuffix(String customSuffix) { + this.customSuffix = customSuffix; + return this; } - } - - @Override - public boolean equals(final Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; + public S3FilenameTemplateParameterObject build() { + return new S3FilenameTemplateParameterObject(objectPath, recordsData, fileNamePattern, fileExtension, partId, s3Format, timestamp, customSuffix); } - final S3FilenameTemplateParameterObject that = (S3FilenameTemplateParameterObject) o; - return Objects.equals(objectPath, that.objectPath) && Objects.equals(recordsData, that.recordsData) - && Objects.equals(fileNamePattern, that.fileNamePattern) - && Objects.equals(fileExtension, that.fileExtension) && Objects.equals(partId, that.partId) && s3Format == that.s3Format; - } - @Override - public int hashCode() { - return Objects.hash(objectPath, recordsData, fileNamePattern, fileExtension, partId, s3Format); + public String toString() { + return "S3FilenameTemplateParameterObject.S3FilenameTemplateParameterObjectBuilder(objectPath=" + this.objectPath + ", recordsData=" + this.recordsData + ", fileNamePattern=" + + this.fileNamePattern + ", fileExtension=" + this.fileExtension + ", partId=" + this.partId + ", s3Format=" + this.s3Format + ", timestamp=" + this.timestamp + ", customSuffix=" + + this.customSuffix + ")"; + } } - } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java index 4a839b824c02..286087cbcb0c 100644 --- 
a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java @@ -4,6 +4,8 @@ package io.airbyte.integrations.destination.s3.writer; +import static org.apache.commons.lang3.StringUtils.isNotBlank; + import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.DeleteObjectsRequest; import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; @@ -135,66 +137,31 @@ protected void closeWhenFail() throws IOException { // Do nothing by default } - protected String determineOutputFilename(final S3DestinationConfig config, final S3Format s3Format, final Timestamp uploadTimestamp) - throws IOException { - final String outputFilename; - if (StringUtils.isNotBlank(config.getFileNamePattern())) { - outputFilename = getOutputFilename(S3FilenameTemplateParameterObject - .builder() - .fileExtension(s3Format.getFileExtension()) - .fileNamePattern(config.getFileNamePattern()) - .build()); - } else { - outputFilename = getOutputFilename(uploadTimestamp, s3Format); - } - return outputFilename; - } - - protected String determineOutputFilename(final S3DestinationConfig config, - final S3Format s3Format, - final String suffix, - final Timestamp uploadTimestamp) + public static String determineOutputFilename(final S3FilenameTemplateParameterObject parameterObject) throws IOException { - final String outputFilename; - if (StringUtils.isNotBlank(config.getFileNamePattern())) { - outputFilename = getOutputFilename(S3FilenameTemplateParameterObject - .builder() - .fileExtension(s3Format.getFileExtension()) - .fileNamePattern(config.getFileNamePattern()) - .build()); - } else { - outputFilename = getOutputFilename(uploadTimestamp, suffix, s3Format); - } - return outputFilename; - } - - /** - * @return A string in the format 
"{upload-date}_{upload-millis}_0.{format-extension}". For example, - * "2021_12_09_1639077474000_0.csv" - */ - public static String getOutputFilename(final Timestamp timestamp, final S3Format format) { - return getOutputFilename(timestamp, DEFAULT_SUFFIX, format); + return isNotBlank(parameterObject.getFileNamePattern()) ? + getOutputFilename(parameterObject) : getDefaultOutputFilename(parameterObject); } /** - * @param customSuffix A string to append to the filename. Commonly used to distinguish multiple + * @param parameterObject A string to append to the filename. Commonly used to distinguish multiple * part files within a single upload. You probably want to use strings with a leading * underscore (i.e. prefer "_0" to "0"). * @return A string in the format "{upload-date}_{upload-millis}_{suffix}.{format-extension}". For * example, "2021_12_09_1639077474000_customSuffix.csv" */ - public static String getOutputFilename(final Timestamp timestamp, final String customSuffix, final S3Format format) { + private static String getDefaultOutputFilename(final S3FilenameTemplateParameterObject parameterObject) { final DateFormat formatter = new SimpleDateFormat(S3DestinationConstants.YYYY_MM_DD_FORMAT_STRING); formatter.setTimeZone(TimeZone.getTimeZone("UTC")); return String.format( "%s_%d%s.%s", - formatter.format(timestamp), - timestamp.getTime(), - customSuffix, - format.getFileExtension()); + formatter.format(parameterObject.getTimestamp()), + parameterObject.getTimestamp().getTime(), + null == parameterObject.getCustomSuffix() ? 
DEFAULT_SUFFIX : parameterObject.getCustomSuffix(), + parameterObject.getS3Format().getFileExtension()); } - public static String getOutputFilename(final S3FilenameTemplateParameterObject parameterObject) throws IOException { + private static String getOutputFilename(final S3FilenameTemplateParameterObject parameterObject) throws IOException { return s3FilenameTemplateManager.adaptFilenameAccordingSpecificationPatternWithDefaultConfig(parameterObject); } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/writer/BaseS3WriterTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/writer/BaseS3WriterTest.java index d5fd051eee91..59a5aaead5b8 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/writer/BaseS3WriterTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/writer/BaseS3WriterTest.java @@ -7,17 +7,23 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import io.airbyte.integrations.destination.s3.S3Format; +import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; +import java.io.IOException; import java.sql.Timestamp; import org.junit.jupiter.api.Test; class BaseS3WriterTest { @Test - public void testGetOutputFilename() { + public void testGetOutputFilename() throws IOException { final Timestamp timestamp = new Timestamp(1471461319000L); assertEquals( "2016_08_17_1471461319000_0.csv", - BaseS3Writer.getOutputFilename(timestamp, S3Format.CSV)); + BaseS3Writer.determineOutputFilename(S3FilenameTemplateParameterObject + .builder() + .s3Format(S3Format.CSV) + .timestamp(timestamp) + .build())); } } From 50c3a38b1891ab5a41a187c0570a7ca8dea4b15d Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Thu, 14 Jul 2022 11:26:11 +0300 Subject: [PATCH 06/15] airbyte-4184 Fix pr comments. 
--- .../integrations/destination/s3/writer/BaseS3Writer.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java index 286087cbcb0c..ce17e55a4420 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java @@ -144,9 +144,7 @@ public static String determineOutputFilename(final S3FilenameTemplateParameterOb } /** - * @param parameterObject A string to append to the filename. Commonly used to distinguish multiple - * part files within a single upload. You probably want to use strings with a leading - * underscore (i.e. prefer "_0" to "0"). + * @param parameterObject - an object which holds all necessary parameters required for default filename creation. * @return A string in the format "{upload-date}_{upload-millis}_{suffix}.{format-extension}". For * example, "2021_12_09_1639077474000_customSuffix.csv" */ From 6f1db7281b286dca9491210da2ea932873ba254c Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Thu, 14 Jul 2022 22:51:11 +0300 Subject: [PATCH 07/15] airbyte-4148: Fixed PR comments. 
--- .../destination/s3/S3StorageOperations.java | 2 +- .../template/S3FilenameTemplateManager.java | 2 +- .../S3FilenameTemplateParameterObject.java | 22 ++++++++++++++++++- .../destination/s3/writer/BaseS3Writer.java | 5 +---- .../S3FilenameTemplateManagerTest.java | 10 ++++----- 5 files changed, 29 insertions(+), 12 deletions(-) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java index 417ea558598c..e9f8bf38afba 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/S3StorageOperations.java @@ -145,7 +145,7 @@ private String loadDataIntoBucket(final String objectPath, final SerializableBuf final String fullObjectKey; if (StringUtils.isNotBlank(s3Config.getFileNamePattern())) { fullObjectKey = s3FilenameTemplateManager - .adaptFilenameAccordingSpecificationPatternWithDefaultConfig( + .applyPatternToFilename( S3FilenameTemplateParameterObject .builder() .partId(partId) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java index 3afb86b8753e..06e2c5586616 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateManager.java @@ -34,7 +34,7 @@ public S3FilenameTemplateManager() { stringSubstitutor = new 
StringSubstitutor(); } - public String adaptFilenameAccordingSpecificationPatternWithDefaultConfig(final S3FilenameTemplateParameterObject parameterObject) + public String applyPatternToFilename(final S3FilenameTemplateParameterObject parameterObject) throws IOException { // sanitize fileFormat final String sanitizedFileFormat = parameterObject diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java index b96c7477baf2..42c23cdadfd9 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/template/S3FilenameTemplateParameterObject.java @@ -7,11 +7,12 @@ import io.airbyte.integrations.destination.record_buffer.SerializableBuffer; import io.airbyte.integrations.destination.s3.S3Format; import java.sql.Timestamp; +import java.util.Objects; /** * This class is used as argument holder S3FilenameTemplateManager.class * - * @see S3FilenameTemplateManager#adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject) + * @see S3FilenameTemplateManager#applyPatternToFilename(S3FilenameTemplateParameterObject) */ public class S3FilenameTemplateParameterObject { @@ -136,4 +137,23 @@ public String toString() { + this.customSuffix + ")"; } } + + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final S3FilenameTemplateParameterObject that = (S3FilenameTemplateParameterObject) o; + return Objects.equals(objectPath, that.objectPath) && Objects.equals(recordsData, that.recordsData) && 
Objects.equals(fileNamePattern, that.fileNamePattern) + && Objects.equals(fileExtension, that.fileExtension) && Objects.equals(partId, that.partId) && s3Format == that.s3Format && Objects.equals(timestamp, + that.timestamp) && Objects.equals(customSuffix, that.customSuffix); + } + + @Override + public int hashCode() { + return Objects.hash(objectPath, recordsData, fileNamePattern, fileExtension, partId, s3Format, timestamp, customSuffix); + } } diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java index ce17e55a4420..377a385c8cf5 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/writer/BaseS3Writer.java @@ -13,7 +13,6 @@ import com.amazonaws.services.s3.model.S3ObjectSummary; import io.airbyte.integrations.destination.s3.S3DestinationConfig; import io.airbyte.integrations.destination.s3.S3DestinationConstants; -import io.airbyte.integrations.destination.s3.S3Format; import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateManager; import io.airbyte.integrations.destination.s3.template.S3FilenameTemplateParameterObject; import io.airbyte.integrations.destination.s3.util.S3OutputPathHelper; @@ -21,13 +20,11 @@ import io.airbyte.protocol.models.ConfiguredAirbyteStream; import io.airbyte.protocol.models.DestinationSyncMode; import java.io.IOException; -import java.sql.Timestamp; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.LinkedList; import java.util.List; import java.util.TimeZone; -import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -160,7 +157,7 @@ private static 
String getDefaultOutputFilename(final S3FilenameTemplateParameter } private static String getOutputFilename(final S3FilenameTemplateParameterObject parameterObject) throws IOException { - return s3FilenameTemplateManager.adaptFilenameAccordingSpecificationPatternWithDefaultConfig(parameterObject); + return s3FilenameTemplateManager.applyPatternToFilename(parameterObject); } } diff --git a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java index ef22d9575ca1..78a301b9bcd7 100644 --- a/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java +++ b/airbyte-integrations/connectors/destination-s3/src/test/java/io/airbyte/integrations/destination/s3/tamplate/S3FilenameTemplateManagerTest.java @@ -25,14 +25,14 @@ class S3FilenameTemplateManagerTest { @Test @DisplayName("Should replace the date placeholder with the current date in the format YYYY-MM-DD") - void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceDatePlaceholderWithCurrentDateInTheFormatYYYY_MM_DD() + void testDatePlaceholder() throws IOException { final String fileNamePattern = "test-{date}"; final String fileExtension = "csv"; final String partId = "1"; final String actual = s3FilenameTemplateManager - .adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject + .applyPatternToFilename(S3FilenameTemplateParameterObject .builder() .objectPath("") .fileNamePattern(fileNamePattern) @@ -45,7 +45,7 @@ void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceDat @Test @DisplayName("Should replace the timestamp placeholder with the current timestamp in milliseconds") - void 
adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceTimestampPlaceholderWithCurrentTimestampInMilliseconds() + void testTimestampPlaceholder() throws IOException { final String fileNamePattern = "test-{timestamp}.csv"; @@ -55,7 +55,7 @@ void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceTim try (final MockedStatic mocked = mockStatic(Instant.class)) { mocked.when(Instant::now).thenReturn(instant); final String actual = s3FilenameTemplateManager - .adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject.builder() + .applyPatternToFilename(S3FilenameTemplateParameterObject.builder() .objectPath("") .fileNamePattern(fileNamePattern) .fileExtension("csv") @@ -71,7 +71,7 @@ void adaptFilenameAccordingSpecificationPatternWithDefaultConfigShouldReplaceTim void testIfFilenameTemplateStringWasSanitized() throws IOException { final String fileNamePattern = " te st.csv "; final String actual = s3FilenameTemplateManager - .adaptFilenameAccordingSpecificationPatternWithDefaultConfig(S3FilenameTemplateParameterObject.builder() + .applyPatternToFilename(S3FilenameTemplateParameterObject.builder() .objectPath("") .fileNamePattern(fileNamePattern) .fileExtension("csv") From f6f3768a92385fb56d56d0512f3daded0215ce71 Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Fri, 15 Jul 2022 11:25:38 +0300 Subject: [PATCH 08/15] airbyte-4148: Fixed PR comments. 
--- .../airbyte/integrations/destination/s3/avro/S3AvroWriter.java | 1 + .../io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java | 1 + .../airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java | 1 + .../integrations/destination/s3/parquet/S3ParquetWriter.java | 1 + 4 files changed, 4 insertions(+) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java index 7d479861a51d..198f3e6d5d8f 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java @@ -50,6 +50,7 @@ public S3AvroWriter(final S3DestinationConfig config, final String outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject .builder() + .s3Format(S3Format.AVRO) .fileExtension(S3Format.AVRO.getFileExtension()) .fileNamePattern(config.getFileNamePattern()) .build()); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java index f7a3d514b50e..0a754d6ea6a5 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/csv/S3CsvWriter.java @@ -56,6 +56,7 @@ private S3CsvWriter(final S3DestinationConfig config, final String outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject .builder() .customSuffix(fileSuffix) + .s3Format(S3Format.CSV) .fileExtension(S3Format.CSV.getFileExtension()) 
.fileNamePattern(config.getFileNamePattern()) .timestamp(uploadTimestamp) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java index ca8da12026ef..3f804cea4b29 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java @@ -50,6 +50,7 @@ public S3JsonlWriter(final S3DestinationConfig config, final String outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject .builder() + .s3Format(S3Format.JSONL) .fileExtension(S3Format.JSONL.getFileExtension()) .fileNamePattern(config.getFileNamePattern()) .build()); diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java index 3cf16fbe8d01..2fd4faa272e1 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java @@ -55,6 +55,7 @@ public S3ParquetWriter(final S3DestinationConfig config, outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject .builder() + .s3Format(S3Format.PARQUET) .fileExtension(S3Format.PARQUET.getFileExtension()) .fileNamePattern(config.getFileNamePattern()) .build()); From 7030070240571fd350c1db48df81deff74999327 Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Fri, 15 Jul 2022 11:57:27 +0300 Subject: [PATCH 09/15] 
airbyte-4148: Fixed PR comments. --- .../airbyte/integrations/destination/s3/avro/S3AvroWriter.java | 1 + .../airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java | 1 + .../integrations/destination/s3/parquet/S3ParquetWriter.java | 1 + 3 files changed, 3 insertions(+) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java index 198f3e6d5d8f..b30df0518bdf 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/avro/S3AvroWriter.java @@ -50,6 +50,7 @@ public S3AvroWriter(final S3DestinationConfig config, final String outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject .builder() + .timestamp(uploadTimestamp) .s3Format(S3Format.AVRO) .fileExtension(S3Format.AVRO.getFileExtension()) .fileNamePattern(config.getFileNamePattern()) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java index 3f804cea4b29..a973ae13746d 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/jsonl/S3JsonlWriter.java @@ -50,6 +50,7 @@ public S3JsonlWriter(final S3DestinationConfig config, final String outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject .builder() + .timestamp(uploadTimestamp) .s3Format(S3Format.JSONL) 
.fileExtension(S3Format.JSONL.getFileExtension()) .fileNamePattern(config.getFileNamePattern()) diff --git a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java index 2fd4faa272e1..b48975957862 100644 --- a/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java +++ b/airbyte-integrations/connectors/destination-s3/src/main/java/io/airbyte/integrations/destination/s3/parquet/S3ParquetWriter.java @@ -56,6 +56,7 @@ public S3ParquetWriter(final S3DestinationConfig config, outputFilename = determineOutputFilename(S3FilenameTemplateParameterObject .builder() .s3Format(S3Format.PARQUET) + .timestamp(uploadTimestamp) .fileExtension(S3Format.PARQUET.getFileExtension()) .fileNamePattern(config.getFileNamePattern()) .build()); From c9ae1f68f61ae1d164030f3cd065d3c930f12793 Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Fri, 15 Jul 2022 13:20:41 +0300 Subject: [PATCH 10/15] airbyte-4148: Bump versions. 
--- .../destination-databricks/Dockerfile | 2 +- .../destination-redshift/Dockerfile | 2 +- .../connectors/destination-s3/Dockerfile | 2 +- .../destination-snowflake/Dockerfile | 2 +- docs/integrations/destinations/databricks.md | 27 +++--- docs/integrations/destinations/redshift.md | 47 +++++----- docs/integrations/destinations/s3.md | 85 ++++++++++--------- docs/integrations/destinations/snowflake.md | 13 +-- 8 files changed, 92 insertions(+), 88 deletions(-) diff --git a/airbyte-integrations/connectors/destination-databricks/Dockerfile b/airbyte-integrations/connectors/destination-databricks/Dockerfile index 843486ac4250..92521ca3420b 100644 --- a/airbyte-integrations/connectors/destination-databricks/Dockerfile +++ b/airbyte-integrations/connectors/destination-databricks/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-databricks COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.2.4 +LABEL io.airbyte.version=0.2.5 LABEL io.airbyte.name=airbyte/destination-databricks diff --git a/airbyte-integrations/connectors/destination-redshift/Dockerfile b/airbyte-integrations/connectors/destination-redshift/Dockerfile index 2f3a8a19ddad..bc47f0d08e5b 100644 --- a/airbyte-integrations/connectors/destination-redshift/Dockerfile +++ b/airbyte-integrations/connectors/destination-redshift/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-redshift COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.3.46 +LABEL io.airbyte.version=0.3.47 LABEL io.airbyte.name=airbyte/destination-redshift diff --git a/airbyte-integrations/connectors/destination-s3/Dockerfile b/airbyte-integrations/connectors/destination-s3/Dockerfile index a33447c57ec0..9efc767675cd 100644 --- a/airbyte-integrations/connectors/destination-s3/Dockerfile +++ b/airbyte-integrations/connectors/destination-s3/Dockerfile @@ -16,5 +16,5 @@ ENV APPLICATION destination-s3 COPY --from=build /airbyte /airbyte -LABEL io.airbyte.version=0.3.10 +LABEL io.airbyte.version=0.3.11 LABEL 
io.airbyte.name=airbyte/destination-s3 diff --git a/airbyte-integrations/connectors/destination-snowflake/Dockerfile b/airbyte-integrations/connectors/destination-snowflake/Dockerfile index dd90c10ed3c7..56c6f6d0b97f 100644 --- a/airbyte-integrations/connectors/destination-snowflake/Dockerfile +++ b/airbyte-integrations/connectors/destination-snowflake/Dockerfile @@ -20,5 +20,5 @@ RUN tar xf ${APPLICATION}.tar --strip-components=1 ENV ENABLE_SENTRY true -LABEL io.airbyte.version=0.4.32 +LABEL io.airbyte.version=0.4.33 LABEL io.airbyte.name=airbyte/destination-snowflake diff --git a/docs/integrations/destinations/databricks.md b/docs/integrations/destinations/databricks.md index 0c4eaf1d499c..1bfb63ebaa61 100644 --- a/docs/integrations/destinations/databricks.md +++ b/docs/integrations/destinations/databricks.md @@ -102,16 +102,17 @@ Under the hood, an Airbyte data stream in Json schema is first converted to an A ## CHANGELOG -| Version | Date | Pull Request | Subject | -| :--- | :--- | :--- | :--- | -| 0.2.4 | 2022-07-14 | [\#14618](https://github.com/airbytehq/airbyte/pull/14618) | Removed additionalProperties: false from JDBC destination connectors | -| 0.2.3 | 2022-06-16 | [\#13852](https://github.com/airbytehq/airbyte/pull/13852) | Updated stacktrace format for any trace message errors | -| 0.2.2 | 2022-06-13 | [\#13722](https://github.com/airbytehq/airbyte/pull/13722) | Rename to "Databricks Lakehouse". | -| 0.2.1 | 2022-06-08 | [\#13630](https://github.com/airbytehq/airbyte/pull/13630) | Rename to "Databricks Delta Lake" and add field orders in the spec. | -| 0.2.0 | 2022-05-15 | [\#12861](https://github.com/airbytehq/airbyte/pull/12861) | Use new public Databricks JDBC driver, and open source the connector. | -| 0.1.5 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. 
| -| 0.1.4 | 2022-02-14 | [\#10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.1.3 | 2022-01-06 | [\#7622](https://github.com/airbytehq/airbyte/pull/7622) [\#9153](https://github.com/airbytehq/airbyte/issues/9153) | Upgrade Spark JDBC driver to `2.6.21` to patch Log4j vulnerability; update connector fields title/description. | -| 0.1.2 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | -| 0.1.1 | 2021-10-05 | [\#6792](https://github.com/airbytehq/airbyte/pull/6792) | Require users to accept Databricks JDBC Driver [Terms & Conditions](https://databricks.com/jdbc-odbc-driver-license). | -| 0.1.0 | 2021-09-14 | [\#5998](https://github.com/airbytehq/airbyte/pull/5998) | Initial private release. | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:--------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------| +| 0.2.5 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. | +| 0.2.4 | 2022-07-14 | [\#14618](https://github.com/airbytehq/airbyte/pull/14618) | Removed additionalProperties: false from JDBC destination connectors | +| 0.2.3 | 2022-06-16 | [\#13852](https://github.com/airbytehq/airbyte/pull/13852) | Updated stacktrace format for any trace message errors | +| 0.2.2 | 2022-06-13 | [\#13722](https://github.com/airbytehq/airbyte/pull/13722) | Rename to "Databricks Lakehouse". | +| 0.2.1 | 2022-06-08 | [\#13630](https://github.com/airbytehq/airbyte/pull/13630) | Rename to "Databricks Delta Lake" and add field orders in the spec. | +| 0.2.0 | 2022-05-15 | [\#12861](https://github.com/airbytehq/airbyte/pull/12861) | Use new public Databricks JDBC driver, and open source the connector. 
| +| 0.1.5 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. | +| 0.1.4 | 2022-02-14 | [\#10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.1.3 | 2022-01-06 | [\#7622](https://github.com/airbytehq/airbyte/pull/7622) [\#9153](https://github.com/airbytehq/airbyte/issues/9153) | Upgrade Spark JDBC driver to `2.6.21` to patch Log4j vulnerability; update connector fields title/description. | +| 0.1.2 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | +| 0.1.1 | 2021-10-05 | [\#6792](https://github.com/airbytehq/airbyte/pull/6792) | Require users to accept Databricks JDBC Driver [Terms & Conditions](https://databricks.com/jdbc-odbc-driver-license). | +| 0.1.0 | 2021-09-14 | [\#5998](https://github.com/airbytehq/airbyte/pull/5998) | Initial private release. | diff --git a/docs/integrations/destinations/redshift.md b/docs/integrations/destinations/redshift.md index 8c639c98a613..efb88e85e64a 100644 --- a/docs/integrations/destinations/redshift.md +++ b/docs/integrations/destinations/redshift.md @@ -138,35 +138,36 @@ Each stream will be output into its own raw table in Redshift. Each table will c | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 0.3.46 | 2022-06-27 | [\#14190](https://github.com/airbytehq/airbyte/pull/13916) | Correctly cleanup S3 bucket when using a configured bucket path for S3 staging operations. 
| -| 0.3.45 | 2022-06-25 | [\#13916](https://github.com/airbytehq/airbyte/pull/13916) | Use the configured bucket path for S3 staging operations. | -| 0.3.44 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | +| 0.3.47 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. | +| 0.3.46 | 2022-06-27 | [\#14190](https://github.com/airbytehq/airbyte/pull/14190) | Correctly cleanup S3 bucket when using a configured bucket path for S3 staging operations. | +| 0.3.45 | 2022-06-25 | [\#13916](https://github.com/airbytehq/airbyte/pull/13916) | Use the configured bucket path for S3 staging operations. | +| 0.3.44 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | | 0.3.43 | 2022-06-24 | [\#13690](https://github.com/airbytehq/airbyte/pull/13690) | Improved discovery for NOT SUPER column | | 0.3.42 | 2022-06-21 | [\#14013](https://github.com/airbytehq/airbyte/pull/14013) | Add an option to use encryption with staging in Redshift Destination | | 0.3.40 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART_SIZE_MB fields from connectors based on StreamTransferManager | | 0.3.39 | 2022-06-02 | [13415](https://github.com/airbytehq/airbyte/pull/13415) | Add dropdown to select Uploading Method.
**PLEASE NOTICE**: After this update your **uploading method** will be set to **Standard**, you will need to reconfigure the method to use **S3 Staging** again. | | 0.3.37 | 2022-05-23 | [13090](https://github.com/airbytehq/airbyte/pull/13090) | Removed redshiftDataTmpTableMode. Some refactoring. | -| 0.3.36 | 2022-05-23 | [12820](https://github.com/airbytehq/airbyte/pull/12820) | Improved 'check' operation performance | -| 0.3.35 | 2022-05-18 | [12940](https://github.com/airbytehq/airbyte/pull/12940) | Fixed maximum record size for SUPER type | -| 0.3.34 | 2022-05-16 | [12869](https://github.com/airbytehq/airbyte/pull/12869) | Fixed NPE in S3 staging check | -| 0.3.33 | 2022-05-04 | [12601](https://github.com/airbytehq/airbyte/pull/12601) | Apply buffering strategy for S3 staging | -| 0.3.32 | 2022-04-20 | [12085](https://github.com/airbytehq/airbyte/pull/12085) | Fixed bug with switching between INSERT and COPY config | +| 0.3.36 | 2022-05-23 | [12820](https://github.com/airbytehq/airbyte/pull/12820) | Improved 'check' operation performance | +| 0.3.35 | 2022-05-18 | [12940](https://github.com/airbytehq/airbyte/pull/12940) | Fixed maximum record size for SUPER type | +| 0.3.34 | 2022-05-16 | [12869](https://github.com/airbytehq/airbyte/pull/12869) | Fixed NPE in S3 staging check | +| 0.3.33 | 2022-05-04 | [12601](https://github.com/airbytehq/airbyte/pull/12601) | Apply buffering strategy for S3 staging | +| 0.3.32 | 2022-04-20 | [12085](https://github.com/airbytehq/airbyte/pull/12085) | Fixed bug with switching between INSERT and COPY config | | 0.3.31 | 2022-04-19 | [\#12064](https://github.com/airbytehq/airbyte/pull/12064) | Added option to support SUPER datatype in _airbyte_raw_** table | -| 0.3.29 | 2022-04-05 | [11729](https://github.com/airbytehq/airbyte/pull/11729) | Fixed bug with dashes in schema name | | +| 0.3.29 | 2022-04-05 | [11729](https://github.com/airbytehq/airbyte/pull/11729) | Fixed bug with dashes in schema name | | | 0.3.28 | 2022-03-18 | 
[\#11254](https://github.com/airbytehq/airbyte/pull/11254) | Fixed missing records during S3 staging | -| 0.3.27 | 2022-02-25 | [10421](https://github.com/airbytehq/airbyte/pull/10421) | Refactor JDBC parameters handling | -| 0.3.25 | 2022-02-14 | [#9920](https://github.com/airbytehq/airbyte/pull/9920) | Updated the size of staging files for S3 staging. Also, added closure of S3 writers to staging files when data has been written to an staging file. | -| 0.3.24 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.3.23 | 2021-12-16 | [\#8855](https://github.com/airbytehq/airbyte/pull/8855) | Add `purgeStagingData` option to enable/disable deleting the staging data | -| 0.3.22 | 2021-12-15 | [#8607](https://github.com/airbytehq/airbyte/pull/8607) | Accept a path for the staging data | -| 0.3.21 | 2021-12-10 | [#8562](https://github.com/airbytehq/airbyte/pull/8562) | Moving classes around for better dependency management | -| 0.3.20 | 2021-11-08 | [#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | -| 0.3.19 | 2021-10-21 | [7234](https://github.com/airbytehq/airbyte/pull/7234) | Allow SSL traffic only | -| 0.3.17 | 2021-10-12 | [6965](https://github.com/airbytehq/airbyte/pull/6965) | Added SSL Support | -| 0.3.16 | 2021-10-11 | [6949](https://github.com/airbytehq/airbyte/pull/6949) | Each stream was split into files of 10,000 records each for copying using S3 or GCS | -| 0.3.14 | 2021-10-08 | [5924](https://github.com/airbytehq/airbyte/pull/5924) | Fixed AWS S3 Staging COPY is writing records from different table in the same raw table | -| 0.3.13 | 2021-09-02 | [5745](https://github.com/airbytehq/airbyte/pull/5745) | Disable STATUPDATE flag when using S3 staging to speed up performance | -| 0.3.12 | 2021-07-21 | [3555](https://github.com/airbytehq/airbyte/pull/3555) | Enable partial 
checkpointing for halfway syncs | -| 0.3.11 | 2021-07-20 | [4874](https://github.com/airbytehq/airbyte/pull/4874) | allow `additionalProperties` in connector spec | +| 0.3.27 | 2022-02-25 | [10421](https://github.com/airbytehq/airbyte/pull/10421) | Refactor JDBC parameters handling | +| 0.3.25 | 2022-02-14 | [#9920](https://github.com/airbytehq/airbyte/pull/9920) | Updated the size of staging files for S3 staging. Also, added closure of S3 writers to staging files when data has been written to a staging file. | +| 0.3.24 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.3.23 | 2021-12-16 | [\#8855](https://github.com/airbytehq/airbyte/pull/8855) | Add `purgeStagingData` option to enable/disable deleting the staging data | +| 0.3.22 | 2021-12-15 | [#8607](https://github.com/airbytehq/airbyte/pull/8607) | Accept a path for the staging data | +| 0.3.21 | 2021-12-10 | [#8562](https://github.com/airbytehq/airbyte/pull/8562) | Moving classes around for better dependency management | +| 0.3.20 | 2021-11-08 | [#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | +| 0.3.19 | 2021-10-21 | [7234](https://github.com/airbytehq/airbyte/pull/7234) | Allow SSL traffic only | +| 0.3.17 | 2021-10-12 | [6965](https://github.com/airbytehq/airbyte/pull/6965) | Added SSL Support | +| 0.3.16 | 2021-10-11 | [6949](https://github.com/airbytehq/airbyte/pull/6949) | Each stream was split into files of 10,000 records each for copying using S3 or GCS | +| 0.3.14 | 2021-10-08 | [5924](https://github.com/airbytehq/airbyte/pull/5924) | Fixed AWS S3 Staging COPY is writing records from different table in the same raw table | +| 0.3.13 | 2021-09-02 | [5745](https://github.com/airbytehq/airbyte/pull/5745) | Disable STATUPDATE flag when using S3 staging to speed up performance | +| 0.3.12 | 2021-07-21 | 
[3555](https://github.com/airbytehq/airbyte/pull/3555) | Enable partial checkpointing for halfway syncs | +| 0.3.11 | 2021-07-20 | [4874](https://github.com/airbytehq/airbyte/pull/4874) | allow `additionalProperties` in connector spec | diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index 493ccfc63691..d261c7e3c061 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -313,46 +313,47 @@ In order for everything to work correctly, it is also necessary that the user wh ## CHANGELOG -| Version | Date | Pull Request | Subject | -|:--------| :--- | :--- |:---------------------------------------------------------------------------------------------------------------------------| -| 0.3.10 | 2022-06-30 | [\#14332](https://github.com/airbytehq/airbyte/pull/14332) | Change INSTANCE_PROFILE to use `AWSDefaultProfileCredential`, which supports more authentications on AWS | -| 0.3.9 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | -| 0.3.8 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART_SIZE_MB fields from connectors based on StreamTransferManager | -| 0.3.7 | 2022-06-14 | [\#13483](https://github.com/airbytehq/airbyte/pull/13483) | Added support for int, long, float data types to Avro/Parquet formats. | -| 0.3.6 | 2022-05-19 | [\#13043](https://github.com/airbytehq/airbyte/pull/13043) | Destination S3: Remove configurable part size. | -| 0.3.5 | 2022-05-12 | [\#12797](https://github.com/airbytehq/airbyte/pull/12797) | Update spec to replace markdown. | -| 0.3.4 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. 
| -| 0.3.3 | 2022-04-20 | [\#12167](https://github.com/airbytehq/airbyte/pull/12167) | Add gzip compression option for CSV and JSONL formats. | -| 0.3.2 | 2022-04-22 | [\#11795](https://github.com/airbytehq/airbyte/pull/11795) | Fix the connection check to verify the provided bucket path. | -| 0.3.1 | 2022-04-05 | [\#11728](https://github.com/airbytehq/airbyte/pull/11728) | Properly clean-up bucket when running OVERWRITE sync mode | -| 0.3.0 | 2022-04-04 | [\#11666](https://github.com/airbytehq/airbyte/pull/11666) | 0.2.12 actually has breaking changes since files are compressed by default, this PR also fixes the naming to be more compatible with older versions. | -| 0.2.13 | 2022-03-29 | [\#11496](https://github.com/airbytehq/airbyte/pull/11496) | Fix S3 bucket path to be included with S3 bucket format | -| 0.2.12 | 2022-03-28 | [\#11294](https://github.com/airbytehq/airbyte/pull/11294) | Change to serialized buffering strategy to reduce memory consumption | -| 0.2.11 | 2022-03-23 | [\#11173](https://github.com/airbytehq/airbyte/pull/11173) | Added support for AWS Glue crawler | -| 0.2.10 | 2022-03-07 | [\#10856](https://github.com/airbytehq/airbyte/pull/10856) | `check` method now tests for listObjects permissions on the target bucket | -| 0.2.7 | 2022-02-14 | [\#10318](https://github.com/airbytehq/airbyte/pull/10318) | Prevented double slashes in S3 destination path | -| 0.2.6 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.2.5 | 2022-01-13 | [\#9399](https://github.com/airbytehq/airbyte/pull/9399) | Use instance profile authentication if credentials are not provided | -| 0.2.4 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | -| 0.2.3 | 2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field 
to string. | -| 0.2.2 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | -| 0.2.1 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. | -| 0.2.0 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Change the output filename for CSV files - it's now `bucketPath/namespace/streamName/timestamp_epochMillis_randomUuid.csv` | -| 0.1.16 | 2021-12-10 | [\#8562](https://github.com/airbytehq/airbyte/pull/8562) | Swap dependencies with destination-jdbc. | -| 0.1.15 | 2021-12-03 | [\#8501](https://github.com/airbytehq/airbyte/pull/8501) | Remove excessive logging for Avro and Parquet invalid date strings. | -| 0.1.14 | 2021-11-09 | [\#7732](https://github.com/airbytehq/airbyte/pull/7732) | Support timestamp in Avro and Parquet | -| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | -| 0.1.12 | 2021-09-13 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by S3 | -| 0.1.11 | 2021-09-10 | [\#5729](https://github.com/airbytehq/airbyte/pull/5729) | For field names that start with a digit, a `_` will be appended at the beginning for the`Parquet` and `Avro` formats. | -| 0.1.10 | 2021-08-17 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | -| 0.1.9 | 2021-07-12 | [\#4666](https://github.com/airbytehq/airbyte/pull/4666) | Fix MinIO output for Parquet format. | -| 0.1.8 | 2021-07-07 | [\#4613](https://github.com/airbytehq/airbyte/pull/4613) | Patched schema converter to support combined restrictions. | -| 0.1.7 | 2021-06-23 | [\#4227](https://github.com/airbytehq/airbyte/pull/4227) | Added Avro and JSONL output. 
| -| 0.1.6 | 2021-06-16 | [\#4130](https://github.com/airbytehq/airbyte/pull/4130) | Patched the check to verify prefix access instead of full-bucket access. | -| 0.1.5 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Fixed default `max_padding_size_mb` in `spec.json`. | -| 0.1.4 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Added Parquet output. | -| 0.1.3 | 2021-06-13 | [\#4038](https://github.com/airbytehq/airbyte/pull/4038) | Added support for alternative S3. | -| 0.1.2 | 2021-06-10 | [\#4029](https://github.com/airbytehq/airbyte/pull/4029) | Fixed `_airbyte_emitted_at` field to be a UTC instead of local timestamp for consistency. | -| 0.1.1 | 2021-06-09 | [\#3973](https://github.com/airbytehq/airbyte/pull/3973) | Added `AIRBYTE_ENTRYPOINT` in base Docker image for Kubernetes support. | -| 0.1.0 | 2021-06-03 | [\#3672](https://github.com/airbytehq/airbyte/pull/3672) | Initial release with CSV output. | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:-----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------| +| 0.3.11 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. 
| +| 0.3.10 | 2022-06-30 | [\#14332](https://github.com/airbytehq/airbyte/pull/14332) | Change INSTANCE_PROFILE to use `AWSDefaultProfileCredential`, which supports more authentications on AWS | +| 0.3.9 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | +| 0.3.8 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART_SIZE_MB fields from connectors based on StreamTransferManager | +| 0.3.7 | 2022-06-14 | [\#13483](https://github.com/airbytehq/airbyte/pull/13483) | Added support for int, long, float data types to Avro/Parquet formats. | +| 0.3.6 | 2022-05-19 | [\#13043](https://github.com/airbytehq/airbyte/pull/13043) | Destination S3: Remove configurable part size. | +| 0.3.5 | 2022-05-12 | [\#12797](https://github.com/airbytehq/airbyte/pull/12797) | Update spec to replace markdown. | +| 0.3.4 | 2022-05-04 | [\#12578](https://github.com/airbytehq/airbyte/pull/12578) | In JSON to Avro conversion, log JSON field values that do not follow Avro schema for debugging. | +| 0.3.3 | 2022-04-20 | [\#12167](https://github.com/airbytehq/airbyte/pull/12167) | Add gzip compression option for CSV and JSONL formats. | +| 0.3.2 | 2022-04-22 | [\#11795](https://github.com/airbytehq/airbyte/pull/11795) | Fix the connection check to verify the provided bucket path. | +| 0.3.1 | 2022-04-05 | [\#11728](https://github.com/airbytehq/airbyte/pull/11728) | Properly clean-up bucket when running OVERWRITE sync mode | +| 0.3.0 | 2022-04-04 | [\#11666](https://github.com/airbytehq/airbyte/pull/11666) | 0.2.12 actually has breaking changes since files are compressed by default, this PR also fixes the naming to be more compatible with older versions. 
| +| 0.2.13 | 2022-03-29 | [\#11496](https://github.com/airbytehq/airbyte/pull/11496) | Fix S3 bucket path to be included with S3 bucket format | +| 0.2.12 | 2022-03-28 | [\#11294](https://github.com/airbytehq/airbyte/pull/11294) | Change to serialized buffering strategy to reduce memory consumption | +| 0.2.11 | 2022-03-23 | [\#11173](https://github.com/airbytehq/airbyte/pull/11173) | Added support for AWS Glue crawler | +| 0.2.10 | 2022-03-07 | [\#10856](https://github.com/airbytehq/airbyte/pull/10856) | `check` method now tests for listObjects permissions on the target bucket | +| 0.2.7 | 2022-02-14 | [\#10318](https://github.com/airbytehq/airbyte/pull/10318) | Prevented double slashes in S3 destination path | +| 0.2.6 | 2022-02-14 | [10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.2.5 | 2022-01-13 | [\#9399](https://github.com/airbytehq/airbyte/pull/9399) | Use instance profile authentication if credentials are not provided | +| 0.2.4 | 2022-01-12 | [\#9415](https://github.com/airbytehq/airbyte/pull/9415) | BigQuery Destination : Fix GCS processing of Facebook data | +| 0.2.3 | 2022-01-11 | [\#9367](https://github.com/airbytehq/airbyte/pull/9367) | Avro & Parquet: support array field with unknown item type; default any improperly typed field to string. | +| 0.2.2 | 2021-12-21 | [\#8574](https://github.com/airbytehq/airbyte/pull/8574) | Added namespace to Avro and Parquet record types | +| 0.2.1 | 2021-12-20 | [\#8974](https://github.com/airbytehq/airbyte/pull/8974) | Release a new version to ensure there is no excessive logging. | +| 0.2.0 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Change the output filename for CSV files - it's now `bucketPath/namespace/streamName/timestamp_epochMillis_randomUuid.csv` | +| 0.1.16 | 2021-12-10 | [\#8562](https://github.com/airbytehq/airbyte/pull/8562) | Swap dependencies with destination-jdbc. 
| +| 0.1.15 | 2021-12-03 | [\#8501](https://github.com/airbytehq/airbyte/pull/8501) | Remove excessive logging for Avro and Parquet invalid date strings. | +| 0.1.14 | 2021-11-09 | [\#7732](https://github.com/airbytehq/airbyte/pull/7732) | Support timestamp in Avro and Parquet | +| 0.1.13 | 2021-11-03 | [\#7288](https://github.com/airbytehq/airbyte/issues/7288) | Support Json `additionalProperties`. | +| 0.1.12 | 2021-09-13 | [\#5720](https://github.com/airbytehq/airbyte/issues/5720) | Added configurable block size for stream. Each stream is limited to 10,000 by S3 | +| 0.1.11 | 2021-09-10 | [\#5729](https://github.com/airbytehq/airbyte/pull/5729) | For field names that start with a digit, a `_` will be appended at the beginning for the`Parquet` and `Avro` formats. | +| 0.1.10 | 2021-08-17 | [\#4699](https://github.com/airbytehq/airbyte/pull/4699) | Added json config validator | +| 0.1.9 | 2021-07-12 | [\#4666](https://github.com/airbytehq/airbyte/pull/4666) | Fix MinIO output for Parquet format. | +| 0.1.8 | 2021-07-07 | [\#4613](https://github.com/airbytehq/airbyte/pull/4613) | Patched schema converter to support combined restrictions. | +| 0.1.7 | 2021-06-23 | [\#4227](https://github.com/airbytehq/airbyte/pull/4227) | Added Avro and JSONL output. | +| 0.1.6 | 2021-06-16 | [\#4130](https://github.com/airbytehq/airbyte/pull/4130) | Patched the check to verify prefix access instead of full-bucket access. | +| 0.1.5 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Fixed default `max_padding_size_mb` in `spec.json`. | +| 0.1.4 | 2021-06-14 | [\#3908](https://github.com/airbytehq/airbyte/pull/3908) | Added Parquet output. | +| 0.1.3 | 2021-06-13 | [\#4038](https://github.com/airbytehq/airbyte/pull/4038) | Added support for alternative S3. | +| 0.1.2 | 2021-06-10 | [\#4029](https://github.com/airbytehq/airbyte/pull/4029) | Fixed `_airbyte_emitted_at` field to be a UTC instead of local timestamp for consistency. 
| +| 0.1.1 | 2021-06-09 | [\#3973](https://github.com/airbytehq/airbyte/pull/3973) | Added `AIRBYTE_ENTRYPOINT` in base Docker image for Kubernetes support. | +| 0.1.0 | 2021-06-03 | [\#3672](https://github.com/airbytehq/airbyte/pull/3672) | Initial release with CSV output. | diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index 03a9ce1a8594..1391ab480bcb 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -249,12 +249,13 @@ Now that you have set up the Snowflake destination connector, check out the foll | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------| -| 0.4.32 | 2022-07-14 | [\#14618](https://github.com/airbytehq/airbyte/pull/14618) | Removed additionalProperties: false from JDBC destination connectors | -| 0.4.31 | 2022-07-07 | [\#13729](https://github.com/airbytehq/airbyte/pull/13729) | Improve configuration field description | -| 0.4.30 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | -| 0.4.29 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART_SIZE_MB fields from connectors based on StreamTransferManager | -| 0.4.28 | 2022-05-18 | [\#12952](https://github.com/airbytehq/airbyte/pull/12952) | Apply buffering strategy on GCS staging | -| 0.4.27 | 2022-05-17 | [12820](https://github.com/airbytehq/airbyte/pull/12820) | Improved 'check' operation performance | +| 0.4.33 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. 
| +| 0.4.32 | 2022-07-14 | [\#14618](https://github.com/airbytehq/airbyte/pull/14618) | Removed additionalProperties: false from JDBC destination connectors | +| 0.4.31 | 2022-07-07 | [\#13729](https://github.com/airbytehq/airbyte/pull/13729) | Improve configuration field description | +| 0.4.30 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | +| 0.4.29 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART_SIZE_MB fields from connectors based on StreamTransferManager | +| 0.4.28 | 2022-05-18 | [\#12952](https://github.com/airbytehq/airbyte/pull/12952) | Apply buffering strategy on GCS staging | +| 0.4.27 | 2022-05-17 | [12820](https://github.com/airbytehq/airbyte/pull/12820) | Improved 'check' operation performance | | 0.4.26 | 2022-05-12 | [\#12805](https://github.com/airbytehq/airbyte/pull/12805) | Updated to latest base-java to emit AirbyteTraceMessages on error. | | 0.4.25 | 2022-05-03 | [\#12452](https://github.com/airbytehq/airbyte/pull/12452) | Add support for encrypted staging on S3; fix the purge_staging_files option | | 0.4.24 | 2022-03-24 | [\#11093](https://github.com/airbytehq/airbyte/pull/11093) | Added OAuth support (Compatible with Airbyte Version 0.35.60+) | From ea027b3d0bdda1a4f308c9bb94596bd0c97d09dd Mon Sep 17 00:00:00 2001 From: Oleksandr Tsukanov Date: Fri, 15 Jul 2022 13:43:38 +0300 Subject: [PATCH 11/15] airbyte-4148: Bump versions. 
--- docs/integrations/destinations/databricks.md | 27 ++++++++++---------- docs/integrations/destinations/redshift.md | 2 ++ docs/integrations/destinations/s3.md | 5 ++++ docs/integrations/destinations/snowflake.md | 19 +++++++------- 4 files changed, 31 insertions(+), 22 deletions(-) diff --git a/docs/integrations/destinations/databricks.md b/docs/integrations/destinations/databricks.md index 1bfb63ebaa61..ee6e9de3b1b0 100644 --- a/docs/integrations/destinations/databricks.md +++ b/docs/integrations/destinations/databricks.md @@ -23,19 +23,20 @@ Databricks Delta Lake supports various cloud storage as the [data source](https: ## Configuration -| Category | Parameter | Type | Notes | -| :--- | :--- | :---: | :--- | -| Databricks | Server Hostname | string | Required. Example: `abc-12345678-wxyz.cloud.databricks.com`. See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). Please note that this is the server for the Databricks Cluster. It is different from the SQL Endpoint Cluster. | -| | HTTP Path | string | Required. Example: `sql/protocolvx/o/1234567489/0000-1111111-abcd90`. See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). | -| | Port | string | Optional. Default to "443". See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). | -| | Personal Access Token | string | Required. Example: `dapi0123456789abcdefghij0123456789AB`. See [documentation](https://docs.databricks.com/sql/user/security/personal-access-tokens.html). | -| General | Database schema | string | Optional. Default to "public". Each data stream will be written to a table under this database schema. | -| | Purge Staging Data | boolean | The connector creates staging files and tables on S3. By default, they will be purged when the data sync is complete. 
Set it to `false` for debugging purposes. | -| Data Source - S3 | Bucket Name | string | Name of the bucket to sync data into. | -| | Bucket Path | string | Subdirectory under the above bucket to sync the data into. | -| | Region | string | See [documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions) for all region codes. | -| | Access Key ID | string | AWS/Minio credential. | -| | Secret Access Key | string | AWS/Minio credential. | +| Category | Parameter | Type | Notes | +|:-----------------|:----------------------|:-------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Databricks | Server Hostname | string | Required. Example: `abc-12345678-wxyz.cloud.databricks.com`. See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). Please note that this is the server for the Databricks Cluster. It is different from the SQL Endpoint Cluster. | +| | HTTP Path | string | Required. Example: `sql/protocolvx/o/1234567489/0000-1111111-abcd90`. See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). | +| | Port | string | Optional. Default to "443". See [documentation](https://docs.databricks.com/integrations/bi/jdbc-odbc-bi.html#get-server-hostname-port-http-path-and-jdbc-url). | +| | Personal Access Token | string | Required. Example: `dapi0123456789abcdefghij0123456789AB`. See [documentation](https://docs.databricks.com/sql/user/security/personal-access-tokens.html). | +| General | Database schema | string | Optional. Default to "public". 
Each data stream will be written to a table under this database schema. | +| | Purge Staging Data | boolean | The connector creates staging files and tables on S3. By default, they will be purged when the data sync is complete. Set it to `false` for debugging purposes. | +| Data Source - S3 | Bucket Name | string | Name of the bucket to sync data into. | +| | Bucket Path | string | Subdirectory under the above bucket to sync the data into. | +| | Region | string | See [documentation](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions) for all region codes. | +| | Access Key ID | string | AWS/Minio credential. | +| | Secret Access Key | string | AWS/Minio credential. | +| | S3 Filename pattern | string | The pattern allows you to set the file-name format for the S3 staging file(s). The following placeholder combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please don't use empty spaces or unsupported placeholders, as they won't be recognized. | ⚠️ Please note that under "Full Refresh Sync" mode, data in the configured bucket and path will be wiped out before each sync. We recommend you provision a dedicated S3 resource for this sync to prevent unexpected data deletion from misconfiguration. ⚠️ diff --git a/docs/integrations/destinations/redshift.md b/docs/integrations/destinations/redshift.md index efb88e85e64a..4f5bc232f32b 100644 --- a/docs/integrations/destinations/redshift.md +++ b/docs/integrations/destinations/redshift.md @@ -36,6 +36,8 @@ For COPY strategy: * Corresponding key to the above key id. * **Part Size** * Affects the size limit of an individual Redshift table. Optional. Increase this if syncing tables larger than 100GB. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size.
This is 10MB by default, resulting in a default table limit of 100GB. Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care. +* **S3 Filename pattern** + * The pattern allows you to set the file-name format for the S3 staging file(s). The following placeholder combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please don't use empty spaces or unsupported placeholders, as they won't be recognized. Optional parameters: * **Bucket Path** diff --git a/docs/integrations/destinations/s3.md b/docs/integrations/destinations/s3.md index d261c7e3c061..c3aa041e76b9 100644 --- a/docs/integrations/destinations/s3.md +++ b/docs/integrations/destinations/s3.md @@ -44,6 +44,8 @@ Prepare S3 bucket that will be used as destination, see [this](https://docs.aws. * Additional string format on how to store data under S3 Bucket Path. Default value is `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_`. * **S3 Endpoint** * Leave empty if using AWS S3, fill in S3 URL if using Minio S3. + * **S3 Filename pattern** + * The pattern allows you to set the file-name format for the S3 staging file(s). The following placeholder combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please don't use empty spaces or unsupported placeholders, as they won't be recognized. 5. Click `Set up destination`. **For Airbyte OSS:** @@ -74,6 +76,9 @@ Prepare S3 bucket that will be used as destination, see [this](https://docs.aws. * Additional string format on how to store data under S3 Bucket Path. Default value is `${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_`. * **S3 Endpoint** * Leave empty if using AWS S3, fill in S3 URL if using Minio S3.
+ * **S3 Filename pattern** + * The pattern allows you to set the file-name format for the S3 staging file(s). The following placeholder combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please don't use empty spaces or unsupported placeholders, as they won't be recognized. + 5. Click `Set up destination`. In order for everything to work correctly, it is also necessary that the user whose "S3 Key Id" and "S3 Access Key" are used have access to both the bucket and its contents. Policies to use: diff --git a/docs/integrations/destinations/snowflake.md b/docs/integrations/destinations/snowflake.md index 1391ab480bcb..fca173f780f7 100644 --- a/docs/integrations/destinations/snowflake.md +++ b/docs/integrations/destinations/snowflake.md @@ -185,15 +185,16 @@ Field | Description | To use AWS S3 as the cloud storage, enter the information for the S3 bucket you created in Step 2: -| Field | Description | -|---|---| -| S3 Bucket Name | The name of the staging S3 bucket (Example: `airbyte.staging`). Airbyte will write files to this bucket and read them via statements on Snowflake. | -| S3 Bucket Region | The S3 staging bucket region used. | -| S3 Key Id * | The Access Key ID granting access to the S3 staging bucket. Airbyte requires Read and Write permissions for the bucket. | -| S3 Access Key * | The corresponding secret to the S3 Key ID. | -| Stream Part Size (Optional) | Increase this if syncing tables larger than 100GB. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default limit of 100GB tables.
Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care. (e.g. 5) | -| Purge Staging Files and Tables | Determines whether to delete the staging files from S3 after completing the sync. Specifically, the connector will create CSV files named `bucketPath/namespace/streamName/syncDate_epochMillis_randomUuid.csv` containing three columns (`ab_id`, `data`, `emitted_at`). Normally these files are deleted after sync; if you want to keep them for other purposes, set `purge_staging_data` to false. | -| Encryption | Whether files on S3 are encrypted. You probably don't need to enable this, but it can provide an additional layer of security if you are sharing your data storage with other applications. If you do use encryption, you must choose between ephemeral keys (Airbyte will automatically generate a new key for each sync, and nobody but Airbyte and Snowflake will be able to read the data on S3) or providing your own key (if you have the "Purge staging files and tables" option disabled, and you want to be able to decrypt the data yourself) | +| Field | Description | +|--------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| S3 Bucket Name | The name of the staging S3 bucket (Example: `airbyte.staging`). Airbyte will write files to this bucket and read them via statements on Snowflake. | +| S3 Bucket Region | The S3 staging bucket region used. 
| +| S3 Key Id * | The Access Key ID granting access to the S3 staging bucket. Airbyte requires Read and Write permissions for the bucket. | +| S3 Access Key * | The corresponding secret to the S3 Key ID. | +| Stream Part Size (Optional) | Increase this if syncing tables larger than 100GB. Files are streamed to S3 in parts. This determines the size of each part, in MBs. As S3 has a limit of 10,000 parts per file, part size affects the table size. This is 10MB by default, resulting in a default limit of 100GB tables.
Note, a larger part size will result in larger memory requirements. A rule of thumb is to multiply the part size by 10 to get the memory requirement. Modify this with care. (e.g. 5) | +| Purge Staging Files and Tables | Determines whether to delete the staging files from S3 after completing the sync. Specifically, the connector will create CSV files named `bucketPath/namespace/streamName/syncDate_epochMillis_randomUuid.csv` containing three columns (`ab_id`, `data`, `emitted_at`). Normally these files are deleted after sync; if you want to keep them for other purposes, set `purge_staging_data` to false. | +| Encryption | Whether files on S3 are encrypted. You probably don't need to enable this, but it can provide an additional layer of security if you are sharing your data storage with other applications. If you do use encryption, you must choose between ephemeral keys (Airbyte will automatically generate a new key for each sync, and nobody but Airbyte and Snowflake will be able to read the data on S3) or providing your own key (if you have the "Purge staging files and tables" option disabled, and you want to be able to decrypt the data yourself) | +| S3 Filename pattern (Optional) | The pattern allows you to set the file-name format for the S3 staging file(s). The following placeholder combinations are currently supported: {date}, {date:yyyy_MM}, {timestamp}, {timestamp:millis}, {timestamp:micros}, {part_number}, {sync_id}, {format_extension}. Please don't use empty spaces or unsupported placeholders, as they won't be recognized.
| To use a Google Cloud Storage bucket, enter the information for the bucket you created in Step 2: From 7882ead06ff634c0f8a1628dea929309245d4b02 Mon Sep 17 00:00:00 2001 From: Octavia Squidington III Date: Fri, 15 Jul 2022 11:34:07 +0000 Subject: [PATCH 12/15] auto-bump connector version --- .../resources/seed/destination_definitions.yaml | 2 +- .../src/main/resources/seed/destination_specs.yaml | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 84679a9dd3bc..78f3f2909a31 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -231,7 +231,7 @@ - name: Redshift destinationDefinitionId: f7a7d195-377f-cf5b-70a5-be6b819019dc dockerRepository: airbyte/destination-redshift - dockerImageTag: 0.3.46 + dockerImageTag: 0.3.47 documentationUrl: https://docs.airbyte.io/integrations/destinations/redshift icon: redshift.svg resourceRequirements: diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 4b3aba395eaa..db13df0d0c46 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3708,7 +3708,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-redshift:0.3.46" +- dockerImage: "airbyte/destination-redshift:0.3.47" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/redshift" connectionSpecification: @@ -3833,6 +3833,18 @@ - "eu-west-3" - "sa-east-1" - "me-south-1" + file_name_pattern: + type: "string" + description: "The pattern allows you to set the file-name format for\ + \ the S3 staging file(s)" + title: "S3 
Filename pattern (Optional)" + examples: + - "{date}" + - "{date:yyyy_MM}" + - "{timestamp}" + - "{part_number}" + - "{sync_id}" + order: 8 access_key_id: type: "string" description: "This ID grants access to the above S3 staging bucket.\ From 8d45f9f00858fbb10eef21916c09387208ad4f37 Mon Sep 17 00:00:00 2001 From: Octavia Squidington III Date: Fri, 15 Jul 2022 12:32:04 +0000 Subject: [PATCH 13/15] auto-bump connector version --- .../resources/seed/destination_definitions.yaml | 2 +- .../src/main/resources/seed/destination_specs.yaml | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 78f3f2909a31..6f572a5ab039 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -270,7 +270,7 @@ - name: Snowflake destinationDefinitionId: 424892c4-daac-4491-b35d-c6688ba547ba dockerRepository: airbyte/destination-snowflake - dockerImageTag: 0.4.32 + dockerImageTag: 0.4.33 documentationUrl: https://docs.airbyte.io/integrations/destinations/snowflake icon: snowflake.svg resourceRequirements: diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index db13df0d0c46..491037468961 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -4376,7 +4376,7 @@ supported_destination_sync_modes: - "overwrite" - "append" -- dockerImage: "airbyte/destination-snowflake:0.4.32" +- dockerImage: "airbyte/destination-snowflake:0.4.33" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/snowflake" connectionSpecification: @@ -4647,6 +4647,18 @@ \ or 256 bits. 
Leave blank to have Airbyte generate an ephemeral\ \ key for each sync." airbyte_secret: true + file_name_pattern: + type: "string" + description: "The pattern allows you to set the file-name format for\ + \ the S3 staging file(s)" + title: "S3 Filename pattern (Optional)" + examples: + - "{date}" + - "{date:yyyy_MM}" + - "{timestamp}" + - "{part_number}" + - "{sync_id}" + order: 7 - title: "Google Cloud Storage Staging" description: "Recommended for large production workloads for better speed\ \ and scalability." From 5d202aa92d174fab2eb9dcf01bfbecb71328d5fe Mon Sep 17 00:00:00 2001 From: Octavia Squidington III Date: Fri, 15 Jul 2022 12:43:33 +0000 Subject: [PATCH 14/15] auto-bump connector version --- .../resources/seed/destination_definitions.yaml | 2 +- .../src/main/resources/seed/destination_specs.yaml | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index 6f572a5ab039..e709c19642d4 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -250,7 +250,7 @@ - name: S3 destinationDefinitionId: 4816b78f-1489-44c1-9060-4b19d5fa9362 dockerRepository: airbyte/destination-s3 - dockerImageTag: 0.3.10 + dockerImageTag: 0.3.11 documentationUrl: https://docs.airbyte.io/integrations/destinations/s3 icon: s3.svg resourceRequirements: diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 491037468961..7461c2db106d 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -3958,7 +3958,7 @@ supported_destination_sync_modes: - "append" - "overwrite" -- dockerImage: 
"airbyte/destination-s3:0.3.10" +- dockerImage: "airbyte/destination-s3:0.3.11" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/s3" connectionSpecification: @@ -4317,6 +4317,18 @@ examples: - "${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_" order: 7 + file_name_pattern: + type: "string" + description: "The pattern allows you to set the file-name format for the\ + \ S3 staging file(s)" + title: "S3 Filename pattern (Optional)" + examples: + - "{date}" + - "{date:yyyy_MM}" + - "{timestamp}" + - "{part_number}" + - "{sync_id}" + order: 8 supportsIncremental: true supportsNormalization: false supportsDBT: false From 1e60667e4f525d7f7dc0ef38753f17aca30c1c7e Mon Sep 17 00:00:00 2001 From: Octavia Squidington III Date: Fri, 15 Jul 2022 12:54:39 +0000 Subject: [PATCH 15/15] auto-bump connector version --- .../resources/seed/destination_definitions.yaml | 2 +- .../src/main/resources/seed/destination_specs.yaml | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml index e709c19642d4..7f1f30b13b9e 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/destination_definitions.yaml @@ -73,7 +73,7 @@ - name: Databricks Lakehouse destinationDefinitionId: 072d5540-f236-4294-ba7c-ade8fd918496 dockerRepository: airbyte/destination-databricks - dockerImageTag: 0.2.4 + dockerImageTag: 0.2.5 documentationUrl: https://docs.airbyte.io/integrations/destinations/databricks icon: databricks.svg releaseStage: alpha diff --git a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml index 7461c2db106d..9e72ea233016 100644 --- a/airbyte-config/init/src/main/resources/seed/destination_specs.yaml +++ 
b/airbyte-config/init/src/main/resources/seed/destination_specs.yaml @@ -966,7 +966,7 @@ - "overwrite" - "append" - "append_dedup" -- dockerImage: "airbyte/destination-databricks:0.2.4" +- dockerImage: "airbyte/destination-databricks:0.2.5" spec: documentationUrl: "https://docs.airbyte.io/integrations/destinations/databricks" connectionSpecification: @@ -1116,6 +1116,18 @@ - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY" airbyte_secret: true order: 6 + file_name_pattern: + type: "string" + description: "The pattern allows you to set the file-name format for\ + \ the S3 staging file(s)" + title: "S3 Filename pattern (Optional)" + examples: + - "{date}" + - "{date:yyyy_MM}" + - "{timestamp}" + - "{part_number}" + - "{sync_id}" + order: 7 order: 7 purge_staging_data: title: "Purge Staging Files and Tables"