Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

S3 destination: updating specs regarding certification #11917

Merged
merged 11 commits into from
May 3, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -16,40 +16,41 @@
],
"additionalProperties": false,
"properties": {
"s3_endpoint": {
"title": "Endpoint",
"access_key_id": {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For all of these fields, where are the relevant docs linked so a user can find more information as part of this checklist item?

Copy link
Contributor Author

@sashaNeshcheret sashaNeshcheret Apr 29, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added links for required and optional first level fields.

"type": "string",
"default": "",
"description": "This is your S3 endpoint url.(if you are working with AWS S3, just leave empty).",
"examples": ["http://localhost:9000"],
"description": "The access key ID to access the S3 bucket. Airbyte requires Read and Write permissions to the given bucket. See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys) on how to generate an access key.",
"title": "S3 Key ID *",
"airbyte_secret": true,
"examples": ["A012345678910EXAMPLE"],
"order": 0
},
"s3_bucket_name": {
"title": "S3 Bucket Name",
"secret_access_key": {
"type": "string",
"description": "The name of the S3 bucket.",
"examples": ["airbyte_sync"],
"description": "The corresponding secret to the access key ID. See [this](https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys)",
"title": "S3 Access Key *",
"airbyte_secret": true,
"examples": ["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"],
"order": 1
},
"s3_bucket_path": {
"description": "Directory under the S3 bucket where data will be written.",
"s3_bucket_name": {
"title": "S3 Bucket Name *",
"type": "string",
"examples": ["data_sync/test"],
"description": "The name of the S3 bucket. See [this](https://docs.aws.amazon.com/AmazonS3/latest/userguide/create-bucket-overview.html) to create an S3 bucket. to create an S3 bucket",
"examples": ["airbyte_sync"],
"order": 2
},
"s3_path_format": {
"description": "Format string on how data will be organized inside the S3 bucket directory",
"s3_bucket_path": {
"title": "S3 Bucket Path *",
"description": "Directory under the S3 bucket where data will be written. See [this](https://docs.airbyte.com/integrations/destinations/s3#:~:text=to%20format%20the-,bucket%20path,-%3A)",
"type": "string",
"examples": [
"${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_"
],
"examples": ["data_sync/test"],
"order": 3
},
"s3_bucket_region": {
"title": "S3 Bucket Region",
"title": "S3 Bucket Region *",
"type": "string",
"default": "",
"description": "The region of the S3 bucket.",
"description": "The region of the S3 bucket. See [here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions) for all region codes.",
"enum": [
"",
"us-east-1",
Expand Down Expand Up @@ -80,38 +81,24 @@
],
"order": 4
},
"access_key_id": {
"type": "string",
"description": "The access key id to access the S3 bucket. Airbyte requires Read and Write permissions to the given bucket, if not set, Airbyte will rely on Instance Profile.",
"title": "S3 Key Id",
"airbyte_secret": true,
"examples": ["A012345678910EXAMPLE"],
"order": 5
},
"secret_access_key": {
"type": "string",
"description": "The corresponding secret to the access key id, if S3 Key Id is set, then S3 Access Key must also be provided",
"title": "S3 Access Key",
"airbyte_secret": true,
"examples": ["a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"],
"order": 6
},
"format": {
"title": "Output Format",
"title": "Output Format *",
"type": "object",
"description": "Output data format",
"description": "Output data format See [here](https://docs.airbyte.com/integrations/destinations/s3/#output-schema) for more details",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"description": "Output data format See [here](https://docs.airbyte.com/integrations/destinations/s3/#output-schema) for more details",
"description": "Format of the data output. See [here](https://docs.airbyte.com/integrations/destinations/s3/#output-schema) for more details",

"oneOf": [
{
"title": "Avro: Apache Avro",
"required": ["format_type", "compression_codec"],
"properties": {
"format_type": {
"title": "Format Type *",
"type": "string",
"enum": ["Avro"],
"default": "Avro"
"default": "Avro",
"order": 0
},
"compression_codec": {
"title": "Compression Codec",
"title": "Compression Codec *",
"description": "The compression algorithm used to compress data. Default to no compression.",
"type": "object",
"oneOf": [
Expand Down Expand Up @@ -211,14 +198,16 @@
}
}
}
]
],
"order": 1
},
"part_size_mb": {
"title": "Block Size (MB) for Amazon S3 multipart upload",
"title": "Block Size (MB) for Amazon S3 multipart upload (Optional)",
"description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes9 more memory. Allowed values: min=5MB, max=525MB Default: 5MB.",
"type": "integer",
"default": 5,
"examples": [5]
"examples": [5],
"order": 2
}
}
},
Expand All @@ -227,6 +216,7 @@
"required": ["format_type", "flattening"],
"properties": {
"format_type": {
"title": "Format Type *",
"type": "string",
"enum": ["CSV"],
"default": "CSV"
Expand All @@ -239,7 +229,7 @@
"enum": ["No flattening", "Root level flattening"]
},
"part_size_mb": {
"title": "Block Size (MB) for Amazon S3 multipart upload",
"title": "Block Size (MB) for Amazon S3 multipart upload (Optional)",
"description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes9 more memory. Allowed values: min=5MB, max=525MB Default: 5MB.",
"type": "integer",
"default": 5,
Expand Down Expand Up @@ -281,12 +271,13 @@
"required": ["format_type"],
"properties": {
"format_type": {
"title": "Format Type *",
"type": "string",
"enum": ["JSONL"],
"default": "JSONL"
},
"part_size_mb": {
"title": "Block Size (MB) for Amazon S3 multipart upload",
"title": "Block Size (MB) for Amazon S3 multipart upload (Optional)",
"description": "This is the size of a \"Part\" being buffered in memory. It limits the memory usage when writing. Larger values will allow to upload a bigger files and improve the speed, but consumes9 more memory. Allowed values: min=5MB, max=525MB Default: 5MB.",
"type": "integer",
"default": 5,
Expand Down Expand Up @@ -328,12 +319,13 @@
"required": ["format_type"],
"properties": {
"format_type": {
"title": "Format Type *",
"type": "string",
"enum": ["Parquet"],
"default": "Parquet"
},
"compression_codec": {
"title": "Compression Codec",
"title": "Compression Codec (Optional)",
"description": "The compression algorithm used to compress data pages.",
"type": "string",
"enum": [
Expand All @@ -348,42 +340,59 @@
"default": "UNCOMPRESSED"
},
"block_size_mb": {
"title": "Block Size (Row Group Size) (MB)",
"title": "Block Size (Row Group Size) (MB) (Optional)",
"description": "This is the size of a row group being buffered in memory. It limits the memory usage when writing. Larger values will improve the IO when reading, but consume more memory when writing. Default: 128 MB.",
"type": "integer",
"default": 128,
"examples": [128]
},
"max_padding_size_mb": {
"title": "Max Padding Size (MB)",
"title": "Max Padding Size (MB) (Optional)",
"description": "Maximum size allowed as padding to align row groups. This is also the minimum size of a row group. Default: 8 MB.",
"type": "integer",
"default": 8,
"examples": [8]
},
"page_size_kb": {
"title": "Page Size (KB)",
"title": "Page Size (KB) (Optional)",
"description": "The page size is for compression. A block is composed of pages. A page is the smallest unit that must be read fully to access a single record. If this value is too small, the compression will deteriorate. Default: 1024 KB.",
"type": "integer",
"default": 1024,
"examples": [1024]
},
"dictionary_page_size_kb": {
"title": "Dictionary Page Size (KB)",
"title": "Dictionary Page Size (KB) (Optional)",
"description": "There is one dictionary page per column per row group when dictionary encoding is used. The dictionary page size works like the page size but for dictionary. Default: 1024 KB.",
"type": "integer",
"default": 1024,
"examples": [1024]
},
"dictionary_encoding": {
"title": "Dictionary Encoding",
"title": "Dictionary Encoding (Optional)",
"description": "Default: true.",
"type": "boolean",
"default": true
}
}
}
],
"order": 5
},
"s3_endpoint": {
"title": "Endpoint (Optional)",
"type": "string",
"default": "",
"description": "This is your S3 endpoint url. (If you are working with AWS S3, you can leave blank). See [this](https://docs.aws.amazon.com/general/latest/gr/s3.html#:~:text=Service%20endpoints-,Amazon%20S3%20endpoints,-When%20you%20use)",
"examples": ["http://localhost:9000"],
"order": 6
},
"s3_path_format": {
"title": "S3 Path Format (Optional)",
"description": "Format string on how data will be organized inside the S3 bucket directory. See [this](https://docs.airbyte.com/integrations/destinations/s3#:~:text=The%20full%20path%20of%20the%20output%20data%20with%20the%20default%20S3%20path%20format)",
"type": "string",
"examples": [
"${NAMESPACE}/${STREAM_NAME}/${YEAR}_${MONTH}_${DAY}_${EPOCH}_"
],
"order": 7
}
}
Expand Down