From f1df9ab2ba29051016f052ffe9a629ca698289b8 Mon Sep 17 00:00:00 2001 From: Rizxcviii Date: Mon, 31 Jul 2023 21:35:32 +0100 Subject: [PATCH] feat(glue): glue tables can include storage parameters (#24498) Includes a `storageParameters` property, allowing developers to access the `tableInput.storageDescriptor.parameters` property within the `CfnTable` resource. Closes #23132. ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license* --- .../cdk.out | 2 +- packages/@aws-cdk/aws-glue-alpha/README.md | 20 +- packages/@aws-cdk/aws-glue-alpha/lib/index.ts | 5 +- .../aws-glue-alpha/lib/storage-parameter.ts | 423 ++++++++++++++++++ packages/@aws-cdk/aws-glue-alpha/lib/table.ts | 52 ++- packages/@aws-cdk/aws-glue-alpha/package.json | 4 +- .../aws-cdk-glue.assets.json | 6 +- .../aws-cdk-glue.template.json | 97 +++- ...efaultTestDeployAssert8BFB5B70.assets.json | 19 + ...aultTestDeployAssert8BFB5B70.template.json | 36 ++ .../test/integ.table.js.snapshot/cdk.out | 2 +- .../test/integ.table.js.snapshot/integ.json | 12 +- .../integ.table.js.snapshot/manifest.json | 68 ++- .../test/integ.table.js.snapshot/tree.json | 245 +++++++--- .../aws-glue-alpha/test/integ.table.ts | 31 +- .../aws-glue-alpha/test/table.test.ts | 60 ++- 16 files changed, 964 insertions(+), 118 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue-alpha/lib/storage-parameter.ts create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json create mode 100644 packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json diff --git a/packages/@aws-cdk/aws-batch-alpha/test/integ.batch-unique-name.js.snapshot/cdk.out b/packages/@aws-cdk/aws-batch-alpha/test/integ.batch-unique-name.js.snapshot/cdk.out index f0b901e7c06e5..7df7694e7a5a5 100644 --- 
a/packages/@aws-cdk/aws-batch-alpha/test/integ.batch-unique-name.js.snapshot/cdk.out +++ b/packages/@aws-cdk/aws-batch-alpha/test/integ.batch-unique-name.js.snapshot/cdk.out @@ -1 +1 @@ -{"version":"32.0.0"} \ No newline at end of file +{"version":"32.0.0"} diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 8044d470c0b4d..64ac86f8c6b9b 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -223,7 +223,25 @@ new glue.Table(this, 'MyTable', { }); ``` -By default, an S3 bucket will be created to store the table's data and stored in the bucket root. You can also manually pass the `bucket` and `s3Prefix`: +Glue tables can be configured to contain user-defined properties, to describe the physical storage of table data, through the `storageParameters` property: + +```ts +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(1), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + glue.StorageParameter.custom('separatorChar', ',') + ], + // ... 
+ database: myDatabase, + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, +}); +``` ### Partition Keys diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts index 8a416993bb5ba..c6a611242c925 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/index.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/index.ts @@ -1,12 +1,13 @@ // AWS::Glue CloudFormation Resources: +export * from './code'; export * from './connection'; export * from './data-format'; export * from './data-quality-ruleset'; export * from './database'; export * from './job'; export * from './job-executable'; -export * from './code'; export * from './schema'; export * from './security-configuration'; -export * from './table'; \ No newline at end of file +export * from './storage-parameter'; +export * from './table'; diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/storage-parameter.ts b/packages/@aws-cdk/aws-glue-alpha/lib/storage-parameter.ts new file mode 100644 index 0000000000000..50b1ad934d712 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/lib/storage-parameter.ts @@ -0,0 +1,423 @@ +/** + * The compression type. + * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ > _"compression_type"_ + */ +export enum CompressionType { + /** + * No compression. + */ + NONE = 'none', + + /** + * Burrows-Wheeler compression. + */ + BZIP2 = 'bzip2', + + /** + * Deflate compression. + */ + GZIP = 'gzip', + + /** + * Compression algorithm focused on high compression and decompression speeds, rather than the maximum possible compression. + */ + SNAPPY = 'snappy', +} + +/** + * Specifies the action to perform when query results contain invalid UTF-8 character values. 
+ * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ > _"invalid_char_handling"_ + */ +export enum InvalidCharHandlingAction { + /** + * Doesn't perform invalid character handling. + */ + DISABLED = 'DISABLED', + + /** + * Cancels queries that return data containing invalid UTF-8 values. + */ + FAIL = 'FAIL', + + /** + * Replaces invalid UTF-8 values with null. + */ + SET_TO_NULL = 'SET_TO_NULL', + + /** + * Replaces each value in the row with null. + */ + DROP_ROW = 'DROP_ROW', + + /** + * Replaces the invalid character with the replacement character you specify using `REPLACEMENT_CHAR`. + */ + REPLACE = 'REPLACE', +} + +/** + * Specifies the action to perform when ORC data contains an integer (for example, BIGINT or int64) that is larger than the column definition (for example, SMALLINT or int16). + * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ > _"numeric_overflow_handling"_ + */ +export enum NumericOverflowHandlingAction { + /** + * Numeric overflow handling is turned off. + */ + DISABLED = 'DISABLED', + + /** + * Cancel the query when the data includes a numeric overflow. + */ + FAIL = 'FAIL', + + /** + * Set values that overflow the column definition to null. + */ + SET_TO_NULL = 'SET_TO_NULL', + + /** + * Set each value in the row to null. + */ + DROP_ROW = 'DROP_ROW', +} + +/** + * Specifies how to handle data being loaded that exceeds the length of the data type defined for columns containing VARBYTE data. By default, Redshift Spectrum sets the value to null for data that exceeds the width of the column. 
+ * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ > _"surplus_bytes_handling"_ + */ +export enum SurplusBytesHandlingAction { + /** + * Replaces data that exceeds the column width with null. + */ + SET_TO_NULL = 'SET_TO_NULL', + + /** + * Doesn't perform surplus byte handling. + */ + DISABLED = 'DISABLED', + + /** + * Cancels queries that return data exceeding the column width. + */ + FAIL = 'FAIL', + + /** + * Drop all rows that contain data exceeding column width. + */ + DROP_ROW = 'DROP_ROW', + + /** + * Removes the characters that exceed the maximum number of characters defined for the column. + */ + TRUNCATE = 'TRUNCATE', +} + +/** + * Specifies how to handle data being loaded that exceeds the length of the data type defined for columns containing VARCHAR, CHAR, or string data. By default, Redshift Spectrum sets the value to null for data that exceeds the width of the column. + * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ > _"surplus_char_handling"_ + */ +export enum SurplusCharHandlingAction { + /** + * Replaces data that exceeds the column width with null. + */ + SET_TO_NULL = 'SET_TO_NULL', + + /** + * Doesn't perform surplus character handling. + */ + DISABLED = 'DISABLED', + + /** + * Cancels queries that return data exceeding the column width. + */ + FAIL = 'FAIL', + + /** + * Replaces each value in the row with null. + */ + DROP_ROW = 'DROP_ROW', + + /** + * Removes the characters that exceed the maximum number of characters defined for the column. + */ + TRUNCATE = 'TRUNCATE', +} + +/** + * Identifies if the file contains less or more values for a row than the number of columns specified in the external table definition. This property is only available for an uncompressed text file format. 
+ * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ > _"column_count_mismatch_handling"_ + */ +export enum ColumnCountMismatchHandlingAction { + /** + * Column count mismatch handling is turned off. + */ + DISABLED = 'DISABLED', + + /** + * Fail the query if the column count mismatch is detected. + */ + FAIL = 'FAIL', + + /** + * Fill missing values with NULL and ignore the additional values in each row. + */ + SET_TO_NULL = 'SET_TO_NULL', + + /** + * Drop all rows that contain column count mismatch error from the scan. + */ + DROP_ROW = 'DROP_ROW', +} + +/** + * Specifies whether CREATE EXTERNAL TABLE AS should write data in parallel. When set to off, CREATE EXTERNAL TABLE AS writes to one or more data files serially onto Amazon S3. + * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ > _"write.parallel"_ + */ +export enum WriteParallel { + /** + * Write data in parallel. + */ + ON = 'on', + + /** + * Write data serially. + */ + OFF = 'off', +} + +/** + * Specifies how to map columns when the table uses ORC data format. + * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ > _"orc.schema.resolution"_ + */ +export enum OrcColumnMappingType { + /** + * Map columns by name. + */ + NAME = 'name', + + /** + * Map columns by position. + */ + POSITION = 'position', +} + +/** + * The storage parameter keys that are currently known, this list is not exhaustive and other keys may be used. + */ +export enum StorageParameters { + /** + * The number of rows to skip at the top of a CSV file when the table is being created. 
+ */ + SKIP_HEADER_LINE_COUNT = 'skip.header.line.count', + + /** + * Determines whether data handling is on for the table. + */ + DATA_CLEANSING_ENABLED = 'data_cleansing_enabled', + + /** + * The type of compression used on the table, when the file name does not contain an extension. This value overrides the compression type specified through the extension. + */ + COMPRESSION_TYPE = 'compression_type', + + /** + * Specifies the action to perform when query results contain invalid UTF-8 character values. + */ + INVALID_CHAR_HANDLING = 'invalid_char_handling', + + /** + * Specifies the replacement character to use when you set `INVALID_CHAR_HANDLING` to `REPLACE`. + */ + REPLACEMENT_CHAR = 'replacement_char', + + /** + * Specifies the action to perform when ORC data contains an integer (for example, BIGINT or int64) that is larger than the column definition (for example, SMALLINT or int16). + */ + NUMERIC_OVERFLOW_HANDLING = 'numeric_overflow_handling', + + /** + * Specifies how to handle data being loaded that exceeds the length of the data type defined for columns containing VARBYTE data. By default, Redshift Spectrum sets the value to null for data that exceeds the width of the column. + */ + SURPLUS_BYTES_HANDLING = 'surplus_bytes_handling', + + /** + * Specifies how to handle data being loaded that exceeds the length of the data type defined for columns containing VARCHAR, CHAR, or string data. By default, Redshift Spectrum sets the value to null for data that exceeds the width of the column. + */ + SURPLUS_CHAR_HANDLING = 'surplus_char_handling', + + /** + * Identifies if the file contains less or more values for a row than the number of columns specified in the external table definition. This property is only available for an uncompressed text file format. + */ + COLUMN_COUNT_MISMATCH_HANDLING = 'column_count_mismatch_handling', + + /** + * A property that sets the numRows value for the table definition. 
To explicitly update an external table's statistics, set the numRows property to indicate the size of the table. Amazon Redshift doesn't analyze external tables to generate the table statistics that the query optimizer uses to generate a query plan. If table statistics aren't set for an external table, Amazon Redshift generates a query execution plan based on an assumption that external tables are the larger tables and local tables are the smaller tables. + */ + NUM_ROWS = 'num_rows', + + /** + * A property that specifies the text which, when it exactly matches the text supplied in a field, causes Redshift Spectrum to return a NULL value for that field. + */ + SERIALIZATION_NULL_FORMAT = 'serialization.null.format', + + /** + * A property that sets the column mapping type for tables that use ORC data format. This property is ignored for other data formats. + */ + ORC_SCHEMA_RESOLUTION = 'orc.schema.resolution', + + /** + * A property that sets whether CREATE EXTERNAL TABLE AS should write data in parallel. When 'write.parallel' is set to off, CREATE EXTERNAL TABLE AS writes to one or more data files serially onto Amazon S3. This table property also applies to any subsequent INSERT statement into the same external table. + */ + WRITE_PARALLEL = 'write.parallel', + + /** + * A property that sets the maximum size (in MB) of each file written to Amazon S3 by CREATE EXTERNAL TABLE AS. The size must be a valid integer between 5 and 6200. The default maximum file size is 6,200 MB. This table property also applies to any subsequent INSERT statement into the same external table. + */ + WRITE_MAX_FILESIZE_MB = 'write.maxfilesize.mb', + + /** + * You can specify an AWS Key Management Service key to enable Server–Side Encryption (SSE) for Amazon S3 objects. + */ + WRITE_KMS_KEY_ID = 'write.kms.key.id', +} + +/** + * A storage parameter. The list of storage parameters available is not exhaustive and other keys may be used. 
+ * + * If you would like to specify a storage parameter that is not available as a static member of this class, use the `StorageParameter.custom` method. + * + * The list of storage parameters currently known within the CDK is listed. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/table-properties-crawler.html + * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ + */ +export class StorageParameter { + /** + * The number of rows to skip at the top of a CSV file when the table is being created. + */ + public static skipHeaderLineCount(value: number): StorageParameter { + return new StorageParameter('skip.header.line.count', value.toString()); + } + + /** + * Determines whether data handling is on for the table. + */ + public static dataCleansingEnabled(value: boolean): StorageParameter { + return new StorageParameter('data_cleansing_enabled', value.toString()); + } + + /** + * The type of compression used on the table, when the file name does not contain an extension. This value overrides the compression type specified through the extension. + */ + public static compressionType(value: CompressionType): StorageParameter { + return new StorageParameter('compression_type', value); + } + + /** + * Specifies the action to perform when query results contain invalid UTF-8 character values. + */ + public static invalidCharHandling(value: InvalidCharHandlingAction): StorageParameter { + return new StorageParameter('invalid_char_handling', value); + } + + /** + * Specifies the replacement character to use when you set `INVALID_CHAR_HANDLING` to `REPLACE`. + */ + public static replacementChar(value: string): StorageParameter { + return new StorageParameter('replacement_char', value); + } + + /** + * Specifies the action to perform when ORC data contains an integer (for example, BIGINT or int64) that is larger than the column definition (for example, SMALLINT or int16). 
+ */ + public static numericOverflowHandling(value: NumericOverflowHandlingAction): StorageParameter { + return new StorageParameter('numeric_overflow_handling', value); + } + + /** + * Specifies how to handle data being loaded that exceeds the length of the data type defined for columns containing VARBYTE data. By default, Redshift Spectrum sets the value to null for data that exceeds the width of the column. + */ + public static surplusBytesHandling(value: SurplusBytesHandlingAction): StorageParameter { + return new StorageParameter('surplus_bytes_handling', value); + } + + /** + * Specifies how to handle data being loaded that exceeds the length of the data type defined for columns containing VARCHAR, CHAR, or string data. By default, Redshift Spectrum sets the value to null for data that exceeds the width of the column. + */ + public static surplusCharHandling(value: SurplusCharHandlingAction): StorageParameter { + return new StorageParameter('surplus_char_handling', value); + } + + /** + * Identifies if the file contains less or more values for a row than the number of columns specified in the external table definition. This property is only available for an uncompressed text file format. + */ + public static columnCountMismatchHandling(value: ColumnCountMismatchHandlingAction): StorageParameter { + return new StorageParameter('column_count_mismatch_handling', value); + } + + /** + * A property that sets the numRows value for the table definition. To explicitly update an external table's statistics, set the numRows property to indicate the size of the table. Amazon Redshift doesn't analyze external tables to generate the table statistics that the query optimizer uses to generate a query plan. If table statistics aren't set for an external table, Amazon Redshift generates a query execution plan based on an assumption that external tables are the larger tables and local tables are the smaller tables. 
+ */ + public static numRows(value: number): StorageParameter { + return new StorageParameter('num_rows', value.toString()); + } + + /** + * A property that specifies the text which, when it exactly matches the text supplied in a field, causes Redshift Spectrum to return a NULL value for that field. + */ + public static serializationNullFormat(value: string): StorageParameter { + return new StorageParameter('serialization.null.format', value); + } + + /** + * A property that sets the column mapping type for tables that use ORC data format. This property is ignored for other data formats. If this property is omitted, columns are mapped by `OrcColumnMappingType.NAME` by default. + * + * @default OrcColumnMappingType.NAME + */ + public static orcSchemaResolution(value: OrcColumnMappingType): StorageParameter { + return new StorageParameter('orc.schema.resolution', value); + } + + /** + * A property that sets whether CREATE EXTERNAL TABLE AS should write data in parallel. When 'write.parallel' is set to off, CREATE EXTERNAL TABLE AS writes to one or more data files serially onto Amazon S3. This table property also applies to any subsequent INSERT statement into the same external table. + * + * @default WriteParallel.ON + */ + public static writeParallel(value: WriteParallel): StorageParameter { + return new StorageParameter('write.parallel', value); + } + + /** + * A property that sets the maximum size (in MB) of each file written to Amazon S3 by CREATE EXTERNAL TABLE AS. The size must be a valid integer between 5 and 6200. The default maximum file size is 6,200 MB. This table property also applies to any subsequent INSERT statement into the same external table. + */ + public static writeMaxFileSizeMb(value: number): StorageParameter { + return new StorageParameter('write.maxfilesize.mb', value.toString()); + } + + /** + * You can specify an AWS Key Management Service key to enable Server–Side Encryption (SSE) for Amazon S3 objects. 
+ */ + public static writeKmsKeyId(value: string): StorageParameter { + return new StorageParameter('write.kms.key.id', value); + } + + /** + * A custom storage parameter. + * @param key - The key of the storage parameter. + * @param value - The value of the storage parameter. + */ + public static custom(key: string, value: any): StorageParameter { + return new StorageParameter(key, value.toString()); + } + + protected constructor(public readonly key: string, public readonly value: string) {} +} diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/table.ts b/packages/@aws-cdk/aws-glue-alpha/lib/table.ts index ff36dce8a0f90..f62e6af2010a4 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/table.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/table.ts @@ -1,3 +1,4 @@ +import { CfnTable } from 'aws-cdk-lib/aws-glue'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as kms from 'aws-cdk-lib/aws-kms'; import * as s3 from 'aws-cdk-lib/aws-s3'; @@ -7,8 +8,8 @@ import { AwsCustomResource } from 'aws-cdk-lib/custom-resources'; import { Construct } from 'constructs'; import { DataFormat } from './data-format'; import { IDatabase } from './database'; -import { CfnTable } from 'aws-cdk-lib/aws-glue'; import { Column } from './schema'; +import { StorageParameter } from './storage-parameter'; /** * Properties of a Partition Index. @@ -181,6 +182,41 @@ export interface TableProps { * @default - The parameter is not defined */ readonly enablePartitionFiltering?: boolean; + + /** + * The user-supplied properties for the description of the physical storage of this table. These properties help describe the format of the data that is stored within the crawled data sources. + * + * The key/value pairs that are allowed to be submitted are not limited, however their functionality is not guaranteed. + * + * Some keys will be auto-populated by glue crawlers, however, you can override them by specifying the key and value in this property. 
+ * + * @see https://docs.aws.amazon.com/glue/latest/dg/table-properties-crawler.html + * + * @see https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_EXTERNAL_TABLE.html#r_CREATE_EXTERNAL_TABLE-parameters - under _"TABLE PROPERTIES"_ + * + * @example + * + * declare const glueDatabase: glue.IDatabase; + * const table = new glue.Table(this, 'Table', { + * storageParameters: [ + * glue.StorageParameter.skipHeaderLineCount(1), + * glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + * glue.StorageParameter.custom('foo', 'bar'), // Will have no effect + * glue.StorageParameter.custom('separatorChar', ','), // Will describe the separator char used in the data + * glue.StorageParameter.custom(glue.StorageParameters.WRITE_PARALLEL, 'off'), + * ], + * // ... + * database: glueDatabase, + * columns: [{ + * name: 'col1', + * type: glue.Schema.STRING, + * }], + * dataFormat: glue.DataFormat.CSV, + * }); + * + * @default - The parameter is not defined + */ + readonly storageParameters?: StorageParameter[]; } /** @@ -273,6 +309,11 @@ export class Table extends Resource implements ITable { */ public readonly partitionIndexes?: PartitionIndex[]; + /** + * The table's storage descriptor properties. + */ + public readonly storageParameters?: StorageParameter[]; + /** * Partition indexes must be created one at a time. To avoid * race conditions, we store the resource and add dependencies @@ -295,6 +336,7 @@ export class Table extends Resource implements ITable { validateSchema(props.columns, props.partitionKeys); this.columns = props.columns; this.partitionKeys = props.partitionKeys; + this.storageParameters = props.storageParameters; this.compressed = props.compressed ?? false; const { bucket, encryption, encryptionKey } = createBucket(this, props); @@ -328,6 +370,14 @@ export class Table extends Resource implements ITable { serdeInfo: { serializationLibrary: props.dataFormat.serializationLibrary.className, }, + parameters: props.storageParameters ? 
props.storageParameters.reduce((acc, param) => { + if (param.key in acc) { + throw new Error(`Duplicate storage parameter key: ${param.key}`); + } + const key = param.key; + acc[key] = param.value; + return acc; + }, {} as { [key: string]: string }) : undefined, }, tableType: 'EXTERNAL_TABLE', diff --git a/packages/@aws-cdk/aws-glue-alpha/package.json b/packages/@aws-cdk/aws-glue-alpha/package.json index 0d31e5e201dcd..ef6e54a932b81 100644 --- a/packages/@aws-cdk/aws-glue-alpha/package.json +++ b/packages/@aws-cdk/aws-glue-alpha/package.json @@ -82,15 +82,15 @@ "license": "Apache-2.0", "devDependencies": { "@aws-cdk/cdk-build-tools": "0.0.0", + "@aws-cdk/cfn2ts": "0.0.0", "@aws-cdk/integ-runner": "0.0.0", - "@aws-cdk/pkglint": "0.0.0", "@aws-cdk/integ-tests-alpha": "0.0.0", + "@aws-cdk/pkglint": "0.0.0", "@types/jest": "^29.5.3", "aws-cdk-lib": "0.0.0", "constructs": "^10.0.0", "jest": "^29.6.2" }, - "dependencies": {}, "homepage": "https://github.com/aws/aws-cdk", "peerDependencies": { "aws-cdk-lib": "^0.0.0", diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json index e1b6a011823d6..d714c2d54aaba 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.assets.json @@ -1,7 +1,7 @@ { - "version": "31.0.0", + "version": "33.0.0", "files": { - "1b05206385b50de7e074070a25b271988c0055f2fde760f5c119af3fef3a1bcb": { + "dc6c1c5f05a8e365822e6d61c41b6fc6afd58d20a2784614b906ae1587c68754": { "source": { "path": "aws-cdk-glue.template.json", "packaging": "file" @@ -9,7 +9,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "1b05206385b50de7e074070a25b271988c0055f2fde760f5c119af3fef3a1bcb.json", + "objectKey": 
"dc6c1c5f05a8e365822e6d61c41b6fc6afd58d20a2784614b906ae1587c68754.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json index 45af51d9b9a02..f4415b64a1333 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/aws-cdk-glue.template.json @@ -2,8 +2,8 @@ "Resources": { "DataBucketE3889A50": { "Type": "AWS::S3::Bucket", - "UpdateReplacePolicy": "Retain", - "DeletionPolicy": "Retain" + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" }, "MyDatabase1E2517DB": { "Type": "AWS::Glue::Database", @@ -328,8 +328,8 @@ "Version": "2012-10-17" } }, - "UpdateReplacePolicy": "Retain", - "DeletionPolicy": "Retain" + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" }, "MyEncryptedTableBucket7B28486D": { "Type": "AWS::S3::Bucket", @@ -423,22 +423,6 @@ } } }, - "MyPartitionFilteredTableBucket6ACAA137": { - "Type": "AWS::S3::Bucket", - "Properties": { - "BucketEncryption": { - "ServerSideEncryptionConfiguration": [ - { - "ServerSideEncryptionByDefault": { - "SSEAlgorithm": "AES256" - } - } - ] - } - }, - "UpdateReplacePolicy": "Retain", - "DeletionPolicy": "Retain" - }, "MyPartitionFilteredTable324BA27A": { "Type": "AWS::Glue::Table", "Properties": { @@ -488,7 +472,7 @@ [ "s3://", { - "Ref": "MyPartitionFilteredTableBucket6ACAA137" + "Ref": "DataBucketE3889A50" }, "/" ] @@ -504,6 +488,77 @@ } } }, + "MyTableWithStorageDescriptorParametersTable1A347345": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": 
"table_with_storage_descriptor_parameters generated by CDK", + "Name": "table_with_storage_descriptor_parameters", + "Parameters": { + "classification": "json", + "has_encrypted_data": true + }, + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Comment": "col2 comment", + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "array" + }, + { + "Name": "col4", + "Type": "map" + }, + { + "Name": "col5", + "Type": "struct" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "Parameters": { + "skip.header.line.count": "1", + "compression_type": "gzip", + "foo": "bar", + "separatorChar": ",", + "write.parallel": "off" + }, + "SerdeInfo": { + "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, "MyUserDC45028B": { "Type": "AWS::IAM::User" }, diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json new file mode 100644 index 0000000000000..670371bb340e7 --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json @@ -0,0 +1,19 @@ +{ + "version": "33.0.0", + "files": { + "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": { + "source": { + "path": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json", + "packaging": "file" + }, + "destinations": { + "current_account-current_region": { + "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", + 
"objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" + } + } + } + }, + "dockerImages": {} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json new file mode 100644 index 0000000000000..ad9d0fb73d1dd --- /dev/null +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json @@ -0,0 +1,36 @@ +{ + "Parameters": { + "BootstrapVersion": { + "Type": "AWS::SSM::Parameter::Value", + "Default": "/cdk-bootstrap/hnb659fds/version", + "Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]" + } + }, + "Rules": { + "CheckBootstrapVersion": { + "Assertions": [ + { + "Assert": { + "Fn::Not": [ + { + "Fn::Contains": [ + [ + "1", + "2", + "3", + "4", + "5" + ], + { + "Ref": "BootstrapVersion" + } + ] + } + ] + }, + "AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI." 
+ } + ] + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/cdk.out index 7925065efbcc4..560dae10d018f 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/cdk.out +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/cdk.out @@ -1 +1 @@ -{"version":"31.0.0"} \ No newline at end of file +{"version":"33.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/integ.json index 46daab575ad89..ab7e38c81b5c6 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/integ.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/integ.json @@ -1,14 +1,12 @@ { - "version": "31.0.0", + "version": "33.0.0", "testCases": { - "integ.table": { + "aws-cdk-glue-table-integ/DefaultTest": { "stacks": [ "aws-cdk-glue" ], - "diffAssets": false, - "stackUpdateWorkflow": true + "assertionStack": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert", + "assertionStackName": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70" } - }, - "synthContext": {}, - "enableLookups": false + } } \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json index 2f3c8c9dbc41a..02bab428fcf54 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/manifest.json @@ -1,5 +1,5 @@ { - "version": "31.0.0", + "version": "33.0.0", "artifacts": { "aws-cdk-glue.assets": { "type": "cdk:asset-manifest", @@ -17,7 +17,7 @@ "validateOnSynth": false, "assumeRoleArn": 
"arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/1b05206385b50de7e074070a25b271988c0055f2fde760f5c119af3fef3a1bcb.json", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/dc6c1c5f05a8e365822e6d61c41b6fc6afd58d20a2784614b906ae1587c68754.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ @@ -87,16 +87,16 @@ "data": "MyEncryptedTable981A88C6" } ], - "/aws-cdk-glue/MyPartitionFilteredTable/Bucket/Resource": [ + "/aws-cdk-glue/MyPartitionFilteredTable/Table": [ { "type": "aws:cdk:logicalId", - "data": "MyPartitionFilteredTableBucket6ACAA137" + "data": "MyPartitionFilteredTable324BA27A" } ], - "/aws-cdk-glue/MyPartitionFilteredTable/Table": [ + "/aws-cdk-glue/MyTableWithStorageDescriptorParameters/Table": [ { "type": "aws:cdk:logicalId", - "data": "MyPartitionFilteredTable324BA27A" + "data": "MyTableWithStorageDescriptorParametersTable1A347345" } ], "/aws-cdk-glue/MyUser/Resource": [ @@ -134,10 +134,66 @@ "type": "aws:cdk:logicalId", "data": "CheckBootstrapVersion" } + ], + "MyPartitionFilteredTableBucket6ACAA137": [ + { + "type": "aws:cdk:logicalId", + "data": "MyPartitionFilteredTableBucket6ACAA137", + "trace": [ + "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" + ] + } ] }, "displayName": "aws-cdk-glue" }, + "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets": { + "type": "cdk:asset-manifest", + "properties": { + "file": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + 
"awscdkgluetableintegDefaultTestDeployAssert8BFB5B70": { + "type": "aws:cloudformation:stack", + "environment": "aws://unknown-account/unknown-region", + "properties": { + "templateFile": "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.template.json", + "validateOnSynth": false, + "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", + "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", + "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json", + "requiresBootstrapStackVersion": 6, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", + "additionalDependencies": [ + "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets" + ], + "lookupRole": { + "arn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-lookup-role-${AWS::AccountId}-${AWS::Region}", + "requiresBootstrapStackVersion": 8, + "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version" + } + }, + "dependencies": [ + "awscdkgluetableintegDefaultTestDeployAssert8BFB5B70.assets" + ], + "metadata": { + "/aws-cdk-glue-table-integ/DefaultTest/DeployAssert/BootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "BootstrapVersion" + } + ], + "/aws-cdk-glue-table-integ/DefaultTest/DeployAssert/CheckBootstrapVersion": [ + { + "type": "aws:cdk:logicalId", + "data": "CheckBootstrapVersion" + } + ] + }, + "displayName": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert" + }, "Tree": { "type": "cdk:tree", "properties": { diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json index 41c0de2b3b7a1..71c545fff8226 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.js.snapshot/tree.json @@ -20,13 +20,13 @@ "aws:cdk:cloudformation:props": {} }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.CfnBucket", + "fqn": "aws-cdk-lib.aws_s3.CfnBucket", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.Bucket", + "fqn": "aws-cdk-lib.aws_s3.Bucket", "version": "0.0.0" } }, @@ -49,13 +49,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.CfnDatabase", + "fqn": "aws-cdk-lib.aws_glue.CfnDatabase", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.Database", + "fqn": "@aws-cdk/aws-glue-alpha.Database", "version": "0.0.0" } }, @@ -137,13 +137,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.CfnTable", + "fqn": "aws-cdk-lib.aws_glue.CfnTable", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.Table", + "fqn": "@aws-cdk/aws-glue-alpha.Table", "version": "0.0.0" } }, @@ -225,13 +225,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.CfnTable", + "fqn": "aws-cdk-lib.aws_glue.CfnTable", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.Table", + "fqn": "@aws-cdk/aws-glue-alpha.Table", "version": "0.0.0" } }, @@ -313,13 +313,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.CfnTable", + "fqn": "aws-cdk-lib.aws_glue.CfnTable", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.Table", + "fqn": "@aws-cdk/aws-glue-alpha.Table", "version": "0.0.0" } }, @@ -401,13 +401,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.CfnTable", + "fqn": "aws-cdk-lib.aws_glue.CfnTable", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.Table", + "fqn": "@aws-cdk/aws-glue-alpha.Table", "version": "0.0.0" } }, @@ -452,13 +452,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-kms.CfnKey", + "fqn": "aws-cdk-lib.aws_kms.CfnKey", "version": "0.0.0" } } }, "constructInfo": { - 
"fqn": "@aws-cdk/aws-kms.Key", + "fqn": "aws-cdk-lib.aws_kms.Key", "version": "0.0.0" } }, @@ -494,13 +494,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.CfnBucket", + "fqn": "aws-cdk-lib.aws_s3.CfnBucket", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.Bucket", + "fqn": "aws-cdk-lib.aws_s3.Bucket", "version": "0.0.0" } }, @@ -578,13 +578,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.CfnTable", + "fqn": "aws-cdk-lib.aws_glue.CfnTable", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.Table", + "fqn": "@aws-cdk/aws-glue-alpha.Table", "version": "0.0.0" } }, @@ -592,41 +592,92 @@ "id": "MyPartitionFilteredTable", "path": "aws-cdk-glue/MyPartitionFilteredTable", "children": { - "Bucket": { - "id": "Bucket", - "path": "aws-cdk-glue/MyPartitionFilteredTable/Bucket", - "children": { - "Resource": { - "id": "Resource", - "path": "aws-cdk-glue/MyPartitionFilteredTable/Bucket/Resource", - "attributes": { - "aws:cdk:cloudformation:type": "AWS::S3::Bucket", - "aws:cdk:cloudformation:props": { - "bucketEncryption": { - "serverSideEncryptionConfiguration": [ - { - "serverSideEncryptionByDefault": { - "sseAlgorithm": "AES256" - } - } + "Table": { + "id": "Table", + "path": "aws-cdk-glue/MyPartitionFilteredTable/Table", + "attributes": { + "aws:cdk:cloudformation:type": "AWS::Glue::Table", + "aws:cdk:cloudformation:props": { + "catalogId": { + "Ref": "AWS::AccountId" + }, + "databaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "tableInput": { + "name": "partition_filtered_table", + "description": "partition_filtered_table generated by CDK", + "parameters": { + "classification": "json", + "has_encrypted_data": true, + "partition_filtering.enabled": true + }, + "storageDescriptor": { + "location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] ] + }, + "compressed": false, + "storedAsSubDirectories": false, + "columns": [ + { + "name": "col1", + "type": "string" + }, + { + 
"name": "col2", + "type": "string", + "comment": "col2 comment" + }, + { + "name": "col3", + "type": "array" + }, + { + "name": "col4", + "type": "map" + }, + { + "name": "col5", + "type": "struct" + } + ], + "inputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "serdeInfo": { + "serializationLibrary": "org.openx.data.jsonserde.JsonSerDe" } - } - }, - "constructInfo": { - "fqn": "@aws-cdk/aws-s3.CfnBucket", - "version": "0.0.0" + }, + "tableType": "EXTERNAL_TABLE" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-s3.Bucket", + "fqn": "aws-cdk-lib.aws_glue.CfnTable", "version": "0.0.0" } - }, + } + }, + "constructInfo": { + "fqn": "@aws-cdk/aws-glue-alpha.Table", + "version": "0.0.0" + } + }, + "MyTableWithStorageDescriptorParameters": { + "id": "MyTableWithStorageDescriptorParameters", + "path": "aws-cdk-glue/MyTableWithStorageDescriptorParameters", + "children": { "Table": { "id": "Table", - "path": "aws-cdk-glue/MyPartitionFilteredTable/Table", + "path": "aws-cdk-glue/MyTableWithStorageDescriptorParameters/Table", "attributes": { "aws:cdk:cloudformation:type": "AWS::Glue::Table", "aws:cdk:cloudformation:props": { @@ -637,12 +688,11 @@ "Ref": "MyDatabase1E2517DB" }, "tableInput": { - "name": "partition_filtered_table", - "description": "partition_filtered_table generated by CDK", + "name": "table_with_storage_descriptor_parameters", + "description": "table_with_storage_descriptor_parameters generated by CDK", "parameters": { "classification": "json", - "has_encrypted_data": true, - "partition_filtering.enabled": true + "has_encrypted_data": true }, "storageDescriptor": { "location": { @@ -651,7 +701,7 @@ [ "s3://", { - "Ref": "MyPartitionFilteredTableBucket6ACAA137" + "Ref": "DataBucketE3889A50" }, "/" ] @@ -686,6 +736,13 @@ "outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", "serdeInfo": { "serializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + 
}, + "parameters": { + "skip.header.line.count": "1", + "compression_type": "gzip", + "foo": "bar", + "separatorChar": ",", + "write.parallel": "off" } }, "tableType": "EXTERNAL_TABLE" @@ -693,13 +750,13 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.CfnTable", + "fqn": "aws-cdk-lib.aws_glue.CfnTable", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-glue.Table", + "fqn": "@aws-cdk/aws-glue-alpha.Table", "version": "0.0.0" } }, @@ -715,7 +772,7 @@ "aws:cdk:cloudformation:props": {} }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnUser", + "fqn": "aws-cdk-lib.aws_iam.CfnUser", "version": "0.0.0" } }, @@ -889,19 +946,19 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnPolicy", + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Policy", + "fqn": "aws-cdk-lib.aws_iam.Policy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.User", + "fqn": "aws-cdk-lib.aws_iam.User", "version": "0.0.0" } }, @@ -917,7 +974,7 @@ "aws:cdk:cloudformation:props": {} }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnUser", + "fqn": "aws-cdk-lib.aws_iam.CfnUser", "version": "0.0.0" } }, @@ -1082,19 +1139,19 @@ } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.CfnPolicy", + "fqn": "aws-cdk-lib.aws_iam.CfnPolicy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.Policy", + "fqn": "aws-cdk-lib.aws_iam.Policy", "version": "0.0.0" } } }, "constructInfo": { - "fqn": "@aws-cdk/aws-iam.User", + "fqn": "aws-cdk-lib.aws_iam.User", "version": "0.0.0" } }, @@ -1102,22 +1159,76 @@ "id": "BootstrapVersion", "path": "aws-cdk-glue/BootstrapVersion", "constructInfo": { - "fqn": "constructs.Construct", - "version": "10.1.270" + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" } }, "CheckBootstrapVersion": { "id": "CheckBootstrapVersion", "path": "aws-cdk-glue/CheckBootstrapVersion", "constructInfo": { - "fqn": "constructs.Construct", - "version": 
"10.1.270" + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" } } }, "constructInfo": { - "fqn": "constructs.Construct", - "version": "10.1.270" + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + }, + "aws-cdk-glue-table-integ": { + "id": "aws-cdk-glue-table-integ", + "path": "aws-cdk-glue-table-integ", + "children": { + "DefaultTest": { + "id": "DefaultTest", + "path": "aws-cdk-glue-table-integ/DefaultTest", + "children": { + "Default": { + "id": "Default", + "path": "aws-cdk-glue-table-integ/DefaultTest/Default", + "constructInfo": { + "fqn": "constructs.Construct", + "version": "10.2.69" + } + }, + "DeployAssert": { + "id": "DeployAssert", + "path": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert", + "children": { + "BootstrapVersion": { + "id": "BootstrapVersion", + "path": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert/BootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnParameter", + "version": "0.0.0" + } + }, + "CheckBootstrapVersion": { + "id": "CheckBootstrapVersion", + "path": "aws-cdk-glue-table-integ/DefaultTest/DeployAssert/CheckBootstrapVersion", + "constructInfo": { + "fqn": "aws-cdk-lib.CfnRule", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "aws-cdk-lib.Stack", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTestCase", + "version": "0.0.0" + } + } + }, + "constructInfo": { + "fqn": "@aws-cdk/integ-tests-alpha.IntegTest", + "version": "0.0.0" } }, "Tree": { @@ -1125,13 +1236,13 @@ "path": "Tree", "constructInfo": { "fqn": "constructs.Construct", - "version": "10.1.270" + "version": "10.2.69" } } }, "constructInfo": { - "fqn": "constructs.Construct", - "version": "10.1.270" + "fqn": "aws-cdk-lib.App", + "version": "0.0.0" } } } \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts index 8ad11dfbe580d..5633ccac75c1f 100644 --- 
a/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/integ.table.ts @@ -1,15 +1,18 @@ #!/usr/bin/env node +import * as integ from '@aws-cdk/integ-tests-alpha'; +import * as cdk from 'aws-cdk-lib'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as kms from 'aws-cdk-lib/aws-kms'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; const app = new cdk.App(); const stack = new cdk.Stack(app, 'aws-cdk-glue'); -const bucket = new s3.Bucket(stack, 'DataBucket'); +const bucket = new s3.Bucket(stack, 'DataBucket', { + removalPolicy: cdk.RemovalPolicy.DESTROY, +}); const database = new glue.Database(stack, 'MyDatabase', { databaseName: 'my_database', @@ -84,17 +87,35 @@ const encryptedTable = new glue.Table(stack, 'MyEncryptedTable', { partitionKeys, dataFormat: glue.DataFormat.JSON, encryption: glue.TableEncryption.KMS, - encryptionKey: new kms.Key(stack, 'MyKey'), + encryptionKey: new kms.Key(stack, 'MyKey', { + removalPolicy: cdk.RemovalPolicy.DESTROY, + }), }); new glue.Table(stack, 'MyPartitionFilteredTable', { database, + bucket, tableName: 'partition_filtered_table', columns, dataFormat: glue.DataFormat.JSON, enablePartitionFiltering: true, }); +new glue.Table(stack, 'MyTableWithStorageDescriptorParameters', { + database, + bucket, + tableName: 'table_with_storage_descriptor_parameters', + columns, + dataFormat: glue.DataFormat.JSON, + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(1), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + glue.StorageParameter.custom('foo', 'bar'), // Will have no effect + glue.StorageParameter.custom('separatorChar', ','), // Will describe the separator char used in the data + glue.StorageParameter.custom(glue.StorageParameters.WRITE_PARALLEL, 'off'), + ], +}); + const user = new iam.User(stack, 'MyUser'); csvTable.grantReadWrite(user); encryptedTable.grantReadWrite(user); @@ -104,4 +125,8 
@@ avroTable.grantReadWrite(anotherUser); jsonTable.grantReadWrite(anotherUser); parquetTable.grantReadWrite(anotherUser); +new integ.IntegTest(app, 'aws-cdk-glue-table-integ', { + testCases: [stack], +}); + app.synth(); diff --git a/packages/@aws-cdk/aws-glue-alpha/test/table.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/table.test.ts index 09cd78779cfcb..487a5c5343e14 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/table.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/table.test.ts @@ -1,11 +1,11 @@ -import { Template, Match } from 'aws-cdk-lib/assertions'; +import * as cdk from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { CfnTable } from 'aws-cdk-lib/aws-glue'; import * as iam from 'aws-cdk-lib/aws-iam'; import * as kms from 'aws-cdk-lib/aws-kms'; import * as s3 from 'aws-cdk-lib/aws-s3'; -import * as cdk from 'aws-cdk-lib'; import * as glue from '../lib'; import { PartitionIndex } from '../lib'; -import { CfnTable } from 'aws-cdk-lib/aws-glue'; test('unpartitioned JSON table', () => { const app = new cdk.App(); @@ -1478,6 +1478,27 @@ describe('validate', () => { encryption: glue.TableEncryption.CLIENT_SIDE_KMS, })).not.toThrow(); }); + + test('unique storage descriptor parameters', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + + expect(() => new glue.Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(2), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + glue.StorageParameter.custom('foo', 'bar'), + glue.StorageParameter.custom(glue.StorageParameters.COMPRESSION_TYPE, 'true'), + ], + })).toThrowError('Duplicate storage parameter key: compression_type'); + }); }); test('Table.fromTableArn', () => { @@ -1600,6 +1621,39 @@ test('can specify a physical 
name', () => { }); }); +test('storage descriptor parameters', () => { + const app = new cdk.App(); + const stack = new cdk.Stack(app, 'Stack'); + const database = new glue.Database(stack, 'Database'); + new glue.Table(stack, 'Table', { + database, + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + storageParameters: [ + glue.StorageParameter.skipHeaderLineCount(2), + glue.StorageParameter.compressionType(glue.CompressionType.GZIP), + glue.StorageParameter.custom('foo', 'bar'), + glue.StorageParameter.custom('separatorChar', ','), + ], + }); + + Template.fromStack(stack).hasResourceProperties('AWS::Glue::Table', { + TableInput: { + StorageDescriptor: { + Parameters: { + 'skip.header.line.count': '2', + 'separatorChar': ',', + 'foo': 'bar', + 'compression_type': 'gzip', + }, + }, + }, + }); +}); + function createTable(props: Pick>): void { const stack = new cdk.Stack(); new glue.Table(stack, 'table', {