From c071367def4382c630057546c74fa56f00d9294c Mon Sep 17 00:00:00 2001 From: Kaizen Conroy <36202692+kaizen3031593@users.noreply.github.com> Date: Wed, 29 Dec 2021 12:22:13 -0500 Subject: [PATCH] feat(glue): support partition index on tables (#17998) This PR adds support for creating partition indexes on tables via custom resources. It offers two different ways to create indexes: ```ts // via table definition const table = new glue.Table(this, 'Table', { database, bucket, tableName: 'table', columns, partitionKeys, partitionIndexes: [{ indexName: 'my-index', keyNames: ['month'], }], dataFormat: glue.DataFormat.CSV, }); ``` ```ts // or as a function table.AddPartitionIndex([{ indexName: 'my-other-index', keyNames: ['month', 'year'], }); ``` I also refactored the format of some tests, which is what accounts for the large diff in `test.table.ts`. Motivation: Creating partition indexes on a table is something you can do via the console, but is not an exposed property in cloudformation. In this case, I think it makes sense to support this feature via custom resources as it will significantly reduce the customer pain of either provisioning a custom resource with correct permissions or manually going into the console after resource creation. Supporting this feature allows for synth-time checks and dependency chaining for multiple indexes (reason detailed in the FAQ) which removes a rather sharp edge for users provisioning custom resource indexes themselves. FAQ: Why do we need to chain dependencies between different Partition Index Custom Resources? - Because Glue only allows 1 index to be created or deleted simultaneously per table. Without dependencies the resources will try to create partition indexes simultaneously and the second sdk call with be dropped. Why is it called `partitionIndexes`? Is that really how you pluralize index? - [Yesish](https://www.nasdaq.com/articles/indexes-or-indices-whats-the-deal-2016-05-12). If you hate it it can be `partitionIndices`. Why is `keyNames` of type `string[]` and not `Column[]`? `PartitionKey` is of type `Column[]` and partition indexes must be a subset of partition keys... - This could be a debate. But my argument is that the pattern I see for defining a Table is to define partition keys inline and not declare them each as variables. It would be pretty clunky from a UX perspective: ```ts const key1 = { name: 'mykey', type: glue.Schema.STRING }; const key2 = { name: 'mykey2', type: glue.Schema.STRING }; const key3 = { name: 'mykey3', type: glue.Schema.STRING }; new glue.Table(this, 'table', { database, bucket, tableName: 'table', columns, partitionKeys: [key1, key2, key3], partitionIndexes: [key1, key2], dataFormat: glue.DataFormat.CSV, }); ``` Why are there 2 different checks for having > 3 partition indexes? - It's possible someone decides to define 3 indexes in the definition and then try to add another with `table.addPartitionIndex()`. This would be a nasty deploy time error, its better if it is synth time. It's also possible someone decides to define 4 indexes in the definition. It's better to fast-fail here before we create 3 custom resources. What if I deploy a table, manually add 3 partition indexes, and then try to call `table.addPartitionIndex()` and update the stack? Will that still be a synth time failure? - Sorry, no. Why do we need to generate names? - We don't. I just thought it would be helpful. Why is `grantToUnderlyingResources` public? - I thought it would be helpful. Some permissions need to be added to the table, the database, and the catalog. Closes #17589. ---- *By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license* --- packages/@aws-cdk/aws-glue/README.md | 48 +- packages/@aws-cdk/aws-glue/lib/table.ts | 132 ++- packages/@aws-cdk/aws-glue/package.json | 2 + .../test/integ.partition-index.expected.json | 594 +++++++++++ .../aws-glue/test/integ.partition-index.ts | 75 ++ .../@aws-cdk/aws-glue/test/integ.table.ts | 5 +- packages/@aws-cdk/aws-glue/test/table.test.ts | 943 ++++++++++-------- 7 files changed, 1379 insertions(+), 420 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json create mode 100644 packages/@aws-cdk/aws-glue/test/integ.partition-index.ts diff --git a/packages/@aws-cdk/aws-glue/README.md b/packages/@aws-cdk/aws-glue/README.md index 069b572b6cd71..639198c67ed9a 100644 --- a/packages/@aws-cdk/aws-glue/README.md +++ b/packages/@aws-cdk/aws-glue/README.md @@ -194,7 +194,7 @@ new glue.Table(this, 'MyTable', { By default, an S3 bucket will be created to store the table's data and stored in the bucket root. You can also manually pass the `bucket` and `s3Prefix`: -### Partitions +### Partition Keys To improve query performance, a table can specify `partitionKeys` on which data is stored and queried separately. For example, you might partition a table by `year` and `month` to optimize queries based on a time window: @@ -218,6 +218,52 @@ new glue.Table(this, 'MyTable', { }); ``` +### Partition Indexes + +Another way to improve query performance is to specify partition indexes. If no partition indexes are +present on the table, AWS Glue loads all partitions of the table and filters the loaded partitions using +the query expression. The query takes more time to run as the number of partitions increase. With an +index, the query will try to fetch a subset of the partitions instead of loading all partitions of the +table. + +The keys of a partition index must be a subset of the partition keys of the table. You can have a +maximum of 3 partition indexes per table. To specify a partition index, you can use the `partitionIndexes` +property: + +```ts +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { + database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }, { + name: 'month', + type: glue.Schema.SMALL_INT, + }], + partitionIndexes: [{ + indexName: 'my-index', // optional + keyNames: ['year'], + }], // supply up to 3 indexes + dataFormat: glue.DataFormat.JSON, +}); +``` + +Alternatively, you can call the `addPartitionIndex()` function on a table: + +```ts +declare const myTable: glue.Table; +myTable.addPartitionIndex({ + indexName: 'my-index', + keyNames: ['year'], +}); +``` + ## [Encryption](https://docs.aws.amazon.com/athena/latest/ug/encryption.html) You can enable encryption on a Table's data: diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index 14ff98dd3b3c4..1b17da32e5454 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -1,13 +1,33 @@ import * as iam from '@aws-cdk/aws-iam'; import * as kms from '@aws-cdk/aws-kms'; import * as s3 from '@aws-cdk/aws-s3'; -import { ArnFormat, Fn, IResource, Resource, Stack } from '@aws-cdk/core'; +import { ArnFormat, Fn, IResource, Names, Resource, Stack } from '@aws-cdk/core'; +import * as cr from '@aws-cdk/custom-resources'; +import { AwsCustomResource } from '@aws-cdk/custom-resources'; import { Construct } from 'constructs'; import { DataFormat } from './data-format'; import { IDatabase } from './database'; import { CfnTable } from './glue.generated'; import { Column } from './schema'; +/** + * Properties of a Partition Index. + */ +export interface PartitionIndex { + /** + * The name of the partition index. + * + * @default - a name will be generated for you. + */ + readonly indexName?: string; + + /** + * The partition key names that comprise the partition + * index. The names must correspond to a name in the + * table's partition keys. + */ + readonly keyNames: string[]; +} export interface ITable extends IResource { /** * @attribute @@ -102,7 +122,16 @@ export interface TableProps { * * @default table is not partitioned */ - readonly partitionKeys?: Column[] + readonly partitionKeys?: Column[]; + + /** + * Partition indexes on the table. A maximum of 3 indexes + * are allowed on a table. Keys in the index must be part + * of the table's partition keys. + * + * @default table has no partition indexes + */ + readonly partitionIndexes?: PartitionIndex[]; /** * Storage type of the table's data. @@ -230,6 +259,18 @@ export class Table extends Resource implements ITable { */ public readonly partitionKeys?: Column[]; + /** + * This table's partition indexes. + */ + public readonly partitionIndexes?: PartitionIndex[]; + + /** + * Partition indexes must be created one at a time. To avoid + * race conditions, we store the resource and add dependencies + * each time a new partition index is created. + */ + private partitionIndexCustomResources: AwsCustomResource[] = []; + constructor(scope: Construct, id: string, props: TableProps) { super(scope, id, { physicalName: props.tableName, @@ -287,6 +328,77 @@ export class Table extends Resource implements ITable { resourceName: `${this.database.databaseName}/${this.tableName}`, }); this.node.defaultChild = tableResource; + + // Partition index creation relies on created table. + if (props.partitionIndexes) { + this.partitionIndexes = props.partitionIndexes; + this.partitionIndexes.forEach((index) => this.addPartitionIndex(index)); + } + } + + /** + * Add a partition index to the table. You can have a maximum of 3 partition + * indexes to a table. Partition index keys must be a subset of the table's + * partition keys. + * + * @see https://docs.aws.amazon.com/glue/latest/dg/partition-indexes.html + */ + public addPartitionIndex(index: PartitionIndex) { + const numPartitions = this.partitionIndexCustomResources.length; + if (numPartitions >= 3) { + throw new Error('Maximum number of partition indexes allowed is 3'); + } + this.validatePartitionIndex(index); + + const indexName = index.indexName ?? this.generateIndexName(index.keyNames); + const partitionIndexCustomResource = new cr.AwsCustomResource(this, `partition-index-${indexName}`, { + onCreate: { + service: 'Glue', + action: 'createPartitionIndex', + parameters: { + DatabaseName: this.database.databaseName, + TableName: this.tableName, + PartitionIndex: { + IndexName: indexName, + Keys: index.keyNames, + }, + }, + physicalResourceId: cr.PhysicalResourceId.of( + indexName, + ), + }, + policy: cr.AwsCustomResourcePolicy.fromSdkCalls({ + resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE, + }), + }); + this.grantToUnderlyingResources(partitionIndexCustomResource, ['glue:UpdateTable']); + + // Depend on previous partition index if possible, to avoid race condition + if (numPartitions > 0) { + this.partitionIndexCustomResources[numPartitions-1].node.addDependency(partitionIndexCustomResource); + } + this.partitionIndexCustomResources.push(partitionIndexCustomResource); + } + + private generateIndexName(keys: string[]): string { + const prefix = keys.join('-') + '-'; + const uniqueId = Names.uniqueId(this); + const maxIndexLength = 80; // arbitrarily specified + const startIndex = Math.max(0, uniqueId.length - (maxIndexLength - prefix.length)); + return prefix + uniqueId.substring(startIndex); + } + + private validatePartitionIndex(index: PartitionIndex) { + if (index.indexName !== undefined && (index.indexName.length < 1 || index.indexName.length > 255)) { + throw new Error(`Index name must be between 1 and 255 characters, but got ${index.indexName.length}`); + } + if (!this.partitionKeys || this.partitionKeys.length === 0) { + throw new Error('The table must have partition keys to create a partition index'); + } + const keyNames = this.partitionKeys.map(pk => pk.name); + if (!index.keyNames.every(k => keyNames.includes(k))) { + throw new Error(`All index keys must also be partition keys. Got ${index.keyNames} but partition key names are ${keyNames}`); + } } /** @@ -336,6 +448,22 @@ export class Table extends Resource implements ITable { }); } + /** + * Grant the given identity custom permissions to ALL underlying resources of the table. + * Permissions will be granted to the catalog, the database, and the table. + */ + public grantToUnderlyingResources(grantee: iam.IGrantable, actions: string[]) { + return iam.Grant.addToPrincipal({ + grantee, + resourceArns: [ + this.tableArn, + this.database.catalogArn, + this.database.databaseArn, + ], + actions, + }); + } + private getS3PrefixForGrant() { return this.s3Prefix + '*'; } diff --git a/packages/@aws-cdk/aws-glue/package.json b/packages/@aws-cdk/aws-glue/package.json index 509b196a45dea..99d837bdb1fb9 100644 --- a/packages/@aws-cdk/aws-glue/package.json +++ b/packages/@aws-cdk/aws-glue/package.json @@ -99,6 +99,7 @@ "@aws-cdk/aws-s3": "0.0.0", "@aws-cdk/aws-s3-assets": "0.0.0", "@aws-cdk/core": "0.0.0", + "@aws-cdk/custom-resources": "0.0.0", "constructs": "^3.3.69" }, "homepage": "https://github.com/aws/aws-cdk", @@ -113,6 +114,7 @@ "@aws-cdk/aws-s3": "0.0.0", "@aws-cdk/aws-s3-assets": "0.0.0", "@aws-cdk/core": "0.0.0", + "@aws-cdk/custom-resources": "0.0.0", "constructs": "^3.3.69" }, "engines": { diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json new file mode 100644 index 0000000000000..a4b3cad50cea3 --- /dev/null +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json @@ -0,0 +1,594 @@ +{ + "Resources": { + "DataBucketE3889A50": { + "Type": "AWS::S3::Bucket", + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" + }, + "MyDatabase1E2517DB": { + "Type": "AWS::Glue::Database", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseInput": { + "Name": "database" + } + } + }, + "CSVTableE499CABA": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "csv_table generated by CDK", + "Name": "csv_table", + "Parameters": { + "classification": "csv", + "has_encrypted_data": false + }, + "PartitionKeys": [ + { + "Name": "year", + "Type": "smallint" + }, + { + "Name": "month", + "Type": "bigint" + } + ], + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "string" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.apache.hadoop.hive.serde2.OpenCSVSerde" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, + "CSVTablepartitionindexindex1CustomResourcePolicy4983F2A9": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "glue:CreatePartitionIndex", + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "CSVTablepartitionindexindex1CustomResourcePolicy4983F2A9", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + }, + "DependsOn": [ + "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F", + "CSVTablepartitionindexindex29D554319" + ] + }, + "CSVTablepartitionindexindex16247ABF6": { + "Type": "Custom::AWS", + "Properties": { + "ServiceToken": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd22872D164C4C", + "Arn" + ] + }, + "Create": { + "Fn::Join": [ + "", + [ + "{\"service\":\"Glue\",\"action\":\"createPartitionIndex\",\"parameters\":{\"DatabaseName\":\"", + { + "Ref": "MyDatabase1E2517DB" + }, + "\",\"TableName\":\"", + { + "Ref": "CSVTableE499CABA" + }, + "\",\"PartitionIndex\":{\"IndexName\":\"index1\",\"Keys\":[\"month\"]}},\"physicalResourceId\":{\"id\":\"index1\"}}" + ] + ] + }, + "InstallLatestAwsSdk": true + }, + "DependsOn": [ + "CSVTablepartitionindexindex1CustomResourcePolicy4983F2A9", + "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F", + "CSVTablepartitionindexindex29D554319" + ], + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" + }, + "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "glue:CreatePartitionIndex", + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + } + }, + "CSVTablepartitionindexindex29D554319": { + "Type": "Custom::AWS", + "Properties": { + "ServiceToken": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd22872D164C4C", + "Arn" + ] + }, + "Create": { + "Fn::Join": [ + "", + [ + "{\"service\":\"Glue\",\"action\":\"createPartitionIndex\",\"parameters\":{\"DatabaseName\":\"", + { + "Ref": "MyDatabase1E2517DB" + }, + "\",\"TableName\":\"", + { + "Ref": "CSVTableE499CABA" + }, + "\",\"PartitionIndex\":{\"IndexName\":\"index2\",\"Keys\":[\"month\",\"year\"]}},\"physicalResourceId\":{\"id\":\"index2\"}}" + ] + ] + }, + "InstallLatestAwsSdk": true + }, + "DependsOn": [ + "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F" + ], + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" + }, + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "lambda.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + ] + ] + } + ] + } + }, + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleDefaultPolicyD28E1A5E": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "glue:UpdateTable", + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":table/", + { + "Ref": "MyDatabase1E2517DB" + }, + "/", + { + "Ref": "CSVTableE499CABA" + } + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":catalog" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":database/", + { + "Ref": "MyDatabase1E2517DB" + } + ] + ] + } + ] + }, + { + "Action": "glue:UpdateTable", + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":table/", + { + "Ref": "MyDatabase1E2517DB" + }, + "/", + { + "Ref": "JSONTable00348F1D" + } + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":catalog" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":database/", + { + "Ref": "MyDatabase1E2517DB" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleDefaultPolicyD28E1A5E", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + } + }, + "AWS679f53fac002430cb0da5b7982bd22872D164C4C": { + "Type": "AWS::Lambda::Function", + "Properties": { + "Code": { + "S3Bucket": { + "Ref": "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3BucketF482197E" + }, + "S3Key": { + "Fn::Join": [ + "", + [ + { + "Fn::Select": [ + 0, + { + "Fn::Split": [ + "||", + { + "Ref": "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3VersionKey38B69632" + } + ] + } + ] + }, + { + "Fn::Select": [ + 1, + { + "Fn::Split": [ + "||", + { + "Ref": "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3VersionKey38B69632" + } + ] + } + ] + } + ] + ] + } + }, + "Role": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2", + "Arn" + ] + }, + "Handler": "index.handler", + "Runtime": "nodejs12.x", + "Timeout": 120 + }, + "DependsOn": [ + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleDefaultPolicyD28E1A5E", + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + ] + }, + "JSONTable00348F1D": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "json_table generated by CDK", + "Name": "json_table", + "Parameters": { + "classification": "json", + "has_encrypted_data": false + }, + "PartitionKeys": [ + { + "Name": "year", + "Type": "smallint" + }, + { + "Name": "month", + "Type": "bigint" + } + ], + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "string" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, + "JSONTablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicy92B3C1AE": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "glue:CreatePartitionIndex", + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "JSONTablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicy92B3C1AE", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + } + }, + "JSONTablepartitionindexyearmonthawscdkglueJSONTable937C116B74A5990F": { + "Type": "Custom::AWS", + "Properties": { + "ServiceToken": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd22872D164C4C", + "Arn" + ] + }, + "Create": { + "Fn::Join": [ + "", + [ + "{\"service\":\"Glue\",\"action\":\"createPartitionIndex\",\"parameters\":{\"DatabaseName\":\"", + { + "Ref": "MyDatabase1E2517DB" + }, + "\",\"TableName\":\"", + { + "Ref": "JSONTable00348F1D" + }, + "\",\"PartitionIndex\":{\"IndexName\":\"year-month-awscdkglueJSONTable937C116B\",\"Keys\":[\"year\",\"month\"]}},\"physicalResourceId\":{\"id\":\"year-month-awscdkglueJSONTable937C116B\"}}" + ] + ] + }, + "InstallLatestAwsSdk": true + }, + "DependsOn": [ + "JSONTablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicy92B3C1AE" + ], + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" + } + }, + "Parameters": { + "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3BucketF482197E": { + "Type": "String", + "Description": "S3 bucket for asset \"6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2\"" + }, + "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3VersionKey38B69632": { + "Type": "String", + "Description": "S3 key for asset version \"6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2\"" + }, + "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2ArtifactHash4BE92B79": { + "Type": "String", + "Description": "Artifact hash for asset \"6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2\"" + } + }, + "Outputs": { + "CatalogId": { + "Value": { + "Ref": "AWS::AccountId" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts b/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts new file mode 100644 index 0000000000000..e3a514bfadf6e --- /dev/null +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts @@ -0,0 +1,75 @@ +#!/usr/bin/env node +import * as s3 from '@aws-cdk/aws-s3'; +import * as cdk from '@aws-cdk/core'; +import * as glue from '../lib'; + +/** + * Stack verification steps: + * * aws cloudformation describe-stacks --stack-name aws-cdk-glue --query Stacks[0].Outputs[0].OutputValue + * * aws glue get-partition-indexes --catalog-id --database-name database --table-name csv_table + * returns two indexes named 'index1' and 'index2' + * * aws glue get-partition-indexes --catalog-id --database-name database --table-name json_table + * returns an index with name 'year-month...' + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'aws-cdk-glue'); +const bucket = new s3.Bucket(stack, 'DataBucket'); +const database = new glue.Database(stack, 'MyDatabase', { + databaseName: 'database', +}); + +const columns = [{ + name: 'col1', + type: glue.Schema.STRING, +}, { + name: 'col2', + type: glue.Schema.STRING, +}, { + name: 'col3', + type: glue.Schema.STRING, +}]; + +const partitionKeys = [{ + name: 'year', + type: glue.Schema.SMALL_INT, +}, { + name: 'month', + type: glue.Schema.BIG_INT, +}]; + +new glue.Table(stack, 'CSVTable', { + database, + bucket, + tableName: 'csv_table', + columns, + partitionKeys, + partitionIndexes: [{ + indexName: 'index1', + keyNames: ['month'], + }, { + indexName: 'index2', + keyNames: ['month', 'year'], + }], + dataFormat: glue.DataFormat.CSV, +}); + +const jsonTable = new glue.Table(stack, 'JSONTable', { + database, + bucket, + tableName: 'json_table', + columns, + partitionKeys, + dataFormat: glue.DataFormat.JSON, +}); + +jsonTable.addPartitionIndex({ + keyNames: ['year', 'month'], +}); + +// output necessary for stack verification +new cdk.CfnOutput(stack, 'CatalogId', { + value: database.catalogId, +}); + +app.synth(); diff --git a/packages/@aws-cdk/aws-glue/test/integ.table.ts b/packages/@aws-cdk/aws-glue/test/integ.table.ts index 59eede985e5ce..e9d54d659921e 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.table.ts +++ b/packages/@aws-cdk/aws-glue/test/integ.table.ts @@ -81,10 +81,7 @@ const encryptedTable = new glue.Table(stack, 'MyEncryptedTable', { database, tableName: 'my_encrypted_table', columns, - partitionKeys: [{ - name: 'year', - type: glue.Schema.SMALL_INT, - }], + partitionKeys, dataFormat: glue.DataFormat.JSON, encryption: glue.TableEncryption.KMS, encryptionKey: new kms.Key(stack, 'MyKey'), diff --git a/packages/@aws-cdk/aws-glue/test/table.test.ts b/packages/@aws-cdk/aws-glue/test/table.test.ts index e4dac06728e19..a7aa71474724f 100644 --- a/packages/@aws-cdk/aws-glue/test/table.test.ts +++ b/packages/@aws-cdk/aws-glue/test/table.test.ts @@ -6,6 +6,7 @@ import { testFutureBehavior } from '@aws-cdk/cdk-build-tools/lib/feature-flag'; import * as cdk from '@aws-cdk/core'; import * as cxapi from '@aws-cdk/cx-api'; import * as glue from '../lib'; +import { PartitionIndex } from '../lib'; import { CfnTable } from '../lib/glue.generated'; const s3GrantWriteCtx = { [cxapi.S3_GRANT_WRITE_WITHOUT_ACL]: true }; @@ -933,501 +934,617 @@ test('explicit s3 bucket and with empty prefix', () => { }); }); -test('grants: custom', () => { - const stack = new cdk.Stack(); - const user = new iam.User(stack, 'User'); - const database = new glue.Database(stack, 'Database', { - databaseName: 'database', - }); +describe('add partition index', () => { + test('fails if no partition keys', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - const table = new glue.Table(stack, 'Table', { - database, - tableName: 'table', - columns: [{ - name: 'col', - type: glue.Schema.STRING, - }], - compressed: true, - dataFormat: glue.DataFormat.JSON, + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: 'my-part', + keyNames: ['part'], + })).toThrowError(/The table must have partition keys to create a partition index/); }); - table.grant(user, ['glue:UpdateTable']); + test('fails if partition index does not match partition keys', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: 'glue:UpdateTable', - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':glue:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':table/', - { - Ref: 'DatabaseB269D8BB', - }, - '/', - { - Ref: 'Table4C2D914F', - }, - ], - ], - }, - }, - ], - Version: '2012-10-17', - }, - PolicyName: 'UserDefaultPolicy1F97781E', - Users: [ - { - Ref: 'User00B015A1', - }, - ], + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: 'my-part', + keyNames: ['not-part'], + })).toThrowError(/All index keys must also be partition keys/); }); -}); -test('grants: read only', () => { - const stack = new cdk.Stack(); - const user = new iam.User(stack, 'User'); - const database = new glue.Database(stack, 'Database', { - databaseName: 'database', + test('fails with index name < 1 character', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); + + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: '', + keyNames: ['part'], + })).toThrowError(/Index name must be between 1 and 255 characters, but got 0/); }); - const table = new glue.Table(stack, 'Table', { - database, - tableName: 'table', - columns: [{ - name: 'col', - type: glue.Schema.STRING, - }], - compressed: true, - dataFormat: glue.DataFormat.JSON, + test('fails with > 3 indexes', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); + + const indexes: PartitionIndex[] = [{ + indexName: 'ind1', + keyNames: ['part'], + }, { + indexName: 'ind2', + keyNames: ['part'], + }, { + indexName: 'ind3', + keyNames: ['part'], + }, { + indexName: 'ind4', + keyNames: ['part'], + }]; + + expect(() => new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + partitionIndexes: indexes, + dataFormat: glue.DataFormat.JSON, + })).toThrowError('Maximum number of partition indexes allowed is 3'); }); +}); - table.grantRead(user); +describe('grants', () => { + test('custom permissions', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 'glue:BatchGetPartition', - 'glue:GetPartition', - 'glue:GetPartitions', - 'glue:GetTable', - 'glue:GetTables', - 'glue:GetTableVersion', - 'glue:GetTableVersions', - ], - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':glue:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':table/', - { - Ref: 'DatabaseB269D8BB', - }, - '/', - { - Ref: 'Table4C2D914F', - }, - ], - ], - }, - }, - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], - }, - { + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); + + table.grant(user, ['glue:UpdateTable']); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: 'glue:UpdateTable', + Effect: 'Allow', + Resource: { 'Fn::Join': [ '', [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], + Ref: 'Table4C2D914F', }, - '/*', ], ], }, - ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', }, ], - Version: '2012-10-17', - }, - PolicyName: 'UserDefaultPolicy1F97781E', - Users: [ - { - Ref: 'User00B015A1', - }, - ], + }); }); -}); -testFutureBehavior('grants: write only', s3GrantWriteCtx, cdk.App, (app) => { - const stack = new cdk.Stack(app); - const user = new iam.User(stack, 'User'); - const database = new glue.Database(stack, 'Database', { - databaseName: 'database', - }); + test('read only', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - const table = new glue.Table(stack, 'Table', { - database, - tableName: 'table', - columns: [{ - name: 'col', - type: glue.Schema.STRING, - }], - compressed: true, - dataFormat: glue.DataFormat.JSON, - }); + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); - table.grantWrite(user); + table.grantRead(user); - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 'glue:BatchCreatePartition', - 'glue:BatchDeletePartition', - 'glue:CreatePartition', - 'glue:DeletePartition', - 'glue:UpdatePartition', - ], - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':glue:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':table/', - { - Ref: 'DatabaseB269D8BB', - }, - '/', - { - Ref: 'Table4C2D914F', - }, - ], + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', ], - }, - }, - { - Action: [ - 's3:DeleteObject*', - 's3:PutObject', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], - }, - { + Effect: 'Allow', + Resource: { 'Fn::Join': [ '', [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', }, - '/*', ], ], }, - ], + }, + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', }, ], - Version: '2012-10-17', - }, - PolicyName: 'UserDefaultPolicy1F97781E', - Users: [ - { - Ref: 'User00B015A1', - }, - ], + }); }); -}); -testFutureBehavior('grants: read and write', s3GrantWriteCtx, cdk.App, (app) => { - const stack = new cdk.Stack(app); - const user = new iam.User(stack, 'User'); - const database = new glue.Database(stack, 'Database', { - databaseName: 'database', - }); + testFutureBehavior('write only', s3GrantWriteCtx, cdk.App, (app) => { + const stack = new cdk.Stack(app); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - const table = new glue.Table(stack, 'Table', { - database, - tableName: 'table', - columns: [{ - name: 'col', - type: glue.Schema.STRING, - }], - compressed: true, - dataFormat: glue.DataFormat.JSON, - }); + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); - table.grantReadWrite(user); + table.grantWrite(user); - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 'glue:BatchGetPartition', - 'glue:GetPartition', - 'glue:GetPartitions', - 'glue:GetTable', - 'glue:GetTables', - 'glue:GetTableVersion', - 'glue:GetTableVersions', - 'glue:BatchCreatePartition', - 'glue:BatchDeletePartition', - 'glue:CreatePartition', - 'glue:DeletePartition', - 'glue:UpdatePartition', - ], - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':glue:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':table/', - { - Ref: 'DatabaseB269D8BB', - }, - '/', - { - Ref: 'Table4C2D914F', - }, - ], + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', ], - }, - }, - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], - }, - { + Effect: 'Allow', + Resource: { 'Fn::Join': [ '', [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], + Ref: 'Table4C2D914F', }, - '/*', ], ], }, - ], + }, + { + Action: [ + 's3:DeleteObject*', + 's3:PutObject', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', }, ], - Version: '2012-10-17', - }, - PolicyName: 'UserDefaultPolicy1F97781E', - Users: [ - { - Ref: 'User00B015A1', - }, - ], + }); }); -}); -test('validate: at least one column', () => { - expect(() => { - createTable({ - columns: [], - tableName: 'name', + testFutureBehavior('read and write', s3GrantWriteCtx, cdk.App, (app) => { + const stack = new cdk.Stack(app); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', }); - }).toThrowError('you must specify at least one column for the table'); - -}); -test('validate: unique column names', () => { - expect(() => { - createTable({ - tableName: 'name', + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }, { - name: 'col1', + name: 'col', type: glue.Schema.STRING, }], + compressed: true, + dataFormat: glue.DataFormat.JSON, }); - }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + table.grantReadWrite(user); + + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], + ], + }, + }, + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', + }, + ], + }); + }); }); -test('validate: unique partition keys', () => { - expect(() => { - createTable({ +describe('validate', () => { + test('at least one column', () => { + expect(() => { + createTable({ + columns: [], + tableName: 'name', + }); + }).toThrowError('you must specify at least one column for the table'); + + }); + + test('unique column names', () => { + expect(() => { + createTable({ + tableName: 'name', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }, { + name: 'col1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + + }); + + test('unique partition keys', () => { + expect(() => { + createTable({ + tableName: 'name', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'p1', + type: glue.Schema.STRING, + }, { + name: 'p1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'p1' is duplicated"); + + }); + + test('column names and partition keys are all unique', () => { + expect(() => { + createTable({ + tableName: 'name', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + + }); + + test('can not specify an explicit bucket and encryption', () => { + expect(() => { + createTable({ + tableName: 'name', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: glue.TableEncryption.KMS, + }); + }).toThrowError('you can not specify encryption settings if you also provide a bucket'); + }); + + test('can explicitly pass bucket if Encryption undefined', () => { + expect(() => createTable({ tableName: 'name', columns: [{ name: 'col1', type: glue.Schema.STRING, }], - partitionKeys: [{ - name: 'p1', - type: glue.Schema.STRING, - }, { - name: 'p1', - type: glue.Schema.STRING, - }], - }); - }).toThrowError("column names and partition keys must be unique, but 'p1' is duplicated"); - -}); + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: undefined, + })).not.toThrow(); + }); -test('validate: column names and partition keys are all unique', () => { - expect(() => { - createTable({ + test('can explicitly pass bucket if Unencrypted', () => { + expect(() => createTable({ tableName: 'name', columns: [{ name: 'col1', type: glue.Schema.STRING, }], - partitionKeys: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - }); - }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); - -}); + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: undefined, + })).not.toThrow(); + }); -test('validate: can not specify an explicit bucket and encryption', () => { - expect(() => { - createTable({ + test('can explicitly pass bucket if ClientSideKms', () => { + expect(() => createTable({ tableName: 'name', columns: [{ name: 'col1', type: glue.Schema.STRING, }], bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), - encryption: glue.TableEncryption.KMS, - }); - }).toThrowError('you can not specify encryption settings if you also provide a bucket'); -}); - -test('validate: can explicitly pass bucket if Encryption undefined', () => { - expect(() => createTable({ - tableName: 'name', - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), - encryption: undefined, - })).not.toThrow(); -}); - -test('validate: can explicitly pass bucket if Unencrypted', () => { - expect(() => createTable({ - tableName: 'name', - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), - encryption: undefined, - })).not.toThrow(); -}); - -test('validate: can explicitly pass bucket if ClientSideKms', () => { - expect(() => createTable({ - tableName: 'name', - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), - encryption: glue.TableEncryption.CLIENT_SIDE_KMS, - })).not.toThrow(); + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + })).not.toThrow(); + }); }); test('Table.fromTableArn', () => {