From 3d0f4ba6dd92ad7b91b00fad6cbab873964683fc Mon Sep 17 00:00:00 2001 From: Noritaka Sekiyama Date: Wed, 18 Jan 2023 12:45:31 +0900 Subject: [PATCH] fix(glue): --conf parameter is no longer a reserved keyword for glue jobs (#23673) AWS Glue has changed its public description of the job parameter `--conf`. The latest description: https://docs.aws.amazon.com/glue/latest/dg/aws-glue-programming-etl-glue-arguments.html > Controls Spark config parameters. It is for advanced use cases. Given this change, AWS CDK no longer needs to block the use of this parameter. This PR removes the validation for the parameter `--conf`. ## Background The parameter `--conf` is needed to use several Spark capabilities. One typical use case is native data lake table format support. https://aws.amazon.com/about-aws/whats-new/2022/11/aws-glue-apache-spark-native-data-lake-frameworks-apache-hudi-iceberg-delta-lake/ Public sample notebooks (e.g. https://github.com/aws-samples/aws-glue-samples/blob/master/examples/notebooks/native_hudi_sql.ipynb) use the parameter `--conf`. ---- ### All Submissions: * [x] Have you followed the guidelines in our [Contributing guide](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md)? ### Adding new Construct Runtime Dependencies: * [ ] This PR adds new construct runtime dependencies following the process described [here](https://github.com/aws/aws-cdk/blob/main/CONTRIBUTING.md/#adding-construct-runtime-dependencies) ### New Features * [ ] Have you added the new feature to an [integration test](https://github.com/aws/aws-cdk/blob/main/INTEGRATION_TESTS.md)? * [ ] Did you use `yarn integ` to deploy the infrastructure and generate the snapshot (i.e. `yarn integ` without `--dry-run`)? 
*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license* --- packages/@aws-cdk/aws-glue/lib/job.ts | 2 +- .../aws-glue-job.assets.json | 6 +- .../aws-glue-job.template.json | 9 ++- .../test/integ.job.js.snapshot/cdk.out | 2 +- .../test/integ.job.js.snapshot/integ.json | 2 +- .../test/integ.job.js.snapshot/manifest.json | 76 +------------------ .../test/integ.job.js.snapshot/tree.json | 21 ++--- packages/@aws-cdk/aws-glue/test/integ.job.ts | 5 +- packages/@aws-cdk/aws-glue/test/job.test.ts | 2 +- 9 files changed, 30 insertions(+), 95 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/lib/job.ts b/packages/@aws-cdk/aws-glue/lib/job.ts index 726d397ac90f3..eebb8b1acbdb0 100644 --- a/packages/@aws-cdk/aws-glue/lib/job.ts +++ b/packages/@aws-cdk/aws-glue/lib/job.ts @@ -692,7 +692,7 @@ export class Job extends JobBase { */ private checkNoReservedArgs(defaultArguments?: { [key: string]: string }) { if (defaultArguments) { - const reservedArgs = new Set(['--conf', '--debug', '--mode', '--JOB_NAME']); + const reservedArgs = new Set(['--debug', '--mode', '--JOB_NAME']); Object.keys(defaultArguments).forEach((arg) => { if (reservedArgs.has(arg)) { throw new Error(`The ${arg} argument is reserved by Glue. 
Don't set it`); diff --git a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.assets.json b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.assets.json index 29ae3d1255380..8e739ca15edca 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.assets.json +++ b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.assets.json @@ -1,5 +1,5 @@ { - "version": "22.0.0", + "version": "29.0.0", "files": { "432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855": { "source": { @@ -14,7 +14,7 @@ } } }, - "0985af21379e9d6e1cba091105ecb533ee38a96b4c738816daf17d951a0752b4": { + "977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2": { "source": { "path": "aws-glue-job.template.json", "packaging": "file" @@ -22,7 +22,7 @@ "destinations": { "current_account-current_region": { "bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}", - "objectKey": "0985af21379e9d6e1cba091105ecb533ee38a96b4c738816daf17d951a0752b4.json", + "objectKey": "977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json", "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}" } } diff --git a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.template.json b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.template.json index f7838294de6e4..47f34d95c01f7 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.template.json +++ b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/aws-glue-job.template.json @@ -173,7 +173,8 @@ ] }, "arg1": "value1", - "arg2": "value2" + "arg2": "value2", + "--conf": "valueConf" }, "ExecutionProperty": { "MaxConcurrentRuns": 2 @@ -527,7 +528,8 @@ ] }, "arg1": "value1", - "arg2": "value2" + "arg2": "value2", + "--conf": "valueConf" }, "ExecutionProperty": { "MaxConcurrentRuns": 2 @@ -881,7 +883,8 @@ ] }, "arg1": 
"value1", - "arg2": "value2" + "arg2": "value2", + "--conf": "valueConf" }, "ExecutionProperty": { "MaxConcurrentRuns": 2 diff --git a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/cdk.out b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/cdk.out index 145739f539580..d8b441d447f8a 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/cdk.out +++ b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/cdk.out @@ -1 +1 @@ -{"version":"22.0.0"} \ No newline at end of file +{"version":"29.0.0"} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/integ.json b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/integ.json index 28e4fd8c94ff9..fa2e902e93c44 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/integ.json +++ b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/integ.json @@ -1,5 +1,5 @@ { - "version": "22.0.0", + "version": "29.0.0", "testCases": { "integ.job": { "stacks": [ diff --git a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/manifest.json b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/manifest.json index 9384baab89163..9a6172107f0bc 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/manifest.json +++ b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/manifest.json @@ -1,5 +1,5 @@ { - "version": "22.0.0", + "version": "29.0.0", "artifacts": { "aws-glue-job.assets": { "type": "cdk:asset-manifest", @@ -17,7 +17,7 @@ "validateOnSynth": false, "assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}", "cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}", - "stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/0985af21379e9d6e1cba091105ecb533ee38a96b4c738816daf17d951a0752b4.json", + "stackTemplateAssetObjectUrl": 
"s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json", "requiresBootstrapStackVersion": 6, "bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version", "additionalDependencies": [ @@ -224,78 +224,6 @@ "type": "aws:cdk:logicalId", "data": "CheckBootstrapVersion" } - ], - "EtlJobServiceRole837F781B": [ - { - "type": "aws:cdk:logicalId", - "data": "EtlJobServiceRole837F781B", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } - ], - "EtlJobServiceRoleDefaultPolicy8BFE343B": [ - { - "type": "aws:cdk:logicalId", - "data": "EtlJobServiceRoleDefaultPolicy8BFE343B", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } - ], - "EtlJobSparkUIBucketBF23744B": [ - { - "type": "aws:cdk:logicalId", - "data": "EtlJobSparkUIBucketBF23744B", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } - ], - "EtlJob7FC88E45": [ - { - "type": "aws:cdk:logicalId", - "data": "EtlJob7FC88E45", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } - ], - "EtlJobSuccessMetricRuleA72A3EF6": [ - { - "type": "aws:cdk:logicalId", - "data": "EtlJobSuccessMetricRuleA72A3EF6", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } - ], - "StreamingJobServiceRole1B4B8BF9": [ - { - "type": "aws:cdk:logicalId", - "data": "StreamingJobServiceRole1B4B8BF9", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } - ], - "StreamingJobServiceRoleDefaultPolicyA0CC4C68": [ - { - "type": "aws:cdk:logicalId", - "data": "StreamingJobServiceRoleDefaultPolicyA0CC4C68", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } - ], - "StreamingJob3783CC17": [ - { - "type": "aws:cdk:logicalId", - "data": "StreamingJob3783CC17", - "trace": [ - "!!DESTRUCTIVE_CHANGES: WILL_DESTROY" - ] - } ] }, "displayName": "aws-glue-job" diff --git a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/tree.json b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/tree.json index 
c601e3512008c..712c057e96df5 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/tree.json +++ b/packages/@aws-cdk/aws-glue/test/integ.job.js.snapshot/tree.json @@ -203,13 +203,13 @@ "version": "0.0.0" } }, - "Code93a4952ea654434aca8481fb2bc2a836": { - "id": "Code93a4952ea654434aca8481fb2bc2a836", - "path": "aws-glue-job/EtlJob2.0/Code93a4952ea654434aca8481fb2bc2a836", + "Codebeaf1c9f157c9b396ec6972f85317dbc": { + "id": "Codebeaf1c9f157c9b396ec6972f85317dbc", + "path": "aws-glue-job/EtlJob2.0/Codebeaf1c9f157c9b396ec6972f85317dbc", "children": { "Stage": { "id": "Stage", - "path": "aws-glue-job/EtlJob2.0/Code93a4952ea654434aca8481fb2bc2a836/Stage", + "path": "aws-glue-job/EtlJob2.0/Codebeaf1c9f157c9b396ec6972f85317dbc/Stage", "constructInfo": { "fqn": "@aws-cdk/core.AssetStaging", "version": "0.0.0" @@ -217,7 +217,7 @@ }, "AssetBucket": { "id": "AssetBucket", - "path": "aws-glue-job/EtlJob2.0/Code93a4952ea654434aca8481fb2bc2a836/AssetBucket", + "path": "aws-glue-job/EtlJob2.0/Codebeaf1c9f157c9b396ec6972f85317dbc/AssetBucket", "constructInfo": { "fqn": "@aws-cdk/aws-s3.BucketBase", "version": "0.0.0" @@ -275,7 +275,8 @@ ] }, "arg1": "value1", - "arg2": "value2" + "arg2": "value2", + "--conf": "valueConf" }, "executionProperty": { "maxConcurrentRuns": 2 @@ -788,7 +789,8 @@ ] }, "arg1": "value1", - "arg2": "value2" + "arg2": "value2", + "--conf": "valueConf" }, "executionProperty": { "maxConcurrentRuns": 2 @@ -1301,7 +1303,8 @@ ] }, "arg1": "value1", - "arg2": "value2" + "arg2": "value2", + "--conf": "valueConf" }, "executionProperty": { "maxConcurrentRuns": 2 @@ -1974,7 +1977,7 @@ "path": "Tree", "constructInfo": { "fqn": "constructs.Construct", - "version": "10.1.168" + "version": "10.1.209" } } }, diff --git a/packages/@aws-cdk/aws-glue/test/integ.job.ts b/packages/@aws-cdk/aws-glue/test/integ.job.ts index 417ede1793054..791fd734fb0ca 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.job.ts +++ b/packages/@aws-cdk/aws-glue/test/integ.job.ts @@ -38,8 
+38,9 @@ const script = glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world. timeout: cdk.Duration.minutes(5), notifyDelayAfter: cdk.Duration.minutes(1), defaultArguments: { - arg1: 'value1', - arg2: 'value2', + 'arg1': 'value1', + 'arg2': 'value2', + '--conf': 'valueConf', }, sparkUI: { enabled: true, diff --git a/packages/@aws-cdk/aws-glue/test/job.test.ts b/packages/@aws-cdk/aws-glue/test/job.test.ts index d30e5897443dd..f03d41d243494 100644 --- a/packages/@aws-cdk/aws-glue/test/job.test.ts +++ b/packages/@aws-cdk/aws-glue/test/job.test.ts @@ -566,7 +566,7 @@ describe('Job', () => { }); test('with reserved args should throw', () => { - ['--conf', '--debug', '--mode', '--JOB_NAME'].forEach((arg, index) => { + ['--debug', '--mode', '--JOB_NAME'].forEach((arg, index) => { const defaultArguments: {[key: string]: string} = {}; defaultArguments[arg] = 'random value';