From 76ebaccf7e10b530c5a1c4f14f26283e13f69142 Mon Sep 17 00:00:00 2001 From: Parker <69879391+scanlonp@users.noreply.github.com> Date: Wed, 9 Aug 2023 17:45:53 -0700 Subject: [PATCH 1/4] prefix format, validation, clean + grant; tests --- packages/@aws-cdk/aws-glue-alpha/README.md | 20 +++ packages/@aws-cdk/aws-glue-alpha/lib/job.ts | 32 +++- .../@aws-cdk/aws-glue-alpha/test/job.test.ts | 160 +++++++++++++++++- 3 files changed, 205 insertions(+), 7 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 8044d470c0b4d..5a614220f8d71 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -105,6 +105,26 @@ new glue.Job(this, 'RayJob', { }); ``` +### Enable Spark UI + +Enable Spark UI setting the `sparkUI` property. + +```ts +new glue.Job(stack, 'EnableSparkUI', { + jobName: 'EtlJobWithSparkUIPrefix', + sparkUI: { + enabled: true, + }, + executable: glue.JobExecutable.pythonEtl({ + glueVersion: glue.GlueVersion.V3_0, + pythonVersion: glue.PythonVersion.THREE, + script: glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')), + }), +}); +``` + +The `sparkUI` property also allows the specification of an s3 bucket and a bucket prefix. + See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue. ## Connection diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts index 7b5c6a1cdf7c4..fd8e7b5eec55a 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts @@ -1,3 +1,4 @@ +import { EOL } from 'os'; import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; import * as events from 'aws-cdk-lib/aws-events'; import * as iam from 'aws-cdk-lib/aws-iam'; @@ -389,8 +390,9 @@ export interface SparkUIProps { /** * The path inside the bucket (objects prefix) where the Glue job stores the logs. + * Use format `'/foo/bar'` * - * @default '/' - the logs will be written at the root of the bucket + * @default - the logs will be written at the root of the bucket */ readonly prefix?: string; } @@ -804,8 +806,9 @@ export class Job extends JobBase { throw new Error('Spark UI is not available for JobType.RAY jobs'); } + this.validatePrefix(props.prefix); const bucket = props.bucket ?? new s3.Bucket(this, 'SparkUIBucket'); - bucket.grantReadWrite(role); + bucket.grantReadWrite(role, this.cleanPrefixForGrant(props.prefix)); const args = { '--enable-spark-ui': 'true', '--spark-event-logs-path': bucket.s3UrlForObject(props.prefix), @@ -820,6 +823,31 @@ export class Job extends JobBase { }; } + private validatePrefix(prefix?: string): void { + if (!prefix || cdk.Token.isUnresolved(prefix)) { + // skip validation if prefix is not specified or is a token + return; + } + + const errors: string[] = []; + + if (!prefix.startsWith('/')) { + errors.push('Prefix must begin with \'/\''); + } + + if (prefix.endsWith('/')) { + errors.push('Prefix must not end with \'/\''); + } + + if (errors.length > 0) { + throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`); + } + } + + private cleanPrefixForGrant(prefix?: string): string | undefined { + return prefix !== undefined ? prefix.slice(1) + '/*' : undefined; + } + private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) { const args: {[key: string]: string} = { '--enable-continuous-cloudwatch-log': 'true', diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts index cdb5a838fba09..94f5c2fc3d5bd 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts @@ -1,3 +1,4 @@ +import { EOL } from 'os'; import { Template } from 'aws-cdk-lib/assertions'; import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; import * as events from 'aws-cdk-lib/aws-events'; @@ -629,12 +630,29 @@ describe('Job', () => { }); }); }); - describe('with bucket and path provided', () => { const sparkUIBucketName = 'sparkbucketname'; - const prefix = 'some/path/'; + const prefix = '/foob/bart'; + const badPrefix = 'foob/bart/'; let sparkUIBucket: s3.IBucket; + const expectedErrors = [ + `Invalid prefix format (value: ${badPrefix})`, + 'Prefix must begin with \'/\'', + 'Prefix must not end with \'/\'', + ].join(EOL); + it('fails if path is mis-formatted', () => { + //sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName); + expect(() => new glue.Job(stack, 'BadPrefixJob', { + ...defaultProps, + sparkUI: { + enabled: true, + bucket: sparkUIBucket, + prefix: badPrefix, + }, + })).toThrow(expectedErrors); + }); + beforeEach(() => { sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName); job = new glue.Job(stack, 'Job', { @@ -642,16 +660,148 @@ describe('Job', () => { sparkUI: { enabled: true, bucket: sparkUIBucket, - prefix, + prefix: prefix, }, }); }); - test('should set spark arguments on the job', () => { + it('should grant the role read/write permissions spark ui bucket prefixed folder', () => { + //console.dir(Template.fromStack(stack).findResources('AWS::IAM::Policy'), { depth: null }); + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':s3:::sparkbucketname', + ], + ], + }, + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + `:s3:::sparkbucketname${prefix}/*`, + ], + ], + }, + ], + }, + /* + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':s3:::bucketname', + ], + ], + }, + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':s3:::bucketname/script', + ], + ], + }, + ], + }, + */ + codeBucketAccessStatement, + ], + Version: '2012-10-17', + }, + PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', + Roles: [{ Ref: 'JobServiceRole4F432993' }], + /*PolicyDocument: { + Statement: [ + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'JobSparkUIBucket8E6A0139', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'JobSparkUIBucket8E6A0139', + 'Arn', + ], + }, + `${prefix}/*`, + ], + ], + }, + ], + }, + codeBucketAccessStatement, + ], + Version: '2012-10-17', + }, + PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', + Roles: [ + { + Ref: 'JobServiceRole4F432993', + }, + ], */ + }); + }); + + it('should set spark arguments on the job', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { DefaultArguments: { '--enable-spark-ui': 'true', - '--spark-event-logs-path': `s3://${sparkUIBucketName}/${prefix}`, + '--spark-event-logs-path': `s3://${sparkUIBucketName}${prefix}`, }, }); }); From 6c7a74c3b785e7689bd324c5aa97aa2666fcfb9a Mon Sep 17 00:00:00 2001 From: Parker <69879391+scanlonp@users.noreply.github.com> Date: Wed, 9 Aug 2023 17:52:01 -0700 Subject: [PATCH 2/4] clean up test --- .../@aws-cdk/aws-glue-alpha/test/job.test.ts | 49 ------------------- 1 file changed, 49 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts index 94f5c2fc3d5bd..1df333bca7e79 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts @@ -745,55 +745,6 @@ describe('Job', () => { }, PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', Roles: [{ Ref: 'JobServiceRole4F432993' }], - /*PolicyDocument: { - Statement: [ - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:PutObjectLegalHold', - 's3:PutObjectRetention', - 's3:PutObjectTagging', - 's3:PutObjectVersionTagging', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'JobSparkUIBucket8E6A0139', - 'Arn', - ], - }, - { - 'Fn::Join': [ - '', - [ - { - 'Fn::GetAtt': [ - 'JobSparkUIBucket8E6A0139', - 'Arn', - ], - }, - `${prefix}/*`, - ], - ], - }, - ], - }, - codeBucketAccessStatement, - ], - Version: '2012-10-17', - }, - PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', - Roles: [ - { - Ref: 'JobServiceRole4F432993', - }, - ], */ }); }); From 3cea967f81b4df49aeef7c05a7d822f6c94fd6cf Mon Sep 17 00:00:00 2001 From: Parker <69879391+scanlonp@users.noreply.github.com> Date: Wed, 9 Aug 2023 17:54:30 -0700 Subject: [PATCH 3/4] more cleaning --- .../@aws-cdk/aws-glue-alpha/test/job.test.ts | 34 ------------------- 1 file changed, 34 deletions(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts index 1df333bca7e79..f4761da7d5b68 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts @@ -642,7 +642,6 @@ describe('Job', () => { 'Prefix must not end with \'/\'', ].join(EOL); it('fails if path is mis-formatted', () => { - //sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName); expect(() => new glue.Job(stack, 'BadPrefixJob', { ...defaultProps, sparkUI: { @@ -666,7 +665,6 @@ describe('Job', () => { }); it('should grant the role read/write permissions spark ui bucket prefixed folder', () => { - //console.dir(Template.fromStack(stack).findResources('AWS::IAM::Policy'), { depth: null }); Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { PolicyDocument: { Statement: [ @@ -707,38 +705,6 @@ describe('Job', () => { }, ], }, - /* - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::Join': [ - '', - [ - 'arn:', - { Ref: 'AWS::Partition' }, - ':s3:::bucketname', - ], - ], - }, - { - 'Fn::Join': [ - '', - [ - 'arn:', - { Ref: 'AWS::Partition' }, - ':s3:::bucketname/script', - ], - ], - }, - ], - }, - */ codeBucketAccessStatement, ], Version: '2012-10-17', From e51403b26c34a764ac52c8497a73a64e3ced47ad Mon Sep 17 00:00:00 2001 From: Parker <69879391+scanlonp@users.noreply.github.com> Date: Thu, 10 Aug 2023 09:35:14 -0700 Subject: [PATCH 4/4] fix readme --- packages/@aws-cdk/aws-glue-alpha/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 5a614220f8d71..6f762206fb04c 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -110,7 +110,7 @@ new glue.Job(this, 'RayJob', { Enable Spark UI setting the `sparkUI` property. ```ts -new glue.Job(stack, 'EnableSparkUI', { +new glue.Job(this, 'EnableSparkUI', { jobName: 'EtlJobWithSparkUIPrefix', sparkUI: { enabled: true,