Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(glue): Job construct does not honor SparkUIProps S3 prefix when granting S3 access #26696

Merged
merged 7 commits into from
Aug 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions packages/@aws-cdk/aws-glue-alpha/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,26 @@ new glue.Job(this, 'RayJob', {
});
```

### Enable Spark UI

Enable Spark UI setting the `sparkUI` property.

```ts
new glue.Job(this, 'EnableSparkUI', {
jobName: 'EtlJobWithSparkUIPrefix',
sparkUI: {
enabled: true,
},
executable: glue.JobExecutable.pythonEtl({
glueVersion: glue.GlueVersion.V3_0,
pythonVersion: glue.PythonVersion.THREE,
script: glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')),
}),
});
```

The `sparkUI` property also allows the specification of an s3 bucket and a bucket prefix.

See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue.

## Connection
Expand Down
32 changes: 30 additions & 2 deletions packages/@aws-cdk/aws-glue-alpha/lib/job.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { EOL } from 'os';
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
import * as events from 'aws-cdk-lib/aws-events';
import * as iam from 'aws-cdk-lib/aws-iam';
Expand Down Expand Up @@ -389,8 +390,9 @@ export interface SparkUIProps {

/**
* The path inside the bucket (objects prefix) where the Glue job stores the logs.
* Use format `'/foo/bar'`
*
* @default '/' - the logs will be written at the root of the bucket
* @default - the logs will be written at the root of the bucket
*/
readonly prefix?: string;
}
Expand Down Expand Up @@ -813,8 +815,9 @@ export class Job extends JobBase {
throw new Error('Spark UI is not available for JobType.RAY jobs');
}

this.validatePrefix(props.prefix);
const bucket = props.bucket ?? new s3.Bucket(this, 'SparkUIBucket');
bucket.grantReadWrite(role);
bucket.grantReadWrite(role, this.cleanPrefixForGrant(props.prefix));
const args = {
'--enable-spark-ui': 'true',
'--spark-event-logs-path': bucket.s3UrlForObject(props.prefix),
Expand All @@ -829,6 +832,31 @@ export class Job extends JobBase {
};
}

private validatePrefix(prefix?: string): void {
if (!prefix || cdk.Token.isUnresolved(prefix)) {
// skip validation if prefix is not specified or is a token
return;
}

const errors: string[] = [];

if (!prefix.startsWith('/')) {
errors.push('Prefix must begin with \'/\'');
}

if (prefix.endsWith('/')) {
errors.push('Prefix must not end with \'/\'');
}

if (errors.length > 0) {
throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`);
}
}

private cleanPrefixForGrant(prefix?: string): string | undefined {
return prefix !== undefined ? prefix.slice(1) + '/*' : undefined;
}

private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) {
const args: {[key: string]: string} = {
'--enable-continuous-cloudwatch-log': 'true',
Expand Down
77 changes: 72 additions & 5 deletions packages/@aws-cdk/aws-glue-alpha/test/job.test.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { EOL } from 'os';
import { Template } from 'aws-cdk-lib/assertions';
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
import * as events from 'aws-cdk-lib/aws-events';
Expand Down Expand Up @@ -629,29 +630,95 @@ describe('Job', () => {
});
});
});

describe('with bucket and path provided', () => {
const sparkUIBucketName = 'sparkbucketname';
const prefix = 'some/path/';
const prefix = '/foob/bart';
const badPrefix = 'foob/bart/';
let sparkUIBucket: s3.IBucket;

const expectedErrors = [
`Invalid prefix format (value: ${badPrefix})`,
'Prefix must begin with \'/\'',
'Prefix must not end with \'/\'',
].join(EOL);
it('fails if path is mis-formatted', () => {
expect(() => new glue.Job(stack, 'BadPrefixJob', {
...defaultProps,
sparkUI: {
enabled: true,
bucket: sparkUIBucket,
prefix: badPrefix,
},
})).toThrow(expectedErrors);
});

beforeEach(() => {
sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName);
job = new glue.Job(stack, 'Job', {
...defaultProps,
sparkUI: {
enabled: true,
bucket: sparkUIBucket,
prefix,
prefix: prefix,
},
});
});

test('should set spark arguments on the job', () => {
it('should grant the role read/write permissions spark ui bucket prefixed folder', () => {
Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', {
PolicyDocument: {
Statement: [
{
Action: [
's3:GetObject*',
's3:GetBucket*',
's3:List*',
's3:DeleteObject*',
's3:PutObject',
's3:PutObjectLegalHold',
's3:PutObjectRetention',
's3:PutObjectTagging',
's3:PutObjectVersionTagging',
's3:Abort*',
],
Effect: 'Allow',
Resource: [
{
'Fn::Join': [
'',
[
'arn:',
{ Ref: 'AWS::Partition' },
':s3:::sparkbucketname',
],
],
},
{
'Fn::Join': [
'',
[
'arn:',
{ Ref: 'AWS::Partition' },
`:s3:::sparkbucketname${prefix}/*`,
],
],
},
],
},
codeBucketAccessStatement,
],
Version: '2012-10-17',
},
PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D',
Roles: [{ Ref: 'JobServiceRole4F432993' }],
});
});

it('should set spark arguments on the job', () => {
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', {
DefaultArguments: {
'--enable-spark-ui': 'true',
'--spark-event-logs-path': `s3://${sparkUIBucketName}/${prefix}`,
'--spark-event-logs-path': `s3://${sparkUIBucketName}${prefix}`,
},
});
});
Expand Down