Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(glue): support Ray jobs #23822

Merged
merged 7 commits into from
Feb 9, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions packages/@aws-cdk/aws-glue/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,23 @@ new glue.Job(this, 'PythonShellJob', {
});
```

### Ray Jobs

These jobs run in a Ray environment managed by AWS Glue.

```ts
new glue.Job(this, 'RayJob', {
executable: glue.JobExecutable.pythonRay({
glueVersion: glue.GlueVersion.V4_0,
pythonVersion: glue.PythonVersion.THREE_NINE,
script: glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')),
}),
workerType: glue.WorkerType.Z_2X,
workerCount: 2,
description: 'an example Ray job'
});
```

See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue.

## Connection
Expand Down
44 changes: 39 additions & 5 deletions packages/@aws-cdk/aws-glue/lib/job-executable.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ export enum PythonVersion {
*/
export class JobType {
/**
* Command for running a Glue ETL job.
* Command for running a Glue Spark job.
*/
public static readonly ETL = new JobType('glueetl');

/**
* Command for running a Glue streaming job.
* Command for running a Glue Spark streaming job.
*/
public static readonly STREAMING = new JobType('gluestreaming');

Expand All @@ -109,6 +109,11 @@ export class JobType {
*/
public static readonly PYTHON_SHELL = new JobType('pythonshell');

/**
* Command for running a Glue Ray job.
*/
public static readonly RAY = new JobType('glueray');

/**
* Custom type name
* @param name type name
Expand Down Expand Up @@ -211,6 +216,11 @@ export interface PythonSparkJobExecutableProps extends SharedSparkJobExecutableP
*/
export interface PythonShellExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {}

/**
* Props for creating a Python Ray job executable
*/
export interface PythonRayExecutableProps extends SharedJobExecutableProps, PythonExecutableProps {}

/**
* The executable properties related to the Glue job's GlueVersion, JobType and code
*/
Expand Down Expand Up @@ -281,6 +291,19 @@ export class JobExecutable {
});
}

/**
* Create Python executable props for Ray jobs.
*
* @param props Ray Job props.
*/
public static pythonRay(props: PythonRayExecutableProps): JobExecutable {
return new JobExecutable({
...props,
type: JobType.RAY,
language: JobLanguage.PYTHON,
});
}

/**
* Create a custom JobExecutable.
*
Expand All @@ -297,10 +320,18 @@ export class JobExecutable {
if (config.language !== JobLanguage.PYTHON) {
throw new Error('Python shell requires the language to be set to Python');
}
if ([GlueVersion.V0_9, GlueVersion.V2_0, GlueVersion.V3_0, GlueVersion.V4_0].includes(config.glueVersion)) {
if ([GlueVersion.V0_9, GlueVersion.V3_0, GlueVersion.V4_0].includes(config.glueVersion)) {
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support Python Shell`);
}
}
if (JobType.RAY === config.type) {
if (config.language !== JobLanguage.PYTHON) {
throw new Error('Ray requires the language to be set to Python');
}
if ([GlueVersion.V0_9, GlueVersion.V1_0, GlueVersion.V2_0, GlueVersion.V3_0].includes(config.glueVersion)) {
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support Ray`);
}
}
if (config.extraJarsFirst && [GlueVersion.V0_9, GlueVersion.V1_0].includes(config.glueVersion)) {
throw new Error(`Specified GlueVersion ${config.glueVersion.name} does not support extraJarsFirst`);
}
Expand All @@ -310,8 +341,11 @@ export class JobExecutable {
if (JobLanguage.PYTHON !== config.language && config.extraPythonFiles) {
throw new Error('extraPythonFiles is not supported for languages other than JobLanguage.PYTHON');
}
if (config.pythonVersion === PythonVersion.THREE_NINE && config.type !== JobType.PYTHON_SHELL) {
throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell');
if (config.pythonVersion === PythonVersion.THREE_NINE && config.type !== JobType.PYTHON_SHELL && config.type !== JobType.RAY) {
throw new Error('Specified PythonVersion PythonVersion.THREE_NINE is only supported for JobType Python Shell and Ray');
}
if (config.pythonVersion === PythonVersion.THREE && config.type === JobType.RAY) {
throw new Error('Specified PythonVersion PythonVersion.THREE is not supported for Ray');
}
this.config = config;
}
Expand Down
12 changes: 12 additions & 0 deletions packages/@aws-cdk/aws-glue/lib/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,16 @@ export class WorkerType {
*/
public static readonly G_2X = new WorkerType('G.2X');

/**
* Each worker maps to 0.25 DPU (2 vCPU, 4 GB of memory, 64 GB disk), and provides 1 executor per worker. Suitable for low volume streaming jobs.
*/
public static readonly G_025X = new WorkerType('G.025X');

/**
* Each worker maps to 2 high-memory DPU [M-DPU] (8 vCPU, 64 GB of memory, 128 GB disk). Supported in Ray jobs.
*/
public static readonly Z_2X = new WorkerType('Z.2X');

/**
* Custom worker type
* @param workerType custom worker type
Expand Down Expand Up @@ -726,6 +736,8 @@ export class Job extends JobBase {
private setupSparkUI(executable: JobExecutableConfig, role: iam.IRole, props: SparkUIProps) {
if (JobType.PYTHON_SHELL === executable.type) {
throw new Error('Spark UI is not available for JobType.PYTHON_SHELL jobs');
} else if (JobType.RAY === executable.type) {
throw new Error('Spark UI is not available for JobType.RAY jobs');
}

const bucket = props.bucket ?? new s3.Bucket(this, 'SparkUIBucket');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@
}
}
},
"977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2": {
"b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce": {
"source": {
"path": "aws-glue-job.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json",
"objectKey": "b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,11 @@
},
"GlueVersion": "2.0",
"Name": "StreamingJob2.0",
"NumberOfWorkers": 10,
"Tags": {
"key": "value"
}
},
"WorkerType": "G.025X"
}
},
"EtlJob30ServiceRole8E675579": {
Expand Down Expand Up @@ -705,9 +707,11 @@
},
"GlueVersion": "3.0",
"Name": "StreamingJob3.0",
"NumberOfWorkers": 10,
"Tags": {
"key": "value"
}
},
"WorkerType": "G.025X"
}
},
"EtlJob40ServiceRoleBDD9998A": {
Expand Down Expand Up @@ -1060,9 +1064,11 @@
},
"GlueVersion": "4.0",
"Name": "StreamingJob4.0",
"NumberOfWorkers": 10,
"Tags": {
"key": "value"
}
},
"WorkerType": "G.025X"
}
},
"ShellJobServiceRoleCF97BC4B": {
Expand Down Expand Up @@ -1314,6 +1320,133 @@
"key": "value"
}
}
},
"RayJobServiceRole51433C3D": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
{
"Action": "sts:AssumeRole",
"Effect": "Allow",
"Principal": {
"Service": "glue.amazonaws.com"
}
}
],
"Version": "2012-10-17"
},
"ManagedPolicyArns": [
{
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":iam::aws:policy/service-role/AWSGlueServiceRole"
]
]
}
]
}
},
"RayJobServiceRoleDefaultPolicyA615640D": {
"Type": "AWS::IAM::Policy",
"Properties": {
"PolicyDocument": {
"Statement": [
{
"Action": [
"s3:GetBucket*",
"s3:GetObject*",
"s3:List*"
],
"Effect": "Allow",
"Resource": [
{
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":s3:::",
{
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
},
"/*"
]
]
},
{
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":s3:::",
{
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
}
]
]
}
]
}
],
"Version": "2012-10-17"
},
"PolicyName": "RayJobServiceRoleDefaultPolicyA615640D",
"Roles": [
{
"Ref": "RayJobServiceRole51433C3D"
}
]
}
},
"RayJob2F7864D9": {
"Type": "AWS::Glue::Job",
"Properties": {
"Command": {
"Name": "glueray",
"PythonVersion": "3.9",
"ScriptLocation": {
"Fn::Join": [
"",
[
"s3://",
{
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
},
"/432033e3218068a915d2532fa9be7858a12b228a2ae6e5c10faccd9097b1e855.py"
]
]
}
},
"Role": {
"Fn::GetAtt": [
"RayJobServiceRole51433C3D",
"Arn"
]
},
"DefaultArguments": {
"--job-language": "python",
"arg1": "value1",
"arg2": "value2"
},
"GlueVersion": "4.0",
"Name": "RayJob",
"NumberOfWorkers": 2,
"Tags": {
"key": "value"
},
"WorkerType": "Z.2X"
}
}
},
"Parameters": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"validateOnSynth": false,
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-deploy-role-${AWS::AccountId}-${AWS::Region}",
"cloudFormationExecutionRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-cfn-exec-role-${AWS::AccountId}-${AWS::Region}",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/977a2f07e22679bb04b03ce83cc1fac3e6cc269a794e38248ec67106ee39f0a2.json",
"stackTemplateAssetObjectUrl": "s3://cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}/b553fef631f82898c826f3c20e1de0d155dbd3a35339ef92d0893052a5be69ce.json",
"requiresBootstrapStackVersion": 6,
"bootstrapStackVersionSsmParameter": "/cdk-bootstrap/hnb659fds/version",
"additionalDependencies": [
Expand Down Expand Up @@ -213,6 +213,24 @@
"data": "ShellJob390C141361"
}
],
"/aws-glue-job/RayJob/ServiceRole/Resource": [
{
"type": "aws:cdk:logicalId",
"data": "RayJobServiceRole51433C3D"
}
],
"/aws-glue-job/RayJob/ServiceRole/DefaultPolicy/Resource": [
{
"type": "aws:cdk:logicalId",
"data": "RayJobServiceRoleDefaultPolicyA615640D"
}
],
"/aws-glue-job/RayJob/Resource": [
{
"type": "aws:cdk:logicalId",
"data": "RayJob2F7864D9"
}
],
"/aws-glue-job/BootstrapVersion": [
{
"type": "aws:cdk:logicalId",
Expand Down
Loading