From 537247d7f072714892235f34f750493ebf567062 Mon Sep 17 00:00:00 2001 From: Liem Truong Date: Sat, 15 Jul 2023 21:53:54 -0700 Subject: [PATCH 1/7] Add `runtime` to Glue Ray job command --- internal/service/glue/job.go | 11 +++++++ internal/service/glue/job_test.go | 49 +++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/internal/service/glue/job.go b/internal/service/glue/job.go index b9d3776fc820..68ca6b1faa08 100644 --- a/internal/service/glue/job.go +++ b/internal/service/glue/job.go @@ -64,6 +64,12 @@ func ResourceJob() *schema.Resource { Computed: true, ValidateFunc: validation.StringInSlice([]string{"2", "3", "3.9"}, true), }, + "runtime": { + Type: schema.TypeString, + Optional: true, + Computed: true, + ValidateFunc: validation.StringInSlice([]string{"Ray2.4"}, true), + }, }, }, }, @@ -438,6 +444,10 @@ func expandJobCommand(l []interface{}) *glue.JobCommand { jobCommand.PythonVersion = aws.String(v) } + if v, ok := m["runtime"].(string); ok && v != "" { + jobCommand.Runtime = aws.String(v) + } + return jobCommand } @@ -480,6 +490,7 @@ func flattenJobCommand(jobCommand *glue.JobCommand) []map[string]interface{} { "name": aws.StringValue(jobCommand.Name), "script_location": aws.StringValue(jobCommand.ScriptLocation), "python_version": aws.StringValue(jobCommand.PythonVersion), + "runtime": aws.StringValue(jobCommand.Runtime), } return []map[string]interface{}{m} diff --git a/internal/service/glue/job_test.go b/internal/service/glue/job_test.go index 72c49fc7bb6f..fe22a64fd47e 100644 --- a/internal/service/glue/job_test.go +++ b/internal/service/glue/job_test.go @@ -726,6 +726,34 @@ func TestAccGlueJob_pythonShell(t *testing.T) { }) } +func TestAccGlueJob_rayJob(t *testing.T) { + ctx := acctest.Context(t) + var job glue.Job + rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) + resourceName := "aws_glue_job.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { acctest.PreCheck(ctx, t) }, + ErrorCheck: 
acctest.ErrorCheck(t, glue.EndpointsID), + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories, + CheckDestroy: testAccCheckJobDestroy(ctx), + Steps: []resource.TestStep{ + { + Config: testAccJobConfig_rayJob(rName), + Check: resource.ComposeTestCheckFunc( + testAccCheckJobExists(ctx, resourceName, &job), + resource.TestCheckResourceAttr(resourceName, "command.#", "1"), + resource.TestCheckResourceAttr(resourceName, "command.0.script_location", "testscriptlocation"), + resource.TestCheckResourceAttr(resourceName, "command.0.name", "glueray"), + resource.TestCheckResourceAttr(resourceName, "command.0.python_version", "3.9"), + resource.TestCheckResourceAttr(resourceName, "command.0.runtime", "Ray2.4"), + resource.TestCheckResourceAttr(resourceName, "worker_type", "Z.2X"), + ), + }, + }, + }) +} + func TestAccGlueJob_maxCapacity(t *testing.T) { ctx := acctest.Context(t) var job glue.Job @@ -1198,6 +1226,27 @@ resource "aws_glue_job" "test" { `, rName, pythonVersion)) } +func testAccJobConfig_rayJob(rName string) string { + return acctest.ConfigCompose(testAccJobConfig_base(rName), fmt.Sprintf(` +resource "aws_glue_job" "test" { + glue_version = "4.0" + name = %[1]q + role_arn = aws_iam_role.test.arn + worker_type = "Z.2X" + number_of_workers = 10 + + command { + name = "glueray" + python_version = "3.9" + runtime = "Ray2.4" + script_location = "testscriptlocation" + } + + depends_on = [aws_iam_role_policy_attachment.test] +} +`, rName)) +} + func testAccJobConfig_maxCapacity(rName string, maxCapacity float64) string { return acctest.ConfigCompose(testAccJobConfig_base(rName), fmt.Sprintf(` resource "aws_glue_job" "test" { From 3281055383415a43e0354f87ad68a2be37d440f9 Mon Sep 17 00:00:00 2001 From: Liem Truong Date: Sat, 15 Jul 2023 22:39:40 -0700 Subject: [PATCH 2/7] Update docs --- website/docs/r/glue_job.html.markdown | 46 +++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/website/docs/r/glue_job.html.markdown 
b/website/docs/r/glue_job.html.markdown index b5004509ff32..2d10bbd1c254 100644 --- a/website/docs/r/glue_job.html.markdown +++ b/website/docs/r/glue_job.html.markdown @@ -27,6 +27,24 @@ resource "aws_glue_job" "example" { } ``` +### Ray Job + +```terraform +resource "aws_glue_job" "example" { + name = "example" + role_arn = aws_iam_role.example.arn + glue_version = "4.0" + worker_type = "Z.2X" + + command { + name = "glueray" + python_version = "3.9" + runtime = "Ray2.4" + script_location = "s3://${aws_s3_bucket.example.bucket}/example.py" + } +} +``` + ### Scala Job ```terraform @@ -89,7 +107,7 @@ The following arguments are supported: * `non_overridable_arguments` – (Optional) Non-overridable arguments for this job, specified as name-value pairs. * `description` – (Optional) Description of the job. * `execution_property` – (Optional) Execution property of the job. Defined below. -* `glue_version` - (Optional) The version of glue to use, for example "1.0". For information about available versions, see the [AWS Glue Release Notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html). +* `glue_version` - (Optional) The version of glue to use, for example "1.0". Ray jobs should set this to 4.0 or greater. For information about available versions, see the [AWS Glue Release Notes](https://docs.aws.amazon.com/glue/latest/dg/release-notes.html). * `execution_class` - (Optional) Indicates whether the job is run with a standard or flexible execution class. The standard execution class is ideal for time-sensitive workloads that require fast job startup and dedicated resources. Valid value: `FLEX`, `STANDARD`. * `max_capacity` – (Optional) The maximum number of AWS Glue data processing units (DPUs) that can be allocated when this job runs. `Required` when `pythonshell` is set, accept either `0.0625` or `1.0`. Use `number_of_workers` and `worker_type` arguments instead with `glue_version` `2.0` and above. 
* `max_retries` – (Optional) The maximum number of times to retry this job if it fails. @@ -99,14 +117,36 @@ The following arguments are supported: * `tags` - (Optional) Key-value map of resource tags. If configured with a provider [`default_tags` configuration block](https://registry.terraform.io/providers/hashicorp/aws/latest/docs#default_tags-configuration-block) present, tags with matching keys will overwrite those defined at the provider-level. * `timeout` – (Optional) The job timeout in minutes. The default is 2880 minutes (48 hours) for `glueetl` and `pythonshell` jobs, and null (unlimited) for `gluestreaming` jobs. * `security_configuration` - (Optional) The name of the Security Configuration to be associated with the job. -* `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, or G.2X. +* `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs. Accepts the value + Z.2X for Ray jobs. + + * For the Standard worker type, each worker provides 4 vCPU, 16 GB of + memory and a 50GB disk, and 2 executors per worker. + + * For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of + memory, 64 GB disk), and provides 1 executor per worker. We recommend + this worker type for memory-intensive jobs. + + * For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of + memory, 128 GB disk), and provides 1 executor per worker. We recommend + this worker type for memory-intensive jobs. + + * For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPU, 4 + GB of memory, 64 GB disk), and provides 1 executor per worker. We recommend + this worker type for low volume streaming jobs. This worker type is only + available for Glue version 3.0 streaming jobs. 
+ + * For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPU, 64 GB + of m emory, 128 GB disk), and provides up to 8 Ray workers based on the + autoscaler. * `number_of_workers` - (Optional) The number of workers of a defined workerType that are allocated when a job runs. ### command Argument Reference -* `name` - (Optional) The name of the job command. Defaults to `glueetl`. Use `pythonshell` for Python Shell Job Type, or `gluestreaming` for Streaming Job Type. `max_capacity` needs to be set if `pythonshell` is chosen. +* `name` - (Optional) The name of the job command. Defaults to `glueetl`. Use `pythonshell` for Python Shell Job Type, `glueray` for Ray Job Type, or `gluestreaming` for Streaming Job Type. `max_capacity` needs to be set if `pythonshell` is chosen. * `script_location` - (Required) Specifies the S3 path to a script that executes a job. * `python_version` - (Optional) The Python version being used to execute a Python shell job. Allowed values are 2, 3 or 3.9. Version 3 refers to Python 3.6. +* `runtime` - (Optional) In Ray jobs, runtime is used to specify the versions of Ray, Python and additional libraries available in your environment. This field is not used in other job types. For supported runtime environment values, see [Working with Ray jobs](https://docs.aws.amazon.com/glue/latest/dg/ray-jobs-section.html#author-job-ray-runtimes) in the Glue Developer Guide. 
### execution_property Argument Reference From 55176b576dadae25d91b8862c0caa2a3e965341b Mon Sep 17 00:00:00 2001 From: Liem Truong Date: Sat, 15 Jul 2023 23:02:46 -0700 Subject: [PATCH 3/7] Create 32528.txt --- .changelog/32528.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/32528.txt diff --git a/.changelog/32528.txt b/.changelog/32528.txt new file mode 100644 index 000000000000..3f57cb945f9d --- /dev/null +++ b/.changelog/32528.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +resource/aws_glue_job: Add `command.runtime` parameter +``` \ No newline at end of file From 1cf355ea2d70fecfb4d6e4b5e99dacba9fa9f10f Mon Sep 17 00:00:00 2001 From: Liem Truong Date: Sat, 15 Jul 2023 23:31:03 -0700 Subject: [PATCH 4/7] Fix for testacc and markdown lints --- internal/service/glue/job_test.go | 6 +++--- website/docs/r/glue_job.html.markdown | 28 ++++++--------------------- 2 files changed, 9 insertions(+), 25 deletions(-) diff --git a/internal/service/glue/job_test.go b/internal/service/glue/job_test.go index fe22a64fd47e..e0c46457f747 100644 --- a/internal/service/glue/job_test.go +++ b/internal/service/glue/job_test.go @@ -1236,9 +1236,9 @@ resource "aws_glue_job" "test" { number_of_workers = 10 command { - name = "glueray" - python_version = "3.9" - runtime = "Ray2.4" + name = "glueray" + python_version = "3.9" + runtime = "Ray2.4" script_location = "testscriptlocation" } diff --git a/website/docs/r/glue_job.html.markdown b/website/docs/r/glue_job.html.markdown index 2d10bbd1c254..762b18aa33d1 100644 --- a/website/docs/r/glue_job.html.markdown +++ b/website/docs/r/glue_job.html.markdown @@ -117,28 +117,12 @@ The following arguments are supported: * `tags` - (Optional) Key-value map of resource tags. If configured with a provider [`default_tags` configuration block](https://registry.terraform.io/providers/hashicorp/aws/latest/docs#default_tags-configuration-block) present, tags with matching keys will overwrite those defined at the provider-level. 
* `timeout` – (Optional) The job timeout in minutes. The default is 2880 minutes (48 hours) for `glueetl` and `pythonshell` jobs, and null (unlimited) for `gluestreaming` jobs. * `security_configuration` - (Optional) The name of the Security Configuration to be associated with the job. -* `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs. Accepts the value - Z.2X for Ray jobs. - - * For the Standard worker type, each worker provides 4 vCPU, 16 GB of - memory and a 50GB disk, and 2 executors per worker. - - * For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of - memory, 64 GB disk), and provides 1 executor per worker. We recommend - this worker type for memory-intensive jobs. - - * For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of - memory, 128 GB disk), and provides 1 executor per worker. We recommend - this worker type for memory-intensive jobs. - - * For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPU, 4 - GB of memory, 64 GB disk), and provides 1 executor per worker. We recommend - this worker type for low volume streaming jobs. This worker type is only - available for Glue version 3.0 streaming jobs. - - * For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPU, 64 GB - of m emory, 128 GB disk), and provides up to 8 Ray workers based on the - autoscaler. +* `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs. + * For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. + * For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs. 
+ * For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs. + * For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPU, 4GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for low volume streaming jobs. Only available for Glue version 3.0. + * For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPU, 64 GB of m emory, 128 GB disk), and provides up to 8 Ray workers based on the autoscaler. * `number_of_workers` - (Optional) The number of workers of a defined workerType that are allocated when a job runs. ### command Argument Reference From cc3aadfca26e3bed2d386ddd7d3da2ae74eff170 Mon Sep 17 00:00:00 2001 From: Liem Truong Date: Sun, 16 Jul 2023 07:25:07 -0700 Subject: [PATCH 5/7] Fix indentation --- website/docs/r/glue_job.html.markdown | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/website/docs/r/glue_job.html.markdown b/website/docs/r/glue_job.html.markdown index 762b18aa33d1..bbf9f352dde9 100644 --- a/website/docs/r/glue_job.html.markdown +++ b/website/docs/r/glue_job.html.markdown @@ -118,11 +118,11 @@ The following arguments are supported: * `timeout` – (Optional) The job timeout in minutes. The default is 2880 minutes (48 hours) for `glueetl` and `pythonshell` jobs, and null (unlimited) for `gluestreaming` jobs. * `security_configuration` - (Optional) The name of the Security Configuration to be associated with the job. * `worker_type` - (Optional) The type of predefined worker that is allocated when a job runs. Accepts a value of Standard, G.1X, G.2X, or G.025X for Spark jobs. Accepts the value Z.2X for Ray jobs. - * For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. - * For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. 
Recommended for memory-intensive jobs. - * For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs. - * For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPU, 4GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for low volume streaming jobs. Only available for Glue version 3.0. - * For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPU, 64 GB of m emory, 128 GB disk), and provides up to 8 Ray workers based on the autoscaler. + * For the Standard worker type, each worker provides 4 vCPU, 16 GB of memory and a 50GB disk, and 2 executors per worker. + * For the G.1X worker type, each worker maps to 1 DPU (4 vCPU, 16 GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs. + * For the G.2X worker type, each worker maps to 2 DPU (8 vCPU, 32 GB of memory, 128 GB disk), and provides 1 executor per worker. Recommended for memory-intensive jobs. + * For the G.025X worker type, each worker maps to 0.25 DPU (2 vCPU, 4GB of memory, 64 GB disk), and provides 1 executor per worker. Recommended for low volume streaming jobs. Only available for Glue version 3.0. + * For the Z.2X worker type, each worker maps to 2 M-DPU (8vCPU, 64 GB of memory, 128 GB disk), and provides up to 8 Ray workers based on the autoscaler. * `number_of_workers` - (Optional) The number of workers of a defined workerType that are allocated when a job runs. ### command Argument Reference From 11878aafdec1919cc6df98882cc6a58d3f2b1a82 Mon Sep 17 00:00:00 2001 From: Kit Ewbank Date: Tue, 25 Jul 2023 16:14:26 -0400 Subject: [PATCH 6/7] Tweak CHANGELOG entry.
--- .changelog/32528.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changelog/32528.txt b/.changelog/32528.txt index 3f57cb945f9d..69ec249f2a13 100644 --- a/.changelog/32528.txt +++ b/.changelog/32528.txt @@ -1,3 +1,3 @@ ```release-note:enhancement -resource/aws_glue_job: Add `command.runtime` parameter +resource/aws_glue_job: Add `command.runtime` attribute ``` \ No newline at end of file From d4d459e77ab9af5d63ea7ef232b02d8bffad83fd Mon Sep 17 00:00:00 2001 From: Kit Ewbank Date: Tue, 25 Jul 2023 16:15:23 -0400 Subject: [PATCH 7/7] r/aws_glue_job: Alphabetize attributes. --- internal/service/glue/job.go | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/internal/service/glue/job.go b/internal/service/glue/job.go index 68ca6b1faa08..068f692f38ac 100644 --- a/internal/service/glue/job.go +++ b/internal/service/glue/job.go @@ -54,10 +54,6 @@ func ResourceJob() *schema.Resource { Optional: true, Default: "glueetl", }, - "script_location": { - Type: schema.TypeString, - Required: true, - }, "python_version": { Type: schema.TypeString, Optional: true, @@ -70,6 +66,10 @@ func ResourceJob() *schema.Resource { Computed: true, ValidateFunc: validation.StringInSlice([]string{"Ray2.4"}, true), }, + "script_location": { + Type: schema.TypeString, + Required: true, + }, }, }, }, @@ -87,11 +87,6 @@ func ResourceJob() *schema.Resource { Type: schema.TypeString, Optional: true, }, - "glue_version": { - Type: schema.TypeString, - Optional: true, - Computed: true, - }, "execution_class": { Type: schema.TypeString, Optional: true, @@ -113,6 +108,11 @@ func ResourceJob() *schema.Resource { }, }, }, + "glue_version": { + Type: schema.TypeString, + Optional: true, + Computed: true, + }, "max_capacity": { Type: schema.TypeFloat, Optional: true, @@ -253,7 +253,6 @@ func resourceJobCreate(ctx context.Context, d *schema.ResourceData, meta interfa input.WorkerType = aws.String(v.(string)) } - log.Printf("[DEBUG] Creating Glue Job: 
%s", input) output, err := conn.CreateJobWithContext(ctx, input) if err != nil { @@ -391,7 +390,6 @@ func resourceJobUpdate(ctx context.Context, d *schema.ResourceData, meta interfa JobUpdate: jobUpdate, } - log.Printf("[DEBUG] Updating Glue Job: %s", input) _, err := conn.UpdateJobWithContext(ctx, input) if err != nil {