diff --git a/aws/resource_aws_glue_crawler.go b/aws/resource_aws_glue_crawler.go index fc416850d1e..eea2d81366a 100644 --- a/aws/resource_aws_glue_crawler.go +++ b/aws/resource_aws_glue_crawler.go @@ -127,6 +127,16 @@ func resourceAwsGlueCrawler() *schema.Resource { Type: schema.TypeString, Required: true, }, + "scan_all": { + Type: schema.TypeBool, + Optional: true, + Default: true, + }, + "scan_rate": { + Type: schema.TypeFloat, + Optional: true, + ValidateFunc: validation.FloatBetween(0.1, 1.5), + }, }, }, }, @@ -373,7 +383,12 @@ func expandGlueDynamoDBTargets(targets []interface{}) []*glue.DynamoDBTarget { func expandGlueDynamoDBTarget(cfg map[string]interface{}) *glue.DynamoDBTarget { target := &glue.DynamoDBTarget{ - Path: aws.String(cfg["path"].(string)), + Path: aws.String(cfg["path"].(string)), + ScanAll: aws.Bool(cfg["scan_all"].(bool)), + } + + if v, ok := cfg["scan_rate"].(float64); ok && v != 0 { + target.ScanRate = aws.Float64(v) } return target @@ -615,6 +630,8 @@ func flattenGlueDynamoDBTargets(dynamodbTargets []*glue.DynamoDBTarget) []map[st for _, dynamodbTarget := range dynamodbTargets { attrs := make(map[string]interface{}) attrs["path"] = aws.StringValue(dynamodbTarget.Path) + attrs["scan_all"] = aws.BoolValue(dynamodbTarget.ScanAll) + attrs["scan_rate"] = aws.Float64Value(dynamodbTarget.ScanRate) result = append(result, attrs) } diff --git a/aws/resource_aws_glue_crawler_test.go b/aws/resource_aws_glue_crawler_test.go index 5370a3fc582..80d3d90ac94 100644 --- a/aws/resource_aws_glue_crawler_test.go +++ b/aws/resource_aws_glue_crawler_test.go @@ -80,6 +80,7 @@ func TestAccAWSGlueCrawler_DynamodbTarget(t *testing.T) { resource.TestCheckResourceAttr(resourceName, "description", ""), resource.TestCheckResourceAttr(resourceName, "dynamodb_target.#", "1"), resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", "table1"), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.scan_all", "true"), 
resource.TestCheckResourceAttr(resourceName, "jdbc_target.#", "0"), resource.TestCheckResourceAttr(resourceName, "name", rName), resource.TestCheckResourceAttr(resourceName, "role", rName), @@ -103,6 +104,7 @@ func TestAccAWSGlueCrawler_DynamodbTarget(t *testing.T) { resource.TestCheckResourceAttr(resourceName, "description", ""), resource.TestCheckResourceAttr(resourceName, "dynamodb_target.#", "1"), resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", "table2"), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.scan_all", "true"), resource.TestCheckResourceAttr(resourceName, "jdbc_target.#", "0"), resource.TestCheckResourceAttr(resourceName, "name", rName), resource.TestCheckResourceAttr(resourceName, "role", rName), @@ -124,6 +126,92 @@ func TestAccAWSGlueCrawler_DynamodbTarget(t *testing.T) { }) } +func TestAccAWSGlueCrawler_DynamodbTarget_scanAll(t *testing.T) { + var crawler glue.Crawler + rName := acctest.RandomWithPrefix("tf-acc-test") + resourceName := "aws_glue_crawler.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckAWSGlueCrawlerDestroy, + Steps: []resource.TestStep{ + { + Config: testAccGlueCrawlerConfig_DynamodbTargetScanAll(rName, "table1", false), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", "table1"), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.scan_all", "false"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + { + Config: testAccGlueCrawlerConfig_DynamodbTargetScanAll(rName, "table1", true), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", "table1"), + 
resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.scan_all", "true"), + ), + }, + { + Config: testAccGlueCrawlerConfig_DynamodbTargetScanAll(rName, "table1", false), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", "table1"), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.scan_all", "false"), + ), + }, + }, + }) +} + +func TestAccAWSGlueCrawler_DynamodbTarget_scanRate(t *testing.T) { + var crawler glue.Crawler + rName := acctest.RandomWithPrefix("tf-acc-test") + resourceName := "aws_glue_crawler.test" + + resource.ParallelTest(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckAWSGlueCrawlerDestroy, + Steps: []resource.TestStep{ + { + Config: testAccGlueCrawlerConfig_DynamodbTargetScanRate(rName, "table1", 0.5), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", "table1"), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.scan_rate", "0.5"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + { + Config: testAccGlueCrawlerConfig_DynamodbTargetScanRate(rName, "table1", 1.5), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", "table1"), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.scan_rate", "1.5"), + ), + }, + { + Config: testAccGlueCrawlerConfig_DynamodbTargetScanRate(rName, "table1", 0.5), + Check: resource.ComposeTestCheckFunc( + testAccCheckAWSGlueCrawlerExists(resourceName, &crawler), + resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", "table1"), + resource.TestCheckResourceAttr(resourceName, 
"dynamodb_target.0.scan_rate", "0.5"), + ), + }, + }, + }) +} + func TestAccAWSGlueCrawler_JdbcTarget(t *testing.T) { var crawler glue.Crawler rName := acctest.RandomWithPrefix("tf-acc-test") @@ -1211,21 +1299,63 @@ resource "aws_glue_crawler" "test" { func testAccGlueCrawlerConfig_DynamodbTarget(rName, path string) string { return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(` resource "aws_glue_catalog_database" "test" { - name = %q + name = %[1]q } resource "aws_glue_crawler" "test" { depends_on = [aws_iam_role_policy_attachment.test-AWSGlueServiceRole] database_name = aws_glue_catalog_database.test.name - name = %q + name = %[1]q role = aws_iam_role.test.name dynamodb_target { - path = %q + path = %[2]q } } -`, rName, rName, path) +`, rName, path) +} + +func testAccGlueCrawlerConfig_DynamodbTargetScanAll(rName, path string, scanAll bool) string { + return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(` +resource "aws_glue_catalog_database" "test" { + name = %[1]q +} + +resource "aws_glue_crawler" "test" { + depends_on = [aws_iam_role_policy_attachment.test-AWSGlueServiceRole] + + database_name = aws_glue_catalog_database.test.name + name = %[1]q + role = aws_iam_role.test.name + + dynamodb_target { + path = %[2]q + scan_all = %[3]t + } +} +`, rName, path, scanAll) +} + +func testAccGlueCrawlerConfig_DynamodbTargetScanRate(rName, path string, scanRate float64) string { + return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(` +resource "aws_glue_catalog_database" "test" { + name = %[1]q +} + +resource "aws_glue_crawler" "test" { + depends_on = [aws_iam_role_policy_attachment.test-AWSGlueServiceRole] + + database_name = aws_glue_catalog_database.test.name + name = %[1]q + role = aws_iam_role.test.name + + dynamodb_target { + path = %[2]q + scan_rate = %[3]g + } +} +`, rName, path, scanRate) } func testAccGlueCrawlerConfig_JdbcTarget(rName, path string) string { diff --git a/website/docs/r/glue_crawler.html.markdown 
b/website/docs/r/glue_crawler.html.markdown index 512cffa5eda..6b9083b16a6 100644 --- a/website/docs/r/glue_crawler.html.markdown +++ b/website/docs/r/glue_crawler.html.markdown @@ -108,6 +108,8 @@ The following arguments are supported: ### dynamodb_target Argument Reference * `path` - (Required) The name of the DynamoDB table to crawl. +* `scan_all` - (Optional) Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high-throughput table. Defaults to `true`. +* `scan_rate` - (Optional) The percentage of the configured read capacity units for the AWS Glue crawler to use. The valid values are null or a value between 0.1 and 1.5. ### jdbc_target Argument Reference