diff --git a/third_party/terraform/resources/resource_bigquery_table.go.erb b/third_party/terraform/resources/resource_bigquery_table.go similarity index 92% rename from third_party/terraform/resources/resource_bigquery_table.go.erb rename to third_party/terraform/resources/resource_bigquery_table.go index 329fa8ddcb9a..5d33feb3d0af 100644 --- a/third_party/terraform/resources/resource_bigquery_table.go.erb +++ b/third_party/terraform/resources/resource_bigquery_table.go @@ -1,5 +1,3 @@ -// <% autogen_exception -%> - package google import ( @@ -7,7 +5,6 @@ import ( "errors" "fmt" "log" - "regexp" "github.com/hashicorp/terraform-plugin-sdk/helper/schema" "github.com/hashicorp/terraform-plugin-sdk/helper/structure" @@ -165,8 +162,8 @@ func resourceBigQueryTable() *schema.Resource { // Range: [Optional] Range of a sheet to query from. Only used when non-empty. // Typical format: !: "range": { - Type: schema.TypeString, - Optional: true, + Type: schema.TypeString, + Optional: true, AtLeastOneOf: []string{ "external_data_configuration.0.google_sheets_options.0.skip_leading_rows", "external_data_configuration.0.google_sheets_options.0.range", @@ -175,8 +172,8 @@ func resourceBigQueryTable() *schema.Resource { // SkipLeadingRows: [Optional] The number of rows at the top // of the sheet that BigQuery will skip when reading the data. "skip_leading_rows": { - Type: schema.TypeInt, - Optional: true, + Type: schema.TypeInt, + Optional: true, AtLeastOneOf: []string{ "external_data_configuration.0.google_sheets_options.0.skip_leading_rows", "external_data_configuration.0.google_sheets_options.0.range", @@ -186,6 +183,31 @@ func resourceBigQueryTable() *schema.Resource { }, }, + // HivePartitioningOptions:: [Optional] Options for configuring hive partitioning detect. 
+ "hive_partitioning_options": { + Type: schema.TypeList, + Optional: true, + MaxItems: 1, + Elem: &schema.Resource{ + Schema: map[string]*schema.Schema{ + // Mode: [Optional] [Experimental] When set, what mode of hive partitioning to use when reading data. + // Two modes are supported. + //* AUTO: automatically infer partition key name(s) and type(s). + //* STRINGS: automatically infer partition key name(s). + "mode": { + Type: schema.TypeString, + Optional: true, + }, + // SourceUriPrefix: [Optional] [Experimental] When hive partition detection is requested, a common for all source uris must be required. + // The prefix must end immediately before the partition key encoding begins. + "source_uri_prefix": { + Type: schema.TypeString, + Optional: true, + }, + }, + }, + }, + // IgnoreUnknownValues: [Optional] Indicates if BigQuery should // allow extra values that are not represented in the table schema. // If true, the extra values are ignored. If false, records with @@ -309,11 +331,10 @@ func resourceBigQueryTable() *schema.Resource { }, }, - <% unless version == 'ga' -%> // RangePartitioning: [Optional] If specified, configures range-based // partitioning for this table. - "range_partitioning": &schema.Schema{ - Type: schema.TypeList, + "range_partitioning": { + Type: schema.TypeList, Optional: true, MaxItems: 1, Elem: &schema.Resource{ @@ -327,8 +348,8 @@ func resourceBigQueryTable() *schema.Resource { }, // Range: [Required] Information required to partition based on ranges. - "range": &schema.Schema{ - Type: schema.TypeList, + "range": { + Type: schema.TypeList, Required: true, MaxItems: 1, Elem: &schema.Resource{ @@ -356,11 +377,10 @@ func resourceBigQueryTable() *schema.Resource { }, }, }, - <% end -%> // Clustering: [Optional] Specifies column names to use for data clustering. Up to four // top-level columns are allowed, and should be specified in descending priority order. 
- "clustering": &schema.Schema{ + "clustering": { Type: schema.TypeList, Optional: true, ForceNew: true, @@ -520,7 +540,6 @@ func resourceTable(d *schema.ResourceData, meta interface{}) (*bigquery.Table, e table.TimePartitioning = expandTimePartitioning(v) } - <% unless version == 'ga' -%> if v, ok := d.GetOk("range_partitioning"); ok { rangePartitioning, err := expandRangePartitioning(v) if err != nil { @@ -529,7 +548,6 @@ func resourceTable(d *schema.ResourceData, meta interface{}) (*bigquery.Table, e table.RangePartitioning = rangePartitioning } - <% end -%> if v, ok := d.GetOk("clustering"); ok { table.Clustering = &bigquery.Clustering{ @@ -620,13 +638,11 @@ func resourceBigQueryTableRead(d *schema.ResourceData, meta interface{}) error { } } - <% unless version == 'ga' -%> if res.RangePartitioning != nil { if err := d.Set("range_partitioning", flattenRangePartitioning(res.RangePartitioning)); err != nil { return err } } - <% end -%> if res.Clustering != nil { d.Set("clustering", res.Clustering.Fields) @@ -725,6 +741,9 @@ func expandExternalDataConfiguration(cfg interface{}) (*bigquery.ExternalDataCon if v, ok := raw["google_sheets_options"]; ok { edc.GoogleSheetsOptions = expandGoogleSheetsOptions(v) } + if v, ok := raw["hive_partitioning_options"]; ok { + edc.HivePartitioningOptions = expandHivePartitioningOptions(v) + } if v, ok := raw["ignore_unknown_values"]; ok { edc.IgnoreUnknownValues = v.(bool) } @@ -757,6 +776,10 @@ func flattenExternalDataConfiguration(edc *bigquery.ExternalDataConfiguration) ( result["google_sheets_options"] = flattenGoogleSheetsOptions(edc.GoogleSheetsOptions) } + if edc.HivePartitioningOptions != nil { + result["hive_partitioning_options"] = flattenHivePartitioningOptions(edc.HivePartitioningOptions) + } + if edc.IgnoreUnknownValues { result["ignore_unknown_values"] = edc.IgnoreUnknownValues } @@ -871,6 +894,39 @@ func flattenGoogleSheetsOptions(opts *bigquery.GoogleSheetsOptions) []map[string return 
[]map[string]interface{}{result} } +func expandHivePartitioningOptions(configured interface{}) *bigquery.HivePartitioningOptions { + if len(configured.([]interface{})) == 0 { + return nil + } + + raw := configured.([]interface{})[0].(map[string]interface{}) + opts := &bigquery.HivePartitioningOptions{} + + if v, ok := raw["mode"]; ok { + opts.Mode = v.(string) + } + + if v, ok := raw["source_uri_prefix"]; ok { + opts.SourceUriPrefix = v.(string) + } + + return opts +} + +func flattenHivePartitioningOptions(opts *bigquery.HivePartitioningOptions) []map[string]interface{} { + result := map[string]interface{}{} + + if opts.Mode != "" { + result["mode"] = opts.Mode + } + + if opts.SourceUriPrefix != "" { + result["source_uri_prefix"] = opts.SourceUriPrefix + } + + return []map[string]interface{}{result} +} + func expandSchema(raw interface{}) (*bigquery.TableSchema, error) { var fields []*bigquery.TableFieldSchema @@ -913,7 +969,6 @@ func expandTimePartitioning(configured interface{}) *bigquery.TimePartitioning { return tp } -<% unless version == 'ga' -%> func expandRangePartitioning(configured interface{}) (*bigquery.RangePartitioning, error) { if configured == nil { return nil, nil @@ -945,7 +1000,6 @@ func expandRangePartitioning(configured interface{}) (*bigquery.RangePartitionin return rp, nil } -<% end -%> func flattenEncryptionConfiguration(ec *bigquery.EncryptionConfiguration) []map[string]interface{} { return []map[string]interface{}{{"kms_key_name": ec.KmsKeyName}} @@ -969,7 +1023,6 @@ func flattenTimePartitioning(tp *bigquery.TimePartitioning) []map[string]interfa return []map[string]interface{}{result} } -<% unless version == 'ga' -%> func flattenRangePartitioning(rp *bigquery.RangePartitioning) []map[string]interface{} { result := map[string]interface{}{ "field": rp.Field, @@ -984,7 +1037,6 @@ func flattenRangePartitioning(rp *bigquery.RangePartitioning) []map[string]inter return []map[string]interface{}{result} } -<% end -%> func expandView(configured 
interface{}) *bigquery.ViewDefinition { raw := configured.([]interface{})[0].(map[string]interface{}) diff --git a/third_party/terraform/tests/resource_bigquery_table_test.go.erb b/third_party/terraform/tests/resource_bigquery_table_test.go similarity index 88% rename from third_party/terraform/tests/resource_bigquery_table_test.go.erb rename to third_party/terraform/tests/resource_bigquery_table_test.go index 2233ba124656..d0ed2561ebdc 100644 --- a/third_party/terraform/tests/resource_bigquery_table_test.go.erb +++ b/third_party/terraform/tests/resource_bigquery_table_test.go @@ -1,4 +1,3 @@ -<% autogen_exception -%> package google import ( @@ -65,7 +64,30 @@ func TestAccBigQueryTable_Kms(t *testing.T) { }) } -<% unless version == 'ga' -%> +func TestAccBigQueryTable_HivePartitioning(t *testing.T) { + t.Parallel() + bucketName := testBucketName(t) + resourceName := "google_bigquery_table.test" + datasetID := fmt.Sprintf("tf_test_%s", randString(t, 10)) + tableID := fmt.Sprintf("tf_test_%s", randString(t, 10)) + + resource.Test(t, resource.TestCase{ + PreCheck: func() { testAccPreCheck(t) }, + Providers: testAccProviders, + CheckDestroy: testAccCheckBigQueryTableDestroyProducer(t), + Steps: []resource.TestStep{ + { + Config: testAccBigQueryTableHivePartitioning(bucketName, datasetID, tableID), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + }, + }, + }) +} + func TestAccBigQueryTable_RangePartitioning(t *testing.T) { t.Parallel() resourceName := "google_bigquery_table.test" @@ -88,7 +110,6 @@ func TestAccBigQueryTable_RangePartitioning(t *testing.T) { }, }) } -<% end -%> func TestAccBigQueryTable_View(t *testing.T) { t.Parallel() @@ -357,7 +378,43 @@ EOH `, datasetID, cryptoKeyName, tableID) } -<% unless version == 'ga' -%> +func testAccBigQueryTableHivePartitioning(bucketName, datasetID, tableID string) string { + return fmt.Sprintf(` +resource "google_storage_bucket" "test" { + name = "%s" + force_destroy = true +} + 
+resource "google_storage_bucket_object" "test" { + name = "key1=20200330/init.csv" + content = ";" + bucket = google_storage_bucket.test.name +} + +resource "google_bigquery_dataset" "test" { + dataset_id = "%s" +} + +resource "google_bigquery_table" "test" { + table_id = "%s" + dataset_id = google_bigquery_dataset.test.dataset_id + + external_data_configuration { + source_format = "CSV" + autodetect = true + source_uris= ["gs://${google_storage_bucket.test.name}/*"] + + hive_partitioning_options { + mode = "AUTO" + source_uri_prefix = "gs://${google_storage_bucket.test.name}/" + } + + } + depends_on = ["google_storage_bucket_object.test"] +} +`, bucketName, datasetID, tableID) +} + func testAccBigQueryTableRangePartitioning(datasetID, tableID string) string { return fmt.Sprintf(` resource "google_bigquery_dataset" "test" { @@ -392,7 +449,6 @@ EOH } `, datasetID, tableID) } -<% end -%> func testAccBigQueryTableWithView(datasetID, tableID string) string { return fmt.Sprintf(` @@ -538,8 +594,6 @@ resource "google_bigquery_table" "test" { `, datasetID, bucketName, objectName, content, tableID, format, quoteChar) } - - func testAccBigQueryTableFromSheet(context map[string]interface{}) string { return Nprintf(` resource "google_bigquery_table" "table" { diff --git a/third_party/terraform/website/docs/r/bigquery_table.html.markdown b/third_party/terraform/website/docs/r/bigquery_table.html.markdown index 54654a9e0f56..5e06277a3f60 100644 --- a/third_party/terraform/website/docs/r/bigquery_table.html.markdown +++ b/third_party/terraform/website/docs/r/bigquery_table.html.markdown @@ -127,7 +127,7 @@ The following arguments are supported: * `time_partitioning` - (Optional) If specified, configures time-based partitioning for this table. Structure is documented below. -* `range_partitioning` - (Optional, Beta) If specified, configures range-based +* `range_partitioning` - (Optional) If specified, configures range-based partitioning for this table. 
Structure is documented below. * `clustering` - (Optional) Specifies column names to use for data clustering. @@ -152,6 +152,11 @@ The `external_data_configuration` block supports: `source_format` is set to "GOOGLE_SHEETS". Structure is documented below. +* `hive_partitioning_options` (Optional) - When set, configures hive partitioning + support. Not all storage formats support hive partitioning -- requesting hive + partitioning on an unsupported format will lead to an error, as will providing + an invalid specification. + * `ignore_unknown_values` (Optional) - Indicates if BigQuery should allow extra values that are not represented in the table schema. If true, the extra values are ignored. If false, records with @@ -207,6 +212,26 @@ The `google_sheets_options` block supports: that BigQuery will skip when reading the data. At least one of `range` or `skip_leading_rows` must be set. +The `hive_partitioning_options` block supports: + +* `mode` (Optional) - When set, what mode of hive partitioning to use when + reading data. The following modes are supported. + * AUTO: automatically infer partition key name(s) and type(s). + * STRINGS: automatically infer partition key name(s). All types are interpreted as strings. + Not all storage formats support hive partitioning. Requesting hive + partitioning on an unsupported format will lead to an error. + Currently supported formats are: JSON, CSV, ORC, Avro and Parquet. + * CUSTOM: when set to `CUSTOM`, you must encode the partition key schema within the `source_uri_prefix` by setting `source_uri_prefix` to `gs://bucket/path_to_table/{key1:TYPE1}/{key2:TYPE2}/{key3:TYPE3}`. + +* `source_uri_prefix` (Optional) - When hive partition detection is requested, + a common prefix for all source uris is required. The prefix must end immediately + before the partition key encoding begins. For example, consider files following + this data layout.
`gs://bucket/path_to_table/dt=2019-06-01/country=USA/id=7/file.avro` + `gs://bucket/path_to_table/dt=2019-05-31/country=CA/id=3/file.avro` When hive + partitioning is requested with either AUTO or STRINGS detection, the common prefix + can be either of `gs://bucket/path_to_table` or `gs://bucket/path_to_table/`. + Note that when `mode` is set to `CUSTOM`, you must encode the partition key schema within the `source_uri_prefix` by setting `source_uri_prefix` to `gs://bucket/path_to_table/{key1:TYPE1}/{key2:TYPE2}/{key3:TYPE3}`. + The `time_partitioning` block supports: * `expiration_ms` - (Optional) Number of milliseconds for which to keep the