Terraform BigQuery Table Hive partitioning support #2121

Merged
6 changes: 6 additions & 0 deletions .changelog/3335.txt
@@ -0,0 +1,6 @@
```release-note:enhancement
bigquery: Added support for `google_bigquery_table` `hive_partitioning_options`
```
```release-note:enhancement
bigquery: Added `google_bigquery_table` `range_partitioning` to GA
```
66 changes: 65 additions & 1 deletion google-beta/resource_bigquery_table.go
@@ -1,4 +1,3 @@
//
package google

import (
@@ -184,6 +183,31 @@ func resourceBigQueryTable() *schema.Resource {
},
},

// HivePartitioningOptions: [Optional] Options for configuring hive partitioning detection.
"hive_partitioning_options": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
// Mode: [Optional] [Experimental] When set, what mode of hive partitioning to use when reading data.
// Two modes are supported.
// * AUTO: automatically infer partition key name(s) and type(s).
// * STRINGS: automatically infer partition key name(s).
"mode": {
Type: schema.TypeString,
Optional: true,
},
// SourceUriPrefix: [Optional] [Experimental] When hive partition detection is requested, a common prefix for all source uris is required.
// The prefix must end immediately before the partition key encoding begins.
"source_uri_prefix": {
Type: schema.TypeString,
Optional: true,
},
},
},
},

// IgnoreUnknownValues: [Optional] Indicates if BigQuery should
// allow extra values that are not represented in the table schema.
// If true, the extra values are ignored. If false, records with
@@ -717,6 +741,9 @@ func expandExternalDataConfiguration(cfg interface{}) (*bigquery.ExternalDataCon
if v, ok := raw["google_sheets_options"]; ok {
edc.GoogleSheetsOptions = expandGoogleSheetsOptions(v)
}
if v, ok := raw["hive_partitioning_options"]; ok {
edc.HivePartitioningOptions = expandHivePartitioningOptions(v)
}
if v, ok := raw["ignore_unknown_values"]; ok {
edc.IgnoreUnknownValues = v.(bool)
}
@@ -749,6 +776,10 @@ func flattenExternalDataConfiguration(edc *bigquery.ExternalDataConfiguration) (
result["google_sheets_options"] = flattenGoogleSheetsOptions(edc.GoogleSheetsOptions)
}

if edc.HivePartitioningOptions != nil {
result["hive_partitioning_options"] = flattenHivePartitioningOptions(edc.HivePartitioningOptions)
}

if edc.IgnoreUnknownValues {
result["ignore_unknown_values"] = edc.IgnoreUnknownValues
}
@@ -863,6 +894,39 @@ func flattenGoogleSheetsOptions(opts *bigquery.GoogleSheetsOptions) []map[string
return []map[string]interface{}{result}
}

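// expandHivePartitioningOptions converts the single-element Terraform list for
// hive_partitioning_options into the corresponding BigQuery API struct,
// returning nil when the block is absent.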
func expandHivePartitioningOptions(configured interface{}) *bigquery.HivePartitioningOptions {
if len(configured.([]interface{})) == 0 {
return nil
}

raw := configured.([]interface{})[0].(map[string]interface{})
opts := &bigquery.HivePartitioningOptions{}

if v, ok := raw["mode"]; ok {
opts.Mode = v.(string)
}

if v, ok := raw["source_uri_prefix"]; ok {
opts.SourceUriPrefix = v.(string)
}

return opts
}

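// flattenHivePartitioningOptions converts the BigQuery API struct back into
// the single-element list representation stored in Terraform state.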
func flattenHivePartitioningOptions(opts *bigquery.HivePartitioningOptions) []map[string]interface{} {
result := map[string]interface{}{}

if opts.Mode != "" {
result["mode"] = opts.Mode
}

if opts.SourceUriPrefix != "" {
result["source_uri_prefix"] = opts.SourceUriPrefix
}

return []map[string]interface{}{result}
}

func expandSchema(raw interface{}) (*bigquery.TableSchema, error) {
var fields []*bigquery.TableFieldSchema

61 changes: 61 additions & 0 deletions google-beta/resource_bigquery_table_test.go
@@ -64,6 +64,30 @@ func TestAccBigQueryTable_Kms(t *testing.T) {
})
}

func TestAccBigQueryTable_HivePartitioning(t *testing.T) {
t.Parallel()
bucketName := testBucketName(t)
resourceName := "google_bigquery_table.test"
datasetID := fmt.Sprintf("tf_test_%s", randString(t, 10))
tableID := fmt.Sprintf("tf_test_%s", randString(t, 10))

resource.Test(t, resource.TestCase{
PreCheck: func() { testAccPreCheck(t) },
Providers: testAccProviders,
CheckDestroy: testAccCheckBigQueryTableDestroyProducer(t),
Steps: []resource.TestStep{
{
Config: testAccBigQueryTableHivePartitioning(bucketName, datasetID, tableID),
},
{
ResourceName: resourceName,
ImportState: true,
ImportStateVerify: true,
},
},
})
}

func TestAccBigQueryTable_RangePartitioning(t *testing.T) {
t.Parallel()
resourceName := "google_bigquery_table.test"
@@ -354,6 +378,43 @@ EOH
`, datasetID, cryptoKeyName, tableID)
}

func testAccBigQueryTableHivePartitioning(bucketName, datasetID, tableID string) string {
return fmt.Sprintf(`
resource "google_storage_bucket" "test" {
name = "%s"
force_destroy = true
}

resource "google_storage_bucket_object" "test" {
name = "key1=20200330/init.csv"
content = ";"
bucket = google_storage_bucket.test.name
}

resource "google_bigquery_dataset" "test" {
dataset_id = "%s"
}

resource "google_bigquery_table" "test" {
table_id = "%s"
dataset_id = google_bigquery_dataset.test.dataset_id

external_data_configuration {
source_format = "CSV"
autodetect = true
source_uris = ["gs://${google_storage_bucket.test.name}/*"]

hive_partitioning_options {
mode = "AUTO"
source_uri_prefix = "gs://${google_storage_bucket.test.name}/"
}

}
depends_on = ["google_storage_bucket_object.test"]
}
`, bucketName, datasetID, tableID)
}

func testAccBigQueryTableRangePartitioning(datasetID, tableID string) string {
return fmt.Sprintf(`
resource "google_bigquery_dataset" "test" {
27 changes: 26 additions & 1 deletion website/docs/r/bigquery_table.html.markdown
@@ -127,7 +127,7 @@ The following arguments are supported:
* `time_partitioning` - (Optional) If specified, configures time-based
partitioning for this table. Structure is documented below.

* `range_partitioning` - (Optional, Beta) If specified, configures range-based
* `range_partitioning` - (Optional) If specified, configures range-based
partitioning for this table. Structure is documented below.

* `clustering` - (Optional) Specifies column names to use for data clustering.
@@ -152,6 +152,11 @@ The `external_data_configuration` block supports:
`source_format` is set to "GOOGLE_SHEETS". Structure is
documented below.

* `hive_partitioning_options` (Optional) - When set, configures hive partitioning
support. Not all storage formats support hive partitioning; requesting hive
partitioning on an unsupported format will lead to an error, as will providing
an invalid specification. Structure is documented below.

* `ignore_unknown_values` (Optional) - Indicates if BigQuery should
allow extra values that are not represented in the table schema.
If true, the extra values are ignored. If false, records with
@@ -207,6 +212,26 @@ The `google_sheets_options` block supports:
that BigQuery will skip when reading the data. At least one of `range` or
`skip_leading_rows` must be set.

The `hive_partitioning_options` block supports:

* `mode` (Optional) - When set, what mode of hive partitioning to use when
  reading data. The following modes are supported:
    * AUTO: automatically infer partition key name(s) and type(s).
    * STRINGS: automatically infer partition key name(s). All types are
      interpreted as strings.
    * CUSTOM: you must encode the partition key schema within the
      `source_uri_prefix` by setting `source_uri_prefix` to
      `gs://bucket/path_to_table/{key1:TYPE1}/{key2:TYPE2}/{key3:TYPE3}`.
  Not all storage formats support hive partitioning; requesting hive
  partitioning on an unsupported format will lead to an error. Currently
  supported formats are: Avro, CSV, JSON, ORC and Parquet.

* `source_uri_prefix` (Optional) - When hive partition detection is requested,
  a common prefix for all source uris is required. The prefix must end
  immediately before the partition key encoding begins. For example, consider
  files following this data layout:
  `gs://bucket/path_to_table/dt=2019-06-01/country=USA/id=7/file.avro` and
  `gs://bucket/path_to_table/dt=2019-05-31/country=CA/id=3/file.avro`. When
  hive partitioning is requested with either AUTO or STRINGS detection, the
  common prefix can be either `gs://bucket/path_to_table` or
  `gs://bucket/path_to_table/`. When `mode` is set to `CUSTOM`, the partition
  key schema must be encoded in the prefix as described above.
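As a minimal sketch of `CUSTOM` mode (the bucket path, resource names, and
partition keys below are illustrative, not taken from this PR), the partition
key schema is encoded directly in the prefix:

```hcl
resource "google_bigquery_table" "example" {
  dataset_id = google_bigquery_dataset.example.dataset_id
  table_id   = "hive_partitioned"

  external_data_configuration {
    source_format = "AVRO"
    autodetect    = true
    source_uris   = ["gs://bucket/path_to_table/*"]

    hive_partitioning_options {
      mode = "CUSTOM"
      # Each {key:TYPE} pair declares one partition column.
      source_uri_prefix = "gs://bucket/path_to_table/{dt:DATE}/{country:STRING}/{id:INTEGER}"
    }
  }
}
```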

The `time_partitioning` block supports:

* `expiration_ms` - (Optional) Number of milliseconds for which to keep the