Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for BigQuery External Data Tables #1466

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
5a18681
Change resource_bigquery_table.go to a template to allow BETA features
rickard-von-essen Mar 3, 2019
8ae562d
Initial support for BQ external tables
rickard-von-essen Mar 3, 2019
6910bb1
Added documentation
rickard-von-essen Mar 4, 2019
ea25eca
Update third_party/terraform/resources/resource_bigquery_table.go.erb
rileykarson Mar 5, 2019
225fa84
Changed external_data_configuration to TypeSet
rickard-von-essen Mar 5, 2019
344f8e2
Added Computed to most options
rickard-von-essen Mar 5, 2019
a928294
WIP: Somewhat working BQ external data tables
rickard-von-essen Mar 6, 2019
f31ea8c
Working with autodetect true
rickard-von-essen Mar 15, 2019
a413226
google_sheets_options, max_bad_records, and ignore_unknown_values
rickard-von-essen Mar 16, 2019
a55e9c7
csv_options
rickard-von-essen Mar 16, 2019
e037b93
Added bigtable_options
rickard-von-essen Mar 17, 2019
cce28b8
Clearified Google Sheets range
rickard-von-essen Mar 17, 2019
e753d5e
Added EDC Schema
rickard-von-essen Mar 17, 2019
cc20ad5
Updated docs for bigtable_options and some richer examples
rickard-von-essen Mar 17, 2019
5c883ce
Added some links in the docs describing the JSON configs
rickard-von-essen Mar 17, 2019
6165666
Removed support for external table data from Bittable (beta) from thi…
rickard-von-essen May 26, 2019
8032cfc
Fixed bug in schema handeling
rickard-von-essen May 26, 2019
8aa94d9
Set some default values
rickard-von-essen May 26, 2019
ada1e5a
Fixed indentation
rickard-von-essen Jun 11, 2019
511dc86
Fixed return type of flattenExternalDataConfiguration
rickard-von-essen Jun 11, 2019
fb59101
Moved testBucketName function
rickard-von-essen Jun 11, 2019
b63759f
bigquery_table: Added External Data Configuration test
rickard-von-essen Jun 13, 2019
06e29db
Improved docs for quote
rickard-von-essen Jun 26, 2019
129bc2e
BQ Ext Tables: removed schema, use top level instead
rickard-von-essen Jun 26, 2019
14ccee6
BQ Ext Tables: Properly handle nilable quote option in csv_options.
rickard-von-essen Jun 26, 2019
124603b
Re-arrange and put required before optional
rickard-von-essen Jun 26, 2019
3aec662
Change a test to expose the problem with updating cvs_options.quote f…
rickard-von-essen Jun 28, 2019
ed5b784
Update tracked submodules -> HEAD on Wed Jul 3 20:18:17 UTC 2019
modular-magician Jul 3, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/terraform
2 changes: 1 addition & 1 deletion build/terraform-beta
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
// <% autogen_exception -%>

package google

import (
Expand Down Expand Up @@ -62,6 +64,143 @@ func resourceBigQueryTable() *schema.Resource {
Computed: true,
},

// ExternalDataConfiguration [Optional] Describes the data format,
// location, and other properties of a table stored outside of BigQuery.
// By defining these properties, the data source can then be queried as
// if it were a standard BigQuery table.
"external_data_configuration": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
// Autodetect : [Required] If true, let BigQuery try to autodetect the
// schema and format of the table.
"autodetect": {
rickard-von-essen marked this conversation as resolved.
Show resolved Hide resolved
Type: schema.TypeBool,
Required: true,
},
// SourceFormat [Required] The data format.
"source_format": {
Type: schema.TypeString,
Required: true,
ValidateFunc: validation.StringInSlice([]string{
"CSV", "GOOGLE_SHEETS", "NEWLINE_DELIMITED_JSON", "AVRO", "DATSTORE_BACKUP",
}, false),
},
// SourceURIs [Required] The fully-qualified URIs that point to your data in Google Cloud.
"source_uris": {
Type: schema.TypeList,
Required: true,
Elem: &schema.Schema{Type: schema.TypeString},
},
// Compression: [Optional] The compression type of the data source.
"compression": {
Type: schema.TypeString,
Optional: true,
ValidateFunc: validation.StringInSlice([]string{"NONE", "GZIP"}, false),
Default: "NONE",
},
// CsvOptions: [Optional] Additional properties to set if
// sourceFormat is set to CSV.
"csv_options": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
// AllowJaggedRows: [Optional] Indicates if BigQuery should
// accept rows that are missing trailing optional columns.
"allow_jagged_rows": {
Type: schema.TypeBool,
Optional: true,
Default: false,
},
// AllowQuotedNewlines: [Optional] Indicates if BigQuery
// should allow quoted data sections that contain newline
// characters in a CSV file. The default value is false.
"allow_quoted_newlines": {
Type: schema.TypeBool,
Optional: true,
Default: false,
},
// Encoding: [Optional] The character encoding of the data.
// The supported values are UTF-8 or ISO-8859-1.
"encoding": {
Type: schema.TypeString,
Optional: true,
ValidateFunc: validation.StringInSlice([]string{"ISO-8859-1", "UTF-8"}, false),
Default: "UTF-8",
},
// FieldDelimiter: [Optional] The separator for fields in a CSV file.
"field_delimiter": {
Type: schema.TypeString,
Optional: true,
Default: ",",
},
// Quote: [Optional] The value that is used to quote data
// sections in a CSV file.
"quote": {
Type: schema.TypeString,
Optional: true,
Computed: true,
},
// SkipLeadingRows: [Optional] The number of rows at the top
// of a CSV file that BigQuery will skip when reading the data.
"skip_leading_rows": {
Type: schema.TypeInt,
Optional: true,
Default: 0,
},
},
},
},
// GoogleSheetsOptions: [Optional] Additional options if sourceFormat is set to GOOGLE_SHEETS.
"google_sheets_options": {
Type: schema.TypeList,
Optional: true,
MaxItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
// Range: [Optional] Range of a sheet to query from. Only used when non-empty.
// Typical format: !:
"range": {
<% if version.nil? || version == 'ga' -%>
Removed: "This field is in beta. Use it in the the google-beta provider instead. See https://terraform.io/docs/providers/google/provider_versions.html for more details.",
<% end -%>
Type: schema.TypeString,
Optional: true,
},
// SkipLeadingRows: [Optional] The number of rows at the top
// of the scheet that BigQuery will skip when reading the data.
"skip_leading_rows": {
Type: schema.TypeInt,
Optional: true,
},
},
},
},

// IgnoreUnknownValues: [Optional] Indicates if BigQuery should
// allow extra values that are not represented in the table schema.
// If true, the extra values are ignored. If false, records with
// extra columns are treated as bad records, and if there are too
// many bad records, an invalid error is returned in the job result.
// The default value is false.
"ignore_unknown_values": {
Type: schema.TypeBool,
Optional: true,
},
// MaxBadRecords: [Optional] The maximum number of bad records that
// BigQuery can ignore when reading data.
"max_bad_records": {
Type: schema.TypeInt,
Optional: true,
},
},
},
},

// FriendlyName: [Optional] A descriptive name for this table.
"friendly_name": {
Type: schema.TypeString,
Expand All @@ -82,6 +221,9 @@ func resourceBigQueryTable() *schema.Resource {
},

// Schema: [Optional] Describes the schema of this table.
// Schema is required for external tables in CSV and JSON formats
// and disallowed for Google Cloud Bigtable, Cloud Datastore backups,
// and Avro formats.
"schema": {
Type: schema.TypeString,
Optional: true,
Expand Down Expand Up @@ -258,6 +400,15 @@ func resourceTable(d *schema.ResourceData, meta interface{}) (*bigquery.Table, e
table.ExpirationTime = int64(v.(int))
}

if v, ok := d.GetOk("external_data_configuration"); ok {
externalDataConfiguration, err := expandExternalDataConfiguration(v)
if err != nil {
return nil, err
}

table.ExternalDataConfiguration = externalDataConfiguration
}

if v, ok := d.GetOk("friendly_name"); ok {
table.FriendlyName = v.(string)
}
Expand Down Expand Up @@ -349,6 +500,15 @@ func resourceBigQueryTableRead(d *schema.ResourceData, meta interface{}) error {
d.Set("self_link", res.SelfLink)
d.Set("type", res.Type)

if res.ExternalDataConfiguration != nil {
externalDataConfiguration, err := flattenExternalDataConfiguration(res.ExternalDataConfiguration)
if err != nil {
return err
}

d.Set("external_data_configuration", externalDataConfiguration)
}

if res.TimePartitioning != nil {
if err := d.Set("time_partitioning", flattenTimePartitioning(res.TimePartitioning)); err != nil {
return err
Expand Down Expand Up @@ -413,9 +573,183 @@ func resourceBigQueryTableDelete(d *schema.ResourceData, meta interface{}) error
return nil
}

func expandExternalDataConfiguration(cfg interface{}) (*bigquery.ExternalDataConfiguration, error) {
raw := cfg.([]interface{})[0].(map[string]interface{})

edc := &bigquery.ExternalDataConfiguration{
Autodetect: raw["autodetect"].(bool),
}

sourceUris := []string{}
for _, rawSourceUri := range raw["source_uris"].([]interface{}) {
sourceUris = append(sourceUris, rawSourceUri.(string))
}
if len(sourceUris) > 0 {
edc.SourceUris = sourceUris
}

if v, ok := raw["compression"]; ok {
edc.Compression = v.(string)
}
if v, ok := raw["csv_options"]; ok {
edc.CsvOptions = expandCsvOptions(v)
}
if v, ok := raw["google_sheets_options"]; ok {
edc.GoogleSheetsOptions = expandGoogleSheetsOptions(v)
}
if v, ok := raw["ignore_unknown_values"]; ok {
edc.IgnoreUnknownValues = v.(bool)
}
if v, ok := raw["max_bad_records"]; ok {
edc.MaxBadRecords = int64(v.(int))
}
if v, ok := raw["source_format"]; ok {
edc.SourceFormat = v.(string)
}

return edc, nil

}

func flattenExternalDataConfiguration(edc *bigquery.ExternalDataConfiguration) ([]map[string]interface{}, error) {
result := map[string]interface{}{}

result["autodetect"] = edc.Autodetect
result["source_uris"] = edc.SourceUris

if edc.Compression != "" {
result["compression"] = edc.Compression
}

if edc.CsvOptions != nil {
result["csv_options"] = flattenCsvOptions(edc.CsvOptions)
}

if edc.GoogleSheetsOptions != nil {
result["google_sheets_options"] = flattenGoogleSheetsOptions(edc.GoogleSheetsOptions)
}

if edc.IgnoreUnknownValues == true {
result["ignore_unknown_values"] = edc.IgnoreUnknownValues
}
if edc.MaxBadRecords != 0 {
result["max_bad_records"] = edc.MaxBadRecords
}

if edc.SourceFormat != "" {
result["source_format"] = edc.SourceFormat
}

return []map[string]interface{}{result}, nil
}

func expandCsvOptions(configured interface{}) *bigquery.CsvOptions {
if len(configured.([]interface{})) == 0 {
return nil
}

raw := configured.([]interface{})[0].(map[string]interface{})
opts := &bigquery.CsvOptions{}

if v, ok := raw["allow_jagged_rows"]; ok {
opts.AllowJaggedRows = v.(bool)
}

if v, ok := raw["allow_quoted_newlines"]; ok {
opts.AllowQuotedNewlines = v.(bool)
}

if v, ok := raw["encoding"]; ok {
opts.Encoding = v.(string)
}

if v, ok := raw["field_delimiter"]; ok {
opts.FieldDelimiter = v.(string)
}

if v, ok := raw["skip_leading_rows"]; ok {
opts.SkipLeadingRows = int64(v.(int))
}

if v, ok := raw["quote"]; ok {
quote := v.(string)
opts.Quote = &quote
}

opts.ForceSendFields = []string{"Quote"}

return opts
}

func flattenCsvOptions(opts *bigquery.CsvOptions) []map[string]interface{} {
result := map[string]interface{}{}

if opts.AllowJaggedRows == true {
result["allow_jagged_rows"] = opts.AllowJaggedRows
}

if opts.AllowQuotedNewlines == true {
result["allow_quoted_newlines"] = opts.AllowQuotedNewlines
}

if opts.Encoding != "" {
result["encoding"] = opts.Encoding
}

if opts.FieldDelimiter != "" {
result["field_delimiter"] = opts.FieldDelimiter
}

if opts.SkipLeadingRows != 0 {
result["skip_leading_rows"] = opts.SkipLeadingRows
}

if opts.Quote != nil {
result["quote"] = *opts.Quote
}

return []map[string]interface{}{result}
}

func expandGoogleSheetsOptions(configured interface{}) *bigquery.GoogleSheetsOptions {
if len(configured.([]interface{})) == 0 {
return nil
}

raw := configured.([]interface{})[0].(map[string]interface{})
opts := &bigquery.GoogleSheetsOptions{}

if v, ok := raw["range"]; ok {
opts.Range = v.(string)
}

if v, ok := raw["skip_leading_rows"]; ok {
opts.SkipLeadingRows = int64(v.(int))
}
return opts
}

func flattenGoogleSheetsOptions(opts *bigquery.GoogleSheetsOptions) []map[string]interface{} {
result := map[string]interface{}{}

if opts.Range != "" {
result["range"] = opts.Range
}

if opts.SkipLeadingRows != 0 {
result["skip_leading_rows"] = opts.SkipLeadingRows
}

return []map[string]interface{}{result}
}

func expandSchema(raw interface{}) (*bigquery.TableSchema, error) {
var fields []*bigquery.TableFieldSchema

if len(raw.(string)) == 0 {
return nil, nil
}

if err := json.Unmarshal([]byte(raw.(string)), &fields); err != nil {
return nil, err
}
Expand Down
Loading