Skip to content

Commit

Permalink
Merge pull request #264 from quartzmo/bigquery-schema
Browse files Browse the repository at this point in the history
Schema builder for BigQuery Table

[closes #217]
  • Loading branch information
blowmage committed Sep 4, 2015
2 parents ddc660e + 1a25866 commit f000d07
Show file tree
Hide file tree
Showing 8 changed files with 596 additions and 135 deletions.
59 changes: 12 additions & 47 deletions lib/gcloud/bigquery.rb
Original file line number Diff line number Diff line change
Expand Up @@ -209,33 +209,13 @@ def self.bigquery project = nil, keyfile = nil, options = {}
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
#
# schema = {
# "fields" => [
# {
# "name" => "first_name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "cities_lived",
# "type" => "RECORD",
# "mode" => "REPEATED",
# "fields" => [
# {
# "name" => "place",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number_of_years",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# ]
# }
# table = dataset.create_table "people", schema: schema
# table = dataset.create_table "people" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# Because of the repeated field in this schema, we cannot use the CSV format
# to load data into the table.
Expand Down Expand Up @@ -306,26 +286,11 @@ def self.bigquery project = nil, keyfile = nil, options = {}
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# schema = {
# "fields" => [
# {
# "name" => "name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "sex",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# table = dataset.create_table "baby_names", schema: schema
# table = dataset.create_table "baby_names" do |schema|
# schema.string "name", mode: :required
# schema.string "sex", mode: :required
# schema.integer "number", mode: :required
# end
#
# file = File.open "names/yob2014.txt"
# load_job = table.load file, format: "csv"
Expand Down
62 changes: 51 additions & 11 deletions lib/gcloud/bigquery/dataset.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
require "json"
require "gcloud/bigquery/errors"
require "gcloud/bigquery/table"
require "gcloud/bigquery/table/schema"
require "gcloud/bigquery/dataset/list"
require "gcloud/bigquery/dataset/access"

Expand Down Expand Up @@ -329,10 +330,11 @@ def delete options = {}
# <code>options[:description]</code>::
# A user-friendly description of the table. (+String+)
# <code>options[:schema]</code>::
# A schema specifying fields and data types for the table. See the
# A hash specifying fields and data types for the table. A block may be
# passed instead (see examples). For the format of this hash, see the
# {Tables resource
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource]
# for more information. (+Hash+)
# . (+Hash+)
#
# === Returns
#
Expand All @@ -347,7 +349,35 @@ def delete options = {}
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# A name and description can be provided:
# You can also pass name and description options.
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table",
# name: "My Table",
# description: "A description of my table."
#
# You can define the table's schema using a block.
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# Or, if you are adapting existing code that was written for the {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource],
# you can pass the table's schema as a hash.
#
# require "gcloud"
#
Expand Down Expand Up @@ -381,20 +411,21 @@ def delete options = {}
# }
# ]
# }
# table = dataset.create_table "my_table",
# name: "My Table",
# schema: schema
# table = dataset.create_table "my_table", schema: schema
#
# :category: Table
#
def create_table table_id, options = {}
ensure_connection!
resp = connection.insert_table dataset_id, table_id, options
if resp.success?
Table.from_gapi resp.data, connection
else
fail ApiError.from_response(resp)
if block_given?
if options[:schema]
fail ArgumentError, "only schema block or schema option is allowed"
end
schema_builder = Table::Schema.new nil
yield schema_builder
options[:schema] = schema_builder.schema if schema_builder.changed?
end
insert_table table_id, options
end

##
Expand Down Expand Up @@ -710,6 +741,15 @@ def self.from_gapi gapi, conn #:nodoc:

protected

##
# Sends the insert-table request for +table_id+ in this dataset, forwarding
# +options+ unchanged to the connection. Returns the Table built from the
# response data when the request succeeds; otherwise raises the ApiError
# built from the response.
def insert_table table_id, options
  response = connection.insert_table dataset_id, table_id, options
  # Bail out early on failure so the success path reads straight through.
  fail ApiError.from_response(response) unless response.success?
  Table.from_gapi response.data, connection
end

##
# Raise an error unless an active connection is available.
def ensure_connection!
Expand Down
92 changes: 58 additions & 34 deletions lib/gcloud/bigquery/table.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
require "gcloud/bigquery/view"
require "gcloud/bigquery/data"
require "gcloud/bigquery/table/list"
require "gcloud/bigquery/table/schema"
require "gcloud/bigquery/errors"
require "gcloud/bigquery/insert_response"
require "gcloud/upload"
Expand All @@ -36,35 +37,14 @@ module Bigquery
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# schema = {
# "fields" => [
# {
# "name" => "first_name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "cities_lived",
# "type" => "RECORD",
# "mode" => "REPEATED",
# "fields" => [
# {
# "name" => "place",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number_of_years",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# ]
# }
# table.schema = schema
# table = dataset.create_table "my_table" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# row = {
# "first_name" => "Alice",
Expand Down Expand Up @@ -311,20 +291,64 @@ def location
end

##
# The schema of the table.
# Returns the table's schema as a hash containing the keys and values
# returned by the Google Cloud BigQuery {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource].
# This method can also be used to replace or update the schema by passing
# a block. See Table::Schema for available methods. To replace the current
# schema by passing a hash instead, use #schema=.
#
# === Parameters
#
# +options+::
# An optional Hash for controlling additional behavior. (+Hash+)
# <code>options[:replace]</code>::
# Whether to replace the existing schema with the new schema. If
# +false+, new fields will be added to the existing schema. The default
# value is +true+. (+Boolean+)
#
# === Examples
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# table.schema do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# :category: Attributes
#
def schema
def schema options = {}
ensure_full_data!
s = @gapi["schema"]
s = s.to_hash if s.respond_to? :to_hash
s = {} if s.nil?
s
g = @gapi
g = g.to_hash if g.respond_to? :to_hash
s = g["schema"] ||= {}
return s unless block_given?
old_schema = options[:replace] == false ? s : nil
schema_builder = Schema.new old_schema
yield schema_builder
self.schema = schema_builder.schema if schema_builder.changed?
end

##
# Updates the schema of the table.
# To update the schema using a block instead, use #schema.
#
# === Parameters
#
# +schema+::
# A hash containing keys and values as specified by the Google Cloud
# BigQuery {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource]
# . (+Hash+)
#
# === Example
#
Expand Down
Loading

0 comments on commit f000d07

Please sign in to comment.