Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Schema builder for BigQuery Table #264

Merged
merged 2 commits into from
Sep 4, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 12 additions & 47 deletions lib/gcloud/bigquery.rb
Original file line number Diff line number Diff line change
Expand Up @@ -209,33 +209,13 @@ def self.bigquery project = nil, keyfile = nil, options = {}
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
#
# schema = {
# "fields" => [
# {
# "name" => "first_name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "cities_lived",
# "type" => "RECORD",
# "mode" => "REPEATED",
# "fields" => [
# {
# "name" => "place",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number_of_years",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# ]
# }
# table = dataset.create_table "people", schema: schema
# table = dataset.create_table "people" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# Because of the repeated field in this schema, we cannot use the CSV format
# to load data into the table.
Expand Down Expand Up @@ -306,26 +286,11 @@ def self.bigquery project = nil, keyfile = nil, options = {}
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# schema = {
# "fields" => [
# {
# "name" => "name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "sex",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# table = dataset.create_table "baby_names", schema: schema
# table = dataset.create_table "baby_names" do |schema|
# schema.string "name", mode: :required
# schema.string "sex", mode: :required
# schema.integer "number", mode: :required
# end
#
# file = File.open "names/yob2014.txt"
# load_job = table.load file, format: "csv"
Expand Down
62 changes: 51 additions & 11 deletions lib/gcloud/bigquery/dataset.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
require "json"
require "gcloud/bigquery/errors"
require "gcloud/bigquery/table"
require "gcloud/bigquery/table/schema"
require "gcloud/bigquery/dataset/list"
require "gcloud/bigquery/dataset/access"

Expand Down Expand Up @@ -329,10 +330,11 @@ def delete options = {}
# <code>options[:description]</code>::
# A user-friendly description of the table. (+String+)
# <code>options[:schema]</code>::
# A schema specifying fields and data types for the table. See the
# A hash specifying fields and data types for the table. A block may be
# passed instead (see examples). For the format of this hash, see the
# {Tables resource
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource]
# for more information. (+Hash+)
# . (+Hash+)
#
# === Returns
#
Expand All @@ -347,7 +349,35 @@ def delete options = {}
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# A name and description can be provided:
# You can also pass name and description options.
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table",
# name: "My Table",
# description: "A description of my table."
#
# You can define the table's schema using a block.
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# Or, if you are adapting existing code that was written for the {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource],
# you can pass the table's schema as a hash.
#
# require "gcloud"
#
Expand Down Expand Up @@ -381,20 +411,21 @@ def delete options = {}
# }
# ]
# }
# table = dataset.create_table "my_table",
# name: "My Table",
# schema: schema
# table = dataset.create_table "my_table", schema: schema
#
# :category: Table
#
def create_table table_id, options = {}
ensure_connection!
resp = connection.insert_table dataset_id, table_id, options
if resp.success?
Table.from_gapi resp.data, connection
else
fail ApiError.from_response(resp)
if block_given?
if options[:schema]
fail ArgumentError, "only schema block or schema option is allowed"
end
schema_builder = Table::Schema.new nil
yield schema_builder
options[:schema] = schema_builder.schema if schema_builder.changed?
end
insert_table table_id, options
end

##
Expand Down Expand Up @@ -710,6 +741,15 @@ def self.from_gapi gapi, conn #:nodoc:

protected

##
# Sends the table-insert request for +table_id+ in this dataset through the
# connection, passing +options+ along unchanged.
#
# Returns the result of Table.from_gapi built from the response data when
# the response reports success; otherwise raises the error produced by
# ApiError.from_response.
def insert_table table_id, options
  response = connection.insert_table dataset_id, table_id, options
  # Guard clause: surface the API failure before building a Table.
  fail ApiError.from_response(response) unless response.success?
  Table.from_gapi response.data, connection
end

##
# Raise an error unless an active connection is available.
def ensure_connection!
Expand Down
92 changes: 58 additions & 34 deletions lib/gcloud/bigquery/table.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
require "gcloud/bigquery/view"
require "gcloud/bigquery/data"
require "gcloud/bigquery/table/list"
require "gcloud/bigquery/table/schema"
require "gcloud/bigquery/errors"
require "gcloud/bigquery/insert_response"
require "gcloud/upload"
Expand All @@ -36,35 +37,14 @@ module Bigquery
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# schema = {
# "fields" => [
# {
# "name" => "first_name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "cities_lived",
# "type" => "RECORD",
# "mode" => "REPEATED",
# "fields" => [
# {
# "name" => "place",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number_of_years",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# ]
# }
# table.schema = schema
# table = dataset.create_table "my_table" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# row = {
# "first_name" => "Alice",
Expand Down Expand Up @@ -311,20 +291,64 @@ def location
end

##
# The schema of the table.
# Returns the table's schema as a hash containing the keys and values
# returned by the Google Cloud BigQuery {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource].
# This method can also be used to replace or update the schema by passing
# a block. See Table::Schema for available methods. To replace the current
# schema by passing a hash instead, use #schema=.
#
# === Parameters
#
# +options+::
# An optional Hash for controlling additional behavior. (+Hash+)
# <code>options[:replace]</code>::
# Whether to replace the existing schema with the new schema. If
# +false+, new fields will be added to the existing schema. The default
# value is +true+. (+Boolean+)
#
# === Examples
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# table.schema do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# :category: Attributes
#
def schema
def schema options = {}
ensure_full_data!
s = @gapi["schema"]
s = s.to_hash if s.respond_to? :to_hash
s = {} if s.nil?
s
g = @gapi
g = g.to_hash if g.respond_to? :to_hash
s = g["schema"] ||= {}
return s unless block_given?
old_schema = options[:replace] == false ? s : nil
schema_builder = Schema.new old_schema
yield schema_builder
self.schema = schema_builder.schema if schema_builder.changed?
end

##
# Updates the schema of the table.
# To update the schema using a block instead, use #schema.
#
# === Parameters
#
# +schema+::
# A hash containing keys and values as specified by the Google Cloud
# BigQuery {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource]
# . (+Hash+)
#
# === Example
#
Expand Down
Loading