Skip to content

Commit

Permalink
Update documentation for BigQuery schema builder
Browse files Browse the repository at this point in the history
  • Loading branch information
quartzmo committed Sep 4, 2015
1 parent e95fec8 commit 1a25866
Show file tree
Hide file tree
Showing 5 changed files with 236 additions and 133 deletions.
59 changes: 12 additions & 47 deletions lib/gcloud/bigquery.rb
Original file line number Diff line number Diff line change
Expand Up @@ -209,33 +209,13 @@ def self.bigquery project = nil, keyfile = nil, options = {}
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
#
# schema = {
# "fields" => [
# {
# "name" => "first_name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "cities_lived",
# "type" => "RECORD",
# "mode" => "REPEATED",
# "fields" => [
# {
# "name" => "place",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number_of_years",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# ]
# }
# table = dataset.create_table "people", schema: schema
# table = dataset.create_table "people" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# Because of the repeated field in this schema, we cannot use the CSV format
# to load data into the table.
Expand Down Expand Up @@ -306,26 +286,11 @@ def self.bigquery project = nil, keyfile = nil, options = {}
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# schema = {
# "fields" => [
# {
# "name" => "name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "sex",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# table = dataset.create_table "baby_names", schema: schema
# table = dataset.create_table "baby_names" do |schema|
# schema.string "name", mode: :required
# schema.string "sex", mode: :required
# schema.integer "number", mode: :required
# end
#
# file = File.open "names/yob2014.txt"
# load_job = table.load file, format: "csv"
Expand Down
39 changes: 33 additions & 6 deletions lib/gcloud/bigquery/dataset.rb
Original file line number Diff line number Diff line change
Expand Up @@ -330,10 +330,11 @@ def delete options = {}
# <code>options[:description]</code>::
# A user-friendly description of the table. (+String+)
# <code>options[:schema]</code>::
# A schema specifying fields and data types for the table. See the
# A hash specifying fields and data types for the table. A block may be
# passed instead (see examples). For the format of this hash, see the
# {Tables resource
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource]
# for more information. (+Hash+)
# . (+Hash+)
#
# === Returns
#
Expand All @@ -348,7 +349,35 @@ def delete options = {}
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# A name and description can be provided:
# You can also pass name and description options.
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table",
# name: "My Table",
# description: "A description of my table."
#
# You can define the table's schema using a block.
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# Or, if you are adapting existing code that was written for the {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource],
# you can pass the table's schema as a hash.
#
# require "gcloud"
#
Expand Down Expand Up @@ -382,9 +411,7 @@ def delete options = {}
# }
# ]
# }
# table = dataset.create_table "my_table",
# name: "My Table",
# schema: schema
# table = dataset.create_table "my_table", schema: schema
#
# :category: Table
#
Expand Down
79 changes: 49 additions & 30 deletions lib/gcloud/bigquery/table.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,35 +37,14 @@ module Bigquery
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# schema = {
# "fields" => [
# {
# "name" => "first_name",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "cities_lived",
# "type" => "RECORD",
# "mode" => "REPEATED",
# "fields" => [
# {
# "name" => "place",
# "type" => "STRING",
# "mode" => "REQUIRED"
# },
# {
# "name" => "number_of_years",
# "type" => "INTEGER",
# "mode" => "REQUIRED"
# }
# ]
# }
# ]
# }
# table.schema = schema
# table = dataset.create_table "my_table" do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# row = {
# "first_name" => "Alice",
Expand Down Expand Up @@ -312,7 +291,38 @@ def location
end

##
# The schema of the table.
# Returns the table's schema as a hash containing the keys and values
# returned by the Google Cloud BigQuery {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource].
# This method can also be used to replace or update the schema by passing
# a block. See Table::Schema for available methods. To replace the current
# schema by passing a hash instead, use #schema=.
#
# === Parameters
#
# +options+::
# An optional Hash for controlling additional behavior. (+Hash+)
# <code>options[:replace]</code>::
# Whether to replace the existing schema with the new schema. If
# +false+, new fields will be added to the existing schema. The default
# value is +true+. (+Boolean+)
#
# === Examples
#
# require "gcloud"
#
# gcloud = Gcloud.new
# bigquery = gcloud.bigquery
# dataset = bigquery.dataset "my_dataset"
# table = dataset.create_table "my_table"
#
# table.schema do |schema|
# schema.string "first_name", mode: :required
# schema.record "cities_lived", mode: :repeated do |nested_schema|
# nested_schema.string "place", mode: :required
# nested_schema.integer "number_of_years", mode: :required
# end
# end
#
# :category: Attributes
#
Expand All @@ -322,14 +332,23 @@ def schema options = {}
g = g.to_hash if g.respond_to? :to_hash
s = g["schema"] ||= {}
return s unless block_given?
old_schema = options[:replace] ? nil : s
old_schema = options[:replace] == false ? s : nil
schema_builder = Schema.new old_schema
yield schema_builder
self.schema = schema_builder.schema if schema_builder.changed?
end

##
# Updates the schema of the table.
# To update the schema using a block instead, use #schema.
#
# === Parameters
#
# +schema+::
# A hash containing keys and values as specified by the Google Cloud
# BigQuery {REST API
# }[https://cloud.google.com/bigquery/docs/reference/v2/tables#resource]
# . (+Hash+)
#
# === Example
#
Expand Down
Loading

0 comments on commit 1a25866

Please sign in to comment.