Skip to content

Commit

Permalink
BigQuery samples.
Browse files Browse the repository at this point in the history
  • Loading branch information
jmdobry committed Apr 1, 2016
1 parent aaadacf commit 67fd509
Show file tree
Hide file tree
Showing 14 changed files with 542 additions and 3 deletions.
33 changes: 33 additions & 0 deletions bigquery/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
## BigQuery Samples

These samples require two environment variables to be set:

- `GOOGLE_APPLICATION_CREDENTIALS` - Path to a service account file. You can
download one from your Google project's "permissions" page.
- `GCLOUD_PROJECT` - Id of your Google project.

## Run the samples

Install dependencies:

npm install

### getting_started.js

npm run getting_started

### dataset_size.js

Usage: `npm run dataset_size -- <projectId> <datasetId>

Example:

npm run dataset_size -- bigquery-public-data hacker_news

### load_data_from_csv.js

Usage: `npm run load_data_from_csv -- <pathToCsvFile> <datasetId> <tableName>

Example:

npm run load_data_from_csv -- data.csv my-dataset my-table
159 changes: 159 additions & 0 deletions bigquery/dataset_size.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

'use strict';

var async = require('async');

// [START auth]
// You must set the GOOGLE_APPLICATION_CREDENTIALS and GCLOUD_PROJECT
// environment variables to run this sample
var projectId = process.env.GCLOUD_PROJECT;

// Initialize gcloud
var gcloud = require('gcloud')({
projectId: projectId
});

// Get a reference to the bigquery component
var bigquery = gcloud.bigquery();
// [END auth]

// not going to use this bigquery instance
bigquery = undefined;

// [START list_tables]
/**
* Retrieve all tables for the specified dataset.
*
* @param {Object} bigquery gcloud-node bigquery client.
* @param {string} datasetId Dataset of the tables to retrieve.
* @param {string} [pageToken] Page to retrieve.
* @param {Function} callback Callback function.
*/
function getAllTablesExample(bigquery, datasetId, pageToken, callback) {
if (typeof pageToken === 'function') {
callback = pageToken;
pageToken = undefined;
}
var dataset = bigquery.dataset(datasetId);
var options = {};
if (pageToken) {
options.pageToken = pageToken;
}

// Grab paginated tables
dataset.getTables(options, function (err, tables, nextQuery) {
// Quit on error
if (err) {
return callback(err);
}

// There is another page of tables
if (nextQuery) {
// Grab the remaining pages of tables recursively
return getAllTablesExample(
datasetId,
nextQuery.token,
function (err, _tables) {
if (err) {
return callback(err);
}
callback(null, tables.concat(_tables));
}
);
}
// Last page of tables
return callback(null, tables);
});
}
// [END list_tables]

// [START get_size]
/**
* Retrieve the size of the specified dataset.
*
* @param {string} projectId The project, .e.g. "bigquery-public-data"
* @param {string} datasetId The dataset, e.g. "hacker_news"
* @param {Function} callback Callback function.
*/
function getSizeExample(projectId, datasetId, callback) {
if (!projectId) {
return callback(new Error('projectId is required!'));
}
if (!datasetId) {
return callback(new Error('datasetId is require!'));
}

var gcloud = require('gcloud')({
projectId: projectId || process.env.GCLOUD_PROJECT
});
var bigquery = gcloud.bigquery();

// Fetch all tables in the dataset
getAllTablesExample(bigquery, datasetId, function (err, tables) {
return async.parallel(tables.map(function (table) {
return function (cb) {
// Fetch more detailed info for each table
table.get(function (err, tableInfo) {
if (err) {
return cb(err);
}
// Return numBytes converted to Megabytes
var numBytes = tableInfo.metadata.numBytes;
return cb(null, (parseInt(numBytes, 10) / 1000) / 1000);
});
};
}), function (err, sizes) {
if (err) {
return callback(err);
}
var sum = sizes.reduce(function (cur, prev) {
return cur + prev;
}, 0);
return callback(null, sum);
});
});
}
// [END get_size]

// Run the examples
exports.main = function (projectId, datasetId, cb) {
getSizeExample(projectId, datasetId, function (err, sum) {
if (err) {
return cb(err);
}
var size = 'MB';
if (sum > 1000) {
sum = sum / 1000;
size = 'GB';
}
if (sum > 1000) {
sum = sum / 1000;
size = 'TB';
}
cb(null, '' + sum.toPrecision(5) + ' ' + size);
});
};

if (module === require.main) {
var args = process.argv.slice(2);
if (args.length !== 2) {
throw new Error('Usage: node dataset_size.js <projectId> <datasetId>');
}
exports.main(
args[0],
args[1],
console.log
);
}
79 changes: 79 additions & 0 deletions bigquery/getting_started.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Copyright 2016, Google, Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// [START complete]
'use strict';

// [START auth]
// You must set the GOOGLE_APPLICATION_CREDENTIALS and GCLOUD_PROJECT
// environment variables to run this sample
var projectId = process.env.GCLOUD_PROJECT;

// Initialize gcloud
var gcloud = require('gcloud')({
projectId: projectId
});

// Get a reference to the bigquery component
var bigquery = gcloud.bigquery();
// [END auth]

// [START print]
function printExample(rows) {
console.log('Query Results:');
rows.forEach(function (row) {
var str = '';
for (var key in row) {
if (str) {
str += '\t';
}
str += key + ': ' + row[key];
}
console.log(str);
});
}
// [END print]

// [START query]
/**
* Run an example query.
*
* @param {Function} callback Callback function.
*/
function queryExample(callback) {
var query = 'SELECT TOP(corpus, 10) as title, COUNT(*) as unique_words\n' +
'FROM [publicdata:samples.shakespeare];';

bigquery.query(query, function(err, rows) {
if (err) {
return callback(err);
}

printExample(rows);
callback(null, rows);
});
}
// [END query]

// [END complete]

// Run the examples
exports.main = function (cb) {
queryExample(cb);
};

if (module === require.main) {
exports.main(
console.log
);
}
Loading

0 comments on commit 67fd509

Please sign in to comment.