Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docs(samples): create and query Amazon s3 data using external table #835

Original file line number Diff line number Diff line change
Expand Up @@ -16,46 +16,62 @@

package com.example.bigquery;

// [START bigquery_omni_create_external_table]
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;

// Sample to create an external aws table
public class CreateExternalTableAws {

public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String projectId = "MY_PROJECT_ID";
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
// Create a aws connection
// projects/{project_id}/locations/{location_id}/connections/{connection_id}
String connectionId = "MY_CONNECTION_NAME";
String connectionId = "MY_CONNECTION_ID";
String sourceUri = "s3://your-bucket-name/";
CsvOptions options = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
Schema schema =
Schema.of(
Field.of("name", StandardSQLTypeName.STRING),
Field.of("post_abbr", StandardSQLTypeName.STRING));
ExternalTableDefinition externalTableDefinition =
ExternalTableDefinition.newBuilder(sourceUri, options)
.setConnectionId(connectionId)
.setSchema(schema)
.build();
createExternalTableAws(datasetName, tableName, externalTableDefinition);
createExternalTableAws(projectId, datasetName, tableName, externalTableDefinition);
}

public static void createExternalTableAws(
String datasetName, String tableName, ExternalTableDefinition externalTableDefinition) {
String projectId,
String datasetName,
String tableName,
ExternalTableDefinition externalTableDefinition) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

TableId tableId = TableId.of(datasetName, tableName);
TableId tableId = TableId.of(projectId, datasetName, tableName);
TableInfo tableInfo = TableInfo.newBuilder(tableId, externalTableDefinition).build();

bigquery.create(tableInfo);
System.out.println("Aws external table created successfully");

// Clean up
bigquery.delete(TableId.of(projectId, datasetName, tableName));
} catch (BigQueryException e) {
System.out.println("Aws external was not created." + e.toString());
}
}
}
// [END bigquery_omni_create_external_table]
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

// [START bigquery_omni_query_external_aws_s3]
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.DatasetId;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import com.google.cloud.bigquery.TableResult;

// Sample to queries an external data source aws s3 using a permanent table
public class QueryExternalTableAws {

public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String projectId = "MY_PROJECT_ID";
String datasetName = "MY_DATASET_NAME";
String externalTableName = "MY_EXTERNAL_TABLE_NAME";
String query =
String.format(
"SELECT * FROM s%.%s.%s WHERE name LIKE 'W%%'",
projectId, datasetName, externalTableName);
stephaniewang526 marked this conversation as resolved.
Show resolved Hide resolved
queryExternalTableAws(query);
}

public static void queryExternalTableAws(String query) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

// Example query to find states starting with 'W'
TableResult results = bigquery.query(QueryJobConfiguration.of(query));

results
.iterateAll()
.forEach(row -> row.forEach(val -> System.out.printf("%s,", val.toString())));

System.out.println("Query on aws external permanent table performed successfully.");
} catch (BigQueryException | InterruptedException e) {
System.out.println("Query not performed \n" + e.toString());
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should these be caught? Perhaps just mention them or let them propagate? NOTE - BigQueryException is a RuntimeException and InterruptedException is not.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More important is why might these have been thrown and what should a user really do with them?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we will catch BigQueryException and not InterruptedException.
Updated!

}
}
// [END bigquery_omni_query_external_aws_s3]
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class CreateDatasetAwsIT {
private PrintStream out;
private PrintStream originalPrintStream;

private static final String PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");

private static String requireEnvVar(String varName) {
String value = System.getenv(varName);
Expand All @@ -51,7 +51,7 @@ private static String requireEnvVar(String varName) {

@BeforeClass
public static void checkRequirements() {
requireEnvVar("GOOGLE_CLOUD_PROJECT");
requireEnvVar("OMNI_PROJECT_ID");
}

@Before
Expand All @@ -66,7 +66,7 @@ public void setUp() {
@After
public void tearDown() {
// Clean up
DeleteDataset.deleteDataset(PROJECT_ID, datasetName);
DeleteDataset.deleteDataset(OMNI_PROJECT_ID, datasetName);
// restores print statements in the original method
System.out.flush();
System.setOut(originalPrintStream);
Expand All @@ -75,7 +75,7 @@ public void tearDown() {

@Test
public void testCreateDatasetAws() {
CreateDatasetAws.createDatasetAws(PROJECT_ID, datasetName, LOCATION);
CreateDatasetAws.createDatasetAws(OMNI_PROJECT_ID, datasetName, LOCATION);
assertThat(bout.toString()).contains("Aws dataset created successfully :");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,14 @@ public class CreateExternalTableAwsIT {
private static final String ID = UUID.randomUUID().toString().substring(0, 8);
private static final String LOCATION = "aws-us-east-1";
private final Logger log = Logger.getLogger(this.getClass().getName());
private String datasetName;
private String tableName;
private String connectionName;
private ByteArrayOutputStream bout;
private PrintStream out;
private PrintStream originalPrintStream;

private static final String PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
private static final String AWS_ACCOUNT_ID = requireEnvVar("AWS_ACCOUNT_ID");
private static final String AWS_ROLE_ID = requireEnvVar("AWS_ROLE_ID");
private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME");
private static final String AWS_READ_CONNECTION_ID = requireEnvVar("AWS_READ_CONNECTION_ID");

private static String requireEnvVar(String varName) {
String value = System.getenv(varName);
Expand All @@ -69,49 +67,21 @@ private static String requireEnvVar(String varName) {
@BeforeClass
public static void checkRequirements() {
requireEnvVar("OMNI_PROJECT_ID");
requireEnvVar("AWS_ACCOUNT_ID");
requireEnvVar("AWS_ROLE_ID");
requireEnvVar("OMNI_DATASET_NAME");
requireEnvVar("AWS_READ_CONNECTION_ID");
}

@Before
public void setUp() throws IOException {
datasetName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
public void setUp() {
tableName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
connectionName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
originalPrintStream = System.out;
System.setOut(out);
// create a temporary aws connection
try (ConnectionServiceClient client = ConnectionServiceClient.create()) {
LocationName parent = LocationName.of(PROJECT_ID, LOCATION);
String iamRoleId = String.format("arn:aws:iam::%s:role/%s", AWS_ACCOUNT_ID, AWS_ROLE_ID);
AwsCrossAccountRole role = AwsCrossAccountRole.newBuilder().setIamRoleId(iamRoleId).build();
AwsProperties awsProperties = AwsProperties.newBuilder().setCrossAccountRole(role).build();
Connection connection = Connection.newBuilder().setAws(awsProperties).build();
CreateConnectionRequest request =
CreateConnectionRequest.newBuilder()
.setParent(parent.toString())
.setConnection(connection)
.setConnectionId(connectionName)
.build();
connectionName = client.createConnection(request).getName();
}
// create a temporary dataset
CreateDatasetAws.createDatasetAws(PROJECT_ID, datasetName, LOCATION);
}

@After
public void tearDown() throws IOException {
// delete a temporary aws connection
try (ConnectionServiceClient client = ConnectionServiceClient.create()) {
DeleteConnectionRequest request =
DeleteConnectionRequest.newBuilder().setName(connectionName).build();
client.deleteConnection(request);
}
// Clean up
DeleteTable.deleteTable(datasetName, tableName);
DeleteDataset.deleteDataset(PROJECT_ID, datasetName);
public void tearDown() {
// restores print statements in the original method
System.out.flush();
System.setOut(originalPrintStream);
Expand All @@ -120,18 +90,19 @@ public void tearDown() throws IOException {

@Test
public void testCreateExternalTableAws() {
String sourceUri = "s3://cloud-samples-tests/us-states.csv";
String sourceUri = "s3://steph-omni-test-bucket/us-states.csv";
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might not be your best final choice for a bucket name.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes - you're right. Created a new bucket for devrel sampels testing. Thank you!

Schema schema =
Schema.of(
Field.of("name", StandardSQLTypeName.STRING),
Field.of("post_abbr", StandardSQLTypeName.STRING));
CsvOptions options = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
ExternalTableDefinition externalTable =
ExternalTableDefinition externalTableDefinition =
ExternalTableDefinition.newBuilder(sourceUri, options)
.setConnectionId(connectionName)
.setConnectionId(AWS_READ_CONNECTION_ID)
.setSchema(schema)
.build();
CreateExternalTableAws.createExternalTableAws(datasetName, tableName, externalTable);
CreateExternalTableAws.createExternalTableAws(
OMNI_PROJECT_ID, OMNI_DATASET_NAME, tableName, externalTableDefinition);
assertThat(bout.toString()).contains("Aws external table created successfully");
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.connection.v1.AwsCrossAccountRole;
import com.google.cloud.bigquery.connection.v1.AwsProperties;
import com.google.cloud.bigquery.connection.v1.Connection;
import com.google.cloud.bigquery.connection.v1.CreateConnectionRequest;
import com.google.cloud.bigquery.connection.v1.DeleteConnectionRequest;
import com.google.cloud.bigquery.connection.v1.LocationName;
import com.google.cloud.bigqueryconnection.v1.ConnectionServiceClient;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class QueryExternalTableAwsIT {

private final Logger log = Logger.getLogger(this.getClass().getName());
private ByteArrayOutputStream bout;
private PrintStream out;
private PrintStream originalPrintStream;

private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME");
private static final String OMNI_EXTERNAL_TABLE_NAME = requireEnvVar("OMNI_EXTERNAL_TABLE_NAME");

private static String requireEnvVar(String varName) {
String value = System.getenv(varName);
assertNotNull(
"Environment variable " + varName + " is required to perform these tests.",
System.getenv(varName));
return value;
}

@BeforeClass
public static void checkRequirements() {
requireEnvVar("OMNI_PROJECT_ID");
requireEnvVar("OMNI_DATASET_NAME");
requireEnvVar("OMNI_EXTERNAL_TABLE_NAME");
}

@Before
public void setUp() {
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
originalPrintStream = System.out;
System.setOut(out);
}

@After
public void tearDown() {
// restores print statements in the original method
System.out.flush();
System.setOut(originalPrintStream);
log.log(Level.INFO, bout.toString());
}

@Test
public void testQueryExternalTableAws() {
String query =
String.format(
"SELECT * FROM %s.%s.%s WHERE name LIKE 'W%%'",
OMNI_PROJECT_ID, OMNI_DATASET_NAME, OMNI_EXTERNAL_TABLE_NAME);
QueryExternalTableAws.queryExternalTableAws(query);
assertThat(bout.toString())
.contains("Query on aws external permanent table performed successfully.");
}
}