Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding samples for dataproc - create cluster #2536

Merged
merged 5 commits into from
Nov 15, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions dataproc/create_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env python

# Copyright 2019 Google LLC
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

# Copyright 2019 Google LLC
Drop the comma and period

#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


def create_cluster(project_id, region, cluster_name):
    """Create a Cloud Dataproc cluster and print its name when done.

    Builds a cluster controller client pointed at the regional endpoint for
    ``region``, submits a cluster with one master and two workers (all
    ``n1-standard-1``), waits for the returned operation to complete and
    prints a success message containing the created cluster's name.

    Args:
        project_id: ID of the Google Cloud project to create the cluster in.
        region: Region for the cluster; also used to build the API endpoint.
        cluster_name: Name to give the new cluster.
    """
    # [START dataproc_create_cluster]
    from google.cloud import dataproc_v1 as dataproc

    # TODO(developer): Uncomment and set the following variables
    # project_id = 'YOUR_PROJECT_ID'
    # region = 'YOUR_CLUSTER_REGION'
    # cluster_name = 'YOUR_CLUSTER_NAME'

    # Create a client with the endpoint set to the desired cluster region
    client = dataproc.ClusterControllerClient(client_options={
        'api_endpoint': '{}-dataproc.googleapis.com:443'.format(region)
    })

    # Create the cluster config
    cluster = {
        'project_id': project_id,
        'cluster_name': cluster_name,
        'config': {
            'master_config': {
                'num_instances': 1,
                'machine_type_uri': 'n1-standard-1'
            },
            'worker_config': {
                'num_instances': 2,
                'machine_type_uri': 'n1-standard-1'
            }
        }
    }

    # Create the cluster
    operation = client.create_cluster(project_id, region, cluster)
    # Wait for the returned operation to complete before reporting success
    result = operation.result()

    # Output a success message
    print('Cluster created successfully: {}'.format(result.cluster_name))
    # [END dataproc_create_cluster]
44 changes: 44 additions & 0 deletions dataproc/create_cluster_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import uuid
import pytest

from google.cloud import dataproc

import create_cluster

PROJECT_ID = os.environ['GCLOUD_PROJECT']
REGION = 'us-central1'
CLUSTER_NAME = 'test-cluster-{}'.format(str(uuid.uuid4()))


@pytest.fixture(autouse=True)
def teardown():
    """Delete the test cluster after each test and wait for the deletion.

    Runs automatically for every test in this module: the test body executes
    at the ``yield``, then the cluster named ``CLUSTER_NAME`` is deleted.
    """
    yield

    client = dataproc.ClusterControllerClient(client_options={
        'api_endpoint': '{}-dataproc.googleapis.com:443'.format(REGION)
    })
    # Client library function
    operation = client.delete_cluster(PROJECT_ID, REGION, CLUSTER_NAME)
    # Wait for the delete operation to finish so that a failed deletion is
    # reported here instead of being silently dropped.
    operation.result()


def test_cluster_create(capsys):
    """Run the create_cluster sample and check it prints the cluster name."""
    # Wrapper function for client library function
    create_cluster.create_cluster(PROJECT_ID, REGION, CLUSTER_NAME)

    # The first element of the captured result is stdout.
    stdout = capsys.readouterr()[0]
    assert CLUSTER_NAME in stdout
2 changes: 1 addition & 1 deletion dataproc/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ google-auth==1.6.3
google-auth-httplib2==0.0.3
google-cloud==0.34.0
google-cloud-storage==1.19.1
google-cloud-dataproc==0.5.0
google-cloud-dataproc==0.6.1