Skip to content
This repository has been archived by the owner on Aug 10, 2023. It is now read-only.

Commit

Permalink
cicd-datalake-part-1 (#1564)
Browse files Browse the repository at this point in the history
* data processing pipelines ci/cd blog md file

* new  updates

* new  updates

* updated index.md file after primary review

* moved images to public bucket and updated links

* fixed brand names in description strings

* fixed brand names in description strings

* Branding updates

* fixed typo

* first line edit pass

second proofreading pass coming in a moment

* second editing pass complete

* Update index.md

* fixing character in metadata that causes test to fail

* Update index.md

* Update index.md

Co-authored-by: Prasad Alle <prasadalle@google.com>
Co-authored-by: Todd Kopriva <43478937+ToddKopriva@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 8, 2021
1 parent fbb0336 commit 896828a
Show file tree
Hide file tree
Showing 33 changed files with 1,009 additions and 0 deletions.
102 changes: 102 additions & 0 deletions tutorials/cicd-datalake-part-1/cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


steps:
# Diagnostic step: echoes the name of the branch that triggered the build,
# framed by two banner lines. $BRANCH_NAME is a Cloud Build substitution, so it
# is filled in before the shell runs.
- id: 'branch name'
  name: 'alpine'
  entrypoint: 'sh'
  args:
  - '-c'
  - |
    echo "***********************"
    echo "$BRANCH_NAME"
    echo "***********************"
# Runs `terraform init`.
# - If environments/<branch>/ exists, only that environment is initialised.
# - Otherwise every directory under environments/ is initialised in turn and
#   the step exits with status 1 on the first failing init.
# Inside the loop, the two parameter expansions strip the trailing slash and
# the leading "environments/" so that the banner shows just the environment.
- id: 'tf init'
  name: 'hashicorp/terraform:0.12.24'
  entrypoint: 'sh'
  args:
  - '-c'
  - |
    if [ -d "environments/$BRANCH_NAME/" ]; then
      cd environments/$BRANCH_NAME
      terraform init
    else
      for dir in environments/*/
      do
        cd ${dir}
        env=${dir%*/}
        env=${env#*/}
        echo ""
        echo "*************** TERRAFORM INIT ******************"
        echo "******* At environment: ${env} ********"
        echo "*************************************************"
        terraform init || exit 1
        cd ../../
      done
    fi
# [START tf-plan]
# Runs `terraform plan`.
# - If environments/<branch>/ exists, only that environment is planned.
# - Otherwise every directory under environments/ is planned in turn and the
#   step exits with status 1 on the first failing plan.
# The TF_VAR_* entries under `env` hand the user-defined Cloud Build
# substitutions to Terraform as input variables.
- id: 'tf plan'
  name: 'hashicorp/terraform:0.12.24'
  entrypoint: 'sh'
  args:
  - '-c'
  - |
    if [ -d "environments/$BRANCH_NAME/" ]; then
      cd environments/$BRANCH_NAME
      terraform plan
    else
      for dir in environments/*/
      do
        cd ${dir}
        env=${dir%*/}
        env=${env#*/}
        echo ""
        echo "*************** TERRAFORM PLAN ******************"
        echo "******* At environment: ${env} ********"
        echo "*************************************************"
        terraform plan || exit 1
        cd ../../
      done
    fi
  env:
  - 'TF_VAR_project_id=$_PROJECT_ID'
  - 'TF_VAR_service_account_email=$_SERVICE_ACCOUNT_EMAIL'
  - 'TF_VAR_source_gcs_bucket=$_SOURCE_GCS_BUCKET'
  - 'TF_VAR_region=$_REGION'
# [END tf-plan]

# [START tf-apply]
# Applies the configuration without interactive approval, but only when the
# branch name matches a directory under environments/. For any other branch
# the step just prints a notice and makes no changes.
- id: 'tf apply'
  name: 'hashicorp/terraform:0.12.24'
  entrypoint: 'sh'
  args:
  - '-c'
  - |
    if [ -d "environments/$BRANCH_NAME/" ]; then
      cd environments/$BRANCH_NAME
      terraform apply -auto-approve
    else
      echo "***************************** SKIPPING APPLYING *******************************"
      echo "Branch '$BRANCH_NAME' does not represent an official environment."
      echo "*******************************************************************************"
    fi
  env:
  - 'TF_VAR_project_id=$_PROJECT_ID'
  - 'TF_VAR_service_account_email=$_SERVICE_ACCOUNT_EMAIL'
  - 'TF_VAR_source_gcs_bucket=$_SOURCE_GCS_BUCKET'
  - 'TF_VAR_region=$_REGION'
# [END tf-apply]
78 changes: 78 additions & 0 deletions tutorials/cicd-datalake-part-1/environments/dev/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# cicd-datalake-part-1

# Terraform core version accepted by this block: 0.12.0 or any later release.
# NOTE(review): versions.tf in this directory also declares
# required_version = "~> 0.12.0"; consider keeping a single declaration.
terraform {
  required_version = ">= 0.12.0"
}


# Environment label; it is embedded in the bucket, dataset and Dataflow job
# names defined further down in this file.
locals {
  env = "dev"
}

# Google provider for the dev environment. Project and region come straight
# from the input variables; the default zone is always the region's "-a" zone.
# Bare references replace the interpolation-only form ("${var.x}"), which
# Terraform 0.12 reports as deprecated.
provider "google" {
  project = var.project_id
  region  = var.region
  zone    = "${var.region}-a"
  version = "~> 2.18.0"
}

# Four random bytes; their hex encoding is used as the suffix of the
# temporary bucket name.
resource "random_id" "random_suffix" {
  byte_length = 4
}

# Temporary bucket name: fixed prefix, random hex suffix, environment label.
locals {
  gcs_bucket_name = "tmp-dir-bucket-${random_id.random_suffix.hex}-${local.env}"
}

// [START gcs-buckets-block]
# Regional bucket whose paths the Dataflow job uses for its temporary files.
# force_destroy is a real boolean here (it was the string "true"); when set,
# the bucket can be destroyed even if it still contains objects.
resource "google_storage_bucket" "tmp_dir_bucket" {
  name          = local.gcs_bucket_name
  storage_class = "REGIONAL"
  location      = var.region
  project       = var.project_id
  force_destroy = true
}

# BigQuery dataset that receives the table written by the Dataflow job.
# The location is fixed to "US" and does not follow var.region.
resource "google_bigquery_dataset" "default" {
  project       = var.project_id
  dataset_id    = "${local.env}_datalake_demo"
  friendly_name = "${local.env}_datalake_demo"
  description   = "This is the BQ dataset for running the datalake demo"
  location      = "US"

  # 3,600,000 ms = 1 hour: tables in this dataset expire one hour after
  # creation unless they set their own expiration.
  default_table_expiration_ms = 3600000
}

# Batch Dataflow job launched from the GCS_Text_to_BigQuery template. It reads
# cc_records.csv from the source bucket, applies the "transform" function from
# etl.js, and writes to the sample_userdata table using bq_schema.json.
# NOTE(review): the job name contains underscores; the Dataflow API documents
# job names as lowercase letters, digits and hyphens only. Confirm the name is
# accepted before relying on it.
resource "google_dataflow_job" "dataflow_job" {
  project               = var.project_id
  region                = var.region
  zone                  = "${var.region}-a"
  name                  = "${local.env}_datalake_cicd_batch"
  on_delete             = "cancel"
  max_workers           = 8
  template_gcs_path     = "gs://dataflow-templates/latest/GCS_Text_to_BigQuery"
  temp_gcs_location     = "gs://${local.gcs_bucket_name}/tmp_dir"
  service_account_email = var.service_account_email

  parameters = {
    javascriptTextTransformFunctionName = "transform"
    JSONPath                            = "gs://${var.source_gcs_bucket}/bq_schema.json"
    javascriptTextTransformGcsPath      = "gs://${var.source_gcs_bucket}/etl.js"
    inputFilePattern                    = "gs://${var.source_gcs_bucket}/cc_records.csv"
    outputTable                         = "${var.project_id}:${google_bigquery_dataset.default.dataset_id}.sample_userdata"
    bigQueryLoadingTemporaryDirectory   = "gs://${local.gcs_bucket_name}/tmp_dir1"
  }

  # The bucket paths above are built from local.gcs_bucket_name, not from the
  # bucket resource, so Terraform sees no implicit dependency. This makes the
  # ordering explicit.
  depends_on = [google_storage_bucket.tmp_dir_bucket]
}
18 changes: 18 additions & 0 deletions tutorials/cicd-datalake-part-1/environments/dev/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Echoes the project ID this environment was deployed to.
output "project_id" {
  description = "The project's ID"
  value       = var.project_id
}
33 changes: 33 additions & 0 deletions tutorials/cicd-datalake-part-1/environments/dev/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Target project; used by the provider and by every resource in main.tf.
# No default, so a value must be supplied.
variable "project_id" {
  description = "The project ID to deploy to"
  type        = string
}

# Region for the provider, the temporary bucket and the Dataflow job; the
# zone is derived from it by appending "-a". No default in dev.
variable "region" {
  description = "The region in which the bucket and the Dataflow job will be deployed"
  type        = string
}

# Passed to the Dataflow job's service_account_email argument in main.tf.
variable "service_account_email" {
  description = "The Service Account email used to create the job."
  type        = string
}

# Bucket name (without the gs:// prefix) from which the Dataflow job reads
# bq_schema.json, etl.js and cc_records.csv.
variable "source_gcs_bucket" {
  description = "The source Cloud Storage bucket."
  type        = string
}
18 changes: 18 additions & 0 deletions tutorials/cicd-datalake-part-1/environments/dev/versions.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# Pins Terraform core to the 0.12.x series ("~> 0.12.0" allows only patch
# releases of 0.12). The 0.12.24 image used in cloudbuild.yaml satisfies it.
terraform {
  required_version = "~> 0.12.0"
}
77 changes: 77 additions & 0 deletions tutorials/cicd-datalake-part-1/environments/prod/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# cicd-datalake-part-1

# Terraform core version accepted by this block: 0.12.0 or any later release.
terraform {
  required_version = ">= 0.12.0"
}

# Environment label; it is embedded in the bucket, dataset and Dataflow job
# names defined further down in this file.
locals {
  env = "prod"
}

# Google provider for the prod environment. Project and region come straight
# from the input variables; the default zone is always the region's "-a" zone.
# Bare references replace the interpolation-only form ("${var.x}"), which
# Terraform 0.12 reports as deprecated.
provider "google" {
  project = var.project_id
  region  = var.region
  zone    = "${var.region}-a"
  version = "~> 2.18.0"
}

# Four random bytes; their hex encoding is used as the suffix of the
# temporary bucket name.
resource "random_id" "random_suffix" {
  byte_length = 4
}

# Temporary bucket name: fixed prefix, random hex suffix, environment label.
locals {
  gcs_bucket_name = "tmp-dir-bucket-${random_id.random_suffix.hex}-${local.env}"
}

// [START gcs-buckets-block]
# Regional bucket whose paths the Dataflow job uses for its temporary files.
# force_destroy is a real boolean here (it was the string "true"); when set,
# the bucket can be destroyed even if it still contains objects.
resource "google_storage_bucket" "tmp_dir_bucket" {
  name          = local.gcs_bucket_name
  storage_class = "REGIONAL"
  location      = var.region
  project       = var.project_id
  force_destroy = true
}

# BigQuery dataset that receives the table written by the Dataflow job.
# The location is fixed to "US" and does not follow var.region.
resource "google_bigquery_dataset" "default" {
  project       = var.project_id
  dataset_id    = "${local.env}_datalake_demo"
  friendly_name = "${local.env}_datalake_demo"
  description   = "This is the BQ dataset for running the datalake demo"
  location      = "US"

  # 3,600,000 ms = 1 hour: tables in this dataset expire one hour after
  # creation unless they set their own expiration.
  default_table_expiration_ms = 3600000
}

# Batch Dataflow job launched from the GCS_Text_to_BigQuery template. It reads
# cc_records.csv from the source bucket, applies the "transform" function from
# etl.js, and writes to the sample_userdata table using bq_schema.json.
# NOTE(review): the job name contains underscores; the Dataflow API documents
# job names as lowercase letters, digits and hyphens only. Confirm the name is
# accepted before relying on it.
resource "google_dataflow_job" "dataflow_job" {
  project               = var.project_id
  region                = var.region
  zone                  = "${var.region}-a"
  name                  = "${local.env}_datalake_cicd_batch"
  on_delete             = "cancel"
  max_workers           = 8
  template_gcs_path     = "gs://dataflow-templates/latest/GCS_Text_to_BigQuery"
  temp_gcs_location     = "gs://${local.gcs_bucket_name}/tmp_dir"
  service_account_email = var.service_account_email

  parameters = {
    javascriptTextTransformFunctionName = "transform"
    JSONPath                            = "gs://${var.source_gcs_bucket}/bq_schema.json"
    javascriptTextTransformGcsPath      = "gs://${var.source_gcs_bucket}/etl.js"
    inputFilePattern                    = "gs://${var.source_gcs_bucket}/cc_records.csv"
    outputTable                         = "${var.project_id}:${google_bigquery_dataset.default.dataset_id}.sample_userdata"
    bigQueryLoadingTemporaryDirectory   = "gs://${local.gcs_bucket_name}/tmp_dir1"
  }

  # The bucket paths above are built from local.gcs_bucket_name, not from the
  # bucket resource, so Terraform sees no implicit dependency. This makes the
  # ordering explicit.
  depends_on = [google_storage_bucket.tmp_dir_bucket]
}
18 changes: 18 additions & 0 deletions tutorials/cicd-datalake-part-1/environments/prod/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Echoes the project ID this environment was deployed to.
output "project_id" {
  description = "The project's ID"
  value       = var.project_id
}
34 changes: 34 additions & 0 deletions tutorials/cicd-datalake-part-1/environments/prod/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Target project; used by the provider and by every resource in main.tf.
# No default, so a value must be supplied.
variable "project_id" {
  description = "The project ID to deploy to"
  type        = string
}

# Region for the provider, the temporary bucket and the Dataflow job; the
# zone is derived from it by appending "-a". Falls back to us-west1 when no
# value is supplied.
variable "region" {
  description = "The region in which the bucket and the Dataflow job will be deployed"
  type        = string
  default     = "us-west1"
}

# Passed to the Dataflow job's service_account_email argument in main.tf.
variable "service_account_email" {
  description = "The Service Account email used to create the job."
  type        = string
}

# Bucket name (without the gs:// prefix) from which the Dataflow job reads
# bq_schema.json, etl.js and cc_records.csv.
variable "source_gcs_bucket" {
  description = "The source Cloud Storage bucket."
  type        = string
}
Loading

0 comments on commit 896828a

Please sign in to comment.