From 4a3deea3cb0dcdb6e2ad2a25eccaa3340b205869 Mon Sep 17 00:00:00 2001 From: Arnaud Meukam Date: Thu, 16 Sep 2021 23:10:13 +0200 Subject: [PATCH] kubernetes-public: BigQuery Data transfer to k8s-infra-kettle Part of : https://github.com/kubernetes/k8s.io/issues/1308/ Ref : https://github.com/kubernetes/k8s.io/issues/787 Add a service account with workload identity to ensure k8s service account kettle can push data to BQ dataset k8s-infra-kettle. Add a BQ data transfer job to copy data from k8s-gubernator:build to dataset k8s-infra-kettle. The job is not periodically triggered. Add script to auto-deploy kettle on GKE cluster aaa. Signed-off-by: Arnaud Meukam --- apps/kettle/deploy.sh | 50 ++++++++++++++++ apps/kettle/kettle-serviceaccount.yaml | 9 +++ .../terraform/kubernetes-public/k8s-kettle.tf | 58 ++++++++++++++++++- 3 files changed, 114 insertions(+), 3 deletions(-) create mode 100644 apps/kettle/deploy.sh create mode 100644 apps/kettle/kettle-serviceaccount.yaml diff --git a/apps/kettle/deploy.sh b/apps/kettle/deploy.sh new file mode 100644 index 000000000000..7d29884da03d --- /dev/null +++ b/apps/kettle/deploy.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +# Copyright 2021 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Deploys this app to the aaa cluster, or whatever cluster is pointed to +# by KUBECTL_CONTEXT if set. Assumes the app's namespace already exists. +# +# Members of k8s-infra-rbac-${app}@kubernetes.io can run this. 
+ +set -o errexit +set -o nounset +set -o pipefail + +SCRIPT_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P) + +app=$(basename "${SCRIPT_ROOT}") + +# coordinates to locate the target cluster in gke +cluster_name="aaa" +cluster_project="kubernetes-public" +cluster_region="us-central1" + +# coordinates to locate the app on the target cluster +namespace="${app}" + +# well known name set by `gcloud container clusters get-credentials` +gke_context="gke_${cluster_project}_${cluster_region}_${cluster_name}" +context="${KUBECTL_CONTEXT:-${gke_context}}" + +# ensure we have a context to talk to the target cluster +if ! kubectl config get-contexts "${context}" >/dev/null 2>&1; then + gcloud container clusters get-credentials "${cluster_name}" --project="${cluster_project}" --region="${cluster_region}" + context="${gke_context}" +fi + +# deploy kubernetes resources +pushd "${SCRIPT_ROOT}" >/dev/null +kubectl --context="${context}" --namespace="${namespace}" apply -f . diff --git a/apps/kettle/kettle-serviceaccount.yaml b/apps/kettle/kettle-serviceaccount.yaml new file mode 100644 index 000000000000..14e2f7cf9e1c --- /dev/null +++ b/apps/kettle/kettle-serviceaccount.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kettle + namespace: kettle + labels: + app: kettle + annotations: + iam.gke.io/gcp-service-account: kettle@kubernetes-public.iam.gserviceaccount.com diff --git a/infra/gcp/terraform/kubernetes-public/k8s-kettle.tf b/infra/gcp/terraform/kubernetes-public/k8s-kettle.tf index a4fc4ace29f5..4ab30e1d660a 100644 --- a/infra/gcp/terraform/kubernetes-public/k8s-kettle.tf +++ b/infra/gcp/terraform/kubernetes-public/k8s-kettle.tf @@ -13,13 +13,22 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ - + /* This file defines: - A BigQuery dataset for kettle tests results - IAM bindings on that dataset */ +// Service used by Kubernetes Service Account kettle in namespace kettle +module "aaa_kettle_sa" { + source = "../modules/workload-identity-service-account" + project_id = "kubernetes-public" + name = "kettle" + description = "default service account for pods in ${local.cluster_name}" + cluster_namespace = "kettle" +} + // BigQuery dataset for Kettle resource "google_bigquery_dataset" "prod_kettle_dataset" { dataset_id = "k8s_infra_kettle" @@ -32,18 +41,61 @@ resource "google_bigquery_dataset" "prod_kettle_dataset" { } data "google_iam_policy" "prod_kettle_dataset_iam_policy" { + // Ensure prow on-call team has admin privileges binding { members = [ - "group:k8s-infra-prow-oncall@kubernetes.io", + "group:${local.prow_owners}", ] role = "roles/bigquery.dataOwner" } + // Ensure service accounts can create/update/get/delete dataset's table + binding { + members = [ + "serviceAccount:${module.aaa_kettle_sa.email}", + "serviceAccount:${google_service_account.bq_kettle_data_transfer_writer.email}" + ] + role = "roles/bigquery.dataEditor" + } } resource "google_bigquery_dataset_iam_policy" "prod_kettle_dataset" { dataset_id = google_bigquery_dataset.prod_kettle_dataset.dataset_id - project = google_bigquery_dataset.prod_kettle_dataset.project + project = google_bigquery_dataset.prod_kettle_dataset.project policy_data = data.google_iam_policy.prod_kettle_dataset_iam_policy.policy_data } +// Service account dedicated for BigQuery Data Transfer from BQ dataset k8s-gubernator:builds +// TODO: remove when kettle migration is over +resource "google_service_account" "bq_kettle_data_transfer_writer" { + account_id = "bq-data-transfer-kettle" + description = "Service Account BigQuery Data Transfer" + project = data.google_project.project.project_id +} + +// grant bigquery jobUser role to the service account +// so the job transfer can launch BigQuery jobs +resource 
"google_project_iam_member" "bq_kettle_data_transfer_jobuser_binding" { + project = data.google_project.project.project_id + role = "roles/bigquery.jobUser" + member = "serviceAccount:${google_service_account.bq_kettle_data_transfer_writer.email}" +} + +resource "google_bigquery_data_transfer_config" "bq_data_transfer_kettle" { + display_name = "BigQuery data transfer to ${google_bigquery_dataset.prod_kettle_dataset.dataset_id}" + project = data.google_project.project.project_id + data_source_id = "cross_region_copy" + destination_dataset_id = google_bigquery_dataset.prod_kettle_dataset.dataset_id + service_account_name = google_service_account.bq_kettle_data_transfer_writer.email + disabled = true + + params = { + overwrite_destination_table = "true" + source_dataset_id = "build" + source_project_id = "k8s-gubernator" + } + + email_preferences { + enable_failure_email = false + } +}