Merge pull request #806 from spiffxp/prow-build-test
Add infra scripts/terraform for prow build cluster prototype
k8s-ci-robot committed Apr 28, 2020
2 parents e648557 + d58a30f commit 84e0ed5
Showing 8 changed files with 464 additions and 18 deletions.
26 changes: 26 additions & 0 deletions infra/gcp/clusters/kubernetes-public/prow-build-test/00-inputs.tf
@@ -0,0 +1,26 @@
/*
This file defines:
- Required Terraform version
- Required provider versions
- Storage backend details
- GCP project configuration
*/

terraform {
required_version = ">= 0.12.8"

backend "gcs" {
bucket = "k8s-infra-clusters-terraform"
prefix = "kubernetes-public/prow-build-test" // $project_name/$cluster_name
}

required_providers {
google = "~> 2.14"
google-beta = "~> 2.14"
}
}

// This looks up the project where we should install the cluster
data "google_project" "project" {
project_id = "kubernetes-public"
}
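
For anyone exercising this config locally, a minimal workflow might look like the following; this is a sketch assuming gcloud is installed and your account can read the k8s-infra-clusters-terraform bucket and the kubernetes-public project:

# Sketch: initialize the GCS backend declared above and review planned changes.
cd infra/gcp/clusters/kubernetes-public/prow-build-test
# Application-default credentials are used by both the google provider and the GCS backend.
gcloud auth application-default login
terraform init
terraform plan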
@@ -0,0 +1,156 @@
/*
This file defines:
- GCP Service Account for nodes
- Bigquery dataset for usage metering
- GKE cluster configuration
Note that it does not configure any node pools; this is done in a separate file.
*/

locals {
cluster_name = "prow-build-test" // This is the name of the cluster defined in this file
cluster_location = "us-central1" // This is the GCP location (region or zone) where the cluster should be created
bigquery_location = "US" // This is the bigquery specific location where the dataset should be created
}

// Create SA for nodes
resource "google_service_account" "cluster_node_sa" {
project = data.google_project.project.id
account_id = "gke-nodes-${local.cluster_name}"
display_name = "Nodes in GKE cluster '${local.cluster_name}'"
}

// Add roles for SA
resource "google_project_iam_member" "cluster_node_sa_logging" {
project = data.google_project.project.id
role = "roles/logging.logWriter"
member = "serviceAccount:${google_service_account.cluster_node_sa.email}"
}
resource "google_project_iam_member" "cluster_node_sa_monitoring_viewer" {
project = data.google_project.project.id
role = "roles/monitoring.viewer"
member = "serviceAccount:${google_service_account.cluster_node_sa.email}"
}
resource "google_project_iam_member" "cluster_node_sa_monitoring_metricwriter" {
project = data.google_project.project.id
role = "roles/monitoring.metricWriter"
member = "serviceAccount:${google_service_account.cluster_node_sa.email}"
}

// BigQuery dataset for usage data
resource "google_bigquery_dataset" "usage_metering" {
dataset_id = replace("usage_metering_${local.cluster_name}", "-", "_")
project = data.google_project.project.id
description = "GKE Usage Metering for cluster '${local.cluster_name}'"
location = local.bigquery_location

access {
role = "OWNER"
special_group = "projectOwners"
}
access {
role = "WRITER"
user_by_email = google_service_account.cluster_node_sa.email
}

// When false, this prevents destroying the dataset while it still contains data
// IMPORTANT: should be true on test clusters so they can be torn down
delete_contents_on_destroy = true
}

// Create GKE cluster, but with no node pools; node pools are defined in a separate file
resource "google_container_cluster" "cluster" {
name = local.cluster_name
location = local.cluster_location

provider = google-beta
project = data.google_project.project.id

// GKE clusters are critical objects and should not be destroyed
// IMPORTANT: should be false on test clusters
lifecycle {
prevent_destroy = false
}

// Network config
network = "default"

// Start with a single node, because we're going to delete the default pool
initial_node_count = 1

// Removes the default node pool, so we can custom create them as separate
// objects
remove_default_node_pool = true

// Disable local and certificate auth
master_auth {
username = ""
password = ""

client_certificate_config {
issue_client_certificate = false
}
}

// Enable google-groups for RBAC
authenticator_groups_config {
security_group = "gke-security-groups@kubernetes.io"
}

// Enable workload identity for GCP IAM
workload_identity_config {
identity_namespace = "${data.google_project.project.id}.svc.id.goog"
}

// Enable Stackdriver Kubernetes Monitoring
logging_service = "logging.googleapis.com/kubernetes"
monitoring_service = "monitoring.googleapis.com/kubernetes"

// Set maintenance time
maintenance_policy {
daily_maintenance_window {
start_time = "11:00" // (in UTC), 03:00 PST
}
}

// Restrict master to Google IP space; use Cloud Shell to access
master_authorized_networks_config {
}

// Enable GKE Usage Metering
resource_usage_export_config {
enable_network_egress_metering = true
bigquery_destination {
dataset_id = google_bigquery_dataset.usage_metering.dataset_id
}
}

// Enable GKE Network Policy
network_policy {
enabled = true
provider = "CALICO"
}

// Configure cluster addons
addons_config {
horizontal_pod_autoscaling {
disabled = false
}
http_load_balancing {
disabled = false
}
network_policy_config {
disabled = false
}
}

// PodSecurityPolicy enforcement (currently disabled)
pod_security_policy_config {
enabled = false // TODO: we should turn this on
}

// Enable VPA
vertical_pod_autoscaling {
enabled = true
}
}
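
Because master_authorized_networks_config above restricts the master to Google IP space, one plausible way to reach the cluster is from Cloud Shell; a sketch, assuming access to the kubernetes-public project:

# Sketch: fetch credentials for the regional cluster from Cloud Shell,
# which originates from Google IP space, then sanity-check access.
gcloud container clusters get-credentials prow-build-test \
  --region us-central1 --project kubernetes-public
kubectl get nodes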
@@ -0,0 +1,59 @@
/*
This file defines:
- Node pool for pool1
Note: If you wish to create additional node pools, please duplicate this file
and change the resource name, name_prefix, and any other pool-specific settings
(a sketch follows at the end of this file).
*/

resource "google_container_node_pool" "pool1" {
name_prefix = "pool1-"
location = google_container_cluster.cluster.location
cluster = google_container_cluster.cluster.name

provider = google-beta
project = data.google_project.project.id

// Start with a single node
initial_node_count = 1

// Auto repair, and auto upgrade nodes to match the master version
management {
auto_repair = true
auto_upgrade = true
}

// Autoscale the cluster as needed. Note that these values will be multiplied
// by 3, as the cluster will exist in three zones
autoscaling {
min_node_count = 1
max_node_count = 3
}

// Set machine type, and enable all oauth scopes tied to the service account
node_config {
// k8s-prow-builds uses n1-highmem-8
machine_type = "n1-highmem-2"
// k8s-prow-builds uses 250
disk_size_gb = 100
// k8s-prow-builds uses pd-ssd
disk_type = "pd-ssd"

service_account = google_service_account.cluster_node_sa.email
oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]

// Needed for workload identity
workload_metadata_config {
node_metadata = "GKE_METADATA_SERVER"
}
metadata = {
disable-legacy-endpoints = "true"
}
}

// If we need to destroy the node pool, create the new one before destroying
// the old one
lifecycle {
create_before_destroy = true
}
}
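
Per the duplication note at the top of this file, a second pool would be a near-copy of pool1 with a new resource name and name_prefix; the following is a hypothetical sketch (pool2 and its sizing are illustrative, not part of this commit):

// Hypothetical pool2, mirroring pool1; only the resource name,
// name_prefix, and pool-specific sizing differ.
resource "google_container_node_pool" "pool2" {
name_prefix = "pool2-"
location = google_container_cluster.cluster.location
cluster = google_container_cluster.cluster.name

provider = google-beta
project = data.google_project.project.id

initial_node_count = 1

management {
auto_repair = true
auto_upgrade = true
}

autoscaling {
min_node_count = 1
max_node_count = 3
}

node_config {
machine_type = "n1-highmem-8" // illustrative sizing
disk_size_gb = 250
disk_type = "pd-ssd"

service_account = google_service_account.cluster_node_sa.email
oauth_scopes = ["https://www.googleapis.com/auth/cloud-platform"]

workload_metadata_config {
node_metadata = "GKE_METADATA_SERVER"
}
metadata = {
disable-legacy-endpoints = "true"
}
}

lifecycle {
create_before_destroy = true
}
}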
155 changes: 155 additions & 0 deletions infra/gcp/ensure-e2e-projects.sh
@@ -0,0 +1,155 @@
#!/usr/bin/env bash
#
# Copyright 2019 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script creates & configures projects intended to be used for e2e
# testing of kubernetes and managed by boskos

set -o errexit
set -o nounset
set -o pipefail

SCRIPT_DIR=$(dirname "${BASH_SOURCE[0]}")
. "${SCRIPT_DIR}/lib.sh"

function usage() {
echo "usage: $0 [repo...]" > /dev/stderr
echo "example:" > /dev/stderr
echo " $0 # do all projects" > /dev/stderr
echo " $0 k8s-infra-node-e2e-project # just do one" > /dev/stderr
echo > /dev/stderr
}

## setup service accounts and ips for the prow build cluster

# TODO: replace prow-build-test with actual service account
PROW_BUILD_SVCACCT=$(svc_acct_email "kubernetes-public" "prow-build-test")

color 6 "Ensuring prow build cluster is empowered"
(
color 6 "Ensuring prow build cluster service-account exists"
ensure_service_account \
"kubernetes-public" \
"prow-build-test" \
"used by prowjobs that run in prow-build-test cluster"

color 6 "Empowering prow build cluster service-account to be used on prow build cluster"
# the namespace "test-pods" here must match the namespace defined in prow's config.yaml
# to launch pods defined by prowjobs
# eg: https://github.com/kubernetes/test-infra/blob/master/config/prow/config.yaml#L73
empower_ksa_to_svcacct \
"kubernetes-public.svc.id.goog[test-pods/prow-build]" \
"kubernetes-public" \
"${PROW_BUILD_SVCACCT}"

# manual parts:
# - create key, add to prow-build-test as service-account secret
# - gsutil iam ch serviceAccount:$PROW_BUILD_SVCACCT:objectAdmin gs://bashfire-prow
# - gsutil iam ch serviceAccount:$PROW_BUILD_SVCACCT:objectCreator gs://bashfire-prow
) 2>&1 | indent
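
For context, the empower_ksa_to_svcacct call above presumably reduces to a workload-identity IAM binding along these lines; a sketch, not the actual body of the lib.sh helper:

# Sketch: allow the test-pods/prow-build KSA to impersonate the GCP
# service account via workload identity (what empower_ksa_to_svcacct
# in lib.sh likely wraps; the real helper may differ).
gcloud iam service-accounts add-iam-policy-binding \
  "${PROW_BUILD_SVCACCT}" \
  --project kubernetes-public \
  --role roles/iam.workloadIdentityUser \
  --member "serviceAccount:kubernetes-public.svc.id.goog[test-pods/prow-build]"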

# TODO: replace boskos-janitor-test with actual service account
BOSKOS_JANITOR_SVCACCT=$(svc_acct_email "kubernetes-public" "boskos-janitor-test")

color 6 "Ensuring boskos-janitor is empowered"
(
color 6 "Ensuring boskos-janitor service account exists"
ensure_service_account \
"kubernetes-public" \
"boskos-janitor-test" \
"used by boskos-janitor in prow-build-test cluster"

color 6 "Empowering boskos-janitor service-account to be used on prow build cluster"
# the namespace "test-pods" here must match the namespace defined in prow's config.yaml
# to launch pods defined by prowjobs; most prowjobs as-written assume they can
# talk to either http://boskos (kubetest or bootstrap.py jobs) or
# https://boskos.svc.test-pods.cluster.local (some of the cluster-api jobs), so
# all boskos components are deployed to this namespace
empower_ksa_to_svcacct \
"kubernetes-public.svc.id.goog[test-pods/boskos-janitor]" \
"kubernetes-public" \
"${BOSKOS_JANITOR_SVCACCT}"

color 6 "Ensuring external ip address exists for boskos-metrics service in prow build cluster"
# this is so monitoring.prow.k8s.io is able to scrape metrics from boskos
# TODO: replace this with a global address used by an ingress
ensure_regional_address \
"kubernetes-public" \
"us-central1" \
"boskos-metrics" \
"to allow monitoring.k8s.prow.io to scrape boskos metrics"
) 2>&1 | indent
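
Similarly, ensure_regional_address presumably wraps address creation roughly as follows; a sketch (the real lib.sh helper likely checks for an existing address first):

# Sketch of the underlying address creation for boskos-metrics.
gcloud compute addresses create boskos-metrics \
  --project kubernetes-public \
  --region us-central1 \
  --description "to allow monitoring.prow.k8s.io to scrape boskos metrics"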

## setup projects to be used by e2e tests for standing up clusters

# TODO: replace spiffxp- projects with actual projects
E2E_PROJECTS=(
# for manual use during node-e2e job migration, eg: --gcp-project=spiffxp-node-e2e-project
spiffxp-node-e2e-project
# for manual use during job migration, eg: --gcp-project=spiffxp-gce-project
spiffxp-gce-project
# managed by boskos, part of the gce-project pool, eg: --gcp-project-type=gce-project
spiffxp-boskos-project-01
spiffxp-boskos-project-02
spiffxp-boskos-project-03
)

if [ $# = 0 ]; then
# default to all e2e projects
set -- "${E2E_PROJECTS[@]}"
fi

color 6 "Ensuring e2e projects exist and are appropriately configured"
for prj; do
color 6 "Ensuring e2e project exists and is appropriately configured: ${prj}"
(
ensure_project "${prj}"

color 6 "Enabling APIs necessary for kubernetes e2e jobs to use e2e project: ${prj}"
enable_api "${prj}" compute.googleapis.com
enable_api "${prj}" logging.googleapis.com
enable_api "${prj}" storage-component.googleapis.com

color 6 "Empower prow-build service account to edit e2e project: ${prj}"
# TODO: this is what prow.k8s.io uses today, but it is likely over-permissioned; we
# could look into creating a more constrained IAM role and using that instead
gcloud \
projects add-iam-policy-binding "${prj}" \
--member "serviceAccount:${PROW_BUILD_SVCACCT}" \
--role roles/editor

color 6 "Empower boskos-janitor service account to clean e2e project: ${prj}"
# TODO: this is what prow.k8s.io uses today, but it is likely over-permissioned; we
# could look into creating a more constrained IAM role and using that instead
gcloud \
projects add-iam-policy-binding "${prj}" \
--member "serviceAccount:${BOSKOS_JANITOR_SVCACCT}" \
--role roles/editor

color 6 "Ensure prow-build prowjobs are able to ssh to instances in e2e project: ${prj}"
# TODO: this is what prow.k8s.io does today; we could look into using OS Login instead
prow_build_ssh_pubkey="prow:ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCmYxHh/wwcV0P1aChuFLpl28w6DFyc7G5Xrw1F8wH1Re9AdxyemM2bTZ/PhsP3u9VDnNbyOw3UN00VFdumkFLjLf1WQ7Q6rZDlPjlw7urBIvAMqUecY6ae1znqsZ0dMBxOuPXHznlnjLjM5b7O7q5WsQMCA9Szbmz6DsuSyCuX0It2osBTN+8P/Fa6BNh3W8AF60M7L8/aUzLfbXVS2LIQKAHHD8CWqvXhLPuTJ03iSwFvgtAK1/J2XJwUP+OzAFrxj6A9LW5ZZgk3R3kRKr0xT/L7hga41rB1qy8Uz+Xr/PTVMNGW+nmU4bPgFchCK0JBK7B12ZcdVVFUEdpaAiKZ prow"

# append to project-wide ssh-keys metadata if not present
ssh_pubkeys=$(mktemp "/tmp/${prj}-ssh-keys-XXXX")
gcloud compute project-info describe --project="${prj}" --format=json | \
jq -r '(.commonInstanceMetadata.items//[])[]|select(.key=="ssh-keys").value' > "${ssh_pubkeys}"
if ! grep -q "${prow_build_ssh_pubkey}" "${ssh_pubkeys}"; then
echo "${prow_build_ssh_pubkey}" >> "${ssh_pubkeys}"
gcloud compute project-info add-metadata --project="${prj}" \
--metadata-from-file ssh-keys="${ssh_pubkeys}"
fi
) 2>&1 | indent
done 2>&1 | indent
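
Typical invocations, mirroring the usage text above:

# Ensure every project in E2E_PROJECTS:
./ensure-e2e-projects.sh
# Ensure a single project:
./ensure-e2e-projects.sh spiffxp-boskos-project-01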