Skip to content

Commit

Permalink
Merge pull request #2702 from GoogleCloudPlatform/release-candidate
Browse files Browse the repository at this point in the history
Release v1.35.0
  • Loading branch information
alyssa-sm authored Jun 20, 2024
2 parents 627b43a + 1b55f2d commit eaeacfb
Show file tree
Hide file tree
Showing 131 changed files with 1,872 additions and 521 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/pr-precommit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ on:
- labeled
- synchronize
branches:
- main
- develop
- release-candidate

jobs:
pre-commit:
Expand Down
13 changes: 10 additions & 3 deletions cmd/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ import (
)

func addDeployFlags(c *cobra.Command) *cobra.Command {
return addAutoApproveFlag(
addArtifactsDirFlag(
addCreateFlags(c)))
return addGroupSelectionFlags(
addAutoApproveFlag(
addArtifactsDirFlag(
addCreateFlags(c))))
}

func init() {
Expand Down Expand Up @@ -71,10 +72,16 @@ func doDeploy(deplRoot string) {
checkErr(shell.CheckWritableDir(artDir), nil)
bp, ctx := artifactBlueprintOrDie(artDir)
groups := bp.Groups
checkErr(validateGroupSelectionFlags(bp), ctx)
checkErr(validateRuntimeDependencies(deplRoot, groups), ctx)
checkErr(shell.ValidateDeploymentDirectory(groups, deplRoot), ctx)

for ig, group := range groups {
if !isGroupSelected(group.Name) {
logging.Info("skipping group %q", group.Name)
continue
}

groupDir := filepath.Join(deplRoot, string(group.Name))
checkErr(shell.ImportInputs(groupDir, artDir, bp), ctx)

Expand Down
11 changes: 8 additions & 3 deletions cmd/destroy.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ import (

func init() {
rootCmd.AddCommand(
addAutoApproveFlag(
addArtifactsDirFlag(destroyCmd)))
addGroupSelectionFlags(
addAutoApproveFlag(
addArtifactsDirFlag(destroyCmd))))
}

var (
Expand All @@ -56,13 +57,17 @@ func runDestroyCmd(cmd *cobra.Command, args []string) {
}

bp, ctx := artifactBlueprintOrDie(artifactsDir)

checkErr(validateGroupSelectionFlags(bp), ctx)
checkErr(shell.ValidateDeploymentDirectory(bp.Groups, deplRoot), ctx)

// destroy in reverse order of creation!
packerManifests := []string{}
for i := len(bp.Groups) - 1; i >= 0; i-- {
group := bp.Groups[i]
if !isGroupSelected(group.Name) {
logging.Info("skipping group %q", group.Name)
continue
}
groupDir := filepath.Join(deplRoot, string(group.Name))

if err := shell.ImportInputs(groupDir, artifactsDir, bp); err != nil {
Expand Down
2 changes: 1 addition & 1 deletion cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ HPC deployments on the Google Cloud Platform.`,
logging.Fatal("cmd.Help function failed: %s", err)
}
},
Version: "v1.34.1",
Version: "v1.35.0",
Annotations: annotation,
}
)
Expand Down
41 changes: 41 additions & 0 deletions cmd/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@
package cmd

import (
"errors"
"fmt"
"hpc-toolkit/pkg/config"
"hpc-toolkit/pkg/modulewriter"
"hpc-toolkit/pkg/shell"
"os"
"slices"

"github.com/spf13/cobra"
)
Expand Down Expand Up @@ -78,3 +81,41 @@ func filterYaml(cmd *cobra.Command, args []string, toComplete string) ([]string,
}
return []string{"yaml", "yml"}, cobra.ShellCompDirectiveFilterFileExt
}

// Group selection state shared by every command that calls
// addGroupSelectionFlags. Both stay nil unless the matching flag is given.
var (
	// flagSkipGroups holds the group names passed via --skip.
	flagSkipGroups []string
	// flagOnlyGroups holds the group names passed via --only.
	flagOnlyGroups []string
)

// addGroupSelectionFlags registers the --skip and --only string-slice flags
// on c, binding them to the package-level flagSkipGroups and flagOnlyGroups.
// It returns c so that flag helpers can be nested.
func addGroupSelectionFlags(c *cobra.Command) *cobra.Command {
	flags := c.Flags()
	flags.StringSliceVar(&flagSkipGroups, "skip", nil, "Skip groups with the given names")
	flags.StringSliceVar(&flagOnlyGroups, "only", nil, "Only apply to groups with the given names")
	return c
}

// validateGroupSelectionFlags checks the --only / --skip flag values against
// the groups declared in bp.
//
// It returns an error when both flags are set, or when any requested name
// does not match a group in bp; in the latter case the error is passed through
// config.HintSpelling together with the list of known group names.
func validateGroupSelectionFlags(bp config.Blueprint) error {
	if flagOnlyGroups != nil && flagSkipGroups != nil {
		return errors.New("cannot specify both --only and --skip")
	}

	known := make([]string, 0, len(bp.Groups))
	for i := range bp.Groups {
		known = append(known, string(bp.Groups[i].Name))
	}

	// Walk --only first, then --skip, without concatenating the two slices so
	// neither flag's backing array is ever written to.
	for _, requested := range [][]string{flagOnlyGroups, flagSkipGroups} {
		for _, name := range requested {
			if slices.Contains(known, name) {
				continue
			}
			return config.HintSpelling(name, known, fmt.Errorf("group %q not found", name))
		}
	}

	return nil
}

// isGroupSelected reports whether group g should be processed given the
// current --only / --skip flag values:
//   - --only set: g is selected only if it is listed;
//   - --skip set: g is selected unless it is listed;
//   - neither set: every group is selected.
//
// --only takes precedence if both happen to be non-nil.
func isGroupSelected(g config.GroupName) bool {
	name := string(g)
	switch {
	case flagOnlyGroups != nil:
		return slices.Contains(flagOnlyGroups, name)
	case flagSkipGroups != nil:
		return !slices.Contains(flagSkipGroups, name)
	default:
		return true
	}
}
83 changes: 83 additions & 0 deletions cmd/utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package cmd

import (
"fmt"
"hpc-toolkit/pkg/config"
"testing"
)

func TestIsGroupSelected(t *testing.T) {
type test struct {
only []string
skip []string
group config.GroupName
want bool
}
tests := []test{
{nil, nil, "green", true},
{[]string{"green"}, nil, "green", true},
{[]string{"green"}, nil, "blue", false},
{nil, []string{"green"}, "green", false},
{nil, []string{"green"}, "blue", true},
}

for _, tc := range tests {
t.Run(fmt.Sprintf("%v;%v;%q", tc.only, tc.skip, tc.group), func(t *testing.T) {
flagOnlyGroups, flagSkipGroups = tc.only, tc.skip
got := isGroupSelected(tc.group)
if got != tc.want {
t.Errorf("isGroupSelected(%v) = %v; want %v", tc.group, got, tc.want)
}
})
}
}

// TestValidateGroupSelectionFlags checks that validateGroupSelectionFlags
// rejects --only combined with --skip, rejects names that are not groups of
// the blueprint, and accepts everything else.
func TestValidateGroupSelectionFlags(t *testing.T) {
	// The cases below overwrite package-level flag state; restore it afterwards
	// so other tests in this package are not affected by leftover values.
	origOnly, origSkip := flagOnlyGroups, flagSkipGroups
	t.Cleanup(func() { flagOnlyGroups, flagSkipGroups = origOnly, origSkip })

	type test struct {
		only   []string
		skip   []string
		groups []string
		err    bool
	}
	tests := []test{
		{nil, nil, []string{"green"}, false},
		{[]string{"green"}, []string{"blue"}, []string{"green", "blue"}, true},
		{[]string{"green"}, nil, []string{"green"}, false},
		{[]string{"green"}, nil, []string{"blue"}, true},
		{nil, []string{"green"}, []string{"green"}, false},
		{nil, []string{"green"}, []string{"blue"}, true},
	}

	for _, tc := range tests {
		t.Run(fmt.Sprintf("%v;%v;%v", tc.only, tc.skip, tc.groups), func(t *testing.T) {
			flagOnlyGroups, flagSkipGroups = tc.only, tc.skip

			// Build a blueprint containing only the group names for this case.
			bp := config.Blueprint{}
			for _, g := range tc.groups {
				bp.Groups = append(bp.Groups, config.Group{Name: config.GroupName(g)})
			}

			err := validateGroupSelectionFlags(bp)
			if tc.err && err == nil {
				t.Errorf("validateGroupSelectionFlags(%v) = nil; want error", tc.groups)
			}
			if !tc.err && err != nil {
				t.Errorf("validateGroupSelectionFlags(%v) = %v; want nil", tc.groups, err)
			}
		})
	}
}
2 changes: 1 addition & 1 deletion community/examples/hpc-build-slurm-image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ vars:
image_build_machine_type: n2d-standard-16
build_from_image_family: hpc-rocky-linux-8
build_from_image_project: cloud-hpc-image-public
build_from_git_ref: 6.5.6
build_from_git_ref: 6.5.8
built_image_family: my-custom-slurm
built_instance_image:
family: $(vars.built_image_family)
Expand Down
25 changes: 14 additions & 11 deletions community/examples/ml-gke.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ blueprint_name: ml-gke
vars:
project_id: ## Set GCP Project ID Here ##
deployment_name: ml-01
region: us-central1
region: asia-southeast1
zones:
- asia-southeast1-b # g2 machines have better availability in this zone

# Cidr block containing the IP of the machine calling terraform.
# The following line must be updated for this example to work.
Expand Down Expand Up @@ -48,22 +50,23 @@ deployment_groups:
cidr_block: $(vars.authorized_cidr)
outputs: [instructions]

# Docs at https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/community/modules/scripts/kubernetes-operations
- id: install-nvidia-drivers
source: github.com/GoogleCloudPlatform/ai-infra-cluster-provisioning//aiinfra-cluster/modules/kubernetes-operations?ref=v0.6.0
use: [gke_cluster]
settings:
install_nvidia_driver: true

- id: a2-pool
- id: g2-pool
source: community/modules/compute/gke-node-pool
use: [gke_cluster]
settings:
machine_type: a2-highgpu-8g
disk_type: pd-balanced
machine_type: g2-standard-4
guest_accelerator:
- type: nvidia-l4
count: 1
gpu_partition_size: null
gpu_sharing_config: null
gpu_driver_installation_config:
- gpu_driver_version: "DEFAULT"

- id: job-template
source: community/modules/compute/gke-job-template
use: [a2-pool]
use: [g2-pool]
settings:
image: nvidia/cuda:11.0.3-runtime-ubuntu20.04
command:
Expand Down
2 changes: 2 additions & 0 deletions community/front-end/ofe/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ PRJ_API['bigqueryconnection.googleapis.com']='BigQuery Connection API'
PRJ_API['sqladmin.googleapis.com']='Cloud SQL Admin API'
PRJ_API['servicenetworking.googleapis.com']='Service Networking API'
PRJ_API['secretmanager.googleapis.com']='Secret Manager API'
PRJ_API['serviceusage.googleapis.com']='Service Usage API'
PRJ_API['storage.googleapis.com']='Cloud Storage API'

# Location for output credential file = pwd/credential.json
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ autostart=true
autorestart=true
user=gcluster
redirect_stderr=true
environment=HOME=/opt/gcluster
stdout_logfile=/opt/gcluster/run/supvisor.log" >/etc/supervisord.d/gcluster.ini

printf "Creating systemd service..."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ limitations under the License.
|------|---------|
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 0.12.31 |
| <a name="requirement_google"></a> [google](#requirement\_google) | >= 3.54 |
| <a name="requirement_google-beta"></a> [google-beta](#requirement\_google-beta) | >= 3.83 |
| <a name="requirement_random"></a> [random](#requirement\_random) | >= 3.0 |

## Providers

| Name | Version |
|------|---------|
| <a name="provider_google"></a> [google](#provider\_google) | >= 3.54 |
| <a name="provider_google-beta"></a> [google-beta](#provider\_google-beta) | >= 3.83 |
| <a name="provider_random"></a> [random](#provider\_random) | >= 3.0 |

## Modules
Expand All @@ -36,11 +38,14 @@ No modules.

| Name | Type |
|------|------|
| [google-beta_google_compute_global_address.private_ip_alloc](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_compute_global_address) | resource |
| [google_compute_firewall.firewall_allow_ssh](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_firewall) | resource |
| [google_compute_firewall.firewall_internal](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_firewall) | resource |
| [google_compute_network.network](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_network) | resource |
| [google_compute_router.network_router](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_router) | resource |
| [google_compute_router_nat.network_nat](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_router_nat) | resource |
| [google_service_networking_connection.private_vpc_connection](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/service_networking_connection) | resource |
| [random_id.resource_name_suffix](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/id) | resource |
| [random_pet.vpc_name](https://registry.terraform.io/providers/hashicorp/random/latest/docs/resources/pet) | resource |

## Inputs
Expand Down
28 changes: 28 additions & 0 deletions community/front-end/ofe/infrastructure_files/vpc_tf/GCP/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,34 @@ resource "google_compute_firewall" "firewall_internal" {
allow { protocol = "icmp" }
}

# Values shared by the resources in this file.
locals {
# This label allows for billing report tracking based on module.
# It is attached to the reserved peering address range via `labels`.
labels = {
created_by = "ofe"
}
}

# Random 4-byte identifier; its hex form is appended to the name of the
# reserved peering address range.
resource "random_id" "resource_name_suffix" {
byte_length = 4
}

# Reserves an internal /16 address range on the VPC with purpose VPC_PEERING.
# The range name carries the random hex suffix and is handed to the service
# networking connection as its reserved peering range.
# NOTE(review): presumably google-beta is used because `labels` on this
# resource needs the beta provider - confirm.
resource "google_compute_global_address" "private_ip_alloc" {
provider = google-beta
project = var.project
name = "global-psconnect-ip-${random_id.resource_name_suffix.hex}"
purpose = "VPC_PEERING"
address_type = "INTERNAL"
network = google_compute_network.network.self_link
prefix_length = 16
labels = local.labels
}

# Connects the VPC to the servicenetworking.googleapis.com service, giving it
# the reserved address range as its peering range.
# NOTE(review): presumably this is what lets managed services such as
# Cloud SQL use private IPs in this VPC - confirm against the consumers.
resource "google_service_networking_connection" "private_vpc_connection" {
network = google_compute_network.network.self_link
service = "servicenetworking.googleapis.com"
reserved_peering_ranges = [google_compute_global_address.private_ip_alloc.name]
}

output "vpc_id" {
value = google_compute_network.network.name
description = "Name of the created VPC"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ terraform {
source = "hashicorp/google"
version = ">= 3.54"
}
google-beta = {
source = "hashicorp/google-beta"
version = ">= 3.83"
}
random = {
source = "hashicorp/random"
version = ">= 3.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
"n1": defaultdict(lambda: "x86_64"),
"c3": defaultdict(lambda: "sapphirerapids"),
"c3d": defaultdict(lambda: "zen2"),
"c4": defaultdict(lambda: "emeraldrapids"),
# Compute Optimized
"c2": defaultdict(lambda: "cascadelake"),
"c2d": defaultdict(
Expand Down Expand Up @@ -359,6 +360,7 @@ def get_cpu_price(num_cores, instance_type, skus):
"n2d": "N2D AMD Instance Core",
"h3": "Compute optimized Core",
"c3": "Compute optimized Core",
"c4": "Compute optimized Core",
"c2": "Compute optimized Core",
"c2d": "C2D AMD Instance Core",
"c3d": "C3D AMD Instance Core",
Expand Down Expand Up @@ -411,6 +413,7 @@ def get_mem_price(num_gb, instance_type, skus):
"h3": "Compute optimized Ram",
"c2d": "C2D AMD Instance Ram",
"c3d": "C3D AMD Instance Ram",
"c4": "C4 Instance RAM",
"t2d": "T2D AMD Instance Ram",
"a2": "A2 Instance Ram",
"m1": "Memory-optimized Instance Ram",
Expand Down
Loading

0 comments on commit eaeacfb

Please sign in to comment.