Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-9507 Modifications for DAOS v2.0 #9

Merged
merged 5 commits into from
Jan 14, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 13 additions & 8 deletions images/configs/daos_server.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,29 @@ transport_config:
allow_insecure: true
provider: ofi+tcp;ofi_rxm
disable_vfio: true
crt_timeout: 200
crt_timeout: 300
nr_hugepages: 4096
control_log_file: /var/daos/daos_server.log

engines:
-
targets: 8
nr_xs_helpers: 0
first_core: 1
fabric_iface_port: 31316
bypass_health_chk: true
fabric_iface: eth0
fabric_iface_port: 31316
log_mask: ERR
log_file: /var/daos/engine.log

env_vars:
- FI_OFI_RXM_DEF_TCP_WAIT_OBJ=pollfd

scm_mount: /var/daos/ram
scm_class: ram
scm_size: 100

bdev_class: nvme
bdev_list: ["0000:00:04.0"]
storage:
-
scm_mount: /var/daos/ram
class: ram
scm_size: 100
-
class: nvme
bdev_list: ["0000:00:04.0"]
14 changes: 6 additions & 8 deletions images/daos-client-image.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
{
"variables": {
"DAOS_VERSION": "1.2.0",
"IO500_INSTALL_DIR": "/usr/local"
"DAOS_VERSION": "2.0.0"
},
"builders": [
{
"type": "googlecompute",
"name": "daos-client-centos7",
"account_file": "{{user `account_file`}}",
"project_id": "{{user `project_id`}}",
"source_image_project_id": [
"cloud-hpc-image-public"
],
"source_image_family": "hpc-centos-7",
"image_name": "daos-client-v{{isotime \"20060102-030405\"}}",
"image_family": "daos-client",
"image_name": "daos-client-centos7-v{{isotime \"20060102-030405\"}}",
"image_family": "daos-client-centos7",
markaolson marked this conversation as resolved.
Show resolved Hide resolved
"machine_type": "n1-standard-16",
"disk_size": "20",
"ssh_username": "packer",
Expand Down Expand Up @@ -41,14 +41,12 @@
{
"type": "shell",
"execute_command": "echo 'packer' | sudo -S env {{ .Vars }} {{ .Path }}",
"environment_vars": "IO500_INSTALL_DIR={{user `IO500_INSTALL_DIR`}}",
"script": "./scripts/mfu_install.sh"
"script": "./scripts/mpifileutils_install.sh"
},
{
"type": "shell",
"execute_command": "echo 'packer' | sudo -S env {{ .Vars }} {{ .Path }}",
"environment_vars": "IO500_INSTALL_DIR={{user `IO500_INSTALL_DIR`}}",
"script": "./scripts/io500_install.sh"
"script": "./scripts/io500-sc21_install.sh"
markaolson marked this conversation as resolved.
Show resolved Hide resolved
}
]
}
11 changes: 6 additions & 5 deletions images/daos-server-image.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
{
"variables": {
"DAOS_VERSION": "1.2.0"
"DAOS_VERSION": "2.0.0"
},
"builders": [
{
"type": "googlecompute",
"name": "daos-server-centos7",
"account_file": "{{user `account_file`}}",
"project_id": "{{user `project_id`}}",
"source_image_project_id": [
"centos-cloud"
],
"source_image_family": "centos-7",
"image_name": "daos-server-v{{isotime \"20060102-030405\"}}",
"image_family": "daos-server",
"image_name": "daos-server-centos7-v{{isotime \"20060102-030405\"}}",
"image_family": "daos-server-centos7",
"machine_type": "n1-standard-16",
"disk_size": "20",
"ssh_username": "packer",
Expand All @@ -35,7 +36,7 @@
"type": "shell",
"execute_command": "echo 'packer' | sudo -S env {{ .Vars }} {{ .Path }}",
"environment_vars": "DAOS_VERSION={{user `DAOS_VERSION`}}",
"script": "./scripts/install.sh"
"script": "./scripts/install-server.sh"
},
{
"type": "file",
Expand All @@ -51,7 +52,7 @@
"type": "shell",
"execute_command": "echo 'packer' | sudo -S env {{ .Vars }} {{ .Path }}",
"environment_vars": "DAOS_VERSION={{user `DAOS_VERSION`}}",
"script": "./scripts/setup.sh"
"script": "./scripts/setup-server.sh"
}
]
}
97 changes: 66 additions & 31 deletions images/make_images.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,56 +5,91 @@
# without warranty or representation for any use or purpose.
# Your use of it is subject to your agreements with Google.

#
# To build both DAOS client and server images:
# ./make_images.sh
#
# To build DAOS client images only:
# ./make_images.sh client
#
# To build DAOS server images only:
# ./make_images.sh server
#

set -e
trap 'echo "Unexpected and unchecked error. Exiting."' ERR

# Set environment variable defaults if not already set
: "${IMAGE_TYPE:=all}"

if [[ ! -z $1 ]]; then
IMAGE_TYPE=$(echo $1 | tr '[A-Z]' '[a-z]')
if [[ ! $IMAGE_TYPE =~ ^(all|server|client)$ ]]; then
echo "Invalid value passed for first arg."
echo "Valid values are 'all', 'client', 'server'"
exit 1
fi
fi

PROJECT=$(gcloud info --format="value(config.project)")
fwrulename=gcp-cloudbuild-ssh
FWRULENAME=gcp-cloudbuild-ssh


# The service account used here should have been already created
#by the "packer_build" step. We are just checking here.
CLOUD_BUILD_ACCOUNT=$(gcloud projects get-iam-policy $PROJECT \
CLOUD_BUILD_ACCOUNT=$(gcloud projects get-iam-policy "${PROJECT}" \
--filter="(bindings.role:roles/cloudbuild.builds.builder)" \
--flatten="bindings[].members" --format="value(bindings.members[])")
--flatten="bindings[].members" \
--format="value(bindings.members[])" \
--limit=1)
echo "Packer will be using service account ${CLOUD_BUILD_ACCOUNT}"


# Add cloudbuild SA permissions
gcloud projects add-iam-policy-binding $PROJECT \
--member $CLOUD_BUILD_ACCOUNT \
gcloud projects add-iam-policy-binding "${PROJECT}" \
--member "${CLOUD_BUILD_ACCOUNT}" \
--role roles/compute.instanceAdmin.v1

gcloud projects add-iam-policy-binding $PROJECT \
--member $CLOUD_BUILD_ACCOUNT \
gcloud projects add-iam-policy-binding "${PROJECT}" \
--member "${CLOUD_BUILD_ACCOUNT}" \
--role roles/iam.serviceAccountUser


# check if we have an ssh firewall rule for cloudbuild in place already
fwlist=$(gcloud compute --project=${PROJECT} firewall-rules list --filter name=${fwrulename} \
# Check if we have an ssh firewall rule for cloudbuild in place already
FWLIST=$(gcloud compute --project="${PROJECT}" \
firewall-rules list \
--filter name="${FWRULENAME}" \
--sort-by priority \
--format='value(name)')

if [ -z $fwlist ] ;
then
#setup firewall rule to allow ssh from clould build.
#FIXME: Needs to be fixed to restric to IP range
#for clound build only once we know what that is.
echo "setting up firewall rule for ssh and clouldbuild."
gcloud compute --project=${PROJECT} firewall-rules create ${fwrulename} \
--direction=INGRESS --priority=1000 --network=default --action=ALLOW \
--rules=tcp:22 --source-ranges=0.0.0.0/0
else
echo "Firewall rule for ssh and cloud build already in place. "
if [[ -z $FWLIST ]]; then
# Set up a firewall rule to allow ssh from Cloud Build.
# FIXME: Needs to be restricted to the Cloud Build IP range
# once we know what that is.
echo "Setting up firewall rule for ssh and clouldbuild"
gcloud compute --project="${PROJECT}" firewall-rules create "${FWRULENAME}" \
--direction=INGRESS --priority=1000 --network=default --action=ALLOW \
--rules=tcp:22 --source-ranges=0.0.0.0/0
else
echo "Firewall rule for ssh and cloud build already in place. "
fi


#build image. We need to make sure we don't time out so we increase to 1hr.
gcloud builds submit --timeout=1800s \
--substitutions=_PROJECT_ID=${PROJECT} \
--config=packer_cloudbuild.yaml .


gcloud builds submit --timeout=1800s \
--substitutions=_PROJECT_ID=${PROJECT} \
--config=packer_cloudbuild-client.yaml .
# Build images.
# Use a 30-minute build timeout (--timeout=1800s) so long image builds don't time out
if [[ $IMAGE_TYPE =~ ^(all|server)$ ]]; then
printf "\nBuilding server image(s)\n\n"
gcloud builds submit --timeout=1800s \
--substitutions=_PROJECT_ID="${PROJECT}" \
--config=packer_cloudbuild-server.yaml .
fi

# remove ssh firewall
gcloud -q compute --project=${PROJECT} firewall-rules delete ${fwrulename}
if [[ $IMAGE_TYPE =~ ^(all|client)$ ]]; then
printf "\nBuilding client image(s)\n\n"
gcloud builds submit --timeout=1800s \
--substitutions=_PROJECT_ID="${PROJECT}" \
--config=packer_cloudbuild-client.yaml .
fi

# Remove ssh firewall
gcloud -q compute --project="${PROJECT}" firewall-rules delete "${FWRULENAME}"
File renamed without changes.
88 changes: 58 additions & 30 deletions images/scripts/install-client.sh
Original file line number Diff line number Diff line change
@@ -1,42 +1,35 @@
#!/bin/bash
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Install Intel OneAPI and the DAOS Client
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
trap 'echo "An unexpected error occurred. Exiting."' ERR

echo "Installing DAOS version ${DAOS_VERSION}"
# DAOS_VERSION must be set before running this script
if [[ -z $DAOS_VERSION ]]; then
echo "DAOS_VERSION not set. Exiting."
exit 1
fi

# Install 1.2.0 RPMs from official site
tee /etc/yum.repos.d/daos.repo > /dev/null <<EOF
[daos-packages]
name=DAOS v1.2 Packages
baseurl=https://packages.daos.io/v1.2/CentOS7/packages/x86_64/
enabled=1
gpgcheck=1
protect=1
gpgkey=https://packages.daos.io/RPM-GPG-KEY
EOF

# Install DAOS RPMs
yum install -y daos-client daos-devel
# Print a message framed above and below by an 80-column rule of dashes.
# Arguments: $1 - message text
# Outputs:   blank line, rule, message, rule (each newline-terminated) on stdout
log() {
  local text="$1"
  local rule
  # Build the 80-character rule once: pad an empty string to width 80,
  # then turn every space into a dash.
  printf -v rule '%80s' ''
  rule="${rule// /-}"
  printf '\n%s\n%s\n%s\n' "${rule}" "${text}" "${rule}"
}

# enable daos_agent in systemd (will be started automatically at boot time)
systemctl enable daos_agent

echo "Installing Intel oneAPI MPI"
log "Cleaning yum cache and running yum update"
yum clean all
yum makecache
yum update -y

# Install Intel MPI from oneAPI package
tee > /etc/yum.repos.d/oneAPI.repo << EOF
log "Installing Intel oneAPI MPI"

# Install Intel MPI from Intel oneAPI package
cat > /etc/yum.repos.d/oneAPI.repo <<EOF
[oneAPI]
name=Intel(R) oneAPI repository
baseurl=https://yum.repos.intel.com/oneapi
Expand All @@ -48,9 +41,44 @@ EOF

yum install -y intel-oneapi-mpi intel-oneapi-mpi-devel

# Determine which repo to use
# Source the OS identification file; ID and VERSION_ID used below are
# expected to be defined by it.
. /etc/os-release
# Keep only the part of VERSION_ID before the first dot (the major version).
OS_VERSION=$(echo "${VERSION_ID}" | cut -d. -f1)
# Lowercased distro ID joined to the major version, e.g. "centos_7".
OS_VERSION_ID="${ID,,}_${OS_VERSION}"
# Map the detected OS to the directory name used in the DAOS package repo URL.
# Any OS not listed here aborts the script with exit status 1.
case ${OS_VERSION_ID} in
centos_7)
DAOS_OS_VERSION="CentOS7";;
centos_8)
DAOS_OS_VERSION="CentOS8";;
# rocky_8 is given the same "CentOS8" value as centos_8.
rocky_8)
DAOS_OS_VERSION="CentOS8";;
*)
printf "\nUnsupported OS: %s. Exiting\n" "${OS_VERSION_ID}"
exit 1
;;
esac

log "Adding yum repo for DAOS version ${DAOS_VERSION}"
cat > /etc/yum.repos.d/daos.repo <<EOF
[daos-packages]
name=DAOS v${DAOS_VERSION} Packages
baseurl=https://packages.daos.io/v${DAOS_VERSION}/${DAOS_OS_VERSION}/packages/x86_64/
enabled=1
gpgcheck=1
protect=1
gpgkey=https://packages.daos.io/RPM-GPG-KEY
EOF

# Install DAOS client packages
log "Installing daos-client v${DAOS_VERSION}"
yum install -y daos-client daos-devel

# Install some other software helpful for development
# (e.g. to compile ior or fio)
yum install -y gcc git autoconf automake libuuid-devel devtoolset-9-gcc
log "Installing additional packages needed on DAOS clients"
yum install -y gcc git autoconf automake libuuid-devel devtoolset-9-gcc patch

# TODO:
# - enable gvnic

printf "\nDAOS client v${DAOS_VERSION} install complete!\n\n"
Loading