Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update bot configuration for EESSI 2023.06 #262

Merged
merged 3 commits into from
Jun 20, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 117 additions & 36 deletions bot/bot-eessi-aws-citc.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,23 @@ app_name = eessi-bot-citc-aws
installation_id = 33078935

# path to the private key that was generated when the GitHub App was registered
private_key = /mnt/shared/home/bot/eessi-bot-software-layer/eessi-bot-citc-aws-private-key.pem
private_key = /mnt/shared/home/bot/eessi-bot-software-layer/eessi-bot-citc-aws.2023-01-12.private-key.pem


[bot_control]
# which GH accounts have the permission to send commands to the bot
# if value is left empty, everyone can send commands
# value can be a space delimited list of GH accounts
command_permission = boegel trz42 bedroge

# format of the response when processing bot commands
command_response_fmt =
<details><summary>Updates by the bot instance <code>{app_name}</code>
<em>(click for details)</em></summary>

{comment_response}
{comment_result}
</details>


[buildenv]
Expand All @@ -35,7 +51,7 @@ container_cachedir = /mnt/shared/home/bot/eessi-bot-software-layer/containers-ca
# it may happen that we need to customize some CVMFS configuration
# the value of cvmfs_customizations is a dictionary which maps a file
# name to an entry that needs to be added to that file
cvmfs_customizations = {}
# cvmfs_customizations = {}

# if compute nodes have no internet connection, we need to set http(s)_proxy
# or commands such as pip3 cannot download software from package repositories
Expand All @@ -52,7 +68,7 @@ jobs_base_dir = /mnt/shared/home/bot/eessi-bot-software-layer/jobs
# useful/needed if some tool is not provided as system-wide package
# (read by bot and handed over to build_job_script via parameter
# --load-modules)
load_modules =
# load_modules =

# PATH to temporary directory on build node ... ends up being used for,
# for example, EESSI_TMPDIR --> /tmp/$USER/EESSI
Expand All @@ -78,22 +94,91 @@ submit_command = /usr/bin/sbatch
# value can be a space delimited list of GH accounts
build_permission = boegel trz42 bedroge

# template for comment when user who set a label has no permission to trigger build jobs
no_build_permission_comment = Label `bot:build` has been set by user `{build_labeler}`, but only users `{build_permission_users}` have permission to trigger the action


[deploycfg]
# script for uploading built software packages
tarball_upload_script = /mnt/shared/home/bot/eessi-bot-software-layer/scripts/eessi-upload-to-staging

# URL to S3/minio bucket
# if attribute is set, bucket_base will be constructed as follows
# bucket_base=${endpoint_url}/${bucket_name}
# otherwise, bucket_base will be constructed as follows
# bucket_base=https://${bucket_name}.s3.amazonaws.com
# - The former variant is used for non-AWS S3 services, e.g., minio, or when
# the bucket name is not provided in the hostname (see latter case).
# - The latter variant is used for AWS S3 services.
# endpoint_url = URL_TO_S3_SERVER

# bucket name
bucket_name = eessi-staging-2023.06

# upload policy: defines what policy is used for uploading built artefacts
# to an S3 bucket
# 'all' ..: upload all artefacts (multiple uploads of the same artefact possible)
# 'latest': for each build target (eessi-VERSION-{software,init,compat}-OS-ARCH)
# only upload the latest built artefact
# 'once' : only once upload any built artefact for the build target
# 'none' : do not upload any built artefacts
upload_policy = once

# which GH account has the permission to trigger the deployment (by setting
# the label 'bot:deploy'; apparently this cannot be restricted on GitHub)
# if value is left empty, everyone can trigger the deployment
# value can be a space delimited list of GH accounts
deploy_permission = boegel trz42 bedroge

# template for comment when user who set a label has no permission to trigger deploying tarballs
no_deploy_permission_comment = Label `bot:deploy` has been set by user `{deploy_labeler}`, but only users `{deploy_permission_users}` have permission to trigger the action


[architecturetargets]
# defines both for which architectures the bot will build
# and what submission parameters shall be used
# medium instances (8 cores, 16GB RAM)
#arch_target_map = { "linux/x86_64/generic" : "--constraint shape=c4.4xlarge", "linux/x86_64/intel/haswell" : "--constraint shape=c4.4xlarge", "linux/x86_64/intel/skylake_avx512" : "--constraint shape=c5.4xlarge", "linux/x86_64/amd/zen2": "--constraint shape=c5a.4xlarge", "linux/x86_64/amd/zen3" : "--constraint shape=c6a.4xlarge", "linux/aarch64/generic" : "--constraint shape=c6g.4xlarge", "linux/aarch64/graviton2" : "--constraint shape=c6g.4xlarge", "linux/aarch64/graviton3" : "--constraint shape=c7g.4xlarge"}
# larger instances (16 cores, 32GB RAM)
arch_target_map = { "linux/x86_64/generic" : "--constraint shape=c4.4xlarge", "linux/x86_64/intel/haswell" : "--constraint shape=c4.4xlarge", "linux/x86_64/intel/skylake_avx512" : "--constraint shape=c5.4xlarge", "linux/x86_64/amd/zen2": "--constraint shape=c5a.4xlarge", "linux/x86_64/amd/zen3" : "--constraint shape=c6a.4xlarge", "linux/aarch64/generic" : "--constraint shape=c6g.4xlarge", "linux/aarch64/graviton2" : "--constraint shape=c6g.4xlarge", "linux/aarch64/graviton3" : "--constraint shape=c7g.4xlarge"}
# 5 c4.2xlarge haswell 8 vCPU, 15 GiB RAM (1 + generic)
# 2 c4.4xlarge haswell 16 vCPU, 30 GiB RAM
# 5 c5.2xlarge skylake_avx512 8 vCPU, 16 GiB RAM (1)
# 1 c5.4xlarge skylake_avx512 16 vCPU, 32 GiB RAM
# 5 c5a.2xlarge zen2 8 vCPU, 16 GiB RAM (1)
# 1 c5a.4xlarge zen2 16 vCPU, 32 GiB RAM
# 5 c5d.2xlarge skylake_avx512 8 vCPU, 16 GiB RAM + 200 GB NVMe
# 5 c6a.2xlarge zen3 8 vCPU, 16 GiB RAM (1)
# 1 c6a.4xlarge zen3 16 vCPU, 32 GiB RAM
# 5 c6g.2xlarge neoverse_n1 8 vCPU, 16 GiB RAM (1 + generic)
# 2 c6g.4xlarge neoverse_n1 16 vCPU, 32 GiB RAM
# 1 c6g.8xlarge neoverse_n1 32 vCPU, 64 GiB RAM
# 1 c6i.2xlarge cascadelake 8 vCPU, 16 GiB RAM (1)
# 5 c7g.2xlarge neoverse_v1 8 vCPU, 16 GiB RAM (1)
# 1 c7g.4xlarge neoverse_v1 16 vCPU, 32 GiB RAM
# larger instances (*.4xlarge => 16 cores, 32GB RAM)
arch_target_map = {
"linux/x86_64/generic" : "--constraint shape=c4.4xlarge",
"linux/x86_64/intel/haswell" : "--constraint shape=c4.4xlarge",
"linux/x86_64/intel/skylake_avx512" : "--constraint shape=c5.4xlarge",
"linux/x86_64/amd/zen2": "--constraint shape=c5a.4xlarge",
"linux/x86_64/amd/zen3" : "--constraint shape=c6a.4xlarge",
"linux/aarch64/generic" : "--constraint shape=c6g.4xlarge",
"linux/aarch64/neoverse_n1" : "--constraint shape=c6g.4xlarge",
"linux/aarch64/neoverse_v1" : "--constraint shape=c7g.4xlarge" }

[repo_targets]
# defines for which repository an arch_target should be built
#
# only building for repository EESSI-pilot
repo_target_map = { "linux/x86_64/generic" : ["EESSI-pilot"], "linux/x86_64/intel/haswell" : ["EESSI-pilot"], "linux/x86_64/intel/skylake_avx512" : ["EESSI-pilot"], "linux/x86_64/amd/zen2": ["EESSI-pilot"], "linux/x86_64/amd/zen3" : ["EESSI-pilot"], "linux/aarch64/generic" : ["EESSI-pilot"], "linux/aarch64/graviton2" : ["EESSI-pilot"], "linux/aarch64/graviton3" : ["EESSI-pilot"]}
repo_target_map = {
"linux/x86_64/generic" : ["eessi-2021.12","eessi-2023.06-compat","eessi-2023.06-software"],
"linux/x86_64/intel/haswell" : ["eessi-2021.12","eessi-2023.06-compat","eessi-2023.06-software"],
"linux/x86_64/intel/skylake_avx512" : ["eessi-2021.12","eessi-2023.06-compat","eessi-2023.06-software"],
"linux/x86_64/amd/zen2" : ["eessi-2021.12","eessi-2023.06-compat","eessi-2023.06-software"],
"linux/x86_64/amd/zen3" : ["eessi-2021.12","eessi-2023.06-compat","eessi-2023.06-software"],
"linux/aarch64/generic" : ["eessi-2021.12","eessi-2023.06-compat","eessi-2023.06-software"],
"linux/aarch64/neoverse_n1" : ["eessi-2021.12","eessi-2023.06-compat","eessi-2023.06-software"],
"linux/aarch64/neoverse_v1" : ["eessi-2021.12","eessi-2023.06-compat","eessi-2023.06-software"] }

# points to definition of repositories (default EESSI-pilot defined by build container)
repos_cfg_dir = /mnt/shared/home/bot/eessi-bot-software-layer/cfg-bundles
repos_cfg_dir = /mnt/shared/home/bot/eessi-bot-software-layer/repos

# configuration for event handler which receives events from a GitHub repository.
[event_handler]
Expand All @@ -118,34 +203,30 @@ poll_interval = 60
# full path to the command for manipulating existing jobs
scontrol_command = /usr/bin/scontrol

[deploycfg]
# script for uploading built software packages
tarball_upload_script = /mnt/shared/home/bot/eessi-bot-software-layer/scripts/eessi-upload-to-staging

# URL to S3/minio bucket
# if attribute is set, bucket_base will be constructed as follows
# bucket_base=${endpoint_url}/${bucket_name}
# otherwise, bucket_base will be constructed as follows
# bucket_base=https://${bucket_name}.s3.amazonaws.com
# - The former variant is used for non AWS S3 services, eg, minio, or when
# the bucket name is not provided in the hostname (see latter case).
# - The latter variant is used for AWS S3 services.
#endpoint_url = URL_TO_S3_SERVER
# variable 'comment' under 'submitted_job_comments' should not be changed as there are regular expression patterns matching it
[submitted_job_comments]
initial_comment = New job on instance `{app_name}` for architecture `{arch_name}` for repository `{repo_id}` in job dir `{symlink}`
awaits_release = job id `{job_id}` awaits release by job manager

# bucket name
bucket_name = eessi-staging

# upload policy: defines what policy is used for uploading built artefacts
# to an S3 bucket
# 'all' ..: upload all artefacts (multiple uploads of the same artefact possible)
# 'latest': for each build target (eessi-VERSION-{software,init,compat}-OS-ARCH)
# only upload the latest built artefact
# 'once' : only once upload any built artefact for the build target
# 'none' : do not upload any built artefacts
upload_policy = once
[new_job_comments]
awaits_launch = job awaits launch by Slurm scheduler

# which GH account has the permission to trigger the deployment (by setting
# the label 'bot:deploy'; apparently this cannot be restricted on GitHub)
# if value is left empty, everyone can trigger the deployment
# value can be a space delimited list of GH accounts
deploy_permission = boegel trz42 bedroge

[running_job_comments]
running_job = job `{job_id}` is running


[finished_job_comments]
success = :grin: SUCCESS tarball `{tarball_name}` ({tarball_size} GiB) in job dir
failure = :cry: FAILURE
no_slurm_out = No slurm output `{slurm_out}` in job dir
slurm_out = Found slurm output `{slurm_out}` in job dir
missing_modules = Slurm output lacks message "No missing modules!".
no_tarball_message = Slurm output lacks message about created tarball.
no_matching_tarball = No tarball matching `{tarball_pattern}` found in job dir.
multiple_tarballs = Found {num_tarballs} tarballs in job dir - only 1 matching `{tarball_pattern}` expected.
job_result_comment_fmt = <details><summary>{summary} _(click triangle for detailed information)_</summary>Details:{details}<br/>Artefacts:{artefacts}</details>
job_result_details_item_fmt = <br/>&nbsp;&nbsp;&nbsp;&nbsp;{item}
job_result_artefacts_item_fmt = <li><code>{item}</code></li>