Skip to content

Commit

Permalink
Merge pull request #2128 from arnaldo2792/nvidia-ecs-variant
Browse files Browse the repository at this point in the history
Add aws-ecs-1-nvidia variant
  • Loading branch information
arnaldo2792 authored May 5, 2022
2 parents 0443c97 + 1e407b0 commit 6a82bc5
Show file tree
Hide file tree
Showing 23 changed files with 196 additions and 34 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,14 @@ jobs:
arch: aarch64
supported: true
fetch-upstream: "true"
- variant: aws-ecs-1-nvidia
arch: x86_64
supported: true
fetch-upstream: "true"
- variant: aws-ecs-1-nvidia
arch: aarch64
supported: true
fetch-upstream: "true"
fail-fast: false
steps:
- uses: actions/checkout@v3
Expand Down
22 changes: 22 additions & 0 deletions QUICKSTART-ECS.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,3 +220,25 @@ aws ec2 run-instances --key-name YOUR_KEY_NAME \
And remember, if you used a public subnet, add `--associate-public-ip-address` or attach an Elastic IP after launch.

Once it launches, you should be able to run tasks on your Bottlerocket instance using the ECS API and console.


### aws-ecs-*-nvidia variants

The `aws-ecs-*-nvidia` variants include the required packages and configurations to leverage NVIDIA GPUs.
They come with the [NVIDIA Tesla driver](https://docs.nvidia.com/datacenter/tesla/drivers/index.html) along with the libraries required by the [CUDA toolkit](https://developer.nvidia.com/cuda-toolkit) included in your ECS tasks.
On hosts with multiple GPUs (e.g., EC2 `g4dn` instances), you can assign one or more GPUs per container by specifying the resource requirements in your container definitions, as described in the [official ECS documentation](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-gpu.html):

```json
{
"containerDefinitions": [
{
"resourceRequirements" : [
{
"type" : "GPU",
"value" : "2"
}
]
}
]
}
```
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ There are a few important caveats about the provided kdump support:
### NVIDIA GPUs Support
Bottlerocket's `nvidia` variants include the required packages and configurations to leverage NVIDIA GPUs.
The official AMIs for these variants can be used with EC2 GPU-equipped instance types such as: `p2`, `p3`, `p4`, `g4dn`, `g5` and `g5g`.
Please see [QUICKSTART-EKS](QUICKSTART-EKS.md#aws-k8s--nvidia-variants) for further details about Kubernetes variants.
Please see [QUICKSTART-EKS](QUICKSTART-EKS.md#aws-k8s--nvidia-variants) for further details about Kubernetes variants, and [QUICKSTART-ECS](QUICKSTART-ECS.md#aws-ecs--nvidia-variants) for ECS variants.

## Details

Expand Down
11 changes: 6 additions & 5 deletions packages/os/os.spec
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
%global _cross_first_party 1
%global _is_k8s_variant %(if echo %{_cross_variant} | grep -Fqw "k8s"; then echo 1; else echo 0; fi)
%global _is_ecs_variant %(if echo %{_cross_variant} | grep -Fqw "ecs"; then echo 1; else echo 0; fi)
%global _is_aws_variant %(if echo %{_cross_variant} | grep -Fqw "aws"; then echo 1; else echo 0; fi)
%global _is_vendor_variant %(if echo %{_cross_variant} | grep -Fqw "nvidia"; then echo 1; else echo 0; fi)
%undefine _debugsource_packages
Expand Down Expand Up @@ -94,7 +95,7 @@ Requires: %{_cross_os}shibaken
Requires: %{_cross_os}cfsignal
%endif

%if "%{_cross_variant}" == "aws-ecs-1"
%if %{_is_ecs_variant}
Requires: %{_cross_os}ecs-settings-applier
%endif

Expand Down Expand Up @@ -227,7 +228,7 @@ Summary: Bottlerocket certificates handler
%description -n %{_cross_os}certdog
%{summary}.

%if "%{_cross_variant}" == "aws-ecs-1"
%if %{_is_ecs_variant}
%package -n %{_cross_os}ecs-settings-applier
Summary: Settings generator for ECS
%description -n %{_cross_os}ecs-settings-applier
Expand Down Expand Up @@ -340,7 +341,7 @@ echo "** Output from non-static builds:"
-p prairiedog \
-p certdog \
-p shimpei \
%if "%{_cross_variant}" == "aws-ecs-1"
%if %{_is_ecs_variant}
-p ecs-settings-applier \
%endif
%if %{_is_aws_variant}
Expand Down Expand Up @@ -377,7 +378,7 @@ for p in \
signpost updog metricdog logdog \
ghostdog bootstrap-containers \
shimpei \
%if "%{_cross_variant}" == "aws-ecs-1"
%if %{_is_ecs_variant}
ecs-settings-applier \
%endif
%if %{_is_aws_variant}
Expand Down Expand Up @@ -562,7 +563,7 @@ install -p -m 0644 %{S:300} %{buildroot}%{_cross_udevrulesdir}/80-ephemeral-stor
%files -n %{_cross_os}logdog
%{_cross_bindir}/logdog

%if "%{_cross_variant}" == "aws-ecs-1"
%if %{_is_ecs_variant}
%files -n %{_cross_os}ecs-settings-applier
%{_cross_bindir}/ecs-settings-applier
%endif
Expand Down
1 change: 1 addition & 0 deletions sources/logdog/conf/logdog.aws-ecs-1-nvidia.conf
28 changes: 28 additions & 0 deletions sources/models/shared-defaults/ecs.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# ECS
[services.ecs]
restart-commands = ["/usr/bin/ecs-settings-applier", "/bin/systemctl try-reload-or-restart ecs.service"]
configuration-files = ["ecs-config"]

[configuration-files.ecs-config]
path = "/etc/ecs/ecs.config"
template-path = "/usr/share/templates/ecs.config"

[metadata.settings.ecs]
affected-services = ["ecs"]

[settings.ecs]
allow-privileged-containers = false
logging-drivers = ["json-file", "awslogs", "none"]
loglevel = "info"

# Metrics
[settings.metrics]
service-checks = ["apiserver", "chronyd", "containerd", "host-containerd", "docker", "ecs"]

# Network
[metadata.settings.network]
affected-services = ["containerd", "docker", "ecs", "host-containerd", "host-containers"]

# Image registry credentials
[metadata.settings.container-registry.credentials]
affected-services = ["ecs", "host-containers", "bootstrap-containers"]
30 changes: 30 additions & 0 deletions sources/models/src/aws-ecs-1-nvidia/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use model_derive::model;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

use crate::modeled_types::Identifier;
use crate::{
AwsSettings, BootstrapContainer, CloudFormationSettings, ECSSettings, HostContainer,
KernelSettings, MetricsSettings, NetworkSettings, NtpSettings, OciHooks, PemCertificate,
RegistrySettings, UpdatesSettings,
};

// Top-level settings model for the `aws-ecs-1-nvidia` variant (this module lives under
// `aws-ecs-1-nvidia/`). Apart from `motd`, every field is either a settings type imported
// from this crate or a map keyed by `Identifier`. No NVIDIA-specific settings field is
// declared here.
//
// Note: we have to use 'rename' here because the top-level Settings structure is the only one
// that uses its name in serialization; internal structures use the field name that points to it
#[model(rename = "settings", impl_default = true)]
struct Settings {
motd: String,
updates: UpdatesSettings,
// Container definitions, keyed by an `Identifier` name.
host_containers: HashMap<Identifier, HostContainer>,
bootstrap_containers: HashMap<Identifier, BootstrapContainer>,
ntp: NtpSettings,
network: NetworkSettings,
kernel: KernelSettings,
aws: AwsSettings,
// ECS settings; this is the orchestrator-specific part of the model.
ecs: ECSSettings,
metrics: MetricsSettings,
// PEM certificates, keyed by an `Identifier` name.
pki: HashMap<Identifier, PemCertificate>,
container_registry: RegistrySettings,
oci_hooks: OciHooks,
cloudformation: CloudFormationSettings,
}
28 changes: 0 additions & 28 deletions sources/models/src/aws-ecs-1/defaults.d/52-aws-ecs-1.toml

This file was deleted.

1 change: 1 addition & 0 deletions sources/models/src/aws-ecs-1/defaults.d/52-aws-ecs-1.toml
23 changes: 23 additions & 0 deletions variants/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions variants/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
members = [
"aws-dev",
"aws-ecs-1",
"aws-ecs-1-nvidia",
"aws-k8s-1.19",
"aws-k8s-1.20",
"aws-k8s-1.21",
Expand Down
6 changes: 6 additions & 0 deletions variants/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ This variant is compatible with Kubernetes 1.22, 1.23, and 1.24 clusters.
The [aws-ecs-1](aws-ecs-1/Cargo.toml) variant includes the packages needed to run an [Amazon ECS](https://ecs.aws)
container instance in AWS.

### aws-ecs-1-nvidia: Amazon ECS container instance

The [aws-ecs-1-nvidia](aws-ecs-1-nvidia/Cargo.toml) variant includes the packages needed to run an [Amazon ECS](https://ecs.aws)
container instance in AWS.
It also includes the required packages to configure containers to leverage NVIDIA GPUs.

### aws-dev: AWS development build

The [aws-dev](aws-dev/Cargo.toml) variant has useful packages for local development of the OS.
Expand Down
50 changes: 50 additions & 0 deletions variants/aws-ecs-1-nvidia/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
[package]
name = "aws-ecs-1-nvidia"
version = "0.1.0"
edition = "2018"
publish = false
build = "build.rs"

[package.metadata.build-variant.image-layout]
os-image-size-gib = 4

[package.metadata.build-variant]
kernel-parameters = [
"console=tty0",
"console=ttyS0,115200n8",
]
included-packages = [
# core
"release",
"kernel-5.10",
# docker
"docker-cli",
"docker-engine",
"docker-init",
"docker-proxy",
# ecs
"ecs-agent",
# NVIDIA support
"ecs-gpu-init",
"nvidia-container-toolkit",
"kmod-5.10-nvidia-tesla-470"
]

[lib]
path = "lib.rs"

[build-dependencies]
# core
release = { path = "../../packages/release" }
kernel-5_10 = { path = "../../packages/kernel-5.10" }
# docker
docker-cli = { path = "../../packages/docker-cli" }
docker-engine = { path = "../../packages/docker-engine" }
docker-init = { path = "../../packages/docker-init" }
docker-proxy = { path = "../../packages/docker-proxy" }
# ecs
ecs-agent = { path = "../../packages/ecs-agent" }
# NVIDIA
ecs-gpu-init = { path = "../../packages/ecs-gpu-init" }
nvidia-container-toolkit = { path = "../../packages/nvidia-container-toolkit" }
kmod-5_10-nvidia = { path = "../../packages/kmod-5.10-nvidia" }
9 changes: 9 additions & 0 deletions variants/aws-ecs-1-nvidia/build.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use std::process::{exit, Command};

/// Build-script entry point: hands the variant build off to `buildsys`.
///
/// Runs `buildsys build-variant` and waits for it to finish.
///
/// # Errors
///
/// Returns the underlying `std::io::Error` if the `buildsys` process cannot be
/// started or waited on. If the process runs but reports failure, the build
/// script terminates with exit code 1 instead of returning.
fn main() -> Result<(), std::io::Error> {
    let mut buildsys = Command::new("buildsys");
    buildsys.arg("build-variant");

    if buildsys.status()?.success() {
        Ok(())
    } else {
        // A failed variant build must fail the cargo build as well.
        exit(1)
    }
}
1 change: 1 addition & 0 deletions variants/aws-ecs-1-nvidia/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// not used

0 comments on commit 6a82bc5

Please sign in to comment.