Skip to content

Commit

Permalink
🚧
Browse files Browse the repository at this point in the history
  • Loading branch information
cboettig committed Feb 6, 2025
1 parent 7ee2006 commit 20407da
Show file tree
Hide file tree
Showing 60 changed files with 360 additions and 2,213 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/docker-gpu-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Builds the CUDA image from Dockerfile.cuda (repository root) and publishes
# it to the GitHub Container Registry.
name: GPU Image
on:
  workflow_dispatch: null
  push:
    # Rebuild when the CUDA Dockerfile, or a file it COPYs into the image,
    # changes. The *-env.yml / jupyter-ai.yml entries are kept from the
    # previous trigger list.
    paths:
      - Dockerfile.cuda
      - Rprofile
      - install.r
      - environment.yml
      - vscode-extensions.txt
      - rl-env.yml
      - spatial-env.yml
      - jupyter-ai.yml
jobs:
  build:
    runs-on: ubuntu-latest
    # Least privilege: the job only reads the repository and pushes a package.
    permissions:
      contents: read
      packages: write
    steps:
      # For biggish images, github actions runs out of disk space.
      # So we cleanup some unwanted things in the disk image, and reclaim that space for our docker use
      # https://github.com/actions/virtual-environments/issues/2606#issuecomment-772683150
      # and https://github.com/easimon/maximize-build-space/blob/b4d02c14493a9653fe7af06cc89ca5298071c66e/action.yml#L104
      # This gives us a total of about 52G of free space, which should be enough for now
      - name: cleanup disk space
        run: |
          sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc
          df -h
      - uses: actions/checkout@v4
      # The remaining steps are skipped on forks, which cannot publish the image.
      - name: Login to GitHub Container Registry
        if: github.repository == 'rocker-org/ml'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{github.actor}}
          password: ${{secrets.GITHUB_TOKEN}}
      # Dockerfile.cuda and the files it COPYs live at the repository root, so
      # the root is the build context. The image is tagged under the
      # repository owner's namespace (rocker-org).
      - name: Build the Docker image
        if: github.repository == 'rocker-org/ml'
        run: docker build . --no-cache -f Dockerfile.cuda --tag ghcr.io/rocker-org/cuda:latest
      - name: Publish
        if: github.repository == 'rocker-org/ml'
        run: docker push ghcr.io/rocker-org/cuda:latest

34 changes: 34 additions & 0 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Builds the CPU image from Dockerfile (repository root) and publishes it to
# the GitHub Container Registry.
name: Docker Image CI
on:
  workflow_dispatch: null
  push:
    # Rebuild when the Dockerfile, or a file it COPYs into the image, changes.
    # The *-env.yml / jupyter-ai.yml entries are kept from the previous
    # trigger list, now relative to the repository root.
    paths:
      - Dockerfile
      - Rprofile
      - install.r
      - environment.yml
      - vscode-extensions.txt
      - rl-env.yml
      - spatial-env.yml
      - jupyter-ai.yml
jobs:
  build:
    runs-on: ubuntu-latest
    # Least privilege: the job only reads the repository and pushes a package.
    permissions:
      contents: read
      packages: write
    steps:
      # For biggish images, github actions runs out of disk space.
      # So we cleanup some unwanted things in the disk image, and reclaim that space for our docker use
      # https://github.com/actions/virtual-environments/issues/2606#issuecomment-772683150
      # and https://github.com/easimon/maximize-build-space/blob/b4d02c14493a9653fe7af06cc89ca5298071c66e/action.yml#L104
      # This gives us a total of about 52G of free space, which should be enough for now
      - name: cleanup disk space
        run: |
          sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc
          df -h
      - uses: actions/checkout@v4
      # NOTE(review): these steps were previously gated on 'boettiger-lab/k8s'
      # and therefore never ran in this repository; they are now gated on
      # 'rocker-org/ml' to match the GPU workflow. Confirm the image name.
      - name: Login to GitHub Container Registry
        if: github.repository == 'rocker-org/ml'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{github.actor}}
          password: ${{secrets.GITHUB_TOKEN}}
      - name: Build the Docker image
        if: github.repository == 'rocker-org/ml'
        run: docker build . --tag ghcr.io/rocker-org/ml:latest
      - name: Publish
        if: github.repository == 'rocker-org/ml'
        run: docker push ghcr.io/rocker-org/ml:latest

35 changes: 35 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Image built on a Jupyter base image (override with --build-arg BASE=...).
# Adds code-server, then runs the rocker-org/ml install_utilities.sh,
# install_r.sh and install_rstudio.sh scripts, installs R packages via
# install.r, and updates the conda environment from environment.yml.
ARG BASE=quay.io/jupyter/minimal-notebook:ubuntu-24.04
FROM $BASE

# Fail a RUN step when any command in a pipeline fails, so that a failed
# download in `curl ... | bash` cannot be masked by the shell's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Make code-server extensions etc persist to container, not hub
ENV XDG_DATA_HOME=/opt/share

USER root
# code-server (VSCode)
RUN curl -fsSL https://code-server.dev/install.sh | sh && rm -rf .cache

# apt utilities, code-server setup
# -f: fail on HTTP errors instead of piping an error page into bash;
# -L: follow redirects. Same flags as the code-server installer above.
RUN curl -fsSL https://raw.githubusercontent.com/rocker-org/ml/refs/heads/main/install_utilities.sh | bash

RUN curl -fsSL https://raw.githubusercontent.com/rocker-org/ml/refs/heads/main/install_r.sh | bash
RUN curl -fsSL https://raw.githubusercontent.com/rocker-org/ml/refs/heads/main/install_rstudio.sh | bash

## Add rstudio's binaries to path for quarto
ENV PATH=$PATH:/usr/lib/rstudio-server/bin/quarto/bin

## switch non-root users from BSPM to r-universe if no sudo
COPY Rprofile /usr/lib/R/etc/Rprofile.site

# When run as root, install.r automagically handles any necessary apt-gets
COPY install.r install.r
RUN Rscript install.r

# Everything below runs as the unprivileged notebook user.
USER ${NB_USER}

# One `code-server --install-extension` call per line of the list.
COPY vscode-extensions.txt vscode-extensions.txt
RUN xargs -n 1 code-server --install-extension < vscode-extensions.txt

COPY environment.yml environment.yml
RUN conda update --all --solver=classic -n base -c conda-forge conda && \
    conda env update --file environment.yml

35 changes: 35 additions & 0 deletions Dockerfile.cuda
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# CUDA variant: same recipe on the Jupyter pytorch-notebook CUDA 12 base image
# (override with --build-arg BASE=...). Adds code-server, then runs the
# rocker-org/ml install_utilities.sh, install_r.sh and install_rstudio.sh
# scripts, installs R packages via install.r, and updates the conda
# environment from environment.yml.
ARG BASE=quay.io/jupyter/pytorch-notebook:cuda12-ubuntu-24.04
FROM $BASE

# Fail a RUN step when any command in a pipeline fails, so that a failed
# download in `curl ... | bash` cannot be masked by the shell's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Make code-server extensions etc persist to container, not hub
ENV XDG_DATA_HOME=/opt/share

USER root
# code-server (VSCode)
RUN curl -fsSL https://code-server.dev/install.sh | sh && rm -rf .cache

# apt utilities, code-server setup
# -f: fail on HTTP errors instead of piping an error page into bash;
# -L: follow redirects. Same flags as the code-server installer above.
RUN curl -fsSL https://raw.githubusercontent.com/rocker-org/ml/refs/heads/main/install_utilities.sh | bash

RUN curl -fsSL https://raw.githubusercontent.com/rocker-org/ml/refs/heads/main/install_r.sh | bash
RUN curl -fsSL https://raw.githubusercontent.com/rocker-org/ml/refs/heads/main/install_rstudio.sh | bash

## Add rstudio's binaries to path for quarto
ENV PATH=$PATH:/usr/lib/rstudio-server/bin/quarto/bin

## switch non-root users from BSPM to r-universe if no sudo
COPY Rprofile /usr/lib/R/etc/Rprofile.site

# When run as root, install.r automagically handles any necessary apt-gets
COPY install.r install.r
RUN Rscript install.r

# Everything below runs as the unprivileged notebook user.
USER ${NB_USER}

# One `code-server --install-extension` call per line of the list.
COPY vscode-extensions.txt vscode-extensions.txt
RUN xargs -n 1 code-server --install-extension < vscode-extensions.txt

COPY environment.yml environment.yml
RUN conda update --all --solver=classic -n base -c conda-forge conda && \
    conda env update --file environment.yml

76 changes: 1 addition & 75 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,77 +1,3 @@
# Rocker stack for Machine Learning in R

This repository contains images for machine learning and GPU-based computation in R. **EDIT** Dockerfiles are now built in modular build system at https://github.com/rocker-org/rocker-versioned2 . This repo remains for documentation around the ML part of the stack.




The dependency stack looks like so:

```
-| rocker/cuda
-| rocker/ml
-| rocker/ml-verse
```

All three are CUDA compatible and will optionally take R version tags (`rocker/ml:4.0.5`) with the option of additional trailing CUDA version tag (e.g. `rocker/ml:4.0.5-cuda10.1`).


## Quick start

**Note: GPU use requires [nvidia-docker](https://github.com/NVIDIA/nvidia-docker/)** runtime to run!

Run a bash shell or R command line:

```
# CPU-only
docker run --rm -ti rocker/ml R
# Machines with nvidia-docker and GPU support
docker run --gpus all --rm -ti rocker/ml R
```

Or run in an RStudio instance:

```
docker run --gpus all -e PASSWORD=mu -p 8787:8787 rocker/ml
```


## Tags

See [current `ml` tags](https://hub.docker.com/r/rocker/ml/tags?page=1&ordering=last_updated)
See [current `ml-verse` tags](https://hub.docker.com/r/rocker/ml-verse/tags?page=1&ordering=last_updated)


## Python versions and virtualenvs

The ML images configure a default python virtualenv using the Ubuntu system python (3.8.5 for current Ubuntu 20.04 LTS), see [install_python.sh](https://github.com/rocker-org/rocker-versioned2/blob/master/scripts/install_python.sh). This virtualenv is user-writable and the default detected by `reticulate` (using `WORKON_HOME` and `PYTHON_VENV_PATH` variables).

Images also configure [pipenv](https://github.com/pypa/pipenv) with [pyenv](https://github.com/pyenv/pyenv) by default. This makes it very easy to manage projects that require specific versions of Python as well as specific python modules. For instance, a project using the popular [`greta`](https://greta-stats.org/) package for GPU-accelerated Bayesian inference needs Tensorflow 1.x, which requires Python <= 3.7, so it might run:

```bash
pipenv --python 3.7
```

Run this in the bash terminal to set up a pipenv-managed virtualenv in the working directory using Python 3.7. Then in R we can activate this virtualenv:

```r
venv <- system("pipenv --venv", inter = TRUE)
reticulate::use_virtualenv(venv, required = TRUE)
```

We can now install the tensorflow version needed, e.g.

```r
install.packages("tensorflow")
tensorflow::install_tensorflow(version="1.14.0-gpu", extra_packages="tensorflow-probability==0.7.0")
```



## Notes

All images are based on the current Ubuntu LTS (ubuntu 20.04) and based on the official [NVIDIA CUDA docker build recipes](https://gitlab.com/nvidia/container-images/cuda/)

**PLEASE NOTE**: older images, `rocker/ml-gpu`, `rocker/tensorflow` and `rocker/tensorflow-gpu`, built with cuda 9.0, are deprecated and no longer supported.


This repository contains images for machine learning and GPU-based computation in R.
42 changes: 42 additions & 0 deletions Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Based on Jeroen's config:
# https://github.com/r-universe-org/base-image/blob/f20ec9fc6f51ef8a89aad489206a43790bd9bf77/Rprofile



local({

# Report whether `sudo -n true` succeeds, i.e. whether sudo can be used
# without prompting for a password.
#
# Returns a single TRUE/FALSE. The exit status is read with `intern = FALSE`:
# with `intern = TRUE`, system() returns the command's captured output (an
# empty character vector here), so comparing it with 0 yields logical(0) and
# breaks any `if ()` that uses the result.
has_sudo <- function() {
  # system() warns and returns 127 when the command cannot be run at all
  # (e.g. sudo is not installed); treat that as "no sudo".
  status <- suppressWarnings(
    system(
      "sudo -n true",
      intern = FALSE,
      ignore.stdout = TRUE,
      ignore.stderr = TRUE
    )
  )
  identical(as.integer(status), 0L)
}

# Configure the binary / r-universe repositories only when has_sudo() is FALSE
if (!has_sudo()) {
  r_version <- getRversion()
  # Distribution codename as printed by `lsb_release -sc`
  codename <- system2("lsb_release", "-sc", stdout = TRUE)

  # User agent and CRAN mirror are both derived from the R version / codename
  platform_info <- paste(
    r_version, R.version$platform, R.version$arch, R.version$os
  )
  cran_url <- sprintf(
    "https://packagemanager.rstudio.com/all/__linux__/%s/latest", codename
  )
  options(HTTPUserAgent = sprintf("R/%s R (%s)", r_version, platform_info))
  options(repos = c(CRAN = cran_url))

  # Enable BioConductor repos
  utils::setRepositories(
    ind = 1:4,
    addURLs = c(fallback = "https://cloud.r-project.org")
  )

  # Enable universe repo(s): MY_UNIVERSE may hold several ';'-separated URLs;
  # the first one is also used to build the binary repository URL.
  universe_env <- Sys.getenv("MY_UNIVERSE", "https://cran.r-universe.dev")
  if (nzchar(universe_env)) {
    universe_urls <- trimws(strsplit(universe_env, ";")[[1]])
    binary_url <- sprintf(
      "%s/bin/linux/%s/%s",
      universe_urls[1], codename, substr(r_version, 1, 3)
    )
    options(
      repos = c(
        binaries = binary_url,
        universe = universe_urls,
        getOption("repos")
      )
    )
  }
}
# Settings applied whether or not the repositories above were configured
Sys.unsetenv(c("CI", "GITHUB_ACTIONS"))
options(crayon.enabled = TRUE)

# Dummy token for API limits: set only when GITHUB_PAT is not defined at all
# (a variable that is set but empty is left untouched).
github_pat <- Sys.getenv("GITHUB_PAT", unset = NA)
if (is.na(github_pat)) {
  token_parts <- c("ghp_SXg", "LNM", "Tu4cnal", "tdqkZtBojc3s563G", "iqv")
  Sys.setenv(GITHUB_PAT = paste(token_parts, collapse = "e"))
}
})

32 changes: 0 additions & 32 deletions deprecated/Makefile

This file was deleted.

Loading

0 comments on commit 20407da

Please sign in to comment.