Integrate kbatch #1258

Merged
19 commits merged on May 26, 2022
3 changes: 3 additions & 0 deletions qhub/initialize.py
@@ -58,6 +58,9 @@
"monitoring": {
"enabled": True,
},
"kbatch": {
"enabled": True,
},
"cdsdashboards": {
"enabled": True,
"cds_hide_user_named_servers": True,
8 changes: 8 additions & 0 deletions qhub/schema.py
@@ -74,6 +74,13 @@ class HelmExtension(Base):
overrides: typing.Optional[typing.Dict]


# ============== kbatch =============


class KBatch(Base):
enabled: bool


# ============== Monitoring =============


@@ -452,6 +459,7 @@ class Main(Base):
theme: Theme
profiles: Profiles
environments: typing.Dict[str, CondaEnvironment]
kbatch: typing.Optional[KBatch]
monitoring: typing.Optional[Monitoring]
clearml: typing.Optional[ClearML]
tf_extensions: typing.Optional[typing.List[QHubExtension]]
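Not part of the PR, but for context: a minimal sketch of how the new optional `kbatch` key behaves under the pydantic-style models used in `qhub/schema.py`. The class and field names are copied from the diff above; the explicit `= None` default is an assumption added so the sketch is self-contained.

```python
# Sketch only, not part of the PR: the new `kbatch` block validates to a model when
# present and may be omitted entirely, since the field is typing.Optional.
import typing

from pydantic import BaseModel


class KBatch(BaseModel):
    enabled: bool


class Main(BaseModel):
    kbatch: typing.Optional[KBatch] = None  # explicit default added for this sketch


print(Main(kbatch={"enabled": True}).kbatch)  # enabled=True
print(Main().kbatch)                          # None -- key omitted from the config
```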
1 change: 1 addition & 0 deletions qhub/stages/input_vars.py
@@ -251,6 +251,7 @@ def stage_07_kubernetes_services(stage_outputs, config):
"dask-gateway-profiles": config["profiles"]["dask_worker"],
# monitoring
"monitoring-enabled": config["monitoring"]["enabled"],
"kbatch-enabled": config["kbatch"]["enabled"],
# prefect
"prefect-enabled": config.get("prefect", {}).get("enabled", False),
"prefect-token": config.get("prefect", {}).get("token", ""),
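A note on the lookup above, as a sketch rather than part of the PR: `config["kbatch"]["enabled"]` relies on the `kbatch` default block added in `qhub/initialize.py`, while the prefect lookups fall back gracefully when the key is absent. The config fragment below is hypothetical.

```python
# Hypothetical config fragment for illustration only.
config = {"kbatch": {"enabled": True}}

kbatch_enabled = config["kbatch"]["enabled"]                       # KeyError if "kbatch" were missing
prefect_enabled = config.get("prefect", {}).get("enabled", False)  # falls back to False

print(kbatch_enabled, prefect_enabled)  # True False
```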
5 changes: 4 additions & 1 deletion qhub/template/stages/07-kubernetes-services/jupyterhub.tf
@@ -112,7 +112,10 @@ module "jupyterhub" {

services = concat([
"dask-gateway"
], (var.prefect-enabled ? ["prefect"] : []))
],
(var.prefect-enabled ? ["prefect"] : []),
(var.kbatch-enabled ? ["kbatch"] : [])
)

general-node-group = var.node_groups.general
user-node-group = var.node_groups.user
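A Python analogue of the `concat(...)` expression above (a sketch, not part of the PR; the flag values are hypothetical): the "kbatch" JupyterHub service is only registered when kbatch is enabled.

```python
# Sketch: mirrors the Terraform concat() of conditional service lists above.
prefect_enabled = False  # hypothetical flag values
kbatch_enabled = True

services = (
    ["dask-gateway"]
    + (["prefect"] if prefect_enabled else [])
    + (["kbatch"] if kbatch_enabled else [])
)

print(services)  # ['dask-gateway', 'kbatch']
```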
23 changes: 23 additions & 0 deletions qhub/template/stages/07-kubernetes-services/kbatch.tf
@@ -0,0 +1,23 @@
# ======================= VARIABLES ======================
variable "kbatch-enabled" {
description = "kbatch enabled or disabled"
type = bool
}


# ====================== RESOURCES =======================
module "kbatch" {
count = var.kbatch-enabled ? 1 : 0

source = "./modules/kubernetes/services/kbatch"

namespace = var.environment
external-url = var.endpoint

jupyterhub_api_token = module.jupyterhub.services.kbatch.api_token
node-group = var.node_groups.user

dask-gateway-address = module.dask-gateway.config.gateway.address
dask-gateway-proxy-address = module.dask-gateway.config.gateway.proxy_address
dask-worker-image = var.dask-worker-image
}
@@ -0,0 +1,6 @@
name: dask-gateway-job
image: mcr.microsoft.com/planetary-computer/python:2021.08.02.0
args:
- python
- kbatch_dask_gateway_test.py
code: kbatch_dask_gateway_test.py
@@ -0,0 +1,6 @@
name: nb-job
image: mcr.microsoft.com/planetary-computer/python:latest
args:
- papermill
- kbatch_nb_test.ipynb
code: kbatch_nb_test.ipynb
@@ -0,0 +1,37 @@
"""
Start a cluster with Dask Gateway, print the dashboard link, and run some tasks.
"""
import dask
import dask_gateway
import distributed
from distributed import wait


def inc(x):
return x + 1


def main():
print(f"dask version = {dask.__version__}")
print(f"dask_gateway version = {dask_gateway.__version__}")
print(f"distributed version = {distributed.__version__}")

gateway = dask_gateway.Gateway()
options = gateway.cluster_options(use_local_defaults=False)

print("Starting cluster")
cluster = gateway.new_cluster(options)
client = cluster.get_client()
print("Dashboard at:", client.dashboard_link)

cluster.scale(2)

futures = client.map(inc, list(range(100)))
_ = wait(futures)

print("Closing cluster")
cluster.close()


if __name__ == "__main__":
main()
@@ -0,0 +1,6 @@
name: dask-gateway-job
image: mcr.microsoft.com/planetary-computer/python:2021.08.02.0
args:
- python
- kbatch_dask_gateway.py
code: kbatch_dask_gateway.py
@@ -0,0 +1,121 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "cba309a0-d8ff-4c18-8d94-c380e875ac46",
"metadata": {
"tags": []
},
"source": [
"# Canonical Tests\n",
"\n",
"The goal of the work we performed on behalf of OGC/USGS was to enable users on QHub (and perhaps plain JupyterHub) to:\n",
"- [ ] run long-running notebooks or scripts\n",
"- [ ] run notebooks and scripts as cronjobs\n",
"\n",
"And as a stretch goal:\n",
"- [ ] run complex workflows that require multiple steps / noteboks or scripts\n",
"\n",
"Additional requirements:\n",
"- the notebook or script should work even after the user's JupyterLab session ends\n",
"- the notebook or script can connect to the Dask-Gateway and launch a Dask cluster\n",
"\n",
"This notebooks will serve as a \"unit test\" for the above features, for more details [see this issue](https://github.com/Quansight/ogc-management/issues/6).\n",
"\n",
"## `kbatch`\n",
"\n",
"The first two features outline above will be handled by [`kbatch`](https://github.com/kbatch-dev/kbatch). `kbatch` consists of two major components, the frontend `kbatch` and backend `kbatch-proxy`. The user submits job requests to `kbatch-proxy` and `kbatch-proxy` submits those job requests to the Kubernetes API. \n",
"\n",
"> NOTE:\n",
"> At the present, no additional features have been added to `kbatch`, we have simply integrated `kbatch-proxy` into QHub. A feature enhancement PR will need to be opened on the `kbatch` repo in order to enable cronjobs.\n",
"\n",
"### Setup\n",
"\n",
"In order to use `kbatch` in it's current form, some basic setup is required of the user. Going forward, we will assume that `kbatch-proxy` has been correctly integrated into QHub.\n",
"\n",
"1. Create or modify a conda environment by adding `kbatch`. And activate this conda environment.\n",
"\n",
"```\n",
"pip install kbatch\n",
"```\n",
"\n",
"2. Create a JupyterHub API token and configure `kbatch` to talk to the `kbatch-proxy` service.\n",
"\n",
"```\n",
"kbatch configure --kbatch-url http://kbatch-kbatch-proxy.dev.svc.cluster.local --token <JUPYTERHUB_API_TOKEN>\n",
"```\n",
"\n",
"3. Submit a job to `kbatch`\n",
"\n",
"```\n",
"kbatch job submit --name=list-files --image=alpine --command='[\"ls\", \"-lh\"]'\n",
"```\n",
"\n",
"### Run this notebook\n",
"\n",
"To run this notebook as a job, you will need an image with `papermill` (or a similar CLI tool). \n",
"\n",
"Create a configuration file, `kbatch_nb_job.yaml` like the one below:\n",
"```yaml\n",
"# filename: kbatch_nb_job.yaml\n",
"name: nb-job\n",
"image: mcr.microsoft.com/planetary-computer/python:latest\n",
"args:\n",
" - papermill\n",
" - kbatch_nb.ipynb\n",
"code: kbatch_nb.ipynb\n",
"```\n",
"\n",
"Then run:\n",
"```\n",
"kbatch job submit -f kbatch_nb_job.yaml\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "ff3e90f6-f0b6-4300-81c2-b7ade75d57b4",
"metadata": {},
"outputs": [],
"source": [
"import time"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "277ff7d7-d938-4790-af92-5719534b08d5",
"metadata": {},
"outputs": [],
"source": [
"with open('kbatch_nb_output.txt', 'w') as f:\n",
" for i in range(0,10):\n",
" current_time = time.strftime(\"%Y-%m-%d-%H:%M:%S\", time.localtime())\n",
" time.sleep(1)\n",
" f.write(f'{current_time}: {i}\\n')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
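The job configs above run this notebook with `papermill`. As a rough sketch (not part of the PR; the output path is chosen purely for illustration), the equivalent programmatic call would be:

```python
# Sketch: roughly what the papermill-based job does with the notebook above.
import papermill as pm

pm.execute_notebook("kbatch_nb.ipynb", "kbatch_nb_output.ipynb")
```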
@@ -0,0 +1,6 @@
name: nb-job
image: mcr.microsoft.com/planetary-computer/python:latest
args:
- papermill
- kbatch_nb.ipynb
code: kbatch_nb.ipynb
@@ -0,0 +1,81 @@
# Get this name dynamically
locals {
kbatch_service_account_name = "kbatch-kbatch-proxy"
}

resource "helm_release" "kbatch" {
name = "kbatch"
namespace = var.namespace
repository = "https://kbatch-dev.github.io/helm-chart"
chart = "kbatch-proxy"
version = "0.3.1"

values = concat([
file("${path.module}/values.yaml"),
jsonencode({
app = {
jupyterhub_api_token = var.jupyterhub_api_token
jupyterhub_api_url = "https://${var.external-url}/hub/api/"
extra_env = {
KBATCH_PREFIX = ""
KBATCH_JOB_EXTRA_ENV = jsonencode({
DASK_GATEWAY__AUTH__TYPE = "jupyterhub"
DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE = "${var.dask-worker-image.name}:${var.dask-worker-image.tag}"
DASK_GATEWAY__ADDRESS = "${var.dask-gateway-address}"
DASK_GATEWAY__PROXY_ADDRESS = "${var.dask-gateway-proxy-address}"
})
}
}
image = {
tag = "0.3.1"
}
})
])

set_sensitive {
name = "jupyterHubToken"
value = var.jupyterhub_api_token
}

set {
name = "kbatchImage"
value = var.image
}

set {
name = "namespace"
value = var.namespace
}

}

resource "kubernetes_cluster_role" "kbatch" {
metadata {
name = "${var.name}-kbatch"
}

rule {
api_groups = ["", "batch"]
resources = ["*"]
verbs = ["get", "watch", "list", "patch", "create"]
}
}


resource "kubernetes_cluster_role_binding" "kbatch" {
metadata {
name = "${var.name}-kbatch"
}

role_ref {
api_group = "rbac.authorization.k8s.io"
kind = "ClusterRole"
name = kubernetes_cluster_role.kbatch.metadata.0.name
}
subject {
kind = "ServiceAccount"
name = local.kbatch_service_account_name
namespace = var.namespace
api_group = ""
}
}
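The `KBATCH_JOB_EXTRA_ENV` block above injects `DASK_GATEWAY__*` variables into every submitted job. As a sketch (not part of the PR, and assuming dask-gateway is installed in the job image), a job script can then rely on Dask's environment-based configuration and connect without passing an address explicitly:

```python
# Sketch: inside a kbatch-launched job, Dask picks up DASK_GATEWAY__ADDRESS,
# DASK_GATEWAY__PROXY_ADDRESS and DASK_GATEWAY__AUTH__TYPE from the environment,
# so Gateway() needs no explicit arguments here.
import dask_gateway

gateway = dask_gateway.Gateway()
cluster = gateway.new_cluster()
client = cluster.get_client()
print(client.dashboard_link)
```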
@@ -0,0 +1 @@
# https://github.com/kbatch-dev/helm-chart/blob/main/kbatch/values.yaml