diff --git a/qhub/initialize.py b/qhub/initialize.py index 0f4e38668..48812523d 100644 --- a/qhub/initialize.py +++ b/qhub/initialize.py @@ -58,6 +58,9 @@ "monitoring": { "enabled": True, }, + "kbatch": { + "enabled": True, + }, "cdsdashboards": { "enabled": True, "cds_hide_user_named_servers": True, diff --git a/qhub/schema.py b/qhub/schema.py index 498e5eddf..b36707d2f 100644 --- a/qhub/schema.py +++ b/qhub/schema.py @@ -74,6 +74,13 @@ class HelmExtension(Base): overrides: typing.Optional[typing.Dict] +# ============== kbatch ============= + + +class KBatch(Base): + enabled: bool + + # ============== Monitoring ============= @@ -452,6 +459,7 @@ class Main(Base): theme: Theme profiles: Profiles environments: typing.Dict[str, CondaEnvironment] + kbatch: typing.Optional[KBatch] monitoring: typing.Optional[Monitoring] clearml: typing.Optional[ClearML] tf_extensions: typing.Optional[typing.List[QHubExtension]] diff --git a/qhub/stages/input_vars.py b/qhub/stages/input_vars.py index 68e718caa..9a20b87b1 100644 --- a/qhub/stages/input_vars.py +++ b/qhub/stages/input_vars.py @@ -251,6 +251,7 @@ def stage_07_kubernetes_services(stage_outputs, config): "dask-gateway-profiles": config["profiles"]["dask_worker"], # monitoring "monitoring-enabled": config["monitoring"]["enabled"], + "kbatch-enabled": config["kbatch"]["enabled"], # prefect "prefect-enabled": config.get("prefect", {}).get("enabled", False), "prefect-token": config.get("prefect", {}).get("token", ""), diff --git a/qhub/template/stages/07-kubernetes-services/jupyterhub.tf b/qhub/template/stages/07-kubernetes-services/jupyterhub.tf index 4defc42db..114695da9 100644 --- a/qhub/template/stages/07-kubernetes-services/jupyterhub.tf +++ b/qhub/template/stages/07-kubernetes-services/jupyterhub.tf @@ -112,7 +112,10 @@ module "jupyterhub" { services = concat([ "dask-gateway" - ], (var.prefect-enabled ? ["prefect"] : [])) + ], + (var.prefect-enabled ? ["prefect"] : []), + (var.kbatch-enabled ? ["kbatch"] : []) + ) general-node-group = var.node_groups.general user-node-group = var.node_groups.user diff --git a/qhub/template/stages/07-kubernetes-services/kbatch.tf b/qhub/template/stages/07-kubernetes-services/kbatch.tf new file mode 100644 index 000000000..997af49a2 --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/kbatch.tf @@ -0,0 +1,23 @@ +# ======================= VARIABLES ====================== +variable "kbatch-enabled" { + description = "kbatch enabled or disabled" + type = bool +} + + +# ====================== RESOURCES ======================= +module "kbatch" { + count = var.kbatch-enabled ? 1 : 0 + + source = "./modules/kubernetes/services/kbatch" + + namespace = var.environment + external-url = var.endpoint + + jupyterhub_api_token = module.jupyterhub.services.kbatch.api_token + node-group = var.node_groups.user + + dask-gateway-address = module.dask-gateway.config.gateway.address + dask-gateway-proxy-address = module.dask-gateway.config.gateway.proxy_address + dask-worker-image = var.dask-worker-image +} diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/.ipynb_checkpoints/kbatch_dask_gateway_job-checkpoint.yaml b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/.ipynb_checkpoints/kbatch_dask_gateway_job-checkpoint.yaml new file mode 100644 index 000000000..086468a69 --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/.ipynb_checkpoints/kbatch_dask_gateway_job-checkpoint.yaml @@ -0,0 +1,6 @@ +name: dask-gateway-job +image: mcr.microsoft.com/planetary-computer/python:2021.08.02.0 +args: + - python + - kbatch_dask_gateway_test.py +code: kbatch_dask_gateway_test.py diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/.ipynb_checkpoints/kbatch_nb_job-checkpoint.yaml b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/.ipynb_checkpoints/kbatch_nb_job-checkpoint.yaml new file mode 100644 index 000000000..2b6db0b59 --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/.ipynb_checkpoints/kbatch_nb_job-checkpoint.yaml @@ -0,0 +1,6 @@ +name: nb-job +image: mcr.microsoft.com/planetary-computer/python:latest +args: + - papermill + - kbatch_nb_test.ipynb +code: kbatch_nb_test.ipynb diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_dask_gateway.py b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_dask_gateway.py new file mode 100644 index 000000000..d0854e93e --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_dask_gateway.py @@ -0,0 +1,37 @@ +""" +Start a cluster with Dask Gateway, print the dashboard link, and run some tasks. +""" +import dask +import dask_gateway +import distributed +from distributed import wait + + +def inc(x): + return x + 1 + + +def main(): + print(f"dask version = {dask.__version__}") + print(f"dask_gateway version = {dask_gateway.__version__}") + print(f"distributed version = {distributed.__version__}") + + gateway = dask_gateway.Gateway() + options = gateway.cluster_options(use_local_defaults=False) + + print("Starting cluster") + cluster = gateway.new_cluster(options) + client = cluster.get_client() + print("Dashboard at:", client.dashboard_link) + + cluster.scale(2) + + futures = client.map(inc, list(range(100))) + _ = wait(futures) + + print("Closing cluster") + cluster.close() + + +if __name__ == "__main__": + main() diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_dask_gateway_job.yaml b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_dask_gateway_job.yaml new file mode 100644 index 000000000..729cc5ab9 --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_dask_gateway_job.yaml @@ -0,0 +1,6 @@ +name: dask-gateway-job +image: mcr.microsoft.com/planetary-computer/python:2021.08.02.0 +args: + - python + - kbatch_dask_gateway.py +code: kbatch_dask_gateway.py diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_nb.ipynb b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_nb.ipynb new file mode 100644 index 000000000..c31052fec --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_nb.ipynb @@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cba309a0-d8ff-4c18-8d94-c380e875ac46", + "metadata": { + "tags": [] + }, + "source": [ + "# Canonical Tests\n", + "\n", + "The goal of the work we performed on behalf of OGC/USGS was to enable users on QHub (and perhaps plain JupyterHub) to:\n", + "- [ ] run long-running notebooks or scripts\n", + "- [ ] run notebooks and scripts as cronjobs\n", + "\n", + "And as a stretch goal:\n", + "- [ ] run complex workflows that require multiple steps / noteboks or scripts\n", + "\n", + "Additional requirements:\n", + "- the notebook or script should work even after the user's JupyterLab session ends\n", + "- the notebook or script can connect to the Dask-Gateway and launch a Dask cluster\n", + "\n", + "This notebooks will serve as a \"unit test\" for the above features, for more details [see this issue](https://github.com/Quansight/ogc-management/issues/6).\n", + "\n", + "## `kbatch`\n", + "\n", + "The first two features outline above will be handled by [`kbatch`](https://github.com/kbatch-dev/kbatch). `kbatch` consists of two major components, the frontend `kbatch` and backend `kbatch-proxy`. The user submits job requests to `kbatch-proxy` and `kbatch-proxy` submits those job requests to the Kubernetes API. \n", + "\n", + "> NOTE:\n", + "> At the present, no additional features have been added to `kbatch`, we have simply integrated `kbatch-proxy` into QHub. A feature enhancement PR will need to be opened on the `kbatch` repo in order to enable cronjobs.\n", + "\n", + "### Setup\n", + "\n", + "In order to use `kbatch` in it's current form, some basic setup is required of the user. Going forward, we will assume that `kbatch-proxy` has been correctly integrated into QHub.\n", + "\n", + "1. Create or modify a conda environment by adding `kbatch`. And activate this conda environment.\n", + "\n", + "```\n", + "pip install kbatch\n", + "```\n", + "\n", + "2. Create a JupyterHub API token and configure `kbatch` to talk to the `kbatch-proxy` service.\n", + "\n", + "```\n", + "kbatch configure --kbatch-url http://kbatch-kbatch-proxy.dev.svc.cluster.local --token \n", + "```\n", + "\n", + "3. Submit a job to `kbatch`\n", + "\n", + "```\n", + "kbatch job submit --name=list-files --image=alpine --command='[\"ls\", \"-lh\"]'\n", + "```\n", + "\n", + "### Run this notebook\n", + "\n", + "To run this notebook as a job, you will need an image with `papermill` (or a similar CLI tool). \n", + "\n", + "Create a configuration file, `kbatch_nb_job.yaml` like the one below:\n", + "```yaml\n", + "# filename: kbatch_nb_job.yaml\n", + "name: nb-job\n", + "image: mcr.microsoft.com/planetary-computer/python:latest\n", + "args:\n", + " - papermill\n", + " - kbatch_nb.ipynb\n", + "code: kbatch_nb.ipynb\n", + "```\n", + "\n", + "Then run:\n", + "```\n", + "kbatch job submit -f kbatch_nb_job.yaml\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ff3e90f6-f0b6-4300-81c2-b7ade75d57b4", + "metadata": {}, + "outputs": [], + "source": [ + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "277ff7d7-d938-4790-af92-5719534b08d5", + "metadata": {}, + "outputs": [], + "source": [ + "with open('kbatch_nb_output.txt', 'w') as f:\n", + " for i in range(0,10):\n", + " current_time = time.strftime(\"%Y-%m-%d-%H:%M:%S\", time.localtime())\n", + " time.sleep(1)\n", + " f.write(f'{current_time}: {i}\\n')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_nb_job.yaml b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_nb_job.yaml new file mode 100644 index 000000000..205dd796b --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/jupyterhub/files/examples/kbatch_nb_job.yaml @@ -0,0 +1,6 @@ +name: nb-job +image: mcr.microsoft.com/planetary-computer/python:latest +args: + - papermill + - kbatch_nb.ipynb +code: kbatch_nb.ipynb diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/main.tf b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/main.tf new file mode 100644 index 000000000..1f13aa9f5 --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/main.tf @@ -0,0 +1,81 @@ +# Get this name dynamtically +locals { + kbatch_service_account_name = "kbatch-kbatch-proxy" +} + +resource "helm_release" "kbatch" { + name = "kbatch" + namespace = var.namespace + repository = "https://kbatch-dev.github.io/helm-chart" + chart = "kbatch-proxy" + version = "0.3.1" + + values = concat([ + file("${path.module}/values.yaml"), + jsonencode({ + app = { + jupyterhub_api_token = var.jupyterhub_api_token + jupyterhub_api_url = "https://${var.external-url}/hub/api/" + extra_env = { + KBATCH_PREFIX = "" + KBATCH_JOB_EXTRA_ENV = jsonencode({ + DASK_GATEWAY__AUTH__TYPE = "jupyterhub" + DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE = "${var.dask-worker-image.name}:${var.dask-worker-image.tag}" + DASK_GATEWAY__ADDRESS = "${var.dask-gateway-address}" + DASK_GATEWAY__PROXY_ADDRESS = "${var.dask-gateway-proxy-address}" + }) + } + } + image = { + tag = "0.3.1" + } + }) + ]) + + set_sensitive { + name = "jupyterHubToken" + value = var.jupyterhub_api_token + } + + set { + name = "kbatchImage" + value = var.image + } + + set { + name = "namespace" + value = var.namespace + } + +} + +resource "kubernetes_cluster_role" "kbatch" { + metadata { + name = "${var.name}-kbatch" + } + + rule { + api_groups = ["", "batch"] + resources = ["*"] + verbs = ["get", "watch", "list", "patch", "create"] + } +} + + +resource "kubernetes_cluster_role_binding" "kbatch" { + metadata { + name = "${var.name}-kbatch" + } + + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = kubernetes_cluster_role.kbatch.metadata.0.name + } + subject { + kind = "ServiceAccount" + name = local.kbatch_service_account_name + namespace = var.namespace + api_group = "" + } +} diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/values.yaml b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/values.yaml new file mode 100644 index 000000000..b7c2489d1 --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/values.yaml @@ -0,0 +1 @@ +# https://github.com/kbatch-dev/helm-chart/blob/main/kbatch/values.yaml diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/variables.tf b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/variables.tf new file mode 100644 index 000000000..18d0ee598 --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/variables.tf @@ -0,0 +1,57 @@ +variable "name" { + description = "name prefix to assign to kbatch" + type = string + default = "qhub" +} + +variable "jupyterhub_api_token" { + type = string + default = "" +} + +variable "namespace" { + type = string + default = "dev" +} + +variable "image" { + type = string + default = "" +} + +variable "node-group" { + description = "Node key value pair for bound resources" + type = object({ + key = string + value = string + }) +} + +variable "external-url" { + description = "External url that jupyterhub cluster is accessible" + type = string +} + +variable "overrides" { + description = "kbatch helm chart list of overrides" + type = list(string) + default = [] +} + +variable "dask-gateway-address" { + description = "Dask Gateway address" + type = string +} + +variable "dask-gateway-proxy-address" { + description = "Dask Gateway proxy-address" + type = string +} + +variable "dask-worker-image" { + description = "Dask worker image" + type = object({ + name = string + tag = string + }) +} diff --git a/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/versions.tf b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/versions.tf new file mode 100644 index 000000000..5b85894ee --- /dev/null +++ b/qhub/template/stages/07-kubernetes-services/modules/kubernetes/services/kbatch/versions.tf @@ -0,0 +1,13 @@ +terraform { + required_providers { + helm = { + source = "hashicorp/helm" + version = "2.1.2" + } + kubernetes = { + source = "hashicorp/kubernetes" + version = "2.7.1" + } + } + required_version = ">= 1.0" +}