From c02308f2ab8e4a83dbeab56b32cc8a966a6cbd96 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Sat, 26 Sep 2020 18:25:05 +0200 Subject: [PATCH 01/15] Allow Rook CephFS Storage Class and Provisioner --- backend/kale/common/podutils.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/backend/kale/common/podutils.py b/backend/kale/common/podutils.py index ea2b82150..9e51526ce 100644 --- a/backend/kale/common/podutils.py +++ b/backend/kale/common/podutils.py @@ -26,6 +26,9 @@ ROK_CSI_STORAGE_CLASS = "rok" ROK_CSI_STORAGE_PROVISIONER = "rok.arrikto.com" +ROOK_CEPHFS_CSI_STORAGE_CLASS = "rook-cephfs" +ROOK_CEPHFS_CSI_STORAGE_PROVISIONER = "rook-ceph.cephfs.csi.ceph.com" + NAMESPACE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" K8S_SIZE_RE = re.compile(r'^([0-9]+)(E|Ei|P|Pi|T|Ti|G|Gi|M|Mi|K|Ki){0,1}$') @@ -158,19 +161,19 @@ def _list_volumes(client, namespace, pod_name, container_name): # result in an incomplete notebook snapshot. pvc = client.read_namespaced_persistent_volume_claim(pvc.claim_name, namespace) - if pvc.spec.storage_class_name != ROK_CSI_STORAGE_CLASS: - msg = ("Found PVC with storage class '%s'. Only storage class '%s'" - " is supported." - % (pvc.spec.storage_class_name, ROK_CSI_STORAGE_CLASS)) + if pvc.spec.storage_class_name != ROK_CSI_STORAGE_CLASS and pvc.spec.storage_class_name != ROOK_CEPHFS_CSI_STORAGE_CLASS: + msg = ("Found PVC with storage class '%s'. Only storage classes '%s' and '%s'" + " are supported." + % (pvc.spec.storage_class_name, ROK_CSI_STORAGE_CLASS, ROOK_CEPHFS_CSI_STORAGE_CLASS)) raise RuntimeError(msg) ann = pvc.metadata.annotations provisioner = ann.get("volume.beta.kubernetes.io/storage-provisioner", None) - if provisioner != ROK_CSI_STORAGE_PROVISIONER: + if provisioner != ROK_CSI_STORAGE_PROVISIONER and provisioner != ROOK_CEPHFS_CSI_STORAGE_PROVISIONER: msg = ("Found PVC storage provisioner '%s'. Only storage" - " provisioner '%s' is supported." - % (provisioner, ROK_CSI_STORAGE_PROVISIONER)) + " provisioners '%s' and '%s' are supported." + % (provisioner, ROK_CSI_STORAGE_PROVISIONER, ROOK_CEPHFS_CSI_STORAGE_PROVISIONER)) raise RuntimeError(msg) mount_path = _get_mount_path(container, volume) From 76c439eb916d6bfe936c24d9764a1236875b9f51 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Sat, 26 Sep 2020 19:41:20 +0200 Subject: [PATCH 02/15] Initial CephFS functions Functions are not yet optimized, but serve as working examples. --- backend/kale/rpc/cephfs.py | 151 +++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 backend/kale/rpc/cephfs.py diff --git a/backend/kale/rpc/cephfs.py b/backend/kale/rpc/cephfs.py new file mode 100644 index 000000000..a7c0541dc --- /dev/null +++ b/backend/kale/rpc/cephfs.py @@ -0,0 +1,151 @@ +# Copyright 2019-2020 The Kale Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# To allow a notebook to create Rook CephFS snapshot a ClusterRole +# and RoleBinding for the default-editor service account in the +# given namespace must be applied. Below are examples for use with the +# namespace "admin". +# +# !!!WARNING!!! +# Might not be secure, only use for testing +# +# apiVersion: rbac.authorization.k8s.io/v1 +# kind: ClusterRole +# metadata: +# name: snapshot-access +# rules: +# - apiGroups: ["snapshot.storage.k8s.io"] +# resources: ["volumesnapshots"] +# verbs: ["create", "get", "list", "watch", "patch", "delete"] +# - apiGroups: ["snapshot.storage.k8s.io"] +# resources: ["volumesnapshotcontents"] +# verbs: ["create", "get", "list", "watch", "update", "delete"] +# - apiGroups: ["snapshot.storage.k8s.io"] +# resources: ["volumesnapshotclasses"] +# verbs: ["get", "list", "watch"] +# - apiGroups: ["snapshot.storage.k8s.io"] +# resources: ["volumesnapshotcontents/status"] +# verbs: ["update"] +# - apiGroups: ["snapshot.storage.k8s.io"] +# resources: ["volumesnapshots/status"] +# verbs: ["update"] +# +# apiVersion: rbac.authorization.k8s.io/v1 +# kind: RoleBinding +# metadata: +# name: allow-snapshot-nb-admin +# namespace: admin +# subjects: +# - kind: ServiceAccount +# name: default-editor +# namespace: admin +# roleRef: +# kind: ClusterRole +# name: snapshot-access +# apiGroup: rbac.authorization.k8s.io + +import os +import copy +import logging + +from kale.common import podutils +from kale.rpc.errors import (RPCNotFoundError, RPCServiceUnavailableError) +from kale.rpc.log import create_adapter + + +def snapshot_notebook(name, source): + """Perform a snapshot over the notebook's pod.""" + snapshot_resource = { + "apiVersion": "snapshot.storage.k8s.io/v1beta1", + "kind": "VolumeSnapshot", + "metadata": {"name": name}, + "spec": { + "volumeSnapshotClassName": "csi-cephfsplugin-snapclass", + "source": {"persistentVolumeClaimName": source} + } + } + co_client = podutils._get_k8s_custom_objects_client() + namespace = podutils.get_namespace() + + co_client.create_namespaced_custom_object( + group="snapshot.storage.k8s.io", + version="v1beta1", + namespace=namespace, + plural="volumesnapshots", + body=snapshot_resource, + ) + return + + +def get_notebook_snapshot(name): + """Get a notebook snapshot.""" + co_client = podutils._get_k8s_custom_objects_client() + namespace = podutils.get_namespace() + + notebook_snapshot = co_client.get_namespaced_custom_object( + group="snapshot.storage.k8s.io", + version="v1beta1", + namespace=namespace, + plural="volumesnapshots", + name=name, + ) + return notebook_snapshot + + +def delete_notebook_snapshot(name): + """Delete a notebook snapshot.""" + co_client = podutils._get_k8s_custom_objects_client() + namespace = podutils.get_namespace() + + co_client.delete_namespaced_custom_object( + group="snapshot.storage.k8s.io", + version="v1beta1", + namespace=namespace, + plural="volumesnapshots", + name=name, + ) + return + + +def pvc_from_snapshot(name, source): + """Create a PVC from a notebook snapshot.""" + pvc_resource = { + "apiVersion": "v1", + "kind": "PersistentVolumeClaim", + "metadata": {"name": name}, + "spec": { + "dataSource": {"name": source, "kind": "VolumeSnapshot", "apiGroup": "snapshot.storage.k8s.io"}, + "accessModes": ["ReadWriteMany"], + "resources": { + "requests": {"storage": "10Gi"} + } + } + } + client = podutils._get_k8s_v1_client() + namespace = podutils.get_namespace() + + client.create_namespaced_persistent_volume_claim( + namespace=namespace, + body=pvc_resource, + ) + return + + +def delete_pvc(name): + client = podutils._get_k8s_v1_client() + namespace = podutils.get_namespace() + client.delete_namespaced_persistent_volume_claim( + namespace=namespace, + name=name,) + return \ No newline at end of file From 253ef328b628797ae7ae411b0751e10a99c41e3e Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Wed, 14 Oct 2020 17:49:16 +0200 Subject: [PATCH 03/15] Move to kale/common, add log add snapshot status checking and rewrite pvc from snapshot Creating a PVC from a snapshot is now following the same coding structure as `rokutils.py` and checks if a snapshot is ready before trying to create a PVC from it. --- .../{rpc/cephfs.py => common/cephfsutils.py} | 141 ++++++++++++------ 1 file changed, 95 insertions(+), 46 deletions(-) rename backend/kale/{rpc/cephfs.py => common/cephfsutils.py} (50%) diff --git a/backend/kale/rpc/cephfs.py b/backend/kale/common/cephfsutils.py similarity index 50% rename from backend/kale/rpc/cephfs.py rename to backend/kale/common/cephfsutils.py index a7c0541dc..43eb85319 100644 --- a/backend/kale/rpc/cephfs.py +++ b/backend/kale/common/cephfsutils.py @@ -57,89 +57,123 @@ import os import copy +import json +import time +import math import logging +import kubernetes from kale.common import podutils from kale.rpc.errors import (RPCNotFoundError, RPCServiceUnavailableError) from kale.rpc.log import create_adapter +NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE = """\ +This is a snapshot of notebook {} in namespace {}. +This snapshot was created by Kale in order to clone the volumes of the notebook +and use them to spawn a Kubeflow pipeline.\ +""" -def snapshot_notebook(name, source): - """Perform a snapshot over the notebook's pod.""" +log = logging.getLogger(__name__) + + +def snapshot_pvc(snapshot_name, pvc_name): + """Perform a snapshot over a PVC.""" snapshot_resource = { "apiVersion": "snapshot.storage.k8s.io/v1beta1", "kind": "VolumeSnapshot", - "metadata": {"name": name}, + "metadata": {"name": snapshot_name}, "spec": { "volumeSnapshotClassName": "csi-cephfsplugin-snapclass", - "source": {"persistentVolumeClaimName": source} + "source": {"persistentVolumeClaimName": pvc_name} } } co_client = podutils._get_k8s_custom_objects_client() namespace = podutils.get_namespace() - - co_client.create_namespaced_custom_object( + log.info("Taking a snapshot of PVC %s in namespace %s ..." + % (pvc_name, namespace)) + task_info = co_client.create_namespaced_custom_object( group="snapshot.storage.k8s.io", version="v1beta1", namespace=namespace, plural="volumesnapshots", body=snapshot_resource, ) - return + return task_info -def get_notebook_snapshot(name): - """Get a notebook snapshot.""" - co_client = podutils._get_k8s_custom_objects_client() - namespace = podutils.get_namespace() - - notebook_snapshot = co_client.get_namespaced_custom_object( - group="snapshot.storage.k8s.io", - version="v1beta1", - namespace=namespace, - plural="volumesnapshots", - name=name, - ) - return notebook_snapshot + +def check_snapshot_status(snapshot_name): + """Check if volume snapshot is ready to use.""" + log.info("Checking snapshot with snapshot name: %s", snapshot_name) + task = None + status = None + task = get_pvc_snapshot(snapshot_name=snapshot_name) + status = task['status']['readyToUse'] + + if status == True: + log.info("Successfully created volume snapshot") + elif status == False: + raise RuntimeError("Snapshot not ready (status: %s)" % status) + else: + raise RuntimeError("Unknown snapshot task status: %s" % status) + return status -def delete_notebook_snapshot(name): - """Delete a notebook snapshot.""" +def get_pvc_snapshot(snapshot_name): + """Get info about a pvc snapshot.""" co_client = podutils._get_k8s_custom_objects_client() namespace = podutils.get_namespace() - co_client.delete_namespaced_custom_object( + pvc_snapshot = co_client.get_namespaced_custom_object( group="snapshot.storage.k8s.io", version="v1beta1", namespace=namespace, plural="volumesnapshots", - name=name, + name=snapshot_name, ) - return + return pvc_snapshot -def pvc_from_snapshot(name, source): - """Create a PVC from a notebook snapshot.""" - pvc_resource = { - "apiVersion": "v1", - "kind": "PersistentVolumeClaim", - "metadata": {"name": name}, - "spec": { - "dataSource": {"name": source, "kind": "VolumeSnapshot", "apiGroup": "snapshot.storage.k8s.io"}, - "accessModes": ["ReadWriteMany"], - "resources": { - "requests": {"storage": "10Gi"} - } - } - } - client = podutils._get_k8s_v1_client() - namespace = podutils.get_namespace() - - client.create_namespaced_persistent_volume_claim( - namespace=namespace, - body=pvc_resource, +def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): + """Create a new PVC out of a Rok snapshot.""" + log.info("Creating new PVC '%s' from snapshot %s ..." % + (new_pvc_name, source_snapshot_name)) + snapshot_info = get_pvc_snapshot(source_snapshot_name) + size_repr = snapshot_info['status']['restoreSize'] + content_name = snapshot_info['status']['boundVolumeSnapshotContentName'] + log.info("Using snapshot with content: %s" % content_name) + + # todo: kubernetes python client v11 have a + # kubernetes.utils.create_from_dict that would make it much more nicer + # here. (KFP support kubernetes <= 10) + pvc = kubernetes.client.V1PersistentVolumeClaim( + api_version="v1", + kind="PersistentVolumeClaim", + metadata=kubernetes.client.V1ObjectMeta( + name=new_pvc_name + ), + spec=kubernetes.client.V1PersistentVolumeClaimSpec( + data_source=kubernetes.client.V1TypedLocalObjectReference( + api_group="snapshot.storage.k8s.io", + kind="VolumeSnapshot", + name=source_snapshot_name + ), + access_modes=["ReadWriteMany"], + resources=kubernetes.client.V1ResourceRequirements( + requests={"storage": size_repr} + ) + ) ) - return + k8s_client = podutils._get_k8s_v1_client() + ns = podutils.get_namespace() + status = check_snapshot_status(source_snapshot_name) + if status == True: + ns_pvc = k8s_client.create_namespaced_persistent_volume_claim(ns, pvc) + elif status == False: + raise RuntimeError("Snapshot not ready (status: %s)" % status) + else: + raise RuntimeError("Unknown Rok task status: %s" % status) + return {"name": ns_pvc.metadata.name} def delete_pvc(name): @@ -148,4 +182,19 @@ def delete_pvc(name): client.delete_namespaced_persistent_volume_claim( namespace=namespace, name=name,) + return + + +def delete_pvc_snapshot(name): + """Delete a pvc snapshot.""" + co_client = podutils._get_k8s_custom_objects_client() + namespace = podutils.get_namespace() + + co_client.delete_namespaced_custom_object( + group="snapshot.storage.k8s.io", + version="v1beta1", + namespace=namespace, + plural="volumesnapshots", + name=name, + ) return \ No newline at end of file From 9a35afdcad66349834cab721bad7af8101ddcf5d Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Thu, 15 Oct 2020 21:36:45 +0200 Subject: [PATCH 04/15] Snapshot all PVCs of a pod or notebook and save notebook metadata Add ability to snapshot all the PVCs atached to the pod. If using `snapshot_notebook` snapshot all PVCs connected to the notebook and add some notebook metadata as labels (pod name, container name, if the volume was the workspace directory) and annotations (mount path and docker image of the container) to the snapshot. A `list_pvc_snapshots` function is added that can be used to list snapshots filter using a label selector. For example: `list_pvc_snapshots(label_selector="pod=dev-0, is_workspace_dir=True")`. Eventually, the plan is to be able to start a new notebook server from a (set of) snapshot(s) using the metadata, similar to how Rok functions. --- .gitignore | 1 + backend/kale/common/cephfsutils.py | 143 ++++++++++++++++++++--------- backend/kale/common/podutils.py | 20 ++-- 3 files changed, 111 insertions(+), 53 deletions(-) diff --git a/.gitignore b/.gitignore index 15b32997e..0af94a7c0 100644 --- a/.gitignore +++ b/.gitignore @@ -24,3 +24,4 @@ yarn-error.log *.tsbuildinfo package-lock.json +backend/.vscode/settings.json diff --git a/backend/kale/common/cephfsutils.py b/backend/kale/common/cephfsutils.py index 43eb85319..03a6fa956 100644 --- a/backend/kale/common/cephfsutils.py +++ b/backend/kale/common/cephfsutils.py @@ -13,9 +13,9 @@ # limitations under the License. # # To allow a notebook to create Rook CephFS snapshot a ClusterRole -# and RoleBinding for the default-editor service account in the +# and RoleBinding for the default-editor service account in the # given namespace must be applied. Below are examples for use with the -# namespace "admin". +# namespace "admin". # # !!!WARNING!!! # Might not be secure, only use for testing @@ -55,17 +55,10 @@ # name: snapshot-access # apiGroup: rbac.authorization.k8s.io -import os -import copy -import json -import time -import math import logging import kubernetes from kale.common import podutils -from kale.rpc.errors import (RPCNotFoundError, RPCServiceUnavailableError) -from kale.rpc.log import create_adapter NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE = """\ This is a snapshot of notebook {} in namespace {}. @@ -76,15 +69,22 @@ log = logging.getLogger(__name__) -def snapshot_pvc(snapshot_name, pvc_name): +def snapshot_pvc(snapshot_name, pvc_name, image="", path="", **kwargs): """Perform a snapshot over a PVC.""" snapshot_resource = { - "apiVersion": "snapshot.storage.k8s.io/v1beta1", - "kind": "VolumeSnapshot", - "metadata": {"name": snapshot_name}, - "spec": { - "volumeSnapshotClassName": "csi-cephfsplugin-snapclass", - "source": {"persistentVolumeClaimName": pvc_name} + "apiVersion": "snapshot.storage.k8s.io/v1beta1", + "kind": "VolumeSnapshot", + "metadata": { + "name": snapshot_name, + "annotations": { + "container_image": image, + "volume_path": path + }, + "labels": kwargs + }, + "spec": { + "volumeSnapshotClassName": "csi-cephfsplugin-snapclass", + "source": {"persistentVolumeClaimName": pvc_name} } } co_client = podutils._get_k8s_custom_objects_client() @@ -92,16 +92,56 @@ def snapshot_pvc(snapshot_name, pvc_name): log.info("Taking a snapshot of PVC %s in namespace %s ..." % (pvc_name, namespace)) task_info = co_client.create_namespaced_custom_object( - group="snapshot.storage.k8s.io", - version="v1beta1", - namespace=namespace, - plural="volumesnapshots", - body=snapshot_resource, - ) + group="snapshot.storage.k8s.io", + version="v1beta1", + namespace=namespace, + plural="volumesnapshots", + body=snapshot_resource) return task_info +def snapshot_pod(): + """Take snapshots of the current Pod's PVCs.""" + volumes = [(path, volume.name, size) + for path, volume, size in podutils.list_volumes()] + namespace = podutils.get_namespace() + pod_name = podutils.get_pod_name() + log.info("Taking a snapshot of pod %s in namespace %s ..." + % (pod_name, namespace)) + snapshot_names = [] + for i in volumes: + snapshot_pvc( + "snapshot." + i[1], + i[1], + pod=pod_name, + default_container=podutils.get_container_name()) + snapshot_names.append("snapshot." + i[1]) + return snapshot_names + + +def snapshot_notebook(): + """Take snapshots of the current Notebook's PVCs and store its metadata.""" + volumes = [(path, volume.name, size) + for path, volume, size in podutils.list_volumes()] + namespace = podutils.get_namespace() + pod_name = podutils.get_pod_name() + log.info("Taking a snapshot of notebook %s in namespace %s ..." + % (pod_name, namespace)) + snapshot_names = [] + for i in volumes: + snapshot_pvc( + "snapshot." + i[1], + i[1], + image=podutils.get_docker_base_image(), + path=i[0], + pod=pod_name, + default_container=podutils.get_container_name(), + is_workspace_dir=str(podutils.is_workspace_dir(i[0]))) + snapshot_names.append("snapshot." + i[1]) + return snapshot_names + + def check_snapshot_status(snapshot_name): """Check if volume snapshot is ready to use.""" log.info("Checking snapshot with snapshot name: %s", snapshot_name) @@ -110,9 +150,9 @@ def check_snapshot_status(snapshot_name): task = get_pvc_snapshot(snapshot_name=snapshot_name) status = task['status']['readyToUse'] - if status == True: + if status is True: log.info("Successfully created volume snapshot") - elif status == False: + elif status is False: raise RuntimeError("Snapshot not ready (status: %s)" % status) else: raise RuntimeError("Unknown snapshot task status: %s" % status) @@ -123,17 +163,30 @@ def get_pvc_snapshot(snapshot_name): """Get info about a pvc snapshot.""" co_client = podutils._get_k8s_custom_objects_client() namespace = podutils.get_namespace() - + pvc_snapshot = co_client.get_namespaced_custom_object( - group="snapshot.storage.k8s.io", - version="v1beta1", - namespace=namespace, - plural="volumesnapshots", - name=snapshot_name, - ) + group="snapshot.storage.k8s.io", + version="v1beta1", + namespace=namespace, + plural="volumesnapshots", + name=snapshot_name) return pvc_snapshot +def list_pvc_snapshots(label_selector=""): + """List pvc snapshots.""" + co_client = podutils._get_k8s_custom_objects_client() + namespace = podutils.get_namespace() + + pvc_snapshots = co_client.list_namespaced_custom_object( + group="snapshot.storage.k8s.io", + version="v1beta1", + namespace=namespace, + plural="volumesnapshots", + label_selector=label_selector) + return pvc_snapshots + + def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): """Create a new PVC out of a Rok snapshot.""" log.info("Creating new PVC '%s' from snapshot %s ..." % @@ -150,6 +203,7 @@ def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): api_version="v1", kind="PersistentVolumeClaim", metadata=kubernetes.client.V1ObjectMeta( + annotations={"snapshot_content/origin": content_name}, name=new_pvc_name ), spec=kubernetes.client.V1PersistentVolumeClaimSpec( @@ -167,34 +221,33 @@ def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): k8s_client = podutils._get_k8s_v1_client() ns = podutils.get_namespace() status = check_snapshot_status(source_snapshot_name) - if status == True: + if status is True: ns_pvc = k8s_client.create_namespaced_persistent_volume_claim(ns, pvc) - elif status == False: + elif status is False: raise RuntimeError("Snapshot not ready (status: %s)" % status) else: raise RuntimeError("Unknown Rok task status: %s" % status) return {"name": ns_pvc.metadata.name} -def delete_pvc(name): +def delete_pvc(pvc_name): client = podutils._get_k8s_v1_client() namespace = podutils.get_namespace() client.delete_namespaced_persistent_volume_claim( - namespace=namespace, - name=name,) + namespace=namespace, + name=pvc_name) return -def delete_pvc_snapshot(name): +def delete_pvc_snapshot(snapshot_name): """Delete a pvc snapshot.""" co_client = podutils._get_k8s_custom_objects_client() namespace = podutils.get_namespace() - + co_client.delete_namespaced_custom_object( - group="snapshot.storage.k8s.io", - version="v1beta1", - namespace=namespace, - plural="volumesnapshots", - name=name, - ) - return \ No newline at end of file + group="snapshot.storage.k8s.io", + version="v1beta1", + namespace=namespace, + plural="volumesnapshots", + name=snapshot_name) + return diff --git a/backend/kale/common/podutils.py b/backend/kale/common/podutils.py index 9e51526ce..80d6ef505 100644 --- a/backend/kale/common/podutils.py +++ b/backend/kale/common/podutils.py @@ -161,19 +161,23 @@ def _list_volumes(client, namespace, pod_name, container_name): # result in an incomplete notebook snapshot. pvc = client.read_namespaced_persistent_volume_claim(pvc.claim_name, namespace) - if pvc.spec.storage_class_name != ROK_CSI_STORAGE_CLASS and pvc.spec.storage_class_name != ROOK_CEPHFS_CSI_STORAGE_CLASS: - msg = ("Found PVC with storage class '%s'. Only storage classes '%s' and '%s'" - " are supported." - % (pvc.spec.storage_class_name, ROK_CSI_STORAGE_CLASS, ROOK_CEPHFS_CSI_STORAGE_CLASS)) + if (pvc.spec.storage_class_name != ROK_CSI_STORAGE_CLASS + and pvc.spec.storage_class_name != ROOK_CEPHFS_CSI_STORAGE_CLASS): + msg = ("Found PVC with storage class '%s'. " + "Only storage classes '%s' and '%s' are supported." + % (pvc.spec.storage_class_name, + ROK_CSI_STORAGE_CLASS, ROOK_CEPHFS_CSI_STORAGE_CLASS)) raise RuntimeError(msg) ann = pvc.metadata.annotations provisioner = ann.get("volume.beta.kubernetes.io/storage-provisioner", None) - if provisioner != ROK_CSI_STORAGE_PROVISIONER and provisioner != ROOK_CEPHFS_CSI_STORAGE_PROVISIONER: - msg = ("Found PVC storage provisioner '%s'. Only storage" - " provisioners '%s' and '%s' are supported." - % (provisioner, ROK_CSI_STORAGE_PROVISIONER, ROOK_CEPHFS_CSI_STORAGE_PROVISIONER)) + if (provisioner != ROK_CSI_STORAGE_PROVISIONER + and provisioner != ROOK_CEPHFS_CSI_STORAGE_PROVISIONER): + msg = ("Found PVC storage provisioner '%s'. " + "Only storage provisioners '%s' and '%s' are supported." + % (provisioner, ROK_CSI_STORAGE_PROVISIONER, + ROOK_CEPHFS_CSI_STORAGE_PROVISIONER)) raise RuntimeError(msg) mount_path = _get_mount_path(container, volume) From 00377f6e66044f2151c3742e2e91ca8c85d385c3 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Fri, 16 Oct 2020 19:30:51 +0200 Subject: [PATCH 05/15] Allow restoring of Notebook from a snapshot If a snapshot is made of a notebook using "snapshot_notebook", a snapshot of each of the mounted PVCs is made and some metadata of the notebook is written to the snapshots. The "restore_notebook" function can be called with the name of a PVC snapshot that was part of the notebook as an argument(only 1 snapshot name is needed, "get_nb_pvcs_from_snapshot" will find the other mounted volumes). The function will then create PVCs from the snapshot(s) needed for the Notebook server and launch a new Notebook server with those PVCs mounted. --- backend/kale/common/cephfsutils.py | 109 ++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 3 deletions(-) diff --git a/backend/kale/common/cephfsutils.py b/backend/kale/common/cephfsutils.py index 03a6fa956..9bf3e2516 100644 --- a/backend/kale/common/cephfsutils.py +++ b/backend/kale/common/cephfsutils.py @@ -112,7 +112,7 @@ def snapshot_pod(): snapshot_names = [] for i in volumes: snapshot_pvc( - "snapshot." + i[1], + "pod-snapshot-" + i[1], i[1], pod=pod_name, default_container=podutils.get_container_name()) @@ -131,7 +131,7 @@ def snapshot_notebook(): snapshot_names = [] for i in volumes: snapshot_pvc( - "snapshot." + i[1], + "nb-snapshot-" + i[1], i[1], image=podutils.get_docker_base_image(), path=i[0], @@ -203,7 +203,7 @@ def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): api_version="v1", kind="PersistentVolumeClaim", metadata=kubernetes.client.V1ObjectMeta( - annotations={"snapshot_content/origin": content_name}, + annotations={"snapshot_origin": content_name}, name=new_pvc_name ), spec=kubernetes.client.V1PersistentVolumeClaimSpec( @@ -230,7 +230,110 @@ def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): return {"name": ns_pvc.metadata.name} +def get_nb_name_from_snapshot(snapshot_name): + """Get the name of the notebook that the snapshot was taken from.""" + snapshot = get_pvc_snapshot(snapshot_name=snapshot_name) + orig_notebook = snapshot["metadata"]["labels"]["default_container"] + return orig_notebook + + +def get_nb_image_from_snapshot(snapshot_name): + """Get the image of the notebook that the snapshot was taken from.""" + snapshot = get_pvc_snapshot(snapshot_name=snapshot_name) + image = snapshot["metadata"]["annotations"]["container_image"] + return image + + +def get_nb_pvcs_from_snapshot(snapshot_name): + """Get all the PVCs that were mounted to the NB when the snapshot was taken. + + Returns JSON list. + """ + selector = "default_container=" + get_nb_name_from_snapshot(snapshot_name) + all_volumes = list_pvc_snapshots(label_selector=selector)["items"] + volumes = [] + for i in all_volumes: + snapshot_name = i["metadata"]["name"] + source_pvc_name = i["spec"]["source"]["persistentVolumeClaimName"] + path = i["metadata"]["annotations"]["volume_path"] + row = { + "mountPath": path, + "snapshot_name": snapshot_name, + "source_pvc": source_pvc_name} + volumes.append(row) + return volumes + + +def restore_pvcs_from_snapshot(snapshot_name): + """Restore the NB PVCs from their snapshots.""" + source_snapshots = get_nb_pvcs_from_snapshot(snapshot_name) + replaced_volume_mounts = [] + for i in source_snapshots: + new_pvc_name = "restored-" + i["source_pvc"] + pvc_name = hydrate_pvc_from_snapshot(new_pvc_name, i["snapshot_name"]) + path = i["mountPath"] + row = {"mountPath": path, "name": pvc_name["name"]} + replaced_volume_mounts.append(row) + return replaced_volume_mounts + + +def replace_cloned_volumes(volume_mounts): + """Replace the volumes with the volumes restored from the snapshot.""" + replaced_volumes = [] + for i in volume_mounts: + name = i["name"] + row = {"name": name, "persistentVolumeClaim": {"claimName": name}} + replaced_volumes.append(row) + return replaced_volumes + + +def restore_notebook(snapshot_name): + """Restore a notebook from a PVC snapshot.""" + name = "restored-" + get_nb_name_from_snapshot(snapshot_name) + namespace = podutils.get_namespace() + image = get_nb_image_from_snapshot(snapshot_name) + volume_mounts = restore_pvcs_from_snapshot(snapshot_name) + volumes = replace_cloned_volumes(volume_mounts) + notebook_resource = { + "apiVersion": "kubeflow.org/v1alpha1", + "kind": "Notebook", + "metadata": { + "labels": { + "app": name + }, + "name": name, + "namespace": namespace}, + "spec": { + "template": { + "spec": { + "containers": [ + { + "env": [], + "image": image, + "name": name, + "resources": { + "requests": { + "cpu": "0.5", + "memory": "1.0Gi"}}, + "volumeMounts": volume_mounts}], + "serviceAccountName": "default-editor", + "ttlSecondsAfterFinished": 300, + "volumes": volumes}}}} + co_client = podutils._get_k8s_custom_objects_client() + log.info("Restoring notebook %s from PVC snapshot %s in namespace %s ..." + % (name, snapshot_name, namespace)) + task_info = co_client.create_namespaced_custom_object( + group="kubeflow.org", + version="v1alpha1", + namespace=namespace, + plural="notebooks", + body=notebook_resource) + + return task_info + + def delete_pvc(pvc_name): + """Delete a pvc.""" client = podutils._get_k8s_v1_client() namespace = podutils.get_namespace() client.delete_namespaced_persistent_volume_claim( From a12f3f921e1ceb2e43affb9b99f7cbb55e903d7d Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Sat, 17 Oct 2020 17:37:42 +0200 Subject: [PATCH 06/15] Replace hardcoded CephFS in _list_volumes with actual check Rather than hardcoding the supported storage classes and provisioners in the `_list_volumes` function it will now check if the provisioner of the PVCs has an accompanying VolumeSnapshotClass. --- backend/kale/common/cephfsutils.py | 2 +- backend/kale/common/podutils.py | 61 +++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 11 deletions(-) diff --git a/backend/kale/common/cephfsutils.py b/backend/kale/common/cephfsutils.py index 9bf3e2516..0ac2ff323 100644 --- a/backend/kale/common/cephfsutils.py +++ b/backend/kale/common/cephfsutils.py @@ -188,7 +188,7 @@ def list_pvc_snapshots(label_selector=""): def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): - """Create a new PVC out of a Rok snapshot.""" + """Create a new PVC out of a volume snapshot.""" log.info("Creating new PVC '%s' from snapshot %s ..." % (new_pvc_name, source_snapshot_name)) snapshot_info = get_pvc_snapshot(source_snapshot_name) diff --git a/backend/kale/common/podutils.py b/backend/kale/common/podutils.py index 80d6ef505..9a0355b04 100644 --- a/backend/kale/common/podutils.py +++ b/backend/kale/common/podutils.py @@ -26,9 +26,6 @@ ROK_CSI_STORAGE_CLASS = "rok" ROK_CSI_STORAGE_PROVISIONER = "rok.arrikto.com" -ROOK_CEPHFS_CSI_STORAGE_CLASS = "rook-cephfs" -ROOK_CEPHFS_CSI_STORAGE_PROVISIONER = "rook-ceph.cephfs.csi.ceph.com" - NAMESPACE_PATH = "/var/run/secrets/kubernetes.io/serviceaccount/namespace" K8S_SIZE_RE = re.compile(r'^([0-9]+)(E|Ei|P|Pi|T|Ti|G|Gi|M|Mi|K|Ki){0,1}$') @@ -148,6 +145,8 @@ def _get_mount_path(container, volume): def _list_volumes(client, namespace, pod_name, container_name): pod = client.read_namespaced_pod(pod_name, namespace) container = _get_pod_container(pod, container_name) + snapshotclass_provisioners = list_snapshotclass_storage_provisioners() + provisioner_names = get_snapshotclass_provisioners_names() rok_volumes = [] for volume in pod.spec.volumes: @@ -162,22 +161,23 @@ def _list_volumes(client, namespace, pod_name, container_name): pvc = client.read_namespaced_persistent_volume_claim(pvc.claim_name, namespace) if (pvc.spec.storage_class_name != ROK_CSI_STORAGE_CLASS - and pvc.spec.storage_class_name != ROOK_CEPHFS_CSI_STORAGE_CLASS): + and pvc.spec.storage_class_name not in provisioner_names): msg = ("Found PVC with storage class '%s'. " - "Only storage classes '%s' and '%s' are supported." + "Only storage classes able to take snapshots and " + "'%s' are supported." % (pvc.spec.storage_class_name, - ROK_CSI_STORAGE_CLASS, ROOK_CEPHFS_CSI_STORAGE_CLASS)) + ROK_CSI_STORAGE_CLASS)) raise RuntimeError(msg) ann = pvc.metadata.annotations provisioner = ann.get("volume.beta.kubernetes.io/storage-provisioner", None) if (provisioner != ROK_CSI_STORAGE_PROVISIONER - and provisioner != ROOK_CEPHFS_CSI_STORAGE_PROVISIONER): + and provisioner not in snapshotclass_provisioners): msg = ("Found PVC storage provisioner '%s'. " - "Only storage provisioners '%s' and '%s' are supported." - % (provisioner, ROK_CSI_STORAGE_PROVISIONER, - ROOK_CEPHFS_CSI_STORAGE_PROVISIONER)) + "Only storage provisioners able to take snapshots and " + "'%s' are supported." + % (provisioner, ROK_CSI_STORAGE_PROVISIONER)) raise RuntimeError(msg) mount_path = _get_mount_path(container, volume) @@ -307,3 +307,44 @@ def compute_component_id(pod): component_id = component_name + "@sha256=" + component_spec_digest log.info("Computed component ID: %s", component_id) return component_id + + +def _get_k8s_storage_client(): + k8s_config.load_incluster_config() + api_client = k8s.ApiClient() + return k8s.StorageV1beta1Api(api_client) + + +def get_snapshotclasses(label_selector=""): + """List snapshotclasses.""" + co_client = _get_k8s_custom_objects_client() + + snapshotclasses = co_client.list_cluster_custom_object( + group="snapshot.storage.k8s.io", + version="v1beta1", + plural="volumesnapshotclasses", + label_selector=label_selector) + return snapshotclasses + + +def list_snapshotclass_storage_provisioners(label_selector=""): + """List the storage provisioners of the snapshotclasses.""" + snapshotclasses = get_snapshotclasses(label_selector)["items"] + snapshotclass_provisioners = [] + for i in snapshotclasses: + snapshotclass_provisioners.append(i["driver"]) + return snapshotclass_provisioners + + +def get_snapshotclass_provisioners_names(): + """Get the names of snapshotclass storage provisioners.""" + client = _get_k8s_storage_client() + classes = client.list_storage_class().items + snapshotclass_provisioners = list_snapshotclass_storage_provisioners() + storage_class_names = [] + for i in classes: + name = i.metadata.name + provisioner = i.provisioner + if provisioner in snapshotclass_provisioners: + storage_class_names.append(name) + return storage_class_names From b26e917df8d76237b9abb1297e5722b35afd1c8a Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Sat, 17 Oct 2020 18:49:04 +0200 Subject: [PATCH 07/15] Change name from cephfsutils.py to snapshotutils.py to better represent the generic nature of the functions within --- backend/kale/common/{cephfsutils.py => snapshotutils.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename backend/kale/common/{cephfsutils.py => snapshotutils.py} (100%) diff --git a/backend/kale/common/cephfsutils.py b/backend/kale/common/snapshotutils.py similarity index 100% rename from backend/kale/common/cephfsutils.py rename to backend/kale/common/snapshotutils.py From d1f5bb4238b4e881c27ad44a2748260d34ac0c4d Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Mon, 19 Oct 2020 15:46:17 +0200 Subject: [PATCH 08/15] First attempt at frontend support --- backend/kale/common/podutils.py | 25 +++++++++++++++++ backend/kale/rpc/snapshot.py | 50 +++++++++++++++++++++++++++++++++ labextension/src/widget.tsx | 21 ++++++++++++++ 3 files changed, 96 insertions(+) create mode 100644 backend/kale/rpc/snapshot.py diff --git a/backend/kale/common/podutils.py b/backend/kale/common/podutils.py index 9a0355b04..410873e73 100644 --- a/backend/kale/common/podutils.py +++ b/backend/kale/common/podutils.py @@ -336,6 +336,31 @@ def list_snapshotclass_storage_provisioners(label_selector=""): return snapshotclass_provisioners +def check_snapshot_availability(): + """Check if snapshotclasses are available for notebook.""" + client = _get_k8s_v1_client() + namespace = get_namespace() + pod_name = get_pod_name() + pod = client.read_namespaced_pod(pod_name, namespace) + snapshotclass_provisioners = list_snapshotclass_storage_provisioners() + + for volume in pod.spec.volumes: + pvc = volume.persistent_volume_claim + if not pvc: + continue + pvc = client.read_namespaced_persistent_volume_claim(pvc.claim_name, + namespace) + ann = pvc.metadata.annotations + provisioner = ann.get("volume.beta.kubernetes.io/storage-provisioner", + None) + if provisioner not in snapshotclass_provisioners: + msg = ("Found PVC storage provisioner '%s'. " + "Only storage provisioners able to take snapshots " + "are supported." + % (provisioner)) + raise RuntimeError(msg) + + def get_snapshotclass_provisioners_names(): """Get the names of snapshotclass storage provisioners.""" client = _get_k8s_storage_client() diff --git a/backend/kale/rpc/snapshot.py b/backend/kale/rpc/snapshot.py new file mode 100644 index 000000000..ce444635b --- /dev/null +++ b/backend/kale/rpc/snapshot.py @@ -0,0 +1,50 @@ +# Copyright 2019-2020 The Kale Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging + +from kale.common import podutils, snapshotutils +from kale.rpc.errors import (RPCServiceUnavailableError) +from kale.rpc.log import create_adapter + + +logger = create_adapter(logging.getLogger(__name__)) + + +def check_snapshot_availability(request): + """Check if snapshotclasses are available for notebook.""" + log = request.log if hasattr(request, "log") else logger + try: + podutils.check_snapshot_availability() + except Exception: + log.exception("No snapshotclass is available for this notebook") + raise RPCServiceUnavailableError(details=("No snapshotclass" + " is available for" + " this notebook"), + trans_id=request.trans_id) + + +def check_snapshot_status(request, snapshot_name): + """Check if volume snapshot is ready to use.""" + return snapshotutils.check_snapshot_status(snapshot_name) + + +def snapshot_notebook(request): + """Take snapshots of the current Notebook's PVCs and store its metadata.""" + return snapshotutils.snapshot_notebook() + + +def replace_cloned_volumes(request, volume_mounts): + """Replace the volumes with the volumes restored from the snapshot.""" + return snapshotutils.replace_cloned_volumes(volume_mounts) diff --git a/labextension/src/widget.tsx b/labextension/src/widget.tsx index 65649b3b2..5b193c138 100644 --- a/labextension/src/widget.tsx +++ b/labextension/src/widget.tsx @@ -84,6 +84,7 @@ async function activate( // env we are in (like Local Laptop, MiniKF, GCP, UI without Kale, ...) const backend = await getBackend(kernel); let rokError: IRPCError = null; + let snapshotError: IRPCError = null; if (backend) { try { await executeRpc(kernel, 'log.setup_logging'); @@ -111,6 +112,26 @@ async function activate( throw error; } } + + try { + await executeRpc(kernel, 'snapshot.check_snapshot_availability'); + } catch (error) { + const unexpectedErrorCodes = [ + RPC_CALL_STATUS.EncodingError, + RPC_CALL_STATUS.ImportError, + RPC_CALL_STATUS.UnhandledError, + ]; + if ( + error instanceof RPCError && + !unexpectedErrorCodes.includes(error.error.code) + ) { + snapshotError = error.error; + console.warn('Snapshots are not available', snapshotError); + } else { + globalUnhandledRejection({ reason: error }); + throw error; + } + } } else { rokError = { rpc: 'rok.check_rok_availability', From c8c725e509c1110da1d0391e302992cec6e6e8d1 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Mon, 19 Oct 2020 17:55:29 +0200 Subject: [PATCH 09/15] A few more frontend additions for generic snapshots --- labextension/src/lib/RPCUtils.tsx | 11 +++++++++++ labextension/src/widget.tsx | 1 + labextension/src/widgets/LeftPanel.tsx | 2 ++ labextension/src/widgets/VolumesPanel.tsx | 3 ++- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/labextension/src/lib/RPCUtils.tsx b/labextension/src/lib/RPCUtils.tsx index ef593a42f..ef11e2779 100644 --- a/labextension/src/lib/RPCUtils.tsx +++ b/labextension/src/lib/RPCUtils.tsx @@ -84,6 +84,17 @@ export const rokErrorTooltip = (rokError: IRPCError) => { ); }; +export const snapshotErrorTooltip = (snapshotError: IRPCError) => { + return ( + +
+ This feature requires snapshot support.{' '} + showRpcError(snapshotError)}>More info... +
+
+ ); +}; + const serialize = (obj: any) => window.btoa(JSON.stringify(obj)); const deserialize = (raw_data: string) => window.atob(raw_data.substring(1, raw_data.length - 1)); diff --git a/labextension/src/widget.tsx b/labextension/src/widget.tsx index 5b193c138..c9dcb06f8 100644 --- a/labextension/src/widget.tsx +++ b/labextension/src/widget.tsx @@ -196,6 +196,7 @@ async function activate( backend={backend} kernel={kernel} rokError={rokError} + snapshotError={snapshotError} />, ); widget.id = 'kubeflow-kale/kubeflowDeployment'; diff --git a/labextension/src/widgets/LeftPanel.tsx b/labextension/src/widgets/LeftPanel.tsx index ca9fb34f8..1f3501bc0 100644 --- a/labextension/src/widgets/LeftPanel.tsx +++ b/labextension/src/widgets/LeftPanel.tsx @@ -60,6 +60,7 @@ interface IProps { backend: boolean; kernel: Kernel.IKernelConnection; rokError: IRPCError; + snapshotError: IRPCError; } interface IState { @@ -817,6 +818,7 @@ export class KubeflowKaleLeftPanel extends React.Component { autosnapshot={this.state.metadata.autosnapshot} updateAutosnapshotSwitch={this.updateAutosnapshotSwitch} rokError={this.props.rokError} + snapshotError={this.props.snapshotError} updateVolumes={this.updateVolumes} storageClassName={this.state.metadata.storage_class_name} updateStorageClassName={this.updateStorageClassName} diff --git a/labextension/src/widgets/VolumesPanel.tsx b/labextension/src/widgets/VolumesPanel.tsx index 708044df4..4d8e9b08b 100644 --- a/labextension/src/widgets/VolumesPanel.tsx +++ b/labextension/src/widgets/VolumesPanel.tsx @@ -19,7 +19,7 @@ import { Button, Switch, Zoom } from '@material-ui/core'; import AddIcon from '@material-ui/icons/Add'; import DeleteIcon from '@material-ui/icons/Delete'; import { IVolumeMetadata } from './LeftPanel'; -import { IRPCError, rokErrorTooltip } from '../lib/RPCUtils'; +import { IRPCError, rokErrorTooltip, snapshotErrorTooltip } from '../lib/RPCUtils'; import { Input } from '../components/Input'; import { Select, ISelectOption } from '../components/Select'; import { LightTooltip } from '../components/LightTooltip'; @@ -93,6 +93,7 @@ interface VolumesPanelProps { useNotebookVolumes: boolean; autosnapshot: boolean; rokError: IRPCError; + snapshotError: IRPCError; updateVolumes: Function; updateVolumesSwitch: Function; updateAutosnapshotSwitch: Function; From f90847f935ca47d652cc0118be1754f44e53d0c7 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Wed, 21 Oct 2020 13:42:04 +0200 Subject: [PATCH 10/15] Update to use new k8sutils.py --- backend/kale/common/k8sutils.py | 10 ++++++++++ backend/kale/common/podutils.py | 12 +++--------- backend/kale/common/snapshotutils.py | 14 +++++++------- labextension/src/widgets/VolumesPanel.tsx | 6 +++++- 4 files changed, 25 insertions(+), 17 deletions(-) diff --git a/backend/kale/common/k8sutils.py b/backend/kale/common/k8sutils.py index da530a352..9155e387f 100644 --- a/backend/kale/common/k8sutils.py +++ b/backend/kale/common/k8sutils.py @@ -19,6 +19,7 @@ _api_client = None _api_v1_client = None _k8s_co_client = None +_k8s_storage_client = None def _load_config(): @@ -57,6 +58,15 @@ def get_co_client(): return _k8s_co_client +def get_storage_client(): + """Get the Kubernetes V1beta1 StorageClient.""" + global _k8s_storage_client + if not _k8s_storage_client: + _load_config() + _k8s_storage_client = kubernetes.client.StorageV1beta1Api() + return _k8s_storage_client + + def annotate_object(group, version, plural, name, namespace, annotations): """Annotate a custom Kubernetes object.""" patch = {"apiVersion": "%s/%s" % (group, version), diff --git a/backend/kale/common/podutils.py b/backend/kale/common/podutils.py index 410873e73..b3e65fa8a 100644 --- a/backend/kale/common/podutils.py +++ b/backend/kale/common/podutils.py @@ -309,15 +309,9 @@ def compute_component_id(pod): return component_id -def _get_k8s_storage_client(): - k8s_config.load_incluster_config() - api_client = k8s.ApiClient() - return k8s.StorageV1beta1Api(api_client) - - def get_snapshotclasses(label_selector=""): """List snapshotclasses.""" - co_client = _get_k8s_custom_objects_client() + co_client = k8sutils.get_co_client() snapshotclasses = co_client.list_cluster_custom_object( group="snapshot.storage.k8s.io", @@ -338,7 +332,7 @@ def list_snapshotclass_storage_provisioners(label_selector=""): def check_snapshot_availability(): """Check if snapshotclasses are available for notebook.""" - client = _get_k8s_v1_client() + client = k8sutils.get_v1_client() namespace = get_namespace() pod_name = get_pod_name() pod = client.read_namespaced_pod(pod_name, namespace) @@ -363,7 +357,7 @@ def check_snapshot_availability(): def get_snapshotclass_provisioners_names(): """Get the names of snapshotclass storage provisioners.""" - client = _get_k8s_storage_client() + client = k8sutils.get_storage_client() classes = client.list_storage_class().items snapshotclass_provisioners = list_snapshotclass_storage_provisioners() storage_class_names = [] diff --git a/backend/kale/common/snapshotutils.py b/backend/kale/common/snapshotutils.py index 0ac2ff323..9708bbcc1 100644 --- a/backend/kale/common/snapshotutils.py +++ b/backend/kale/common/snapshotutils.py @@ -58,7 +58,7 @@ import logging import kubernetes -from kale.common import podutils +from kale.common import podutils, k8sutils NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE = """\ This is a snapshot of notebook {} in namespace {}. @@ -87,7 +87,7 @@ def snapshot_pvc(snapshot_name, pvc_name, image="", path="", **kwargs): "source": {"persistentVolumeClaimName": pvc_name} } } - co_client = podutils._get_k8s_custom_objects_client() + co_client = k8sutils.get_co_client() namespace = podutils.get_namespace() log.info("Taking a snapshot of PVC %s in namespace %s ..." % (pvc_name, namespace)) @@ -161,7 +161,7 @@ def check_snapshot_status(snapshot_name): def get_pvc_snapshot(snapshot_name): """Get info about a pvc snapshot.""" - co_client = podutils._get_k8s_custom_objects_client() + co_client = k8sutils.get_co_client() namespace = podutils.get_namespace() pvc_snapshot = co_client.get_namespaced_custom_object( @@ -175,7 +175,7 @@ def get_pvc_snapshot(snapshot_name): def list_pvc_snapshots(label_selector=""): """List pvc snapshots.""" - co_client = podutils._get_k8s_custom_objects_client() + co_client = k8sutils.get_co_client() namespace = podutils.get_namespace() pvc_snapshots = co_client.list_namespaced_custom_object( @@ -218,7 +218,7 @@ def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): ) ) ) - k8s_client = podutils._get_k8s_v1_client() + k8s_client = k8sutils.get_v1_client() ns = podutils.get_namespace() status = check_snapshot_status(source_snapshot_name) if status is True: @@ -319,7 +319,7 @@ def restore_notebook(snapshot_name): "serviceAccountName": "default-editor", "ttlSecondsAfterFinished": 300, "volumes": volumes}}}} - co_client = podutils._get_k8s_custom_objects_client() + co_client = k8sutils.get_co_client() log.info("Restoring notebook %s from PVC snapshot %s in namespace %s ..." % (name, snapshot_name, namespace)) task_info = co_client.create_namespaced_custom_object( @@ -334,7 +334,7 @@ def restore_notebook(snapshot_name): def delete_pvc(pvc_name): """Delete a pvc.""" - client = podutils._get_k8s_v1_client() + client = k8sutils.get_v1_client() namespace = podutils.get_namespace() client.delete_namespaced_persistent_volume_claim( namespace=namespace, diff --git a/labextension/src/widgets/VolumesPanel.tsx b/labextension/src/widgets/VolumesPanel.tsx index 4d8e9b08b..696b3cfa1 100644 --- a/labextension/src/widgets/VolumesPanel.tsx +++ b/labextension/src/widgets/VolumesPanel.tsx @@ -19,7 +19,11 @@ import { Button, Switch, Zoom } from '@material-ui/core'; import AddIcon from '@material-ui/icons/Add'; import DeleteIcon from '@material-ui/icons/Delete'; import { IVolumeMetadata } from './LeftPanel'; -import { IRPCError, rokErrorTooltip, snapshotErrorTooltip } from '../lib/RPCUtils'; +import { + IRPCError, + rokErrorTooltip, + snapshotErrorTooltip, +} from '../lib/RPCUtils'; import { Input } from '../components/Input'; import { Select, ISelectOption } from '../components/Select'; import { LightTooltip } from '../components/LightTooltip'; From 8be4c34eaae69378afd6262bff0c4df6ac85412c Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Wed, 21 Oct 2020 16:59:53 +0200 Subject: [PATCH 11/15] Add checks for snapshotError alongside rokError in LeftPanel --- backend/kale/common/snapshotutils.py | 2 +- labextension/src/widgets/LeftPanel.tsx | 43 +++++++++++++++++++++++--- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/backend/kale/common/snapshotutils.py b/backend/kale/common/snapshotutils.py index 9708bbcc1..2a671958c 100644 --- a/backend/kale/common/snapshotutils.py +++ b/backend/kale/common/snapshotutils.py @@ -138,7 +138,7 @@ def snapshot_notebook(): pod=pod_name, default_container=podutils.get_container_name(), is_workspace_dir=str(podutils.is_workspace_dir(i[0]))) - snapshot_names.append("snapshot." + i[1]) + snapshot_names.append("nb-snapshot-" + i[1]) return snapshot_names diff --git a/labextension/src/widgets/LeftPanel.tsx b/labextension/src/widgets/LeftPanel.tsx index 1f3501bc0..5b65c840d 100644 --- a/labextension/src/widgets/LeftPanel.tsx +++ b/labextension/src/widgets/LeftPanel.tsx @@ -443,6 +443,16 @@ export class KubeflowKaleLeftPanel extends React.Component { notebookVolumes, selectVolumeTypes, }); + } else if (!this.props.snapshotError) { + // Get information about volumes currently mounted on the notebook server + const { + notebookVolumes, + selectVolumeTypes, + } = await commands.getMountedVolumes(this.state.notebookVolumes); + this.setState({ + notebookVolumes, + selectVolumeTypes, + }); } else { this.setState((prevState, props) => ({ selectVolumeTypes: prevState.selectVolumeTypes.map(t => { @@ -533,8 +543,25 @@ export class KubeflowKaleLeftPanel extends React.Component { } return volume; }); + let snapstateVolumes = this.props.snapshotError + ? metadataVolumes + : metadataVolumes.map((volume: IVolumeMetadata) => { + if ( + volume.type === 'new_pvc' && + volume.annotations.length > 0 && + volume.annotations[0].key === 'rok/origin' + ) { + return { ...volume, type: 'snap' }; + } + return volume; + }); if (stateVolumes.length === 0 && metadataVolumes.length === 0) { metadataVolumes = stateVolumes = this.state.notebookVolumes; + } else if ( + snapstateVolumes.length === 0 && + metadataVolumes.length === 0 + ) { + metadataVolumes = snapstateVolumes = this.state.notebookVolumes; } else { metadataVolumes = metadataVolumes.concat(this.state.notebookVolumes); stateVolumes = stateVolumes.concat(this.state.notebookVolumes); @@ -558,11 +585,15 @@ export class KubeflowKaleLeftPanel extends React.Component { }, autosnapshot: notebookMetadata['autosnapshot'] === undefined - ? !this.props.rokError && this.state.notebookVolumes.length > 0 + ? !this.props.rokError && + !this.props.snapshotError && + this.state.notebookVolumes.length > 0 : notebookMetadata['autosnapshot'], snapshot_volumes: notebookMetadata['snapshot_volumes'] === undefined - ? !this.props.rokError && this.state.notebookVolumes.length > 0 + ? !this.props.rokError && + !this.props.snapshotError && + this.state.notebookVolumes.length > 0 : notebookMetadata['snapshot_volumes'], // fixme: for now we are using the 'steps_defaults' field just for poddefaults // so we replace any existing value every time @@ -578,9 +609,13 @@ export class KubeflowKaleLeftPanel extends React.Component { ...DefaultState.metadata, volumes: prevState.notebookVolumes, snapshot_volumes: - !this.props.rokError && prevState.notebookVolumes.length > 0, + !this.props.rokError && + !this.props.snapshotError && + prevState.notebookVolumes.length > 0, autosnapshot: - !this.props.rokError && prevState.notebookVolumes.length > 0, + !this.props.rokError && + !this.props.snapshotError && + prevState.notebookVolumes.length > 0, }, volumes: prevState.notebookVolumes, })); From e158e820bf66abec66f2a80d754f3e16c417063f Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Wed, 21 Oct 2020 17:35:17 +0200 Subject: [PATCH 12/15] Add checks for snapshotError alongside rokError in VolumesPanel --- labextension/src/widgets/VolumesPanel.tsx | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/labextension/src/widgets/VolumesPanel.tsx b/labextension/src/widgets/VolumesPanel.tsx index 696b3cfa1..183409d9f 100644 --- a/labextension/src/widgets/VolumesPanel.tsx +++ b/labextension/src/widgets/VolumesPanel.tsx @@ -123,9 +123,9 @@ export const VolumesPanel: React.FunctionComponent = props => const addVolume = () => { // If we add a volume to an empty list, turn autosnapshot on const autosnapshot = - !props.rokError && props.volumes.length === 0 + !props.rokError && !props.snapshotError && props.volumes.length === 0 ? true - : !props.rokError && props.autosnapshot; + : !props.rokError && !props.snapshotError && props.autosnapshot; props.updateVolumes( [...props.volumes, DEFAULT_EMPTY_VOLUME], [...props.metadataVolumes, DEFAULT_EMPTY_VOLUME], @@ -536,7 +536,8 @@ export const VolumesPanel: React.FunctionComponent = props => props.updateVolumesSwitch()} color="primary" @@ -568,7 +569,10 @@ export const VolumesPanel: React.FunctionComponent = props => props.updateAutosnapshotSwitch()} color="primary" name="enableKale" From 5da62006082cb1d33eb8dbf4f0e8656a7587b7a3 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Wed, 21 Oct 2020 18:42:22 +0200 Subject: [PATCH 13/15] Possible fix for Switch tooltips I'm not sure if this is properly implemented for all scenarios. --- labextension/src/widgets/VolumesPanel.tsx | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/labextension/src/widgets/VolumesPanel.tsx b/labextension/src/widgets/VolumesPanel.tsx index 183409d9f..5fffc57b0 100644 --- a/labextension/src/widgets/VolumesPanel.tsx +++ b/labextension/src/widgets/VolumesPanel.tsx @@ -522,13 +522,16 @@ export const VolumesPanel: React.FunctionComponent = props =>
@@ -554,13 +557,16 @@ export const VolumesPanel: React.FunctionComponent = props =>
From 58afc1f635ea7998e82cfdb9b0b5910aa70180a5 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Fri, 23 Oct 2020 14:27:55 +0200 Subject: [PATCH 14/15] Add ability to snapshot notebook with generic CSI driver If taking snapshots is possible with the current CSI driver, selecting "Use this notebook's volumes" or "Clone Notebook Volume" will create a snapshot of the notebook'ss PVC (multiple PVCs not tested but should work as well). At this point there is still an issue with getting the snapshot status when performing this procedure. --- backend/kale/common/snapshotutils.py | 12 +++--- labextension/src/lib/Commands.ts | 52 +++++++++++++++++++++++++- labextension/src/widgets/LeftPanel.tsx | 39 +++++++++++++------ 3 files changed, 84 insertions(+), 19 deletions(-) diff --git a/backend/kale/common/snapshotutils.py b/backend/kale/common/snapshotutils.py index 2a671958c..c791eae31 100644 --- a/backend/kale/common/snapshotutils.py +++ b/backend/kale/common/snapshotutils.py @@ -150,12 +150,12 @@ def check_snapshot_status(snapshot_name): task = get_pvc_snapshot(snapshot_name=snapshot_name) status = task['status']['readyToUse'] - if status is True: - log.info("Successfully created volume snapshot") - elif status is False: - raise RuntimeError("Snapshot not ready (status: %s)" % status) - else: - raise RuntimeError("Unknown snapshot task status: %s" % status) + # if status is True: + # log.info("Successfully created volume snapshot") + # elif status is False: + # raise log.info("Snapshot not ready (status: %s)" % status) + # else: + # raise log.info("Unknown snapshot task status: %s" % status) return status diff --git a/labextension/src/lib/Commands.ts b/labextension/src/lib/Commands.ts index 9892e01ff..1c3c110ca 100644 --- a/labextension/src/lib/Commands.ts +++ b/labextension/src/lib/Commands.ts @@ -20,6 +20,7 @@ import { _legacy_executeRpc, _legacy_executeRpcAndShowRPCError, RPCError, + IRPCError, } from './RPCUtils'; import { wait } from './Utils'; import { @@ -37,6 +38,7 @@ import { } from '../widgets/VolumesPanel'; import { IDocumentManager } from '@jupyterlab/docmanager'; import CellUtils from './CellUtils'; +import * as React from 'react'; enum RUN_CELL_STATUS { OK = 'ok', @@ -73,6 +75,8 @@ interface IKatibRunArgs { } export default class Commands { + rokError: IRPCError; + snapshotError: IRPCError; private readonly _notebook: NotebookPanel; private readonly _kernel: Kernel.IKernelConnection; @@ -89,6 +93,14 @@ export default class Commands { ); }; + genericsnapshotNotebook = async () => { + return await _legacy_executeRpcAndShowRPCError( + this._notebook, + this._kernel, + 'snapshot.snapshot_notebook', + ); + }; + getSnapshotProgress = async (task_id: string, ms?: number) => { const task = await _legacy_executeRpcAndShowRPCError( this._notebook, @@ -104,18 +116,31 @@ export default class Commands { return task; }; + genericgetSnapshotStatus = async (snapshot_name: string, ms?: number) => { + const isReady = await _legacy_executeRpcAndShowRPCError( + this._notebook, + this._kernel, + 'snapshot.check_snapshot_status', + { + snapshot_name, + }, + ); + if (ms) { + await wait(ms); + } + return isReady; + }; + runSnapshotProcedure = async (onUpdate: Function) => { const showSnapshotProgress = true; const snapshot = await this.snapshotNotebook(); const taskId = snapshot.task.id; let task = await this.getSnapshotProgress(taskId); onUpdate({ task, showSnapshotProgress }); - while (!['success', 'error', 'canceled'].includes(task.status)) { task = await this.getSnapshotProgress(taskId, 1000); onUpdate({ task }); } - if (task.status === 'success') { console.log('Snapshotting successful!'); return task; @@ -130,6 +155,29 @@ export default class Commands { return null; }; + runGenericSnapshotProcedure = async (onUpdate: Function) => { + const showSnapshotProgress = true; + const snapshot = await this.genericsnapshotNotebook(); + let snapshot_names = snapshot; + for (let i of snapshot_names) { + let isReady = await this.genericgetSnapshotStatus(i); + onUpdate({ isReady, showSnapshotProgress }); + while ((isReady = false)) { + isReady = await this.genericgetSnapshotStatus(i, 1000); + onUpdate({ isReady }); + } + if ((isReady = true)) { + console.log('Snapshotting successful!'); + return isReady; + } else if ((isReady = false)) { + console.error('Snapshot not ready'); + console.error('Stopping the deployment...'); + } + } + + return null; + }; + replaceClonedVolumes = async ( bucket: string, obj: string, diff --git a/labextension/src/widgets/LeftPanel.tsx b/labextension/src/widgets/LeftPanel.tsx index 5b65c840d..3e5c8a1b1 100644 --- a/labextension/src/widgets/LeftPanel.tsx +++ b/labextension/src/widgets/LeftPanel.tsx @@ -683,18 +683,35 @@ export class KubeflowKaleLeftPanel extends React.Component { metadata.volumes.filter((v: IVolumeMetadata) => v.type === 'clone') .length > 0 ) { - const task = await commands.runSnapshotProcedure(_updateDeployProgress); - console.log(task); - if (!task) { - this.setState({ runDeployment: false }); - return; + if (!this.props.rokError) { + const task = await commands.runSnapshotProcedure(_updateDeployProgress); + console.log(task); + if (!task) { + this.setState({ runDeployment: false }); + return; + } + metadata.volumes = await commands.replaceClonedVolumes( + task.bucket, + task.result.event.object, + task.result.event.version, + metadata.volumes, + ); + } else if (!this.props.snapshotError) { + const task = await commands.runGenericSnapshotProcedure( + _updateDeployProgress, + ); + console.log(task); + if (!task) { + this.setState({ runDeployment: false }); + return; + } + metadata.volumes = await commands.replaceClonedVolumes( + task.bucket, + task.result.event.object, + task.result.event.version, + metadata.volumes, + ); } - metadata.volumes = await commands.replaceClonedVolumes( - task.bucket, - task.result.event.object, - task.result.event.version, - metadata.volumes, - ); } // CREATE PIPELINE From 6610bed79420d5ac46857c086c8fef5a3561cf96 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Mon, 21 Dec 2020 17:07:58 +0100 Subject: [PATCH 15/15] small update and add data_source to template --- backend/kale/common/snapshotutils.py | 33 ++++++++++++------- .../kale/templates/pipeline_template.jinja2 | 3 ++ labextension/src/lib/Commands.ts | 13 ++++++++ labextension/src/widgets/LeftPanel.tsx | 5 +-- 4 files changed, 39 insertions(+), 15 deletions(-) diff --git a/backend/kale/common/snapshotutils.py b/backend/kale/common/snapshotutils.py index c791eae31..c3ac0a2f6 100644 --- a/backend/kale/common/snapshotutils.py +++ b/backend/kale/common/snapshotutils.py @@ -57,6 +57,7 @@ import logging import kubernetes +import time from kale.common import podutils, k8sutils @@ -145,18 +146,28 @@ def snapshot_notebook(): def check_snapshot_status(snapshot_name): """Check if volume snapshot is ready to use.""" log.info("Checking snapshot with snapshot name: %s", snapshot_name) + count = 0 + max_count = 60 task = None status = None - task = get_pvc_snapshot(snapshot_name=snapshot_name) - status = task['status']['readyToUse'] - - # if status is True: - # log.info("Successfully created volume snapshot") - # elif status is False: - # raise log.info("Snapshot not ready (status: %s)" % status) - # else: - # raise log.info("Unknown snapshot task status: %s" % status) - return status + while status is not True and count <= max_count: + count += 1 + try: + task = get_pvc_snapshot(snapshot_name=snapshot_name) + status = task['status']['readyToUse'] + log.info(task) + log.info(status) + time.sleep(2) + except KeyError: + log.info("Snapshot resource %s does not seem to be ready", snapshot_name) + time.sleep(2) + if status is True: + log.info("Successfully created volume snapshot") + elif status is False: + raise log.info("Snapshot not ready (status: %s)" % status) + else: + raise log.info("Unknown snapshot task status: %s" % status) + return task def get_pvc_snapshot(snapshot_name): @@ -220,7 +231,7 @@ def hydrate_pvc_from_snapshot(new_pvc_name, source_snapshot_name): ) k8s_client = k8sutils.get_v1_client() ns = podutils.get_namespace() - status = check_snapshot_status(source_snapshot_name) + status = check_snapshot_status(source_snapshot_name)['status']['readyToUse'] if status is True: ns_pvc = k8s_client.create_namespaced_persistent_volume_claim(ns, pvc) elif status is False: diff --git a/backend/kale/templates/pipeline_template.jinja2 b/backend/kale/templates/pipeline_template.jinja2 index 414450640..0200ff515 100644 --- a/backend/kale/templates/pipeline_template.jinja2 +++ b/backend/kale/templates/pipeline_template.jinja2 @@ -91,6 +91,9 @@ def auto_generated_pipeline({%- for arg in pipeline.pps_names -%} storage_class="{{ storage_class_name }}", {%- endif %} size='{{ pvc_size }}' + {%- if data_source_name %} + data_source="{{ data_source_name }}", + {%- endif %} ) _kale_volume = _kale_vop{{ loop.index }}.volume _kale_volume_step_names.append(_kale_vop{{ loop.index }}.name) diff --git a/labextension/src/lib/Commands.ts b/labextension/src/lib/Commands.ts index 1c3c110ca..54913436e 100644 --- a/labextension/src/lib/Commands.ts +++ b/labextension/src/lib/Commands.ts @@ -197,6 +197,19 @@ export default class Commands { ); }; + replaceGenericClonedVolumes = async ( + volumes: IVolumeMetadata[], + ) => { + return await _legacy_executeRpcAndShowRPCError( + this._notebook, + this._kernel, + 'snapshot.replace_cloned_volumes', + { + volumes, + }, + ); + }; + getMountedVolumes = async (currentNotebookVolumes: IVolumeMetadata[]) => { let notebookVolumes: IVolumeMetadata[] = await _legacy_executeRpcAndShowRPCError( this._notebook, diff --git a/labextension/src/widgets/LeftPanel.tsx b/labextension/src/widgets/LeftPanel.tsx index 3e5c8a1b1..7f794eb40 100644 --- a/labextension/src/widgets/LeftPanel.tsx +++ b/labextension/src/widgets/LeftPanel.tsx @@ -705,10 +705,7 @@ export class KubeflowKaleLeftPanel extends React.Component { this.setState({ runDeployment: false }); return; } - metadata.volumes = await commands.replaceClonedVolumes( - task.bucket, - task.result.event.object, - task.result.event.version, + metadata.volumes = await commands.replaceGenericClonedVolumes( metadata.volumes, ); }