Save Databricks init scripts in the workspace [databricks] (#8961)
* Save Databricks init scripts in the workspace [databricks]

To close: #8916

Init scripts in DBFS will be deprecated on Sept 1, 2023.

Move Databricks init scripts to the workspace

Signed-off-by: Tim Liu <timl@nvidia.com>

* Change Databricks init scripts from DBFS to the workspace in the docs

Signed-off-by: Tim Liu <timl@nvidia.com>

* Reuse init scripts path

Signed-off-by: Tim Liu <timl@nvidia.com>

* Update copyright

Signed-off-by: Tim Liu <timl@nvidia.com>

* Move uploading and cleanup of Databricks workspace init scripts into the common functions

Signed-off-by: Tim Liu <timl@nvidia.com>

---------

Signed-off-by: Tim Liu <timl@nvidia.com>
NvTimLiu authored Aug 25, 2023
1 parent daedfe5 commit 69ac83c
Showing 7 changed files with 12 additions and 16 deletions.
2 changes: 1 addition & 1 deletion docs/additional-functionality/rapids-shuffle.md
@@ -394,7 +394,7 @@ sudo dpkg -i ucx-1.14.0.deb ucx-cuda-1.14.0.deb &&
 rm ucx-1.14.0-ubuntu20.04-mofed5-cuda11.tar.bz2 ucx-1.14.0.deb ucx-cuda-1.14.0.deb
 ```
-Save the script in DBFS and add it to the "Init Scripts" list:
+Save the script in Databricks workspace and add it to the "Init Scripts" list:
 ![Init scripts panel showing UCX init script](../img/Databricks/initscript_ucx.png)
Binary file modified docs/img/Databricks/initscript_ucx.png
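
The updated documentation now tells readers to keep the UCX init script in the Databricks workspace rather than on DBFS. As a rough, non-authoritative sketch of what "save the script in the Databricks workspace" can look like when scripted rather than done through the UI, the Python below pushes a local script through the Workspace API 2.0; the host, token, local file name, and destination path are placeholders, and the AUTO import format is assumed to be accepted for plain workspace files.

```python
# Minimal sketch (placeholders throughout): upload a local init script as a
# workspace file via the Databricks Workspace API 2.0.
import base64
import requests

HOST = "https://<your-workspace>.cloud.databricks.com"  # placeholder
TOKEN = "<personal-access-token>"                        # placeholder
LOCAL = "init-ucx.sh"                                    # hypothetical local script
DEST = "/databricks/init_scripts/init-ucx.sh"            # hypothetical workspace path
HEADERS = {"Authorization": f"Bearer {TOKEN}"}

# Ensure the target directory exists in the workspace.
requests.post(f"{HOST}/api/2.0/workspace/mkdirs",
              headers=HEADERS,
              json={"path": "/databricks/init_scripts"}).raise_for_status()

# Import the script content (base64-encoded) as a workspace file.
with open(LOCAL, "rb") as f:
    encoded = base64.b64encode(f.read()).decode("utf-8")
requests.post(f"{HOST}/api/2.0/workspace/import",
              headers=HEADERS,
              json={"path": DEST, "format": "AUTO",
                    "overwrite": True, "content": encoded}).raise_for_status()
```

The uploaded workspace path is then what goes into the cluster's "Init Scripts" list shown in the screenshot above.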
14 changes: 5 additions & 9 deletions jenkins/Jenkinsfile-blossom.premerge-databricks
@@ -65,7 +65,7 @@ pipeline {
         DATABRICKS_PUBKEY = credentials("SPARK_DATABRICKS_PUBKEY")
         DATABRICKS_DRIVER = DbUtils.getDriver("$DB_TYPE")
         DATABRICKS_WORKER = DbUtils.getWorker("$DB_TYPE")
-        INIT_SCRIPTS_DIR = "dbfs:/databricks/init_scripts/${BUILD_TAG}"
+        INIT_SCRIPTS_DIR = "/databricks/init_scripts/${BUILD_TAG}"
     }

     stages {
@@ -129,7 +129,7 @@ String getDbType() {
 void databricksBuild() {
     def CLUSTER_ID = ''
     def SPARK_MAJOR = BASE_SPARK_VERSION_TO_INSTALL_DATABRICKS_JARS.replace('.', '')
-    def dbfs_path = "$INIT_SCRIPTS_DIR-$DB_TYPE"
+    def ws_path = "$INIT_SCRIPTS_DIR-$DB_TYPE"
     try {
         stage("Create $SPARK_MAJOR DB") {
             script {
@@ -142,12 +142,8 @@ void databricksBuild() {

                 // handle init scripts if exist
                 if (env.INIT_SCRIPTS) {
-                    sh "bash -c 'dbfs mkdirs $dbfs_path'"
-                    env.INIT_SCRIPTS.split(',').each {
-                        sh "bash -c 'dbfs cp --overwrite jenkins/databricks/${it} $dbfs_path'"
-                    }
-                    // foo.sh,bar.sh --> dbfs:/path/foo.sh,dbfs:/path/bar.sh
-                    CREATE_PARAMS += " -f $dbfs_path/" + env.INIT_SCRIPTS.replace(',', ",$dbfs_path/")
+                    // foo.sh,bar.sh --> /path/foo.sh,/path/bar.sh
+                    CREATE_PARAMS += " -f " + DbUtils.uploadFiles(this, env.INIT_SCRIPTS, ws_path)
                 }

                 CLUSTER_ID = sh(script: "python3 ./jenkins/databricks/create.py $CREATE_PARAMS",
@@ -194,7 +190,7 @@ void databricksBuild() {
             container('cpu') {
                 retry(3) {
                     if (env.INIT_SCRIPTS) {
-                        sh "bash -c 'dbfs rm -r $dbfs_path'"
+                        DbUtils.cleanUp(this, ws_path)
                     }
                     sh "python3 ./jenkins/databricks/shutdown.py -s $DATABRICKS_HOST -t $DATABRICKS_TOKEN -c $CLUSTER_ID -d"
                 }
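
The pipeline previously shelled out to the dbfs CLI to create the staging directory, copy each script, and remove it afterwards; it now delegates that work to DbUtils.uploadFiles and DbUtils.cleanUp from the shared Jenkins library, whose Groovy source is not part of this diff. Purely as an illustration of the contract the diff implies (upload the comma-separated scripts under the per-build workspace path, hand back the remote paths for create.py's -f option, then delete the directory recursively), here is a hedged Python sketch; to_remote_paths and clean_up are hypothetical names, not the library's API, and each script would still need to be imported into the workspace first, e.g. as in the sketch above.

```python
# Illustration only -- not the Groovy DbUtils helpers used by the Jenkinsfile.
import requests

def to_remote_paths(init_scripts: str, ws_path: str) -> str:
    """'foo.sh,bar.sh' -> '/path/foo.sh,/path/bar.sh' (the value passed to -f)."""
    return ",".join(f"{ws_path}/{name}" for name in init_scripts.split(","))

def clean_up(host: str, token: str, ws_path: str) -> None:
    """Recursively delete the per-build init-script directory from the workspace."""
    resp = requests.post(
        f"{host}/api/2.0/workspace/delete",
        headers={"Authorization": f"Bearer {token}"},
        json={"path": ws_path, "recursive": True},
    )
    resp.raise_for_status()

# Example of the path mapping the Jenkinsfile comment describes:
# to_remote_paths("foo.sh,bar.sh", "/databricks/init_scripts/build-123-AWS")
# -> "/databricks/init_scripts/build-123-AWS/foo.sh,/databricks/init_scripts/build-123-AWS/bar.sh"
```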
4 changes: 2 additions & 2 deletions jenkins/databricks/clusterutils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2023, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ def generate_create_templ(sshKey, cluster_name, runtime, idle_timeout,
     for path in path_list:
         templ['init_scripts'].append(
             {
-                'dbfs' : {
+                'workspace' : {
                     'destination' : path
                 }
             }
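
For reference, a minimal sketch of the init_scripts block that generate_create_templ now produces: each entry carries a 'workspace' destination where it previously used 'dbfs'. The example paths are hypothetical; the real path_list comes from the comma-separated -f argument handled in create.py.

```python
# Sketch of the cluster-create template fragment after this change.
# Example paths are hypothetical.
path_list = [
    "/databricks/init_scripts/build-123-AWS/foo.sh",
    "/databricks/init_scripts/build-123-AWS/bar.sh",
]

templ = {'init_scripts': []}
for path in path_list:
    templ['init_scripts'].append({
        'workspace': {              # previously: 'dbfs': {'destination': ...}
            'destination': path
        }
    })

# templ['init_scripts'] now holds:
# [{'workspace': {'destination': '/databricks/init_scripts/build-123-AWS/foo.sh'}},
#  {'workspace': {'destination': '/databricks/init_scripts/build-123-AWS/bar.sh'}}]
```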
2 changes: 1 addition & 1 deletion jenkins/databricks/create.py
@@ -34,7 +34,7 @@ def main():
     worker_type = 'g4dn.xlarge'
     driver_type = 'g4dn.xlarge'
     cloud_provider = 'aws'
-    # comma separated init scripts, e.g. dbfs:/foo,dbfs:/bar,...
+    # comma separated init scripts in Databricks workspace, e.g. /foo,/bar,...
    init_scripts = ''
    aws_zone='us-west-2c'

4 changes: 2 additions & 2 deletions jenkins/databricks/init_cuda11_runtime.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
 #

 # The init script to install cuda11.0 toolkit
-# Will be automatically pushed into the dbfs:/databricks/init_scripts once it is updated.
+# Will be automatically pushed into the Databricks workspace: /databricks/init_scripts once it is updated.

 wget http://developer.download.nvidia.com/compute/cuda/11.0.2/local_installers/cuda_11.0.2_450.51.05_linux.run

2 changes: 1 addition & 1 deletion jenkins/databricks/init_cudf_udf.sh
@@ -16,7 +16,7 @@
 #

 # The initscript to set up environment for the cudf_udf tests on Databricks
-# Will be automatically pushed into the dbfs:/databricks/init_scripts once it is updated.
+# Will be automatically pushed into the Databricks Workspace: /databricks/init_scripts/ once it is updated.

 set -ex

