From c641c60201e1c937b67c4b97f924147e2e269489 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Fri, 15 Mar 2024 18:39:32 +0000 Subject: [PATCH] Remove duplicate dataset download --- sdk/python/kubeflow/storage_initializer/hugging_face.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sdk/python/kubeflow/storage_initializer/hugging_face.py b/sdk/python/kubeflow/storage_initializer/hugging_face.py index 66b91392d4..0d4d344aab 100644 --- a/sdk/python/kubeflow/storage_initializer/hugging_face.py +++ b/sdk/python/kubeflow/storage_initializer/hugging_face.py @@ -102,11 +102,6 @@ def download_dataset(self): if self.config.access_token: huggingface_hub.login(self.config.access_token) - load_dataset(self.config.repo_id, cache_dir=VOLUME_PATH_DATASET) - # Load dataset and save to disk. - dataset = load_dataset( - self.config.repo_id, - split=self.config.split, - ) + dataset = load_dataset(self.config.repo_id, split=self.config.split) dataset.save_to_disk(VOLUME_PATH_DATASET)