From 6717f9358ac3de0c0e73f0ce930039dd66ca0c46 Mon Sep 17 00:00:00 2001 From: Andrey Velichkevich Date: Fri, 15 Mar 2024 18:39:54 +0000 Subject: [PATCH] Remove dataset download twice Signed-off-by: Andrey Velichkevich --- sdk/python/kubeflow/storage_initializer/hugging_face.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sdk/python/kubeflow/storage_initializer/hugging_face.py b/sdk/python/kubeflow/storage_initializer/hugging_face.py index 66b91392d4..0d4d344aab 100644 --- a/sdk/python/kubeflow/storage_initializer/hugging_face.py +++ b/sdk/python/kubeflow/storage_initializer/hugging_face.py @@ -102,11 +102,6 @@ def download_dataset(self): if self.config.access_token: huggingface_hub.login(self.config.access_token) - load_dataset(self.config.repo_id, cache_dir=VOLUME_PATH_DATASET) - # Load dataset and save to disk. - dataset = load_dataset( - self.config.repo_id, - split=self.config.split, - ) + dataset = load_dataset(self.config.repo_id, split=self.config.split) dataset.save_to_disk(VOLUME_PATH_DATASET)