From f8c1047333b52ae09aea0318d9f91ad73fbedd32 Mon Sep 17 00:00:00 2001 From: Ashish Sam T George <77051732+getwithashish@users.noreply.github.com> Date: Thu, 1 Aug 2024 14:18:08 +0530 Subject: [PATCH] build: Use python script to create and push dataset to huggingface --- .github/workflows/push-dataset.yml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/.github/workflows/push-dataset.yml b/.github/workflows/push-dataset.yml index 0872b22..846cc27 100644 --- a/.github/workflows/push-dataset.yml +++ b/.github/workflows/push-dataset.yml @@ -23,20 +23,14 @@ jobs: sudo apt-get install python3-pandas pip install pandas pip install pyarrow + pip install datasets + pip install huggingface_hub + pip install python-decouple - - name: Convert json to parquet + - name: Convert json to parquet and save it in huggingface run: | python3 parquet_dataset_generator.py - - name: Install dependencies to push to huggingface - run: | - pip install datasets huggingface-hub - - - name: Push to huggingface - run: | - datasets-cli login --token ${{ secrets.HF_TOKEN }} - datasets-cli upload ./internal_dataset.parquet getwithashish/internal-dept-dataset - # finetune: # runs-on: ubuntu-latest