-
Notifications
You must be signed in to change notification settings - Fork 0
55 lines (44 loc) · 1.41 KB
/
push-dataset.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
---
# Workflow: on a push to the `dataset` branch, convert the repo's JSON
# dataset to parquet and push it to the Hugging Face Hub.
name: Push Dataset and Finetune

on:
  push:
    branches:
      - "dataset"

jobs:
  prepare-dataset:
    runs-on: ubuntu-latest
    # `huggingface` environment supplies the HF_TOKEN secret.
    environment: huggingface
    steps:
      - name: Checkout repository
        # v2 runs on the deprecated Node 12 runtime and is disabled by GitHub.
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install dependencies for conversion to parquet
        # Single pip invocation into the setup-python interpreter.
        # NOTE(review): the previous `sudo apt-get install python3-pandas`
        # (no -y flag) targeted the system Python, which these steps never
        # use, so it was dropped as redundant.
        run: |
          pip install pandas pyarrow datasets huggingface_hub python-decouple

      - name: Add huggingface token to .env
        run: |
          echo "================="
          echo "Add environment variables to .env file"
          echo "================="
          touch .env
          echo "HF_TOKEN=${{ secrets.HF_TOKEN }}" >> .env

      - name: Convert json to parquet and save it in huggingface
        run: |
          python3 parquet_dataset_generator.py

# finetune-ai-model:
#   steps:
#     - name: Install dependencies for creating Vast.ai instance, Create the vast.ai instance
#       run: |
#         pip install --upgrade vastai
#     - name: Set up API key of Vast.ai
#       run: |
#         vastai set api-key ${{ secrets.VASTAI_TOKEN }}
#     - name: Find the instance according to the requirements
#       run: |