# build: Use python script to create and push dataset to huggingface #6
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow with a single active job, `prepare-dataset`, which installs the
# Python dependencies and runs parquet_dataset_generator.py.
name: Push Dataset and Finetune

# Trigger: pushes to the `dev` branch only.
on:
  push:
    branches:
      - "dev"

jobs:
  prepare-dataset:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the string "3.10" instead of the float 3.1.
          python-version: '3.10'

      # NOTE(review): the apt package targets the system Python, not the
      # interpreter provided by setup-python, so it is presumably redundant
      # with the pip-installed pandas below - confirm and drop if unused.
      - name: Install dependencies for conversion to parquet
        run: |
          sudo apt-get install python3-pandas
          pip install pandas pyarrow datasets huggingface_hub python-decouple

      # NOTE(review): no `env:`/secrets are passed to this step; if the script
      # needs credentials to upload, verify where they come from.
      - name: Convert json to parquet and save it in huggingface
        run: |
          python3 parquet_dataset_generator.py
  # finetune:
  #   runs-on: ubuntu-latest
  #   steps:
  #     - name: Checkout repository
  #       uses: actions/checkout@v2
  #     - name: Set up Python
  #       uses: actions/setup-python@v2
  #       with:
  #         python-version: '3.10'
  #     - name: Install dependencies
  #       run: |
  #         pip install papermill
  #     - name: Execute Notebook
  #       run: |
  #         papermill path/to/your/notebook.ipynb path/to/your/output-notebook.ipynb