Skip to content

pull_data

pull_data #92170

Workflow file for this run

name: pull_data
on:
schedule:
- cron: "*/10 * * * *"
workflow_run:
workflows: ["Process Google Sheet Edits"]
types:
- completed
push:
paths:
- '.github/workflows/pull_data.yml'
env:
id: '1bdyhGrj0oNz-_qW3Rv2GNGqhZZ73rgj-DYWePLA_1Ms'
remote: 'Innovation-Information-Initiative/Open-Innovation-Dataset-Index'
branch: main
jobs:
pull_sheet:
runs-on: ubuntu-latest
name: Pulling regular updates from the I³ Essential Innovation Datasets sheet
steps:
- name: checkout repository
uses: actions/checkout@v3
with:
ssh-key: "${{ secrets.COMMIT_KEY }}"
- name: Install Python 3
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: set up Python environment
run: |
python3 -m pip install --upgrade pip
python3 -m pip install -v gspread==4.0.1
python3 -m pip install gspread_dataframe
python3 -m pip install pandas
python3 -m pip install furl
python3 -m pip install Pillow
python3 -m pip install python-frontmatter
python3 -m pip install six
- name: Set Up GCP Credentials
uses: nodes-app/action-google-credentials@v1
with:
service-account-key: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }}
- name: Pull GSheet data and generate markdown
id: 'sheet_to_csv'
env:
INPUT_CREDS: ${{ secrets.GSHEET_CREDENTIALS }}
ENV: 'prod'
INPUT_SHEETS: |
[
{ "id": "${{ env.id }}", "title": "Open_Innovation_Datasets", "directory": "datasets" },
{ "id": "${{ env.id }}", "title": "Innovation_Data_Toolkit", "directory": "tools" },
{ "id": "${{ env.id }}", "title": "Data_Publishing_Platform_Guide", "directory": "platforms" },
{ "id": "${{ env.id }}", "title": "Salient_Fields" }
]
INPUT_TEMPDIR: 'index_archive'
run: python3 scripts/pull_data.py
- name: Check if files changed
id: check_diff
run: |
echo "list changed files:"
git diff --name-only
echo "check paths of new and changed files:"
git diff --name-only > files.txt
git status --porcelain | grep -e '^??' | sed -e 's/^?? //g' >> files.txt
while IFS= read -r file
do
echo "$file"
if [[ $file == *.csv ]] || [[ $file == *.md ]]; then
echo "updated files"
echo "::set-output name=commit_files::true"
fi
done < files.txt
- name: Commit files
if: steps.check_diff.outputs.commit_files == 'true'
run: |
git pull
git config user.name github-actions
git config user.email github-actions@github.com
git add index_archive datasets tools app/static/assets/thumbnails
git commit -m "generated files based on google sheet"
git push