# Cron workflow file, as captured from the GitHub Actions page for run
# "Cron #284".

# Workflow name.
name: Cron
# Triggers: one scheduled run per day, plus manual runs via workflow_dispatch.
on:
  schedule:
    # Set any time that you'd like scrapers to run (in UTC)
    # "1 6 * * *" fires at 06:01 every day.
    - cron: "1 6 * * *"
  workflow_dispatch:
# Workflow-level environment variables.
# Literal values are quoted: environment variables are always strings, and
# quoting stops the YAML loader from retyping them first (e.g. an unquoted
# 3.10 would be read as the float 3.1, and 30.0 could be rendered as 30).
env:
  CI: "true"
  # Referenced below as env.PYTHON_VERSION (setup-python, cache key, pipenv).
  PYTHON_VERSION: "3.11"
  # Makes pipenv create the virtualenv at ./.venv, the path cached below.
  PIPENV_VENV_IN_PROJECT: "true"
  SCRAPY_SETTINGS_MODULE: city_scrapers.settings.prod
  WAYBACK_ENABLED: "true"
  AUTOTHROTTLE_MAX_DELAY: "30.0"
  AUTOTHROTTLE_START_DELAY: "1.5"
  AUTOTHROTTLE_TARGET_CONCURRENCY: "3.0"
  # Credentials and DSN come from repository secrets; never inline them here.
  AZURE_ACCOUNT_KEY: ${{ secrets.AZURE_ACCOUNT_KEY }}
  AZURE_ACCOUNT_NAME: ${{ secrets.AZURE_ACCOUNT_NAME }}
  AZURE_CONTAINER: ${{ secrets.AZURE_CONTAINER }}
  SENTRY_DSN: ${{ secrets.SENTRY_DSN }}
jobs:
  # Single job: set up Python and pipenv, run the scrapers, combine the
  # output feeds, then run the keepalive action.
  crawl:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install Pipenv
        run: pip install --user pipenv

      # Caches the in-project virtualenv (.venv), keyed on the Python version
      # and the hash of Pipfile.lock so a lockfile change produces a new key.
      # NOTE(review): keys saved by this step always start with the Python
      # version, so the `pip-` restore prefix looks like it can never match.
      # Confirm before removing it.
      - name: Cache Python dependencies
        uses: actions/cache@v4
        with:
          path: .venv
          key: ${{ env.PYTHON_VERSION }}-${{ hashFiles('**/Pipfile.lock') }}
          restore-keys: |
            ${{ env.PYTHON_VERSION }}-
            pip-

      - name: Install dependencies
        run: pipenv sync
        env:
          PIPENV_DEFAULT_PYTHON_VERSION: ${{ env.PYTHON_VERSION }}

      # The repository root is prepended to PYTHONPATH before invoking the
      # deploy script (the script itself is not visible in this file).
      - name: Run scrapers
        run: |
          export PYTHONPATH=$(pwd):$PYTHONPATH
          ./.deploy.sh

      - name: Combine output feeds
        run: |
          export PYTHONPATH=$(pwd):$PYTHONPATH
          pipenv run scrapy combinefeeds -s LOG_ENABLED=False

      # Presumably keeps GitHub from disabling this scheduled workflow when
      # the repository is otherwise inactive. Verify against the action's
      # documentation for the pinned version.
      - name: Prevent workflow deactivation
        uses: gautamkrishnar/keepalive-workflow@v1
        with:
          committer_username: "citybureau-bot"
          committer_email: "documenters@citybureau.org"