Skip to content

Daily refresh

Daily refresh #874

Workflow file for this run

name: Daily refresh
on:
schedule:
- cron: "10 15 * * *"
push:
branches:
- daily
workflow_dispatch:
inputs:
short_circuit:
type: boolean
description: "Short-circuit unchanged stages"
default: true
refresh_data:
type: boolean
description: "Refresh data (from NJSP)"
default: true
harmonize_muni_codes:
type: boolean
description: "Harmonize county/muni codes"
default: false
update_pqts:
type: boolean
description: "Update parquets (in this repo)"
default: true
update_slack_sha:
description: "\"Refresh NJSP data\" commit hash (for posting updates to Slack)"
update_crash_log:
type: boolean
description: "Update crash log in AWS"
default: true
update_plots:
type: boolean
description: "Update plots"
default: true
post_to_slack:
type: boolean
description: "Post to Slack"
default: true
build_www:
type: boolean
description: "Build www"
default: true
slack_channel_id:
description: "Slack channel override"
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: us-east-1
GH_TOKEN: ${{ secrets.GH_TOKEN }}
jobs:
refresh_data:
name: Refresh NJSP data, dispatch www rebuild (if necessary)
runs-on: ubuntu-latest
steps:
- name: Print github.event
run: |
echo "event: ${{ toJson(github.event) }}"
- name: Print inputs
run: |
echo "inputs: ${{ toJson(inputs) }}"
- name: Setup Python?
id: needs_python
run: echo "y=${{ github.event.schedule || inputs.refresh_data || inputs.harmonize_muni_codes || inputs.update_pqts || inputs.update_slack_sha || inputs.update_plots || inputs.post_to_slack || '' }}" | tee -a $GITHUB_OUTPUT
- uses: actions/checkout@v4
if: steps.needs_python.outputs.y || inputs.build_www
with:
ref: ${{ github.ref_name }}
- uses: webfactory/ssh-agent@v0.9.0
if: steps.needs_python.outputs.y
with:
ssh-private-key: ${{ secrets.GHA_DEPLOY_KEY }}
- uses: actions/setup-python@v5
if: steps.needs_python.outputs.y
with:
python-version: 3.11.8
cache: pip
- run: pip install -e .
if: steps.needs_python.outputs.y
- name: Configure Git author
if: steps.needs_python.outputs.y
run: |
git config --global user.name 'GitHub Actions'
git config --global user.email 'ryan-williams@users.noreply.github.com'
- name: Refresh data
id: refresh_data
if: github.event.schedule || inputs.refresh_data
run: njsp -cc refresh_data
- name: Harmonize county/muni codes
id: harmonize_muni_codes
if: github.event.schedule || inputs.harmonize_muni_codes
run: njsp -cc harmonize_muni_codes
- name: Update parquets
id: update_pqts
if: (github.event.schedule || inputs.update_pqts) && (steps.refresh_data.outputs.sha || !inputs.short_circuit)
run: njsp -cc update_pqts --s3
- name: Compute Slack update SHA
# Use the relevant input SHA, if present, otherwise the output SHA from the `refresh_data` step above
id: update_slack_sha
if: inputs.update_slack_sha || steps.refresh_data.outputs.sha
run: |
echo "sha=${{ inputs.update_slack_sha || steps.refresh_data.outputs.sha }}" >> $GITHUB_OUTPUT
- name: Update crash log
if: inputs.update_crash_log || steps.refresh_data.outputs.sha
run: njsp crash_log compute --s3 -v
- name: Compute plot_data.changed
id: plot_data
if: (github.event.schedule || inputs.update_plots) && (steps.update_slack_sha.outputs.sha || !inputs.short_circuit)
run: echo "changed=1" >> $GITHUB_OUTPUT
- name: Refresh annual summaries
if: steps.plot_data.outputs.changed
run: njsp -cc refresh_summaries
- name: "Fetch ≈1yr of history"
if: steps.plot_data.outputs.changed
run: |
year=$(date +%Y)
let prv_year=year-1
since="$(date --date="$(date +%Y-%m-%d) -375 day" +%Y-%m-%d)"
echo "Fetching commits since $since"
git fetch --shallow-since "$since" origin ${{ github.ref_name }}
echo "Fetched $(git rev-list --count) revisions"
- name: Update YTD / ROY projections
if: steps.plot_data.outputs.changed
run: njsp -cc update_projections
- name: Update plot data
id: update_plots
if: steps.plot_data.outputs.changed
run: njsp -cc update_plots
- name: Post to Slack
id: post_to_slack
if: (github.event.schedule || inputs.post_to_slack) && (steps.update_slack_sha.outputs.sha || !inputs.short_circuit)
run: njsp slack sync ${{ steps.update_slack_sha.outputs.sha }}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
SLACK_CHANNEL_ID: ${{ inputs.slack_channel_id || secrets.SLACK_CHANNEL_ID }}
- name: Decide whether to rebuild www
id: build_www
if: (github.event.schedule || inputs.build_www) && (steps.update_plots.outputs.sha || !inputs.short_circuit)
run: |
echo "run=1" >> $GITHUB_OUTPUT
echo "sha=$(git log -1 --format=%H)" >> $GITHUB_OUTPUT
outputs:
update_pqts: ${{ steps.update_pqts.outputs.sha }}
build_www: ${{ steps.build_www.outputs.run }}
sha: ${{ steps.build_www.outputs.sha }}
rebuild_www:
name: Rebuild www
needs: refresh_data
if: needs.refresh_data.outputs.build_www
uses: ./.github/workflows/www.yml
secrets: inherit
with:
ref: ${{ needs.refresh_data.outputs.sha }}