2024 modelling data update #3436
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: build-and-test-dbt

# Triggers: pull requests targeting master, pushes to master, and manual runs.
on:
  pull_request:
    branches:
      - master
  push:
    branches:
      - master
  # Manual runs take a required `models` input; the job's build and test steps
  # use it as the dbt selection for manually dispatched runs.
  workflow_dispatch:
    inputs:
      models:
        description: A space-separated list of dbt models
        type: string
        required: true
        default: "default.vw_pin_sale default.vw_pin_universe"
# Single job: set up dbt, restore the cached dbt state, then build and test
# either the whole project or only the selected/modified resources.
#
# NOTE(review): PROJECT_DIR, STATE_DIR, TARGET_DIR, CACHE_KEY and TARGET are
# read below but never set in this workflow; presumably they are exported by
# the local setup_dbt action -- confirm.
jobs:
  build-and-test-dbt:
    runs-on: ubuntu-latest
    # These permissions are needed to interact with GitHub's OIDC Token endpoint
    # so that we can authenticate with AWS
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup dbt
        uses: ./.github/actions/setup_dbt
        with:
          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
          role-duration-seconds: 14400  # Worst-case time for full build

      # The `cache-hit` output of this step decides below whether the run is
      # restricted to selected/modified resources.
      - name: Restore dbt state cache
        id: cache
        uses: ./.github/actions/restore_dbt_cache
        with:
          path: ${{ env.PROJECT_DIR }}/${{ env.STATE_DIR }}
          key: ${{ env.CACHE_KEY }}

      # Translate the cache result and the triggering event into flags that
      # later steps read from the environment via $GITHUB_ENV.
      - name: Set command args for building/testing resources
        run: |
          if [[ $CACHE_HIT == 'true' ]]; then
            echo "MODIFIED_RESOURCES_ONLY=true" >> "$GITHUB_ENV"
          fi
          if [[ $EVENT_NAME == 'workflow_dispatch' ]]; then
            echo "MANUALLY_DISPATCHED=true" >> "$GITHUB_ENV"
          fi
        shell: bash
        env:
          CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
          EVENT_NAME: ${{ github.event_name }}

      - name: Test dbt macros
        run: dbt run-operation test_all
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash

      - name: Deploy model dependencies
        run: ../.github/scripts/deploy_dbt_model_dependencies.sh
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash

      # NOTE(review): the manual-selection branch is nested inside the
      # cache-hit branch, so on a cache miss a manually dispatched run builds
      # every resource and ignores the `models` input -- confirm this is
      # intended.
      - name: Build models
        run: |
          if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then
            if [[ $MANUALLY_DISPATCHED == 'true' ]]; then
              echo "Running build on manually selected resources"
              # MODELS is deliberately unquoted so that a space-separated
              # list expands to one selector argument per model.
              # shellcheck disable=SC2086
              dbt build -t "$TARGET" -s $MODELS --defer --state "$STATE_DIR" --resource-types model seed
            elif [[ $TARGET == 'prod' ]]; then
              # Rebuild children of modified resources in prod, since schema
              # changes might cause those children to become stale.
              # Note that we only rebuild children if we're in prod, since
              # otherwise we might end up running unnecessary builds of
              # big downstream tables on every commit. We also only do this for
              # modified resources and not new resources, since new resources
              # can't accidentally modify downstream model schemas without
              # those child models _also_ being modified
              echo "Running build on modified/new resources and their children"
              dbt build -t "$TARGET" -s state:modified+ state:new --state "$STATE_DIR" --resource-types model seed
            else
              echo "Running build on modified/new resources only"
              dbt build -t "$TARGET" -s state:modified state:new --defer --state "$STATE_DIR" --resource-types model seed
            fi
          else
            echo "Running build on all resources"
            dbt build -t "$TARGET" --resource-types model seed
          fi
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        env:
          # Hand the user-supplied input to the script through the
          # environment instead of interpolating it into the script text, so
          # it is treated as data and cannot be parsed as shell source.
          MODELS: ${{ inputs.models }}

      # Same selection logic as the build step, minus the prod-only branch;
      # tests tagged `data_test_iasworld` are excluded in every case.
      - name: Test models
        run: |
          if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then
            if [[ $MANUALLY_DISPATCHED == 'true' ]]; then
              echo "Running tests on manually selected resources"
              # MODELS is deliberately unquoted so that a space-separated
              # list expands to one selector argument per model.
              # shellcheck disable=SC2086
              dbt test -t "$TARGET" -s $MODELS --exclude "tag:data_test_iasworld" --defer --state "$STATE_DIR"
            else
              echo "Running tests on modified/new resources only"
              dbt test -t "$TARGET" -s state:modified state:new --exclude "tag:data_test_iasworld" --defer --state "$STATE_DIR"
            fi
          else
            echo "Running tests on all resources"
            dbt test -t "$TARGET" --exclude "tag:data_test_iasworld"
          fi
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        env:
          # Hand the user-supplied input to the script through the
          # environment instead of interpolating it into the script text, so
          # it is treated as data and cannot be parsed as shell source.
          MODELS: ${{ inputs.models }}

      # Only successful runs on master refresh the cache. The directory saved
      # here (TARGET_DIR) is stored under the same key the restore step reads
      # back into STATE_DIR.
      - name: Update dbt state cache
        if: github.ref == 'refs/heads/master' && success()
        uses: ./.github/actions/save_dbt_cache
        with:
          path: ${{ env.PROJECT_DIR }}/${{ env.TARGET_DIR }}
          key: ${{ env.CACHE_KEY }}