2024 modelling data update #3436

Workflow file for this run

.github/workflows/build_and_test_dbt.yaml at 7784c29

	# CI workflow that builds and tests dbt resources. It runs on pull requests
	# against master, on pushes to master, and on manual dispatch with an
	# explicit list of models.
	#
	# NOTE(review): PROJECT_DIR, STATE_DIR, TARGET_DIR, TARGET and CACHE_KEY are
	# read below but never defined in this file; presumably they are exported by
	# the local setup_dbt action -- confirm.
	name: build-and-test-dbt

	on:
	  pull_request:
	    branches: [master]
	  push:
	    branches: [master]

	  workflow_dispatch:
	    inputs:
	      models:
	        required: true
	        description: A space-separated list of dbt models
	        default: 'default.vw_pin_sale default.vw_pin_universe'
	        type: string

	jobs:
	  build-and-test-dbt:
	    runs-on: ubuntu-latest
	    # These permissions are needed to interact with GitHub's OIDC Token
	    # endpoint so that we can authenticate with AWS
	    permissions:
	      id-token: write
	      contents: read
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v4

	      - name: Setup dbt
	        uses: ./.github/actions/setup_dbt
	        with:
	          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}
	          role-duration-seconds: 14400  # Worst-case time for full build

	      - name: Restore dbt state cache
	        id: cache
	        uses: ./.github/actions/restore_dbt_cache
	        with:
	          path: ${{ env.PROJECT_DIR }}/${{ env.STATE_DIR }}
	          key: ${{ env.CACHE_KEY }}

	      # Translate the cache result and the triggering event into flags that
	      # the build and test steps below branch on. Values appended to
	      # $GITHUB_ENV become environment variables in subsequent steps.
	      - name: Set command args for building/testing resources
	        run: |
	          if [[ $CACHE_HIT == 'true' ]]; then
	            echo "MODIFIED_RESOURCES_ONLY=true" >> "$GITHUB_ENV"
	          fi
	          if [[ $EVENT_NAME == 'workflow_dispatch' ]]; then
	            echo "MANUALLY_DISPATCHED=true" >> "$GITHUB_ENV"
	          fi
	        shell: bash
	        env:
	          CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
	          EVENT_NAME: ${{ github.event_name }}

	      - name: Test dbt macros
	        run: dbt run-operation test_all
	        working-directory: ${{ env.PROJECT_DIR }}
	        shell: bash

	      - name: Deploy model dependencies
	        run: ../.github/scripts/deploy_dbt_model_dependencies.sh
	        working-directory: ${{ env.PROJECT_DIR }}
	        shell: bash

	      # NOTE(review): the manually selected models are only used when the
	      # state cache was restored; on a cache miss this step builds all
	      # resources regardless of the dispatch input -- confirm this is
	      # intended.
	      - name: Build models
	        run: |
	          if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then
	            if [[ $MANUALLY_DISPATCHED == 'true' ]]; then
	              echo "Running build on manually selected resources"
	              # Split the space-separated input into an array so that each
	              # model is passed to dbt as its own argument.
	              read -ra selected_models <<< "$MODELS"
	              dbt build -t "$TARGET" -s "${selected_models[@]}" --defer \
	                --state "$STATE_DIR" --resource-types model seed
	            elif [[ $TARGET == 'prod' ]]; then
	              # Rebuild children of modified resources in prod, since schema
	              # changes might cause those children to become stale.
	              # Note that we only rebuild children if we're in prod, since
	              # otherwise we might end up running unnecessary builds of
	              # big downstream tables on every commit. We also only do this
	              # for modified resources and not new resources, since new
	              # resources can't accidentally modify downstream model schemas
	              # without those child models _also_ being modified
	              echo "Running build on modified/new resources and their children"
	              dbt build -t "$TARGET" -s state:modified+ state:new \
	                --state "$STATE_DIR" --resource-types model seed
	            else
	              echo "Running build on modified/new resources only"
	              dbt build -t "$TARGET" -s state:modified state:new --defer \
	                --state "$STATE_DIR" --resource-types model seed
	            fi
	          else
	            echo "Running build on all resources"
	            dbt build -t "$TARGET" --resource-types model seed
	          fi
	        working-directory: ${{ env.PROJECT_DIR }}
	        shell: bash
	        env:
	          # The dispatch input is handed to the script through the
	          # environment rather than expanded into the script text, so its
	          # contents cannot be interpreted as shell code.
	          MODELS: ${{ inputs.models }}

	      # Every branch excludes tests tagged data_test_iasworld.
	      - name: Test models
	        run: |
	          if [[ $MODIFIED_RESOURCES_ONLY == 'true' ]]; then
	            if [[ $MANUALLY_DISPATCHED == 'true' ]]; then
	              echo "Running tests on manually selected resources"
	              # Split the space-separated input into an array so that each
	              # model is passed to dbt as its own argument.
	              read -ra selected_models <<< "$MODELS"
	              dbt test -t "$TARGET" -s "${selected_models[@]}" \
	                --exclude "tag:data_test_iasworld" --defer \
	                --state "$STATE_DIR"
	            else
	              echo "Running tests on modified/new resources only"
	              dbt test -t "$TARGET" -s state:modified state:new \
	                --exclude "tag:data_test_iasworld" --defer \
	                --state "$STATE_DIR"
	            fi
	          else
	            echo "Running tests on all resources"
	            dbt test -t "$TARGET" --exclude "tag:data_test_iasworld"
	          fi
	        working-directory: ${{ env.PROJECT_DIR }}
	        shell: bash
	        env:
	          # The dispatch input is handed to the script through the
	          # environment rather than expanded into the script text, so its
	          # contents cannot be interpreted as shell code.
	          MODELS: ${{ inputs.models }}

	      # Only runs on master and only when every previous step succeeded.
	      # NOTE(review): this saves TARGET_DIR under the same key that the
	      # restore step unpacks into STATE_DIR; presumably this run's artifacts
	      # become the comparison state for later runs -- confirm.
	      - if: github.ref == 'refs/heads/master' && success()
	        name: Update dbt state cache
	        uses: ./.github/actions/save_dbt_cache
	        with:
	          path: ${{ env.PROJECT_DIR }}/${{ env.TARGET_DIR }}
	          key: ${{ env.CACHE_KEY }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

2024 modelling data update #3436

Workflow file

2024 modelling data update #3436

Jobs

Run details

Workflow file for this run