-
Notifications
You must be signed in to change notification settings - Fork 4
146 lines (133 loc) · 5.17 KB
/
test_dbt_models.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
name: test-dbt-models

# Pull requests are covered by their own CI process (the `build-and-test-dbt`
# workflow), so this workflow has no PR trigger. It runs in two situations:
#
#   1. A manual dispatch issued by the Data Department's Spark data extraction
#      process once that process completes. These runs execute iasWorld data
#      tests. See https://github.com/ccao-data/service-sqoop-iasworld
#
#   2. The weekly cron schedule defined below, which executes non-iasWorld
#      data tests so the team is alerted to problems proactively
on:
  workflow_dispatch:
    inputs:
      select:
        description: >
          Optional space-separated list of tests to run (defaults to all
          non-iasWorld data tests)
        type: string
        required: false
      selector:
        description: >
          Optional dbt selector representing tests to run (takes precedence
          over the above list of tests if both are present)
        type: string
        required: false
      upload_test_results:
        description: Upload test results to S3
        type: boolean
        required: false
        default: false
  schedule:
    # 11:00 UTC every Monday (06:00 at UTC-5)
    - cron: "0 11 * * 1"
env:
  # Read by uv (used below for `uv pip install`); lets it install into the
  # runner's system Python instead of requiring a virtual environment
  UV_SYSTEM_PYTHON: "1"
jobs:
  # Single job: resolve which dbt tests to run, run them, optionally upload
  # the results to S3, and notify via SNS when a bot-triggered run fails.
  #
  # NOTE(review): PROJECT_DIR, STATE_DIR, CACHE_KEY and TARGET are read below
  # but are not defined anywhere in this workflow; presumably the local
  # `setup_dbt` action exports them to the job environment -- confirm.
  test-dbt-models:
    runs-on: ubuntu-latest
    # These permissions are needed to interact with GitHub's OIDC Token endpoint
    # so that we can authenticate with AWS
    permissions:
      id-token: write
      contents: read
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Turns the optional `select` / `selector` inputs into a single dbt
      # selection option exposed as the `select-option` step output. The
      # inputs are handed over as environment variables rather than being
      # interpolated into the script text.
      - name: Validate and parse input variables
        id: parse-inputs
        run: |
          # Default to running all non-iasWorld data tests; this applies
          # whenever neither input is set
          SELECT_OPTION="--selector select_data_test_non_iasworld"
          # A selector takes precedence over a list of tests if both are set
          if [[ -n "$SELECTOR" ]]; then
            SELECT_OPTION="--selector $SELECTOR"
          elif [[ -n "$SELECT" ]]; then
            SELECT_OPTION="--select $SELECT"
          fi
          echo "Setting select option to '$SELECT_OPTION'"
          echo "select-option=$SELECT_OPTION" >> "$GITHUB_OUTPUT"
        shell: bash
        env:
          SELECT: ${{ inputs.select }}
          SELECTOR: ${{ inputs.selector }}

      - name: Setup dbt
        uses: ./.github/actions/setup_dbt
        with:
          role-to-assume: ${{ secrets.AWS_IAM_ROLE_TO_ASSUME_ARN }}

      # The `cache-hit` output of this step decides below whether the test
      # run defers to the restored state directory
      - name: Restore dbt state cache
        id: cache
        uses: ./.github/actions/restore_dbt_cache
        with:
          path: ${{ env.PROJECT_DIR }}/${{ env.STATE_DIR }}
          key: ${{ env.CACHE_KEY }}

      - name: Install Python dependencies
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        run: uv pip install ".[dbt_tests]"

      - name: Run tests
        run: |
          # Build the optional arguments as arrays. SELECT_OPTION holds
          # several whitespace-separated words that must reach the script as
          # separate arguments, but a bare unquoted expansion would also
          # glob-expand them: dbt selection syntax allows `*` wildcards, and
          # those must be passed through literally instead of being matched
          # against files in the working directory.
          read -r -a select_args <<< "$SELECT_OPTION"

          # Only defer to the cached state when the cache was restored
          defer_args=()
          if [[ "$CACHE_HIT" == 'true' ]]; then
            defer_args=(--defer --state "$STATE_DIR")
          fi

          python scripts/run_iasworld_data_tests.py \
            --target "$TARGET" \
            --output-dir ./qc_test_results/ \
            "${select_args[@]}" \
            "$SKIP_ARTIFACTS_OPTION" \
            "${defer_args[@]}"
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash
        env:
          USER: ${{ github.triggering_actor }}
          GIT_SHA: ${{ github.sha }}
          GIT_REF: ${{ github.ref_name }}
          # NOTE(review): `commits` is part of the push event payload, and
          # this workflow only triggers on workflow_dispatch and schedule, so
          # this likely always resolves to an empty string here -- confirm
          # whether the test script relies on it
          GIT_AUTHOR: ${{ github.event.commits[0].author.name }}
          CACHE_HIT: ${{ steps.cache.outputs.cache-hit }}
          SELECT_OPTION: ${{ steps.parse-inputs.outputs.select-option }}
          # Artifacts are skipped unless the caller asked for an upload
          SKIP_ARTIFACTS_OPTION: ${{ inputs.upload_test_results && '--no-skip-artifacts' || '--skip-artifacts' }}

      - name: Save test results to S3
        if: inputs.upload_test_results
        run: |
          s3_prefix="s3://ccao-data-warehouse-us-east-1/qc"
          local_prefix="qc_test_results/metadata"
          # Sync each metadata directory that the test run actually produced
          for dir in "test_run" "test_run_result" "test_run_failing_row"; do
            dirpath="${local_prefix}/${dir}"
            if [ -e "$dirpath" ]; then
              echo "Copying ${dirpath} metadata to S3"
              aws s3 sync "$dirpath" "${s3_prefix}/${dir}"
            fi
          done
          # Start the Glue crawler once the sync is done
          crawler_name="ccao-data-warehouse-qc-crawler"
          aws glue start-crawler --name "$crawler_name"
          echo "Triggered Glue crawler $crawler_name"
        working-directory: ${{ env.PROJECT_DIR }}
        shell: bash

      # Records the UTC failure time in TIMESTAMP for the notification body
      - name: Get current time
        if: failure()
        run: echo "TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%S")" >> "$GITHUB_ENV"
        shell: bash

      # Only triggered when called by a bot. Otherwise, notifications
      # go to whoever called the workflow
      - name: Send failure notification
        if: github.event_name == 'workflow_dispatch' && github.triggering_actor == 'sqoop-bot[bot]' && failure()
        uses: ./.github/actions/publish_sns_topic
        with:
          sns_topic_arn: ${{ secrets.AWS_SNS_NOTIFICATION_TOPIC_ARN }}
          subject: "dbt tests failed for workflow run: ${{ github.run_id }}"
          body: |
            dbt tests failed for workflow ${{ github.run_id }}, run on ${{ env.TIMESTAMP }} UTC
            Link to failing workflow:
            https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}