-
Notifications
You must be signed in to change notification settings - Fork 96
171 lines (167 loc) · 7.43 KB
/
_produce-data.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
name: "[internal] Produce data for external analysis"
on:
workflow_call:
workflow_dispatch:
inputs:
test_workflow_run_id:
description: "Unique GitHub workflow run ID to use for data"
default: 12788722730
type: number
test_workflow_run_attempt:
description: "Run attempt of the workflow run"
default: 1
type: number
upload_data:
description: "Upload data to datastore cluster for our dashboard"
default: false
type: boolean
workflow_run:
workflows:
- "All post-commit tests"
- "(Single-card) Model perf tests"
- "(Single-card) Device perf tests"
- "(Single-card) Demo tests"
- "(Single-card) Tests for new models"
- "Nightly fast dispatch tests"
- "(Single-card) Nightly model and ttnn tests"
- "(Single-card) Tests for new models"
- "(T3K) T3000 demo tests"
- "(T3K) T3000 model perf tests"
- "(T3K) T3000 perplexity tests"
- "(T3K) T3000 model perf tests"
- "(T3K) T3000 profiler tests"
- "(T3K) T3000 unit tests"
- "(TG) TG unit tests"
- "(TG) TG demo tests"
- "(TG) TG frequent tests"
- "(TG) TG model perf tests"
- "(TGG) TGG model perf tests"
- "(TGG) TGG unit tests"
- "(TGG) TGG demo tests"
- "(TGG) TGG frequent tests"
- "ttnn - Run sweeps"
- "Blackhole post-commit tests"
- "Custom test dispatch"
types:
- completed
jobs:
produce-cicd-data:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Output (safe) pipeline values
run: |
echo "pipeline_id (id / run #): ${{ github.run_id }}/${{ github.run_attempt }}"
echo "submissions_ts: "
echo "start_ts: "
echo "end_ts: "
echo "name: ${{ github.workflow }}, but rk recommended name w/out @: ${{ github.workflow_ref }}"
echo "trigger: ${{ github.event_name }}"
echo "sha: ${{ github.sha }}"
echo "(triggering) author/actor: ${{ github.actor }}"
echo "author/actor: ${{ github.triggering_actor }}"
echo "orchestrator: github (Static)"
echo "docker_image: ${{ job.container.image }}"
echo "build duration is post-process"
- name: Get workflow run_id attempt number to analyze
id: get-run-id-and-attempt
shell: bash
run: |
event_name="${{ github.event_name }}"
if [[ "$event_name" == "workflow_dispatch" ]]; then
run_id="${{ inputs.test_workflow_run_id }}"
attempt_number="${{ inputs.test_workflow_run_attempt }}"
elif [[ "$event_name" == "workflow_run" ]]; then
run_id="${{ github.event.workflow_run.id }}"
attempt_number="${{ github.event.workflow_run.run_attempt }}"
[[ -z "$run_id" ]] && { echo "run_id is empty" ; exit 1; }
[[ -z "$attempt_number" ]] && { echo "attempt_number is empty" ; exit 1; }
else
echo "Unknown event name" && exit 1
fi
echo $run_id
echo $attempt_number
echo "run-id=$run_id" >> "$GITHUB_OUTPUT"
echo "attempt-number=$attempt_number" >> "$GITHUB_OUTPUT"
echo "::notice title=target-workflow-link::The workflow being analyzed is available at https://github.com/tenstorrent/tt-metal/actions/runs/$run_id/attempts/$attempt_number"
- name: Output auxiliary values
env:
GH_TOKEN: ${{ github.token }}
run: |
echo "[Info] Workflow run attempt"
gh api /repos/tenstorrent/tt-metal/actions/runs/${{ steps.get-run-id-and-attempt.outputs.run-id }}/attempts/${{ steps.get-run-id-and-attempt.outputs.attempt-number }}
gh api /repos/tenstorrent/tt-metal/actions/runs/${{ steps.get-run-id-and-attempt.outputs.run-id }}/attempts/${{ steps.get-run-id-and-attempt.outputs.attempt-number }} > workflow.json
echo "[Info] Workflow run attempt jobs"
gh api /repos/tenstorrent/tt-metal/actions/runs/${{ steps.get-run-id-and-attempt.outputs.run-id }}/attempts/${{ steps.get-run-id-and-attempt.outputs.attempt-number }}/jobs --paginate
# GitHub chunks the jobs array into multiple objects side-by-side if there are more than 100 jobs, so we need to slurp them into one giant object for later Python processing
gh api /repos/tenstorrent/tt-metal/actions/runs/${{ steps.get-run-id-and-attempt.outputs.run-id }}/attempts/${{ steps.get-run-id-and-attempt.outputs.attempt-number }}/jobs --paginate | jq -s '{total_count: .[0].total_count, jobs: map(.jobs) | add}' > workflow_jobs.json
- name: Collect workflow artifact and job logs
shell: bash
env:
GH_TOKEN: ${{ github.token }}
run: |
./infra/data_collection/github/download_cicd_logs_and_artifacts.sh --workflow-run-id ${{ steps.get-run-id-and-attempt.outputs.run-id }} --attempt-number ${{ steps.get-run-id-and-attempt.outputs.attempt-number }}
find generated/cicd/ -type f
- uses: actions/setup-python@v5
with:
python-version: '3.8'
cache: 'pip'
cache-dependency-path: 'infra/requirements-infra.txt'
- name: Install infra dependencies
run: pip install -r infra/requirements-infra.txt
- name: Create JSON
env:
PYTHONPATH: ${{ github.workspace }}
run: python3 .github/scripts/data_analysis/create_pipeline_json.py
- name: Show directory to see output files
run: ls -hal
- name: Upload cicd data
uses: ./.github/actions/upload-data-via-sftp
if: ${{ github.event_name == 'workflow_run' || inputs.upload_data }}
with:
ssh-private-key: ${{ secrets.SFTP_CICD_WRITER_KEY }}
sftp-batchfile: .github/actions/upload-data-via-sftp/cicd_data_batchfile.txt
username: ${{ secrets.SFTP_CICD_WRITER_USERNAME }}
hostname: ${{ secrets.SFTP_CICD_WRITER_HOSTNAME }}
- name: Upload workflow run data, even on failure
if: ${{ !cancelled() }}
uses: actions/upload-artifact@v4
with:
name: workflow-run-data
path: |
if-no-files-found: warn
path: |
pipelinecopy_*.json
workflow.json
workflow_jobs.json
- uses: ./.github/actions/slack-report
if: ${{ failure() }}
with:
slack_webhook_url: ${{ secrets.SLACK_WEBHOOK_URL }}
owner: U014XCQ9CF8 # tt-rkim
test-produce-complete-benchmark-with-environment:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5.0.0
with:
python-version: '3.10'
cache: 'pip'
cache-dependency-path: 'infra/requirements-infra.txt'
- name: Install infra dependencies
run: pip install -r infra/requirements-infra.txt
- name: create dummy partial benchmark JSON
shell: bash
env:
PYTHONPATH: ${{ github.workspace }}
run: |
mkdir -p generated/benchmark_data
python3 .github/scripts/data_analysis/create_dummy_partial_benchmark_json.py
- name: Create complete benchmark JSON test
env:
PYTHONPATH: ${{ github.workspace }}
ARCH_NAME: grayskull
run: python3 .github/scripts/data_analysis/create_benchmark_with_environment_json.py
- name: Show files
shell: bash
run: find generated/benchmark_data -type f | xargs -n 1 -I {} bash -c 'echo {} && cat {}'