generated from actions/typescript-action
-
Notifications
You must be signed in to change notification settings - Fork 0
469 lines (459 loc) · 17.6 KB
/
test.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
# Workflow "build-test": triggered by every pull request and by pushes to
# `main` and `releases/*` branches.
name: build-test

# rebuild any PRs and main branch changes
on:
  pull_request:
  push:
    branches: [main, "releases/*"]

# Runs are grouped per git ref; a newer run in the same group cancels the one
# still in progress.
concurrency:
  cancel-in-progress: true
  group: test-${{ github.ref }}

# Workflow-level variables, visible to every job below.
env:
  # OneFlow revision checked out by the jobs, and the parallelism value the
  # jobs forward to the build action's `parallel` input.
  ONEFLOW_REF: master
  ONEFLOW_CI_BUILD_PARALLEL: 32
  # Object-storage credentials taken from repository secrets.
  OSS_ACCESS_KEY_ID: ${{ secrets.OSS_ACCESS_KEY_ID }}
  OSS_ACCESS_KEY_SECRET: ${{ secrets.OSS_ACCESS_KEY_SECRET }}

# Job definitions follow; each job key is nested one level beneath `jobs:`.
jobs:
# Installs this repository's npm dependencies and runs its `all` npm script,
# with a OneFlow checkout available under $ONEFLOW_SRC.
build: # make sure build/ci work properly
  name: "Build and test this repo"
  runs-on: ubuntu-latest
  env:
    # Workspace-relative directory that receives the OneFlow checkout.
    ONEFLOW_SRC: oneflow-src
  steps:
    - uses: actions/checkout@v2
    - name: Checkout Oneflow-Inc/oneflow
      uses: actions/checkout@v2
      with:
        repository: Oneflow-Inc/oneflow
        # Use the workflow-wide ONEFLOW_REF like the other jobs' OneFlow
        # checkouts, so this job tests against the same revision.
        ref: ${{ env.ONEFLOW_REF }}
        path: ${{ env.ONEFLOW_SRC }}
    - run: |
        npm install
    - run: |
        npm run all
# Builds OneFlow in a conda environment through the action at the repository
# root (`./`), once on a GitHub-hosted runner and once on a self-hosted one.
test-build-oneflow:
  name: "Build OneFlow Conda"
  runs-on: ${{ matrix.os }}
  strategy:
    fail-fast: false
    max-parallel: 5
    matrix:
      entry: [gh-hosted, self-hosted]
      include:
        # Runner labels per entry; `matrix.os` is a list in both cases, which
        # is why the steps below test it with `contains(...)`.
        - entry: gh-hosted
          os: [ubuntu-latest]
        - entry: self-hosted
          os: ["self-hosted", "linux", "provision"]
  env:
    ONEFLOW_SRC: oneflow-src
  steps:
    # Self-hosted only: hand ownership of the workspace back to the current
    # user by running chown inside a busybox container.
    - name: Fix permissions
      if: ${{ contains(matrix.os, 'self-hosted') }}
      run: |
        set -x
        docker run --rm -v "$PWD":/p -w /p busybox chown -R "$(id -u)":"$(id -g)" .
    - name: Remove leftover cuda-installer.log
      if: ${{ contains(matrix.os, 'self-hosted') }}
      run: |
        docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log
    - uses: actions/checkout@v2
    - name: Checkout Oneflow-Inc/oneflow
      uses: actions/checkout@v2
      with:
        repository: Oneflow-Inc/oneflow
        ref: ${{ env.ONEFLOW_REF }}
        path: ${{ env.ONEFLOW_SRC }}
    - name: Save cache if successful
      id: save-cache
      uses: ./cache-complete
      timeout-minutes: 5
      with:
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        entry: build-with-clang
        digest-type: build
        mark-as-completed: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
    # The matrix of this job only defines `entry` and `os`, so `matrix.cache-hit`
    # would always be empty and the guards below would never skip anything.
    # The cache result is read from the `save-cache` step output instead.
    - name: Checkout Oneflow-Inc/conda-env
      if: ${{ !fromJSON(steps.save-cache.outputs.cache-hit) }}
      uses: actions/checkout@v2
      with:
        repository: Oneflow-Inc/conda-env
        ref: 30a7f00eb48ee9009d85a848e720823e5054c66b
        path: conda-env
    - name: Test conda on gh-hosted
      if: ${{ contains(matrix.os, 'ubuntu-latest') && !fromJSON(steps.save-cache.outputs.cache-hit) }}
      uses: ./
      with:
        cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/gh-hosted/cpu-clang.cmake
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        oneflow-build-env: conda
        conda-env-file: conda-env/dev/clang10/environment-v2.yml
        conda-env-name: oneflow-dev-clang10-v2
    - name: Test conda on self-hosted
      if: ${{ contains(matrix.os, 'self-hosted') && !fromJSON(steps.save-cache.outputs.cache-hit) }}
      uses: ./
      with:
        cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/cn/fast/cpu-clang.cmake
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        oneflow-build-env: conda
        conda-env-file: conda-env/dev/clang10/environment-v2.yml
        conda-installer-url: https://oneflow-static.oss-cn-beijing.aliyuncs.com/downloads/conda-installers/Miniconda3-py39_4.10.3-Linux-x86_64.sh
        conda-prefix: ~/miniconda3-prefixes/py39_4.10.3
        self-hosted: ${{ contains(matrix.os, 'self-hosted') }}
        conda-env-name: oneflow-dev-clang10-v2
        parallel: ${{ env.ONEFLOW_CI_BUILD_PARALLEL }}
# Produces the build matrix (job output `matrix`) that `test-build-manylinux`
# expands with `fromJson(...)`.
find-build-cache:
  name: "Find build cache"
  runs-on: ubuntu-latest
  env:
    ONEFLOW_SRC: oneflow-src
  outputs:
    matrix: ${{ steps.find-cache.outputs.matrix }}
  steps:
    - uses: actions/checkout@v2
    - name: Checkout Oneflow-Inc/oneflow
      uses: actions/checkout@v2
      with:
        repository: Oneflow-Inc/oneflow
        ref: ${{ env.ONEFLOW_REF }}
        # Same variable that is handed to the action below as `oneflow-src`,
        # so the checkout location and the action input cannot drift apart.
        path: ${{ env.ONEFLOW_SRC }}
    - name: find cache
      id: find-cache
      uses: ./cache-complete/matrix/build
      with:
        # True when the pull request carries the `need-clean-ccache` label.
        delete-cache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }}
        runner-labels: |
          self-hosted
          linux
          provision
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        entries: |
          cu102
          cpu
          cu102_xla
# Builds the manylinux variant of OneFlow for every matrix entry emitted by
# `find-build-cache`, then (for entries other than cu102_xla) copies the
# wheels and the CPack zip archives to the SSH host configured in `env`.
test-build-manylinux:
  name: "Build OneFlow manylinux"
  runs-on: ${{ matrix.runs-on }}
  needs: [find-build-cache]
  concurrency:
    group: test-build-manylinux-${{ matrix.entry }}-${{ matrix.build-digest }}
    cancel-in-progress: true
  strategy:
    fail-fast: false
    max-parallel: 1
    matrix: ${{ fromJson(needs.find-build-cache.outputs.matrix) }}
  env:
    ONEFLOW_SRC: oneflow-src
    MANYLINUX_CACHE_DIR: ~/manylinux-cache-dir/${{ matrix.entry }}
    WHEELHOUSE_DIR: manylinux-wheelhouse
    SSH_TANK_HOST: 192.168.1.23
    SSH_TANK_PATH: /home/ci-user/tank
    SSH_USERNAME: ci-user
  steps:
    # Self-hosted only: hand ownership of the workspace back to the current
    # user by running chown inside a busybox container.
    - name: Fix permissions
      if: ${{ contains(matrix.runs-on, 'self-hosted') }}
      run: |
        set -x
        docker run --rm -v "$PWD":/p -w /p busybox chown -R "$(id -u)":"$(id -g)" .
    - name: Remove leftover cuda-installer.log
      if: ${{ contains(matrix.runs-on, 'self-hosted') }}
      run: |
        docker run --rm -v /tmp:/host/tmp -w /p busybox rm -f /host/tmp/cuda-installer.log
    - name: Checkout this repo
      uses: actions/checkout@v2
    - name: Checkout Oneflow-Inc/oneflow
      uses: actions/checkout@v2
      with:
        repository: Oneflow-Inc/oneflow
        ref: ${{ env.ONEFLOW_REF }}
        path: ${{ env.ONEFLOW_SRC }}
    - name: Save cache if successful
      id: save-cache
      uses: ./cache-complete
      with:
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        entry: ${{ matrix.entry }}
        digest-type: build
        mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }}
    # Guard: the cache state seen by this job must agree with the one recorded
    # in the matrix when `find-build-cache` ran.
    # NOTE(review): `line=204,col=10` in the annotation is hard-coded and may no
    # longer point at the relevant line - confirm or drop.
    - name: Check digest and fail if cache result not identical to matrix
      if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }}
      run: |
        echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
        exit 1
    # All cache-hit guards below go through fromJson(...), matching the upload
    # steps further down, so the build and upload steps always agree on
    # whether the entry was a cache hit.
    - name: Build manylinux cpu only
      if: ${{ matrix.entry == 'cpu' && !fromJson(matrix.cache-hit) }}
      uses: ./
      with:
        cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cpu.cmake
        build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build.sh
        run-lit: true
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        oneflow-build-env: manylinux
        wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }}
        clear-wheelhouse-dir: true
        self-hosted: ${{ contains(matrix.runs-on, 'self-hosted') }}
        cuda-version: none
        manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }}
        docker-run-use-system-http-proxy: false
        docker-run-use-lld: true
        retry-failed-build: true
        clean-ccache: true
        parallel: ${{ env.ONEFLOW_CI_BUILD_PARALLEL }}
        python-versions: |
          3.6
          3.7
    - name: Build manylinux cu102
      if: ${{ matrix.entry == 'cu102' && !fromJson(matrix.cache-hit) }}
      uses: ./
      with:
        cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda.cmake
        build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        oneflow-build-env: manylinux
        wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }}
        clear-wheelhouse-dir: true
        self-hosted: ${{ contains(matrix.runs-on, 'self-hosted') }}
        cuda-version: "10.2"
        manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }}
        docker-run-use-system-http-proxy: false
        docker-run-use-lld: false
        retry-failed-build: true
        parallel: ${{ env.ONEFLOW_CI_BUILD_PARALLEL }}
        python-versions: |
          3.6
          3.7
    - name: Build manylinux cu102_xla
      if: ${{ matrix.entry == 'cu102_xla' && !fromJson(matrix.cache-hit) }}
      uses: ./
      with:
        cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cuda-xla.cmake
        build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build-gcc7.sh
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        oneflow-build-env: manylinux
        wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }}
        clear-wheelhouse-dir: true
        self-hosted: ${{ contains(matrix.runs-on, 'self-hosted') }}
        cuda-version: "10.2"
        manylinux-cache-dir: ${{ env.MANYLINUX_CACHE_DIR }}
        docker-run-use-system-http-proxy: true
        docker-run-use-lld: true
        retry-failed-build: true
        parallel: ${{ env.ONEFLOW_CI_BUILD_PARALLEL }}
        python-versions: |
          3.6
        wheel-audit: false
    # Copies the runner user's SSH private key into $RUNNER_TEMP/_github_workflow.
    # NOTE(review): presumably this is what the scp steps see as
    # /github/workflow/id_rsa inside their container - confirm.
    - name: Copy key
      if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'cu102_xla' }}
      run: cp ~/.ssh/id_rsa "$RUNNER_TEMP/_github_workflow"
    # NOTE(review): `appleboy/scp-action@master` tracks a moving branch while
    # being handed an SSH key; consider pinning to a release tag or commit SHA.
    - name: Upload whl
      if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'cu102_xla' }}
      uses: appleboy/scp-action@master
      with:
        host: ${{ env.SSH_TANK_HOST }}
        username: ${{ env.SSH_USERNAME }}
        source: "${{ env.WHEELHOUSE_DIR }}/*.whl"
        key_path: /github/workflow/id_rsa
        target: "${{ env.SSH_TANK_PATH }}/digest/${{ steps.save-cache.outputs.build-digest }}/${{ matrix.entry }}/whl"
        rm: true
        strip_components: 1
    # MANYLINUX_CACHE_DIR starts with `~`, so it is left unquoted here to let
    # the shell expand it to the home directory.
    - name: Copy CPack dir
      if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'cu102_xla' }}
      run: cp -r ${{ env.MANYLINUX_CACHE_DIR }}/build/cpack cpack
    - name: Upload packed liboneflow
      if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'cu102_xla' }}
      uses: appleboy/scp-action@master
      with:
        host: ${{ env.SSH_TANK_HOST }}
        username: ${{ env.SSH_USERNAME }}
        key_path: /github/workflow/id_rsa
        source: "cpack/*.zip"
        target: "${{ env.SSH_TANK_PATH }}/digest/${{ steps.save-cache.outputs.build-digest }}/${{ matrix.entry }}/cpack"
        rm: true
        strip_components: 1
# Runs after the manylinux builds: for each entry, asks `./cache-complete`
# (without marking anything as completed) whether the build is cached, and
# fails the job when it is not.
check-build-cache:
  name: "Check manylinux build cache"
  needs: [test-build-manylinux]
  runs-on: ubuntu-latest
  env:
    ONEFLOW_SRC: oneflow-src
  strategy:
    max-parallel: 5
    fail-fast: false
    matrix:
      entry:
        - "cu102_xla"
        - "cu102"
        - "cpu"
  steps:
    - name: Checkout this repo
      uses: actions/checkout@v2
    - name: Checkout Oneflow-Inc/oneflow
      uses: actions/checkout@v2
      with:
        path: ${{ env.ONEFLOW_SRC }}
        ref: ${{ env.ONEFLOW_REF }}
        repository: Oneflow-Inc/oneflow
    - name: find cache
      id: find-cache
      uses: ./cache-complete
      with:
        entry: ${{ matrix.entry }}
        digest-type: build
        mark-as-completed: false
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        runner-labels: |
          self-hosted
          linux
          provision
    # A cache miss at this point fails the job.
    - name: Check built
      if: ${{ !fromJSON(steps.find-cache.outputs.cache-hit) }}
      run: |
        exit 1
# Produces the test matrix (job output `matrix`) that `test-oneflow-doctor`
# expands with `fromJson(...)`. Runs only after the manylinux builds.
find-test-cache:
  name: "Find test cache"
  runs-on: ubuntu-latest
  needs: [test-build-manylinux]
  env:
    ONEFLOW_SRC: oneflow-src
  outputs:
    matrix: ${{ steps.find-cache.outputs.matrix }}
  steps:
    - uses: actions/checkout@v2
    - name: Checkout Oneflow-Inc/oneflow
      uses: actions/checkout@v2
      with:
        repository: Oneflow-Inc/oneflow
        ref: ${{ env.ONEFLOW_REF }}
        # Same variable that is handed to the action below as `oneflow-src`,
        # so the checkout location and the action input cannot drift apart.
        path: ${{ env.ONEFLOW_SRC }}
    - name: find cache
      id: find-cache
      uses: ./cache-complete/matrix/test
      with:
        runner-labels: |
          self-hosted
          linux
          provision
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        devices: |
          cuda
          cpu
        tests: |
          module
          misc
          speed-test
# Runs the test matrix emitted by `find-test-cache`: downloads the artifacts
# for the build digest, starts a detached test container, installs the OneFlow
# wheel into it and runs `python3 -m oneflow --doctor`. The container is
# removed at the end even when an earlier step failed.
test-oneflow-doctor:
  name: Test suite
  runs-on: ${{ matrix.runs-on }}
  needs: [find-test-cache]
  strategy:
    fail-fast: false
    max-parallel: 5
    matrix: ${{ fromJson(needs.find-test-cache.outputs.matrix) }}
  env:
    ONEFLOW_SRC: oneflow-src
    TEST_CONTAINER_NAME: "pr-${{ github.event.pull_request.number }}-run-id-${{ github.run_id }}-${{ matrix.entry }}-test"
    TEST_WITH_TF_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-tf-2.4.0:b8b5eb2bdee6928fefd61ccabf8fb2d680835aeb
    TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.9.0:e7a497b41d8b7f1bce055b1f23d027f93b1557ae
    PIP_INDEX_URL: "http://192.168.1.22:8000"
    PIP_INDEX_TRUST_ARGS: "--trusted-host 192.168.1.22"
    SSH_TANK_HOST: 192.168.1.23
    SSH_TANK_PATH: /home/ci-user/tank
  steps:
    # Self-hosted only: hand ownership of the workspace back to the current
    # user by running chown inside a busybox container.
    - name: Fix permissions
      if: ${{ contains(matrix.runs-on, 'self-hosted') }}
      run: |
        set -x
        docker run --rm -v "$PWD":/p -w /p busybox chown -R "$(id -u)":"$(id -g)" .
    - uses: actions/checkout@v2
    - name: Checkout Oneflow-Inc/oneflow
      uses: actions/checkout@v2
      with:
        repository: Oneflow-Inc/oneflow
        ref: ${{ env.ONEFLOW_REF }}
        # Same variable the later steps use as `oneflow-src` and as their
        # working directory, so the locations cannot drift apart.
        path: ${{ env.ONEFLOW_SRC }}
    # Remove a container with the same name if one is already present.
    - name: Remove container
      timeout-minutes: 45
      if: ${{ contains(matrix.runs-on, 'self-hosted') }}
      run: |
        docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
    - name: Save cache if successful
      id: save-cache
      uses: ./cache-complete
      with:
        oneflow-src: ${{ env.ONEFLOW_SRC }}
        entry: ${{ matrix.entry }}
        digest-type: ${{ matrix.digest-type }}
        mark-as-completed: ${{ contains(matrix.runs-on, 'self-hosted') }}
    # Guard: the cache state seen by this job must agree with the one recorded
    # in the matrix when `find-test-cache` ran.
    # NOTE(review): `line=204,col=10` in the annotation is hard-coded and may no
    # longer point at the relevant line - confirm or drop.
    - name: Check digest and fail if cache result not identical to matrix
      if: ${{ fromJSON(steps.save-cache.outputs.cache-hit) != matrix.cache-hit }}
      run: |
        echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
        exit 1
    - name: Download wheel and binary
      if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type != 'do-nothing' }}
      uses: ./digest/download
      id: download-digest
      with:
        digest: ${{ steps.save-cache.outputs.build-digest }}
        entry: ${{ matrix.compute-platform }}
        ssh-tank-host: ${{ env.SSH_TANK_HOST }}
        ssh-tank-path: ${{ env.SSH_TANK_PATH }}
    # Exactly one of the next two steps runs and exports TEST_IMG_TAG for the
    # "Start container" step.
    - name: Set tf container
      if: ${{ fromJSON(matrix.is-single-client) }}
      run: |
        echo "TEST_IMG_TAG=${TEST_WITH_TF_IMG_TAG}" >> $GITHUB_ENV
    - name: Set pytorch container
      if: ${{ !fromJSON(matrix.is-single-client) }}
      run: |
        echo "TEST_IMG_TAG=${TEST_WITH_TORCH_IMG_TAG}" >> $GITHUB_ENV
    - name: Unzip packed liboneflow
      working-directory: ${{ env.ONEFLOW_SRC }}
      env:
        ONEFLOW_CPACK_PATH: ${{ steps.download-digest.outputs.entry-dir }}/cpack
      if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }}
      run: |
        unzip ${{ env.ONEFLOW_CPACK_PATH }}/liboneflow-ci-linux.zip
    # Starts the test image detached with `sleep 3600` as its command; later
    # steps run inside it through `docker exec`. The OneFlow checkout is
    # mounted at the same path inside the container and used as its workdir.
    - name: Start container
      if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type != 'do-nothing' }}
      working-directory: ${{ env.ONEFLOW_SRC }}
      env:
        ONEFLOW_WHEEL_PATH: ${{ steps.download-digest.outputs.entry-dir }}/whl
        ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin
      run: |
        docker run -d --privileged --network host --shm-size=8g \
          --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
          --runtime=nvidia \
          -v /dataset:/dataset:ro -v /model_zoo:/model_zoo:ro \
          -v "${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro" \
          -v "${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro" \
          -v "$HOME/test-container-cache/dot-local:/root/.local" \
          -v "$HOME/test-container-cache/dot-cache:/root/.cache" \
          -e ONEFLOW_WHEEL_PATH="${ONEFLOW_WHEEL_PATH}" \
          -e ONEFLOW_BIN_PATH="${ONEFLOW_BIN_PATH}" \
          -e ONEFLOW_CI=1 \
          -e TERM="xterm-256color" \
          -v "$PWD:$PWD" \
          -w "$PWD" \
          --name ${{ env.TEST_CONTAINER_NAME }} \
          "${TEST_IMG_TAG}" \
          sleep 3600
    - name: Test container
      if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type != 'do-nothing' }}
      run: |
        docker exec ${{ env.TEST_CONTAINER_NAME }} ls
    - name: Run oneflow_testexe
      if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }}
      run: |
        docker exec ${{ env.TEST_CONTAINER_NAME }} ./liboneflow-ci-linux/bin/oneflow_testexe
    - name: Install OneFlow
      if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type != 'do-nothing' }}
      env:
        ONEFLOW_WHEEL_PATH: ${{ steps.download-digest.outputs.entry-dir }}/whl
      run: |
        ls ${{ env.ONEFLOW_WHEEL_PATH }}
        docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
        docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m pip install --find-links=${{ env.ONEFLOW_WHEEL_PATH }} oneflow
    - name: Run OneFlow doctor
      if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type != 'do-nothing' }}
      run: |
        docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor
    # `always()` makes this cleanup run even when an earlier step failed.
    - name: Remove container
      timeout-minutes: 45
      if: ${{ always() && contains(matrix.runs-on, 'self-hosted') }}
      run: |
        docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true