forked from NVIDIA/Megatron-LM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
.gitlab-ci.yml
109 lines (104 loc) · 3.17 KB
/
.gitlab-ci.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Controls whether a pipeline is created at all and which variable overrides
# it starts with. Rules are evaluated top to bottom; the first match wins.
workflow:
  rules:
    # Never create pipelines outside the ADLR namespace.
    - if: $CI_PROJECT_NAMESPACE != "ADLR"
      when: never
    # Branches whose name matches /ci-/ only get scheduled pipelines.
    - if: $CI_COMMIT_BRANCH =~ /ci-/ && $CI_PIPELINE_SOURCE != "schedule"
      when: never
    # Scheduled pipelines are not auto-canceled when new commits arrive.
    - if: $CI_PIPELINE_SOURCE == "schedule"
      auto_cancel:
        on_new_commit: none
    # Pipelines started from the web UI run with no variable overrides.
    - if: $CI_PIPELINE_SOURCE == "web"
    # Protected refs: FUNCTIONAL_TEST is switched to "no".
    - if: $CI_COMMIT_REF_PROTECTED == "true"
      variables:
        FUNCTIONAL_TEST: "no"
    # Merge requests labelled "Run tests": functional tests with scope "mr".
    # The label is matched as a regex against the full label list.
    # NOTE(review): the cluster variables are set to empty strings here;
    # presumably downstream jobs pick the cluster themselves - confirm.
    - if: $CI_MERGE_REQUEST_LABELS =~ /Run tests/ && $CI_MERGE_REQUEST_TARGET_BRANCH_SHA != ""
      variables:
        UNIT_TEST_REPEAT: "5"
        UNIT_TEST_TIMEOUT: "50"
        FUNCTIONAL_TEST: "yes"
        FUNCTIONAL_TEST_SCOPE: "mr"
        FUNCTIONAL_TEST_CLUSTER_A100: ""
        FUNCTIONAL_TEST_CLUSTER_H100: ""
    # Merge requests labelled "Run nightly": same overrides, scope "nightly".
    - if: $CI_MERGE_REQUEST_LABELS =~ /Run nightly/ && $CI_MERGE_REQUEST_TARGET_BRANCH_SHA != ""
      variables:
        UNIT_TEST_REPEAT: "5"
        UNIT_TEST_TIMEOUT: "50"
        FUNCTIONAL_TEST: "yes"
        FUNCTIONAL_TEST_SCOPE: "nightly"
        FUNCTIONAL_TEST_CLUSTER_A100: ""
        FUNCTIONAL_TEST_CLUSTER_H100: ""
    # Merge requests labelled "Run weekly": same overrides, scope "weekly".
    - if: $CI_MERGE_REQUEST_LABELS =~ /Run weekly/ && $CI_MERGE_REQUEST_TARGET_BRANCH_SHA != ""
      variables:
        UNIT_TEST_REPEAT: "5"
        UNIT_TEST_TIMEOUT: "50"
        FUNCTIONAL_TEST: "yes"
        FUNCTIONAL_TEST_SCOPE: "weekly"
        FUNCTIONAL_TEST_CLUSTER_A100: ""
        FUNCTIONAL_TEST_CLUSTER_H100: ""
    # Any other merge request pipeline with a non-empty target-branch SHA:
    # FUNCTIONAL_TEST is switched to "no".
    - if: $CI_PIPELINE_SOURCE == "merge_request_event" && $CI_MERGE_REQUEST_TARGET_BRANCH_SHA != ""
      variables:
        FUNCTIONAL_TEST: "no"
    # Everything else: no pipeline.
    - when: never
  # Default auto-cancel policy for pipelines whose matching rule does not
  # set its own `auto_cancel`.
  auto_cancel:
    on_new_commit: interruptible
# Pipeline stages, listed in the order they are declared to run.
stages:
  - test
  - functional_tests
  - convergence_tests
  - publish
# Job defaults: every job is interruptible unless it overrides this, which is
# what the workflow-level `auto_cancel: on_new_commit: interruptible` acts on.
default:
  interruptible: true
# Pipeline-level variables. Entries written with `value` / `options` /
# `description` are offered as prefilled choices when a pipeline is started
# manually; the plain `KEY: value` entries further down are CI-wide constants.
variables:
  FUNCTIONAL_TEST:
    value: "yes"
    options:
      - "yes"
      - "no"
    description: "To run the functional test suite"
  FUNCTIONAL_TEST_SCOPE:
    value: "mr"
    options:
      - "mr"
      - "nightly"
      - "weekly"
      - "pre-release"
      - "release"
    description: "Testsuite to run (only for FUNCTIONAL_TEST=yes)"
  FUNCTIONAL_TEST_CLUSTER_A100:
    value: "dgxa100_dracooci"
    options:
      - "dgxa100_dracooci"
      - "dgxa100_dracooci-ord"
    description: "Cluster for A100 workloads"
  FUNCTIONAL_TEST_CLUSTER_H100:
    value: "dgxh100_eos"
    options:
      - "dgxh100_coreweave"
      - "dgxh100_eos"
    description: "Cluster for H100 workloads"
  # No default value: only a description is declared for this variable.
  FUNCTIONAL_TEST_NAME:
    description: "Name of functional test run (only for pre-release and release)"
  PUBLISH:
    value: "no"
    options:
      - "yes"
      - "no"
    description: "Build and publish a wheel to PyPi"
  PUBLISH_SCOPE:
    value: "code-freeze"
    options:
      - "code-freeze"
      - "release"
    description: "Type of publish (freeze or final release)"

  # CI-wide variables
  # Container image locations, all under the registry at
  # ${GITLAB_ENDPOINT}:5005.
  CI_MCORE_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_ci
  CI_MCORE_DEV_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_ci_dev
  CI_NEMO_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/nemo_ci
  LINTING_IMAGE: ${GITLAB_ENDPOINT}:5005/adlr/megatron-lm/mcore_linting
  # Unit-test defaults; the merge-request label rules in `workflow` raise
  # these to 50 and 5. Quoted because CI variable values are strings.
  # NOTE(review): the timeout unit is not visible in this file - confirm.
  UNIT_TEST_TIMEOUT: "15"
  UNIT_TEST_REPEAT: "1"
# Stage definitions are kept in separate files inside this repository and
# pulled in here, in this order.
include:
  - local: .gitlab/stages/00.pre.yml
  - local: .gitlab/stages/01.tests.yml
  - local: .gitlab/stages/02.functional-tests.yml
  - local: .gitlab/stages/03.publish.yml