From 1c7995103bf8565001f6c5085e13d8154d5ee44e Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Fri, 19 Jan 2024 11:38:48 +0000 Subject: [PATCH 01/24] add CI/CD for unit tests --- .github/workflows/tests.yaml | 50 ++++++++++++++++++++++++++++++++++++ .gitignore | 1 - 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/tests.yaml diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 00000000..3bab7d93 --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,50 @@ +name: Run unit tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + paths: + - "src/**.py" + - "examples/**.py" + - "tests/**.py" + +jobs: + tests: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: Set up Python 3.9 + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Check container state + run: | + nvidia-smi + python -c "import torch; print('torch:', torch.__version__, torch)" + python -c "import torch; print('CUDA available:', torch.cuda.is_available())" + + - name: Instal nanotron + run: | + python -m pip install --upgrade pip + pip install torch + pip install packaging; pip install "flash-attn>=2.4.2" --no-build-isolation + git clone git@github.com:huggingface/nanotron.git + cd nanotron + pip install -e . + + - name: Install test dependencies + run: | + pip install pytest + pip install pytest-cov + + - name: Python dependencies + run: | + pip list + + - name: Run tests + run: pytest --color=yes --durations=0 --verbose tests/ diff --git a/.gitignore b/.gitignore index a5bb87ac..cd63079a 100644 --- a/.gitignore +++ b/.gitignore @@ -160,6 +160,5 @@ cython_debug/ #.idea/ .vscode -.github checkpoints/ From 04491d3974c1940a848747bab02efe6471b74b13 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Fri, 19 Jan 2024 11:42:43 +0000 Subject: [PATCH 02/24] fix --- .github/workflows/tests.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 3bab7d93..6e4a71de 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -17,10 +17,15 @@ jobs: steps: - uses: actions/checkout@v2 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.9 + python-version: 3.10 + + - name: Python environment + run: | + which python + python --version - name: Check container state run: | From fdd5d1e77e498784edad472c8830367114b7719a Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Fri, 19 Jan 2024 11:43:28 +0000 Subject: [PATCH 03/24] fix syntax --- .github/workflows/tests.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 6e4a71de..b0272cdb 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -48,8 +48,8 @@ jobs: pip install pytest-cov - name: Python dependencies - run: | - pip list + run: | + pip list - name: Run tests run: pytest --color=yes --durations=0 --verbose tests/ From 91208dd1cfe8f5bdafa94d588920a414b44e6b10 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Fri, 19 Jan 2024 11:45:32 +0000 Subject: [PATCH 04/24] fix --- .github/workflows/tests.yaml | 7 ++++++- .../mlp/0/linear/pp_block/model_bias.safetensors | Bin 0 -> 128 bytes .../0/linear/pp_block/model_weight.safetensors | Bin 0 -> 496 bytes 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 tests/.test_cache/eec0493c-b6bf-11ee-aa62-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors create mode 100644 tests/.test_cache/eec0493c-b6bf-11ee-aa62-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_weight.safetensors diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index b0272cdb..b16bc515 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -3,9 +3,14 @@ name: Run unit tests on: push: branches: [ main ] + paths: + - "src/**.py" + - "examples/**.py" + - "tests/**.py" + pull_request: branches: [ main ] - paths: + paths: - "src/**.py" - "examples/**.py" - "tests/**.py" diff --git a/tests/.test_cache/eec0493c-b6bf-11ee-aa62-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors b/tests/.test_cache/eec0493c-b6bf-11ee-aa62-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0befd975af8e4cc6a044af7fa2bed24e22f6f97c GIT binary patch literal 128 zcmWGwfPiYH`1st^lEjq6lEnCUC9CRM9VHMO$WlrvsVqoUvQlz0Hd4}2D$Ym*@uCe4 zVj)W6^V8CbQ%j10Vg@=U2C=oZ3JMA`v-0eoAKGLiRV`@uh^^IDgj$pR?yME6e>GC}` z4u_ZQU19Rx_Qj^D`+A<+*c~?Y-s|$hY~P8Uy!&<*Oxri>wavcpx&8ZAtn%3B958L4 zN!#AN3;VV9x&Aq_ci*4(eKRwAZEoET-`&{TVs{`*bzk2E;e9oiYV3|EOt*`2k+n+; zzO>h(DcR2Pp3>f~2X(s-Uc9omDDw2)#Cgy5aN5tbyC2-MSKK?(Zi}M7_11ucdk<-6 z+f1@7-p3}XY*)Rn+-7~etew!w+sFh&Tg@@vCy|m zU-R2`A>V%++Y+aJEW3W$-t(~9TiV^Xuim@YmP3|xpIEW^zEH1Pp?mAlgK6eP)`L61-+x2+%K9zrBd!2o!?OPDgzjrsg@4k?0PIm0l&bD#v@AqaO k&fGiwMA4r1Rho9u6Z-8=r={+DG4HEwp;nIV?6WPl0Qr*MLjV8( literal 0 HcmV?d00001 From 8da087d8c4c31bc17fc05752fb0efbf666f92bc7 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Fri, 19 Jan 2024 11:48:40 +0000 Subject: [PATCH 05/24] fix --- .github/workflows/tests.yaml | 12 ++++++------ .../0/linear/pp_block/model_bias.safetensors | Bin 128 -> 128 bytes .../0/linear/pp_block/model_weight.safetensors | Bin 0 -> 496 bytes .../1/linear/pp_block/model_bias.safetensors | Bin 0 -> 128 bytes .../1/linear/pp_block/model_weight.safetensors | Bin 0 -> 496 bytes .../0/linear/pp_block/model_weight.safetensors | Bin 496 -> 0 bytes 6 files changed, 6 insertions(+), 6 deletions(-) rename tests/.test_cache/{eec0493c-b6bf-11ee-aa62-16a08fa8d1dd => 231a2360-b6c0-11ee-8ff5-16a08fa8d1dd}/model/module/mlp/0/linear/pp_block/model_bias.safetensors (50%) create mode 100644 tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_weight.safetensors create mode 100644 tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_bias.safetensors create mode 100644 tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_weight.safetensors delete mode 100644 tests/.test_cache/eec0493c-b6bf-11ee-aa62-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_weight.safetensors diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index b16bc515..73029354 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -4,16 +4,16 @@ on: push: branches: [ main ] paths: - - "src/**.py" - - "examples/**.py" - - "tests/**.py" + - "src/**/*.py" + - "examples/**/*.py" + - "tests/**/*.py" pull_request: branches: [ main ] paths: - - "src/**.py" - - "examples/**.py" - - "tests/**.py" + - "src/**/*.py" + - "examples/**/*.py" + - "tests/**/*.py" jobs: tests: diff --git a/tests/.test_cache/eec0493c-b6bf-11ee-aa62-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors b/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors similarity index 50% rename from tests/.test_cache/eec0493c-b6bf-11ee-aa62-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors rename to tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors index 0befd975af8e4cc6a044af7fa2bed24e22f6f97c..209a4147e62ff940d4b3989bd48145b199676822 100644 GIT binary patch delta 47 zcmV+~0MP${0e}IJSSSyx!o8T_F+SlA8on%0t-J|lR6c_7a6QnP3%nFikUdg!JH9R| F4ZhK+6GH$1 delta 47 zcmV+~0MP${0e}IJSSTEtZa(M2vOF4l5kBGug*_zEQ@(>TO1)+++dTEtg*@_-kiPIa Fg1yXQ70Cbq diff --git a/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_weight.safetensors b/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7de3af5c47dfd6c1f0853025026b86430ee1cbac GIT binary patch literal 496 zcmV$B4lB7VP9`%W^-k9b0RuhFf24MFkO9pARr(hARr(h z!2dfU!UDby_bR?ydu~1i+XKE)e?2`!;ugL0bo@LWq71(Gs2x81vMoMD$3?xyGkZN; zeXG5eh>$&QZ1p{nyyQLl%AGy?SOUJKsE9q7Qzkrm%uznzdWSyVhc-TEDN#M;kN`fl zVJp4>pA0&&q#Zv060p5vWW_zw_>a7R_iDWWImo$WWDCCNl48D0_msT1vxU98#sNNz zrh~n5mr%UKW^g?=!|}bUwf(#lPf5JI_!d29zl%OeLx#Tp0^~ik-N8Lzuf)67aw|T< zj5avg{AKe;^!C>%Y--!i>} zx-C7j>;XOQrK3E~kQzRSKYhNbaxc7VmjS+t$1A~uX9zwNz* ztRTI^EzLa|P_?{$I3qr+=0v<)yNAA_!{NPD?nk~rZ{59kgPXnoU-PVl83(N>`FYWv;V#SAQL`nD2Y8JHsVGA literal 0 HcmV?d00001 diff --git a/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_bias.safetensors b/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_bias.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9900cb7b60ae40284fe999e1183e71449f3c699 GIT binary patch literal 128 zcmWGwfPiYH`1st^lEjq6lEnCUC9CRM9VHMO$WlrvsVqoUvQlz0Hd4}2D$Ym*@uCe4 zVj)W6^V8CbQ%j10Vg@=U2C=oZ3JMCI%1dpRbyV2Sy>7Da!SRoKQr)-M{x@8^cV0@1 S?eY!p_HrDXwO4OR%{~D4=r61Q literal 0 HcmV?d00001 diff --git a/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_weight.safetensors b/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_weight.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd3463d6d846d45f0812083ad4ae1fd607cc08f5 GIT binary patch literal 496 zcmV$B4lB7VP9`%W^-k9b0RuhFf24MFkO9pARr(hARr*w zu-`qjgzUZlAnQF98S6b}=4`$g46nV~gEu|IA~7UwphMBxJoRFgL#9;HA9?y#>BpBBH%?{8qk2r3b#)C+@qQ z*s48m88bc(I$k|UtTn!5vSU33uc|#HiDy0#p3gi_pCmq&Jy|^*{j5Dq+_Sv$XQn;# za%(=>Ne@1~rba#9@YFpaNt`|X^%%a+R8GCt)Y859rT0DZpMSkXsM0+!fEPZN$b&u_ zQmwu13rM~|n&&(~4iUVzNW?r}#~Z##wg$c?O%Agj$pR?yME6e>GC}` z4u_ZQU19Rx_Qj^D`+A<+*c~?Y-s|$hY~P8Uy!&<*Oxri>wavcpx&8ZAtn%3B958L4 zN!#AN3;VV9x&Aq_ci*4(eKRwAZEoET-`&{TVs{`*bzk2E;e9oiYV3|EOt*`2k+n+; zzO>h(DcR2Pp3>f~2X(s-Uc9omDDw2)#Cgy5aN5tbyC2-MSKK?(Zi}M7_11ucdk<-6 z+f1@7-p3}XY*)Rn+-7~etew!w+sFh&Tg@@vCy|m zU-R2`A>V%++Y+aJEW3W$-t(~9TiV^Xuim@YmP3|xpIEW^zEH1Pp?mAlgK6eP)`L61-+x2+%K9zrBd!2o!?OPDgzjrsg@4k?0PIm0l&bD#v@AqaO k&fGiwMA4r1Rho9u6Z-8=r={+DG4HEwp;nIV?6WPl0Qr*MLjV8( From 00875c0897f8e46c7dda634644dc5d5da4d887ff Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Fri, 19 Jan 2024 12:14:22 +0000 Subject: [PATCH 06/24] update actions/checkout --- .github/workflows/tests.yaml | 9 +++------ .../mlp/0/linear/pp_block/model_bias.safetensors | Bin 128 -> 0 bytes .../0/linear/pp_block/model_weight.safetensors | Bin 496 -> 0 bytes .../mlp/1/linear/pp_block/model_bias.safetensors | Bin 128 -> 0 bytes .../1/linear/pp_block/model_weight.safetensors | Bin 496 -> 0 bytes 5 files changed, 3 insertions(+), 6 deletions(-) delete mode 100644 tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors delete mode 100644 tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_weight.safetensors delete mode 100644 tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_bias.safetensors delete mode 100644 tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_weight.safetensors diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 73029354..52b62174 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -18,10 +18,8 @@ on: jobs: tests: runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - + - uses: actions/checkout@v3 - name: Set up Python 3.10 uses: actions/setup-python@v2 with: @@ -52,9 +50,8 @@ jobs: pip install pytest pip install pytest-cov - - name: Python dependencies - run: | - pip list + - name: Show installed libraries and their versions + command: pip freeze | tee installed.txt - name: Run tests run: pytest --color=yes --durations=0 --verbose tests/ diff --git a/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors b/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/0/linear/pp_block/model_bias.safetensors deleted file mode 100644 index 209a4147e62ff940d4b3989bd48145b199676822..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 128 zcmWGwfPiYH`1st^lEjq6lEnCUC9CRM9VHMO$WlrvsVqoUvQlz0Hd4}2D$Ym*@uCe4 zVj)W6^V8CbQ%j10Vg@=U2C=oZ3JMDRs}Jp+@xajTA)nMfoq)A_IMYJxnm!iTUYNzb RM$B4lB7VP9`%W^-k9b0RuhFf24MFkO9pARr(hARr(h z!2dfU!UDby_bR?ydu~1i+XKE)e?2`!;ugL0bo@LWq71(Gs2x81vMoMD$3?xyGkZN; zeXG5eh>$&QZ1p{nyyQLl%AGy?SOUJKsE9q7Qzkrm%uznzdWSyVhc-TEDN#M;kN`fl zVJp4>pA0&&q#Zv060p5vWW_zw_>a7R_iDWWImo$WWDCCNl48D0_msT1vxU98#sNNz zrh~n5mr%UKW^g?=!|}bUwf(#lPf5JI_!d29zl%OeLx#Tp0^~ik-N8Lzuf)67aw|T< zj5avg{AKe;^!C>%Y--!i>} zx-C7j>;XOQrK3E~kQzRSKYhNbaxc7VmjS+t$1A~uX9zwNz* ztRTI^EzLa|P_?{$I3qr+=0v<)yNAA_!{NPD?nk~rZ{59kgPXnoU-PVl83(N>`FYWv;V#SAQL`nD2Y8JHsVGA diff --git a/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_bias.safetensors b/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_bias.safetensors deleted file mode 100644 index a9900cb7b60ae40284fe999e1183e71449f3c699..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 128 zcmWGwfPiYH`1st^lEjq6lEnCUC9CRM9VHMO$WlrvsVqoUvQlz0Hd4}2D$Ym*@uCe4 zVj)W6^V8CbQ%j10Vg@=U2C=oZ3JMCI%1dpRbyV2Sy>7Da!SRoKQr)-M{x@8^cV0@1 S?eY!p_HrDXwO4OR%{~D4=r61Q diff --git a/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_weight.safetensors b/tests/.test_cache/231a2360-b6c0-11ee-8ff5-16a08fa8d1dd/model/module/mlp/1/linear/pp_block/model_weight.safetensors deleted file mode 100644 index fd3463d6d846d45f0812083ad4ae1fd607cc08f5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 496 zcmV$B4lB7VP9`%W^-k9b0RuhFf24MFkO9pARr(hARr*w zu-`qjgzUZlAnQF98S6b}=4`$g46nV~gEu|IA~7UwphMBxJoRFgL#9;HA9?y#>BpBBH%?{8qk2r3b#)C+@qQ z*s48m88bc(I$k|UtTn!5vSU33uc|#HiDy0#p3gi_pCmq&Jy|^*{j5Dq+_Sv$XQn;# za%(=>Ne@1~rba#9@YFpaNt`|X^%%a+R8GCt)Y859rT0DZpMSkXsM0+!fEPZN$b&u_ zQmwu13rM~|n&&(~4iUVzNW?r}#~Z##wg$c?O%A Date: Fri, 19 Jan 2024 14:12:51 +0100 Subject: [PATCH 07/24] new runner label --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 52b62174..9c2c455c 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -17,7 +17,7 @@ on: jobs: tests: - runs-on: ubuntu-latest + runs-on: [multi-gpu, nvidia-gpu, 8-t4, ci] steps: - uses: actions/checkout@v3 - name: Set up Python 3.10 From 338c042d3474b7e53a0868f32e57b2bbe7e16b08 Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Fri, 19 Jan 2024 14:13:54 +0100 Subject: [PATCH 08/24] fix typo --- .github/workflows/tests.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 9c2c455c..1b2d3dd1 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -51,7 +51,7 @@ jobs: pip install pytest-cov - name: Show installed libraries and their versions - command: pip freeze | tee installed.txt + run: pip freeze | tee installed.txt - name: Run tests run: pytest --color=yes --durations=0 --verbose tests/ From 0c6433ca9f6b250b4422b1df20a7f8882d7eb84a Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Fri, 19 Jan 2024 14:17:21 +0100 Subject: [PATCH 09/24] add workflow dispatch --- .github/workflows/tests.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 1b2d3dd1..37ca5787 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,6 +1,7 @@ name: Run unit tests on: + workflow_dispatch: push: branches: [ main ] paths: From 6de247236ecc31292b35f25cab7903dc2751385e Mon Sep 17 00:00:00 2001 From: Guillaume LEGENDRE Date: Fri, 19 Jan 2024 14:23:23 +0100 Subject: [PATCH 10/24] remove path filter for triggering --- .github/workflows/tests.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 37ca5787..58aed465 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -11,10 +11,10 @@ on: pull_request: branches: [ main ] - paths: - - "src/**/*.py" - - "examples/**/*.py" - - "tests/**/*.py" + #paths: + # - "src/**/*.py" + # - "examples/**/*.py" + # - "tests/**/*.py" jobs: tests: From 79b22d8fde1c09e935c40e95e48ac5f312b62533 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 12:15:15 +0000 Subject: [PATCH 11/24] test ci --- .../workflows/{tests.yaml => test_3d_parallelism.yaml} | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) rename .github/workflows/{tests.yaml => test_3d_parallelism.yaml} (93%) diff --git a/.github/workflows/tests.yaml b/.github/workflows/test_3d_parallelism.yaml similarity index 93% rename from .github/workflows/tests.yaml rename to .github/workflows/test_3d_parallelism.yaml index 58aed465..2d3530a3 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -4,10 +4,10 @@ on: workflow_dispatch: push: branches: [ main ] - paths: - - "src/**/*.py" - - "examples/**/*.py" - - "tests/**/*.py" + # paths: + # - "src/**/*.py" + # - "examples/**/*.py" + # - "tests/**/*.py" pull_request: branches: [ main ] From c73623b249b93aaf5ee73b32d22ab2d2f382bc85 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 12:23:25 +0000 Subject: [PATCH 12/24] update python version --- .github/workflows/test_3d_parallelism.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/test_3d_parallelism.yaml index 2d3530a3..4c39d0c2 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -24,7 +24,7 @@ jobs: - name: Set up Python 3.10 uses: actions/setup-python@v2 with: - python-version: 3.10 + python-version: '3.10' - name: Python environment run: | From 5efc13555740e2213632084bf6642fdfa13064d6 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 12:27:31 +0000 Subject: [PATCH 13/24] add code quality --- .github/workflows/code_quality.yaml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 .github/workflows/code_quality.yaml diff --git a/.github/workflows/code_quality.yaml b/.github/workflows/code_quality.yaml new file mode 100644 index 00000000..f3d821d0 --- /dev/null +++ b/.github/workflows/code_quality.yaml @@ -0,0 +1,17 @@ +name: Code Quality + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + cloc: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Count Lines of Code (cloc) + uses: djdefi/cloc-action@6 From 4fb80a4e525cc5af2855f44e99c8ab8b81686222 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 12:28:55 +0000 Subject: [PATCH 14/24] refactor --- .github/workflows/test_3d_parallelism.yaml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/test_3d_parallelism.yaml index 4c39d0c2..74121185 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -34,8 +34,6 @@ jobs: - name: Check container state run: | nvidia-smi - python -c "import torch; print('torch:', torch.__version__, torch)" - python -c "import torch; print('CUDA available:', torch.cuda.is_available())" - name: Instal nanotron run: | @@ -50,6 +48,12 @@ jobs: run: | pip install pytest pip install pytest-cov + + - name: Check Pytorch version + run: | + nvidia-smi + python -c "import torch; print('torch:', torch.__version__, torch)" + python -c "import torch; print('CUDA available:', torch.cuda.is_available())" - name: Show installed libraries and their versions run: pip freeze | tee installed.txt From ceb21c2d41abbfa3d93e3ce9d19e1cce68456991 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 12:30:52 +0000 Subject: [PATCH 15/24] only check src --- .github/workflows/code_quality.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/code_quality.yaml b/.github/workflows/code_quality.yaml index f3d821d0..9202e6fa 100644 --- a/.github/workflows/code_quality.yaml +++ b/.github/workflows/code_quality.yaml @@ -15,3 +15,5 @@ jobs: - name: Count Lines of Code (cloc) uses: djdefi/cloc-action@6 + with: + options: --include-dir=src From 05aa557efe7262c17599b87d9b1a2cc5fcac96ed Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 12:35:50 +0000 Subject: [PATCH 16/24] fix --- .github/workflows/code_quality.yaml | 2 +- .github/workflows/test_3d_parallelism.yaml | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code_quality.yaml b/.github/workflows/code_quality.yaml index 9202e6fa..18709486 100644 --- a/.github/workflows/code_quality.yaml +++ b/.github/workflows/code_quality.yaml @@ -16,4 +16,4 @@ jobs: - name: Count Lines of Code (cloc) uses: djdefi/cloc-action@6 with: - options: --include-dir=src + options: --exclude-dir=docs,tests,examples --exclude-lang=YAML diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/test_3d_parallelism.yaml index 74121185..eb346de4 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -38,8 +38,10 @@ jobs: - name: Instal nanotron run: | python -m pip install --upgrade pip + pip install packaging + pip install wheel pip install torch - pip install packaging; pip install "flash-attn>=2.4.2" --no-build-isolation + pip install "flash-attn>=2.4.2" --no-build-isolation git clone git@github.com:huggingface/nanotron.git cd nanotron pip install -e . From 0010cfa6fd06e6fe6e5f71cdb9fe22b08e68f41a Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 12:43:20 +0000 Subject: [PATCH 17/24] use docker image --- .github/workflows/test_3d_parallelism.yaml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/test_3d_parallelism.yaml index eb346de4..b0dcb5a2 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -19,12 +19,17 @@ on: jobs: tests: runs-on: [multi-gpu, nvidia-gpu, 8-t4, ci] + container: + image: nvcr.io/nvidia/pytorch:23.03-py3 + ports: + - 80 + options: --gpus all --shm-size "8G" steps: - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v2 - with: - python-version: '3.10' + # - name: Set up Python 3.10 + # uses: actions/setup-python@v2 + # with: + # python-version: '3.10' - name: Python environment run: | @@ -40,11 +45,12 @@ jobs: python -m pip install --upgrade pip pip install packaging pip install wheel - pip install torch pip install "flash-attn>=2.4.2" --no-build-isolation git clone git@github.com:huggingface/nanotron.git cd nanotron - pip install -e . + pip install -e [dev] + pip install -e [test] + - name: Install test dependencies run: | From dba1eeddd4afb63f6018d0197f696af504092a78 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 13:02:31 +0000 Subject: [PATCH 18/24] fix --- .github/workflows/test_3d_parallelism.yaml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/test_3d_parallelism.yaml index b0dcb5a2..0d5fd6fd 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -36,9 +36,11 @@ jobs: which python python --version - - name: Check container state + - name: Check Pytorch version run: | nvidia-smi + python -c "import torch; print('torch:', torch.__version__, torch)" + python -c "import torch; print('CUDA available:', torch.cuda.is_available())" - name: Instal nanotron run: | @@ -46,8 +48,9 @@ jobs: pip install packaging pip install wheel pip install "flash-attn>=2.4.2" --no-build-isolation - git clone git@github.com:huggingface/nanotron.git + git clone https://github.com/huggingface/nanotron.git cd nanotron + pip install -e . pip install -e [dev] pip install -e [test] @@ -56,12 +59,6 @@ jobs: run: | pip install pytest pip install pytest-cov - - - name: Check Pytorch version - run: | - nvidia-smi - python -c "import torch; print('torch:', torch.__version__, torch)" - python -c "import torch; print('CUDA available:', torch.cuda.is_available())" - name: Show installed libraries and their versions run: pip freeze | tee installed.txt From b2af5d0f158ed3beaaa246d4f6b485e549bf03a3 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Tue, 23 Jan 2024 13:20:10 +0000 Subject: [PATCH 19/24] use python 10 --- .github/workflows/test_3d_parallelism.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/test_3d_parallelism.yaml index 0d5fd6fd..fefddbc5 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -26,10 +26,10 @@ jobs: options: --gpus all --shm-size "8G" steps: - uses: actions/checkout@v3 - # - name: Set up Python 3.10 - # uses: actions/setup-python@v2 - # with: - # python-version: '3.10' + - name: Set up Python 3.10 + uses: actions/setup-python@v2 + with: + python-version: '3.10' - name: Python environment run: | @@ -54,7 +54,6 @@ jobs: pip install -e [dev] pip install -e [test] - - name: Install test dependencies run: | pip install pytest From 8914de748211a0b59bf6e87c541bfa84fe8b2df3 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Wed, 24 Jan 2024 07:30:03 +0000 Subject: [PATCH 20/24] change docker image --- .github/workflows/test_3d_parallelism.yaml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/test_3d_parallelism.yaml index fefddbc5..96a52e2b 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -20,16 +20,17 @@ jobs: tests: runs-on: [multi-gpu, nvidia-gpu, 8-t4, ci] container: - image: nvcr.io/nvidia/pytorch:23.03-py3 + # image: nvcr.io/nvidia/pytorch:23.03-py3 + image: runpod/pytorch:2.1.1-py3.10-cuda12.1.1-devel-ubuntu22.04 ports: - 80 options: --gpus all --shm-size "8G" steps: - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v2 - with: - python-version: '3.10' + # - name: Set up Python 3.10 + # uses: actions/setup-python@v2 + # with: + # python-version: '3.10' - name: Python environment run: | From 368bebabb941f64b4e825714296d6f31844cdd36 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Wed, 24 Jan 2024 07:38:12 +0000 Subject: [PATCH 21/24] fix pip install --- .github/workflows/test_3d_parallelism.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/test_3d_parallelism.yaml index 96a52e2b..3eea3e66 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/test_3d_parallelism.yaml @@ -52,8 +52,8 @@ jobs: git clone https://github.com/huggingface/nanotron.git cd nanotron pip install -e . - pip install -e [dev] - pip install -e [test] + pip install -e .[dev] + pip install -e .[test] - name: Install test dependencies run: | From 565e081cf40796eea88a89f045cff8a961f018cd Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Wed, 24 Jan 2024 08:10:41 +0000 Subject: [PATCH 22/24] add fa2-related tests --- ...sm.yaml => 3d_parallelism_unit_tests.yaml} | 4 +- .github/workflows/fa2_unit_tests.yaml | 64 +++++++++++++++++++ ...gence.py => run_layer_norm_convergence.py} | 0 tests/kernels/test_layer_norm.py | 1 + tests/pytest.ini | 2 + 5 files changed, 70 insertions(+), 1 deletion(-) rename .github/workflows/{test_3d_parallelism.yaml => 3d_parallelism_unit_tests.yaml} (88%) create mode 100644 .github/workflows/fa2_unit_tests.yaml rename tests/kernels/{test_layer_norm_convergence.py => run_layer_norm_convergence.py} (100%) diff --git a/.github/workflows/test_3d_parallelism.yaml b/.github/workflows/3d_parallelism_unit_tests.yaml similarity index 88% rename from .github/workflows/test_3d_parallelism.yaml rename to .github/workflows/3d_parallelism_unit_tests.yaml index 3eea3e66..ff51a299 100644 --- a/.github/workflows/test_3d_parallelism.yaml +++ b/.github/workflows/3d_parallelism_unit_tests.yaml @@ -64,4 +64,6 @@ jobs: run: pip freeze | tee installed.txt - name: Run tests - run: pytest --color=yes --durations=0 --verbose tests/ + # NOTE: -m "not fa2" will run all the unit tests that don't have the mark + # "fa2" (these are FA2-related tests, we can't run it on T4) + run: pytest -m "not fa2" --color=yes --durations=0 --verbose tests/ diff --git a/.github/workflows/fa2_unit_tests.yaml b/.github/workflows/fa2_unit_tests.yaml new file mode 100644 index 00000000..51c1aa48 --- /dev/null +++ b/.github/workflows/fa2_unit_tests.yaml @@ -0,0 +1,64 @@ +name: Run FA2-related unit tests + +on: + workflow_dispatch: + push: + branches: [ main ] + # paths: + # - "src/**/*.py" + # - "examples/**/*.py" + # - "tests/**/*.py" + + pull_request: + branches: [ main ] + #paths: + # - "src/**/*.py" + # - "examples/**/*.py" + # - "tests/**/*.py" + +jobs: + tests: + runs-on: [multi-gpu, nvidia-gpu, 8-t4, ci] + container: + image: runpod/pytorch:2.1.1-py3.10-cuda12.1.1-devel-ubuntu22.04 + ports: + - 80 + options: --gpus all --shm-size "8G" + steps: + - uses: actions/checkout@v3 + + - name: Python environment + run: | + which python + python --version + + - name: Check Pytorch version + run: | + nvidia-smi + python -c "import torch; print('torch:', torch.__version__, torch)" + python -c "import torch; print('CUDA available:', torch.cuda.is_available())" + + - name: Instal nanotron + run: | + python -m pip install --upgrade pip + pip install packaging + pip install wheel + pip install "flash-attn>=2.4.2" --no-build-isolation + git clone https://github.com/huggingface/nanotron.git + cd nanotron + pip install -e . + pip install -e .[dev] + pip install -e .[test] + + - name: Install test dependencies + run: | + pip install pytest + pip install pytest-cov + + - name: Show installed libraries and their versions + run: pip freeze | tee installed.txt + + - name: Run tests + # NOTE: -m fa2 will only run the unit tests that have the mark + # "fa2" (these are FA2-related tests) + run: pytest -m fa2 --color=yes --durations=0 --verbose tests/ diff --git a/tests/kernels/test_layer_norm_convergence.py b/tests/kernels/run_layer_norm_convergence.py similarity index 100% rename from tests/kernels/test_layer_norm_convergence.py rename to tests/kernels/run_layer_norm_convergence.py diff --git a/tests/kernels/test_layer_norm.py b/tests/kernels/test_layer_norm.py index f795ad95..26d01f0a 100644 --- a/tests/kernels/test_layer_norm.py +++ b/tests/kernels/test_layer_norm.py @@ -23,6 +23,7 @@ # @pytest.mark.skipif(available_gpus() < 1, reason="Testing test_fused_layer_norm requires at least 1 gpus") +@pytest.mark.fa2 @pytest.mark.parametrize( "hidden_size", [1024, 1025], # fused layer norm supports 1024 as hidden size but not 1025 diff --git a/tests/pytest.ini b/tests/pytest.ini index 66cfb528..0e0b2653 100644 --- a/tests/pytest.ini +++ b/tests/pytest.ini @@ -1,2 +1,4 @@ [pytest] addopts=-n 35 +markers = + fa2: FA2-related From 7b3832633f9dd2a609f13857cae1d0b2fb7bf4a9 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Wed, 24 Jan 2024 08:22:47 +0000 Subject: [PATCH 23/24] fix --- .github/workflows/3d_parallelism_unit_tests.yaml | 2 +- .github/workflows/fa2_unit_tests.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/3d_parallelism_unit_tests.yaml b/.github/workflows/3d_parallelism_unit_tests.yaml index ff51a299..6af2d164 100644 --- a/.github/workflows/3d_parallelism_unit_tests.yaml +++ b/.github/workflows/3d_parallelism_unit_tests.yaml @@ -1,4 +1,4 @@ -name: Run unit tests +name: Run non-FA2-related unit tests on: workflow_dispatch: diff --git a/.github/workflows/fa2_unit_tests.yaml b/.github/workflows/fa2_unit_tests.yaml index 51c1aa48..0cb169b7 100644 --- a/.github/workflows/fa2_unit_tests.yaml +++ b/.github/workflows/fa2_unit_tests.yaml @@ -18,7 +18,7 @@ on: jobs: tests: - runs-on: [multi-gpu, nvidia-gpu, 8-t4, ci] + runs-on: [single-gpu, nvidia-gpu, a10, ci] container: image: runpod/pytorch:2.1.1-py3.10-cuda12.1.1-devel-ubuntu22.04 ports: From 906477ba7c3db80648812ff24765de735232b638 Mon Sep 17 00:00:00 2001 From: Phuc Nguyen Date: Wed, 24 Jan 2024 08:47:01 +0000 Subject: [PATCH 24/24] update FA2 version --- .github/workflows/3d_parallelism_unit_tests.yaml | 1 - .github/workflows/fa2_unit_tests.yaml | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/3d_parallelism_unit_tests.yaml b/.github/workflows/3d_parallelism_unit_tests.yaml index 6af2d164..ab7884b3 100644 --- a/.github/workflows/3d_parallelism_unit_tests.yaml +++ b/.github/workflows/3d_parallelism_unit_tests.yaml @@ -48,7 +48,6 @@ jobs: python -m pip install --upgrade pip pip install packaging pip install wheel - pip install "flash-attn>=2.4.2" --no-build-isolation git clone https://github.com/huggingface/nanotron.git cd nanotron pip install -e . diff --git a/.github/workflows/fa2_unit_tests.yaml b/.github/workflows/fa2_unit_tests.yaml index 0cb169b7..0df421b9 100644 --- a/.github/workflows/fa2_unit_tests.yaml +++ b/.github/workflows/fa2_unit_tests.yaml @@ -43,7 +43,7 @@ jobs: python -m pip install --upgrade pip pip install packaging pip install wheel - pip install "flash-attn>=2.4.2" --no-build-isolation + pip install flash-attn --no-build-isolation git clone https://github.com/huggingface/nanotron.git cd nanotron pip install -e .