diff --git a/.github/workflows/smoketest.yaml b/.github/workflows/smoketest.yaml index 00121f935b..901ac30fe6 100644 --- a/.github/workflows/smoketest.yaml +++ b/.github/workflows/smoketest.yaml @@ -25,6 +25,7 @@ jobs: - "3.8" - "3.9" - "3.10" + - "3.11" steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 diff --git a/docker/Dockerfile b/docker/Dockerfile index 7d3acd7b2c..e5ae9b9468 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -118,6 +118,7 @@ RUN apt-get update && \ tcl \ libjpeg8-dev \ less \ + libsnappy-dev \ # For AWS EFA: autoconf \ autotools-dev \ @@ -269,6 +270,7 @@ RUN if [ -n "$MOFED_VERSION" ] ; then \ rm -rf /tmp/mofed ; \ fi + ##################### # Install NVIDIA Apex ##################### @@ -294,10 +296,7 @@ RUN if [[ -n "$CUDA_VERSION" ]] && [[ -z "${PYTORCH_NIGHTLY_URL}" ]]; then \ RUN if [ -n "$CUDA_VERSION" ] ; then \ pip${PYTHON_VERSION} install --upgrade --no-cache-dir ninja==1.11.1 && \ pip${PYTHON_VERSION} install --upgrade --no-cache-dir --force-reinstall packaging==22.0 && \ - git clone --branch v2.4.2 https://github.com/Dao-AILab/flash-attention.git && \ - cd flash-attention && \ - MAX_JOBS=1 python${PYTHON_VERSION} setup.py install && \ - cd .. ; \ + MAX_JOBS=1 pip${PYTHON_VERSION} install --no-cache-dir flash-attn==2.5.0; \ fi ############### @@ -356,7 +355,8 @@ RUN apt-get update && \ RUN pip install --no-cache-dir --upgrade \ certifi${CERTIFI_VERSION} \ ipython${IPYTHON_VERSION} \ - urllib3${URLLIB3_VERSION} + urllib3${URLLIB3_VERSION} \ + python-snappy ################################################## # Override NVIDIA mistaken env var for 11.8 images diff --git a/docker/README.md b/docker/README.md index 32f6f6e0e9..ca3571604c 100644 --- a/docker/README.md +++ b/docker/README.md @@ -31,6 +31,7 @@ To install composer, once inside the image, run `pip install mosaicml`. | Linux Distro | Flavor | PyTorch Version | CUDA Version | Python Version | Docker Tags | |----------------|----------|-------------------|---------------------|------------------|------------------------------------------------------------------------------------------| +| Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.11 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.3.0 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.10-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (Infiniband) | 3.10 | `mosaicml/pytorch:latest`, `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04` | | Ubuntu 20.04 | Base | 2.1.2 | 12.1.0 (EFA) | 3.10 | `mosaicml/pytorch:latest-aws`, `mosaicml/pytorch:2.1.2_cu121-python3.10-ubuntu20.04-aws` | diff --git a/docker/build_matrix.yaml b/docker/build_matrix.yaml index 7ba413a4bb..5d1e1d25a5 100644 --- a/docker/build_matrix.yaml +++ b/docker/build_matrix.yaml @@ -167,7 +167,7 @@ - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 CUDA_VERSION: 12.1.0 - IMAGE_NAME: torch-nightly-2-3-0-20240110-cu121 + IMAGE_NAME: torch-nightly-2-3-0-20240110-cu121-python3-10 MOFED_VERSION: 5.5-1.0.3.2 NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 @@ -191,6 +191,33 @@ - mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.10-ubuntu20.04 TARGET: pytorch_stage TORCHVISION_VERSION: 0.18.0 +- AWS_OFI_NCCL_VERSION: '' + BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 + CUDA_VERSION: 12.1.0 + IMAGE_NAME: torch-nightly-2-3-0-20240110-cu121-python3-11 + MOFED_VERSION: 5.5-1.0.3.2 + NVIDIA_REQUIRE_CUDA_OVERRIDE: cuda>=12.1 brand=tesla,driver>=450,driver<451 brand=tesla,driver>=470,driver<471 + brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 + brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 + brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 + brand=tesla,driver>=510,driver<511 brand=unknown,driver>=510,driver<511 brand=nvidia,driver>=510,driver<511 + brand=nvidiartx,driver>=510,driver<511 brand=geforce,driver>=510,driver<511 brand=geforcertx,driver>=510,driver<511 + brand=quadro,driver>=510,driver<511 brand=quadrortx,driver>=510,driver<511 brand=titan,driver>=510,driver<511 + brand=titanrtx,driver>=510,driver<511 brand=tesla,driver>=515,driver<516 brand=unknown,driver>=515,driver<516 + brand=nvidia,driver>=515,driver<516 brand=nvidiartx,driver>=515,driver<516 brand=geforce,driver>=515,driver<516 + brand=geforcertx,driver>=515,driver<516 brand=quadro,driver>=515,driver<516 brand=quadrortx,driver>=515,driver<516 + brand=titan,driver>=515,driver<516 brand=titanrtx,driver>=515,driver<516 brand=tesla,driver>=525,driver<526 + brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 + brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 + brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 + PYTHON_VERSION: '3.11' + PYTORCH_NIGHTLY_URL: https://download.pytorch.org/whl/nightly/cu121 + PYTORCH_NIGHTLY_VERSION: dev20240110+cu121 + PYTORCH_VERSION: 2.3.0 + TAGS: + - mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04 + TARGET: pytorch_stage + TORCHVISION_VERSION: 0.18.0 - AWS_OFI_NCCL_VERSION: '' BASE_IMAGE: nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04 COMPOSER_INSTALL_COMMAND: mosaicml[all]==0.18.1 diff --git a/docker/generate_build_matrix.py b/docker/generate_build_matrix.py index e51662729d..1e5c550fd5 100644 --- a/docker/generate_build_matrix.py +++ b/docker/generate_build_matrix.py @@ -227,11 +227,11 @@ def _main(): entry['AWS_OFI_NCCL_VERSION'] = 'v1.7.4-aws' pytorch_entries.append(entry) - nightly_entry = { + nightly_entry_310 = { 'AWS_OFI_NCCL_VERSION': '', 'BASE_IMAGE': 'nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04', 'CUDA_VERSION': '12.1.0', - 'IMAGE_NAME': 'torch-nightly-2-3-0-20240110-cu121', + 'IMAGE_NAME': 'torch-nightly-2-3-0-20240110-cu121-python3-10', 'MOFED_VERSION': '5.5-1.0.3.2', 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.0'), 'PYTHON_VERSION': '3.10', @@ -242,7 +242,25 @@ def _main(): 'TARGET': 'pytorch_stage', 'TORCHVISION_VERSION': '0.18.0' } - pytorch_entries.append(nightly_entry) + pytorch_entries.append(nightly_entry_310) + + nightly_entry_311 = { + 'AWS_OFI_NCCL_VERSION': '', + 'BASE_IMAGE': 'nvidia/cuda:12.1.0-cudnn8-devel-ubuntu20.04', + 'CUDA_VERSION': '12.1.0', + 'IMAGE_NAME': 'torch-nightly-2-3-0-20240110-cu121-python3-11', + 'MOFED_VERSION': '5.5-1.0.3.2', + 'NVIDIA_REQUIRE_CUDA_OVERRIDE': _get_cuda_override('12.1.0'), + 'PYTHON_VERSION': '3.11', + 'PYTORCH_VERSION': '2.3.0', + 'PYTORCH_NIGHTLY_URL': 'https://download.pytorch.org/whl/nightly/cu121', + 'PYTORCH_NIGHTLY_VERSION': 'dev20240110+cu121', + 'TAGS': ['mosaicml/pytorch:2.3.0_cu121-nightly20240110-python3.11-ubuntu20.04'], + 'TARGET': 'pytorch_stage', + 'TORCHVISION_VERSION': '0.18.0' + } + pytorch_entries.append(nightly_entry_311) + composer_entries = [] # The `GIT_COMMIT` is a placeholder and Jenkins will substitute it with the actual git commit for the `composer_staging` images diff --git a/setup.py b/setup.py index dddd51bbef..66bbd8a7a6 100644 --- a/setup.py +++ b/setup.py @@ -265,6 +265,7 @@ def package_files(prefix: str, directory: str, extension: str): 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', ], install_requires=install_requires, entry_points={ diff --git a/tests/datasets/test_in_context_learning_datasets.py b/tests/datasets/test_in_context_learning_datasets.py index 9a98e2b174..4ff06c35fe 100644 --- a/tests/datasets/test_in_context_learning_datasets.py +++ b/tests/datasets/test_in_context_learning_datasets.py @@ -219,13 +219,13 @@ def test_stop_sequences_criteria(tiny_gpt2_tokenizer): seq1 = tiny_gpt2_tokenizer('Dogs are furry')['input_ids'] seq2 = tiny_gpt2_tokenizer('Dogs are furry\n\n')['input_ids'] seq1 = [50257] * (len(seq2) - len(seq1)) + seq1 - input_ids = torch.tensor([seq1, seq2]) + input_ids = torch.LongTensor([seq1, seq2]) assert not eos_criteria(input_ids, None) eos_criteria = MultiTokenEOSCriteria('\n\n', tiny_gpt2_tokenizer, 2) seq1 = tiny_gpt2_tokenizer('Dogs are furry\n\n')['input_ids'] seq2 = tiny_gpt2_tokenizer('Dogs are furry\n\n')['input_ids'] - input_ids = torch.tensor([seq1, seq2]) + input_ids = torch.LongTensor([seq1, seq2]) assert eos_criteria(input_ids, None)