Merge branch 'master' into sharded-snapshot

linshokaku · Dec 21, 2023 · 7977b2d · 7977b2d
2 parents 990c1d2 + 6f62870
commit 7977b2d
Show file tree

Hide file tree

Showing 13 changed files with 151 additions and 70 deletions.
diff --git a/.flexci/config.pbtxt b/.flexci/config.pbtxt
@@ -121,7 +121,7 @@ configs {
       image: "windows"
     }
     time_limit: {
-      seconds: 1200
+      seconds: 1800
     }
     command: ".flexci\\windows\\run.bat torch110"
   }
@@ -138,7 +138,7 @@ configs {
       image: "windows"
     }
     time_limit: {
-      seconds: 1200
+      seconds: 1800
     }
     command: ".flexci\\windows\\run.bat torch111"
   }
@@ -155,7 +155,7 @@ configs {
       image: "windows"
     }
     time_limit: {
-      seconds: 1200
+      seconds: 1800
     }
     command: ".flexci\\windows\\run.bat torch112"
   }
@@ -172,7 +172,7 @@ configs {
       image: "windows"
     }
     time_limit: {
-      seconds: 1200
+      seconds: 1800
     }
     command: ".flexci\\windows\\run.bat torch113"
   }
@@ -189,7 +189,7 @@ configs {
       image: "windows"
     }
     time_limit: {
-      seconds: 1200
+      seconds: 1800
     }
     command: ".flexci\\windows\\run.bat torch200"
   }

diff --git a/.flexci/linux/Dockerfile b/.flexci/linux/Dockerfile
@@ -15,7 +15,7 @@ RUN export DEBIAN_FRONTEND=noninteractive && \
         build-essential libssl-dev zlib1g-dev libbz2-dev \
         libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
         xz-utils tk-dev libffi-dev liblzma-dev git cmake protobuf-compiler libprotobuf-dev \
-        openmpi-bin openmpi-common && \
+        openmpi-bin openmpi-common libopenmpi-dev && \
     rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
 # Install pyenv.
@@ -28,11 +28,13 @@ ARG python_version
 RUN pyenv install ${python_version} && \
     pyenv global ${python_version}
 
+COPY ./tests/requirements.txt ./tests/requirements.mpi.txt ./
+
 # Install test dependencies.
 ARG pip_install_torch_args
-ARG pip_install_dep_args
+ARG pip_install_dep_args	
 RUN pip install -U pip && \
     pip install -U "setuptools<59.6" && \
-    pip install ${pip_install_torch_args} && \
-    pip install ${pip_install_dep_args} && \
+    pip install -r requirements.txt -r requirements.mpi.txt ${pip_install_torch_args} && \
+    pip install ${pip_install_dep_args} && \	
     pip list
diff --git a/.flexci/linux/build_and_push.sh b/.flexci/linux/build_and_push.sh
@@ -2,11 +2,6 @@
 
 TARGET="${1}"
 
-TEST_PIP_PACKAGES="
-matplotlib tensorboard ipython ipywidgets pandas optuna onnxruntime
-pytest flake8 pysen[lint] pytest-cov slack_sdk
-"
-
 docker_build_and_push() {
     IMAGE_NAME="${PPE_FLEXCI_IMAGE_NAME}:${TARGET}"
 
@@ -17,13 +12,12 @@ docker_build_and_push() {
         CACHE_IMAGE_NAME="${IMAGE_NAME}"
     fi
 
-    pushd "$(dirname ${0})"
     DOCKER_BUILDKIT=1 docker build \
         -t "${IMAGE_NAME}" \
+        -f "$(dirname ${0})/Dockerfile" \
         --cache-from "${CACHE_IMAGE_NAME}" \
         --build-arg BUILDKIT_INLINE_CACHE=1 \
         "$@" .
-    popd
 
     if [ "${PPE_FLEXCI_IMAGE_PUSH}" = "0" ]; then
       echo "Skipping docker push."
@@ -39,7 +33,7 @@ case "${TARGET}" in
             --build-arg base_image="nvidia/cuda:11.3.1-cudnn8-devel-ubuntu20.04" \
             --build-arg python_version="3.8.15" \
             --build-arg pip_install_torch_args="torch==1.10.* torchvision==0.11.* -f https://download.pytorch.org/whl/cu113/torch_stable.html" \
-            --build-arg pip_install_dep_args="cupy-cuda11x pytorch-ignite onnx ${TEST_PIP_PACKAGES}"
+            --build-arg pip_install_dep_args="cupy-cuda11x"
         ;;
 
     torch111 )
@@ -48,7 +42,7 @@ case "${TARGET}" in
             --build-arg base_image="nvidia/cuda:11.5.2-cudnn8-devel-ubuntu20.04" \
             --build-arg python_version="3.9.7" \
             --build-arg pip_install_torch_args="torch==1.11.* torchvision==0.12.* -f https://download.pytorch.org/whl/cu115/torch_stable.html" \
-            --build-arg pip_install_dep_args="cupy-cuda11x pytorch-ignite onnx ${TEST_PIP_PACKAGES}"
+            --build-arg pip_install_dep_args="cupy-cuda11x"
         ;;
 
     torch112 )
@@ -57,7 +51,7 @@ case "${TARGET}" in
             --build-arg base_image="nvidia/cuda:11.7.1-cudnn8-devel-ubuntu20.04" \
             --build-arg python_version="3.10.5" \
             --build-arg pip_install_torch_args="torch==1.12.* torchvision==0.13.* -f https://download.pytorch.org/whl/cu117/torch_stable.html" \
-            --build-arg pip_install_dep_args="cupy-cuda11x pytorch-ignite onnx ${TEST_PIP_PACKAGES}"
+            --build-arg pip_install_dep_args="cupy-cuda11x"
         ;;
 
     torch113 )
@@ -66,7 +60,7 @@ case "${TARGET}" in
             --build-arg base_image="nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04" \
             --build-arg python_version="3.10.5" \
             --build-arg pip_install_torch_args="torch==1.13.* torchvision==0.14.* -f https://download.pytorch.org/whl/cu117/torch_stable.html" \
-            --build-arg pip_install_dep_args="cupy-cuda11x pytorch-ignite onnx ${TEST_PIP_PACKAGES}"
+            --build-arg pip_install_dep_args="cupy-cuda11x"
         ;;
 
     torch200 )
@@ -75,7 +69,7 @@ case "${TARGET}" in
             --build-arg base_image="nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04" \
             --build-arg python_version="3.10.5" \
             --build-arg pip_install_torch_args="torch==2.0.* torchvision==0.15.* -f https://download.pytorch.org/whl/cu117/torch_stable.html" \
-            --build-arg pip_install_dep_args="cupy-cuda11x pytorch-ignite onnx ${TEST_PIP_PACKAGES}"
+            --build-arg pip_install_dep_args="cupy-cuda11x"
         ;;
 
     torch201 )
@@ -84,7 +78,7 @@ case "${TARGET}" in
             --build-arg base_image="nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04" \
             --build-arg python_version="3.10.5" \
             --build-arg pip_install_torch_args="torch==2.1.* torchvision==0.16.* -f https://download.pytorch.org/whl/cu118/torch_stable.html" \
-            --build-arg pip_install_dep_args="cupy-cuda11x pytorch-ignite onnx ${TEST_PIP_PACKAGES}"
+            --build-arg pip_install_dep_args="cupy-cuda11x"
         ;;
 
     * )

diff --git a/.flexci/linux/unittest.sh b/.flexci/linux/unittest.sh
@@ -16,6 +16,9 @@ JUPYTER_PLATFORM_DIRS=1 \
 python -m pytest --cov-report=html --cov pytorch_pfn_extras .
 popd
 
+# Run unit tests with mpi
+make mpitest
+
 # Run examples
 if [ -d mnist_raw ]; then
     mkdir -p data/MNIST/raw

diff --git a/.flexci/windows/test.ps1 b/.flexci/windows/test.ps1
@@ -57,15 +57,15 @@ if ($test -eq "torch110") {
 RunOrDie python -V
 
 # Install common requirements
-RunOrDie python -m pip install pytorch-ignite pytest flake8 matplotlib tensorboard onnx ipython ipywidgets pandas optuna cupy-cuda102 onnxruntime==1.15.1 slack_sdk
+RunOrDie python -m pip install -r tests/requirements.txt cupy-cuda102
 RunOrDie python -m pip list
 
 # Install
 RunOrDie python -m pip install -e .
 
 # Unit Test
 $Env:JUPYTER_PLATFORM_DIRS = "1"
-RunOrDie python -m pytest tests
+RunOrDie python -m pytest -m "not mpi" tests
 
 # Examples
 .\.flexci\windows\download_mnist.ps1

diff --git a/.github/workflows/nightly-test-cpu.yml b/.github/workflows/nightly-test-cpu.yml
@@ -25,7 +25,7 @@ jobs:
     - name: Install
       run: |
         pip install -U pip wheel
-        pip install -v -e .[test] --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu 
+        pip install -v -e . -r ./tests/requirements.txt --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu 
         # Test PPE is importable with minimum dependency
         python -c 'import pytorch_pfn_extras'
 

diff --git a/.github/workflows/pretest-and-test.yml b/.github/workflows/pretest-and-test.yml
@@ -31,7 +31,7 @@ jobs:
     - name: Install
       run: |
         pip install -U pip wheel
-        pip install -e .[test] torch==${{ matrix.torch }} --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install -e . -r ./tests/requirements.txt torch==${{ matrix.torch }} --extra-index-url https://download.pytorch.org/whl/cpu
         # Test PPE is importable with minimum dependency
         python -c 'import pytorch_pfn_extras'
 

diff --git a/Makefile b/Makefile
@@ -8,6 +8,9 @@ PWD := $(realpath $(dir $(abspath $(firstword $(MAKEFILE_LIST)))))
 PY := python
 PIP := $(PY) -m pip
 
+PROCESS_NUM = 2
+MPI_OUTPUT_FILE_DIR = $(realpath $(shell mktemp -d))
+
 .PHONY: format
 format: ## Format the Python code.	
 	cp "$$($(PIP) show torch | awk '/^Location:/ { print $$2 }')/torch/__init__.py" stubs/torch/__init__.py
@@ -20,11 +23,19 @@ lint: ## Lint the Python code.
 
 .PHONY: test
 test: ## Run all tests.
-	$(PY) -m pytest tests
+	$(PY) -m pytest -m "not mpi" tests
 
 .PHONY: cputest
 cputest: ## Run all tests except for ones requiring GPU.
-	$(PY) -m pytest -m "not gpu" tests
+	$(PY) -m pytest -m "not gpu and not mpi" tests
+
+.PHONY: mpitest
+mpitest: ## Run the tests marked "mpi" under mpirun.
+	mpi_output_file_dir=$(MPI_OUTPUT_FILE_DIR); \
+	mpirun --allow-run-as-root -n $(PROCESS_NUM) --output-filename $$mpi_output_file_dir -x TORCH_DISTRIBUTED_DEBUG=DETAIL $(PY) -m pytest -m mpi tests > /dev/null 2> /dev/null &&:; \
+	ret=$$?; \
+	for i in $$(seq 0 $$(($(PROCESS_NUM) - 1))); do echo ========= MPI process $$i =========; cat $$mpi_output_file_dir/1/rank.$$i/stdout; cat $$mpi_output_file_dir/1/rank.$$i/stderr; done; \
+	[ $$ret = 0 ]
 
 .PHONY: example_lint
 example_lint: ## Format the Python code.

diff --git a/pytest.ini b/pytest.ini
@@ -36,3 +36,4 @@ filterwarnings =
     ignore:.*Conversion of an array with ndim > 0 to a scalar is deprecated:DeprecationWarning
 markers =
     gpu: Tests that require GPU
+    mpi: Tests that require MPI
diff --git a/setup.py b/setup.py
@@ -16,27 +16,6 @@
     license="MIT License",
     install_requires=["numpy", "packaging", "torch", "typing-extensions>=3.10"],
     extras_require={
-        "test": [
-            "pytest",
-            "onnxruntime",
-            "torchvision",
-            "torchaudio",
-            "pysen",
-            "black==23.3.0",
-            "flake8==4.0.1",
-            "isort==5.10.1",
-            "mypy==1.3.0",
-            "types-PyYAML",
-            "types-setuptools",
-            "matplotlib",
-            "tensorboard",
-            "ipython",
-            "ipywidgets",
-            "pandas",
-            "optuna",
-            "onnx",
-            "pytorch-ignite",
-        ],
         "onnx": ["onnx"],
     },
     python_requires=">=3.6.0",