From f6c3cb6a5054db050203d0bcf8840d987a21cef7 Mon Sep 17 00:00:00 2001
From: yuhao <72971170+howin98@users.noreply.github.com>
Date: Fri, 17 Jun 2022 00:25:36 +0800
Subject: [PATCH] Support iree ci (#8419)

* create mlir cpu and modify build gcc 7 shell script

* fix test_iree_resnet.py: the cuda test raised an error in the cpu-only build

* fix constant folding tests

* support oneflow_test_cpu_only

* pub

* build script add flag

* modify test yml

* add python3 into $PATH

* don't use pretrain model

* install flowvision

Co-authored-by: mosout <mosout@qq.com>
Co-authored-by: jackalcooper <jackalcooper@gmail.com>
---
 .github/workflows/test.yml                    |  3 ++-
 ci/manylinux/build-gcc7.sh                    |  5 ++++
 ci/manylinux/build.sh                         |  5 ++++
 cmake/caches/cn/fast/mlir-cpu.cmake           | 24 +++++++++++++++++++
 oneflow/ir/test/Frontend/test_iree_resnet.py  |  1 +
 .../ir/test/OneFlow/folding/test_conv_bn.py   |  4 ++--
 .../OneFlow/folding/test_simple_multiply.py   | 12 ++++++++--
 .../test_conv_bn_auto_nhwc.py                 | 20 +++++++++-------
 8 files changed, 61 insertions(+), 13 deletions(-)
 create mode 100644 cmake/caches/cn/fast/mlir-cpu.cmake
 rename oneflow/ir/test/OneFlow/{folding => with_cuda}/test_conv_bn_auto_nhwc.py (78%)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ec2f429a8f0..9d465fa372b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -244,13 +244,14 @@ jobs:
         run: |
           echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
           exit 1
-      - uses: Oneflow-Inc/get-oneflow@support-cuda-1106
+      - uses: Oneflow-Inc/get-oneflow@support-iree-ci
         name: Build manylinux ${{ matrix.entry }}
         id: build-cpu
         if: ${{ matrix.entry =='cpu' && !matrix.cache-hit }}
         with:
           cmake-init-cache: ${{ env.ONEFLOW_SRC }}/cmake/caches/ci/cpu.cmake
           build-script: ${{ env.ONEFLOW_SRC }}/ci/manylinux/build.sh
+          run-lit: true
           oneflow-src: ${{ env.ONEFLOW_SRC }}
           oneflow-build-env: manylinux
           wheelhouse-dir: ${{ env.WHEELHOUSE_DIR }}
diff --git a/ci/manylinux/build-gcc7.sh b/ci/manylinux/build-gcc7.sh
index f9deb933083..42244968a0e 100644
--- a/ci/manylinux/build-gcc7.sh
+++ b/ci/manylinux/build-gcc7.sh
@@ -31,6 +31,11 @@ cmake -S ${ONEFLOW_CI_SRC_DIR} -C ${ONEFLOW_CI_CMAKE_INIT_CACHE} -DPython3_EXECU
 # cmake build
 cd ${ONEFLOW_CI_BUILD_DIR}
 cmake --build . --parallel ${ONEFLOW_CI_BUILD_PARALLEL}
+if [ ! -z "$ONEFLOW_CI_BUILD_RUN_LIT" ]; then
+    ${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user flowvision==0.1.0
+    export PATH=$PATH:$(dirname $ONEFLOW_CI_PYTHON_EXE)
+    cmake --build . -t c1
+fi
 
 # build pip
 cd ${ONEFLOW_CI_SRC_DIR}
diff --git a/ci/manylinux/build.sh b/ci/manylinux/build.sh
index 5ce5c448355..263a6fb5194 100644
--- a/ci/manylinux/build.sh
+++ b/ci/manylinux/build.sh
@@ -27,6 +27,11 @@ cmake -S ${ONEFLOW_CI_SRC_DIR} -C ${ONEFLOW_CI_CMAKE_INIT_CACHE} -DPython3_EXECU
 # cmake build
 cd ${ONEFLOW_CI_BUILD_DIR}
 cmake --build . --parallel ${ONEFLOW_CI_BUILD_PARALLEL}
+if [ ! -z "$ONEFLOW_CI_BUILD_RUN_LIT" ]; then
+    ${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user flowvision==0.1.0
+    export PATH=$PATH:$(dirname $ONEFLOW_CI_PYTHON_EXE)
+    cmake --build . -t c1
+fi
 
 # build pip
 cd ${ONEFLOW_CI_SRC_DIR}
diff --git a/cmake/caches/cn/fast/mlir-cpu.cmake b/cmake/caches/cn/fast/mlir-cpu.cmake
new file mode 100644
index 00000000000..7c7351e65ef
--- /dev/null
+++ b/cmake/caches/cn/fast/mlir-cpu.cmake
@@ -0,0 +1,24 @@
+set(BUILD_SHARED_LIBS YES CACHE BOOL "")
+# uncomment only if you know what you are doing
+# set(CMAKE_LINK_DEPENDS_NO_SHARED YES CACHE BOOL "")
+set(BUILD_CUDA NO CACHE BOOL "")
+set(BUILD_GIT_VERSION NO CACHE BOOL "")
+set(TREAT_WARNINGS_AS_ERRORS YES CACHE BOOL "")
+set(BUILD_HWLOC NO CACHE BOOL "")
+set(BUILD_TESTING OFF CACHE BOOL "")
+set(WITH_MLIR YES CACHE BOOL "")
+set(WITH_MLIR_CUDA_CODEGEN NO CACHE BOOL "")
+set(THIRD_PARTY_MIRROR aliyun CACHE STRING "")
+set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "")
+set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
+set(CMAKE_GENERATOR Ninja CACHE STRING "")
+set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
+set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")
+set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF CACHE BOOL "")
+set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
+set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
+set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
+set(CPU_THREADING_RUNTIME SEQ CACHE STRING
+                                    "when using lld with TBB enabled, there will be linkage error")
+set(BUILD_HWLOC OFF CACHE BOOL "") # NOTE(review): redundant, BUILD_HWLOC is already set to NO earlier in this file
+set(WITH_ONEDNN OFF CACHE BOOL "")
diff --git a/oneflow/ir/test/Frontend/test_iree_resnet.py b/oneflow/ir/test/Frontend/test_iree_resnet.py
index 885291f4251..c538a66b575 100644
--- a/oneflow/ir/test/Frontend/test_iree_resnet.py
+++ b/oneflow/ir/test/Frontend/test_iree_resnet.py
@@ -99,6 +99,7 @@ class TestIreeResnet(oneflow.unittest.TestCase):
     def test_iree_resnet_cpu(test_case):
         _test_iree_resnet_cpu(test_case)
 
+    @unittest.skipUnless(oneflow.sysconfig.with_cuda(), "only test cpu cases")
     def test_iree_resnet_cuda(test_case):
         _test_iree_resnet_cuda(test_case)
 
diff --git a/oneflow/ir/test/OneFlow/folding/test_conv_bn.py b/oneflow/ir/test/OneFlow/folding/test_conv_bn.py
index 1b939a891c0..fc6e85370e5 100644
--- a/oneflow/ir/test/OneFlow/folding/test_conv_bn.py
+++ b/oneflow/ir/test/OneFlow/folding/test_conv_bn.py
@@ -31,7 +31,7 @@
 def _test_fuse_conv_bn(test_case):
     data = flow.randn(1, 3, 224, 224)
 
-    model = resnet50(pretrained=True, progress=True)
+    model = resnet50(pretrained=False, progress=True)
     model.eval()
     eager_res = model(data)
 
@@ -47,7 +47,7 @@ def build(self, *input):
     lazy_res = graph(data)
 
     test_case.assertTrue(
-        np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-5, atol=1e-5)
+        np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-4, atol=1e-4)
     )
 
 
diff --git a/oneflow/ir/test/OneFlow/folding/test_simple_multiply.py b/oneflow/ir/test/OneFlow/folding/test_simple_multiply.py
index 085d72f5c93..c07e307f822 100644
--- a/oneflow/ir/test/OneFlow/folding/test_simple_multiply.py
+++ b/oneflow/ir/test/OneFlow/folding/test_simple_multiply.py
@@ -87,10 +87,16 @@ def build(self, *args):
 class TestFoldMultiply(oneflow.unittest.TestCase):
     def test_fold_multiply(test_case):
         _test_fold_multiply(test_case, MultiplyModel, with_cuda=False)
+
+    @unittest.skipUnless(oneflow.sysconfig.with_cuda(), "only test cpu cases")
+    def test_fold_multiply_cuda(test_case):
         _test_fold_multiply(test_case, MultiplyModel, with_cuda=True)
 
     def test_fold_multiply_complex(test_case):
         _test_fold_multiply(test_case, MultiplyModelComplex, with_cuda=False)
+
+    @unittest.skipUnless(oneflow.sysconfig.with_cuda(), "only test cpu cases")
+    def test_fold_multiply_complex_cuda(test_case):
         _test_fold_multiply(test_case, MultiplyModelComplex, with_cuda=True)
 
     def test_fold_multiply_with_input(test_case):
@@ -98,8 +104,10 @@ def test_fold_multiply_with_input(test_case):
         b = flow.tensor([9, -1], dtype=flow.float32)
         _test_fold_multiply(test_case, MultiplyModelWithInput, False, a, b)
 
-        a = a.to("cuda")
-        b = b.to("cuda")
+    @unittest.skipUnless(oneflow.sysconfig.with_cuda(), "only test cpu cases")
+    def test_fold_multiply_with_input_cuda(test_case):
+        a = flow.tensor([3, 7], dtype=flow.float32, device="cuda")
+        b = flow.tensor([9, -1], dtype=flow.float32, device="cuda")
         _test_fold_multiply(test_case, MultiplyModelWithInput, True, a, b)
 
 
diff --git a/oneflow/ir/test/OneFlow/folding/test_conv_bn_auto_nhwc.py b/oneflow/ir/test/OneFlow/with_cuda/test_conv_bn_auto_nhwc.py
similarity index 78%
rename from oneflow/ir/test/OneFlow/folding/test_conv_bn_auto_nhwc.py
rename to oneflow/ir/test/OneFlow/with_cuda/test_conv_bn_auto_nhwc.py
index 1028592acee..88d7c307c1a 100644
--- a/oneflow/ir/test/OneFlow/folding/test_conv_bn_auto_nhwc.py
+++ b/oneflow/ir/test/OneFlow/with_cuda/test_conv_bn_auto_nhwc.py
@@ -29,11 +29,14 @@
 os.environ["ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION"] = "1"
 
 
-def _test_fuse_conv_bn(test_case):
-    data = flow.randn(1, 3, 224, 224).to("cuda")
-
-    model = resnet50(pretrained=True, progress=True)
-    model.to("cuda")
+def _test_fuse_conv_bn(test_case, with_cuda):
+    data = flow.randn(1, 3, 224, 224)
+    if with_cuda:
+        data = data.to("cuda")
+
+    model = resnet50(pretrained=False, progress=True)
+    if with_cuda:
+        model.to("cuda")
     model.eval()
     eager_res = model(data)
 
@@ -49,14 +52,15 @@ def build(self, *input):
     lazy_res = graph(data)
 
     test_case.assertTrue(
-        np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-5, atol=1e-5)
+        np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-4, atol=1e-4)
     )
 
 
 @flow.unittest.skip_unless_1n1d()
 class TestFuseConvBn(oneflow.unittest.TestCase):
-    def test_fuse_conv_bn(test_case):
-        _test_fuse_conv_bn(test_case)
+    @unittest.skipUnless(oneflow.sysconfig.with_cuda(), "only test cpu cases")
+    def test_fuse_conv_bn_cuda(test_case):
+        _test_fuse_conv_bn(test_case, True)
 
 
 if __name__ == "__main__":