Skip to content

Commit

Permalink
[Feature] Support ball_query with cambricon MLU backend and mlu-ops l…
Browse files Browse the repository at this point in the history
…ibrary. (#2520)

* [Feature] Support ball_query with cambricon MLU backend and mlu-ops library.

* [Fix] update operator data layout setting.

* [Fix] add cxx compile option to avoid symbol conflict.

* [Fix] fix lint errors.

* [Fix] update ops.md with info of ball_query support by MLU backend.

* [Feature] Fix typo.

* [Fix] Remove print.

* [Fix] get mlu-ops from MMCV_MLU_OPS_PATH env.

* [Fix] update MMCV_MLU_OPS_PATH check logic.

* [Fix] update error info when failed to download mlu-ops.

* [Fix] check mlu-ops version matching info in mmcv.

* [Fix] revise wrong filename.

* [Fix] remove f.close and re.
  • Loading branch information
DanieeelLiu authored Jan 18, 2023
1 parent 84f60c1 commit dfb0380
Show file tree
Hide file tree
Showing 7 changed files with 336 additions and 44 deletions.
2 changes: 1 addition & 1 deletion docs/en/understand_mmcv/ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ We implement common ops used in detection, segmentation, etc.
| ---------------------------- | --- | ---- | --- | --- | ------ |
| ActiveRotatedFilter ||| | | |
| AssignScoreWithK | || | | |
| BallQuery | || | | |
| BallQuery | || | | |
| BBoxOverlaps | |||| |
| BorderAlign | || | | |
| BoxIouRotated ||| | | |
Expand Down
2 changes: 1 addition & 1 deletion docs/zh_cn/understand_mmcv/ops.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ MMCV 提供了检测、分割等任务中常用的算子
| ---------------------------- | --- | ---- | --- | --- | ------ |
| ActiveRotatedFilter ||| | | |
| AssignScoreWithK | || | | |
| BallQuery | || | | |
| BallQuery | || | | |
| BBoxOverlaps | |||| |
| BorderAlign | || | | |
| BoxIouRotated ||| | | |
Expand Down
47 changes: 47 additions & 0 deletions mmcv/ops/csrc/pytorch/mlu/ball_query_mlu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*************************************************************************
* Copyright (C) 2022 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#include "mlu_common_helper.h"

// MLU implementation of ball query: for each of the m query points in
// `new_xyz`, gather up to `nsample` indices of points in `xyz` whose distance
// lies in [min_radius, max_radius), writing the result into `idx`.
// The b/n/m arguments are unused here: the mlu-ops kernel takes all shape
// information from the tensor descriptors.
void ball_query_forward_mlu(int b, int n, int m, float min_radius,
                            float max_radius, int nsample, const Tensor new_xyz,
                            const Tensor xyz, Tensor idx) {
  // Make the buffers contiguous FIRST, then build descriptors from the
  // contiguous tensors, so each descriptor is guaranteed to describe the
  // exact memory handed to the kernel below.
  auto new_xyz_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous(
      new_xyz, new_xyz.suggest_memory_format());
  auto xyz_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous(
      xyz, new_xyz.suggest_memory_format());
  auto idx_contiguous = torch_mlu::cnnl::ops::cnnl_contiguous(
      idx, new_xyz.suggest_memory_format());

  MluOpTensorDescriptor new_xyz_desc, xyz_desc, idx_desc;
  new_xyz_desc.set(new_xyz_contiguous);
  xyz_desc.set(xyz_contiguous);
  idx_desc.set(idx_contiguous);

  auto new_xyz_impl = torch_mlu::getMluTensorImpl(new_xyz_contiguous);
  auto xyz_impl = torch_mlu::getMluTensorImpl(xyz_contiguous);
  auto idx_impl = torch_mlu::getMluTensorImpl(idx_contiguous);
  auto new_xyz_ptr = new_xyz_impl->cnnlMalloc();
  auto xyz_ptr = xyz_impl->cnnlMalloc();
  auto idx_ptr = idx_impl->cnnlMalloc();

  // NOTE(review): the kernel writes into idx_contiguous; this assumes
  // cnnl_contiguous aliases `idx` when it is already contiguous — if `idx`
  // can arrive non-contiguous, the output would need an explicit copy-back.
  auto handle = mluOpGetCurrentHandle();
  mluOpBallQuery(handle, new_xyz_desc.desc(), new_xyz_ptr, xyz_desc.desc(),
                 xyz_ptr, min_radius, max_radius, nsample, idx_desc.desc(),
                 idx_ptr);
}

void ball_query_forward_impl(int b, int n, int m, float min_radius,
float max_radius, int nsample,
const Tensor new_xyz, const Tensor xyz,
Tensor idx);

REGISTER_DEVICE_IMPL(ball_query_forward_impl, MLU, ball_query_forward_mlu);
103 changes: 103 additions & 0 deletions mmcv/ops/csrc/pytorch/mlu/mlu_common_helper.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*************************************************************************
* Copyright (C) 2022 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#include "mlu_common_helper.h"

// Descriptors
// Map an ATen/caffe2 scalar type (by its type name) to the matching mlu-ops
// dtype enum. Returns MLUOP_DTYPE_INVALID for unsupported types.
mluOpDataType_t getMluOpDataType(const caffe2::TypeMeta& data_type) {
  // The table is immutable, so build it once instead of reconstructing the
  // whole std::map (and all its std::string keys) on every call.
  static const std::map<std::string, mluOpDataType_t> mapping_type = {
      {std::string("c10::Half"), MLUOP_DTYPE_HALF},
      {std::string("float"), MLUOP_DTYPE_FLOAT},
      {std::string("double"), MLUOP_DTYPE_DOUBLE},
      {std::string("int8"), MLUOP_DTYPE_INT8},
      {std::string("signed char"), MLUOP_DTYPE_INT8},
      {std::string("short int"), MLUOP_DTYPE_INT16},
      {std::string("short"), MLUOP_DTYPE_INT16},
      {std::string("int"), MLUOP_DTYPE_INT32},
      {std::string("long int"), MLUOP_DTYPE_INT64},
      {std::string("long"), MLUOP_DTYPE_INT64},
      {std::string("unsigned char"), MLUOP_DTYPE_UINT8},
      {std::string("bool"), MLUOP_DTYPE_BOOL},
      {std::string("c10::complex<c10::Half>"), MLUOP_DTYPE_COMPLEX_HALF},
      {std::string("c10::complex<float>"), MLUOP_DTYPE_COMPLEX_FLOAT}};

  // Single lookup instead of the original find-then-find-again pattern.
  const auto it = mapping_type.find(std::string(data_type.name()));
  return it != mapping_type.end() ? it->second : MLUOP_DTYPE_INVALID;
}

// layout
// Pick the mlu-ops layout enum that matches the tensor's suggested memory
// format: channels-last variants for 4-D/5-D tensors, MLUOP_LAYOUT_ARRAY
// for every other rank.
mluOpTensorLayout_t getMluOpSuggestLayout(const at::Tensor& input) {
  const auto memory_format = input.suggest_memory_format();
  const auto ndim = input.dim();
  if (ndim == 4) {
    return memory_format == at::MemoryFormat::ChannelsLast ? MLUOP_LAYOUT_NHWC
                                                           : MLUOP_LAYOUT_NCHW;
  }
  if (ndim == 5) {
    return memory_format == at::MemoryFormat::ChannelsLast3d
               ? MLUOP_LAYOUT_NDHWC
               : MLUOP_LAYOUT_NCDHW;
  }
  return MLUOP_LAYOUT_ARRAY;
}

// Configure the owned descriptor from an ATen tensor: dtype, suggested
// layout, and dimension sizes (0-dim scalar tensors are viewed as a
// 1-element 1-D shape by default).
void MluOpTensorDescriptor::set(Tensor t) {
  mluOpDataType_t data_type = getMluOpDataType(t.dtype());
  mluOpTensorLayout_t layout = getMluOpSuggestLayout(t);
  std::vector<int> dim_array;
  if (t.dim() == 0) {
    // ScalarTensor (0-dim, 1-item Tensor): view as size = 1.
    dim_array.push_back(1);
  } else {
    // Iterate the size view directly; the original `t.sizes().vec()[i]`
    // materialized a fresh std::vector on every loop iteration.
    const auto sizes = t.sizes();
    dim_array.reserve(sizes.size());
    for (const auto s : sizes) {
      dim_array.push_back(static_cast<int>(s));
    }
  }
  set_desc(t, layout, data_type, dim_array);
}

// Bind layout, dtype, and dimension sizes onto the owned descriptor via the
// mlu-ops API. `t` is currently unused here; shape data arrives via `dims`.
// NOTE(review): the mluOpStatus_t returned by mluOpSetTensorDescriptor is
// discarded — consider checking it for MLUOP_STATUS_SUCCESS.
void MluOpTensorDescriptor::set_desc(const at::Tensor& t,
                                     mluOpTensorLayout_t layout,
                                     mluOpDataType_t dtype,
                                     std::vector<int>& dims) {
  int dimNb = dims.size();
  mluOpSetTensorDescriptor(desc_, layout, dtype, dimNb, dims.data());
}

// Handles
std::once_flag mmcv_mluop_init_flag;
std::mutex mmcv_mluop_mutex;
static std::vector<MluOpHandle> mmcv_mluop_handles;

// Return the process-wide mlu-ops handle for `device_index` (one handle per
// device), with its queue refreshed to that device's current MLU queue.
// Passing -1 (the default) selects the current device.
mluOpHandle_t mluOpGetCurrentHandle(c10::DeviceIndex device_index) {
  // Size the per-device handle table exactly once across all threads.
  std::call_once(mmcv_mluop_init_flag,
                 []() // Init mmcv_mluop_handles 1-device <-> 1-handle
                 {
                   c10::DeviceIndex num_devices = torch_mlu::device_count();
                   mmcv_mluop_handles.resize(num_devices);
                 });

  if (device_index == -1) {
    device_index = torch_mlu::current_device();
  }
  // Guard setQueue + read of the shared table against concurrent callers.
  std::lock_guard<std::mutex> mmcv_mluop_guard(mmcv_mluop_mutex);
  auto queue = torch_mlu::getCurrentQueue(device_index).queue();
  mmcv_mluop_handles[device_index].setQueue(queue);
  return mmcv_mluop_handles[device_index].handle;
}
54 changes: 54 additions & 0 deletions mmcv/ops/csrc/pytorch/mlu/mlu_common_helper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*************************************************************************
* Copyright (C) 2022 Cambricon.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#pragma once
#include <ATen/ATen.h>
#include <c10/core/ScalarType.h>

#include "aten.h"
#include "mlu_op.h"
#include "pytorch_device_registry.hpp"

#define MLUOP_MAJOR 0
#define MLUOP_MINOR 4
#define MLUOP_PATCHLEVEL 1

mluOpDataType_t getMluOpDataType(const caffe2::TypeMeta& data_type);
mluOpTensorLayout_t getMluOpSuggestLayout(const at::Tensor& input);

// RAII owner of an mluOpTensorDescriptor_t: created on construction,
// destroyed on destruction.
class MluOpTensorDescriptor {
 public:
  MluOpTensorDescriptor() { mluOpCreateTensorDescriptor(&desc_); }
  ~MluOpTensorDescriptor() { mluOpDestroyTensorDescriptor(desc_); }
  // Non-copyable/non-movable: a copy would hold the same raw descriptor and
  // destroy it twice (rule of five — the class has a user-defined dtor).
  MluOpTensorDescriptor(const MluOpTensorDescriptor&) = delete;
  MluOpTensorDescriptor& operator=(const MluOpTensorDescriptor&) = delete;

  // Configure the descriptor (dtype/layout/dims) from an ATen tensor.
  void set(at::Tensor);
  mluOpTensorDescriptor_t desc() { return desc_; }

 private:
  mluOpTensorDescriptor_t desc_;
  void set_desc(const at::Tensor&, mluOpTensorLayout_t, mluOpDataType_t,
                std::vector<int>& dims);
};

mluOpHandle_t mluOpGetCurrentHandle(c10::DeviceIndex device_index = -1);

class MluOpHandle {
public:
MluOpHandle() : handle(nullptr) { mluOpCreate(&handle); }
~MluOpHandle() {
if (handle) {
mluOpDestroy(handle);
handle = nullptr;
}
}
void setQueue(cnrtQueue_t queue) { mluOpSetQueue(handle, queue); }
mluOpHandle_t handle;
};
88 changes: 86 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ def get_extensions():

include_dirs = []

extra_objects = []
is_rocm_pytorch = False
try:
from torch.utils.cpp_extension import ROCM_HOME
Expand Down Expand Up @@ -300,16 +301,98 @@ def get_extensions():
torch.is_mlu_available()) or \
os.getenv('FORCE_MLU', '0') == '1':
from torch_mlu.utils.cpp_extension import MLUExtension

def get_mluops_version(file_path):
    """Parse the mlu-ops version string from a header file.

    Reads the ``#define MLUOP_MAJOR/MINOR/PATCHLEVEL`` macros from
    ``file_path`` and returns ``'vMAJOR.MINOR.PATCHLEVEL'``.

    Raises:
        ValueError: if any of the three macros is missing, instead of
            failing later with an obscure ``NameError`` as the previous
            implementation did. Plain string matching replaces ``re``,
            and ``str.split()`` (no separator) tolerates runs of spaces
            or tabs between tokens.
    """
    wanted = ('MLUOP_MAJOR', 'MLUOP_MINOR', 'MLUOP_PATCHLEVEL')
    found = {}
    with open(file_path) as f:
        for line in f:
            tokens = line.split()
            # expected shape: ['#define', 'MLUOP_MAJOR', '0']
            if len(tokens) >= 3 and tokens[1] in wanted:
                found[tokens[1]] = tokens[2]
    missing = [name for name in wanted if name not in found]
    if missing:
        raise ValueError(
            f'could not find {missing} in {file_path}')
    return (f"v{found['MLUOP_MAJOR']}."
            f"{found['MLUOP_MINOR']}.{found['MLUOP_PATCHLEVEL']}")

mmcv_mluops_version = get_mluops_version(
'./mmcv/ops/csrc/pytorch/mlu/mlu_common_helper.h')
mlu_ops_path = os.getenv('MMCV_MLU_OPS_PATH')
if mlu_ops_path:
exists_mluops_version = get_mluops_version(
mlu_ops_path + '/bangc-ops/mlu_op.h')
if exists_mluops_version != mmcv_mluops_version:
print('the version of mlu-ops provided is %s,'
' while %s is needed.' %
(exists_mluops_version, mmcv_mluops_version))
exit()
try:
if os.path.exists('mlu-ops'):
if os.path.islink('mlu-ops'):
os.remove('mlu-ops')
os.symlink(mlu_ops_path, 'mlu-ops')
elif os.path.abspath('mlu-ops') != mlu_ops_path:
os.symlink(mlu_ops_path, 'mlu-ops')
else:
os.symlink(mlu_ops_path, 'mlu-ops')
except Exception:
raise FileExistsError(
'mlu-ops already exists, please move it out,'
'or rename or remove it.')
else:
if not os.path.exists('mlu-ops'):
import requests
mluops_url = 'https://github.com/Cambricon/mlu-ops/' + \
'archive/refs/tags/' + mmcv_mluops_version + '.zip'
req = requests.get(mluops_url)
with open('./mlu-ops.zip', 'wb') as f:
try:
f.write(req.content)
except Exception:
raise ImportError('failed to download mlu-ops')

from zipfile import BadZipFile, ZipFile
with ZipFile('./mlu-ops.zip', 'r') as archive:
try:
archive.extractall()
dir_name = archive.namelist()[0].split('/')[0]
os.rename(dir_name, 'mlu-ops')
except BadZipFile:
print('invalid mlu-ops.zip file')
else:
exists_mluops_version = get_mluops_version(
'./mlu-ops/bangc-ops/mlu_op.h')
if exists_mluops_version != mmcv_mluops_version:
print('the version of provided mlu-ops is %s,'
' while %s is needed.' %
(exists_mluops_version, mmcv_mluops_version))
exit()

define_macros += [('MMCV_WITH_MLU', None)]
mlu_args = os.getenv('MMCV_MLU_ARGS')
extra_compile_args['cncc'] = [mlu_args] if mlu_args else []
mluops_includes = []
mluops_includes.append('-I' +
os.path.abspath('./mlu-ops/bangc-ops'))
mluops_includes.append(
'-I' + os.path.abspath('./mlu-ops/bangc-ops/kernels'))
extra_compile_args['cncc'] = [mlu_args] + \
mluops_includes if mlu_args else mluops_includes
extra_compile_args['cxx'] += ['-fno-gnu-unique']
op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/pytorch/mlu/*.cpp') + \
glob.glob('./mmcv/ops/csrc/common/mlu/*.mlu')
glob.glob('./mmcv/ops/csrc/common/mlu/*.mlu') + \
glob.glob(
'./mlu-ops/bangc-ops/core/**/*.cpp', recursive=True) + \
glob.glob(
'./mlu-ops/bangc-ops/kernels/**/*.cpp', recursive=True) + \
glob.glob(
'./mlu-ops/bangc-ops/kernels/**/*.mlu', recursive=True)
extra_objects = glob.glob(
'./mlu-ops/bangc-ops/kernels/*/x86_64/*.o')
extension = MLUExtension
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common'))
include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/mlu'))
include_dirs.append(os.path.abspath('./mlu-ops/bangc-ops'))
elif (hasattr(torch.backends, 'mps')
and torch.backends.mps.is_available()) or os.getenv(
'FORCE_MPS', '0') == '1':
Expand Down Expand Up @@ -371,6 +454,7 @@ def get_extensions():
sources=op_files,
include_dirs=include_dirs,
define_macros=define_macros,
extra_objects=extra_objects,
extra_compile_args=extra_compile_args)
extensions.append(ext_ops)

Expand Down
Loading

0 comments on commit dfb0380

Please sign in to comment.