Skip to content

Commit

Permalink
[Enhancement] Support torch2.1 on Ascend NPU
Browse files Browse the repository at this point in the history
  • Loading branch information
Ginray committed Aug 19, 2023
1 parent c523359 commit 203960b
Show file tree
Hide file tree
Showing 8 changed files with 27 additions and 26 deletions.
3 changes: 2 additions & 1 deletion mmcv/ops/csrc/common/pytorch_npu_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
#ifndef PYTORCH_NPU_HELPER_HPP_
#define PYTORCH_NPU_HELPER_HPP_

#include <torch_npu/csrc/aten/NPUNativeFunctions.h>
// #include <torch_npu/csrc/aten/NPUNativeFunctions.h>
#include <torch_npu/csrc/framework/utils/CalcuOpUtil.h>
#include <torch_npu/csrc/framework/utils/OpAdapter.h>
#include <torch_npu/csrc/framework/utils/CustomFunctions.h>

#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
Expand Down
6 changes: 3 additions & 3 deletions mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
gtboxesFP32 = bboxes2;
}
if (bboxes2.scalar_type() != at::ScalarType::Float) {
bboxesFP32 = NPUNativeFunctions::npu_dtype_cast(bboxesFP32, at::kFloat);
gtboxesFP32 = NPUNativeFunctions::npu_dtype_cast(gtboxesFP32, at::kFloat);
bboxesFP32 = custom_ops::npu_dtype_cast(bboxesFP32, at::kFloat);
gtboxesFP32 = custom_ops::npu_dtype_cast(gtboxesFP32, at::kFloat);
}
c10::SmallVector<int64_t, SIZE> iousSize = {gtboxesFP32.size(0),
bboxesFP32.size(0)};
Expand All @@ -42,7 +42,7 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
.Attr("aligned", aligned)
.Run();
if (bboxes2.scalar_type() != at::ScalarType::Float) {
iousFP32 = NPUNativeFunctions::npu_dtype_cast(iousFP32, at::kHalf);
iousFP32 = custom_ops::npu_dtype_cast(iousFP32, at::kHalf);
}
iousFP32 = swap_flag ? iousFP32.transpose(0, 1) : iousFP32;
ious.copy_(iousFP32);
Expand Down
26 changes: 13 additions & 13 deletions mmcv/ops/csrc/pytorch/npu/focal_loss_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ void sigmoid_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
target_y = at::mul(target_y, -1.0);
target_y = at::add(target_y, 1.0);
} else {
target_y = at_npu::native::NPUNativeFunctions::one_hot(target, n_class);
target_y = at::one_hot(target, n_class);
}
target_y =
at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
at_npu::native::custom_ops::npu_dtype_cast(target_y, at::kInt);
int64_t weight_size = weight.size(0);
at::Tensor weight_y = at::ones_like(input);
if (weight_size > 0) {
weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight,
weight_y = at_npu::native::custom_ops::npu_broadcast(weight,
input.sizes());
}
OpCommand cmd;
Expand All @@ -46,17 +46,17 @@ void sigmoid_focal_loss_backward_npu(Tensor input, Tensor target, Tensor weight,
if (n_class == 1) {
target_y = at::reshape(target, input.sizes());
} else {
target_y = at_npu::native::NPUNativeFunctions::one_hot(target, n_class);
target_y = at::one_hot(target, n_class);
target_y = at::mul(target_y, -1.0);
target_y = at::add(target_y, 1.0);
}
target_y =
at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
at_npu::native::custom_ops::npu_dtype_cast(target_y, at::kInt);
at::Tensor grad_up = at::ones_like(input);
int64_t weight_size = weight.size(0);
at::Tensor weight_y = at::ones_like(input);
if (weight_size > 0) {
weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight,
weight_y = at_npu::native::custom_ops::npu_broadcast(weight,
input.sizes());
}
OpCommand cmd;
Expand All @@ -81,13 +81,13 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
Tensor output, float gamma, float alpha) {
int64_t n_class = input.size(1);
at::Tensor target_y =
at_npu::native::NPUNativeFunctions::one_hot(target, n_class);
at::one_hot(target, n_class);
target_y =
at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
at_npu::native::custom_ops::npu_dtype_cast(target_y, at::kInt);
int64_t weight_size = weight.size(0);
at::Tensor weight_y = at::ones_like(input);
if (weight_size > 0) {
weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight,
weight_y = at_npu::native::custom_ops::npu_broadcast(weight,
input.sizes());
}
at::Tensor op_output = at::ones_like(input);
Expand All @@ -107,7 +107,7 @@ void softmax_focal_loss_forward_npu(Tensor input, Tensor target, Tensor weight,
c10::SmallVector<int64_t, 2> sizes = {n_batch, 1};
at::IntArrayRef offset = at::IntArrayRef(offsets);
at::IntArrayRef size = at::IntArrayRef(sizes);
at_npu::native::NPUNativeFunctions::npu_slice_out(op_output, offset, size,
at_npu::native::custom_ops::npu_slice_out(op_output, offset, size,
output);
}

Expand All @@ -120,14 +120,14 @@ void softmax_focal_loss_backward_npu(Tensor input, Tensor target, Tensor weight,
float gamma, float alpha) {
int64_t n_class = input.size(1);
at::Tensor target_y =
at_npu::native::NPUNativeFunctions::one_hot(target, n_class);
at::one_hot(target, n_class);
target_y =
at_npu::native::NPUNativeFunctions::npu_dtype_cast(target_y, at::kInt);
at_npu::native::custom_ops::npu_dtype_cast(target_y, at::kInt);
at::Tensor grad_up = at::ones_like(input);
int64_t weight_size = weight.size(0);
at::Tensor weight_y = at::ones_like(input);
if (weight_size > 0) {
weight_y = at_npu::native::NPUNativeFunctions::npu_broadcast(weight,
weight_y = at_npu::native::custom_ops::npu_broadcast(weight,
input.sizes());
}
OpCommand cmd;
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/fused_bias_leakyrelu_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Tensor fused_bias_leakyrelu_npu(const Tensor &input, const Tensor &bias,
}
}
at::Tensor bias_tmp = at::reshape(bias, input_size_tmp);
at::Tensor bias_ = at_npu::native::NPUNativeFunctions::npu_broadcast(
at::Tensor bias_ = at_npu::native::custom_ops::npu_broadcast(
bias_tmp, input.sizes());
OpCommand cmd;
cmd.Name("FusedBiasLeakyRelu")
Expand Down
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/nms_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
auto outputsizeInt = outputsizeBool.to(at::ScalarType::Int);
auto countLen = at::sum(outputsizeInt, at::ScalarType::Int);
at::Tensor actual_output = output.slice(0, 0, countLen.item().toLong());
actual_output = at_npu::native::NPUNativeFunctions::npu_dtype_cast(
actual_output = at_npu::native::custom_ops::npu_dtype_cast(
actual_output, at::kLong);
return actual_output;
}
Expand Down
6 changes: 3 additions & 3 deletions mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
at::Tensor detsCast = dets;
at::Tensor scoresCast = scores;
if (originDtype != at::ScalarType::Float) {
detsCast = NPUNativeFunctions::npu_dtype_cast(dets, at::kFloat);
scoresCast = NPUNativeFunctions::npu_dtype_cast(scores, at::kFloat);
detsCast = custom_ops::npu_dtype_cast(dets, at::kFloat);
scoresCast = custom_ops::npu_dtype_cast(scores, at::kFloat);
}
c10::SmallVector<int64_t, SIZE> selectedIndexSize = {dets.size(0)};
at::Tensor selectedBox = OpPreparation::ApplyTensor(dets);
Expand All @@ -27,6 +27,6 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
.Output(selectedIndex)
.Attr("iou_threshold", (float)iou_threshold)
.Run();
selectedIndex = NPUNativeFunctions::npu_dtype_cast(selectedIndex, at::kLong);
selectedIndex = custom_ops::npu_dtype_cast(selectedIndex, at::kLong);
return selectedIndex;
}
2 changes: 1 addition & 1 deletion mmcv/ops/csrc/pytorch/npu/voxelization_npu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ int hard_voxelize_forward_npu(const at::Tensor &points, at::Tensor &voxels,
const int max_points, const int max_voxels,
const int NDim = 3) {
at::Tensor voxel_num_tmp = OpPreparation::ApplyTensor(points, {1});
at::Tensor voxel_num = at_npu::native::NPUNativeFunctions::npu_dtype_cast(
at::Tensor voxel_num = at_npu::native::custom_ops::npu_dtype_cast(
voxel_num_tmp, at::kInt);

at::Tensor voxel_size_cpu = at::from_blob(
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def get_extensions():
cuda_args = os.getenv('MMCV_CUDA_ARGS')
extra_compile_args = {
'nvcc': [cuda_args, '-std=c++14'] if cuda_args else ['-std=c++14'],
'cxx': ['-std=c++14'],
'cxx': ['-std=c++17'],
}
if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
define_macros += [('MMCV_WITH_CUDA', None)]
Expand Down Expand Up @@ -201,13 +201,13 @@ def get_extensions():
extra_compile_args = {'cxx': []}

if platform.system() != 'Windows':
extra_compile_args['cxx'] = ['-std=c++14']
extra_compile_args['cxx'] = ['-std=c++17']
else:
# In Windows, PyTorch >= 2.0 builds extensions with C++17,
# so match it here; the old workaround that forced C++14
# no longer applies now that torch 2.1 requires C++17.
if parse_version(torch.__version__) >= parse_version('2.0.0'):
extra_compile_args['cxx'] = ['/std:c++14']
extra_compile_args['cxx'] = ['/std:c++17']

include_dirs = []
library_dirs = []
Expand Down

0 comments on commit 203960b

Please sign in to comment.