-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1. Setup building for archive static library libtorch-xpu-ops.a 2. Add Tensor factory operators for XPU backend Signed-off-by: Feng Yuan <feng1.yuanintel.com> ghstack-source-id: 95e5077053728cabf6b11185e18a0effa4f90db4 Pull Request resolved: #3
- Loading branch information
1 parent
1a61c5f
commit d779795
Showing
7 changed files
with
252 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
<div align="center"> | ||
|
||
torch-xpu-ops* | ||
=========================== | ||
|
||
</div> | ||
|
||
torch-xpu-ops is an `xpu` implementation of PyTorch ATen operators. | ||
|
||
## Build | ||
* Standalone - Requires a pre-installed PyTorch | ||
```bash | ||
mkdir build | ||
cd build && cmake -DBUILD_TEST=1 -DPYTORCH_INSTALL_DIR=YOUR_PYTORCH_INSTALLATION_DIR .. | ||
make -j x | ||
``` | ||
* Submodule - Build as a submodule of PyTorch | ||
```bash | ||
# TODO | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# define archive static library target - torch_xpu_ops | ||
|
||
# Source lists start empty; the `aten` subdirectory added below publishes | ||
# its file lists into these variables via PARENT_SCOPE. | ||
set(ATen_XPU_CPP_SRCS) | ||
set(ATen_XPU_SYCL_SRCS) | ||
|
||
# Include root for this project's own headers. | ||
set(ATen_XPU_INCLUDE_DIRS ${TORCH_XPU_OPS_ROOT}/src) | ||
|
||
add_subdirectory(aten) | ||
|
||
# SYCL_LINK_LIBRARIES_KEYWORD is set only around the sycl_add_library() call | ||
# and cleared right after it. Presumably sycl_add_library() reads it to pick | ||
# the link keyword - TODO confirm against the macro definition. | ||
set(SYCL_LINK_LIBRARIES_KEYWORD PRIVATE) | ||
sycl_add_library( | ||
torch_xpu_ops | ||
STATIC | ||
SYCL_SOURCES ${ATen_XPU_SYCL_SRCS} | ||
CXX_SOURCES ${ATen_XPU_CPP_SRCS}) | ||
set(SYCL_LINK_LIBRARIES_KEYWORD) | ||
|
||
# Align with PyTorch compile options | ||
# 1. submodule - PYTORCH_SRC_DIR/cmake/public/utils.cmake | ||
# 2. standalone - PYTORCH_INSTALL_DIR/share/cmake/Caffe2/public/utils.cmake | ||
torch_compile_options(torch_xpu_ops) | ||
target_compile_options_if_supported(torch_xpu_ops "-Wno-deprecated-copy") | ||
|
||
# PUBLIC: consumers linking torch_xpu_ops inherit both include paths. | ||
target_include_directories(torch_xpu_ops PUBLIC ${PYTORCH_INCLUDE_DIRS}) | ||
target_include_directories(torch_xpu_ops PUBLIC ${ATen_XPU_INCLUDE_DIRS}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# ATen XPU sources | ||
|
||
# Collect host C++ sources from this directory and SYCL sources from sycl/. | ||
# NOTE(review): file(GLOB) is evaluated at configure time only, so newly | ||
# added files need a CMake re-run to be picked up. | ||
file(GLOB xpu_cpp "*.cpp") | ||
file(GLOB xpu_sycl "sycl/*.cpp") | ||
|
||
list(APPEND ATen_XPU_CPP_SRCS ${xpu_cpp}) | ||
list(APPEND ATen_XPU_SYCL_SRCS ${xpu_sycl}) | ||
|
||
# Publish the updated lists to the directory that called add_subdirectory(). | ||
set(ATen_XPU_CPP_SRCS ${ATen_XPU_CPP_SRCS} PARENT_SCOPE) | ||
set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#define TORCH_ASSERT_NO_OPERATORS | ||
#include <ATen/Context.h> | ||
#include <ATen/EmptyTensor.h> | ||
#include <c10/core/DeviceGuard.h> | ||
|
||
#include <aten/EmptyTensor.h> | ||
|
||
namespace at::detail { | ||
|
||
// Builds a tensor of shape `size` and element type `dtype` tagged with the
// XPU dispatch key by delegating to at::detail::empty_generic.
//
// `device_opt` is resolved through device_or_default() and must name an XPU
// device (internal assert). `memory_format_opt` is forwarded unchanged.
TensorBase empty_xpu(
    IntArrayRef size,
    ScalarType dtype,
    c10::optional<Device> device_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  const Device device = device_or_default(device_opt);
  TORCH_INTERNAL_ASSERT(device.is_xpu());
  // XXX: the device guard is intentionally left disabled for now.
  // const c10::DeviceGuard device_guard(device);
  constexpr c10::DispatchKeySet xpu_key_set(c10::DispatchKey::XPU);
  // Storage currently comes from the CPU allocator.
  return at::detail::empty_generic(
      size, at::getCPUAllocator(), xpu_key_set, dtype, memory_format_opt);
}
|
||
// Optional-argument overload of empty_xpu.
//
// Rejects a pinned-memory request (TORCH_CHECK), asserts in debug builds that
// the resolved layout is strided, resolves the dtype default, and forwards to
// the (size, dtype, device, memory_format) overload.
TensorBase empty_xpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_CHECK(
      !pin_memory_opt.has_value() || !*pin_memory_opt,
      "Only dense CPU tensors can be pinned");
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      layout_or_default(layout_opt) == Layout::Strided);

  return at::detail::empty_xpu(
      size, dtype_or_default(dtype_opt), device_opt, memory_format_opt);
}
|
||
// TensorOptions overload of empty_xpu: unpacks the individual optional
// fields from `options` and forwards them to the optional-argument overload.
TensorBase empty_xpu(IntArrayRef size, const TensorOptions& options) {
  const auto scalar_type_opt = optTypeMetaToScalarType(options.dtype_opt());
  const auto layout_opt = options.layout_opt();
  const auto device_opt = options.device_opt();
  const auto pinned_opt = options.pinned_memory_opt();
  const auto format_opt = options.memory_format_opt();
  return at::detail::empty_xpu(
      size, scalar_type_opt, layout_opt, device_opt, pinned_opt, format_opt);
}
|
||
// Builds a tensor with explicit `size` and `stride` and element type `dtype`,
// tagged with the XPU dispatch key, by delegating to
// at::detail::empty_strided_generic.
//
// `device_opt` is resolved through device_or_default() and must name an XPU
// device (internal assert).
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    ScalarType dtype,
    c10::optional<Device> device_opt) {
  const Device device = device_or_default(device_opt);
  TORCH_INTERNAL_ASSERT(device.is_xpu());
  // XXX: the device guard is intentionally left disabled for now.
  // const c10::DeviceGuard device_guard(device);
  constexpr c10::DispatchKeySet xpu_key_set(c10::DispatchKey::XPU);
  // Storage currently comes from the CPU allocator.
  return at::detail::empty_strided_generic(
      size, stride, at::getCPUAllocator(), xpu_key_set, dtype);
}
|
||
// Optional-argument overload of empty_strided_xpu.
//
// Rejects a pinned-memory request (TORCH_CHECK), asserts in debug builds that
// the resolved layout is strided, resolves the dtype default, and forwards to
// the (size, stride, dtype, device) overload.
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_CHECK(
      !pin_memory_opt.has_value() || !*pin_memory_opt,
      "Only dense CPU tensors can be pinned");
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      layout_or_default(layout_opt) == Layout::Strided);

  return at::detail::empty_strided_xpu(
      size, stride, dtype_or_default(dtype_opt), device_opt);
}
|
||
// TensorOptions overload of empty_strided_xpu: unpacks the individual
// optional fields from `options` and forwards them to the optional-argument
// overload. The memory-format field of `options` is not consulted.
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions& options) {
  const auto scalar_type_opt = optTypeMetaToScalarType(options.dtype_opt());
  const auto layout_opt = options.layout_opt();
  const auto device_opt = options.device_opt();
  const auto pinned_opt = options.pinned_memory_opt();
  return at::detail::empty_strided_xpu(
      size, stride, scalar_type_opt, layout_opt, device_opt, pinned_opt);
}
|
||
} // namespace at::detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#pragma once | ||
#include <ATen/core/TensorBase.h> | ||
|
||
namespace at::detail { | ||
|
||
// XXX: add TORCH_XPU_API | ||
// Creates an XPU-keyed tensor of shape `size` with a concrete element type. | ||
// `device_opt` and `memory_format_opt` may be left unset. | ||
TensorBase empty_xpu( | ||
IntArrayRef size, | ||
ScalarType dtype, | ||
c10::optional<Device> device_opt, | ||
c10::optional<c10::MemoryFormat> memory_format_opt); | ||
|
||
// Overload taking every tensor option as an individual optional value | ||
// (dtype, layout, device, pin_memory, memory format). | ||
TensorBase empty_xpu( | ||
IntArrayRef size, | ||
c10::optional<ScalarType> dtype_opt, | ||
c10::optional<Layout> layout_opt, | ||
c10::optional<Device> device_opt, | ||
c10::optional<bool> pin_memory_opt, | ||
c10::optional<c10::MemoryFormat> memory_format_opt); | ||
|
||
// Overload taking the tensor options bundled in a TensorOptions object. | ||
TensorBase empty_xpu(IntArrayRef size, const TensorOptions& options); | ||
|
||
// Creates an XPU-keyed tensor with explicit `size` and `stride` and a | ||
// concrete element type. `device_opt` may be left unset. | ||
TensorBase empty_strided_xpu( | ||
IntArrayRef size, | ||
IntArrayRef stride, | ||
ScalarType dtype, | ||
c10::optional<Device> device_opt); | ||
|
||
// Overload taking every tensor option as an individual optional value | ||
// (dtype, layout, device, pin_memory). | ||
TensorBase empty_strided_xpu( | ||
IntArrayRef size, | ||
IntArrayRef stride, | ||
c10::optional<ScalarType> dtype_opt, | ||
c10::optional<Layout> layout_opt, | ||
c10::optional<Device> device_opt, | ||
c10::optional<bool> pin_memory_opt); | ||
|
||
// Overload taking the tensor options bundled in a TensorOptions object. | ||
TensorBase empty_strided_xpu( | ||
IntArrayRef size, | ||
IntArrayRef stride, | ||
const TensorOptions& options); | ||
|
||
} // namespace at::detail |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS | ||
#include <ATen/core/Tensor.h> | ||
#include <torch/library.h> | ||
|
||
#ifndef AT_PER_OPERATOR_HEADERS | ||
#include <ATen/Functions.h> | ||
#include <ATen/NativeFunctions.h> | ||
#else | ||
#include <ATen/ops/empty_native.h> | ||
#include <ATen/ops/empty_strided_native.h> | ||
#endif | ||
|
||
#include <aten/EmptyTensor.h> | ||
|
||
namespace at::native { | ||
|
||
// XPU kernel for aten::empty.memory_format.
//
// Delegates allocation to at::detail::empty_xpu and then rejects the call if
// both deterministic algorithms and deterministic filling of uninitialized
// memory are enabled in the global context, since this backend does not
// implement a deterministic fill.
//
// Raises (via TORCH_CHECK) when that deterministic mode is active; errors
// raised by at::detail::empty_xpu propagate unchanged.
Tensor empty_xpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  Tensor result = at::detail::empty_xpu(
      size,
      dtype_opt,
      layout_opt,
      device_opt,
      pin_memory_opt,
      memory_format_opt);
  // See Note [Enabling Deterministic Operations]
  // The statement is now explicitly terminated with ';' instead of relying on
  // the macro expanding to a self-contained block.
  TORCH_CHECK(
      !(C10_UNLIKELY(
          at::globalContext().deterministicAlgorithms() &&
          at::globalContext().deterministicFillUninitializedMemory())),
      "XPU backend doesn't support deterministic implementation for empty ...");
  return result;
}
|
||
// XPU kernel for aten::empty_strided.
//
// Delegates allocation to at::detail::empty_strided_xpu and then rejects the
// call if both deterministic algorithms and deterministic filling of
// uninitialized memory are enabled in the global context, since this backend
// does not implement a deterministic fill.
//
// Raises (via TORCH_CHECK) when that deterministic mode is active; errors
// raised by at::detail::empty_strided_xpu propagate unchanged.
Tensor empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  Tensor result = at::detail::empty_strided_xpu(
      size, stride, dtype_opt, layout_opt, device_opt, pin_memory_opt);
  // See Note [Enabling Deterministic Operations]
  // The statement is now explicitly terminated with ';' instead of relying on
  // the macro expanding to a self-contained block.
  TORCH_CHECK(
      !(C10_UNLIKELY(
          at::globalContext().deterministicAlgorithms() &&
          at::globalContext().deterministicFillUninitializedMemory())),
      "XPU backend doesn't support deterministic implementation for empty_strided ...");
  return result;
}
|
||
// Registers the two factory kernels defined in this file as the XPU | ||
// implementations of aten::empty.memory_format and aten::empty_strided. | ||
TORCH_LIBRARY_IMPL(aten, XPU, m) { | ||
m.impl(TORCH_SELECTIVE_NAME("aten::empty.memory_format"), TORCH_FN(at::native::empty_xpu)); | ||
m.impl(TORCH_SELECTIVE_NAME("aten::empty_strided"), TORCH_FN(at::native::empty_strided_xpu)); | ||
} | ||
|
||
} // namespace at::native |