Skip to content

Commit

Permalink
Add Tensor factory operators
Browse files Browse the repository at this point in the history
1. Setup building for archive static library libtorch-xpu-ops.a
2. Add Tensor factory operators for XPU backend

Signed-off-by: Feng Yuan <feng1.yuan@intel.com>

ghstack-source-id: 95e5077053728cabf6b11185e18a0effa4f90db4
Pull Request resolved: #3
  • Loading branch information
fengyuan14 committed Feb 24, 2024
1 parent 1a61c5f commit d779795
Show file tree
Hide file tree
Showing 7 changed files with 252 additions and 0 deletions.
24 changes: 24 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,30 @@ if(BUILD_TEST)
add_subdirectory(${TORCH_XPU_OPS_ROOT}/test/sycl ${CMAKE_BINARY_DIR}/test_sycl)
endif()

# The library couples with PyTorch. Here are two possible building processes:
# 1. standalone - Needs a pre-installed PyTorch. Introduces PyTorch dependencies
#    through the PyTorch installation directory.
# 2. submodule - Built as a submodule of PyTorch. Introduces PyTorch dependencies
#    through the PyTorch sources directory.
if(PYTORCH_USE_XPU)
# submodule - the enclosing PyTorch build already provides all dependencies.
else()
# standalone
# NOTE(review): "VERION" looks like a typo for "VERSION", and this variable is
# not read anywhere in this file - confirm whether it is consumed elsewhere
# before renaming or removing it.
set(Torch_COMP_VERION "2.3.0")

# Standalone mode cannot proceed without knowing where PyTorch is installed.
if(NOT PYTORCH_INSTALL_DIR)
message(FATAL_ERROR "Cannot find PYTORCH_INSTALL_DIR in standalone build mode, please set -DPYTORCH_INSTALL_DIR ...")
endif()

# Locate the Torch and Caffe2 CMake packages under the user-provided
# installation prefix.
set(Torch_DIR ${PYTORCH_INSTALL_DIR}/share/cmake/Torch)
find_package(Torch REQUIRED)

set(Caffe2_DIR ${PYTORCH_INSTALL_DIR}/share/cmake/Caffe2)
find_package(Caffe2 REQUIRED)

# Aggregate the include directories exported by both packages; consumed by
# src/CMakeLists.txt when configuring the torch_xpu_ops target.
set(PYTORCH_INCLUDE_DIRS ${TORCH_INCLUDE_DIRS} ${CAFFE2_INCLUDE_DIRS})
endif()

add_subdirectory(${TORCH_XPU_OPS_ROOT}/src ${CMAKE_BINARY_DIR}/torch_xpu_ops)

# Signal to the caller (e.g. a parent PyTorch build) that XPU support was configured.
set(PYTORCH_FOUND_XPU TRUE)

message(STATUS "XPU found")
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<div align="center">

torch-xpu-ops
===========================

torch-xpu-ops is an `xpu` implementation of PyTorch ATen operators.

## Build
* Standalone - Require pre-installation of PyTorch
```bash
mkdir build
cd build && cmake -DBUILD_TEST=1 -DPYTORCH_INSTALL_DIR=YOUR_PYTORCH_INSTALLATION_DIR ..
make -j x
```
* Submodule - Build as a submodule of PyTorch
```bash
// TODO
```
25 changes: 25 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Define the archive (static library) target - torch_xpu_ops

# Source lists populated by the aten/ subdirectory (via PARENT_SCOPE).
set(ATen_XPU_CPP_SRCS)
set(ATen_XPU_SYCL_SRCS)

set(ATen_XPU_INCLUDE_DIRS ${TORCH_XPU_OPS_ROOT}/src)

add_subdirectory(aten)

# sycl_add_library reads SYCL_LINK_LIBRARIES_KEYWORD when it issues its own
# target_link_libraries calls; set it around the call and clear it afterwards
# so it does not leak into unrelated targets.
set(SYCL_LINK_LIBRARIES_KEYWORD PRIVATE)
sycl_add_library(
torch_xpu_ops
STATIC
SYCL_SOURCES ${ATen_XPU_SYCL_SRCS}
CXX_SOURCES ${ATen_XPU_CPP_SRCS})
set(SYCL_LINK_LIBRARIES_KEYWORD)

# Align with PyTorch compile options
# 1. submodule - PYTORCH_SRC_DIR/cmake/public/utils.cmake
# 2. standalone - PYTORCH_INSTALL_DIR/share/cmake/Caffe2/public/utils.cmake
torch_compile_options(torch_xpu_ops)
target_compile_options_if_supported(torch_xpu_ops "-Wno-deprecated-copy")

# PUBLIC: headers from PyTorch and from this source tree appear in this
# library's public headers, so consumers need them too.
target_include_directories(torch_xpu_ops PUBLIC ${PYTORCH_INCLUDE_DIRS})
target_include_directories(torch_xpu_ops PUBLIC ${ATen_XPU_INCLUDE_DIRS})
10 changes: 10 additions & 0 deletions src/aten/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# ATen XPU sources
#
# Collects the C++ and SYCL sources for the parent scope's torch_xpu_ops
# target. CONFIGURE_DEPENDS (CMake 3.12+) makes the build system re-run the
# glob at build time, so newly added source files are picked up without a
# manual reconfigure; without it the glob result is frozen at configure time.
file(GLOB xpu_cpp CONFIGURE_DEPENDS "*.cpp")
file(GLOB xpu_sycl CONFIGURE_DEPENDS "sycl/*.cpp")

list(APPEND ATen_XPU_CPP_SRCS ${xpu_cpp})
list(APPEND ATen_XPU_SYCL_SRCS ${xpu_sycl})

# Export the accumulated lists to the parent directory (src/CMakeLists.txt).
set(ATen_XPU_CPP_SRCS ${ATen_XPU_CPP_SRCS} PARENT_SCOPE)
set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE)
97 changes: 97 additions & 0 deletions src/aten/EmptyTensor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#define TORCH_ASSERT_NO_OPERATORS
#include <ATen/Context.h>
#include <ATen/EmptyTensor.h>
#include <c10/core/DeviceGuard.h>

#include <aten/EmptyTensor.h>

namespace at::detail {

// Allocates an uninitialized tensor carrying the XPU dispatch key.
//
// size: dimensions of the tensor to allocate.
// dtype: element scalar type.
// device_opt: target device (defaulted via device_or_default); must be XPU.
// memory_format_opt: optional memory format forwarded to empty_generic.
TensorBase empty_xpu(
    IntArrayRef size,
    ScalarType dtype,
    c10::optional<Device> device_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  const auto device = device_or_default(device_opt);
  TORCH_INTERNAL_ASSERT(device.is_xpu());
  // XXX: device guard is disabled for now; re-enable once an XPU DeviceGuard
  // implementation is available.
  // const c10::DeviceGuard device_guard(device);
  // NOTE(review): backing storage comes from the CPU allocator even though the
  // tensor carries the XPU dispatch key - presumably a placeholder until an
  // XPU device allocator is wired up; confirm before relying on device residency.
  auto* allocator = at::getCPUAllocator();
  constexpr c10::DispatchKeySet xpu_dks(c10::DispatchKey::XPU);
  return at::detail::empty_generic(
      size, allocator, xpu_dks, dtype, memory_format_opt);
}

// Optional-based overload: validates the pin-memory and layout options,
// resolves the dtype default, and forwards to the ScalarType overload.
// Pinned memory is a CPU-only feature and is rejected; only the strided
// layout is supported (debug-checked).
TensorBase empty_xpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  TORCH_CHECK(
      !pin_memory_opt.value_or(false),
      "Only dense CPU tensors can be pinned");
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      layout_or_default(layout_opt) == Layout::Strided);

  return at::detail::empty_xpu(
      size, dtype_or_default(dtype_opt), device_opt, memory_format_opt);
}

// TensorOptions-based overload: unpacks each option field into a named
// local and forwards to the optional-based overload.
TensorBase empty_xpu(IntArrayRef size, const TensorOptions& options) {
  const auto dtype = optTypeMetaToScalarType(options.dtype_opt());
  const auto layout = options.layout_opt();
  const auto device = options.device_opt();
  const auto pin_memory = options.pinned_memory_opt();
  const auto memory_format = options.memory_format_opt();
  return at::detail::empty_xpu(
      size, dtype, layout, device, pin_memory, memory_format);
}

// Allocates an uninitialized tensor with explicit strides, carrying the XPU
// dispatch key.
//
// size: dimensions of the tensor to allocate.
// stride: element strides, one per dimension.
// dtype: element scalar type.
// device_opt: target device (defaulted via device_or_default); must be XPU.
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    ScalarType dtype,
    c10::optional<Device> device_opt) {
  const auto device = device_or_default(device_opt);
  TORCH_INTERNAL_ASSERT(device.is_xpu());
  // XXX: device guard is disabled for now; re-enable once an XPU DeviceGuard
  // implementation is available.
  // const c10::DeviceGuard device_guard(device);
  // NOTE(review): storage is obtained from the CPU allocator despite the XPU
  // dispatch key - presumably a placeholder until an XPU device allocator
  // exists; confirm before relying on device residency.
  auto* allocator = at::getCPUAllocator();
  constexpr c10::DispatchKeySet xpu_dks(c10::DispatchKey::XPU);
  return at::detail::empty_strided_generic(
      size, stride, allocator, xpu_dks, dtype);
}

// Optional-based overload: validates the pin-memory and layout options,
// resolves the dtype default, and forwards to the ScalarType overload.
// Pinned memory is a CPU-only feature and is rejected; only the strided
// layout is supported (debug-checked).
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  TORCH_CHECK(
      !pin_memory_opt.value_or(false),
      "Only dense CPU tensors can be pinned");
  TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
      layout_or_default(layout_opt) == Layout::Strided);

  return at::detail::empty_strided_xpu(
      size, stride, dtype_or_default(dtype_opt), device_opt);
}

// TensorOptions-based overload: unpacks each option field into a named
// local and forwards to the optional-based overload.
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions& options) {
  const auto dtype = optTypeMetaToScalarType(options.dtype_opt());
  const auto layout = options.layout_opt();
  const auto device = options.device_opt();
  const auto pin_memory = options.pinned_memory_opt();
  return at::detail::empty_strided_xpu(
      size, stride, dtype, layout, device, pin_memory);
}

} // namespace at::detail
42 changes: 42 additions & 0 deletions src/aten/EmptyTensor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#pragma once
#include <ATen/core/TensorBase.h>

namespace at::detail {

// Factory functions that allocate uninitialized tensors carrying the XPU
// dispatch key. Each factory comes in three overloads: explicit ScalarType,
// per-field c10::optional arguments, and a TensorOptions bundle.
// XXX: add TORCH_XPU_API export annotation once the export macro exists.
TensorBase empty_xpu(
    IntArrayRef size,
    ScalarType dtype,
    c10::optional<Device> device_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt);

// Optional-based overload; rejects pinned memory and non-strided layouts.
TensorBase empty_xpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt);

// TensorOptions-based convenience overload.
TensorBase empty_xpu(IntArrayRef size, const TensorOptions& options);

// As empty_xpu, but with caller-specified strides (one per dimension).
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    ScalarType dtype,
    c10::optional<Device> device_opt);

// Optional-based overload; rejects pinned memory and non-strided layouts.
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt);

// TensorOptions-based convenience overload.
TensorBase empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    const TensorOptions& options);

} // namespace at::detail
36 changes: 36 additions & 0 deletions src/aten/TensorFactories.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <torch/library.h>

#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/empty_native.h>
#include <ATen/ops/empty_strided_native.h>
#endif

#include <aten/EmptyTensor.h>

namespace at::native {

// Native implementation of `aten::empty.memory_format` for the XPU backend.
// Allocates an uninitialized tensor via at::detail::empty_xpu, then raises if
// deterministic fill of uninitialized memory was requested, since the XPU
// backend does not implement it.
// Fix: the TORCH_CHECK statement was missing its trailing semicolon.
Tensor empty_xpu(
    IntArrayRef size,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt,
    c10::optional<c10::MemoryFormat> memory_format_opt) {
  Tensor result = at::detail::empty_xpu(
      size, dtype_opt, layout_opt, device_opt, pin_memory_opt,
      memory_format_opt);
  // See Note [Enabling Deterministic Operations]
  TORCH_CHECK(
      !(C10_UNLIKELY(
          at::globalContext().deterministicAlgorithms() &&
          at::globalContext().deterministicFillUninitializedMemory())),
      "XPU backend doesn't support deterministic implementation for empty ...");
  return result;
}

// Native implementation of `aten::empty_strided` for the XPU backend.
// Allocates an uninitialized strided tensor via at::detail::empty_strided_xpu,
// then raises if deterministic fill of uninitialized memory was requested,
// since the XPU backend does not implement it.
// Fix: the TORCH_CHECK statement was missing its trailing semicolon.
Tensor empty_strided_xpu(
    IntArrayRef size,
    IntArrayRef stride,
    c10::optional<ScalarType> dtype_opt,
    c10::optional<Layout> layout_opt,
    c10::optional<Device> device_opt,
    c10::optional<bool> pin_memory_opt) {
  Tensor result = at::detail::empty_strided_xpu(
      size, stride, dtype_opt, layout_opt, device_opt, pin_memory_opt);
  // See Note [Enabling Deterministic Operations]
  TORCH_CHECK(
      !(C10_UNLIKELY(
          at::globalContext().deterministicAlgorithms() &&
          at::globalContext().deterministicFillUninitializedMemory())),
      "XPU backend doesn't support deterministic implementation for empty_strided ...");
  return result;
}

// Register the XPU factory implementations with the ATen dispatcher, so calls
// to aten::empty.memory_format / aten::empty_strided on XPU tensors route to
// the functions above. TORCH_SELECTIVE_NAME keeps selective-build tracking.
TORCH_LIBRARY_IMPL(aten, XPU, m) {
m.impl(TORCH_SELECTIVE_NAME("aten::empty.memory_format"), TORCH_FN(at::native::empty_xpu));
m.impl(TORCH_SELECTIVE_NAME("aten::empty_strided"), TORCH_FN(at::native::empty_strided_xpu));
}

} // namespace at::native

0 comments on commit d779795

Please sign in to comment.