Commit 42ba714

Merge pull request #186 from EthicalML/transfer_hpp_to_cpp

Resolve moving all functions from tensor HPP to CPP

axsaucedo authored Mar 13, 2021
2 parents 00f02cb + 7d2c782 commit 42ba714

Showing 6 changed files with 277 additions and 232 deletions.
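The header below keeps only declarations, while the bodies move into the Tensor implementation file, which is not rendered in this view (only four of the six changed files are shown). As a minimal sketch of where the inline bodies end up — assuming a conventional src/Tensor.cpp, since that file is not visible here — the relocated definitions would look roughly like this:

// Sketch only: bodies taken verbatim from the inline definitions removed
// from Kompute.hpp below; the file name src/Tensor.cpp is an assumption.
#include <cstring> // memcpy

#include "kompute/Tensor.hpp"

namespace kp {

uint32_t
Tensor::size()
{
    return this->mSize;
}

uint32_t
Tensor::dataTypeMemorySize()
{
    return this->mDataTypeMemorySize;
}

uint32_t
Tensor::memorySize()
{
    return this->mSize * this->mDataTypeMemorySize;
}

Tensor::TensorDataTypes
Tensor::dataType()
{
    return this->mDataType;
}

void*
Tensor::rawData()
{
    return this->mRawData;
}

void
Tensor::setRawData(const void* data)
{
    // Direct copy into the tensor's mapped host-visible memory
    memcpy(this->mRawData, data, this->memorySize());
}

} // namespace kp

The mapRawData() and unmapRawData() helpers move the same way; their former inline bodies appear in the removed lines further down.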
154 changes: 68 additions & 86 deletions single_include/kompute/Kompute.hpp
@@ -910,35 +910,39 @@ class Tensor
void recordCopyFromDeviceToStaging(const vk::CommandBuffer& commandBuffer);

/**
-     * Records the buffer memory barrier into the primary buffer and command buffer which
-     * ensures that relevant data transfers are carried out correctly.
+     * Records the buffer memory barrier into the primary buffer and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param srcAccessMask Access flags for source access mask
* @param dstAccessMask Access flags for destination access mask
* @param srcStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
-    void recordPrimaryBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
-                                          vk::AccessFlagBits srcAccessMask,
-                                          vk::AccessFlagBits dstAccessMask,
-                                          vk::PipelineStageFlagBits srcStageMask,
-                                          vk::PipelineStageFlagBits dstStageMask);
+    void recordPrimaryBufferMemoryBarrier(
+      const vk::CommandBuffer& commandBuffer,
+      vk::AccessFlagBits srcAccessMask,
+      vk::AccessFlagBits dstAccessMask,
+      vk::PipelineStageFlagBits srcStageMask,
+      vk::PipelineStageFlagBits dstStageMask);
/**
-     * Records the buffer memory barrier into the staging buffer and command buffer which
-     * ensures that relevant data transfers are carried out correctly.
+     * Records the buffer memory barrier into the staging buffer and command
+     * buffer which ensures that relevant data transfers are carried out
+     * correctly.
*
* @param commandBuffer Vulkan Command Buffer to record the commands into
* @param srcAccessMask Access flags for source access mask
* @param dstAccessMask Access flags for destination access mask
* @param srcStageMask Pipeline stage flags for source stage mask
* @param dstStageMask Pipeline stage flags for destination stage mask
*/
-    void recordStagingBufferMemoryBarrier(const vk::CommandBuffer& commandBuffer,
-                                          vk::AccessFlagBits srcAccessMask,
-                                          vk::AccessFlagBits dstAccessMask,
-                                          vk::PipelineStageFlagBits srcStageMask,
-                                          vk::PipelineStageFlagBits dstStageMask);
+    void recordStagingBufferMemoryBarrier(
+      const vk::CommandBuffer& commandBuffer,
+      vk::AccessFlagBits srcAccessMask,
+      vk::AccessFlagBits dstAccessMask,
+      vk::PipelineStageFlagBits srcStageMask,
+      vk::PipelineStageFlagBits dstStageMask);

/**
* Constructs a vulkan descriptor buffer info which can be used to specify
@@ -955,47 +959,74 @@ class Tensor
*
* @return Unsigned integer representing the total number of elements
*/
-    // TODO: move to cpp
-    uint32_t size() { return this->mSize; }
+    uint32_t size();

-    // TODO: move to cpp
-    uint32_t dataTypeMemorySize() { return this->mDataTypeMemorySize; }
+    /**
+     * Returns the total size of a single element of the respective data type
+     * that this tensor holds.
+     *
+     * @return Unsigned integer representing the memory of a single element of
+     * the respective data type.
+     */
+    uint32_t dataTypeMemorySize();

-    // TODO: move to cpp
-    uint32_t memorySize() { return this->mSize * this->mDataTypeMemorySize; }
+    /**
+     * Returns the total memory size of the data contained by the Tensor
+     * object, which equates to (this->size() * this->dataTypeMemorySize()).
+     *
+     * @return Unsigned integer representing the total memory size of the
+     * data contained by the Tensor object.
+     */
+    uint32_t memorySize();

/**
-     * Retrieve the underlying data type of the Tensor
+     * Retrieve the underlying data type of the Tensor (e.g. float, int32,
+     * uint32, double or bool)
*
* @return Data type of tensor of type kp::Tensor::TensorDataTypes
*/
-    TensorDataTypes dataType() { return this->mDataType; }
+    TensorDataTypes dataType();

-    void* rawData() { return this->mRawData; }
+    /**
+     * Retrieve the raw data via the pointer to the memory that contains the
+     * raw memory of this current tensor. This pointer is set to nullptr when
+     * the Tensor is destroyed.
+     *
+     * @return Pointer to raw memory containing raw bytes data of Tensor.
+     */
+    void* rawData();

-    // TODO: move to cpp
+    /**
+     * Sets / resets the data of the tensor, copying directly into the
+     * host-visible GPU memory mapped by the tensor.
+     */
+    void setRawData(const void* data);

+    /**
+     * Template to return the pointer to the tensor's data cast to a specific
+     * type, which can be any of the supported types: float, double, int32,
+     * uint32 and bool.
+     *
+     * @return Pointer to raw memory containing raw bytes data of Tensor.
+     */
template<typename T>
T* data()
{
return (T*)this->mRawData;
}

+    /**
+     * Template to get the data of the current tensor as a vector of a
+     * specific type, which can be any of the supported types: float, double,
+     * int32, uint32 and bool.
+     *
+     * @return Vector of type provided by template.
+     */
template<typename T>
std::vector<T> vector()
{
return { (T*)this->mRawData, ((T*)this->mRawData) + this->size() };
}

-    /**
-     * Sets / resets the vector data of the tensor. This function does not
-     * perform any copies into GPU memory and is only performed on the host.
-     */
-    void setRawData(const void* data)
-    {
-        // Copy data
-        memcpy(this->mRawData, data, this->memorySize());
-    }

protected:
// -------------- ALWAYS OWNED RESOURCES
TensorTypes mTensorType;
@@ -1005,57 +1036,6 @@ class Tensor
void* mRawData;

private:
-    void mapRawData()
-    {
-
-        KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
-
-        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-        if (this->mTensorType == TensorTypes::eHost) {
-            hostVisibleMemory = this->mPrimaryMemory;
-        } else if (this->mTensorType == TensorTypes::eDevice) {
-            hostVisibleMemory = this->mStagingMemory;
-        } else {
-            KP_LOG_WARN(
-              "Kompute Tensor mapping data not supported on storage tensor");
-            return;
-        }
-
-        vk::DeviceSize bufferSize = this->memorySize();
-
-        // Given we request coherent host memory we don't need to invalidate /
-        // flush
-        this->mRawData = this->mDevice->mapMemory(
-          *hostVisibleMemory, 0, bufferSize, vk::MemoryMapFlags());
-
-        vk::MappedMemoryRange mappedMemoryRange(
-          *hostVisibleMemory, 0, bufferSize);
-    }
-
-    void unmapRawData()
-    {
-
-        KP_LOG_DEBUG("Kompute Tensor mapping data from host buffer");
-
-        std::shared_ptr<vk::DeviceMemory> hostVisibleMemory = nullptr;
-
-        if (this->mTensorType == TensorTypes::eHost) {
-            hostVisibleMemory = this->mPrimaryMemory;
-        } else if (this->mTensorType == TensorTypes::eDevice) {
-            hostVisibleMemory = this->mStagingMemory;
-        } else {
-            KP_LOG_WARN(
-              "Kompute Tensor mapping data not supported on storage tensor");
-            return;
-        }
-
-        vk::DeviceSize bufferSize = this->memorySize();
-        vk::MappedMemoryRange mappedRange(*hostVisibleMemory, 0, bufferSize);
-        this->mDevice->flushMappedMemoryRanges(1, &mappedRange);
-        this->mDevice->unmapMemory(*hostVisibleMemory);
-    }

// -------------- NEVER OWNED RESOURCES
std::shared_ptr<vk::PhysicalDevice> mPhysicalDevice;
std::shared_ptr<vk::Device> mDevice;
@@ -1093,9 +1073,11 @@ class Tensor
vk::MemoryPropertyFlags getPrimaryMemoryPropertyFlags();
vk::BufferUsageFlags getStagingBufferUsageFlags();
vk::MemoryPropertyFlags getStagingMemoryPropertyFlags();

+    void mapRawData();
+    void unmapRawData();
};

// TODO: Limit T to be only float, bool, double, etc
template<typename T>
class TensorT : public Tensor
{
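The templated data<T>() and vector<T>() accessors stay header-only, since they must be instantiated per element type. A minimal usage sketch follows — the function and variable names are illustrative, not taken from this diff:

#include <iostream>
#include <memory>
#include <vector>

#include "kompute/Kompute.hpp"

// Sketch: reading back a float tensor through the templated accessors.
// The tensor is assumed to have been created and synced to host elsewhere.
void printTensor(const std::shared_ptr<kp::Tensor>& tensor)
{
    // data<T>() is a zero-copy view straight into the mapped host memory
    float* raw = tensor->data<float>();
    std::cout << "first element: " << raw[0] << std::endl;

    // vector<T>() makes an owning copy of size() elements
    std::vector<float> values = tensor->vector<float>();
    for (float v : values) {
        std::cout << v << " ";
    }
    std::cout << std::endl;
}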
47 changes: 22 additions & 25 deletions src/OpMemoryBarrier.cpp
@@ -5,21 +5,20 @@
namespace kp {

OpMemoryBarrier::OpMemoryBarrier(
-  const std::vector<std::shared_ptr<Tensor>>& tensors,
-  const vk::AccessFlagBits& srcAccessMask,
-  const vk::AccessFlagBits& dstAccessMask,
-  const vk::PipelineStageFlagBits& srcStageMask,
-  const vk::PipelineStageFlagBits& dstStageMask,
-  bool barrierOnPrimary)
-  : mTensors(tensors),
-    mSrcAccessMask(srcAccessMask),
-    mDstAccessMask(dstAccessMask),
-    mSrcStageMask(srcStageMask),
-    mDstStageMask(dstStageMask),
-    mBarrierOnPrimary(barrierOnPrimary)
+  const std::vector<std::shared_ptr<Tensor>>& tensors,
+  const vk::AccessFlagBits& srcAccessMask,
+  const vk::AccessFlagBits& dstAccessMask,
+  const vk::PipelineStageFlagBits& srcStageMask,
+  const vk::PipelineStageFlagBits& dstStageMask,
+  bool barrierOnPrimary)
+  : mTensors(tensors)
+  , mSrcAccessMask(srcAccessMask)
+  , mDstAccessMask(dstAccessMask)
+  , mSrcStageMask(srcStageMask)
+  , mDstStageMask(dstStageMask)
+  , mBarrierOnPrimary(barrierOnPrimary)
{
KP_LOG_DEBUG("Kompute OpMemoryBarrier constructor");

}

OpMemoryBarrier::~OpMemoryBarrier()
@@ -35,21 +34,19 @@ OpMemoryBarrier::record(const vk::CommandBuffer& commandBuffer)
// Barrier to ensure the data is finished writing to buffer memory
if (this->mBarrierOnPrimary) {
for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
-            tensor->recordPrimaryBufferMemoryBarrier(
-              commandBuffer,
-              this->mSrcAccessMask,
-              this->mDstAccessMask,
-              this->mSrcStageMask,
-              this->mDstStageMask);
+            tensor->recordPrimaryBufferMemoryBarrier(commandBuffer,
+                                                     this->mSrcAccessMask,
+                                                     this->mDstAccessMask,
+                                                     this->mSrcStageMask,
+                                                     this->mDstStageMask);
}
} else {
for (const std::shared_ptr<Tensor>& tensor : this->mTensors) {
-            tensor->recordStagingBufferMemoryBarrier(
-              commandBuffer,
-              this->mSrcAccessMask,
-              this->mDstAccessMask,
-              this->mSrcStageMask,
-              this->mDstStageMask);
+            tensor->recordStagingBufferMemoryBarrier(commandBuffer,
+                                                     this->mSrcAccessMask,
+                                                     this->mDstAccessMask,
+                                                     this->mSrcStageMask,
+                                                     this->mDstStageMask);
}
}
}
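For reference, the constructor reformatted above takes its arguments in exactly this order. A minimal sketch of recording a barrier directly through the operation — the helper function and its setup are illustrative assumptions, and record() is assumed publicly callable via the OpBase interface:

#include <memory>
#include <vector>

#include "kompute/Kompute.hpp"

// Sketch: records a barrier so compute-shader writes to the tensors'
// primary buffers complete before any subsequent transfer reads.
void recordShaderToTransferBarrier(
  const std::vector<std::shared_ptr<kp::Tensor>>& tensors,
  const vk::CommandBuffer& commandBuffer)
{
    kp::OpMemoryBarrier barrier(tensors,
                                vk::AccessFlagBits::eShaderWrite,
                                vk::AccessFlagBits::eTransferRead,
                                vk::PipelineStageFlagBits::eComputeShader,
                                vk::PipelineStageFlagBits::eTransfer,
                                true); // barrier on primary, not staging, buffers
    barrier.record(commandBuffer);
}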
7 changes: 1 addition & 6 deletions src/OpTensorCopy.cpp
@@ -44,8 +44,7 @@ OpTensorCopy::record(const vk::CommandBuffer& commandBuffer)

// We iterate from the second tensor onwards and record a copy to all
for (size_t i = 1; i < this->mTensors.size(); i++) {
-        this->mTensors[i]->recordCopyFrom(
-          commandBuffer, this->mTensors[0]);
+        this->mTensors[i]->recordCopyFrom(commandBuffer, this->mTensors[0]);
}
}

@@ -60,10 +59,6 @@ OpTensorCopy::postEval(const vk::CommandBuffer& commandBuffer)
{
KP_LOG_DEBUG("Kompute OpTensorCopy postEval called");

-    // TODO: Simplify with a copyRawData
-    uint32_t size = this->mTensors[0]->size();
-    uint32_t dataTypeMemSize = this->mTensors[0]->dataTypeMemorySize();
-    uint32_t memSize = size * dataTypeMemSize;
void* data = this->mTensors[0]->rawData();

// Copy the data from the first tensor into all the tensors
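The deleted lines recomputed the byte count by hand even though memorySize() now provides it; only the raw pointer survives. The tail of the function is truncated in this view, so the following is only a hedged guess at what the remaining loop does, given that setRawData() copies memorySize() bytes internally (see the header diff above):

// Sketch of the remainder of OpTensorCopy::postEval (truncated above):
// copy the first tensor's host data into every other tensor.
void* data = this->mTensors[0]->rawData();
for (size_t i = 1; i < this->mTensors.size(); i++) {
    this->mTensors[i]->setRawData(data);
}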
22 changes: 12 additions & 10 deletions src/OpTensorSyncLocal.cpp
@@ -31,19 +31,21 @@ OpTensorSyncLocal::record(const vk::CommandBuffer& commandBuffer)
for (size_t i = 0; i < this->mTensors.size(); i++) {
if (this->mTensors[i]->tensorType() == Tensor::TensorTypes::eDevice) {

-            this->mTensors[i]->recordPrimaryBufferMemoryBarrier(commandBuffer,
-              vk::AccessFlagBits::eShaderWrite,
-              vk::AccessFlagBits::eTransferRead,
-              vk::PipelineStageFlagBits::eComputeShader,
-              vk::PipelineStageFlagBits::eTransfer);
+            this->mTensors[i]->recordPrimaryBufferMemoryBarrier(
+              commandBuffer,
+              vk::AccessFlagBits::eShaderWrite,
+              vk::AccessFlagBits::eTransferRead,
+              vk::PipelineStageFlagBits::eComputeShader,
+              vk::PipelineStageFlagBits::eTransfer);

this->mTensors[i]->recordCopyFromDeviceToStaging(commandBuffer);

-            this->mTensors[i]->recordPrimaryBufferMemoryBarrier(commandBuffer,
-              vk::AccessFlagBits::eTransferWrite,
-              vk::AccessFlagBits::eHostRead,
-              vk::PipelineStageFlagBits::eTransfer,
-              vk::PipelineStageFlagBits::eHost);
+            this->mTensors[i]->recordPrimaryBufferMemoryBarrier(
+              commandBuffer,
+              vk::AccessFlagBits::eTransferWrite,
+              vk::AccessFlagBits::eHostRead,
+              vk::PipelineStageFlagBits::eTransfer,
+              vk::PipelineStageFlagBits::eHost);
}
}
}
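The two barriers bracket the device-to-staging copy: the first orders compute-shader writes before the transfer read, the second orders the transfer write before host reads. Underneath, recordPrimaryBufferMemoryBarrier presumably reduces to a standard Vulkan-Hpp buffer barrier along these lines — a sketch under that assumption, where buffer and size stand in for the tensor's primary vk::Buffer and its byte size:

#include <vulkan/vulkan.hpp>

// Sketch of the call pattern the tensor's barrier recording presumably
// wraps; "buffer" and "size" are stand-ins, not taken from this diff.
void recordBufferBarrier(const vk::CommandBuffer& commandBuffer,
                         const vk::Buffer& buffer,
                         vk::DeviceSize size,
                         vk::AccessFlagBits srcAccessMask,
                         vk::AccessFlagBits dstAccessMask,
                         vk::PipelineStageFlagBits srcStageMask,
                         vk::PipelineStageFlagBits dstStageMask)
{
    vk::BufferMemoryBarrier bufferBarrier(srcAccessMask,
                                          dstAccessMask,
                                          VK_QUEUE_FAMILY_IGNORED,
                                          VK_QUEUE_FAMILY_IGNORED,
                                          buffer,
                                          0,
                                          size);

    commandBuffer.pipelineBarrier(srcStageMask,
                                  dstStageMask,
                                  vk::DependencyFlags(),
                                  nullptr,       // no global memory barriers
                                  bufferBarrier, // a single buffer barrier
                                  nullptr);      // no image barriers
}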