Skip to content

Commit

Permalink
Merge pull request #3000 from anagainaru/bp5-cuda
Browse files Browse the repository at this point in the history
Base modifications to make BP5 GPU aware
  • Loading branch information
eisenhauer authored Jan 12, 2022
2 parents 44c3279 + b0326af commit 53c551c
Show file tree
Hide file tree
Showing 10 changed files with 75 additions and 32 deletions.
30 changes: 13 additions & 17 deletions examples/cuda/cudaWriteRead.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ int BPWrite(const std::string fname, const size_t N, int nSteps){
// Set up the ADIOS structures
adios2::ADIOS adios;
adios2::IO io = adios.DeclareIO("WriteIO");
io.SetEngine("BP5");

// Declare an array for the ADIOS data of size (NumOfProcesses * N)
const adios2::Dims shape{static_cast<size_t>(N)};
Expand Down Expand Up @@ -61,28 +62,23 @@ int BPRead(const std::string fname, const size_t N, int nSteps){
// Create ADIOS structures
adios2::ADIOS adios;
adios2::IO io = adios.DeclareIO("ReadIO");
io.SetEngine("BP5");

adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);

auto data = io.InquireVariable<float>("data");
std::cout << "Steps expected by the reader: " << bpReader.Steps() << std::endl;
std::cout << "Expecting data per step: " << data.Shape()[0];
std::cout << " elements" << std::endl;

int write_step = bpReader.Steps();
// Create the local buffer and initialize the access point in the ADIOS file
std::vector<float> simData(N); //set size to N
const adios2::Dims start{0};
const adios2::Dims count{N};
const adios2::Box<adios2::Dims> sel(start, count);
data.SetSelection(sel);

// Read the data in each of the ADIOS steps
for (size_t step = 0; step < write_step; step++)
unsigned int step = 0;
for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++step)
{
data.SetStepSelection({step, 1});
auto data = io.InquireVariable<float>("data");
// Create the local buffer and initialize the access point in the ADIOS file
std::vector<float> simData(N); //set size to N
const adios2::Dims start{0};
const adios2::Dims count{N};
const adios2::Box<adios2::Dims> sel(start, count);
data.SetSelection(sel);

bpReader.Get(data, simData.data());
bpReader.PerformGets();
bpReader.EndStep();
std::cout << "Simualation step " << step << " : ";
std::cout << simData.size() << " elements: " << simData[1] << std::endl;
}
Expand Down
4 changes: 4 additions & 0 deletions source/adios2/engine/bp5/BP5Writer.tcc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ void BP5Writer::PutCommon(Variable<T> &variable, const T *values, bool sync)
BeginStep(StepMode::Update);
}
variable.SetData(values);
// if the user buffer is allocated on the GPU always use sync mode
bool isCudaBuffer = (variable.m_MemorySpace == MemorySpace::CUDA);
if (isCudaBuffer)
sync = true;

size_t *Shape = NULL;
size_t *Start = NULL;
Expand Down
3 changes: 3 additions & 0 deletions source/adios2/helper/adiosMemory.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ void InsertToBuffer(std::vector<char> &buffer, const T *source,
template <class T>
void CopyFromGPUToBuffer(std::vector<char> &buffer, size_t &position,
const T *source, const size_t elements = 1) noexcept;
template <class T>
void CudaMemCopyToBuffer(char *buffer, size_t position, const T *source,
const size_t size) noexcept;

/**
* Wrapper around cudaMemcpy needed for isolating CUDA interface dependency
Expand Down
11 changes: 9 additions & 2 deletions source/adios2/helper/adiosMemory.inl
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,17 @@ template <class T>
void CopyFromGPUToBuffer(std::vector<char> &buffer, size_t &position,
const T *source, const size_t elements) noexcept
{
const char *src = reinterpret_cast<const char *>(source);
MemcpyGPUToBuffer(buffer.data() + position, src, elements * sizeof(T));
CudaMemCopyToBuffer(buffer.data(), position, source, elements * sizeof(T));
position += elements * sizeof(T);
}

template <class T>
void CudaMemCopyToBuffer(char *buffer, size_t position,
const T *source, const size_t size) noexcept
{
const char *src = reinterpret_cast<const char *>(source);
MemcpyGPUToBuffer(buffer + position, src, size);
}
#endif

template <class T>
Expand Down
26 changes: 20 additions & 6 deletions source/adios2/toolkit/format/bp5/BP5Serializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -570,15 +570,27 @@ void BP5Serializer::DumpDeferredBlocks(bool forceCopyDeferred)
}

static void GetMinMax(const void *Data, size_t ElemCount, const DataType Type,
core::Engine::MinMaxStruct &MinMax)
core::Engine::MinMaxStruct &MinMax, MemorySpace MemSpace)
{

MinMax.Init(Type);
if (ElemCount == 0)
return;
if (Type == DataType::Compound)
{
}
#ifdef ADIOS2_HAVE_CUDA
#define pertype(T, N) \
else if (MemSpace == MemorySpace::CUDA && \
Type == helper::GetDataType<T>()) \
{ \
const size_t size = ElemCount * sizeof(T); \
const T *values = (const T *)Data; \
helper::CUDAMinMax(values, ElemCount, MinMax.MinUnion.field_##N, \
MinMax.MaxUnion.field_##N); \
}
ADIOS2_FOREACH_MINMAX_STDTYPE_2ARGS(pertype)
#undef pertype
#endif
#define pertype(T, N) \
else if (Type == helper::GetDataType<T>()) \
{ \
Expand Down Expand Up @@ -669,7 +681,8 @@ void BP5Serializer::Marshal(void *Variable, const char *Name,
MinMax.Init(Type);
if ((m_StatsLevel > 0) && !Span)
{
GetMinMax(Data, ElemCount, (DataType)Rec->Type, MinMax);
GetMinMax(Data, ElemCount, (DataType)Rec->Type, MinMax,
VB->m_MemorySpace);
}

if (Rec->OperatorType)
Expand Down Expand Up @@ -698,9 +711,10 @@ void BP5Serializer::Marshal(void *Variable, const char *Name,
{
if (!DeferAddToVec)
{
DataOffset = m_PriorDataBufferSizeTotal +
CurDataBuffer->AddToVec(ElemCount * ElemSize, Data,
ElemSize, Sync);
DataOffset =
m_PriorDataBufferSizeTotal +
CurDataBuffer->AddToVec(ElemCount * ElemSize, Data,
ElemSize, Sync, VB->m_MemorySpace);
}
}
else
Expand Down
3 changes: 2 additions & 1 deletion source/adios2/toolkit/format/buffer/BufferV.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class BufferV
virtual void Reset();

virtual size_t AddToVec(const size_t size, const void *buf, size_t align,
bool CopyReqd) = 0;
bool CopyReqd,
MemorySpace MemSpace = MemorySpace::Host) = 0;

struct BufferPos
{
Expand Down
20 changes: 17 additions & 3 deletions source/adios2/toolkit/format/buffer/chunk/ChunkV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
*/

#include "ChunkV.h"
#include "adios2/helper/adiosFunctions.h"
#include "adios2/toolkit/format/buffer/BufferV.h"

#include <algorithm>
Expand Down Expand Up @@ -82,7 +83,7 @@ void ChunkV::CopyExternalToInternal()
}

size_t ChunkV::AddToVec(const size_t size, const void *buf, size_t align,
bool CopyReqd)
bool CopyReqd, MemorySpace MemSpace)
{
if (size == 0)
{
Expand Down Expand Up @@ -120,7 +121,7 @@ size_t ChunkV::AddToVec(const size_t size, const void *buf, size_t align,
if (AppendPossible)
{
// We can use current chunk, just append the data;
memcpy(m_TailChunk + m_TailChunkPos, buf, size);
CopyDataToBuffer(size, buf, m_TailChunkPos, MemSpace);
DataV.back().Size += size;
m_TailChunkPos += size;
}
Expand All @@ -132,7 +133,7 @@ size_t ChunkV::AddToVec(const size_t size, const void *buf, size_t align,
NewSize = size;
m_TailChunk = (char *)malloc(NewSize);
m_Chunks.push_back(m_TailChunk);
memcpy(m_TailChunk, buf, size);
CopyDataToBuffer(size, buf, 0, MemSpace);
m_TailChunkPos = size;
VecEntry entry = {false, m_TailChunk, 0, size};
DataV.push_back(entry);
Expand All @@ -142,6 +143,19 @@ size_t ChunkV::AddToVec(const size_t size, const void *buf, size_t align,
return retOffset;
}

void ChunkV::CopyDataToBuffer(const size_t size, const void *buf, size_t pos,
MemorySpace MemSpace)
{
#ifdef ADIOS2_HAVE_CUDA
if (MemSpace == MemorySpace::CUDA)
{
helper::CudaMemCopyToBuffer(m_TailChunk, pos, buf, size);
return;
}
#endif
memcpy(m_TailChunk + pos, buf, size);
}

BufferV::BufferPos ChunkV::Allocate(const size_t size, size_t align)
{
if (size == 0)
Expand Down
5 changes: 4 additions & 1 deletion source/adios2/toolkit/format/buffer/chunk/ChunkV.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,17 @@ class ChunkV : public BufferV
virtual std::vector<core::iovec> DataVec() noexcept;

virtual size_t AddToVec(const size_t size, const void *buf, size_t align,
bool CopyReqd);
bool CopyReqd,
MemorySpace MemSpace = MemorySpace::Host);

virtual BufferPos Allocate(const size_t size, size_t align);
virtual void DownsizeLastAlloc(const size_t oldSize, const size_t newSize);

virtual void *GetPtr(int bufferIdx, size_t posInBuffer);

void CopyExternalToInternal();
void CopyDataToBuffer(const size_t size, const void *buf, size_t pos,
MemorySpace MemSpace);

private:
std::vector<char *> m_Chunks;
Expand Down
2 changes: 1 addition & 1 deletion source/adios2/toolkit/format/buffer/malloc/MallocV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ void MallocV::CopyExternalToInternal()
}

size_t MallocV::AddToVec(const size_t size, const void *buf, size_t align,
bool CopyReqd)
bool CopyReqd, MemorySpace MemSpace)
{
if (size == 0)
{
Expand Down
3 changes: 2 additions & 1 deletion source/adios2/toolkit/format/buffer/malloc/MallocV.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ class MallocV : public BufferV
virtual void Reset();

virtual size_t AddToVec(const size_t size, const void *buf, size_t align,
bool CopyReqd);
bool CopyReqd,
MemorySpace MemSpace = MemorySpace::Host);

virtual BufferPos Allocate(const size_t size, size_t align);
void DownsizeLastAlloc(const size_t oldSize, const size_t newSize);
Expand Down

0 comments on commit 53c551c

Please sign in to comment.