Skip to content

Commit

Permalink
Add View::blobs() accessor
Browse files Browse the repository at this point in the history
Then rename the `storageBlobs` data member to `m_blobs` and make it private.
  • Loading branch information
bernhardmgruber committed Feb 27, 2023
1 parent dfd7fac commit 00734f1
Show file tree
Hide file tree
Showing 19 changed files with 94 additions and 91 deletions.
4 changes: 2 additions & 2 deletions docs/pages/mappings.rst
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ A script for gnuplot visualizing the heatmap can be extracted.
auto anyMapping = ...;
llama::mapping::Heatmap mapping{anyMapping};
...
mapping.writeGnuplotDataFileBinary(view.storageBlobs, std::ofstream{"heatmap.data", std::ios::binary});
mapping.writeGnuplotDataFileBinary(view.blobs(), std::ofstream{"heatmap.data", std::ios::binary});
std::ofstream{"plot.sh"} << mapping.gnuplotScriptBinary;


Expand All @@ -215,7 +215,7 @@ The mapping adds an additional blob to the blobs of the inner mapping used as st
auto anyMapping = ...;
llama::mapping::FieldAccessCount mapping{anyMapping};
...
mapping.printFieldHits(view.storageBlobs); // print a report of the reads and writes to each field
mapping.printFieldHits(view.blobs()); // print a report of the reads and writes to each field

The FieldAccessCount mapping uses proxy references to instrument reads and writes.
If this is problematic, it can also be configured to return raw C++ references.
Expand Down
10 changes: 2 additions & 8 deletions examples/alpaka/asyncblur/asyncblur.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,10 +330,7 @@ try
hostChunkView[chunkNr](y, x) = subViewHost(y, x);
}
for(std::size_t i = 0; i < devMapping.blobCount; i++)
alpaka::memcpy(
queue[chunkNr],
devOldView[chunkNr].storageBlobs[i],
hostChunkView[chunkNr].storageBlobs[i]);
alpaka::memcpy(queue[chunkNr], devOldView[chunkNr].blobs()[i], hostChunkView[chunkNr].blobs()[i]);

alpaka::exec<Acc>(
queue[chunkNr],
Expand All @@ -343,10 +340,7 @@ try
llama::shallowCopy(devNewView[chunkNr]));

for(std::size_t i = 0; i < devMapping.blobCount; i++)
alpaka::memcpy(
queue[chunkNr],
hostChunkView[chunkNr].storageBlobs[i],
devNewView[chunkNr].storageBlobs[i]);
alpaka::memcpy(queue[chunkNr], hostChunkView[chunkNr].blobs()[i], devNewView[chunkNr].blobs()[i]);
}

// Wait for unfinished tasks on the accelerator
Expand Down
12 changes: 6 additions & 6 deletions examples/alpaka/daxpy/daxpy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,10 @@ void daxpyAlpakaLlama(std::string mappingName, std::ofstream& plotFile, Mapping

for(std::size_t i = 0; i < Mapping::blobCount; i++)
{
auto vx = alpaka::createView(devHost, &x.storageBlobs[0][0], mapping.blobSize(i));
auto vy = alpaka::createView(devHost, &y.storageBlobs[0][0], mapping.blobSize(i));
alpaka::memcpy(queue, viewX.storageBlobs[i], vx);
alpaka::memcpy(queue, viewY.storageBlobs[i], vy);
auto vx = alpaka::createView(devHost, &x.blobs()[0][0], mapping.blobSize(i));
auto vy = alpaka::createView(devHost, &y.blobs()[0][0], mapping.blobSize(i));
alpaka::memcpy(queue, viewX.blobs()[i], vx);
alpaka::memcpy(queue, viewY.blobs()[i], vy);
}
watch.printAndReset("copy H->D");

Expand Down Expand Up @@ -145,8 +145,8 @@ void daxpyAlpakaLlama(std::string mappingName, std::ofstream& plotFile, Mapping

for(std::size_t i = 0; i < Mapping::blobCount; i++)
{
auto vz = alpaka::createView(devHost, &z.storageBlobs[0][0], mapping.blobSize(i));
alpaka::memcpy(queue, vz, viewZ.storageBlobs[i]);
auto vz = alpaka::createView(devHost, &z.blobs()[0][0], mapping.blobSize(i));
alpaka::memcpy(queue, vz, viewZ.blobs()[i]);
}
watch.printAndReset("copy D->H");

Expand Down
4 changes: 2 additions & 2 deletions examples/alpaka/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ void run(std::ostream& plotFile)
watch.printAndReset("init");

for(std::size_t i = 0; i < mapping.blobCount; i++)
alpaka::memcpy(queue, accView.storageBlobs[i], hostView.storageBlobs[i]);
alpaka::memcpy(queue, accView.blobs()[i], hostView.blobs()[i]);
watch.printAndReset("copy H->D");

const auto workdiv = alpaka::WorkDivMembers<Dim, Size>{
Expand All @@ -307,7 +307,7 @@ void run(std::ostream& plotFile)
plotFile << std::quoted(title) << "\t" << sumUpdate / steps << '\t' << sumMove / steps << '\n';

for(std::size_t i = 0; i < mapping.blobCount; i++)
alpaka::memcpy(queue, hostView.storageBlobs[i], accView.storageBlobs[i]);
alpaka::memcpy(queue, hostView.blobs()[i], accView.blobs()[i]);
watch.printAndReset("copy D->H");

const auto [x, y, z] = hostView(referenceParticleIndex)(tag::Pos{});
Expand Down
6 changes: 3 additions & 3 deletions examples/alpaka/pic/pic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ auto setup(Queue& queue, const Dev& dev, const DevHost& /*devHost*/)
}
// std::shuffle(particlesHost.begin(), particlesHost.end(), engine);
for(auto i = 0; i < decltype(particleMapping)::blobCount; i++)
alpaka::memcpy(queue, particles.storageBlobs[i], particlesHost.storageBlobs[i]);
alpaka::memcpy(queue, particles.blobs()[i], particlesHost.blobs()[i]);

return std::tuple{e, b, j, particles};
}
Expand Down Expand Up @@ -837,7 +837,7 @@ void run(std::ostream& plotFile)
auto copyBlobs = [&](auto& fieldView)
{
for(auto i = 0; i < fieldMapping.blobCount; i++)
alpaka::memcpy(queue, hostFieldView.storageBlobs[i], fieldView.storageBlobs[i]);
alpaka::memcpy(queue, hostFieldView.blobs()[i], fieldView.blobs()[i]);
};
copyBlobs(E);
output(n, "E", hostFieldView);
Expand All @@ -850,7 +850,7 @@ void run(std::ostream& plotFile)
const auto particlesMapping = particles.mapping();
auto hostParticleView = llama::allocViewUninitialized(particlesMapping);
for(auto i = 0; i < particlesMapping.blobCount; i++)
alpaka::memcpy(queue, hostParticleView.storageBlobs[i], particles.storageBlobs[i]);
alpaka::memcpy(queue, hostParticleView.blobs()[i], particles.blobs()[i]);
output(n, hostParticleView);
}
}
Expand Down
8 changes: 4 additions & 4 deletions examples/alpaka/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,8 @@ try
const auto blobCount = decltype(mapping)::blobCount;
for(std::size_t i = 0; i < blobCount; i++)
{
alpaka::memcpy(queue, devA.storageBlobs[i], hostA.storageBlobs[i]);
alpaka::memcpy(queue, devB.storageBlobs[i], hostB.storageBlobs[i]);
alpaka::memcpy(queue, devA.blobs()[i], hostA.blobs()[i]);
alpaka::memcpy(queue, devB.blobs()[i], hostB.blobs()[i]);
}
chrono.printAndReset("Copy H->D");

Expand All @@ -175,8 +175,8 @@ try

for(std::size_t i = 0; i < blobCount; i++)
{
alpaka::memcpy(queue, hostA.storageBlobs[i], devA.storageBlobs[i]);
alpaka::memcpy(queue, hostB.storageBlobs[i], devB.storageBlobs[i]);
alpaka::memcpy(queue, hostA.blobs()[i], devA.blobs()[i]);
alpaka::memcpy(queue, hostB.blobs()[i], devB.blobs()[i]);
}
chrono.printAndReset("Copy D->H");
}
Expand Down
2 changes: 1 addition & 1 deletion examples/bufferguard/bufferguard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ void run(const std::string& mappingName)
const auto src = srcBlobs[i];
const auto dst = dstBlobs[i];
assert(mapping.blobSize(src) == mapping.blobSize(dst));
std::memcpy(&dstView.storageBlobs[dst][0], &srcView.storageBlobs[src][0], mapping.blobSize(src));
std::memcpy(&dstView.blobs()[dst][0], &srcView.blobs()[src][0], mapping.blobSize(src));
}
};

Expand Down
20 changes: 10 additions & 10 deletions examples/cuda/nbody/nbody.cu
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ try
hostView(i) = p;
}
if constexpr(countFieldAccesses)
hostView.mapping().fieldHits(hostView.storageBlobs) = {};
hostView.mapping().fieldHits(hostView.blobs()) = {};

watch.printAndReset("init");

Expand All @@ -267,18 +267,18 @@ try
};

start();
const auto blobs = hostView.storageBlobs.size() / (heatmap ? 2 : 1); // exclude heatmap blobs
const auto blobs = hostView.blobs().size() / (heatmap ? 2 : 1); // exclude heatmap blobs
for(std::size_t i = 0; i < blobs; i++)
checkError(cudaMemcpy(
&accView.storageBlobs[i][0],
&hostView.storageBlobs[i][0],
&accView.blobs()[i][0],
&hostView.blobs()[i][0],
hostView.mapping().blobSize(i),
cudaMemcpyHostToDevice));
if constexpr(heatmap)
{
auto& hmap = accView.mapping();
for(std::size_t i = 0; i < blobs; i++)
cudaMemsetAsync(hmap.blockHitsPtr(i, accView.storageBlobs), 0, hmap.blockHitsSize(i) * sizeof(CountType));
cudaMemsetAsync(hmap.blockHitsPtr(i, accView.blobs()), 0, hmap.blockHitsSize(i) * sizeof(CountType));
}
std::cout << "copy H->D " << stop() << " s\n";

Expand Down Expand Up @@ -309,17 +309,17 @@ try
plotFile << std::quoted(title) << "\t" << sumUpdate / steps << '\t' << sumMove / steps << '\n';

start();
for(std::size_t i = 0; i < hostView.storageBlobs.size(); i++)
for(std::size_t i = 0; i < hostView.blobs().size(); i++)
checkError(cudaMemcpy(
&hostView.storageBlobs[i][0],
&accView.storageBlobs[i][0],
&hostView.blobs()[i][0],
&accView.blobs()[i][0],
hostView.mapping().blobSize(i),
cudaMemcpyDeviceToHost));
std::cout << "copy D->H " << stop() << " s\n";

if constexpr(countFieldAccesses)
{
hostView.mapping().printFieldHits(hostView.storageBlobs);
hostView.mapping().printFieldHits(hostView.blobs());
}
else if constexpr(heatmap)
{
Expand All @@ -329,7 +329,7 @@ try
c = '_';
std::ofstream{"plot_heatmap.sh"} << hostView.mapping().gnuplotScript;
hostView.mapping().writeGnuplotDataFile(
hostView.storageBlobs,
hostView.blobs(),
std::ofstream{"cuda_nbody_heatmap_" + titleCopy + ".dat"});
}

Expand Down
4 changes: 2 additions & 2 deletions examples/nbody/nbody.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,7 @@ namespace usellama
p(tag::Mass{}) = dist(engine) / FP{100};
}
if constexpr(countFieldAccesses)
particles.mapping().fieldHits(particles.storageBlobs) = {};
particles.mapping().fieldHits(particles.blobs()) = {};
watch.printAndReset("init");

double sumUpdate = 0;
Expand Down Expand Up @@ -393,7 +393,7 @@ namespace usellama
if constexpr(heatmap)
std::ofstream("nbody_heatmap_" + mappingName(Mapping) + ".sh") << particles.mapping().toGnuplotScript();
if constexpr(countFieldAccesses)
particles.mapping().printFieldHits(particles.storageBlobs);
particles.mapping().printFieldHits(particles.blobs());

return printReferenceParticle(particles(referenceParticleIndex)(tag::Pos{}).load());
}
Expand Down
12 changes: 6 additions & 6 deletions examples/root/lhcb_analysis/lhcb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ namespace
{
std::filesystem::create_directories(heatmapFile.parent_path());
const auto& m = v.mapping();
m.writeGnuplotDataFileBinary(v.storageBlobs, std::ofstream{heatmapFile});
m.writeGnuplotDataFileBinary(v.blobs(), std::ofstream{heatmapFile});
std::ofstream{heatmapFile.parent_path() / "plot.sh"} << View::Mapping::gnuplotScriptBinary;
}

Expand All @@ -428,13 +428,13 @@ namespace
{
const auto bc = View::Mapping::blobCount;
for(int i = bc / 2; i < bc; i++)
std::memset(&v.storageBlobs[i][0], 0, v.mapping().blobSize(i));
std::memset(&v.blobs()[i][0], 0, v.mapping().blobSize(i));
}

template<typename View>
void clearFieldAccessCounts(View& v)
{
v.mapping().fieldHits(v.storageBlobs) = {};
v.mapping().fieldHits(v.blobs()) = {};
}

template<typename View>
Expand Down Expand Up @@ -470,7 +470,7 @@ namespace
const auto& m = v.mapping();
for(std::size_t i = 0; i < View::Mapping::blobCount / 2; i++)
{
auto* bh = m.blockHitsPtr(i, v.storageBlobs);
auto* bh = m.blockHitsPtr(i, v.blobs());
auto size = m.blockHitsSize(i);
total += std::count_if(bh, bh + size, [](auto c) { return c > 0; });
}
Expand All @@ -485,7 +485,7 @@ namespace
auto [view, conversionTime] = convertRNTupleToLLAMA<Mapping>(inputFile);
if constexpr(llama::mapping::isFieldAccessCount<Mapping>)
{
view.mapping().printFieldHits(view.storageBlobs);
view.mapping().printFieldHits(view.blobs());
clearFieldAccessCounts(view);
}
if constexpr(llama::mapping::isHeatmap<Mapping>)
Expand All @@ -511,7 +511,7 @@ namespace
totalAnalysisTime += analysisTime;
}
if constexpr(llama::mapping::isFieldAccessCount<Mapping>)
view.mapping().printFieldHits(view.storageBlobs);
view.mapping().printFieldHits(view.blobs());
if constexpr(llama::mapping::isHeatmap<Mapping>)
saveHeatmap(view, heatmapFolder + "/" + mappingName + "_analysis.bin");
save(hist, mappingName);
Expand Down
2 changes: 1 addition & 1 deletion examples/vectoradd/vectoradd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ namespace usellama
}
plotFile << "\"" << mappingname << "\"\t" << acc / steps << '\n';

return static_cast<int>(c.storageBlobs[0][0]);
return static_cast<int>(c.blobs()[0][0]);
}
} // namespace usellama

Expand Down
20 changes: 8 additions & 12 deletions include/llama/Copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ namespace llama
// TODO(bgruber): this is maybe not the best parallel copying strategy
for(std::size_t i = 0; i < Mapping::blobCount; i++)
internal::parallelMemcpy(
&dstView.storageBlobs[i][0],
&srcView.storageBlobs[i][0],
&dstView.blobs()[i][0],
&srcView.blobs()[i][0],
dstView.mapping().blobSize(i),
threadId,
threadCount);
Expand Down Expand Up @@ -169,12 +169,8 @@ namespace llama
static constexpr auto dstIsAoSoA = lanesDst != std::numeric_limits<std::size_t>::max();

static_assert(srcIsAoSoA || dstIsAoSoA, "At least one of the mappings must be an AoSoA mapping");
static_assert(
!srcIsAoSoA || std::tuple_size_v<decltype(srcView.storageBlobs)> == 1,
"Implementation assumes AoSoA with single blob");
static_assert(
!dstIsAoSoA || std::tuple_size_v<decltype(dstView.storageBlobs)> == 1,
"Implementation assumes AoSoA with single blob");
static_assert(!srcIsAoSoA || SrcMapping::blobCount == 1, "Implementation assumes AoSoA with single blob");
static_assert(!dstIsAoSoA || DstMapping::blobCount == 1, "Implementation assumes AoSoA with single blob");

const auto flatSize = product(dstView.extents());

Expand Down Expand Up @@ -207,21 +203,21 @@ namespace llama
auto mapSrc = [&](std::size_t flatArrayIndex, auto rc) LLAMA_LAMBDA_INLINE
{
if constexpr(srcIsAoSoA)
return &srcView.storageBlobs[0][0] + mapAoSoA(flatArrayIndex, rc, lanesSrc);
return &srcView.blobs()[0][0] + mapAoSoA(flatArrayIndex, rc, lanesSrc);
else
{
const auto [blob, off] = mapSoA(flatArrayIndex, rc, isSrcMB);
return &srcView.storageBlobs[blob][off];
return &srcView.blobs()[blob][off];
}
};
auto mapDst = [&](std::size_t flatArrayIndex, auto rc) LLAMA_LAMBDA_INLINE
{
if constexpr(dstIsAoSoA)
return &dstView.storageBlobs[0][0] + mapAoSoA(flatArrayIndex, rc, lanesDst);
return &dstView.blobs()[0][0] + mapAoSoA(flatArrayIndex, rc, lanesDst);
else
{
const auto [blob, off] = mapSoA(flatArrayIndex, rc, isDstMB);
return &dstView.storageBlobs[blob][off];
return &dstView.blobs()[blob][off];
}
};

Expand Down
4 changes: 2 additions & 2 deletions include/llama/DumpMapping.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ namespace llama
{
const auto& mapping = view.mapping();
for(std::size_t i = 0; i < View::Mapping::blobCount; i++)
std::memset(&view.storageBlobs[i][0], pattern, mapping.blobSize(i));
std::memset(&view.blobs()[i][0], pattern, mapping.blobSize(i));
}

template<typename View, typename RecordCoord>
Expand All @@ -127,7 +127,7 @@ namespace llama

using Type = GetType<RecordDim, decltype(rc)>;
// computed values can come from anywhere, so we can only apply heuristics
auto& blobs = view.storageBlobs;
auto& blobs = view.blobs();
auto&& ref = view.mapping().compute(ai, rc, blobs);

// try to find the mapped address in one of the blobs
Expand Down
Loading

0 comments on commit 00734f1

Please sign in to comment.