From 278539227a5eaa9803104323a750555e4c48b7ab Mon Sep 17 00:00:00 2001 From: divija95 <60272396+divija95@users.noreply.github.com> Date: Sat, 13 Apr 2024 20:45:50 -0500 Subject: [PATCH] DistLocalGraph switch to LS_LC_CSR graph (#28) * changes to switch to LC_LS_CSR graph * fixing debug err * fixing pre-commit issues * changing api calls for wf4 * Added data.001.csv using lfs * fixing getEdgeData api * fix for getEdgeData() * ci fix * data.001.csv * changing test dataset * quickfix * fixing precommit * fixing graph deallocate() * fixing test * Update workflows to be realistic * CPU set * Try this again * Try this again * Slight refactor --------- Co-authored-by: AdityaAtulTewari --- .gitattributes | 2 + .github/workflows/build-and-test.yml | 22 ++- Makefile | 2 + inputs/wmd/data.001.csv | 3 + .../galois/graphs/DistributedLocalGraph.h | 69 ++++---- .../include/galois/graphs/GraphHelpers.h | 9 +- .../include/galois/graphs/LS_LC_CSR_Graph.h | 8 +- libwmd/include/galois/wmd/WMDGraph.h | 9 +- libwmd/include/galois/wmd/WMDPartitioner.h | 28 ++-- libwmd/include/galois/wmd/graph.h | 4 +- libwmd/test/wmd-graph-build.cpp | 150 +++++++++++++++--- 11 files changed, 221 insertions(+), 85 deletions(-) create mode 100644 .gitattributes create mode 100644 inputs/wmd/data.001.csv diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..71b4e67e5 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.csv filter=lfs diff=lfs merge=lfs -text +inputs/wmd/data.001.csv filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 945716c38..925406e22 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -46,7 +46,6 @@ jobs: CONTAINER_BUILD_DIR: "/pando-galois/build" CONTAINER_WORK_DIR: "/pando-galois" GALOIS_CONTAINER_ENV: "-e=GALOIS_BUILD_TOOL=Ninja" - GALOIS_CONTAINER_FLAGS: "--cpus=8" INTERACTIVE: "" defaults: run: @@ -77,6 +76,27 @@ jobs: if [ ${{ matrix.sanitizer-type }} == 'san' ]; then echo "GALOIS_CONTAINER_ENV=$GALOIS_CONTAINER_ENV -e=GALOIS_EXTRA_CXX_FLAGS='\"-fsanitize=address -fsanitize=undefined\"'" >> $GITHUB_ENV fi + if [ ${{ matrix.build-type }} == 'Debug' ]; then + echo "GALOIS_CONTAINER_ENV=$GALOIS_CONTAINER_ENV -e=GALOIS_EXTRA_CXX_FLAGS='-O3'" >> $GITHUB_ENV + fi + if [ ${{ runner.name }} == 'zerberus-0' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30'" >> $GITHUB_ENV + fi + if [ ${{ runner.name }} == 'zerberus-1' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31'" >> $GITHUB_ENV + fi + if [ ${{ runner.name }} == 'languedoc-0' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=0,1,2,3,4,5,6,7,32,33,34,35,36,37,38,39'" >> $GITHUB_ENV + fi + if [ ${{ runner.name }} == 'languedoc-1' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=16,17,18,19,20,21,22,23,48,49,50,51,52,53,54,55'" >> $GITHUB_ENV + fi + if [ ${{ runner.name }} == 'languedoc-2' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=8,9,10,11,12,13,14,15,40,41,42,43,44,45,46,47'" >> $GITHUB_ENV + fi + if [ ${{ runner.name }} == 'languedoc-3' ]; then + echo "CONTAINER_CPUSET='--cpuset-cpus=24,25,26,27,28,29,30,31,56,57,58,59,60,61,62,63'" >> $GITHUB_ENV + fi cat $GITHUB_ENV - name: Configure diff --git a/Makefile b/Makefile index a0544b1c2..011abf422 100644 --- a/Makefile +++ b/Makefile @@ -14,6 +14,7 @@ CONTAINER_BUILD_DIR ?= /pando-galois/build CONTAINER_WORKDIR ?= ${CONTAINER_SRC_DIR} CONTAINER_CONTEXT ?= default CONTAINER_OPTS ?= +CONTAINER_CPUSET ?= CONTAINER_CMD ?= bash -l INTERACTIVE ?= i @@ -91,6 +92,7 @@ docker: ${GALOIS_CONTAINER_MOUNTS} \ ${GALOIS_CONTAINER_ENV} \ ${GALOIS_CONTAINER_FLAGS} \ + ${CONTAINER_CPUSET} \ --privileged \ --workdir=${CONTAINER_WORKDIR} \ ${CONTAINER_OPTS} \ diff --git a/inputs/wmd/data.001.csv b/inputs/wmd/data.001.csv new file mode 100644 index 000000000..f5479d326 --- /dev/null +++ b/inputs/wmd/data.001.csv @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff7df1aa0a2261d930471fc057251d1aa2cb404fa8c88c12c3b65fe2a5204bf8 +size 2879652 diff --git a/libcusp/include/galois/graphs/DistributedLocalGraph.h b/libcusp/include/galois/graphs/DistributedLocalGraph.h index 4b803c524..f826f88a3 100644 --- a/libcusp/include/galois/graphs/DistributedLocalGraph.h +++ b/libcusp/include/galois/graphs/DistributedLocalGraph.h @@ -31,7 +31,7 @@ #include #include "galois/graphs/DistributedGraph.h" -#include "galois/graphs/LS_LC_CSR_64_Graph.h" +#include "galois/graphs/LS_LC_CSR_Graph.h" #include "galois/graphs/BufferedGraph.h" #include "galois/runtime/DistStats.h" #include "galois/graphs/OfflineGraph.h" @@ -56,7 +56,7 @@ class DistLocalGraph { //! Graph name used for printing things constexpr static const char* const GRNAME = "dGraph"; - using GraphTy = galois::graphs::LS_LC_CSR_64_Graph; + using GraphTy = galois::graphs::LS_LC_CSR_Graph; // vector for determining range objects for master nodes + nodes // with edges (which includes masters) @@ -83,7 +83,7 @@ class DistLocalGraph { protected: //! The internal graph used by DistLocalGraph to represent the graph - GraphTy graph; + GraphTy* graph; //! Marks if the graph is transposed or not. bool transposed; @@ -476,15 +476,11 @@ class DistLocalGraph { public: //! Type representing a node in this graph - using GraphNode = typename GraphTy::GraphNode; - //! Expose EdgeTy to other classes + using GraphNode = typename GraphTy::VertexTopologyID; + //! Type representing an edge data in this graph using EdgeType = EdgeTy; - //! iterator type over nodes - using iterator = typename GraphTy::iterator; - //! constant iterator type over nodes - using const_iterator = typename GraphTy::const_iterator; //! iterator type over edges - using edge_iterator = typename GraphTy::edge_iterator; + using edge_iterator = typename GraphTy::EdgeIterator; /** * Constructor for DistLocalGraph. Initializes metadata fields. @@ -529,6 +525,7 @@ class DistLocalGraph { public: virtual ~DistLocalGraph() {} + void initGraph(uint64_t numNodes) { graph = new GraphTy(numNodes); } //! Determines which host has the master for a particular node //! @returns Host id of node in question inline unsigned getHostID(uint64_t gid) const { return getHostIDImpl(gid); } @@ -575,10 +572,8 @@ class DistLocalGraph { * @param mflag access flag for node data * @returns A node data object */ - inline typename GraphTy::node_data_reference - getData(GraphNode N, - galois::MethodFlag mflag = galois::MethodFlag::UNPROTECTED) { - auto& r = graph.getData(N, mflag); + inline NodeTy& getData(GraphNode N) { + auto& r = graph->getData(N); return r; } @@ -589,10 +584,14 @@ class DistLocalGraph { * @param mflag access flag for edge data * @returns The edge data for the requested edge */ - inline typename GraphTy::edge_data_reference - getEdgeData(edge_iterator ni, - galois::MethodFlag mflag = galois::MethodFlag::UNPROTECTED) { - auto& r = graph.getEdgeData(ni, mflag); + inline EdgeTy& getEdgeData(GraphNode src, edge_iterator ni) { + GraphNode dst = getEdgeDst(ni); + auto& r = graph->getEdgeData(std::make_pair(src, getGID(dst))); + return r; + } + + inline EdgeTy& getEdgeData(edge_iterator ni) { + auto& r = graph->getEdgeData(*ni); return r; } @@ -602,7 +601,9 @@ class DistLocalGraph { * @param ni edge id to get destination of * @returns Local ID of destination of edge ni */ - GraphNode getEdgeDst(edge_iterator ni) { return graph.getEdgeDst(ni); } + GraphNode getEdgeDst(edge_iterator ni) { + return getGID(graph->getEdgeDst(*ni)); + } /** * Gets the first edge of some node. @@ -611,7 +612,7 @@ class DistLocalGraph { * @returns iterator to first edge of N */ inline edge_iterator edge_begin(GraphNode N) { - return graph.edge_begin(N, galois::MethodFlag::UNPROTECTED); + return graph->edges(N).begin(); } /** @@ -621,14 +622,12 @@ class DistLocalGraph { * @returns iterator to the end of the edges of node N, i.e. the first edge * of the next node (or an "end" iterator if there is no next node) */ - inline edge_iterator edge_end(GraphNode N) { - return graph.edge_end(N, galois::MethodFlag::UNPROTECTED); - } + inline edge_iterator edge_end(GraphNode N) { return graph->edges(N).end(); } /** * Return the degree of the edge in the local graph **/ - inline uint64_t localDegree(GraphNode N) { return graph.getDegree(N); } + inline uint64_t localDegree(GraphNode N) { return graph->getDegree(N); } /** * Returns an iterable object over the edges of a particular node in the @@ -647,14 +646,14 @@ class DistLocalGraph { * * @returns number of nodes present in this (local) graph */ - inline size_t size() const { return graph.size(); } + inline size_t size() const { return graph->size(); } /** * Gets number of edges on this (local) graph. * * @returns number of edges present in this (local) graph */ - inline size_t sizeEdges() const { return graph.sizeEdges(); } + inline size_t sizeEdges() { return graph->sizeEdges(); } /** * Gets number of nodes on this (local) graph. @@ -746,7 +745,7 @@ class DistLocalGraph { */ inline void determineThreadRanges() { allNodesRanges = galois::graphs::determineUnitRangesFromPrefixSum( - galois::runtime::activeThreads, graph.getEdgePrefixSum()); + galois::runtime::activeThreads, graph->getEdgePrefixSum()); } /** @@ -770,9 +769,8 @@ class DistLocalGraph { } else { galois::gDebug("Manually det. master thread ranges"); masterRanges = galois::graphs::determineUnitRangesFromGraph( - graph, galois::runtime::activeThreads, beginMaster, - beginMaster + numOwned, 0, - (galois::graphs::is_LS_LC_CSR_64_Graph::value == 1)); + *graph, galois::runtime::activeThreads, beginMaster, + beginMaster + numOwned, 0, true); } } @@ -797,7 +795,7 @@ class DistLocalGraph { } else { galois::gDebug("Manually det. with edges thread ranges"); withEdgeRanges = galois::graphs::determineUnitRangesFromGraph( - graph, galois::runtime::activeThreads, 0, numNodesWithEdges, 0); + *graph, galois::runtime::activeThreads, 0, numNodesWithEdges, 0); } } @@ -869,7 +867,7 @@ class DistLocalGraph { */ void deallocate() { galois::gDebug("Deallocating CSR in DistLocalGraph"); - graph.deallocate(); + graph->deallocate(); } /** @@ -877,11 +875,10 @@ class DistLocalGraph { * It sorts edges of the nodes by destination. */ void sortEdgesByDestination() { - using GN = typename GraphTy::GraphNode; galois::do_all( - galois::iterate(graph), - [&](GN n) { graph.sortEdges(n, IdLess()); }, - galois::no_stats(), galois::loopname("CSREdgeSort"), galois::steal()); + galois::iterate(graph->vertices().begin(), graph->vertices().end()), + [&](GraphNode n) { graph->sortEdges(n); }, galois::no_stats(), + galois::loopname("CSREdgeSort"), galois::steal()); } //! Used by substrate to determine if some stats are to be reported diff --git a/libgalois/include/galois/graphs/GraphHelpers.h b/libgalois/include/galois/graphs/GraphHelpers.h index a5dda328c..f6bf923b6 100644 --- a/libgalois/include/galois/graphs/GraphHelpers.h +++ b/libgalois/include/galois/graphs/GraphHelpers.h @@ -262,10 +262,11 @@ void determineUnitRangesLoopGraph(GraphTy& graph, uint32_t unitsToSplit, // cannot use edge_end/begin on log strcutred CSR since its edges are not // consecutive. - uint64_t numEdgesInRange = (is_LS_LC_CSR) ? graph.sizeEdges() - : graph.edge_end(endNode - 1) - - graph.edge_begin(beginNode); - uint64_t edgeOffset = (is_LS_LC_CSR) ? 0 : *graph.edge_begin(beginNode); + uint64_t numEdgesInRange = (is_LS_LC_CSR) + ? graph.sizeEdges() + : std::distance(graph.edge_end(beginNode), + graph.edge_begin(endNode - 1)); + uint64_t edgeOffset = (is_LS_LC_CSR) ? 0 : graph[beginNode]; returnRanges[0] = beginNode; std::vector dummyScaleFactor; diff --git a/libgalois/include/galois/graphs/LS_LC_CSR_Graph.h b/libgalois/include/galois/graphs/LS_LC_CSR_Graph.h index 32dfd19e7..2ee7aea42 100644 --- a/libgalois/include/galois/graphs/LS_LC_CSR_Graph.h +++ b/libgalois/include/galois/graphs/LS_LC_CSR_Graph.h @@ -17,8 +17,8 @@ * Documentation, or loss or inaccuracy of data of any kind. */ -#ifndef GALOIS_GRAPHS_LC_CSR_GRAPH_H -#define GALOIS_GRAPHS_LC_CSR_GRAPH_H +#ifndef GALOIS_GRAPHS_LS_LC_CSR_GRAPH_H +#define GALOIS_GRAPHS_LS_LC_CSR_GRAPH_H #include #include @@ -205,6 +205,10 @@ class LS_LC_CSR_Graph : private boost::noncopyable { return m_edge_data[handle]; } + template > + inline E& getEdgeData(EdgeIterator const& it) { + return m_edge_data[*it]; + } /* * Count the total number of edges in parallel. */ diff --git a/libwmd/include/galois/wmd/WMDGraph.h b/libwmd/include/galois/wmd/WMDGraph.h index 8f1d9aef7..962296109 100644 --- a/libwmd/include/galois/wmd/WMDGraph.h +++ b/libwmd/include/galois/wmd/WMDGraph.h @@ -1142,7 +1142,8 @@ class WMDBufferedGraph : public BufferedGraph { &addedData #endif ](size_t j) { - dstGraph.getData(GIDtoLID[NodeData[j].id]) = NodeData[j]; + dstGraph->getData(GIDtoLID[NodeData[j].id]) = + NodeData[j]; }); NodeData.clear(); } @@ -1153,7 +1154,7 @@ class WMDBufferedGraph : public BufferedGraph { &addedData #endif ](size_t i) { - dstGraph.getData(GIDtoLID[nodesToSend[hostID][i].id]) = + dstGraph->getData(GIDtoLID[nodesToSend[hostID][i].id]) = nodesToSend[hostID][i]; #ifndef NDEBUG addedData++; @@ -1172,7 +1173,7 @@ class WMDBufferedGraph : public BufferedGraph { if (i != hostID) { for (uint64_t j = 0; j < proxiesOnHosts[i].size(); j++) { auto& r = - dstGraph.getData(globalToLocalMap[proxiesOnHosts[i][j]]); + dstGraph->getData(globalToLocalMap[proxiesOnHosts[i][j]]); nodesToSend[i].push_back(r); }; } @@ -1216,7 +1217,7 @@ class WMDBufferedGraph : public BufferedGraph { galois::iterate((size_t)0, IDofNodeRecv.size()), [this, &nodeRecv, &IDofNodeRecv, &dstGraph, &globalToLocalMap](size_t j) { - dstGraph.getData(globalToLocalMap[IDofNodeRecv[j]]) = nodeRecv[j]; + dstGraph->getData(globalToLocalMap[IDofNodeRecv[j]]) = nodeRecv[j]; }, galois::steal()); nodeRecv.clear(); diff --git a/libwmd/include/galois/wmd/WMDPartitioner.h b/libwmd/include/galois/wmd/WMDPartitioner.h index d5cdce65e..d3fe9cf6d 100644 --- a/libwmd/include/galois/wmd/WMDPartitioner.h +++ b/libwmd/include/galois/wmd/WMDPartitioner.h @@ -234,9 +234,7 @@ class WMDGraph : public DistLocalGraph { // Graph construction related calls base_DistGraph::beginMaster = 0; // Allocate and construct the graph - base_DistGraph::graph.allocateFrom(base_DistGraph::numNodes, - base_DistGraph::numEdges); - base_DistGraph::graph.constructNodes(); + base_DistGraph::initGraph(base_DistGraph::numNodes); // construct edges // not need to move edges from other host since all edges is already ready @@ -250,10 +248,10 @@ class WMDGraph : public DistLocalGraph { for (auto dst : edgeDst) { dstData.emplace_back(base_DistGraph::globalToLocalMap[dst]); } - auto edgeData = bufGraph.edgeDataPtr(globalID); - base_DistGraph::graph.addEdgesUnSort( - true, (globalID - bufGraph.globalNodeOffset[base_DistGraph::id]), - dstData.data(), edgeData, bufGraph.edgeNum(globalID), false); + std::vector edgeData(bufGraph.edgeNum(globalID)); + base_DistGraph::graph->addEdges( + (globalID - bufGraph.globalNodeOffset[base_DistGraph::id]), + dstData, edgeData); }, galois::steal()); @@ -269,10 +267,10 @@ class WMDGraph : public DistLocalGraph { "] LS_CSR graph local nodes: ", base_DistGraph::numNodes); galois::gInfo("[", base_DistGraph::id, "] LS_CSR graph master nodes: ", base_DistGraph::numOwned); - galois::gInfo("[", base_DistGraph::id, "] LS_CSR graph local edges: ", - base_DistGraph::graph.sizeEdges()); - assert(base_DistGraph::graph.sizeEdges() == base_DistGraph::numEdges); - assert(base_DistGraph::graph.size() == base_DistGraph::numNodes); + galois::gInfo("[", base_DistGraph::id, + "] LS_CSR graph local edges: ", base_DistGraph::sizeEdges()); + assert(base_DistGraph::sizeEdges() == base_DistGraph::numEdges); + assert(base_DistGraph::graph->size() == base_DistGraph::numNodes); bufGraph.resetAndFree(); @@ -471,14 +469,14 @@ class WMDGraph : public DistLocalGraph { } galois::gInfo("[", base_DistGraph::id, "] Start building projected graph."); - newGraph->graph.allocateFrom(newGraph->numNodes, newGraph->numEdges); + newGraph->initGraph(newGraph->numNodes); galois::do_all( galois::iterate(uint64_t(0), uint64_t(newGraph->numNodes)), [&](auto& node) { NodeLID oldGraphLID = base_DistGraph::getLID(newGraph->localToGlobalVector[node]); - newGraph->graph.getData(node) = projection.ProjectNode( + newGraph->graph->getData(node) = projection.ProjectNode( *this, base_DistGraph::getData(oldGraphLID), oldGraphLID); uint64_t numEdges = newTopology[node].size(); @@ -490,9 +488,7 @@ class WMDGraph : public DistLocalGraph { for (NodeGID gid : newTopology[node]) { localDsts.emplace_back(newGraph->getLID(gid)); } - newGraph->graph.addEdgesUnSort(true, node, localDsts.data(), - newEdgeData[node].data(), numEdges, - false); + newGraph->graph->addEdges(node, localDsts, newEdgeData[node]); newTopology[node].clear(); newEdgeData[node].clear(); diff --git a/libwmd/include/galois/wmd/graph.h b/libwmd/include/galois/wmd/graph.h index 86a28be42..03649e168 100644 --- a/libwmd/include/galois/wmd/graph.h +++ b/libwmd/include/galois/wmd/graph.h @@ -48,13 +48,13 @@ #include #include #include -#include +// #include #include #include #include "graphTypes.h" -#include "galois/graphs/LS_LC_CSR_64_Graph.h" +// #include "galois/graphs/LS_LC_CSR_Graph.h" #include "galois/shad/DataTypes.h" #define UINT shad::data_types::UINT diff --git a/libwmd/test/wmd-graph-build.cpp b/libwmd/test/wmd-graph-build.cpp index 88fca39ab..c0711bd24 100644 --- a/libwmd/test/wmd-graph-build.cpp +++ b/libwmd/test/wmd-graph-build.cpp @@ -10,6 +10,8 @@ */ #include "galois/wmd/graph.h" #include "galois/wmd/WMDPartitioner.h" +#include "galois/shad/DataTypes.h" +#include "galois/wmd/graphTypes.h" #include "galois/DistGalois.h" #include "galois/graphs/GenericPartitioners.h" @@ -17,12 +19,114 @@ #include #include #include +#include using namespace agile::workflow1; -typedef galois::graphs::WMDGraph - Graph; +typedef galois::graphs::WMDGraph Graph; + +void insertEdge( + Edge edge, + std::unordered_map>>& + vertices) { + if (vertices.find(edge.src) != vertices.end()) { + vertices[edge.src].second.push_back(edge); + } else { + assert(false); + } +} + +void parser(std::string line, + std::unordered_map>>& vertices) { + if (line.find("//") != std::string::npos || + line.find("#") != std::string::npos) { + return; + } else if (line.find("/*") != std::string::npos || + line.find("*/") != std::string::npos) { + return; + } else { + const char* ptr = line.c_str(); + std::istringstream ss(ptr); + std::string token; + std::vector tokens; + while (std::getline(ss, token, ',')) { + tokens.push_back(token); + } + if (tokens.size() == 9) + tokens.push_back(""); + if (tokens.size() == 0) + return; + assert(tokens.size() == 10); + bool isNode = tokens[0] == "Person" || tokens[0] == "ForumEvent" || + tokens[0] == "Forum" || tokens[0] == "Publication" || + tokens[0] == "Topic"; + if (isNode) { + uint64_t id = 0; + agile::workflow1::TYPES vertexType = agile::workflow1::TYPES::NONE; + if (tokens[0] == "Person") { + vertexType = agile::workflow1::TYPES::PERSON; + id = std::stoull(tokens[1]); + } else if (tokens[0] == "ForumEvent") { + vertexType = agile::workflow1::TYPES::FORUMEVENT; + id = std::stoull(tokens[4]); + } else if (tokens[0] == "Forum") { + vertexType = agile::workflow1::TYPES::FORUM; + id = std::stoull(tokens[3]); + } else if (tokens[0] == "Publication") { + vertexType = agile::workflow1::TYPES::PUBLICATION; + id = std::stoull(tokens[5]); + } else if (tokens[0] == "Topic") { + vertexType = agile::workflow1::TYPES::TOPIC; + id = std::stoull(tokens[6]); + } else { + assert(false); + } + vertices[id] = + std::pair>(vertexType, std::vector()); + } else { + Edge edge(tokens); + insertEdge(edge, vertices); + // Inverse edge + agile::workflow1::TYPES inverseEdgeType = agile::workflow1::TYPES::NONE; + if (tokens[0] == "Sale") { + inverseEdgeType = agile::workflow1::TYPES::PURCHASE; + } else if (tokens[0] == "Author") { + inverseEdgeType = agile::workflow1::TYPES::WRITTENBY; + } else if (tokens[0] == "Includes") { + inverseEdgeType = agile::workflow1::TYPES::INCLUDEDIN; + } else if (tokens[0] == "HasTopic") { + inverseEdgeType = agile::workflow1::TYPES::TOPICIN; + } else if (tokens[0] == "HasOrg") { + inverseEdgeType = agile::workflow1::TYPES::ORGIN; + } else { + assert(false); + } + agile::workflow1::Edge inverseEdge = edge; + inverseEdge.type = inverseEdgeType; + std::swap(inverseEdge.src, inverseEdge.dst); + std::swap(inverseEdge.src_type, inverseEdge.dst_type); + insertEdge(inverseEdge, vertices); + } + } +} + +void getDataFromGraph( + std::string& filename, + std::unordered_map>>& + vertices) { + // read file line by line + std::string line; + std::ifstream myfile(filename); + if (myfile.is_open()) { + while (getline(myfile, line)) { + parser(line, vertices); + } + myfile.close(); + } else { + std::cout << "Unable to open file"; + } +} int main(int argc, char* argv[]) { galois::DistMemSys G; // init galois memory @@ -39,6 +143,7 @@ int main(int argc, char* argv[]) { } std::string dataFile = argv[1]; + std::string file = dataFile; std::vector filenames; filenames.emplace_back(dataFile); std::vector>> + vertices; + if (net.ID == 0) + getDataFromGraph(file, vertices); + // generate a file with sorted token of all nodes and its outgoing edge dst // compare it with other implementation to verify the correctness std::vector>> tokenAndEdges; @@ -66,10 +176,14 @@ int main(int argc, char* argv[]) { auto end = graph->edge_end(lid); auto itr = graph->edge_begin(lid); for (; itr != end; itr++) { - edgeDst.push_back(graph->getEdgeData(itr).dst); + edgeDst.push_back(graph->getEdgeDst(itr)); } + std::vector edgeDstDbg; + for (auto& e : graph->edges(lid)) { + edgeDstDbg.push_back(graph->getEdgeDst(e)); + } + assert(edgeDst == edgeDstDbg); std::sort(edgeDst.begin(), edgeDst.end()); - tokenAndEdges[lid] = std::make_pair(token, std::move(edgeDst)); }, galois::steal()); @@ -99,23 +213,19 @@ int main(int argc, char* argv[]) { // sort the node info by token order // serilize it to file if (net.ID == 0) { - std::sort(tokenAndEdges.begin(), tokenAndEdges.end(), - [](const std::pair>& a, - const std::pair>& b) { - return a.first < b.first; - }); - - std::ofstream output; - output.open("wmd-graph-build-result.txt"); - - for (auto itr : tokenAndEdges) { - output << itr.first; - for (auto edge : itr.second) { - output << "," << edge; + // compare with vertices + assert(tokenAndEdges.size() == vertices.size()); + for (size_t i = 0; i < tokenAndEdges.size(); i++) { + auto& tokenAndEdge = tokenAndEdges[i]; + auto& vertex = vertices[tokenAndEdge.first]; + assert(vertex.second.size() == tokenAndEdge.second.size()); + std::sort(vertex.second.begin(), vertex.second.end(), + [](const agile::workflow1::Edge& a, + const agile::workflow1::Edge& b) { return a.dst < b.dst; }); + for (size_t j = 0; j < vertex.second.size(); j++) { + assert(vertex.second[j].dst == tokenAndEdge.second[j]); } - output << "\n"; } - output.close(); } return 0; }