From bce55d1b697af888ad5f30076d0acec444900789 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Thu, 27 Dec 2018 13:30:14 +0800 Subject: [PATCH 01/75] init. --- include/dgl/graph.h | 4 +- include/dgl/immutable_graph.h | 341 ++++++++++++++++++++++++++++++++++ src/graph/immutable_graph.cc | 77 ++++++++ 3 files changed, 419 insertions(+), 3 deletions(-) create mode 100644 include/dgl/immutable_graph.h create mode 100644 src/graph/immutable_graph.cc diff --git a/include/dgl/graph.h b/include/dgl/graph.h index d50018e1c6a0..3ebf9cc058e2 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -27,9 +27,7 @@ struct Subgraph; /*! * \brief Base dgl graph index class. * - * DGL's graph is directed. Vertices are integers enumerated from zero. Edges - * are uniquely identified by the two endpoints. Multi-edge is currently not - * supported. + * DGL's graph is directed. Vertices are integers enumerated from zero. * * Removal of vertices/edges is not allowed. Instead, the graph can only be "cleared" * by removing all the vertices and edges. diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h new file mode 100644 index 000000000000..0abedcb576a0 --- /dev/null +++ b/include/dgl/immutable_graph.h @@ -0,0 +1,341 @@ +/*! + * Copyright (c) 2018 by Contributors + * \file dgl/graph.h + * \brief DGL immutable graph index class. + */ +#ifndef DGL_IMMUTABLE_GRAPH_H_ +#define DGL_IMMUTABLE_GRAPH_H_ + +#include +#include +#include +#include +#include "runtime/ndarray.h" +#include "graph.h" + +namespace dgl { + +/*! + * \brief Base dgl immutable graph index class. + * + */ +class ImmutableGraph { + public: + typedef struct { + std::vector indptr; + std::vector indices; + std::vector edge_ids; + } csr; + + ImmutableGraph(std::shared_ptr in_csr, std::shared_ptr out_csr, + bool multigraph = false) : is_multigraph_(multigraph) { + this->in_csr_ = in_csr; + this->out_csr_ = out_csr; + } + + /*! 
\brief default constructor */ + explicit ImmutableGraph(bool multigraph = false) : is_multigraph_(multigraph) {} + + /*! \brief default copy constructor */ + ImmutableGraph(const ImmutableGraph& other) = default; + +#ifndef _MSC_VER + /*! \brief default move constructor */ + ImmutableGraph(ImmutableGraph&& other) = default; +#else + ImmutableGraph(ImmutableGraph&& other) { + // TODO + } +#endif // _MSC_VER + + /*! \brief default assign constructor */ + ImmutableGraph& operator=(const ImmutableGraph& other) = default; + + /*! \brief default destructor */ + ~ImmutableGraph() = default; + + /*! + * \note not const since we have caches + * \return whether the graph is a multigraph + */ + bool IsMultigraph() const { + return is_multigraph_; + } + + /*! \return the number of vertices in the graph.*/ + uint64_t NumVertices() const { + return in_csr_->indptr.size() - 1; + } + + /*! \return the number of edges in the graph.*/ + uint64_t NumEdges() const { + return in_csr_->indices.size(); + } + + /*! \return true if the given vertex is in the graph.*/ + bool HasVertex(dgl_id_t vid) const { + return vid < NumVertices(); + } + + /*! \return a 0-1 array indicating whether the given vertices are in the graph.*/ + BoolArray HasVertices(IdArray vids) const; + + /*! \return true if the given edge is in the graph.*/ + bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const; + + /*! \return a 0-1 array indicating whether the given edges are in the graph.*/ + BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const; + + /*! + * \brief Find the predecessors of a vertex. + * \param vid The vertex id. + * \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1). + * \return the predecessor id array. + */ + IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const; + + /*! + * \brief Find the successors of a vertex. + * \param vid The vertex id. + * \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1). 
+ * \return the successor id array. + */ + IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const; + + /*! + * \brief Get all edge ids between the two given endpoints + * \note Edges are associated with an integer id start from zero. + * The id is assigned when the edge is being added to the graph. + * \param src The source vertex. + * \param dst The destination vertex. + * \return the edge id array. + */ + IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const; + + /*! + * \brief Get all edge ids between the given endpoint pairs. + * \note Edges are associated with an integer id start from zero. + * The id is assigned when the edge is being added to the graph. + * If duplicate pairs exist, the returned edge IDs will also duplicate. + * The order of returned edge IDs will follow the order of src-dst pairs + * first, and ties are broken by the order of edge ID. + * \return EdgeArray containing all edges between all pairs. + */ + EdgeArray EdgeIds(IdArray src, IdArray dst) const; + + /*! + * \brief Find the edge ID and return the pair of endpoints + * \param eid The edge ID + * \return a pair whose first element is the source and the second the destination. + */ + std::pair FindEdge(dgl_id_t eid) const { + dgl_id_t src_id = in_csr_->indices[eid]; + auto it = std::lower_bound(in_csr-->indptr.begin(), in_csr_->indptr.end(), eid); + assert(it != in_csr_->indptr.end()); + dgl_id_t dst_id; + if (*it == eid) + dst_id = it - in_csr_->indptr.begin(); + else + dst_id = it - in_csr_->indptr.begin() - 1; + assert(in_csr_->edge_ids[in_csr_->indptr[dst_id]] <= eid); + assert(dst_id >= 0); + return std::make_pair(src_id, dst_id); + } + + /*! + * \brief Find the edge IDs and return their source and target node IDs. + * \param eids The edge ID array. + * \return EdgeArray containing all edges with id in eid. The order is preserved. + */ + EdgeArray FindEdges(IdArray eids) const; + + /*! + * \brief Get the in edges of the vertex. + * \note The returned dst id array is filled with vid. 
+ * \param vid The vertex id. + * \return the edges + */ + EdgeArray InEdges(dgl_id_t vid) const; + + /*! + * \brief Get the in edges of the vertices. + * \param vids The vertex id array. + * \return the id arrays of the two endpoints of the edges. + */ + EdgeArray InEdges(IdArray vids) const; + + /*! + * \brief Get the out edges of the vertex. + * \note The returned src id array is filled with vid. + * \param vid The vertex id. + * \return the id arrays of the two endpoints of the edges. + */ + EdgeArray OutEdges(dgl_id_t vid) const; + + /*! + * \brief Get the out edges of the vertices. + * \param vids The vertex id array. + * \return the id arrays of the two endpoints of the edges. + */ + EdgeArray OutEdges(IdArray vids) const; + + /*! + * \brief Get all the edges in the graph. + * \note If sorted is true, the returned edges list is sorted by their src and + * dst ids. Otherwise, they are in their edge id order. + * \param sorted Whether the returned edge list is sorted by their src and dst ids + * \return the id arrays of the two endpoints of the edges. + */ + EdgeArray Edges(bool sorted = false) const; + + /*! + * \brief Get the in degree of the given vertex. + * \param vid The vertex id. + * \return the in degree + */ + uint64_t InDegree(dgl_id_t vid) const { + CHECK(HasVertex(vid)) << "invalid vertex: " << vid; + return in_csr_->indptr[vid + 1] - in_csr_->indptr[vid]; + } + + /*! + * \brief Get the in degrees of the given vertices. + * \param vid The vertex id array. + * \return the in degree array + */ + DegreeArray InDegrees(IdArray vids) const; + + /*! + * \brief Get the out degree of the given vertex. + * \param vid The vertex id. + * \return the out degree + */ + uint64_t OutDegree(dgl_id_t vid) const { + CHECK(HasVertex(vid)) << "invalid vertex: " << vid; + return out_csr_->indptr[vid + 1] - out_csr_->indptr[vid]; + } + + /*! + * \brief Get the out degrees of the given vertices. + * \param vid The vertex id array. 
+ * \return the out degree array + */ + DegreeArray OutDegrees(IdArray vids) const; + + /*! + * \brief Construct the induced subgraph of the given vertices. + * + * The induced subgraph is a subgraph formed by specifying a set of vertices V' and then + * selecting all of the edges from the original graph that connect two vertices in V'. + * + * Vertices and edges in the original graph will be "reindexed" to local index. The local + * index of the vertices preserve the order of the given id array, while the local index + * of the edges preserve the index order in the original graph. Vertices not in the + * original graph are ignored. + * + * The result subgraph is read-only. + * + * \param vids The vertices in the subgraph. + * \return the induced subgraph + */ + Subgraph VertexSubgraph(IdArray vids) const; + + /*! + * \brief Construct the induced edge subgraph of the given edges. + * + * The induced edges subgraph is a subgraph formed by specifying a set of edges E' and then + * selecting all of the nodes from the original graph that are endpoints in E'. + * + * Vertices and edges in the original graph will be "reindexed" to local index. The local + * index of the edges preserve the order of the given id array, while the local index + * of the vertices preserve the index order in the original graph. Edges not in the + * original graph are ignored. + * + * The result subgraph is read-only. + * + * \param eids The edges in the subgraph. + * \return the induced edge subgraph + */ + Subgraph EdgeSubgraph(IdArray eids) const; + + /*! + * \brief Return a new graph with all the edges reversed. + * + * The returned graph preserves the vertex and edge index in the original graph. + * + * \return the reversed graph + */ + ImmutableGraph Reverse() const { + return ImmutableGraph(out_csr, in_csr, is_multigraph_); + } + + /*! + * \brief Return the successor vector + * \param vid The vertex id. 
+ * \return the successor vector + */ + const std::vector& SuccVec(dgl_id_t vid) const { + return adjlist_[vid].succ; + } + + /*! + * \brief Return the out edge id vector + * \param vid The vertex id. + * \return the out edge id vector + */ + const std::vector& OutEdgeVec(dgl_id_t vid) const { + return adjlist_[vid].edge_id; + } + + /*! + * \brief Return the predecessor vector + * \param vid The vertex id. + * \return the predecessor vector + */ + const std::vector& PredVec(dgl_id_t vid) const { + return reverse_adjlist_[vid].succ; + } + + /*! + * \brief Return the in edge id vector + * \param vid The vertex id. + * \return the in edge id vector + */ + const std::vector& InEdgeVec(dgl_id_t vid) const { + return reverse_adjlist_[vid].edge_id; + } + + protected: + friend class GraphOp; + // Store the in-edges. + std::shared_ptr in_csr_; + // Store the out-edges. + std::shared_ptr out_csr_; + /*! + * \brief Whether if this is a multigraph. + * + * When a multiedge is added, this flag switches to true. + */ + bool is_multigraph_ = false; +}; + +/*! \brief Subgraph data structure */ +struct Subgraph { + /*! \brief The graph. */ + Graph graph; + /*! + * \brief The induced vertex ids. + * \note This is also a map from the new vertex id to the vertex id in the parent graph. + */ + IdArray induced_vertices; + /*! + * \brief The induced edge ids. + * \note This is also a map from the new edge id to the edge id in the parent graph. + */ + IdArray induced_edges; +}; + +} // namespace dgl + +#endif // DGL_IMMUTABLE_GRAPH_H_ + diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc new file mode 100644 index 000000000000..84d64c0db056 --- /dev/null +++ b/src/graph/immutable_graph.cc @@ -0,0 +1,77 @@ +/*! 
+ * Copyright (c) 2018 by Contributors + * \file graph/immutable_graph.cc + * \brief DGL immutable graph index implementation + */ + +BoolArray HasVertices(IdArray vids) const; + +bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const; + +BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const; + +IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const; + +IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const; + +IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const; + +EdgeArray EdgeIds(IdArray src, IdArray dst) const; + +EdgeArray FindEdges(IdArray eids) const; + +EdgeArray InEdges(dgl_id_t vid) const; + +EdgeArray InEdges(IdArray vids) const; + +EdgeArray OutEdges(dgl_id_t vid) const; + +EdgeArray OutEdges(IdArray vids) const; + +EdgeArray Edges(bool sorted = false) const; + +DegreeArray InDegrees(IdArray vids) const; + +DegreeArray OutDegrees(IdArray vids) const; + +Subgraph VertexSubgraph(IdArray vids) const; + +Subgraph EdgeSubgraph(IdArray eids) const; + +Graph Reverse() const; + + /*! + * \brief Return the successor vector + * \param vid The vertex id. + * \return the successor vector + */ + const std::vector& SuccVec(dgl_id_t vid) const { + return adjlist_[vid].succ; + } + + /*! + * \brief Return the out edge id vector + * \param vid The vertex id. + * \return the out edge id vector + */ + const std::vector& OutEdgeVec(dgl_id_t vid) const { + return adjlist_[vid].edge_id; + } + + /*! + * \brief Return the predecessor vector + * \param vid The vertex id. + * \return the predecessor vector + */ + const std::vector& PredVec(dgl_id_t vid) const { + return reverse_adjlist_[vid].succ; + } + + /*! + * \brief Return the in edge id vector + * \param vid The vertex id. 
+ * \return the in edge id vector + */ + const std::vector& InEdgeVec(dgl_id_t vid) const { + return reverse_adjlist_[vid].edge_id; + } From 3e63c70de6ef3062424d071a9cfe859f695ca0f5 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 28 Dec 2018 20:58:10 +0800 Subject: [PATCH 02/75] it's compiled. --- include/dgl/immutable_graph.h | 182 ++++++++++---- src/graph/immutable_graph.cc | 453 ++++++++++++++++++++++++++++++---- 2 files changed, 535 insertions(+), 100 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 0abedcb576a0..ec0e1cb01b17 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -15,6 +15,8 @@ namespace dgl { +struct ImmutableSubgraph; + /*! * \brief Base dgl immutable graph index class. * @@ -22,10 +24,47 @@ namespace dgl { class ImmutableGraph { public: typedef struct { + /* \brief the two endpoints and the id of the edge */ + IdArray src, dst, id; + } EdgeArray; + + struct csr { std::vector indptr; - std::vector indices; - std::vector edge_ids; - } csr; + std::vector indices; + std::vector edge_ids; + + csr(int64_t num_vertices, int64_t expected_num_edges) { + indptr.resize(num_vertices + 1); + indices.reserve(expected_num_edges); + edge_ids.reserve(expected_num_edges); + } + + bool HasVertex(dgl_id_t vid) const { + return vid < NumVertices(); + } + + uint64_t NumVertices() const { + return indptr.size() - 1; + } + + uint64_t NumEdges() const { + return indices.size(); + } + + int64_t GetDegree(dgl_id_t vid) const { + return indptr[vid + 1] - indptr[vid]; + } + DegreeArray GetDegrees(IdArray vids) const; + EdgeArray GetEdges(dgl_id_t vid) const; + EdgeArray GetEdges(IdArray vids) const; + std::pair GetIndexRef(dgl_id_t v) const { + int64_t start = indptr[v]; + int64_t end = indptr[v + 1]; + return std::pair(&indices[start], &indices[end]); + } + std::shared_ptr Transpose() const; + std::pair, IdArray> VertexSubgraph(IdArray vids) const; + }; ImmutableGraph(std::shared_ptr in_csr, 
std::shared_ptr out_csr, bool multigraph = false) : is_multigraph_(multigraph) { @@ -64,12 +103,18 @@ class ImmutableGraph { /*! \return the number of vertices in the graph.*/ uint64_t NumVertices() const { - return in_csr_->indptr.size() - 1; + if (in_csr_) + return in_csr_->NumVertices(); + else + return out_csr_->NumVertices(); } /*! \return the number of edges in the graph.*/ uint64_t NumEdges() const { - return in_csr_->indices.size(); + if (in_csr_) + return in_csr_->NumEdges(); + else + return out_csr_->NumEdges(); } /*! \return true if the given vertex is in the graph.*/ @@ -81,7 +126,17 @@ class ImmutableGraph { BoolArray HasVertices(IdArray vids) const; /*! \return true if the given edge is in the graph.*/ - bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const; + bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const { + if (!HasVertex(src) || !HasVertex(dst)) return false; + if (this->in_csr_) { + auto pred = this->in_csr_->GetIndexRef(dst); + return std::binary_search(pred.first, pred.second, src); + } else { + assert(this->out_csr_); + auto succ = this->out_csr_->GetIndexRef(src); + return std::binary_search(succ.first, succ.second, dst); + } + } /*! \return a 0-1 array indicating whether the given edges are in the graph.*/ BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const; @@ -123,46 +178,24 @@ class ImmutableGraph { */ EdgeArray EdgeIds(IdArray src, IdArray dst) const; - /*! - * \brief Find the edge ID and return the pair of endpoints - * \param eid The edge ID - * \return a pair whose first element is the source and the second the destination. 
- */ - std::pair FindEdge(dgl_id_t eid) const { - dgl_id_t src_id = in_csr_->indices[eid]; - auto it = std::lower_bound(in_csr-->indptr.begin(), in_csr_->indptr.end(), eid); - assert(it != in_csr_->indptr.end()); - dgl_id_t dst_id; - if (*it == eid) - dst_id = it - in_csr_->indptr.begin(); - else - dst_id = it - in_csr_->indptr.begin() - 1; - assert(in_csr_->edge_ids[in_csr_->indptr[dst_id]] <= eid); - assert(dst_id >= 0); - return std::make_pair(src_id, dst_id); - } - - /*! - * \brief Find the edge IDs and return their source and target node IDs. - * \param eids The edge ID array. - * \return EdgeArray containing all edges with id in eid. The order is preserved. - */ - EdgeArray FindEdges(IdArray eids) const; - /*! * \brief Get the in edges of the vertex. * \note The returned dst id array is filled with vid. * \param vid The vertex id. * \return the edges */ - EdgeArray InEdges(dgl_id_t vid) const; + EdgeArray InEdges(dgl_id_t vid) const { + return this->GetInCSR()->GetEdges(vid); + } /*! * \brief Get the in edges of the vertices. * \param vids The vertex id array. * \return the id arrays of the two endpoints of the edges. */ - EdgeArray InEdges(IdArray vids) const; + EdgeArray InEdges(IdArray vids) const { + return this->GetInCSR()->GetEdges(vids); + } /*! * \brief Get the out edges of the vertex. @@ -170,14 +203,18 @@ class ImmutableGraph { * \param vid The vertex id. * \return the id arrays of the two endpoints of the edges. */ - EdgeArray OutEdges(dgl_id_t vid) const; + EdgeArray OutEdges(dgl_id_t vid) const { + return this->GetOutCSR()->GetEdges(vid); + } /*! * \brief Get the out edges of the vertices. * \param vids The vertex id array. * \return the id arrays of the two endpoints of the edges. */ - EdgeArray OutEdges(IdArray vids) const; + EdgeArray OutEdges(IdArray vids) const { + return this->GetOutCSR()->GetEdges(vids); + } /*! * \brief Get all the edges in the graph. 
@@ -195,7 +232,7 @@ class ImmutableGraph { */ uint64_t InDegree(dgl_id_t vid) const { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; - return in_csr_->indptr[vid + 1] - in_csr_->indptr[vid]; + return this->GetInCSR()->GetDegree(vid); } /*! @@ -203,7 +240,9 @@ class ImmutableGraph { * \param vid The vertex id array. * \return the in degree array */ - DegreeArray InDegrees(IdArray vids) const; + DegreeArray InDegrees(IdArray vids) const { + return this->GetInCSR()->GetDegrees(vids); + } /*! * \brief Get the out degree of the given vertex. @@ -212,7 +251,7 @@ class ImmutableGraph { */ uint64_t OutDegree(dgl_id_t vid) const { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; - return out_csr_->indptr[vid + 1] - out_csr_->indptr[vid]; + return this->GetOutCSR()->GetDegree(vid); } /*! @@ -220,7 +259,9 @@ class ImmutableGraph { * \param vid The vertex id array. * \return the out degree array */ - DegreeArray OutDegrees(IdArray vids) const; + DegreeArray OutDegrees(IdArray vids) const { + return this->GetOutCSR()->GetDegrees(vids); + } /*! * \brief Construct the induced subgraph of the given vertices. @@ -238,7 +279,9 @@ class ImmutableGraph { * \param vids The vertices in the subgraph. * \return the induced subgraph */ - Subgraph VertexSubgraph(IdArray vids) const; + ImmutableSubgraph VertexSubgraph(IdArray vids) const; + + std::vector VertexSubgraphs(const std::vector &vids) const; /*! * \brief Construct the induced edge subgraph of the given edges. @@ -256,7 +299,9 @@ class ImmutableGraph { * \param eids The edges in the subgraph. * \return the induced edge subgraph */ - Subgraph EdgeSubgraph(IdArray eids) const; + ImmutableSubgraph EdgeSubgraph(IdArray eids) const; + + std::vector EdgeSubgraphs(std::vector eids) const; /*! * \brief Return a new graph with all the edges reversed. 
@@ -266,7 +311,7 @@ class ImmutableGraph { * \return the reversed graph */ ImmutableGraph Reverse() const { - return ImmutableGraph(out_csr, in_csr, is_multigraph_); + return ImmutableGraph(out_csr_, in_csr_, is_multigraph_); } /*! @@ -274,8 +319,9 @@ class ImmutableGraph { * \param vid The vertex id. * \return the successor vector */ - const std::vector& SuccVec(dgl_id_t vid) const { - return adjlist_[vid].succ; + std::vector SuccVec(dgl_id_t vid) const { + return std::vector(out_csr_->indices.begin() + out_csr_->indptr[vid], + out_csr_->indices.begin() + out_csr_->indptr[vid + 1]); } /*! @@ -283,8 +329,9 @@ class ImmutableGraph { * \param vid The vertex id. * \return the out edge id vector */ - const std::vector& OutEdgeVec(dgl_id_t vid) const { - return adjlist_[vid].edge_id; + std::vector OutEdgeVec(dgl_id_t vid) const { + return std::vector(out_csr_->edge_ids.begin() + out_csr_->indptr[vid], + out_csr_->edge_ids.begin() + out_csr_->indptr[vid + 1]); } /*! @@ -292,8 +339,9 @@ class ImmutableGraph { * \param vid The vertex id. * \return the predecessor vector */ - const std::vector& PredVec(dgl_id_t vid) const { - return reverse_adjlist_[vid].succ; + std::vector PredVec(dgl_id_t vid) const { + return std::vector(in_csr_->indices.begin() + in_csr_->indptr[vid], + in_csr_->indices.begin() + in_csr_->indptr[vid + 1]); } /*! @@ -301,12 +349,38 @@ class ImmutableGraph { * \param vid The vertex id. * \return the in edge id vector */ - const std::vector& InEdgeVec(dgl_id_t vid) const { - return reverse_adjlist_[vid].edge_id; + std::vector InEdgeVec(dgl_id_t vid) const { + return std::vector(in_csr_->edge_ids.begin() + in_csr_->indptr[vid], + in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]); } protected: - friend class GraphOp; + std::pair GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; + std::pair GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; + + /* + * When we get in csr or out csr, we try to get the one cached in the structure. 
+ * If not, we transpose the other one to get the one we need. + */ + std::shared_ptr GetInCSR() const { + if (in_csr_) { + return in_csr_; + } else { + assert(out_csr_ != nullptr); + const_cast(this)->in_csr_ = out_csr_->Transpose(); + return in_csr_; + } + } + std::shared_ptr GetOutCSR() const { + if (out_csr_) { + return out_csr_; + } else { + assert(in_csr_ != nullptr); + const_cast(this)->out_csr_ = in_csr_->Transpose(); + return out_csr_; + } + } + // Store the in-edges. std::shared_ptr in_csr_; // Store the out-edges. @@ -320,9 +394,9 @@ class ImmutableGraph { }; /*! \brief Subgraph data structure */ -struct Subgraph { +struct ImmutableSubgraph { /*! \brief The graph. */ - Graph graph; + ImmutableGraph graph; /*! * \brief The induced vertex ids. * \note This is also a map from the new vertex id to the vertex id in the parent graph. diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 84d64c0db056..9a1b5b14c8d0 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -4,74 +4,435 @@ * \brief DGL immutable graph index implementation */ -BoolArray HasVertices(IdArray vids) const; +#include -bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const; +#include "../c_api_common.h" -BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const; +namespace dgl { -IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const; +ImmutableGraph::EdgeArray ImmutableGraph::csr::GetEdges(dgl_id_t vid) const { + CHECK(HasVertex(vid)) << "invalid vertex: " << vid; + int64_t off = this->indptr[vid]; + const int64_t len = this->GetDegree(vid); + IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + dgl_id_t* src_data = static_cast(src->data); + dgl_id_t* dst_data = static_cast(dst->data); + dgl_id_t* eid_data = 
static_cast(eid->data); + for (int64_t i = 0; i < len; ++i) { + src_data[i] = this->indices[off + i]; + eid_data[i] = this->edge_ids[off + i]; + } + std::fill(dst_data, dst_data + len, vid); + return ImmutableGraph::EdgeArray{src, dst, eid}; +} + +ImmutableGraph::EdgeArray ImmutableGraph::csr::GetEdges(IdArray vids) const { + CHECK(IsValidIdArray(vids)) << "Invalid vertex id array."; + const auto len = vids->shape[0]; + const dgl_id_t* vid_data = static_cast(vids->data); + int64_t rstlen = 0; + for (int64_t i = 0; i < len; ++i) { + dgl_id_t vid = vid_data[i]; + CHECK(HasVertex(vid)) << "Invalid vertex: " << vid; + rstlen += this->GetDegree(vid); + } + IdArray src = IdArray::Empty({rstlen}, vids->dtype, vids->ctx); + IdArray dst = IdArray::Empty({rstlen}, vids->dtype, vids->ctx); + IdArray eid = IdArray::Empty({rstlen}, vids->dtype, vids->ctx); + dgl_id_t* src_ptr = static_cast(src->data); + dgl_id_t* dst_ptr = static_cast(dst->data); + dgl_id_t* eid_ptr = static_cast(eid->data); + for (int64_t i = 0; i < len; ++i) { + dgl_id_t vid = vid_data[i]; + int64_t off = this->indptr[vid]; + const int64_t len = this->GetDegree(vid); + const auto *pred = &this->indices[off]; + const auto *eids = &this->edge_ids[off]; + for (int64_t j = 0; j < len; ++j) { + *(src_ptr++) = pred[j]; + *(dst_ptr++) = vid; + *(eid_ptr++) = eids[j]; + } + } + return ImmutableGraph::EdgeArray{src, dst, eid}; +} + +DegreeArray ImmutableGraph::csr::GetDegrees(IdArray vids) const { + CHECK(IsValidIdArray(vids)) << "Invalid vertex id array."; + const auto len = vids->shape[0]; + const dgl_id_t* vid_data = static_cast(vids->data); + DegreeArray rst = DegreeArray::Empty({len}, vids->dtype, vids->ctx); + dgl_id_t* rst_data = static_cast(rst->data); + for (int64_t i = 0; i < len; ++i) { + const auto vid = vid_data[i]; + CHECK(HasVertex(vid)) << "Invalid vertex: " << vid; + rst_data[i] = this->GetDegree(vid); + } + return rst; +} -IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const; +class Bitmap { + 
const size_t size = 1024 * 1024 * 4; + const size_t mask = size - 1; + std::vector map; -IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const; + size_t hash(dgl_id_t id) const { + return id & mask; + } + public: + Bitmap(const dgl_id_t *vid_data, int64_t len): map(size) { + for (int64_t i = 0; i < len; ++i) { + map[hash(vid_data[i])] = 1; + } + } + + bool test(dgl_id_t id) const { + return map[hash(id)]; + } +}; -EdgeArray EdgeIds(IdArray src, IdArray dst) const; +/* + * This uses a hashtable to check if a node is in the given node list. + */ +class HashTableChecker { + std::unordered_map oldv2newv; + Bitmap map; -EdgeArray FindEdges(IdArray eids) const; + public: + HashTableChecker(const dgl_id_t *vid_data, int64_t len): map(vid_data, len) { + oldv2newv.reserve(len); + for (int64_t i = 0; i < len; ++i) { + oldv2newv[vid_data[i]] = i; + } + } -EdgeArray InEdges(dgl_id_t vid) const; + void CollectOnRow(const dgl_id_t col_idx[], const dgl_id_t eids[], size_t row_len, + std::vector *new_col_idx, + std::vector *orig_eids) { + // TODO(zhengda) I need to make sure the column index in each row is sorted. 
+ for (size_t j = 0; j < row_len; ++j) { + const dgl_id_t oldsucc = col_idx[j]; + const dgl_id_t eid = eids[j]; + Collect(oldsucc, eid, new_col_idx, orig_eids); + } + } -EdgeArray InEdges(IdArray vids) const; + void Collect(const dgl_id_t old_id, const dgl_id_t old_eid, + std::vector *col_idx, + std::vector *orig_eids) { + if (!map.test(old_id)) + return; -EdgeArray OutEdges(dgl_id_t vid) const; + auto it = oldv2newv.find(old_id); + if (it != oldv2newv.end()) { + const dgl_id_t new_id = it->second; + col_idx->push_back(new_id); + if (orig_eids) + orig_eids->push_back(old_eid); + } + } +}; -EdgeArray OutEdges(IdArray vids) const; +std::pair, IdArray> ImmutableGraph::csr::VertexSubgraph(IdArray vids) const { + const dgl_id_t* vid_data = static_cast(vids->data); + const int64_t len = vids->shape[0]; -EdgeArray Edges(bool sorted = false) const; + HashTableChecker def_check(vid_data, len); + // check if varr is sorted. + CHECK(std::is_sorted(vid_data, vid_data + len)) << "The input vertex list has to be sorted"; -DegreeArray InDegrees(IdArray vids) const; + // Collect the non-zero entries in from the original graph. + std::vector orig_edge_ids; + orig_edge_ids.reserve(len * 50); + auto sub_csr = std::make_shared(len, len * 50); + sub_csr->indptr[0] = 0; + for (int64_t i = 0; i < len; ++i) { + const dgl_id_t oldvid = vid_data[i]; + CHECK_LT(oldvid, NumVertices()) << "Vertex Id " << oldvid << " isn't in a graph of " + << NumVertices() << " vertices"; + size_t row_start = indptr[oldvid]; + size_t row_len = indptr[oldvid + 1] - indptr[oldvid]; + def_check.CollectOnRow(&indices[row_start], &edge_ids[row_start], row_len, + &sub_csr->indices, &orig_edge_ids); + sub_csr->indptr[i + 1] = sub_csr->indices.size(); + } -DegreeArray OutDegrees(IdArray vids) const; + // Store the non-zeros in a subgraph with edge attributes of new edge ids. 
+ sub_csr->edge_ids.resize(sub_csr->indices.size()); + for (int64_t i = 0; i < sub_csr->edge_ids.size(); i++) + sub_csr->edge_ids[i] = i; -Subgraph VertexSubgraph(IdArray vids) const; + IdArray rst_eids = IdArray::Empty({static_cast(orig_edge_ids.size())}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + dgl_id_t* eid_data = static_cast(rst_eids->data); + std::copy(orig_edge_ids.begin(), orig_edge_ids.end(), eid_data); -Subgraph EdgeSubgraph(IdArray eids) const; + return std::pair, IdArray>(sub_csr, rst_eids); +} -Graph Reverse() const; +struct coo { + dgl_id_t end_point1; + dgl_id_t end_point2; + dgl_id_t edge_id; +}; - /*! - * \brief Return the successor vector - * \param vid The vertex id. - * \return the successor vector - */ - const std::vector& SuccVec(dgl_id_t vid) const { - return adjlist_[vid].succ; +std::shared_ptr ImmutableGraph::csr::Transpose() const { + std::vector edges(NumEdges()); + for (size_t i = 0; i < NumVertices(); i++) { + const dgl_id_t *indices_begin = &indices[indptr[i]]; + const dgl_id_t *eid_begin = &edge_ids[indptr[i]]; + for (size_t j = 0; j < GetDegree(i); j++) { + coo e{i, indices_begin[j], eid_begin[j]}; + edges[indptr[i] + j] = e; + } } + // TODO(zhengda) we should sort in parallel. 
+ std::sort(edges.begin(), edges.end(), [](const coo &e1, const coo &e2) { + if (e1.end_point2 == e2.end_point2) + return e1.end_point1 < e2.end_point1; + else + return e2.end_point2 < e2.end_point2; + }); + std::shared_ptr t = std::make_shared(0, 0); + t->indices.resize(NumEdges()); + t->edge_ids.resize(NumEdges()); + t->indptr.reserve(NumVertices()); + t->indptr.push_back(0); + for (size_t i = 0; i < NumEdges(); i++) { + t->indices[i] = edges[i].end_point1; + t->edge_ids[i] = edges[i].edge_id; + dgl_id_t vid = edges[i].end_point2; + int64_t off; + if (t->indptr.empty()) + off = 0; + else + off = t->indptr.back(); + while (vid > 0 && t->indptr.size() < static_cast(vid - 1)) + t->indptr.push_back(off); + if (t->indptr.size() < vid) + t->indptr.push_back(i); + assert(t->indptr.size() == vid + 1); + } + t->indptr.push_back(NumEdges()); + return t; +} - /*! - * \brief Return the out edge id vector - * \param vid The vertex id. - * \return the out edge id vector - */ - const std::vector& OutEdgeVec(dgl_id_t vid) const { - return adjlist_[vid].edge_id; +BoolArray ImmutableGraph::HasVertices(IdArray vids) const { + CHECK(IsValidIdArray(vids)) << "Invalid vertex id array."; + const auto len = vids->shape[0]; + BoolArray rst = BoolArray::Empty({len}, vids->dtype, vids->ctx); + const dgl_id_t* vid_data = static_cast(vids->data); + dgl_id_t* rst_data = static_cast(rst->data); + const int64_t nverts = NumVertices(); + for (int64_t i = 0; i < len; ++i) { + rst_data[i] = (vid_data[i] < nverts)? 1 : 0; } + return rst; +} - /*! - * \brief Return the predecessor vector - * \param vid The vertex id. 
- * \return the predecessor vector - */ - const std::vector& PredVec(dgl_id_t vid) const { - return reverse_adjlist_[vid].succ; +BoolArray ImmutableGraph::HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const { + CHECK(IsValidIdArray(src_ids)) << "Invalid src id array."; + CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array."; + const auto srclen = src_ids->shape[0]; + const auto dstlen = dst_ids->shape[0]; + const auto rstlen = std::max(srclen, dstlen); + BoolArray rst = BoolArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); + dgl_id_t* rst_data = static_cast(rst->data); + const dgl_id_t* src_data = static_cast(src_ids->data); + const dgl_id_t* dst_data = static_cast(dst_ids->data); + if (srclen == 1) { + // one-many + for (int64_t i = 0; i < dstlen; ++i) { + rst_data[i] = HasEdgeBetween(src_data[0], dst_data[i])? 1 : 0; + } + } else if (dstlen == 1) { + // many-one + for (int64_t i = 0; i < srclen; ++i) { + rst_data[i] = HasEdgeBetween(src_data[i], dst_data[0])? 1 : 0; + } + } else { + // many-many + CHECK(srclen == dstlen) << "Invalid src and dst id array."; + for (int64_t i = 0; i < srclen; ++i) { + rst_data[i] = HasEdgeBetween(src_data[i], dst_data[i])? 
1 : 0; + } } + return rst; +} + +IdArray ImmutableGraph::Predecessors(dgl_id_t vid, uint64_t radius) const { + CHECK(HasVertex(vid)) << "invalid vertex: " << vid; + CHECK(radius >= 1) << "invalid radius: " << radius; + + auto pred = this->GetInCSR()->GetIndexRef(vid); + const int64_t len = pred.second - pred.first; + IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + dgl_id_t* rst_data = static_cast(rst->data); + + std::copy(pred.first, pred.second, rst_data); + return rst; +} + +IdArray ImmutableGraph::Successors(dgl_id_t vid, uint64_t radius) const { + CHECK(HasVertex(vid)) << "invalid vertex: " << vid; + CHECK(radius >= 1) << "invalid radius: " << radius; + + auto succ = this->GetOutCSR()->GetIndexRef(vid); + const int64_t len = succ.second - succ.first; + IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + dgl_id_t* rst_data = static_cast(rst->data); + + std::copy(succ.first, succ.second, rst_data); + return rst; +} + +std::pair ImmutableGraph::GetInEdgeIdRef(dgl_id_t src, + dgl_id_t dst) const { + assert(this->in_csr_); + auto pred = this->in_csr_->GetIndexRef(dst); + auto it = std::lower_bound(pred.first, pred.second, src); + // If there doesn't exist edges between the two nodes. + if (it == pred.second || *it != src) + return std::pair(nullptr, nullptr); + + size_t off = it - in_csr_->indices.data(); + assert(off < in_csr_->indices.size()); + const dgl_id_t *start = &in_csr_->edge_ids[off]; + int64_t len = 0; + // There are edges between the source and the destination. + for (auto it1 = it; it1 != pred.second && *it1 == src; it1++, len++); + return std::pair(start, start + len); +} - /*! - * \brief Return the in edge id vector - * \param vid The vertex id. 
- * \return the in edge id vector - */ - const std::vector& InEdgeVec(dgl_id_t vid) const { - return reverse_adjlist_[vid].edge_id; +std::pair ImmutableGraph::GetOutEdgeIdRef(dgl_id_t src, + dgl_id_t dst) const { + assert(this->out_csr_); + auto succ = this->out_csr_->GetIndexRef(src); + auto it = std::lower_bound(succ.first, succ.second, dst); + // If there doesn't exist edges between the two nodes. + if (it == succ.second || *it != dst) + return std::pair(nullptr, nullptr); + + size_t off = it - out_csr_->indices.data(); + assert(off < out_csr_->indices.size()); + const dgl_id_t *start = &out_csr_->edge_ids[off]; + int64_t len = 0; + // There are edges between the source and the destination. + for (auto it1 = it; it1 != succ.second && *it1 == dst; it1++, len++); + return std::pair(start, start + len); +} + +IdArray ImmutableGraph::EdgeId(dgl_id_t src, dgl_id_t dst) const { + CHECK(HasVertex(src) && HasVertex(dst)) << "invalid edge: " << src << " -> " << dst; + + std::pair edge_ids; + if (in_csr_) + edge_ids = GetInEdgeIdRef(src, dst); + else + edge_ids = GetOutEdgeIdRef(src, dst); + int64_t len = edge_ids.second - edge_ids.first; + IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + dgl_id_t* rst_data = static_cast(rst->data); + if (len > 0) + std::copy(edge_ids.first, edge_ids.second, rst_data); + + return rst; +} + +ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_ids) const { + CHECK(IsValidIdArray(src_ids)) << "Invalid src id array."; + CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array."; + const auto srclen = src_ids->shape[0]; + const auto dstlen = dst_ids->shape[0]; + int64_t i, j; + + CHECK((srclen == dstlen) || (srclen == 1) || (dstlen == 1)) + << "Invalid src and dst id array."; + + const int src_stride = (srclen == 1 && dstlen != 1) ? 0 : 1; + const int dst_stride = (dstlen == 1 && srclen != 1) ? 
0 : 1; + const dgl_id_t* src_data = static_cast(src_ids->data); + const dgl_id_t* dst_data = static_cast(dst_ids->data); + + std::vector src, dst, eid; + + for (i = 0, j = 0; i < srclen && j < dstlen; i += src_stride, j += dst_stride) { + const dgl_id_t src_id = src_data[i], dst_id = dst_data[j]; + CHECK(HasVertex(src_id) && HasVertex(dst_id)) << + "invalid edge: " << src_id << " -> " << dst_id; + + std::pair edges; + if (this->in_csr_) + edges = this->GetInEdgeIdRef(src_id, dst_id); + else + edges = this->GetOutEdgeIdRef(src_id, dst_id); + + size_t len = edges.second - edges.first; + for (size_t i = 0; i < len; i++) { + src.push_back(src_id); + dst.push_back(dst_id); + eid.push_back(edges.first[i]); + } } + + int64_t rstlen = src.size(); + IdArray rst_src = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); + IdArray rst_dst = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); + IdArray rst_eid = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); + dgl_id_t* rst_src_data = static_cast(rst_src->data); + dgl_id_t* rst_dst_data = static_cast(rst_dst->data); + dgl_id_t* rst_eid_data = static_cast(rst_eid->data); + + std::copy(src.begin(), src.end(), rst_src_data); + std::copy(dst.begin(), dst.end(), rst_dst_data); + std::copy(eid.begin(), eid.end(), rst_eid_data); + + return ImmutableGraph::EdgeArray{rst_src, rst_dst, rst_eid}; +} + +ImmutableGraph::EdgeArray ImmutableGraph::Edges(bool sorted) const { + int64_t rstlen = NumEdges(); + IdArray rst_src = IdArray::Empty({rstlen}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray rst_dst = IdArray::Empty({rstlen}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray rst_eid = IdArray::Empty({rstlen}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + dgl_id_t* rst_src_data = static_cast(rst_src->data); + dgl_id_t* rst_dst_data = static_cast(rst_dst->data); + dgl_id_t* rst_eid_data = static_cast(rst_eid->data); + + auto out_csr = GetOutCSR(); + // If sorted, the returned edges are sorted by 
the source Id and dest Id. + for (size_t i = 0; i < out_csr->indptr.size() - 1; i++) { + std::fill(rst_src_data + out_csr->indptr[i], rst_src_data + out_csr->indptr[i + 1], + static_cast(i)); + } + std::copy(out_csr->indices.begin(), out_csr->indices.end(), rst_dst_data); + std::copy(out_csr->edge_ids.begin(), out_csr->edge_ids.end(), rst_eid_data); + + // TODO(zhengda) do I need to sort the edges if sorted = false? + + return ImmutableGraph::EdgeArray{rst_src, rst_dst, rst_eid}; +} + +ImmutableSubgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { + ImmutableSubgraph subg; + std::pair, IdArray> ret; + if (in_csr_) { + ret = in_csr_->VertexSubgraph(vids); + subg.graph = ImmutableGraph(ret.first, nullptr, IsMultigraph()); + } else { + assert(out_csr_); + ret = out_csr_->VertexSubgraph(vids); + subg.graph = ImmutableGraph(nullptr, ret.first, IsMultigraph()); + } + subg.induced_vertices = vids; + subg.induced_edges = ret.second; + return subg; +} + +ImmutableSubgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const { + return ImmutableSubgraph(); +} + +} // namespace dgl From 34c1d73132dea3f0de17ae2d939a2515f39a2104 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 28 Dec 2018 22:08:23 +0800 Subject: [PATCH 03/75] add immutable graph constructor. 
--- include/dgl/immutable_graph.h | 8 ++++ src/graph/immutable_graph.cc | 90 ++++++++++++++++++++++++----------- 2 files changed, 69 insertions(+), 29 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index ec0e1cb01b17..cd758fc5066c 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -28,6 +28,11 @@ class ImmutableGraph { IdArray src, dst, id; } EdgeArray; + struct edge { + dgl_id_t end_points[2]; + dgl_id_t edge_id; + }; + struct csr { std::vector indptr; std::vector indices; @@ -64,8 +69,11 @@ class ImmutableGraph { } std::shared_ptr Transpose() const; std::pair, IdArray> VertexSubgraph(IdArray vids) const; + static std::shared_ptr from_edges(std::vector &edges, int sort_on); }; + ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, bool multigraph = false); + ImmutableGraph(std::shared_ptr in_csr, std::shared_ptr out_csr, bool multigraph = false) : is_multigraph_(multigraph) { this->in_csr_ = in_csr; diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 9a1b5b14c8d0..53e3e6da0e90 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -172,38 +172,34 @@ std::pair, IdArray> ImmutableGraph::csr::Ve return std::pair, IdArray>(sub_csr, rst_eids); } -struct coo { - dgl_id_t end_point1; - dgl_id_t end_point2; - dgl_id_t edge_id; -}; - -std::shared_ptr ImmutableGraph::csr::Transpose() const { - std::vector edges(NumEdges()); - for (size_t i = 0; i < NumVertices(); i++) { - const dgl_id_t *indices_begin = &indices[indptr[i]]; - const dgl_id_t *eid_begin = &edge_ids[indptr[i]]; - for (size_t j = 0; j < GetDegree(i); j++) { - coo e{i, indices_begin[j], eid_begin[j]}; - edges[indptr[i] + j] = e; - } - } +std::shared_ptr ImmutableGraph::csr::from_edges(std::vector &edges, + int sort_on) { + assert(sort_on == 0 || sort_on == 1); + int other_end = sort_on == 1 ? 0 : 1; // TODO(zhengda) we should sort in parallel. 
- std::sort(edges.begin(), edges.end(), [](const coo &e1, const coo &e2) { - if (e1.end_point2 == e2.end_point2) - return e1.end_point1 < e2.end_point1; - else - return e2.end_point2 < e2.end_point2; - }); + struct compare { + int sort_on; + int other_end; + compare(int sort_on, int other_end) { + this->sort_on = sort_on; + this->other_end = other_end; + } + bool operator()(const edge &e1, const edge &e2) { + if (e1.end_points[sort_on] == e2.end_points[sort_on]) + return e1.end_points[other_end] < e2.end_points[other_end]; + else + return e1.end_points[sort_on] < e2.end_points[sort_on]; + } + }; + std::sort(edges.begin(), edges.end(), compare(sort_on, other_end)); std::shared_ptr t = std::make_shared(0, 0); - t->indices.resize(NumEdges()); - t->edge_ids.resize(NumEdges()); - t->indptr.reserve(NumVertices()); + t->indices.resize(edges.size()); + t->edge_ids.resize(edges.size()); t->indptr.push_back(0); - for (size_t i = 0; i < NumEdges(); i++) { - t->indices[i] = edges[i].end_point1; + for (size_t i = 0; i < edges.size(); i++) { + t->indices[i] = edges[i].end_points[other_end]; t->edge_ids[i] = edges[i].edge_id; - dgl_id_t vid = edges[i].end_point2; + dgl_id_t vid = edges[i].end_points[sort_on]; int64_t off; if (t->indptr.empty()) off = 0; @@ -215,10 +211,46 @@ std::shared_ptr ImmutableGraph::csr::Transpose() const { t->indptr.push_back(i); assert(t->indptr.size() == vid + 1); } - t->indptr.push_back(NumEdges()); + t->indptr.push_back(edges.size()); return t; } +std::shared_ptr ImmutableGraph::csr::Transpose() const { + std::vector edges(NumEdges()); + for (size_t i = 0; i < NumVertices(); i++) { + const dgl_id_t *indices_begin = &indices[indptr[i]]; + const dgl_id_t *eid_begin = &edge_ids[indptr[i]]; + for (size_t j = 0; j < GetDegree(i); j++) { + edge e; + e.end_points[0] = i; + e.end_points[1] = indices_begin[j]; + e.edge_id = eid_begin[j]; + edges[indptr[i] + j] = e; + } + } + return from_edges(edges, 1); +} + +ImmutableGraph::ImmutableGraph(IdArray src_ids, 
IdArray dst_ids, IdArray edge_ids, + bool multigraph) : is_multigraph_(multigraph) { + int64_t len = src_ids->shape[0]; + assert(len == dst_ids->shape[0]); + assert(len == edge_ids->shape[0]); + const dgl_id_t *src_data = static_cast(src_ids->data); + const dgl_id_t *dst_data = static_cast(dst_ids->data); + const dgl_id_t *edge_data = static_cast(edge_ids->data); + std::vector edges(len); + for (size_t i = 0; i < edges.size(); i++) { + edge e; + e.end_points[0] = src_data[i]; + e.end_points[1] = dst_data[i]; + e.edge_id = edge_data[i]; + edges[i] = e; + } + in_csr_ = csr::from_edges(edges, 1); + out_csr_ = csr::from_edges(edges, 0); +} + BoolArray ImmutableGraph::HasVertices(IdArray vids) const { CHECK(IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; From 7aeebd8589851436f2e13d75d3610f033abca764 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 28 Dec 2018 22:08:37 +0800 Subject: [PATCH 04/75] add immutable graph API. --- src/graph/graph_apis.cc | 239 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 231 insertions(+), 8 deletions(-) diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index 3006812cfb98..c1fe3279f143 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -4,6 +4,7 @@ * \brief DGL graph index APIs */ #include +#include #include #include "../c_api_common.h" @@ -17,7 +18,8 @@ namespace dgl { namespace { // Convert EdgeArray structure to PackedFunc. -PackedFunc ConvertEdgeArrayToPackedFunc(const Graph::EdgeArray& ea) { +template +PackedFunc ConvertEdgeArrayToPackedFunc(const EdgeArray& ea) { auto body = [ea] (DGLArgs args, DGLRetValue* rv) { const int which = args[0]; if (which == 0) { @@ -53,8 +55,30 @@ PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) { return PackedFunc(body); } +// Convert Subgraph structure to PackedFunc. 
+PackedFunc ConvertSubgraphToPackedFunc(const ImmutableSubgraph& sg) { + auto body = [sg] (DGLArgs args, DGLRetValue* rv) { + const int which = args[0]; + if (which == 0) { + ImmutableGraph* gptr = new ImmutableGraph(); + *gptr = std::move(sg.graph); + GraphHandle ghandle = gptr; + *rv = ghandle; + } else if (which == 1) { + *rv = std::move(sg.induced_vertices); + } else if (which == 2) { + *rv = std::move(sg.induced_edges); + } else { + LOG(FATAL) << "invalid choice"; + } + }; + return PackedFunc(body); +} + } // namespace +///////////////////////////// Graph API /////////////////////////////////// + DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate") .set_body([] (DGLArgs args, DGLRetValue* rv) { bool multigraph = static_cast(args[0]); @@ -147,13 +171,6 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID") *rv = GraphOp::MapParentIdToSubgraphId(parent_vids, query); }); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLExpandIds") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - const IdArray ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); - const IdArray offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = GraphOp::ExpandIds(ids, offsets); - }); - DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; @@ -367,4 +384,210 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") *rv = lghandle; }); +///////////////////////////// Immutable Graph API /////////////////////////////////// + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphCreate") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); + const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); + bool multigraph = static_cast(args[3]); + GraphHandle ghandle = new 
ImmutableGraph(src_ids, dst_ids, edge_ids, multigraph); + *rv = ghandle; + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphFree") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + ImmutableGraph* gptr = static_cast(ghandle); + delete gptr; + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphIsMultigraph") +.set_body([] (DGLArgs args, DGLRetValue *rv) { + GraphHandle ghandle = args[0]; + // NOTE: not const since we have caches + const ImmutableGraph* gptr = static_cast(ghandle); + *rv = gptr->IsMultigraph(); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphNumVertices") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + *rv = static_cast(gptr->NumVertices()); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphNumEdges") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + *rv = static_cast(gptr->NumEdges()); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasVertex") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const dgl_id_t vid = args[1]; + *rv = gptr->HasVertex(vid); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasVertices") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + *rv = gptr->HasVertices(vids); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLExpandIds") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + const IdArray ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); + const IdArray offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + *rv =
GraphOp::ExpandIds(ids, offsets); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasEdgeBetween") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const dgl_id_t src = args[1]; + const dgl_id_t dst = args[2]; + *rv = gptr->HasEdgeBetween(src, dst); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasEdgesBetween") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); + *rv = gptr->HasEdgesBetween(src, dst); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphPredecessors") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const dgl_id_t vid = args[1]; + const uint64_t radius = args[2]; + *rv = gptr->Predecessors(vid, radius); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphSuccessors") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const dgl_id_t vid = args[1]; + const uint64_t radius = args[2]; + *rv = gptr->Successors(vid, radius); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdgeId") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const dgl_id_t src = args[1]; + const dgl_id_t dst = args[2]; + *rv = gptr->EdgeId(src, dst); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdgeIds") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const IdArray src = 
IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); + *rv = ConvertEdgeArrayToPackedFunc(gptr->EdgeIds(src, dst)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphInEdges_1") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const dgl_id_t vid = args[1]; + *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vid)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphInEdges_2") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vids)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphOutEdges_1") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const dgl_id_t vid = args[1]; + *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vid)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphOutEdges_2") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vids)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdges") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const bool sorted = args[1]; + *rv = ConvertEdgeArrayToPackedFunc(gptr->Edges(sorted)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphInDegree") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = 
static_cast(ghandle); + const dgl_id_t vid = args[1]; + *rv = static_cast(gptr->InDegree(vid)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphInDegrees") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + *rv = gptr->InDegrees(vids); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphOutDegree") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const dgl_id_t vid = args[1]; + *rv = static_cast(gptr->OutDegree(vid)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphOutDegrees") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + *rv = gptr->OutDegrees(vids); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphVertexSubgraph") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph* gptr = static_cast(ghandle); + const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + *rv = ConvertSubgraphToPackedFunc(gptr->VertexSubgraph(vids)); + }); + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdgeSubgraph") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + const ImmutableGraph *gptr = static_cast(ghandle); + const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + *rv = ConvertSubgraphToPackedFunc(gptr->EdgeSubgraph(eids)); + }); + } // namespace dgl From 7b0c433b58a4382dffeb045048d3bf6ddaeacb8e Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 4 Jan 2019 19:21:44 +0800 Subject: [PATCH 05/75] fix. 
--- include/dgl/immutable_graph.h | 12 +- python/dgl/immutable_graph_index.py | 207 +++++++++--------- src/graph/graph_apis.cc | 3 +- src/graph/immutable_graph.cc | 28 ++- .../{test_basics.py => test_graph_index.py} | 0 5 files changed, 125 insertions(+), 125 deletions(-) rename tests/graph_index/{test_basics.py => test_graph_index.py} (100%) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index cd758fc5066c..ebf7aeb31801 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -69,10 +69,11 @@ class ImmutableGraph { } std::shared_ptr Transpose() const; std::pair, IdArray> VertexSubgraph(IdArray vids) const; - static std::shared_ptr from_edges(std::vector &edges, int sort_on); + static std::shared_ptr from_edges(std::vector &edges, int sort_on, int64_t num_nodes); }; - ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, bool multigraph = false); + ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, + bool multigraph = false); ImmutableGraph(std::shared_ptr in_csr, std::shared_ptr out_csr, bool multigraph = false) : is_multigraph_(multigraph) { @@ -212,7 +213,9 @@ class ImmutableGraph { * \return the id arrays of the two endpoints of the edges. */ EdgeArray OutEdges(dgl_id_t vid) const { - return this->GetOutCSR()->GetEdges(vid); + auto ret = this->GetOutCSR()->GetEdges(vid); + // We should reverse the source and destination in the edge array. + return ImmutableGraph::EdgeArray{ret.dst, ret.src, ret.id}; } /*! @@ -221,7 +224,8 @@ class ImmutableGraph { * \return the id arrays of the two endpoints of the edges. */ EdgeArray OutEdges(IdArray vids) const { - return this->GetOutCSR()->GetEdges(vids); + auto ret = this->GetOutCSR()->GetEdges(vids); + return ImmutableGraph::EdgeArray{ret.dst, ret.src, ret.id}; } /*! 
diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index 2cac299d6174..aadb3e79019d 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -22,14 +22,23 @@ class ImmutableGraphIndex(object): ---------- backend_csr: a csr array provided by the backend framework. """ - def __init__(self, backend_sparse): - self._sparse = backend_sparse + def __init__(self): + self._handle = None self._num_nodes = None self._num_edges = None - self._in_deg = None - self._out_deg = None self._cache = {} + def init(self, src_ids, dst_ids, edge_ids, num_nodes): + self._handle = _CAPI_DGLGraphCreate(src_ids.todgltensor(), dst_ids.todgltensor(), + edge_ids.todgltensor(), False, num_nodes) + self._num_nodes = num_nodes + self._num_edges = None + + def __del__(self): + """Free this graph index object.""" + if self._handle is not None: + _CAPI_DGLGraphFree(self._handle) + def add_nodes(self, num): """Add nodes. @@ -76,8 +85,7 @@ def is_multigraph(self): bool True if it is a multigraph, False otherwise. """ - # Immutable graph doesn't support multi-edge. - return False + return bool(_CAPI_DGLGraphIsMultigraph(self._handle)) def is_readonly(self): """Indicate whether the graph index is read-only. 
@@ -98,7 +106,7 @@ def number_of_nodes(self): The number of nodes """ if self._num_nodes is None: - self._num_nodes = self._sparse.number_of_nodes() + self._num_nodes = _CAPI_DGLGraphNumVertices(self._handle) return self._num_nodes def number_of_edges(self): @@ -110,7 +118,7 @@ def number_of_edges(self): The number of edges """ if self._num_edges is None: - self._num_edges = self._sparse.number_of_edges() + self._num_edges = _CAPI_DGLGraphNumEdges(self._handle) return self._num_edges def has_node(self, vid): @@ -126,7 +134,7 @@ def has_node(self, vid): bool True if the node exists """ - return vid < self.number_of_nodes() + return bool(_CAPI_DGLGraphHasVertex(self._handle, vid)) def has_nodes(self, vids): """Return true if the nodes exist. @@ -141,8 +149,8 @@ def has_nodes(self, vids): utils.Index 0-1 array indicating existence """ - vid_array = vids.tousertensor() - return utils.toindex(vid_array < self.number_of_nodes()) + vid_array = vids.todgltensor() + return utils.toindex(_CAPI_DGLGraphHasVertices(self._handle, vid_array)) def has_edge_between(self, u, v): """Return true if the edge exists. @@ -159,9 +167,7 @@ def has_edge_between(self, u, v): bool True if the edge exists """ - u = F.tensor([u], dtype=F.int64) - v = F.tensor([v], dtype=F.int64) - return self._sparse.has_edges(u, v).asnumpy()[0] + return bool(_CAPI_DGLGraphHasEdgeBetween(self._handle, u, v)) def has_edges_between(self, u, v): """Return true if the edge exists. @@ -178,8 +184,9 @@ def has_edges_between(self, u, v): utils.Index 0-1 array indicating existence """ - ret = self._sparse.has_edges(u.tousertensor(), v.tousertensor()) - return utils.toindex(ret) + u_array = u.todgltensor() + v_array = v.todgltensor() + return utils.toindex(_CAPI_DGLGraphHasEdgesBetween(self._handle, u_array, v_array)) def predecessors(self, v, radius=1): """Return the predecessors of the node. 
@@ -196,8 +203,7 @@ def predecessors(self, v, radius=1): utils.Index Array of predecessors """ - pred = self._sparse.predecessors(v, radius) - return utils.toindex(pred) + return utils.toindex(_CAPI_DGLGraphPredecessors(self._handle, v, radius)) def successors(self, v, radius=1): """Return the successors of the node. @@ -214,8 +220,7 @@ def successors(self, v, radius=1): utils.Index Array of successors """ - succ = self._sparse.successors(v, radius) - return utils.toindex(succ) + return utils.toindex(_CAPI_DGLGraphSuccessors(self._handle, v, radius)) def edge_id(self, u, v): """Return the id of the edge. @@ -232,10 +237,7 @@ def edge_id(self, u, v): int The edge id. """ - u = F.tensor([u], dtype=F.int64) - v = F.tensor([v], dtype=F.int64) - _, _, eid = self._sparse.edge_ids(u, v) - return utils.toindex(eid) + return utils.toindex(_CAPI_DGLGraphEdgeId(self._handle, u, v)) def edge_ids(self, u, v): """Return the edge ids. @@ -256,10 +258,15 @@ def edge_ids(self, u, v): utils.Index The edge ids. """ - u = u.tousertensor() - v = v.tousertensor() - u, v, ids = self._sparse.edge_ids(u, v) - return utils.toindex(u), utils.toindex(v), utils.toindex(ids) + u_array = u.todgltensor() + v_array = v.todgltensor() + edge_array = _CAPI_DGLGraphEdgeIds(self._handle, u_array, v_array) + + src = utils.toindex(edge_array(0)) + dst = utils.toindex(edge_array(1)) + eid = utils.toindex(edge_array(2)) + + return src, dst, eid def find_edges(self, eid): """Return a triplet of arrays that contains the edge IDs. @@ -297,11 +304,15 @@ def in_edges(self, v): utils.Index The edge ids. 
""" - dst = v.tousertensor() - indptr, src, edges = self._sparse.in_edges(dst) - off = utils.toindex(indptr) - dst = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor()) - return utils.toindex(src), utils.toindex(dst), utils.toindex(edges) + if len(v) == 1: + edge_array = _CAPI_DGLGraphInEdges_1(self._handle, v[0]) + else: + v_array = v.todgltensor() + edge_array = _CAPI_DGLGraphInEdges_2(self._handle, v_array) + src = utils.toindex(edge_array(0)) + dst = utils.toindex(edge_array(1)) + eid = utils.toindex(edge_array(2)) + return src, dst, eid def out_edges(self, v): """Return the out edges of the node(s). @@ -320,13 +331,18 @@ def out_edges(self, v): utils.Index The edge ids. """ - src = v.tousertensor() - indptr, dst, edges = self._sparse.out_edges(src) - off = utils.toindex(indptr) - src = _CAPI_DGLExpandIds(v.todgltensor(), off.todgltensor()) - return utils.toindex(src), utils.toindex(dst), utils.toindex(edges) - - def edges(self, return_sorted=False): + if len(v) == 1: + edge_array = _CAPI_DGLGraphOutEdges_1(self._handle, v[0]) + else: + v_array = v.todgltensor() + edge_array = _CAPI_DGLGraphOutEdges_2(self._handle, v_array) + src = utils.toindex(edge_array(0)) + dst = utils.toindex(edge_array(1)) + eid = utils.toindex(edge_array(2)) + return src, dst, eid + + @utils.cached_member(cache='_cache', prefix='edges') + def edges(self, sorted=False): """Return all the edges Parameters @@ -343,21 +359,14 @@ def edges(self, return_sorted=False): utils.Index The edge ids. 
""" - if "all_edges" in self._cache: - return self._cache["all_edges"] - src, dst, edges = self._sparse.edges(return_sorted) - self._cache["all_edges"] = (utils.toindex(src), utils.toindex(dst), utils.toindex(edges)) - return self._cache["all_edges"] - - def _get_in_degree(self): - if 'in_deg' not in self._cache: - self._cache['in_deg'] = self._sparse.get_in_degree() - return self._cache['in_deg'] - - def _get_out_degree(self): - if 'out_deg' not in self._cache: - self._cache['out_deg'] = self._sparse.get_out_degree() - return self._cache['out_deg'] + key = 'edges_s%d' % sorted + if key not in self._cache: + edge_array = _CAPI_DGLGraphEdges(self._handle, sorted) + src = utils.toindex(edge_array(0)) + dst = utils.toindex(edge_array(1)) + eid = utils.toindex(edge_array(2)) + self._cache[key] = (src, dst, eid) + return self._cache[key] def in_degree(self, v): """Return the in degree of the node. @@ -372,8 +381,7 @@ def in_degree(self, v): int The in degree. """ - deg = self._get_in_degree() - return deg[v] + return _CAPI_DGLGraphInDegree(self._handle, v) def in_degrees(self, v): """Return the in degrees of the nodes. @@ -388,12 +396,8 @@ def in_degrees(self, v): int The in degree array. """ - deg = self._get_in_degree() - if v.is_slice(0, self.number_of_nodes()): - return utils.toindex(deg) - else: - v_array = v.tousertensor() - return utils.toindex(F.gather_row(deg, v_array)) + v_array = v.todgltensor() + return utils.toindex(_CAPI_DGLGraphInDegrees(self._handle, v_array)) def out_degree(self, v): """Return the out degree of the node. @@ -408,8 +412,7 @@ def out_degree(self, v): int The out degree. """ - deg = self._get_out_degree() - return deg[v] + return _CAPI_DGLGraphOutDegree(self._handle, v) def out_degrees(self, v): """Return the out degrees of the nodes. @@ -424,12 +427,8 @@ def out_degrees(self, v): int The out degree array. 
""" - deg = self._get_out_degree() - if v.is_slice(0, self.number_of_nodes()): - return utils.toindex(deg) - else: - v_array = v.tousertensor() - return utils.toindex(F.gather_row(deg, v_array)) + v_array = v.todgltensor() + return utils.toindex(_CAPI_DGLGraphOutDegrees(self._handle, v_array)) def node_subgraph(self, v): """Return the induced node subgraph. @@ -444,9 +443,10 @@ def node_subgraph(self, v): ImmutableSubgraphIndex The subgraph index. """ - v = v.tousertensor() - gidx, induced_n, induced_e = self._sparse.node_subgraph(v) - return ImmutableSubgraphIndex(gidx, self, induced_n, induced_e) + v_array = v.todgltensor() + rst = _CAPI_DGLGraphVertexSubgraph(self._handle, v_array) + induced_edges = utils.toindex(rst(2)) + return SubgraphIndex(rst(0), self, v, induced_edges) def node_subgraphs(self, vs_arr): """Return the induced node subgraphs. @@ -461,10 +461,8 @@ def node_subgraphs(self, vs_arr): a vector of ImmutableSubgraphIndex The subgraph index. """ - vs_arr = [v.tousertensor() for v in vs_arr] - gis, induced_nodes, induced_edges = self._sparse.node_subgraphs(vs_arr) - return [ImmutableSubgraphIndex(gidx, self, induced_n, induced_e) - for gidx, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)] + #TODO + pass def edge_subgraph(self, e): """Return the induced edge subgraph. @@ -584,8 +582,7 @@ def from_networkx(self, nx_graph): The nx graph """ if not isinstance(nx_graph, nx.Graph): - nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph() - else nx.DiGraph(nx_graph)) + nx_graph = nx.DiGraph(nx_graph) else: if not nx_graph.is_directed(): # to_directed creates a deep copy of the networkx graph even if @@ -614,14 +611,10 @@ def from_networkx(self, nx_graph): num_nodes = nx_graph.number_of_nodes() # We store edge Ids as an edge attribute. 
- eid = F.tensor(eid, dtype=np.int32) - src = F.tensor(src, dtype=np.int64) - dst = F.tensor(dst, dtype=np.int64) - out_csr, _ = F.sparse_matrix(eid, ('coo', (src, dst)), (num_nodes, num_nodes)) - in_csr, _ = F.sparse_matrix(eid, ('coo', (dst, src)), (num_nodes, num_nodes)) - out_csr = out_csr.astype(np.int64) - in_csr = in_csr.astype(np.int64) - self._sparse = F.create_immutable_graph_index(in_csr, out_csr) + eid = utils.toindex(eid) + src = utils.toindex(src) + dst = utils.toindex(dst) + self.init(src_ids, dst_ids, edge_ids, num_nodes) def from_scipy_sparse_matrix(self, adj): """Convert from scipy sparse matrix. @@ -632,10 +625,15 @@ def from_scipy_sparse_matrix(self, adj): ---------- adj : scipy sparse matrix """ - if not isinstance(adj, (sp.csr_matrix, sp.coo_matrix)): - raise DGLError("The input matrix has to be a SciPy sparse matrix.") + assert isinstance(adj, sp.csr_matrix) or isinstance(adj, sp.coo_matrix), \ + "The input matrix has to be a SciPy sparse matrix." + assert adj.shape[0] == adj.shape[1], \ + "We only support symmetric matrices" out_mat = adj.tocoo() - self._sparse.from_coo_matrix(out_mat) + src_ids = utils.toindex(out_mat.row) + dst_ids = utils.toindex(out_mat.col) + edge_ids = utils.toindex(F.arange(0, len(out_mat.row))) + self.init(src_ids, dst_ids, edge_ids, adj.shape[0]) def from_edge_list(self, elist): """Convert from an edge list. @@ -645,7 +643,15 @@ def from_edge_list(self, elist): elist : list List of (u, v) edge tuple. """ - self._sparse.from_edge_list(elist) + src, dst = zip(*elist) + src = np.array(src) + dst = np.array(dst) + src_ids = utils.toindex(src) + dst_ids = utils.toindex(dst) + num_nodes = max(src.max(), dst.max()) + 1 + edge_ids = utils.toindex(F.arange(0, len(src))) + # TODO we need to detect multigraph automatically. + self.init(src_ids, dst_ids, edge_ids, num_nodes) def line_graph(self, backtracking=True): """Return the line graph of this graph. 
@@ -762,25 +768,16 @@ def create_immutable_graph_index(graph_data=None): assert F.create_immutable_graph_index is not None, \ "The selected backend doesn't support read-only graph!" - try: - # Let's try using the graph data to generate an immutable graph index. - # If we are successful, we can return the immutable graph index immediately. - # If graph_data is None, we return an empty graph index. - # If we can't create a graph index, we'll use the code below to handle the graph. - return ImmutableGraphIndex(F.create_immutable_graph_index(graph_data)) - except Exception: # pylint: disable=broad-except - pass - # Let's create an empty graph index first. - gidx = ImmutableGraphIndex(F.create_immutable_graph_index()) + gi = ImmutableGraphIndex() # edge list if isinstance(graph_data, (list, tuple)): try: - gidx.from_edge_list(graph_data) - return gidx + gi.from_edge_list(graph_data) + return gi except Exception: # pylint: disable=broad-except - raise DGLError('Graph data is not a valid edge list.') + raise DGLError('Graph data is not a valid edge list for immutable_graph_index.') # scipy format if isinstance(graph_data, sp.spmatrix): diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index c1fe3279f143..1d4a4183c5b5 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -392,7 +392,8 @@ DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphCreate") const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); bool multigraph = static_cast(args[3]); - GraphHandle ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, multigraph); + int64_t num_nodes = static_cast(args[4]); + GraphHandle ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph); *rv = ghandle; }); diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 53e3e6da0e90..b94cce56b178 100644 --- a/src/graph/immutable_graph.cc +++ 
b/src/graph/immutable_graph.cc @@ -173,7 +173,7 @@ std::pair, IdArray> ImmutableGraph::csr::Ve } std::shared_ptr ImmutableGraph::csr::from_edges(std::vector &edges, - int sort_on) { + int sort_on, int64_t num_nodes) { assert(sort_on == 0 || sort_on == 1); int other_end = sort_on == 1 ? 0 : 1; // TODO(zhengda) we should sort in parallel. @@ -195,23 +195,18 @@ std::shared_ptr ImmutableGraph::csr::from_edges(std::vector std::shared_ptr t = std::make_shared(0, 0); t->indices.resize(edges.size()); t->edge_ids.resize(edges.size()); - t->indptr.push_back(0); for (size_t i = 0; i < edges.size(); i++) { t->indices[i] = edges[i].end_points[other_end]; + assert(t->indices[i] < num_nodes); t->edge_ids[i] = edges[i].edge_id; dgl_id_t vid = edges[i].end_points[sort_on]; - int64_t off; - if (t->indptr.empty()) - off = 0; - else - off = t->indptr.back(); - while (vid > 0 && t->indptr.size() < static_cast(vid - 1)) - t->indptr.push_back(off); - if (t->indptr.size() < vid) + assert(vid < num_nodes); + while (vid > 0 && t->indptr.size() <= static_cast(vid)) t->indptr.push_back(i); assert(t->indptr.size() == vid + 1); } - t->indptr.push_back(edges.size()); + while (t->indptr.size() < num_nodes + 1) + t->indptr.push_back(edges.size()); return t; } @@ -228,10 +223,10 @@ std::shared_ptr ImmutableGraph::csr::Transpose() const { edges[indptr[i] + j] = e; } } - return from_edges(edges, 1); + return from_edges(edges, 1, NumVertices()); } -ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, +ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, bool multigraph) : is_multigraph_(multigraph) { int64_t len = src_ids->shape[0]; assert(len == dst_ids->shape[0]); @@ -247,8 +242,8 @@ ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_id e.edge_id = edge_data[i]; edges[i] = e; } - in_csr_ = csr::from_edges(edges, 1); - out_csr_ = csr::from_edges(edges, 0); + in_csr_ = csr::from_edges(edges, 1, 
num_nodes); + out_csr_ = csr::from_edges(edges, 0, num_nodes); } BoolArray ImmutableGraph::HasVertices(IdArray vids) const { @@ -450,8 +445,11 @@ ImmutableGraph::EdgeArray ImmutableGraph::Edges(bool sorted) const { ImmutableSubgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { ImmutableSubgraph subg; std::pair, IdArray> ret; + // We prefer to generate a subgraph for in-csr first. if (in_csr_) { ret = in_csr_->VertexSubgraph(vids); + // When we generate a subgraph, it may be used by only accessing in-edges or out-edges. + // We don't need to generate both. subg.graph = ImmutableGraph(ret.first, nullptr, IsMultigraph()); } else { assert(out_csr_); diff --git a/tests/graph_index/test_basics.py b/tests/graph_index/test_graph_index.py similarity index 100% rename from tests/graph_index/test_basics.py rename to tests/graph_index/test_graph_index.py From 665c4c06b696be1fbead41dcd60d1b5a80dadd0d Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 4 Jan 2019 20:56:49 +0800 Subject: [PATCH 06/75] impl get adjacency matrix. 
--- include/dgl/immutable_graph.h | 7 ++++++ python/dgl/immutable_graph_index.py | 11 ++++++--- src/graph/graph_apis.cc | 32 ++++++++++++++++++++++++++ src/graph/immutable_graph.cc | 35 +++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 3 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index ebf7aeb31801..1cf42b01b19e 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -28,6 +28,10 @@ class ImmutableGraph { IdArray src, dst, id; } EdgeArray; + typedef struct { + IdArray indptr, indices, id; + } CSRArray; + struct edge { dgl_id_t end_points[2]; dgl_id_t edge_id; @@ -366,6 +370,9 @@ class ImmutableGraph { in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]); } + CSRArray GetInCSRArray() const; + CSRArray GetOutCSRArray() const; + protected: std::pair GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; std::pair GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index aadb3e79019d..e9eab5af6c1a 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -509,13 +509,18 @@ def adjacency_matrix(self, transpose=False, ctx=F.cpu()): Returns ------- - utils.CtxCachedObject - An object that returns tensor given context. + SparseTensor + The adjacency matrix. utils.Index A index for data shuffling due to sparse format change. Return None if shuffle is not required. """ - return self._sparse.adjacency_matrix(transpose, ctx), None + rst = _CAPI_DGLGraphGetCSR(self._handle, transpose) + indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) + indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) + dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) + return F.sparse_matrix(dat, ('csr', indices, indptr), + (self.number_of_nodes(), self.number_of_nodes())) def incidence_matrix(self, typestr, ctx): """Return the incidence matrix representation of this graph. 
diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index 1d4a4183c5b5..a4b42065a5a3 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -35,6 +35,24 @@ PackedFunc ConvertEdgeArrayToPackedFunc(const EdgeArray& ea) { return PackedFunc(body); } +// Convert CSRArray structure to PackedFunc. +template +PackedFunc ConvertCSRArrayToPackedFunc(const CSRArray& ea) { + auto body = [ea] (DGLArgs args, DGLRetValue* rv) { + const int which = args[0]; + if (which == 0) { + *rv = std::move(ea.indptr); + } else if (which == 1) { + *rv = std::move(ea.indices); + } else if (which == 2) { + *rv = std::move(ea.id); + } else { + LOG(FATAL) << "invalid choice"; + } + }; + return PackedFunc(body); +} + // Convert Subgraph structure to PackedFunc. PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) { auto body = [sg] (DGLArgs args, DGLRetValue* rv) { @@ -591,4 +609,18 @@ DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdgeSubgraph") *rv = ConvertSubgraphToPackedFunc(gptr->EdgeSubgraph(eids)); }); +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphGetCSR") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + bool transpose = args[1]; + const ImmutableGraph *gptr = static_cast(ghandle); + ImmutableGraph::CSRArray csr; + if (transpose) { + csr = gptr->GetOutCSRArray(); + } else { + csr = gptr->GetInCSRArray(); + } + *rv = ConvertCSRArrayToPackedFunc(csr); + }); + } // namespace dgl diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index b94cce56b178..178867678992 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -207,6 +207,7 @@ std::shared_ptr ImmutableGraph::csr::from_edges(std::vector } while (t->indptr.size() < num_nodes + 1) t->indptr.push_back(edges.size()); + assert(t->indptr.size() == num_nodes + 1); return t; } @@ -465,4 +466,38 @@ ImmutableSubgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const { return ImmutableSubgraph(); } 
+ImmutableGraph::CSRArray ImmutableGraph::GetInCSRArray() const { + auto in_csr = GetInCSR(); + IdArray indptr = IdArray::Empty({static_cast(in_csr->indptr.size())}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indices = IdArray::Empty({static_cast(in_csr->NumEdges())}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eids = IdArray::Empty({static_cast(in_csr->NumEdges())}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + int64_t *indptr_data = static_cast(indptr->data); + dgl_id_t* indices_data = static_cast(indices->data); + dgl_id_t* eid_data = static_cast(eids->data); + std::copy(in_csr->indptr.begin(), in_csr->indptr.end(), indptr_data); + std::copy(in_csr->indices.begin(), in_csr->indices.end(), indices_data); + std::copy(in_csr->edge_ids.begin(), in_csr->edge_ids.end(), eid_data); + return CSRArray{indptr, indices, eids}; +} + +ImmutableGraph::CSRArray ImmutableGraph::GetOutCSRArray() const { + auto out_csr = GetOutCSR(); + IdArray indptr = IdArray::Empty({static_cast(out_csr->indptr.size())}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indices = IdArray::Empty({static_cast(out_csr->NumEdges())}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eids = IdArray::Empty({static_cast(out_csr->NumEdges())}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + int64_t *indptr_data = static_cast(indptr->data); + dgl_id_t* indices_data = static_cast(indices->data); + dgl_id_t* eid_data = static_cast(eids->data); + std::copy(out_csr->indptr.begin(), out_csr->indptr.end(), indptr_data); + std::copy(out_csr->indices.begin(), out_csr->indices.end(), indices_data); + std::copy(out_csr->edge_ids.begin(), out_csr->edge_ids.end(), eid_data); + return CSRArray{indptr, indices, eids}; +} + } // namespace dgl From 21c8e8e227d08ff3ee742d8f1bff2736dd626eb8 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 4 Jan 2019 21:46:44 +0800 Subject: [PATCH 07/75] fix. 
--- python/dgl/immutable_graph_index.py | 21 ++++++++++----------- src/graph/immutable_graph.cc | 12 ++++++------ tests/mxnet/test_graph_index.py | 16 ++++++++++++++++ 3 files changed, 32 insertions(+), 17 deletions(-) diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index e9eab5af6c1a..65708d9a29e6 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -22,8 +22,8 @@ class ImmutableGraphIndex(object): ---------- backend_csr: a csr array provided by the backend framework. """ - def __init__(self): - self._handle = None + def __init__(self, handle): + self._handle = handle self._num_nodes = None self._num_edges = None self._cache = {} @@ -446,7 +446,7 @@ def node_subgraph(self, v): v_array = v.todgltensor() rst = _CAPI_DGLGraphVertexSubgraph(self._handle, v_array) induced_edges = utils.toindex(rst(2)) - return SubgraphIndex(rst(0), self, v, induced_edges) + return ImmutableSubgraphIndex(rst(0), self, v, induced_edges) def node_subgraphs(self, vs_arr): """Return the induced node subgraphs. @@ -461,8 +461,8 @@ def node_subgraphs(self, vs_arr): a vector of ImmutableSubgraphIndex The subgraph index. """ - #TODO - pass + # TODO(zhengda) we should parallelize the computation here in CAPI. + return [self.node_subgraph(v) for v in vs_arr] def edge_subgraph(self, e): """Return the induced edge subgraph. @@ -632,13 +632,12 @@ def from_scipy_sparse_matrix(self, adj): """ assert isinstance(adj, sp.csr_matrix) or isinstance(adj, sp.coo_matrix), \ "The input matrix has to be a SciPy sparse matrix." 
- assert adj.shape[0] == adj.shape[1], \ - "We only support symmetric matrices" + num_nodes = max(adj.shape[0], adj.shape[1]) out_mat = adj.tocoo() src_ids = utils.toindex(out_mat.row) dst_ids = utils.toindex(out_mat.col) edge_ids = utils.toindex(F.arange(0, len(out_mat.row))) - self.init(src_ids, dst_ids, edge_ids, adj.shape[0]) + self.init(src_ids, dst_ids, edge_ids, num_nodes) def from_edge_list(self, elist): """Convert from an edge list. @@ -688,8 +687,8 @@ class ImmutableSubgraphIndex(ImmutableGraphIndex): induced_edges : a lambda function that returns a tensor The parent edge ids in this subgraph. """ - def __init__(self, backend_sparse, parent, induced_nodes, induced_edges): - super(ImmutableSubgraphIndex, self).__init__(backend_sparse) + def __init__(self, handle, parent, induced_nodes, induced_edges): + super(ImmutableSubgraphIndex, self).__init__(handle) self._parent = parent self._induced_nodes = induced_nodes @@ -774,7 +773,7 @@ def create_immutable_graph_index(graph_data=None): "The selected backend doesn't support read-only graph!" # Let's create an empty graph index first. - gi = ImmutableGraphIndex() + gi = ImmutableGraphIndex(None) # edge list if isinstance(graph_data, (list, tuple)): diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 178867678992..9bbd8060cd54 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -446,16 +446,16 @@ ImmutableGraph::EdgeArray ImmutableGraph::Edges(bool sorted) const { ImmutableSubgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { ImmutableSubgraph subg; std::pair, IdArray> ret; - // We prefer to generate a subgraph for in-csr first. - if (in_csr_) { + // We prefer to generate a subgraph for out-csr first. 
+ if (out_csr_) { + ret = out_csr_->VertexSubgraph(vids); + subg.graph = ImmutableGraph(nullptr, ret.first, IsMultigraph()); + } else { + assert(in_csr_); ret = in_csr_->VertexSubgraph(vids); // When we generate a subgraph, it may be used by only accessing in-edges or out-edges. // We don't need to generate both. subg.graph = ImmutableGraph(ret.first, nullptr, IsMultigraph()); - } else { - assert(out_csr_); - ret = out_csr_->VertexSubgraph(vids); - subg.graph = ImmutableGraph(nullptr, ret.first, IsMultigraph()); } subg.induced_vertices = vids; subg.induced_edges = ret.second; diff --git a/tests/mxnet/test_graph_index.py b/tests/mxnet/test_graph_index.py index d92a7fed6543..0f055491e4fd 100644 --- a/tests/mxnet/test_graph_index.py +++ b/tests/mxnet/test_graph_index.py @@ -7,6 +7,12 @@ from dgl.graph_index import map_to_subgraph_nid, GraphIndex, create_graph_index from dgl import utils +def generate_from_edgelist(): + edges = [[2, 3], [2, 5], [3, 0], [6, 10], [10, 3], [10, 15]] + g = create_graph_index(edges) + ig = create_graph_index(edges, readonly=True) + return g, ig + def generate_rand_graph(n): arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64) g = create_graph_index(arr) @@ -19,6 +25,8 @@ def check_graph_equal(g1, g2): assert mx.nd.sum(adj1 - adj2).asnumpy() == 0 def test_graph_gen(): + g, ig = generate_from_edgelist() + check_graph_equal(g, ig) g, ig = generate_rand_graph(10) check_graph_equal(g, ig) @@ -28,6 +36,9 @@ def check_basics(g, ig): edges = g.edges() iedges = ig.edges() + assert np.all(edges[0].tousertensor().asnumpy() == iedges[0].tousertensor().asnumpy()) + assert np.all(edges[1].tousertensor().asnumpy() == iedges[1].tousertensor().asnumpy()) + assert np.all(edges[2].tousertensor().asnumpy() == iedges[2].tousertensor().asnumpy()) for i in range(g.number_of_nodes()): assert g.has_node(i) == ig.has_node(i) @@ -72,6 +83,8 @@ def check_basics(g, ig): def test_basics(): + g, ig = generate_from_edgelist() + check_basics(g, 
ig) g, ig = generate_rand_graph(100) check_basics(g, ig) @@ -84,6 +97,7 @@ def test_node_subgraph(): randv = np.unique(randv1) subg = g.node_subgraph(utils.toindex(randv)) subig = ig.node_subgraph(utils.toindex(randv)) + check_basics(subg, subig) check_graph_equal(subg, subig) assert mx.nd.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor() == map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor()) == 10 @@ -97,6 +111,7 @@ def test_node_subgraph(): subgs.append(g.node_subgraph(utils.toindex(randv))) subigs= ig.node_subgraphs(randvs) for i in range(4): + check_basics(subg, subig) check_graph_equal(subgs[i], subigs[i]) def test_create_graph(): @@ -110,6 +125,7 @@ def test_create_graph(): rows = [1, 0, 0] cols = [2, 1, 2] mat = sp.sparse.coo_matrix((data, (rows, cols))) + g = dgl.DGLGraph(mat, readonly=False) ig = dgl.DGLGraph(mat, readonly=True) for edge in elist: assert g.edge_id(edge[0], edge[1]) == ig.edge_id(edge[0], edge[1]) From 78b9f29391f99f6c55913200c0cf083ce6cda1d5 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 4 Jan 2019 21:47:14 +0800 Subject: [PATCH 08/75] fix graph_index from scipy matrix. --- python/dgl/graph_index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 4200b2f3236d..3fce05bf0b12 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -706,7 +706,8 @@ def from_scipy_sparse_matrix(self, adj): adj : scipy sparse matrix """ self.clear() - self.add_nodes(adj.shape[0]) + # what if the adj matrix isn't symmetric. + self.add_nodes(max(adj.shape[0], adj.shape[1])) adj_coo = adj.tocoo() src = utils.toindex(adj_coo.row) dst = utils.toindex(adj_coo.col) From 4c596900f00ffe4508e4b2677d3522dad9c9a72c Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 14:24:27 +0800 Subject: [PATCH 09/75] add neighbor sampling. 
--- include/dgl/immutable_graph.h | 23 ++ python/dgl/contrib/sampling/sampler.py | 16 +- python/dgl/immutable_graph_index.py | 47 ++- src/graph/graph_apis.cc | 61 ++++ src/graph/immutable_graph.cc | 401 +++++++++++++++++++++++++ 5 files changed, 525 insertions(+), 23 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 1cf42b01b19e..39ab685fa159 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -16,6 +16,7 @@ namespace dgl { struct ImmutableSubgraph; +struct SampledSubgraph; /*! * \brief Base dgl immutable graph index class. @@ -370,6 +371,9 @@ class ImmutableGraph { in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]); } + SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, + int num_hops, int expand_factor) const; + CSRArray GetInCSRArray() const; CSRArray GetOutCSRArray() const; @@ -400,6 +404,10 @@ class ImmutableGraph { } } + SampledSubgraph SampleSubgraph(IdArray seed_arr, const float* probability, + const std::string &neigh_type, + int num_hops, size_t num_neighbor) const; + // Store the in-edges. std::shared_ptr in_csr_; // Store the out-edges. @@ -428,6 +436,21 @@ struct ImmutableSubgraph { IdArray induced_edges; }; +/*! + * \brief When we sample a subgraph, we need to store extra information, + * such as the layer Ids of the vertices and the sampling probability. + */ +struct SampledSubgraph: public ImmutableSubgraph { + /*! + * \brief the layer of a sampled vertex in the subgraph. + */ + IdArray layer_ids; + /*! + * \brief the probability that a vertex is sampled. 
+ */ + runtime::NDArray sample_prob; +}; + } // namespace dgl #endif // DGL_IMMUTABLE_GRAPH_H_ diff --git a/python/dgl/contrib/sampling/sampler.py b/python/dgl/contrib/sampling/sampler.py index 5da4f6cb426b..88079120b11f 100644 --- a/python/dgl/contrib/sampling/sampler.py +++ b/python/dgl/contrib/sampling/sampler.py @@ -18,8 +18,7 @@ class NSSubgraphLoader(object): def __init__(self, g, batch_size, expand_factor, num_hops=1, neighbor_type='in', node_prob=None, seed_nodes=None, - shuffle=False, num_workers=1, max_subgraph_size=None, - return_seed_id=False): + shuffle=False, num_workers=1, return_seed_id=False): self._g = g if not g._graph.is_readonly(): raise NotImplementedError("subgraph loader only support read-only graphs.") @@ -38,11 +37,6 @@ def __init__(self, g, batch_size, expand_factor, num_hops=1, if shuffle: self._seed_nodes = F.rand_shuffle(self._seed_nodes) self._num_workers = num_workers - if max_subgraph_size is None: - # This size is set temporarily. - self._max_subgraph_size = 1000000 - else: - self._max_subgraph_size = max_subgraph_size self._neighbor_type = neighbor_type self._subgraphs = [] self._seed_ids = [] @@ -61,7 +55,7 @@ def _prefetch(self): self._subgraph_idx += 1 sgi = self._g._graph.neighbor_sampling(seed_ids, self._expand_factor, self._num_hops, self._neighbor_type, - self._node_prob, self._max_subgraph_size) + self._node_prob) subgraphs = [DGLSubGraph(self._g, i.induced_nodes, i.induced_edges, \ i) for i in sgi] self._subgraphs.extend(subgraphs) @@ -200,7 +194,7 @@ def __iter__(self): def NeighborSampler(g, batch_size, expand_factor, num_hops=1, neighbor_type='in', node_prob=None, seed_nodes=None, - shuffle=False, num_workers=1, max_subgraph_size=None, + shuffle=False, num_workers=1, return_seed_id=False, prefetch=False): '''Create a sampler that samples neighborhood. @@ -246,8 +240,6 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, If it's None, the seed vertices are all vertices in the graph. 
shuffle: indicates the sampled subgraphs are shuffled. num_workers: the number of worker threads that sample subgraphs in parallel. - max_subgraph_size: the maximal subgraph size in terms of the number of nodes. - GPU doesn't support very large subgraphs. return_seed_id: indicates whether to return seed ids along with the subgraphs. The seed Ids are in the parent graph. prefetch : bool, default False @@ -260,7 +252,7 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, information about the subgraphs. ''' loader = NSSubgraphLoader(g, batch_size, expand_factor, num_hops, neighbor_type, node_prob, - seed_nodes, shuffle, num_workers, max_subgraph_size, return_seed_id) + seed_nodes, shuffle, num_workers, return_seed_id) if not prefetch: return loader else: diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index 65708d9a29e6..14e5c6e4e8ff 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -479,19 +479,21 @@ def edge_subgraph(self, e): """ raise NotImplementedError('immutable graph doesn\'t implement edge_subgraph for now.') - def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, - node_prob, max_subgraph_size): + def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob): """Neighborhood sampling""" if len(seed_ids) == 0: return [] - seed_ids = [v.tousertensor() for v in seed_ids] - gis, induced_nodes, induced_edges = self._sparse.neighbor_sampling(seed_ids, expand_factor, - num_hops, neighbor_type, - node_prob, - max_subgraph_size) - induced_nodes = [utils.toindex(v) for v in induced_nodes] - return [ImmutableSubgraphIndex(gidx, self, induced_n, induced_e) - for gidx, induced_n, induced_e in zip(gis, induced_nodes, induced_edges)] + + seed_ids = [v.todgltensor() for v in seed_ids] + num_subgs = len(seed_ids) + if node_prob is None: + rst = _DGLGraphUniformSampling(self, seed_ids, neighbor_type, num_hops, expand_factor) + 
else: + rst = _DGLGraphNonUniformSampling(self, node_prob, seed_ids, neighbor_type, num_hops, + expand_factor) + + return [ImmutableSubgraphIndex(rst(i), self, rst(num_subgs + i), + rst(num_subgs * 2 + i)) for i in range(num_subgs)] def adjacency_matrix(self, transpose=False, ctx=F.cpu()): """Return the adjacency matrix representation of this graph. @@ -703,7 +705,7 @@ def induced_edges(self): A lambda function that returns utils.Index The parent edge ids. """ - return lambda: utils.toindex(self._induced_edges()) + return utils.toindex(self._induced_edges) @property def induced_nodes(self): @@ -801,3 +803,26 @@ def create_immutable_graph_index(graph_data=None): return gidx _init_api("dgl.immutable_graph_index") + +_NeighborSamplingAPIs = { + 1: _CAPI_DGLGraphUniformSampling, + 2: _CAPI_DGLGraphUniformSampling2, + 4: _CAPI_DGLGraphUniformSampling4, + 8: _CAPI_DGLGraphUniformSampling8, + 16: _CAPI_DGLGraphUniformSampling16, + 32: _CAPI_DGLGraphUniformSampling32, + 64: _CAPI_DGLGraphUniformSampling64, + 128: _CAPI_DGLGraphUniformSampling128, +} + +_EmptyArrays = [utils.toindex(F.ones(shape=(0), dtype=F.int64, ctx=F.cpu()))] + +def _DGLGraphUniformSampling(gi, seed_ids, neigh_type, num_hops, expand_factor): + num_seeds = len(seed_ids) + empty_ids = [] + if len(seed_ids) > 1 and len(seed_ids) not in _NeighborSamplingAPIs.keys(): + remain = 2**int(math.ceil(math.log2(len(dgl_ids)))) - len(dgl_ids) + empty_ids = _EmptyArrays[0:remain] + seed_ids.extend([empty.todgltensor() for empty in empty_ids]) + assert len(seed_ids) in _NeighborSamplingAPIs.keys() + return _NeighborSamplingAPIs[len(seed_ids)](gi._handle, *seed_ids, neigh_type, num_hops, expand_factor, num_seeds) diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index a4b42065a5a3..d5a5ee3d6864 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -93,6 +93,30 @@ PackedFunc ConvertSubgraphToPackedFunc(const ImmutableSubgraph& sg) { return PackedFunc(body); } +// Convert Sampled Subgraph 
structures to PackedFunc. +PackedFunc ConvertSubgraphToPackedFunc(const std::vector& sg) { + auto body = [sg] (DGLArgs args, DGLRetValue* rv) { + const int which = args[0]; + if (which < sg.size()) { + ImmutableGraph* gptr = new ImmutableGraph(); + *gptr = std::move(sg[which].graph); + GraphHandle ghandle = gptr; + *rv = ghandle; + } else if (which >= sg.size() && which < sg.size() * 2) { + *rv = std::move(sg[which - sg.size()].induced_vertices); + } else if (which >= sg.size() * 2 && which < sg.size() * 3) { + *rv = std::move(sg[which - sg.size() * 2].induced_edges); + } else if (which >= sg.size() * 3 && which < sg.size() * 4) { + *rv = std::move(sg[which - sg.size() * 3].layer_ids); + } else if (which >= sg.size() * 4 && which < sg.size() * 5) { + *rv = std::move(sg[which - sg.size() * 4].sample_prob); + } else { + LOG(FATAL) << "invalid choice"; + } + }; + return PackedFunc(body); +} + } // namespace ///////////////////////////// Graph API /////////////////////////////////// @@ -623,4 +647,41 @@ DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphGetCSR") *rv = ConvertCSRArrayToPackedFunc(csr); }); +template +void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + std::vector seeds(num_seeds); + for (size_t i = 0; i < seeds.size(); i++) + seeds[i] = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[i + 1])); + std::string neigh_type = args[num_seeds + 1]; + int num_hops = args[num_seeds + 2]; + int num_neighbors = args[num_seeds + 3]; + int num_valid_seeds = args[num_seeds + 4]; + const ImmutableGraph *gptr = static_cast(ghandle); + assert(num_valid_seeds <= num_seeds); + std::vector subgs(seeds.size()); +#pragma omp parallel for + for (int i = 0; i < num_valid_seeds; i++) { + subgs[i] = gptr->NeighborUniformSample(seeds[i], neigh_type, num_hops, num_neighbors); + } + *rv = ConvertSubgraphToPackedFunc(subgs); +} + +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling") 
+.set_body(CAPI_NeighborUniformSample<1>); +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling2") +.set_body(CAPI_NeighborUniformSample<2>); +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling4") +.set_body(CAPI_NeighborUniformSample<4>); +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling8") +.set_body(CAPI_NeighborUniformSample<8>); +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling16") +.set_body(CAPI_NeighborUniformSample<16>); +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling32") +.set_body(CAPI_NeighborUniformSample<32>); +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling64") +.set_body(CAPI_NeighborUniformSample<64>); +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling128") +.set_body(CAPI_NeighborUniformSample<128>); + } // namespace dgl diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 9bbd8060cd54..aa4c63a8f0e9 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -500,4 +500,405 @@ ImmutableGraph::CSRArray ImmutableGraph::GetOutCSRArray() const { return CSRArray{indptr, indices, eids}; } +////////////////////////////// Graph Sampling /////////////////////////////// + +/* + * ArrayHeap is used to sample elements from vector + */ +class ArrayHeap { + public: + explicit ArrayHeap(const std::vector& prob) { + vec_size_ = prob.size(); + bit_len_ = ceil(log2(vec_size_)); + limit_ = 1 << bit_len_; + // allocate twice the size + heap_.resize(limit_ << 1, 0); + // allocate the leaves + for (int i = limit_; i < vec_size_+limit_; ++i) { + heap_[i] = prob[i-limit_]; + } + // iterate up the tree (this is O(m)) + for (int i = bit_len_-1; i >= 0; --i) { + for (int j = (1 << i); j < (1 << (i + 1)); ++j) { + heap_[j] = heap_[j << 1] + heap_[(j << 1) + 1]; + } + } + } + ~ArrayHeap() {} + + /* + * Remove term from index (this costs O(log m) steps) + */ + void 
Delete(size_t index) { + size_t i = index + limit_; + float w = heap_[i]; + for (int j = bit_len_; j >= 0; --j) { + heap_[i] -= w; + i = i >> 1; + } + } + + /* + * Add value w to index (this costs O(log m) steps) + */ + void Add(size_t index, float w) { + size_t i = index + limit_; + for (int j = bit_len_; j >= 0; --j) { + heap_[i] += w; + i = i >> 1; + } + } + + /* + * Sample from arrayHeap + */ + size_t Sample(unsigned int* seed) { + float xi = heap_[1] * (rand_r(seed)%100/101.0); + int i = 1; + while (i < limit_) { + i = i << 1; + if (xi >= heap_[i]) { + xi -= heap_[i]; + i += 1; + } + } + return i - limit_; + } + + /* + * Sample a vector by given the size n + */ + void SampleWithoutReplacement(size_t n, std::vector* samples, unsigned int* seed) { + // sample n elements + for (size_t i = 0; i < n; ++i) { + samples->at(i) = this->Sample(seed); + this->Delete(samples->at(i)); + } + } + + private: + int vec_size_; // sample size + int bit_len_; // bit size + int limit_; + std::vector heap_; +}; + +static void RandomSample(size_t set_size, + size_t num, + std::vector* out, + unsigned int* seed) { + std::unordered_set sampled_idxs; + while (sampled_idxs.size() < num) { + sampled_idxs.insert(rand_r(seed) % set_size); + } + out->clear(); + for (auto it = sampled_idxs.begin(); it != sampled_idxs.end(); it++) { + out->push_back(*it); + } +} + +static void NegateSet(const std::vector &idxs, + size_t set_size, + std::vector* out) { + // idxs must have been sorted. 
+ auto it = idxs.begin(); + size_t i = 0; + CHECK_GT(set_size, idxs.back()); + for (; i < set_size && it != idxs.end(); i++) { + if (*it == i) { + it++; + continue; + } + out->push_back(i); + } + for (; i < set_size; i++) { + out->push_back(i); + } +} + +/* + * Uniform sample + */ +static void GetUniformSample(const dgl_id_t* val_list, + const dgl_id_t* col_list, + const size_t ver_len, + const size_t max_num_neighbor, + std::vector* out_ver, + std::vector* out_edge, + unsigned int* seed) { + // Copy ver_list to output + if (ver_len <= max_num_neighbor) { + for (size_t i = 0; i < ver_len; ++i) { + out_ver->push_back(col_list[i]); + out_edge->push_back(val_list[i]); + } + return; + } + // If we just sample a small number of elements from a large neighbor list. + std::vector sorted_idxs; + if (ver_len > max_num_neighbor * 2) { + sorted_idxs.reserve(max_num_neighbor); + RandomSample(ver_len, max_num_neighbor, &sorted_idxs, seed); + std::sort(sorted_idxs.begin(), sorted_idxs.end()); + } else { + std::vector negate; + negate.reserve(ver_len - max_num_neighbor); + RandomSample(ver_len, ver_len - max_num_neighbor, + &negate, seed); + std::sort(negate.begin(), negate.end()); + NegateSet(negate, ver_len, &sorted_idxs); + } + // verify the result. 
+ CHECK_EQ(sorted_idxs.size(), max_num_neighbor); + for (size_t i = 1; i < sorted_idxs.size(); i++) { + CHECK_GT(sorted_idxs[i], sorted_idxs[i - 1]); + } + for (auto idx : sorted_idxs) { + out_ver->push_back(col_list[idx]); + out_edge->push_back(val_list[idx]); + } +} + +/* + * Non-uniform sample via ArrayHeap + */ +static void GetNonUniformSample(const float* probability, + const dgl_id_t* val_list, + const dgl_id_t* col_list, + const size_t ver_len, + const size_t max_num_neighbor, + std::vector* out_ver, + std::vector* out_edge, + unsigned int* seed) { + // Copy ver_list to output + if (ver_len <= max_num_neighbor) { + for (size_t i = 0; i < ver_len; ++i) { + out_ver->push_back(col_list[i]); + out_edge->push_back(val_list[i]); + } + return; + } + // Make sample + std::vector sp_index(max_num_neighbor); + std::vector sp_prob(ver_len); + for (size_t i = 0; i < ver_len; ++i) { + sp_prob[i] = probability[col_list[i]]; + } + ArrayHeap arrayHeap(sp_prob); + arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index, seed); + out_ver->resize(max_num_neighbor); + out_edge->resize(max_num_neighbor); + for (size_t i = 0; i < max_num_neighbor; ++i) { + size_t idx = sp_index[i]; + out_ver->at(i) = col_list[idx]; + out_edge->at(i) = val_list[idx]; + } + sort(out_ver->begin(), out_ver->end()); + sort(out_edge->begin(), out_edge->end()); +} + +/* + * Used for subgraph sampling + */ +struct neigh_list { + std::vector neighs; + std::vector edges; + neigh_list(const std::vector &_neighs, + const std::vector &_edges) + : neighs(_neighs), edges(_edges) {} +}; + +SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, + const float* probability, + const std::string &neigh_type, + int num_hops, + size_t num_neighbor) const { + unsigned int time_seed = time(nullptr); + size_t num_seeds = seed_arr->shape[0]; + auto orig_csr = neigh_type == "in" ? 
GetInCSR() : GetOutCSR(); + const dgl_id_t* val_list = orig_csr->edge_ids.data(); + const dgl_id_t* col_list = orig_csr->indices.data(); + const int64_t* indptr = orig_csr->indptr.data(); + const dgl_id_t* seed = static_cast(seed_arr->data); + + // BFS traverse the graph and sample vertices + // + std::unordered_set sub_ver_mp; + std::vector > sub_vers; + sub_vers.reserve(num_seeds * 10); + // add seed vertices + for (size_t i = 0; i < num_seeds; ++i) { + auto ret = sub_ver_mp.insert(seed[i]); + // If the vertex is inserted successfully. + if (ret.second) { + sub_vers.emplace_back(seed[i], 0); + } + } + std::vector tmp_sampled_src_list; + std::vector tmp_sampled_edge_list; + // ver_id, position + std::vector > neigh_pos; + neigh_pos.reserve(num_seeds); + std::vector neighbor_list; + int64_t num_edges = 0; + + // sub_vers is used both as a node collection and a queue. + // In the while loop, we iterate over sub_vers and new nodes are added to the vector. + // A vertex in the vector only needs to be accessed once. If there is a vertex behind idx + // isn't in the last level, we will sample its neighbors. If not, the while loop terminates. + size_t idx = 0; + while (idx < sub_vers.size()) { + dgl_id_t dst_id = sub_vers[idx].first; + int cur_node_level = sub_vers[idx].second; + idx++; + // If the node is in the last level, we don't need to sample neighbors + // from this node. 
+ if (cur_node_level >= num_hops) + continue; + + tmp_sampled_src_list.clear(); + tmp_sampled_edge_list.clear(); + dgl_id_t ver_len = *(indptr+dst_id+1) - *(indptr+dst_id); + if (probability == nullptr) { // uniform-sample + GetUniformSample(val_list + *(indptr + dst_id), + col_list + *(indptr + dst_id), + ver_len, + num_neighbor, + &tmp_sampled_src_list, + &tmp_sampled_edge_list, + &time_seed); + } else { // non-uniform-sample + GetNonUniformSample(probability, + val_list + *(indptr + dst_id), + col_list + *(indptr + dst_id), + ver_len, + num_neighbor, + &tmp_sampled_src_list, + &tmp_sampled_edge_list, + &time_seed); + } + CHECK_EQ(tmp_sampled_src_list.size(), tmp_sampled_edge_list.size()); + size_t pos = neighbor_list.size(); + neigh_pos.emplace_back(dst_id, pos); + // First we push the size of neighbor vector + neighbor_list.push_back(tmp_sampled_edge_list.size()); + // Then push the vertices + for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) { + neighbor_list.push_back(tmp_sampled_src_list[i]); + } + // Finally we push the edge list + for (size_t i = 0; i < tmp_sampled_edge_list.size(); ++i) { + neighbor_list.push_back(tmp_sampled_edge_list[i]); + } + num_edges += tmp_sampled_src_list.size(); + for (size_t i = 0; i < tmp_sampled_src_list.size(); ++i) { + // We need to add the neighbor in the hashtable here. This ensures that + // the vertex in the queue is unique. If we see a vertex before, we don't + // need to add it to the queue again. + auto ret = sub_ver_mp.insert(tmp_sampled_src_list[i]); + // If the sampled neighbor is inserted to the map successfully. + if (ret.second) + sub_vers.emplace_back(tmp_sampled_src_list[i], cur_node_level + 1); + } + } + // Let's check if there is a vertex that we haven't sampled its neighbors. 
+ for (; idx < sub_vers.size(); idx++) { + if (sub_vers[idx].second < num_hops) { + LOG(WARNING) + << "The sampling is truncated because we have reached the max number of vertices\n" + << "Please use a smaller number of seeds or a small neighborhood"; + break; + } + } + + // Copy sub_ver_mp to output[0] + // Copy layer + int64_t num_vertices = sub_ver_mp.size(); + std::sort(sub_vers.begin(), sub_vers.end(), + [](const std::pair &a1, const std::pair &a2) { + return a1.first < a2.first; + }); + + SampledSubgraph subg; + subg.induced_vertices = IdArray::Empty({num_vertices}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.induced_edges = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.layer_ids = IdArray::Empty({num_vertices}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.sample_prob = runtime::NDArray::Empty({num_vertices}, DLDataType{kDLFloat, 32, 1}, DLContext{kDLCPU, 0}); + + dgl_id_t *out = static_cast(subg.induced_vertices->data); + dgl_id_t *out_layer = static_cast(subg.layer_ids->data); + for (size_t i = 0; i < sub_vers.size(); i++) { + out[i] = sub_vers[i].first; + out_layer[i] = sub_vers[i].second; + } + + // Copy sub_probability + float *sub_prob = static_cast(subg.sample_prob->data); + if (probability != nullptr) { + for (size_t i = 0; i < sub_ver_mp.size(); ++i) { + dgl_id_t idx = out[i]; + sub_prob[i] = probability[idx]; + } + } + + // Construct sub_csr_graph + auto subg_csr = std::make_shared(num_vertices, num_edges); + subg_csr->indices.resize(num_edges); + subg_csr->edge_ids.resize(num_edges); + dgl_id_t* val_list_out = static_cast(subg.induced_edges->data); + dgl_id_t* col_list_out = subg_csr->indices.data(); + int64_t* indptr_out = subg_csr->indptr.data(); + size_t collected_nedges = 0; + + // Both the out array and neigh_pos are sorted. By scanning the two arrays, we can see + // which vertices have neighbors and which don't. 
+ std::sort(neigh_pos.begin(), neigh_pos.end(), + [](const std::pair &a1, const std::pair &a2) { + return a1.first < a2.first; + }); + size_t idx_with_neigh = 0; + for (size_t i = 0; i < num_vertices; i++) { + dgl_id_t dst_id = *(out + i); + // If a vertex is in sub_ver_mp but not in neigh_pos, this vertex must not + // have edges. + size_t edge_size = 0; + if (idx_with_neigh < neigh_pos.size() && dst_id == neigh_pos[idx_with_neigh].first) { + size_t pos = neigh_pos[idx_with_neigh].second; + CHECK_LT(pos, neighbor_list.size()); + edge_size = neighbor_list[pos]; + CHECK_LE(pos + edge_size * 2 + 1, neighbor_list.size()); + + std::copy_n(neighbor_list.begin() + pos + 1, + edge_size, + col_list_out + collected_nedges); + std::copy_n(neighbor_list.begin() + pos + edge_size + 1, + edge_size, + val_list_out + collected_nedges); + collected_nedges += edge_size; + idx_with_neigh++; + } + indptr_out[i+1] = indptr_out[i] + edge_size; + } + + for (size_t i = 0; i < subg_csr->edge_ids.size(); i++) + subg_csr->edge_ids[i] = i; + + if (neigh_type == "in") + subg.graph = ImmutableGraph(subg_csr, nullptr, IsMultigraph()); + else + subg.graph = ImmutableGraph(nullptr, subg_csr, IsMultigraph()); + + return subg; +} + +SampledSubgraph ImmutableGraph::NeighborUniformSample(IdArray seeds, + const std::string &neigh_type, + int num_hops, int expand_factor) const { + return SampleSubgraph(seeds, // seed vector + nullptr, // sample_id_probability + neigh_type, + num_hops, + expand_factor); +} + } // namespace dgl From 4cce9c7c7107f25551b86ea984300e6726813e52 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 15:39:59 +0800 Subject: [PATCH 10/75] remap vertex ids. 
--- include/dgl/immutable_graph.h | 2 ++ src/graph/immutable_graph.cc | 34 +++++++++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 39ab685fa159..488b95b79110 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -408,6 +408,8 @@ class ImmutableGraph { const std::string &neigh_type, int num_hops, size_t num_neighbor) const; + void CompactSubgraph(IdArray induced_vertices); + // Store the in-edges. std::shared_ptr in_csr_; // Store the out-edges. diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index aa4c63a8f0e9..4d14407530f8 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -891,14 +891,38 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, return subg; } +void CompactSubgraph(ImmutableGraph::csr &subg, + const std::unordered_map &id_map) { + for (size_t i = 0; i < subg.indices.size(); i++) { + auto it = id_map.find(subg.indices[i]); + assert(it != id_map.end()); + subg.indices[i] = it->second; + } +} + +void ImmutableGraph::CompactSubgraph(IdArray induced_vertices) { + // The key is the old id, the value is the id in the subgraph. 
+ std::unordered_map id_map; + const dgl_id_t *vdata = static_cast(induced_vertices->data); + size_t len = induced_vertices->shape[0]; + for (size_t i = 0; i < len; i++) + id_map.insert(std::pair(vdata[i], i)); + if (in_csr_) + dgl::CompactSubgraph(*in_csr_, id_map); + if (out_csr_) + dgl::CompactSubgraph(*out_csr_, id_map); +} + SampledSubgraph ImmutableGraph::NeighborUniformSample(IdArray seeds, const std::string &neigh_type, int num_hops, int expand_factor) const { - return SampleSubgraph(seeds, // seed vector - nullptr, // sample_id_probability - neigh_type, - num_hops, - expand_factor); + auto ret = SampleSubgraph(seeds, // seed vector + nullptr, // sample_id_probability + neigh_type, + num_hops, + expand_factor); + ret.graph.CompactSubgraph(ret.induced_vertices); + return ret; } } // namespace dgl From 11f6a7c123bf5dd99faad1e81eb418cca1760b62 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 15:53:51 +0800 Subject: [PATCH 11/75] fix. --- python/dgl/immutable_graph_index.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index 14e5c6e4e8ff..027c5b2e5e31 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -775,13 +775,13 @@ def create_immutable_graph_index(graph_data=None): "The selected backend doesn't support read-only graph!" # Let's create an empty graph index first. 
- gi = ImmutableGraphIndex(None) + gidx = ImmutableGraphIndex(None) # edge list if isinstance(graph_data, (list, tuple)): try: - gi.from_edge_list(graph_data) - return gi + gidx.from_edge_list(graph_data) + return gidx except Exception: # pylint: disable=broad-except raise DGLError('Graph data is not a valid edge list for immutable_graph_index.') @@ -790,7 +790,7 @@ def create_immutable_graph_index(graph_data=None): try: gidx.from_scipy_sparse_matrix(graph_data) return gidx - except Exception: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except raise DGLError('Graph data is not a valid scipy sparse matrix.') # networkx - any format From 304c1e69510be61d62ea9cb5e16171b1e36ea8b2 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 16:03:25 +0800 Subject: [PATCH 12/75] move sampler test. --- tests/{mxnet => compute}/test_sampler.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{mxnet => compute}/test_sampler.py (100%) diff --git a/tests/mxnet/test_sampler.py b/tests/compute/test_sampler.py similarity index 100% rename from tests/mxnet/test_sampler.py rename to tests/compute/test_sampler.py From f594e486c55563ff901554685169b5ff0110ef31 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 16:16:31 +0800 Subject: [PATCH 13/75] fix tests. 
--- tests/compute/test_sampler.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/tests/compute/test_sampler.py b/tests/compute/test_sampler.py index bbff369491b8..ef08dbb0d50b 100644 --- a/tests/compute/test_sampler.py +++ b/tests/compute/test_sampler.py @@ -1,6 +1,4 @@ -import os -os.environ['DGLBACKEND'] = 'mxnet' -import mxnet as mx +import backend as F import numpy as np import scipy as sp import dgl @@ -19,7 +17,7 @@ def test_1neighbor_sampler_all(): assert len(seed_ids) == 1 src, dst, eid = g.in_edges(seed_ids, form='all') # Test if there is a self loop - self_loop = mx.nd.sum(src == dst).asnumpy() == 1 + self_loop = F.asnumpy(F.sum(src == dst, 0)) == 1 if self_loop: assert subg.number_of_nodes() == len(src) else: @@ -30,22 +28,22 @@ def test_1neighbor_sampler_all(): child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all') child_src1 = subg.map_to_subgraph_nid(src) - assert mx.nd.sum(child_src1 == child_src).asnumpy() == len(src) + assert F.asnumpy(F.sum(child_src1 == child_src, 0)) == len(src) def is_sorted(arr): - return np.sum(np.sort(arr) == arr) == len(arr) + return np.sum(np.sort(arr) == arr, 0) == len(arr) def verify_subgraph(g, subg, seed_id): src, dst, eid = g.in_edges(seed_id, form='all') child_id = subg.map_to_subgraph_nid(seed_id) child_src, child_dst, child_eid = subg.in_edges(child_id, form='all') - child_src = child_src.asnumpy() + child_src = F.asnumpy(child_src) # We don't allow duplicate elements in the neighbor list. assert(len(np.unique(child_src)) == len(child_src)) # The neighbor list also needs to be sorted. 
assert(is_sorted(child_src)) - child_src1 = subg.map_to_subgraph_nid(src).asnumpy() + child_src1 = F.asnumpy(subg.map_to_subgraph_nid(src)) child_src1 = child_src1[child_src1 >= 0] for i in child_src: assert i in child_src1 @@ -84,7 +82,7 @@ def test_10neighbor_sampler_all(): child_src, child_dst, child_eid = subg.in_edges(child_ids, form='all') child_src1 = subg.map_to_subgraph_nid(src) - assert mx.nd.sum(child_src1 == child_src).asnumpy() == len(src) + assert F.asnumpy(F.sum(child_src1 == child_src, 0)) == len(src) def check_10neighbor_sampler(g, seeds): # In this case, NeighborSampling simply gets the neighborhood of a single vertex. From 51fc4a960e5809d5c04b9d2d8d375e5fa5fa829d Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 16:24:29 +0800 Subject: [PATCH 14/75] add comments --- include/dgl/immutable_graph.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 488b95b79110..02ac1445d2f0 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -371,10 +371,24 @@ class ImmutableGraph { in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]); } + /*! + * \brief Sample a subgraph from the seed vertices with neighbor sampling. + * The neighbors are sampled with a uniformly distribution. + * \return a subgraph + */ SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, int num_hops, int expand_factor) const; + /*! + * \brief Get the CSR array that represents the in-edges. + * \return the CSR array. + */ CSRArray GetInCSRArray() const; + + /*! + * \brief Get the CSR array that represents the out-edges. + * \return the CSR array. + */ CSRArray GetOutCSRArray() const; protected: From 2a5d7b1810d4f19cee92837ea6769baf19899da9 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 21:00:01 +0800 Subject: [PATCH 15/75] remove mxnet-specific immutable graph. 
--- .../backend/mxnet/immutable_graph_index.py | 396 ------------------ 1 file changed, 396 deletions(-) delete mode 100644 python/dgl/backend/mxnet/immutable_graph_index.py diff --git a/python/dgl/backend/mxnet/immutable_graph_index.py b/python/dgl/backend/mxnet/immutable_graph_index.py deleted file mode 100644 index a96c15c79117..000000000000 --- a/python/dgl/backend/mxnet/immutable_graph_index.py +++ /dev/null @@ -1,396 +0,0 @@ -from __future__ import absolute_import - -import ctypes -import numpy as np -import networkx as nx -import scipy.sparse as sp -import mxnet as mx - -class ImmutableGraphIndex(object): - """Backend-specific graph index object on immutable graphs. - We can use a CSR matrix to represent a graph structure. For functionality, - one CSR matrix is sufficient. However, for efficient access - to in-edges and out-edges of a directed graph, we need to use two CSR matrices. - In these CSR matrices, both rows and columns represent vertices. In one CSR - matrix, a row stores in-edges of a vertex (whose source vertex is a neighbor - and destination vertex is the vertex itself). Thus, a non-zero entry is - the neighbor Id and the value is the corresponding edge Id. - The other CSR matrix stores the out-edges in the same fashion. - - Parameters - ---------- - in_csr : a csr array that stores in-edges. - MXNet CSRArray - out_csr : a csr array that stores out-edges. - MXNet CSRArray - """ - def __init__(self, in_csr, out_csr): - self._in_csr = in_csr - self._out_csr = out_csr - self._cached_adj = {} - - def number_of_nodes(self): - """Return the number of nodes. - - Returns - ------- - int - The number of nodes - """ - return len(self._in_csr) - - def number_of_edges(self): - """Return the number of edges. - - Returns - ------- - int - The number of edges - """ - return self._in_csr.indices.shape[0] - - def has_edges(self, u, v): - """Return true if the edge exists. - - Parameters - ---------- - u : NDArray - The src nodes. - v : NDArray - The dst nodes. 
- - Returns - ------- - NDArray - 0-1 array indicating existence - """ - ids = mx.nd.contrib.edge_id(self._in_csr, v, u) - return ids >= 0 - - def edge_ids(self, u, v): - """Return the edge ids. - - Parameters - ---------- - u : NDArray - The src nodes. - v : NDArray - The dst nodes. - - Returns - ------- - NDArray - Teh edge id array. - """ - if len(u) == 0 or len(v) == 0: - return [], [], [] - ids = mx.nd.contrib.edge_id(self._in_csr, v, u) - ids = ids.asnumpy() - v = v.asnumpy() - u = u.asnumpy() - return u[ids >= 0], v[ids >= 0], ids[ids >= 0] - - def predecessors(self, v, radius=1): - """Return the predecessors of the node. - - Parameters - ---------- - v : int - The node. - radius : int, optional - The radius of the neighborhood. - - Returns - ------- - NDArray - Array of predecessors - """ - if radius > 1: - raise Exception('Immutable graph doesn\'t support predecessors with radius > 1 for now.') - return self._in_csr[v].indices - - def successors(self, v, radius=1): - """Return the successors of the node. - - Parameters - ---------- - v : int - The node. - radius : int, optional - The radius of the neighborhood. - - Returns - ------- - NDArray - Array of successors - """ - if radius > 1: - raise Exception('Immutable graph doesn\'t support successors with radius > 1 for now.') - return self._out_csr[v].indices - - def in_edges(self, v): - """Return the in edges of the node(s). - - Parameters - ---------- - v : NDArray - The node(s). - - Returns - ------- - NDArray - index pointers - NDArray - The src nodes. - NDArray - The edge ids. - """ - rows = mx.nd.take(self._in_csr, v) - return rows.indptr, rows.indices, rows.data - - def out_edges(self, v): - """Return the out edges of the node(s). - - Parameters - ---------- - v : NDArray - The node(s). - - Returns - ------- - NDArray - index pointers - NDArray - The dst nodes. - NDArray - The edge ids. 
- """ - rows = mx.nd.take(self._out_csr, v) - return rows.indptr, rows.indices, rows.data - - def edges(self, sorted=False): - """Return all the edges - - Parameters - ---------- - sorted : bool - True if the returned edges are sorted by their src and dst ids. - - Returns - ------- - NDArray - The src nodes. - NDArray - The dst nodes. - NDArray - The edge ids. - """ - #TODO(zhengda) we need to return NDArray directly - # We don't need to take care of the sorted flag because the vertex Ids - # are already sorted. - coo = self._in_csr.asscipy().tocoo() - return coo.col, coo.row, coo.data - - def get_in_degree(self): - """Return the in degrees of all nodes. - - Returns - ------- - NDArray - degrees - """ - return mx.nd.contrib.getnnz(self._in_csr, axis=1) - - def get_out_degree(self): - """Return the out degrees of all nodes. - - Returns - ------- - NDArray - degrees - """ - return mx.nd.contrib.getnnz(self._out_csr, axis=1) - - def node_subgraph(self, v): - """Return the induced node subgraph. - - Parameters - ---------- - v : NDArray - The nodes. - - Returns - ------- - ImmutableGraphIndex - The subgraph index. - NDArray - Induced nodes - NDArray - Induced edges - """ - v = mx.nd.sort(v) - # when return_mapping is turned on, dgl_subgraph returns another CSRArray that - # stores the edge Ids of the original graph. - csr = mx.nd.contrib.dgl_subgraph(self._in_csr, v, return_mapping=True) - induced_nodes = v - induced_edges = lambda: csr[1].data - return ImmutableGraphIndex(csr[0], None), induced_nodes, induced_edges - - def node_subgraphs(self, vs_arr): - """Return the induced node subgraphs. - - Parameters - ---------- - vs_arr : a vector of NDArray - The nodes. - - Returns - ------- - a vector of ImmutableGraphIndex - The subgraph index. - a vector of NDArrays - Induced nodes of subgraphs. - a vector of NDArrays - Induced edges of subgraphs. 
- """ - vs_arr = [mx.nd.sort(v) for v in vs_arr] - res = mx.nd.contrib.dgl_subgraph(self._in_csr, *vs_arr, return_mapping=True) - in_csrs = res[0:len(vs_arr)] - induced_nodes = vs_arr - induced_edges = [lambda: e.data for e in res[len(vs_arr):]] - assert len(in_csrs) == len(induced_nodes) - assert len(in_csrs) == len(induced_edges) - gis = [] - induced_ns = [] - induced_es = [] - for in_csr, induced_n, induced_e in zip(in_csrs, induced_nodes, induced_edges): - gis.append(ImmutableGraphIndex(in_csr, None)) - induced_ns.append(induced_n) - induced_es.append(induced_e) - return gis, induced_ns, induced_es - - def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, - node_prob, max_subgraph_size): - if neighbor_type == 'in': - g = self._in_csr - elif neighbor_type == 'out': - g = self._out_csr - else: - raise NotImplementedError - num_nodes = [] - num_subgs = len(seed_ids) - if node_prob is None: - res = mx.nd.contrib.dgl_csr_neighbor_uniform_sample(g, *seed_ids, num_hops=num_hops, - num_neighbor=expand_factor, - max_num_vertices=max_subgraph_size) - else: - res = mx.nd.contrib.dgl_csr_neighbor_non_uniform_sample(g, node_prob, *seed_ids, num_hops=num_hops, - num_neighbor=expand_factor, - max_num_vertices=max_subgraph_size) - - vertices, subgraphs = res[0:num_subgs], res[num_subgs:(2*num_subgs)] - num_nodes = [subg_v[-1].asnumpy()[0] for subg_v in vertices] - - inputs = [] - inputs.extend(subgraphs) - inputs.extend(vertices) - compacts = mx.nd.contrib.dgl_graph_compact(*inputs, graph_sizes=num_nodes, return_mapping=False) - - if isinstance(compacts, mx.nd.sparse.CSRNDArray): - compacts = [compacts] - if neighbor_type == 'in': - gis = [ImmutableGraphIndex(csr, None) for csr in compacts] - elif neighbor_type == 'out': - gis = [ImmutableGraphIndex(None, csr) for csr in compacts] - parent_nodes = [v[0:size] for v, size in zip(vertices, num_nodes)] - parent_edges = [lambda: e.data for e in subgraphs] - return gis, parent_nodes, parent_edges - - def 
adjacency_matrix(self, transpose, ctx): - """Return the adjacency matrix representation of this graph. - - By default, a row of returned adjacency matrix represents the destination - of an edge and the column represents the source. - - When transpose is True, a row represents the source and a column represents - a destination. - - Parameters - ---------- - transpose : bool - A flag to transpose the returned adjacency matrix. - ctx : context - The device context of the returned matrix. - - Returns - ------- - NDArray - An object that returns tensor given context. - """ - if transpose: - mat = self._out_csr - else: - mat = self._in_csr - return mx.nd.contrib.dgl_adjacency(mat.as_in_context(ctx)) - - def from_coo_matrix(self, out_coo): - """construct the graph index from a SciPy coo matrix. - - Parameters - ---------- - out_coo : SciPy coo matrix - The non-zero entries indicate out-edges of the graph. - """ - edge_ids = mx.nd.arange(0, len(out_coo.data), step=1, repeat=1, dtype=np.int32) - src = mx.nd.array(out_coo.row, dtype=np.int64) - dst = mx.nd.array(out_coo.col, dtype=np.int64) - # TODO we can't generate a csr_matrix with np.int64 directly. - size = max(out_coo.shape) - self.__init__(mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), shape=(size, size)).astype(np.int64), - mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), shape=(size, size)).astype(np.int64)) - - def from_edge_list(self, elist): - """Convert from an edge list. - - Parameters - --------- - elist : list - List of (u, v) edge tuple. - """ - src, dst = zip(*elist) - src = np.array(src) - dst = np.array(dst) - num_nodes = max(src.max(), dst.max()) + 1 - min_nodes = min(src.min(), dst.min()) - if min_nodes != 0: - raise DGLError('Invalid edge list. Nodes must start from 0.') - edge_ids = mx.nd.arange(0, len(src), step=1, repeat=1, dtype=np.int32) - src = mx.nd.array(src, dtype=np.int64) - dst = mx.nd.array(dst, dtype=np.int64) - # TODO we can't generate a csr_matrix with np.int64 directly. 
- in_csr = mx.nd.sparse.csr_matrix((edge_ids, (dst, src)), - shape=(num_nodes, num_nodes)).astype(np.int64) - out_csr = mx.nd.sparse.csr_matrix((edge_ids, (src, dst)), - shape=(num_nodes, num_nodes)).astype(np.int64) - self.__init__(in_csr, out_csr) - -def create_immutable_graph_index(in_csr=None, out_csr=None): - """ Create an empty backend-specific immutable graph index. - - Parameters - ---------- - in_csr : MXNet CSRNDArray - The in-edge CSR array. - out_csr : MXNet CSRNDArray - The out-edge CSR array. - - Returns - ------- - ImmutableGraphIndex - The backend-specific immutable graph index. - """ - if in_csr is not None and not isinstance(in_csr, mx.nd.sparse.CSRNDArray): - raise TypeError() - if out_csr is not None and not isinstance(out_csr, mx.nd.sparse.CSRNDArray): - raise TypeError() - return ImmutableGraphIndex(in_csr, out_csr) From 88aeacae17c3bd4acc484f59080037b500682930 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 21:04:45 +0800 Subject: [PATCH 16/75] fix. --- python/dgl/backend/backend.py | 4 ---- python/dgl/backend/mxnet/__init__.py | 1 - python/dgl/backend/numpy/tensor.py | 2 -- python/dgl/backend/pytorch/tensor.py | 2 -- python/dgl/immutable_graph_index.py | 2 -- src/graph/graph_apis.cc | 2 +- 6 files changed, 1 insertion(+), 12 deletions(-) diff --git a/python/dgl/backend/backend.py b/python/dgl/backend/backend.py index 0f7e261f5d2d..1d05990171ff 100644 --- a/python/dgl/backend/backend.py +++ b/python/dgl/backend/backend.py @@ -834,7 +834,3 @@ def zerocopy_from_numpy(np_array): # ---------------- # These are not related to tensors. Some of them are temporary workarounds that # should be included in DGL in the future. 
- -def create_immutable_graph_index(): - """Create an immutable graph index object.""" - pass diff --git a/python/dgl/backend/mxnet/__init__.py b/python/dgl/backend/mxnet/__init__.py index b731e25a2fe6..c0417004f833 100644 --- a/python/dgl/backend/mxnet/__init__.py +++ b/python/dgl/backend/mxnet/__init__.py @@ -1,2 +1 @@ from .tensor import * -from .immutable_graph_index import create_immutable_graph_index diff --git a/python/dgl/backend/numpy/tensor.py b/python/dgl/backend/numpy/tensor.py index aab7ddf4db43..3a36aba1d039 100644 --- a/python/dgl/backend/numpy/tensor.py +++ b/python/dgl/backend/numpy/tensor.py @@ -142,5 +142,3 @@ def zerocopy_to_numpy(input): def zerocopy_from_numpy(np_array): return np_array - -# create_immutable_graph_index not enabled diff --git a/python/dgl/backend/pytorch/tensor.py b/python/dgl/backend/pytorch/tensor.py index 1d8275fa00da..622fdf425e2b 100644 --- a/python/dgl/backend/pytorch/tensor.py +++ b/python/dgl/backend/pytorch/tensor.py @@ -188,5 +188,3 @@ def zerocopy_to_numpy(input): def zerocopy_from_numpy(np_array): return th.from_numpy(np_array) - -# create_immutable_graph_index not enabled diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index 027c5b2e5e31..8c9ec2456be3 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -771,8 +771,6 @@ def create_immutable_graph_index(graph_data=None): """ if isinstance(graph_data, ImmutableGraphIndex): return graph_data - assert F.create_immutable_graph_index is not None, \ - "The selected backend doesn't support read-only graph!" # Let's create an empty graph index first. 
gidx = ImmutableGraphIndex(None) diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index d5a5ee3d6864..a1c98fdb6a4f 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -484,7 +484,7 @@ DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasVertices") *rv = gptr->HasVertices(vids); }); -DGL_REGISTER_GLOBAL("immutable_immutable_graph_index._CAPI_DGLExpandIds") +DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLExpandIds") .set_body([] (DGLArgs args, DGLRetValue* rv) { const IdArray ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); const IdArray offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); From a35df30adfeebb8309c8d521e0603bbbc0c6fcc4 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 21:28:37 +0800 Subject: [PATCH 17/75] fix lint. --- include/dgl/immutable_graph.h | 21 +++++----- python/dgl/immutable_graph_index.py | 32 ++++++++------- src/graph/immutable_graph.cc | 63 ++++++++++++++++------------- 3 files changed, 62 insertions(+), 54 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 02ac1445d2f0..1a0cb45536b6 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -7,6 +7,7 @@ #define DGL_IMMUTABLE_GRAPH_H_ #include +#include #include #include #include @@ -39,6 +40,7 @@ class ImmutableGraph { }; struct csr { + typedef std::shared_ptr ptr; std::vector indptr; std::vector indices; std::vector edge_ids; @@ -72,16 +74,16 @@ class ImmutableGraph { int64_t end = indptr[v + 1]; return std::pair(&indices[start], &indices[end]); } - std::shared_ptr Transpose() const; - std::pair, IdArray> VertexSubgraph(IdArray vids) const; - static std::shared_ptr from_edges(std::vector &edges, int sort_on, int64_t num_nodes); + csr::ptr Transpose() const; + std::pair VertexSubgraph(IdArray vids) const; + static csr::ptr from_edges(std::vector *edges, int sort_on, int64_t num_nodes); }; ImmutableGraph(IdArray src_ids, IdArray dst_ids, 
IdArray edge_ids, size_t num_nodes, bool multigraph = false); - ImmutableGraph(std::shared_ptr in_csr, std::shared_ptr out_csr, - bool multigraph = false) : is_multigraph_(multigraph) { + ImmutableGraph(csr::ptr in_csr, csr::ptr out_csr, + bool multigraph = false) : is_multigraph_(multigraph) { this->in_csr_ = in_csr; this->out_csr_ = out_csr; } @@ -97,7 +99,6 @@ class ImmutableGraph { ImmutableGraph(ImmutableGraph&& other) = default; #else ImmutableGraph(ImmutableGraph&& other) { - // TODO } #endif // _MSC_VER @@ -399,7 +400,7 @@ class ImmutableGraph { * When we get in csr or out csr, we try to get the one cached in the structure. * If not, we transpose the other one to get the one we need. */ - std::shared_ptr GetInCSR() const { + csr::ptr GetInCSR() const { if (in_csr_) { return in_csr_; } else { @@ -408,7 +409,7 @@ class ImmutableGraph { return in_csr_; } } - std::shared_ptr GetOutCSR() const { + csr::ptr GetOutCSR() const { if (out_csr_) { return out_csr_; } else { @@ -425,9 +426,9 @@ class ImmutableGraph { void CompactSubgraph(IdArray induced_vertices); // Store the in-edges. - std::shared_ptr in_csr_; + csr::ptr in_csr_; // Store the out-edges. - std::shared_ptr out_csr_; + csr::ptr out_csr_; /*! * \brief Whether if this is a multigraph. 
* diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index 8c9ec2456be3..484077e82c54 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -29,6 +29,7 @@ def __init__(self, handle): self._cache = {} def init(self, src_ids, dst_ids, edge_ids, num_nodes): + """The actual init function""" self._handle = _CAPI_DGLGraphCreate(src_ids.todgltensor(), dst_ids.todgltensor(), edge_ids.todgltensor(), False, num_nodes) self._num_nodes = num_nodes @@ -342,7 +343,7 @@ def out_edges(self, v): return src, dst, eid @utils.cached_member(cache='_cache', prefix='edges') - def edges(self, sorted=False): + def edges(self, return_sorted=False): """Return all the edges Parameters @@ -359,9 +360,9 @@ def edges(self, sorted=False): utils.Index The edge ids. """ - key = 'edges_s%d' % sorted + key = 'edges_s%d' % return_sorted if key not in self._cache: - edge_array = _CAPI_DGLGraphEdges(self._handle, sorted) + edge_array = _CAPI_DGLGraphEdges(self._handle, return_sorted) src = utils.toindex(edge_array(0)) dst = utils.toindex(edge_array(1)) eid = utils.toindex(edge_array(2)) @@ -487,10 +488,10 @@ def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, no seed_ids = [v.todgltensor() for v in seed_ids] num_subgs = len(seed_ids) if node_prob is None: - rst = _DGLGraphUniformSampling(self, seed_ids, neighbor_type, num_hops, expand_factor) + rst = _uniform_sampling(self, seed_ids, neighbor_type, num_hops, expand_factor) else: - rst = _DGLGraphNonUniformSampling(self, node_prob, seed_ids, neighbor_type, num_hops, - expand_factor) + rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, + expand_factor) return [ImmutableSubgraphIndex(rst(i), self, rst(num_subgs + i), rst(num_subgs * 2 + i)) for i in range(num_subgs)] @@ -632,7 +633,7 @@ def from_scipy_sparse_matrix(self, adj): ---------- adj : scipy sparse matrix """ - assert isinstance(adj, sp.csr_matrix) or isinstance(adj, 
sp.coo_matrix), \ + assert isinstance(adj, (sp.csr_matrix, sp.coo_matrix)), \ "The input matrix has to be a SciPy sparse matrix." num_nodes = max(adj.shape[0], adj.shape[1]) out_mat = adj.tocoo() @@ -788,7 +789,7 @@ def create_immutable_graph_index(graph_data=None): try: gidx.from_scipy_sparse_matrix(graph_data) return gidx - except Exception as e: # pylint: disable=broad-except + except Exception: # pylint: disable=broad-except raise DGLError('Graph data is not a valid scipy sparse matrix.') # networkx - any format @@ -802,7 +803,7 @@ def create_immutable_graph_index(graph_data=None): _init_api("dgl.immutable_graph_index") -_NeighborSamplingAPIs = { +_NEIGHBOR_SAMPLING_APIS = { 1: _CAPI_DGLGraphUniformSampling, 2: _CAPI_DGLGraphUniformSampling2, 4: _CAPI_DGLGraphUniformSampling4, @@ -813,14 +814,15 @@ def create_immutable_graph_index(graph_data=None): 128: _CAPI_DGLGraphUniformSampling128, } -_EmptyArrays = [utils.toindex(F.ones(shape=(0), dtype=F.int64, ctx=F.cpu()))] +_EMPTY_ARRAYS = [utils.toindex(F.ones(shape=(0), dtype=F.int64, ctx=F.cpu()))] -def _DGLGraphUniformSampling(gi, seed_ids, neigh_type, num_hops, expand_factor): +def _uniform_sampling(gidx, seed_ids, neigh_type, num_hops, expand_factor): num_seeds = len(seed_ids) empty_ids = [] - if len(seed_ids) > 1 and len(seed_ids) not in _NeighborSamplingAPIs.keys(): + if len(seed_ids) > 1 and len(seed_ids) not in _NEIGHBOR_SAMPLING_APIS.keys(): remain = 2**int(math.ceil(math.log2(len(dgl_ids)))) - len(dgl_ids) - empty_ids = _EmptyArrays[0:remain] + empty_ids = _EMPTY_ARRAYS[0:remain] seed_ids.extend([empty.todgltensor() for empty in empty_ids]) - assert len(seed_ids) in _NeighborSamplingAPIs.keys() - return _NeighborSamplingAPIs[len(seed_ids)](gi._handle, *seed_ids, neigh_type, num_hops, expand_factor, num_seeds) + assert len(seed_ids) in _NEIGHBOR_SAMPLING_APIS.keys() + return _NEIGHBOR_SAMPLING_APIS[len(seed_ids)](gidx._handle, *seed_ids, neigh_type, + num_hops, expand_factor, num_seeds) diff --git 
a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 4d14407530f8..193582a6a845 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -135,7 +135,8 @@ class HashTableChecker { } }; -std::pair, IdArray> ImmutableGraph::csr::VertexSubgraph(IdArray vids) const { +std::pair ImmutableGraph::csr::VertexSubgraph( + IdArray vids) const { const dgl_id_t* vid_data = static_cast(vids->data); const int64_t len = vids->shape[0]; @@ -172,8 +173,8 @@ std::pair, IdArray> ImmutableGraph::csr::Ve return std::pair, IdArray>(sub_csr, rst_eids); } -std::shared_ptr ImmutableGraph::csr::from_edges(std::vector &edges, - int sort_on, int64_t num_nodes) { +ImmutableGraph::csr::ptr ImmutableGraph::csr::from_edges(std::vector *edges, + int sort_on, int64_t num_nodes) { assert(sort_on == 0 || sort_on == 1); int other_end = sort_on == 1 ? 0 : 1; // TODO(zhengda) we should sort in parallel. @@ -191,22 +192,22 @@ std::shared_ptr ImmutableGraph::csr::from_edges(std::vector return e1.end_points[sort_on] < e2.end_points[sort_on]; } }; - std::sort(edges.begin(), edges.end(), compare(sort_on, other_end)); - std::shared_ptr t = std::make_shared(0, 0); - t->indices.resize(edges.size()); - t->edge_ids.resize(edges.size()); - for (size_t i = 0; i < edges.size(); i++) { - t->indices[i] = edges[i].end_points[other_end]; + std::sort(edges->begin(), edges->end(), compare(sort_on, other_end)); + auto t = std::make_shared(0, 0); + t->indices.resize(edges->size()); + t->edge_ids.resize(edges->size()); + for (size_t i = 0; i < edges->size(); i++) { + t->indices[i] = edges->at(i).end_points[other_end]; assert(t->indices[i] < num_nodes); - t->edge_ids[i] = edges[i].edge_id; - dgl_id_t vid = edges[i].end_points[sort_on]; + t->edge_ids[i] = edges->at(i).edge_id; + dgl_id_t vid = edges->at(i).end_points[sort_on]; assert(vid < num_nodes); while (vid > 0 && t->indptr.size() <= static_cast(vid)) t->indptr.push_back(i); assert(t->indptr.size() == vid + 1); } while (t->indptr.size() 
< num_nodes + 1) - t->indptr.push_back(edges.size()); + t->indptr.push_back(edges->size()); assert(t->indptr.size() == num_nodes + 1); return t; } @@ -224,7 +225,7 @@ std::shared_ptr ImmutableGraph::csr::Transpose() const { edges[indptr[i] + j] = e; } } - return from_edges(edges, 1, NumVertices()); + return from_edges(&edges, 1, NumVertices()); } ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, @@ -243,8 +244,8 @@ ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_id e.edge_id = edge_data[i]; edges[i] = e; } - in_csr_ = csr::from_edges(edges, 1, num_nodes); - out_csr_ = csr::from_edges(edges, 0, num_nodes); + in_csr_ = csr::from_edges(&edges, 1, num_nodes); + out_csr_ = csr::from_edges(&edges, 0, num_nodes); } BoolArray ImmutableGraph::HasVertices(IdArray vids) const { @@ -330,7 +331,7 @@ std::pair ImmutableGraph::GetInEdgeIdRef(dgl const dgl_id_t *start = &in_csr_->edge_ids[off]; int64_t len = 0; // There are edges between the source and the destination. - for (auto it1 = it; it1 != pred.second && *it1 == src; it1++, len++); + for (auto it1 = it; it1 != pred.second && *it1 == src; it1++, len++) {} return std::pair(start, start + len); } @@ -348,7 +349,7 @@ std::pair ImmutableGraph::GetOutEdgeIdRef(dg const dgl_id_t *start = &out_csr_->edge_ids[off]; int64_t len = 0; // There are edges between the source and the destination. 
- for (auto it1 = it; it1 != succ.second && *it1 == dst; it1++, len++); + for (auto it1 = it; it1 != succ.second && *it1 == dst; it1++, len++) {} return std::pair(start, start + len); } @@ -394,7 +395,7 @@ ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_i std::pair edges; if (this->in_csr_) edges = this->GetInEdgeIdRef(src_id, dst_id); - else + else edges = this->GetOutEdgeIdRef(src_id, dst_id); size_t len = edges.second - edges.first; @@ -445,7 +446,7 @@ ImmutableGraph::EdgeArray ImmutableGraph::Edges(bool sorted) const { ImmutableSubgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { ImmutableSubgraph subg; - std::pair, IdArray> ret; + std::pair ret; // We prefer to generate a subgraph for out-csr first. if (out_csr_) { ret = out_csr_->VertexSubgraph(vids); @@ -820,10 +821,14 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, }); SampledSubgraph subg; - subg.induced_vertices = IdArray::Empty({num_vertices}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.induced_edges = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.layer_ids = IdArray::Empty({num_vertices}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.sample_prob = runtime::NDArray::Empty({num_vertices}, DLDataType{kDLFloat, 32, 1}, DLContext{kDLCPU, 0}); + subg.induced_vertices = IdArray::Empty({num_vertices}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.induced_edges = IdArray::Empty({num_edges}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.layer_ids = IdArray::Empty({num_vertices}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.sample_prob = runtime::NDArray::Empty({num_vertices}, + DLDataType{kDLFloat, 32, 1}, DLContext{kDLCPU, 0}); dgl_id_t *out = static_cast(subg.induced_vertices->data); dgl_id_t *out_layer = static_cast(subg.layer_ids->data); @@ -891,12 +896,12 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, return subg; } -void 
CompactSubgraph(ImmutableGraph::csr &subg, +void CompactSubgraph(ImmutableGraph::csr *subg, const std::unordered_map &id_map) { - for (size_t i = 0; i < subg.indices.size(); i++) { - auto it = id_map.find(subg.indices[i]); + for (size_t i = 0; i < subg->indices.size(); i++) { + auto it = id_map.find(subg->indices[i]); assert(it != id_map.end()); - subg.indices[i] = it->second; + subg->indices[i] = it->second; } } @@ -908,9 +913,9 @@ void ImmutableGraph::CompactSubgraph(IdArray induced_vertices) { for (size_t i = 0; i < len; i++) id_map.insert(std::pair(vdata[i], i)); if (in_csr_) - dgl::CompactSubgraph(*in_csr_, id_map); + dgl::CompactSubgraph(in_csr_.get(), id_map); if (out_csr_) - dgl::CompactSubgraph(*out_csr_, id_map); + dgl::CompactSubgraph(out_csr_.get(), id_map); } SampledSubgraph ImmutableGraph::NeighborUniformSample(IdArray seeds, From e8fdc5c4a1262f1d2cbf0f60aff5ab0be08fab97 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 22:52:50 +0800 Subject: [PATCH 18/75] fix. --- include/dgl/immutable_graph.h | 10 ++++++++-- src/graph/immutable_graph.cc | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 1a0cb45536b6..e76a3ff739ed 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -19,6 +19,12 @@ namespace dgl { struct ImmutableSubgraph; struct SampledSubgraph; +template +bool binary_search(ForwardIt first, ForwardIt last, const T& value) { + first = std::lower_bound(first, last, value); + return (!(first == last) && !(value < *first)); +} + /*! * \brief Base dgl immutable graph index class. 
* @@ -145,11 +151,11 @@ class ImmutableGraph { if (!HasVertex(src) || !HasVertex(dst)) return false; if (this->in_csr_) { auto pred = this->in_csr_->GetIndexRef(dst); - return std::binary_search(pred.first, pred.second, src); + return binary_search(pred.first, pred.second, src); } else { assert(this->out_csr_); auto succ = this->out_csr_->GetIndexRef(src); - return std::binary_search(succ.first, succ.second, dst); + return binary_search(succ.first, succ.second, dst); } } diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 193582a6a845..544f5298e740 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -4,6 +4,7 @@ * \brief DGL immutable graph index implementation */ +#include #include #include "../c_api_common.h" From 79da05b9979e876d2954dc09835c99dd2a7cb162 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 7 Jan 2019 23:24:14 +0800 Subject: [PATCH 19/75] try to fix windows compile error. --- src/graph/immutable_graph.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 544f5298e740..cc4b88003e1d 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -7,6 +7,16 @@ #include #include +#ifdef _MSC_VER +// TODO(zhengda) it seems MS compiler doesn't have rand_r. Let's +// use rand for now. This may not have good performance because rand has +// a global variable shared by all threads. +int rand_r(unsigned *seed) { + return rand(); +} +#define _CRT_RAND_S +#endif + #include "../c_api_common.h" namespace dgl { From 54e695bd6d582e5423d401e2e0df0c8da5de72be Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 8 Jan 2019 14:47:54 +0800 Subject: [PATCH 20/75] fix. 
--- python/dgl/immutable_graph_index.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index 484077e82c54..8a252f898614 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -521,9 +521,10 @@ def adjacency_matrix(self, transpose=False, ctx=F.cpu()): rst = _CAPI_DGLGraphGetCSR(self._handle, transpose) indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) + shuffle = utils.toindex(rst(2)) dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) return F.sparse_matrix(dat, ('csr', indices, indptr), - (self.number_of_nodes(), self.number_of_nodes())) + (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle def incidence_matrix(self, typestr, ctx): """Return the incidence matrix representation of this graph. From 1f65daf5b0de7c6a7173c30e3c1ce2a6e35b0405 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 8 Jan 2019 15:34:25 +0800 Subject: [PATCH 21/75] fix. --- python/dgl/immutable_graph_index.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dgl/immutable_graph_index.py b/python/dgl/immutable_graph_index.py index 8a252f898614..a90bd0fd2839 100644 --- a/python/dgl/immutable_graph_index.py +++ b/python/dgl/immutable_graph_index.py @@ -623,7 +623,7 @@ def from_networkx(self, nx_graph): eid = utils.toindex(eid) src = utils.toindex(src) dst = utils.toindex(dst) - self.init(src_ids, dst_ids, edge_ids, num_nodes) + self.init(src, dst, eid, num_nodes) def from_scipy_sparse_matrix(self, adj): """Convert from scipy sparse matrix. From 092e88d06693ee99a0ad17f801c01653011112dd Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 8 Jan 2019 15:54:52 +0800 Subject: [PATCH 22/75] add test. 
--- tests/mxnet/test_graph_index.py | 46 ++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/tests/mxnet/test_graph_index.py b/tests/mxnet/test_graph_index.py index 0f055491e4fd..37b5cc7d2192 100644 --- a/tests/mxnet/test_graph_index.py +++ b/tests/mxnet/test_graph_index.py @@ -1,5 +1,6 @@ import os os.environ['DGLBACKEND'] = 'mxnet' +import networkx as nx import mxnet as mx import numpy as np import scipy as sp @@ -7,6 +8,14 @@ from dgl.graph_index import map_to_subgraph_nid, GraphIndex, create_graph_index from dgl import utils +def generate_from_networkx(): + edges = [[2, 3], [2, 5], [3, 0], [1, 0], [4, 3], [4, 5]] + nx_graph = nx.DiGraph() + nx_graph.add_edges_from(edges) + g = create_graph_index(nx_graph) + ig = create_graph_index(nx_graph, readonly=True) + return g, ig + def generate_from_edgelist(): edges = [[2, 3], [2, 5], [3, 0], [6, 10], [10, 3], [10, 15]] g = create_graph_index(edges) @@ -30,12 +39,17 @@ def test_graph_gen(): g, ig = generate_rand_graph(10) check_graph_equal(g, ig) +def sort_edges(edges): + edges = [e.tousertensor() for e in edges] + idx = mx.nd.argsort(edges[2]) + return (edges[0][idx], edges[1][idx], edges[2][idx]) + def check_basics(g, ig): assert g.number_of_nodes() == ig.number_of_nodes() assert g.number_of_edges() == ig.number_of_edges() - edges = g.edges() - iedges = ig.edges() + edges = g.edges(True) + iedges = ig.edges(True) assert np.all(edges[0].tousertensor().asnumpy() == iedges[0].tousertensor().asnumpy()) assert np.all(edges[1].tousertensor().asnumpy() == iedges[1].tousertensor().asnumpy()) assert np.all(edges[2].tousertensor().asnumpy() == iedges[2].tousertensor().asnumpy()) @@ -49,19 +63,19 @@ def check_basics(g, ig): randv = np.random.randint(0, g.number_of_nodes(), 10) randv = utils.toindex(randv) - in_src1, in_dst1, in_eids1 = g.in_edges(randv) - in_src2, in_dst2, in_eids2 = ig.in_edges(randv) - nnz = in_src2.tousertensor().shape[0] - assert mx.nd.sum(in_src1.tousertensor() 
== in_src2.tousertensor()).asnumpy() == nnz - assert mx.nd.sum(in_dst1.tousertensor() == in_dst2.tousertensor()).asnumpy() == nnz - assert mx.nd.sum(in_eids1.tousertensor() == in_eids2.tousertensor()).asnumpy() == nnz - - out_src1, out_dst1, out_eids1 = g.out_edges(randv) - out_src2, out_dst2, out_eids2 = ig.out_edges(randv) - nnz = out_dst2.tousertensor().shape[0] - assert mx.nd.sum(out_dst1.tousertensor() == out_dst2.tousertensor()).asnumpy() == nnz - assert mx.nd.sum(out_src1.tousertensor() == out_src2.tousertensor()).asnumpy() == nnz - assert mx.nd.sum(out_eids1.tousertensor() == out_eids2.tousertensor()).asnumpy() == nnz + in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv)) + in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv)) + nnz = in_src2.shape[0] + assert mx.nd.sum(in_src1 == in_src2).asnumpy() == nnz + assert mx.nd.sum(in_dst1 == in_dst2).asnumpy() == nnz + assert mx.nd.sum(in_eids1 == in_eids2).asnumpy() == nnz + + out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv)) + out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv)) + nnz = out_dst2.shape[0] + assert mx.nd.sum(out_dst1 == out_dst2).asnumpy() == nnz + assert mx.nd.sum(out_src1 == out_src2).asnumpy() == nnz + assert mx.nd.sum(out_eids1 == out_eids2).asnumpy() == nnz num_v = len(randv) assert mx.nd.sum(g.in_degrees(randv).tousertensor() == ig.in_degrees(randv).tousertensor()).asnumpy() == num_v @@ -85,6 +99,8 @@ def check_basics(g, ig): def test_basics(): g, ig = generate_from_edgelist() check_basics(g, ig) + g, ig = generate_from_networkx() + check_basics(g, ig) g, ig = generate_rand_graph(100) check_basics(g, ig) From 3b70ed822a64555be30672e42ff97041e0e954ff Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Thu, 10 Jan 2019 22:09:25 +0800 Subject: [PATCH 23/75] unify Graph and ImmutableGraph. 
--- include/dgl/graph.h | 62 +++---- include/dgl/graph_interface.h | 307 ++++++++++++++++++++++++++++++++++ include/dgl/immutable_graph.h | 131 ++++++++++----- src/graph/graph.cc | 12 +- src/graph/graph_apis.cc | 27 +-- src/graph/immutable_graph.cc | 13 +- 6 files changed, 433 insertions(+), 119 deletions(-) create mode 100644 include/dgl/graph_interface.h diff --git a/include/dgl/graph.h b/include/dgl/graph.h index 3ebf9cc058e2..b1b2aefa23a4 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -12,17 +12,12 @@ #include #include "runtime/ndarray.h" -namespace dgl { +#include "graph_interface.h" -typedef uint64_t dgl_id_t; -typedef dgl::runtime::NDArray IdArray; -typedef dgl::runtime::NDArray DegreeArray; -typedef dgl::runtime::NDArray BoolArray; -typedef dgl::runtime::NDArray IntArray; +namespace dgl { class Graph; class GraphOp; -struct Subgraph; /*! * \brief Base dgl graph index class. @@ -38,14 +33,8 @@ struct Subgraph; * If the length of src id array is one, it represents one-many connections. * If the length of dst id array is one, it represents many-one connections. */ -class Graph { +class Graph: public GraphInterface { public: - /* \brief structure used to represent a list of edges */ - typedef struct { - /* \brief the two endpoints and the id of the edge */ - IdArray src, dst, id; - } EdgeArray; - /*! \brief default constructor */ explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {} @@ -309,15 +298,15 @@ class Graph { * * \return the reversed graph */ - Graph Reverse() const; + GraphInterface::ptr Reverse() const; /*! * \brief Return the successor vector * \param vid The vertex id. * \return the successor vector */ - const std::vector& SuccVec(dgl_id_t vid) const { - return adjlist_[vid].succ; + dgl_id_iters SuccVec(dgl_id_t vid) const { + return dgl_id_iters(adjlist_[vid].succ.begin(), adjlist_[vid].succ.end()); } /*! @@ -325,8 +314,8 @@ class Graph { * \param vid The vertex id. 
* \return the out edge id vector */ - const std::vector& OutEdgeVec(dgl_id_t vid) const { - return adjlist_[vid].edge_id; + dgl_id_iters OutEdgeVec(dgl_id_t vid) const { + return dgl_id_iters(adjlist_[vid].edge_id.begin(), adjlist_[vid].edge_id.end()); } /*! @@ -334,8 +323,8 @@ class Graph { * \param vid The vertex id. * \return the predecessor vector */ - const std::vector& PredVec(dgl_id_t vid) const { - return reverse_adjlist_[vid].succ; + dgl_id_iters PredVec(dgl_id_t vid) const { + return dgl_id_iters(reverse_adjlist_[vid].succ.begin(), reverse_adjlist_[vid].succ.end()); } /*! @@ -343,8 +332,19 @@ class Graph { * \param vid The vertex id. * \return the in edge id vector */ - const std::vector& InEdgeVec(dgl_id_t vid) const { - return reverse_adjlist_[vid].edge_id; + dgl_id_iters InEdgeVec(dgl_id_t vid) const { + return dgl_id_iters(reverse_adjlist_[vid].edge_id.begin(), + reverse_adjlist_[vid].edge_id.end()); + } + + /*! + * \brief Reset the data in the graph and move its data to the returned graph object. + * \return a raw pointer to the graph object. + */ + virtual GraphInterface *Reset() { + Graph* gptr = new Graph(); + *gptr = std::move(this); + return gptr; } protected: @@ -380,22 +380,6 @@ class Graph { uint64_t num_edges_ = 0; }; -/*! \brief Subgraph data structure */ -struct Subgraph { - /*! \brief The graph. */ - Graph graph; - /*! - * \brief The induced vertex ids. - * \note This is also a map from the new vertex id to the vertex id in the parent graph. - */ - IdArray induced_vertices; - /*! - * \brief The induced edge ids. - * \note This is also a map from the new edge id to the edge id in the parent graph. - */ - IdArray induced_edges; -}; - } // namespace dgl #endif // DGL_GRAPH_H_ diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h new file mode 100644 index 000000000000..7d368074dcf3 --- /dev/null +++ b/include/dgl/graph_interface.h @@ -0,0 +1,307 @@ +/*! 
+ * Copyright (c) 2018 by Contributors + * \file dgl/graph_interface.h + * \brief DGL graph index class. + */ +#ifndef DGL_GRAPH_INTERFACE_H_ +#define DGL_GRAPH_INTERFACE_H_ + +namespace dgl { + +typedef uint64_t dgl_id_t; +typedef dgl::runtime::NDArray IdArray; +typedef dgl::runtime::NDArray DegreeArray; +typedef dgl::runtime::NDArray BoolArray; +typedef dgl::runtime::NDArray IntArray; + +struct Subgraph; + +/*! + * \brief dgl graph index interface. + * + * DGL's graph is directed. Vertices are integers enumerated from zero. + */ +class GraphInterface { + public: + /* \brief structure used to represent a list of edges */ + typedef struct { + /* \brief the two endpoints and the id of the edge */ + IdArray src, dst, id; + } EdgeArray; + + typedef std::shared_ptr ptr; + + typedef std::pair::const_iterator, + std::vector::const_iterator> dgl_id_iters; + + virtual ~GraphInterface() { + } + + /*! + * \brief Add vertices to the graph. + * \note Since vertices are integers enumerated from zero, only the number of + * vertices to be added needs to be specified. + * \param num_vertices The number of vertices to be added. + */ + virtual void AddVertices(uint64_t num_vertices) = 0; + + /*! + * \brief Add one edge to the graph. + * \param src The source vertex. + * \param dst The destination vertex. + */ + virtual void AddEdge(dgl_id_t src, dgl_id_t dst) = 0; + + /*! + * \brief Add edges to the graph. + * \param src_ids The source vertex id array. + * \param dst_ids The destination vertex id array. + */ + virtual void AddEdges(IdArray src_ids, IdArray dst_ids) = 0; + + /*! + * \brief Clear the graph. Remove all vertices/edges. + */ + virtual void Clear() = 0; + + /*! + * \note not const since we have caches + * \return whether the graph is a multigraph + */ + virtual bool IsMultigraph() const = 0; + + /*! \return the number of vertices in the graph.*/ + virtual uint64_t NumVertices() const; + + /*! 
\return the number of edges in the graph.*/ + virtual uint64_t NumEdges() const = 0; + + /*! \return true if the given vertex is in the graph.*/ + virtual bool HasVertex(dgl_id_t vid) const = 0; + + /*! \return a 0-1 array indicating whether the given vertices are in the graph.*/ + virtual BoolArray HasVertices(IdArray vids) const = 0; + + /*! \return true if the given edge is in the graph.*/ + virtual bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const = 0; + + /*! \return a 0-1 array indicating whether the given edges are in the graph.*/ + virtual BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const = 0; + + /*! + * \brief Find the predecessors of a vertex. + * \param vid The vertex id. + * \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1). + * \return the predecessor id array. + */ + virtual IdArray Predecessors(dgl_id_t vid, uint64_t radius = 1) const = 0; + + /*! + * \brief Find the successors of a vertex. + * \param vid The vertex id. + * \param radius The radius of the neighborhood. Default is immediate neighbor (radius=1). + * \return the successor id array. + */ + virtual IdArray Successors(dgl_id_t vid, uint64_t radius = 1) const = 0; + + /*! + * \brief Get all edge ids between the two given endpoints + * \note Edges are associated with an integer id start from zero. + * The id is assigned when the edge is being added to the graph. + * \param src The source vertex. + * \param dst The destination vertex. + * \return the edge id array. + */ + virtual IdArray EdgeId(dgl_id_t src, dgl_id_t dst) const = 0; + + /*! + * \brief Get all edge ids between the given endpoint pairs. + * \note Edges are associated with an integer id start from zero. + * The id is assigned when the edge is being added to the graph. + * If duplicate pairs exist, the returned edge IDs will also duplicate. + * The order of returned edge IDs will follow the order of src-dst pairs + * first, and ties are broken by the order of edge ID. 
+ * \return EdgeArray containing all edges between all pairs. + */ + virtual EdgeArray EdgeIds(IdArray src, IdArray dst) const = 0; + + /*! + * \brief Find the edge ID and return the pair of endpoints + * \param eid The edge ID + * \return a pair whose first element is the source and the second the destination. + */ + virtual std::pair FindEdge(dgl_id_t eid) const = 0; + + /*! + * \brief Find the edge IDs and return their source and target node IDs. + * \param eids The edge ID array. + * \return EdgeArray containing all edges with id in eid. The order is preserved. + */ + virtual EdgeArray FindEdges(IdArray eids) const = 0; + + /*! + * \brief Get the in edges of the vertex. + * \note The returned dst id array is filled with vid. + * \param vid The vertex id. + * \return the edges + */ + virtual EdgeArray InEdges(dgl_id_t vid) const = 0; + + /*! + * \brief Get the in edges of the vertices. + * \param vids The vertex id array. + * \return the id arrays of the two endpoints of the edges. + */ + virtual EdgeArray InEdges(IdArray vids) const = 0; + + /*! + * \brief Get the out edges of the vertex. + * \note The returned src id array is filled with vid. + * \param vid The vertex id. + * \return the id arrays of the two endpoints of the edges. + */ + virtual EdgeArray OutEdges(dgl_id_t vid) const = 0; + + /*! + * \brief Get the out edges of the vertices. + * \param vids The vertex id array. + * \return the id arrays of the two endpoints of the edges. + */ + virtual EdgeArray OutEdges(IdArray vids) const = 0; + + /*! + * \brief Get all the edges in the graph. + * \note If sorted is true, the returned edges list is sorted by their src and + * dst ids. Otherwise, they are in their edge id order. + * \param sorted Whether the returned edge list is sorted by their src and dst ids + * \return the id arrays of the two endpoints of the edges. + */ + virtual EdgeArray Edges(bool sorted = false) const = 0; + + /*! + * \brief Get the in degree of the given vertex. 
+ * \param vid The vertex id. + * \return the in degree + */ + virtual uint64_t InDegree(dgl_id_t vid) const = 0; + + /*! + * \brief Get the in degrees of the given vertices. + * \param vid The vertex id array. + * \return the in degree array + */ + virtual DegreeArray InDegrees(IdArray vids) const = 0; + + /*! + * \brief Get the out degree of the given vertex. + * \param vid The vertex id. + * \return the out degree + */ + virtual uint64_t OutDegree(dgl_id_t vid) const = 0; + + /*! + * \brief Get the out degrees of the given vertices. + * \param vid The vertex id array. + * \return the out degree array + */ + virtual DegreeArray OutDegrees(IdArray vids) const = 0; + + /*! + * \brief Construct the induced subgraph of the given vertices. + * + * The induced subgraph is a subgraph formed by specifying a set of vertices V' and then + * selecting all of the edges from the original graph that connect two vertices in V'. + * + * Vertices and edges in the original graph will be "reindexed" to local index. The local + * index of the vertices preserve the order of the given id array, while the local index + * of the edges preserve the index order in the original graph. Vertices not in the + * original graph are ignored. + * + * The result subgraph is read-only. + * + * \param vids The vertices in the subgraph. + * \return the induced subgraph + */ + virtual Subgraph VertexSubgraph(IdArray vids) const = 0; + + /*! + * \brief Construct the induced edge subgraph of the given edges. + * + * The induced edges subgraph is a subgraph formed by specifying a set of edges E' and then + * selecting all of the nodes from the original graph that are endpoints in E'. + * + * Vertices and edges in the original graph will be "reindexed" to local index. The local + * index of the edges preserve the order of the given id array, while the local index + * of the vertices preserve the index order in the original graph. Edges not in the + * original graph are ignored. 
+ * + * The result subgraph is read-only. + * + * \param eids The edges in the subgraph. + * \return the induced edge subgraph + */ + virtual Subgraph EdgeSubgraph(IdArray eids) const = 0; + + /*! + * \brief Return a new graph with all the edges reversed. + * + * The returned graph preserves the vertex and edge index in the original graph. + * + * \return the reversed graph + */ + virtual GraphInterface::ptr Reverse() const = 0; + + /*! + * \brief Return the successor vector + * \param vid The vertex id. + * \return the successor vector iterator pair. + */ + virtual dgl_id_iters SuccVec(dgl_id_t vid) const = 0; + + /*! + * \brief Return the out edge id vector + * \param vid The vertex id. + * \return the out edge id vector iterator pair. + */ + virtual dgl_id_iters OutEdgeVec(dgl_id_t vid) const = 0; + + /*! + * \brief Return the predecessor vector + * \param vid The vertex id. + * \return the predecessor vector iterator pair. + */ + virtual dgl_id_iters PredVec(dgl_id_t vid) const = 0; + + /*! + * \brief Return the in edge id vector + * \param vid The vertex id. + * \return the in edge id vector iterator pair. + */ + virtual dgl_id_iters InEdgeVec(dgl_id_t vid) const = 0; + + /*! + * \brief Reset the data in the graph and move its data to the returned graph object. + * \return a raw pointer to the graph object. + */ + virtual GraphInterface *Reset() = 0; +}; + +/*! \brief Subgraph data structure */ +struct Subgraph { + /*! \brief The graph. */ + GraphInterface::ptr graph; + /*! + * \brief The induced vertex ids. + * \note This is also a map from the new vertex id to the vertex id in the parent graph. + */ + IdArray induced_vertices; + /*! + * \brief The induced edge ids. + * \note This is also a map from the new edge id to the edge id in the parent graph. 
+ */ + IdArray induced_edges; +}; + +} // namespace dgl + +#endif // DGL_GRAPH_INTERFACE_H_ diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index e76a3ff739ed..7947888cd7e0 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -1,6 +1,6 @@ /*! * Copyright (c) 2018 by Contributors - * \file dgl/graph.h + * \file dgl/immutable_graph.h * \brief DGL immutable graph index class. */ #ifndef DGL_IMMUTABLE_GRAPH_H_ @@ -12,11 +12,10 @@ #include #include #include "runtime/ndarray.h" -#include "graph.h" +#include "graph_interface.h" namespace dgl { -struct ImmutableSubgraph; struct SampledSubgraph; template @@ -29,13 +28,8 @@ bool binary_search(ForwardIt first, ForwardIt last, const T& value) { * \brief Base dgl immutable graph index class. * */ -class ImmutableGraph { +class ImmutableGraph: public GraphInterface { public: - typedef struct { - /* \brief the two endpoints and the id of the edge */ - IdArray src, dst, id; - } EdgeArray; - typedef struct { IdArray indptr, indices, id; } CSRArray; @@ -114,6 +108,41 @@ class ImmutableGraph { /*! \brief default destructor */ ~ImmutableGraph() = default; + /*! + * \brief Add vertices to the graph. + * \note Since vertices are integers enumerated from zero, only the number of + * vertices to be added needs to be specified. + * \param num_vertices The number of vertices to be added. + */ + void AddVertices(uint64_t num_vertices) { + LOG(FATAL) << "Immutable graph doesn't support adding vertices"; + } + + /*! + * \brief Add one edge to the graph. + * \param src The source vertex. + * \param dst The destination vertex. + */ + void AddEdge(dgl_id_t src, dgl_id_t dst) { + LOG(FATAL) << "Immutable graph doesn't support adding edge"; + } + + /*! + * \brief Add edges to the graph. + * \param src_ids The source vertex id array. + * \param dst_ids The destination vertex id array. 
+ */ + void AddEdges(IdArray src_ids, IdArray dst_ids) { + LOG(FATAL) << "Immutable graph doesn't support adding edges"; + } + + /*! + * \brief Clear the graph. Remove all vertices/edges. + */ + void Clear() { + LOG(FATAL) << "Immutable graph doesn't support clearing vertices and edges"; + } + /*! * \note not const since we have caches * \return whether the graph is a multigraph @@ -199,6 +228,26 @@ class ImmutableGraph { */ EdgeArray EdgeIds(IdArray src, IdArray dst) const; + /*! + * \brief Find the edge ID and return the pair of endpoints + * \param eid The edge ID + * \return a pair whose first element is the source and the second the destination. + */ + std::pair FindEdge(dgl_id_t eid) const { + LOG(FATAL) << "not implemented"; + return std::pair(); + } + + /*! + * \brief Find the edge IDs and return their source and target node IDs. + * \param eids The edge ID array. + * \return EdgeArray containing all edges with id in eid. The order is preserved. + */ + EdgeArray FindEdges(IdArray eids) const { + LOG(FATAL) << "not implemented"; + return EdgeArray(); + } + /*! * \brief Get the in edges of the vertex. * \note The returned dst id array is filled with vid. @@ -303,9 +352,9 @@ class ImmutableGraph { * \param vids The vertices in the subgraph. * \return the induced subgraph */ - ImmutableSubgraph VertexSubgraph(IdArray vids) const; + Subgraph VertexSubgraph(IdArray vids) const; - std::vector VertexSubgraphs(const std::vector &vids) const; + std::vector VertexSubgraphs(const std::vector &vids) const; /*! * \brief Construct the induced edge subgraph of the given edges. @@ -323,9 +372,9 @@ class ImmutableGraph { * \param eids The edges in the subgraph. * \return the induced edge subgraph */ - ImmutableSubgraph EdgeSubgraph(IdArray eids) const; + Subgraph EdgeSubgraph(IdArray eids) const; - std::vector EdgeSubgraphs(std::vector eids) const; + std::vector EdgeSubgraphs(std::vector eids) const; /*! * \brief Return a new graph with all the edges reversed. 
@@ -334,8 +383,8 @@ class ImmutableGraph { * * \return the reversed graph */ - ImmutableGraph Reverse() const { - return ImmutableGraph(out_csr_, in_csr_, is_multigraph_); + GraphInterface::ptr Reverse() const { + return GraphInterface::ptr(new ImmutableGraph(out_csr_, in_csr_, is_multigraph_)); } /*! @@ -343,9 +392,9 @@ class ImmutableGraph { * \param vid The vertex id. * \return the successor vector */ - std::vector SuccVec(dgl_id_t vid) const { - return std::vector(out_csr_->indices.begin() + out_csr_->indptr[vid], - out_csr_->indices.begin() + out_csr_->indptr[vid + 1]); + dgl_id_iters SuccVec(dgl_id_t vid) const { + return dgl_id_iters(out_csr_->indices.begin() + out_csr_->indptr[vid], + out_csr_->indices.begin() + out_csr_->indptr[vid + 1]); } /*! @@ -353,9 +402,9 @@ class ImmutableGraph { * \param vid The vertex id. * \return the out edge id vector */ - std::vector OutEdgeVec(dgl_id_t vid) const { - return std::vector(out_csr_->edge_ids.begin() + out_csr_->indptr[vid], - out_csr_->edge_ids.begin() + out_csr_->indptr[vid + 1]); + dgl_id_iters OutEdgeVec(dgl_id_t vid) const { + return dgl_id_iters(out_csr_->edge_ids.begin() + out_csr_->indptr[vid], + out_csr_->edge_ids.begin() + out_csr_->indptr[vid + 1]); } /*! @@ -363,9 +412,9 @@ class ImmutableGraph { * \param vid The vertex id. * \return the predecessor vector */ - std::vector PredVec(dgl_id_t vid) const { - return std::vector(in_csr_->indices.begin() + in_csr_->indptr[vid], - in_csr_->indices.begin() + in_csr_->indptr[vid + 1]); + dgl_id_iters PredVec(dgl_id_t vid) const { + return dgl_id_iters(in_csr_->indices.begin() + in_csr_->indptr[vid], + in_csr_->indices.begin() + in_csr_->indptr[vid + 1]); } /*! @@ -373,9 +422,19 @@ class ImmutableGraph { * \param vid The vertex id. 
* \return the in edge id vector */ - std::vector InEdgeVec(dgl_id_t vid) const { - return std::vector(in_csr_->edge_ids.begin() + in_csr_->indptr[vid], - in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]); + dgl_id_iters InEdgeVec(dgl_id_t vid) const { + return dgl_id_iters(in_csr_->edge_ids.begin() + in_csr_->indptr[vid], + in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]); + } + + /*! + * \brief Reset the data in the graph and move its data to the returned graph object. + * \return a raw pointer to the graph object. + */ + virtual GraphInterface *Reset() { + ImmutableGraph* gptr = new ImmutableGraph(); + *gptr = std::move(this); + return gptr; } /*! @@ -443,27 +502,11 @@ class ImmutableGraph { bool is_multigraph_ = false; }; -/*! \brief Subgraph data structure */ -struct ImmutableSubgraph { - /*! \brief The graph. */ - ImmutableGraph graph; - /*! - * \brief The induced vertex ids. - * \note This is also a map from the new vertex id to the vertex id in the parent graph. - */ - IdArray induced_vertices; - /*! - * \brief The induced edge ids. - * \note This is also a map from the new edge id to the edge id in the parent graph. - */ - IdArray induced_edges; -}; - /*! * \brief When we sample a subgraph, we need to store extra information, * such as the layer Ids of the vertices and the sampling probability. */ -struct SampledSubgraph: public ImmutableSubgraph { +struct SampledSubgraph: public Subgraph { /*! * \brief the layer of a sampled vertex in the subgraph. 
*/ diff --git a/src/graph/graph.cc b/src/graph/graph.cc index 56d7c94c89d2..f66024ea1b6f 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -417,7 +417,7 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const { } Subgraph rst; rst.induced_vertices = vids; - rst.graph.AddVertices(len); + rst.graph->AddVertices(len); for (int64_t i = 0; i < len; ++i) { const dgl_id_t oldvid = vid_data[i]; const dgl_id_t newvid = i; @@ -426,7 +426,7 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const { if (oldv2newv.count(oldsucc)) { const dgl_id_t newsucc = oldv2newv[oldsucc]; edges.push_back(adjlist_[oldvid].edge_id[j]); - rst.graph.AddEdge(newvid, newsucc); + rst.graph->AddEdge(newvid, newsucc); } } } @@ -454,12 +454,12 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { Subgraph rst; rst.induced_edges = eids; - rst.graph.AddVertices(nodes.size()); + rst.graph->AddVertices(nodes.size()); for (int64_t i = 0; i < len; ++i) { dgl_id_t src_id = all_edges_src_[eid_data[i]]; dgl_id_t dst_id = all_edges_dst_[eid_data[i]]; - rst.graph.AddEdge(oldv2newv[src_id], oldv2newv[dst_id]); + rst.graph->AddEdge(oldv2newv[src_id], oldv2newv[dst_id]); } rst.induced_vertices = IdArray::Empty( @@ -469,9 +469,9 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { return rst; } -Graph Graph::Reverse() const { +GraphInterface::ptr Graph::Reverse() const { LOG(FATAL) << "not implemented"; - return *this; + return nullptr; } } // namespace dgl diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index a1c98fdb6a4f..cf1f32e60558 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -58,28 +58,7 @@ PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) { auto body = [sg] (DGLArgs args, DGLRetValue* rv) { const int which = args[0]; if (which == 0) { - Graph* gptr = new Graph(); - *gptr = std::move(sg.graph); - GraphHandle ghandle = gptr; - *rv = ghandle; - } else if (which == 1) { - *rv = std::move(sg.induced_vertices); - } else if (which == 2) { - *rv = 
std::move(sg.induced_edges); - } else { - LOG(FATAL) << "invalid choice"; - } - }; - return PackedFunc(body); -} - -// Convert Subgraph structure to PackedFunc. -PackedFunc ConvertSubgraphToPackedFunc(const ImmutableSubgraph& sg) { - auto body = [sg] (DGLArgs args, DGLRetValue* rv) { - const int which = args[0]; - if (which == 0) { - ImmutableGraph* gptr = new ImmutableGraph(); - *gptr = std::move(sg.graph); + Graph* gptr = sg.graph->reset(); GraphHandle ghandle = gptr; *rv = ghandle; } else if (which == 1) { @@ -98,8 +77,8 @@ PackedFunc ConvertSubgraphToPackedFunc(const std::vector& sg) { auto body = [sg] (DGLArgs args, DGLRetValue* rv) { const int which = args[0]; if (which < sg.size()) { - ImmutableGraph* gptr = new ImmutableGraph(); - *gptr = std::move(sg[which].graph); + Graph* gptr = sg[which].graph->reset(); + *gptr = std::move(); GraphHandle ghandle = gptr; *rv = ghandle; } else if (which >= sg.size() && which < sg.size() * 2) { diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index cc4b88003e1d..ceb7721739b4 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -455,27 +455,28 @@ ImmutableGraph::EdgeArray ImmutableGraph::Edges(bool sorted) const { return ImmutableGraph::EdgeArray{rst_src, rst_dst, rst_eid}; } -ImmutableSubgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { - ImmutableSubgraph subg; +Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { + Subgraph subg; std::pair ret; // We prefer to generate a subgraph for out-csr first. if (out_csr_) { ret = out_csr_->VertexSubgraph(vids); - subg.graph = ImmutableGraph(nullptr, ret.first, IsMultigraph()); + subg.graph = GraphInterface::ptr(new ImmutableGraph(nullptr, ret.first, IsMultigraph())); } else { assert(in_csr_); ret = in_csr_->VertexSubgraph(vids); // When we generate a subgraph, it may be used by only accessing in-edges or out-edges. // We don't need to generate both. 
- subg.graph = ImmutableGraph(ret.first, nullptr, IsMultigraph()); + subg.graph = GraphInterface::ptr(new ImmutableGraph(ret.first, nullptr, IsMultigraph())); } subg.induced_vertices = vids; subg.induced_edges = ret.second; return subg; } -ImmutableSubgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const { - return ImmutableSubgraph(); +Subgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const { + LOG(FATAL) << "not implemented"; + return Subgraph(); } ImmutableGraph::CSRArray ImmutableGraph::GetInCSRArray() const { From bd1648777464e2684b7784fba74149fbf2d96f79 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Thu, 10 Jan 2019 23:08:42 +0800 Subject: [PATCH 24/75] fix bugs. --- include/dgl/graph.h | 2 +- include/dgl/graph_interface.h | 23 ++- include/dgl/immutable_graph.h | 2 +- src/graph/graph_apis.cc | 271 +++++----------------------------- src/graph/immutable_graph.cc | 6 +- 5 files changed, 67 insertions(+), 237 deletions(-) diff --git a/include/dgl/graph.h b/include/dgl/graph.h index b1b2aefa23a4..8cfad4d43790 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -343,7 +343,7 @@ class Graph: public GraphInterface { */ virtual GraphInterface *Reset() { Graph* gptr = new Graph(); - *gptr = std::move(this); + *gptr = std::move(*this); return gptr; } diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 7d368074dcf3..5a98c087ba62 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -31,8 +31,27 @@ class GraphInterface { typedef std::shared_ptr ptr; - typedef std::pair::const_iterator, - std::vector::const_iterator> dgl_id_iters; + class dgl_id_iters { + std::vector::const_iterator b, e; + public: + dgl_id_iters(std::vector::const_iterator begin, + std::vector::const_iterator end) { + this->b = begin; + this->e = end; + } + std::vector::const_iterator begin() const { + return this->b; + } + std::vector::const_iterator end() const { + return this->e; + } + dgl_id_t operator[](int64_t i) const { + return 
*(this->b + i); + } + size_t size() const { + return this->e - this->b; + } + }; virtual ~GraphInterface() { } diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 7947888cd7e0..f9d325d31106 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -433,7 +433,7 @@ class ImmutableGraph: public GraphInterface { */ virtual GraphInterface *Reset() { ImmutableGraph* gptr = new ImmutableGraph(); - *gptr = std::move(this); + *gptr = std::move(*this); return gptr; } diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index cf1f32e60558..ae1d860e9af3 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -58,7 +58,7 @@ PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) { auto body = [sg] (DGLArgs args, DGLRetValue* rv) { const int which = args[0]; if (which == 0) { - Graph* gptr = sg.graph->reset(); + GraphInterface* gptr = sg.graph->Reset(); GraphHandle ghandle = gptr; *rv = ghandle; } else if (which == 1) { @@ -77,8 +77,7 @@ PackedFunc ConvertSubgraphToPackedFunc(const std::vector& sg) { auto body = [sg] (DGLArgs args, DGLRetValue* rv) { const int which = args[0]; if (which < sg.size()) { - Graph* gptr = sg[which].graph->reset(); - *gptr = std::move(); + GraphInterface* gptr = sg[which].graph->Reset(); GraphHandle ghandle = gptr; *rv = ghandle; } else if (which >= sg.size() && which < sg.size() * 2) { @@ -110,14 +109,14 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFree") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - Graph* gptr = static_cast(ghandle); + GraphInterface* gptr = static_cast(ghandle); delete gptr; }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddVertices") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - Graph* gptr = static_cast(ghandle); + GraphInterface* gptr = static_cast(ghandle); uint64_t num_vertices = args[1]; 
gptr->AddVertices(num_vertices); }); @@ -125,7 +124,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddVertices") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdge") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - Graph* gptr = static_cast(ghandle); + GraphInterface* gptr = static_cast(ghandle); const dgl_id_t src = args[1]; const dgl_id_t dst = args[2]; gptr->AddEdge(src, dst); @@ -134,7 +133,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdge") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - Graph* gptr = static_cast(ghandle); + GraphInterface* gptr = static_cast(ghandle); const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); gptr->AddEdges(src, dst); @@ -143,7 +142,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphAddEdges") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphClear") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - Graph* gptr = static_cast(ghandle); + GraphInterface* gptr = static_cast(ghandle); gptr->Clear(); }); @@ -151,28 +150,28 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsMultigraph") .set_body([] (DGLArgs args, DGLRetValue *rv) { GraphHandle ghandle = args[0]; // NOTE: not const since we have caches - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); *rv = gptr->IsMultigraph(); }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumVertices") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); *rv = static_cast(gptr->NumVertices()); }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumEdges") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = 
static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); *rv = static_cast(gptr->NumEdges()); }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertex") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t vid = args[1]; *rv = gptr->HasVertex(vid); }); @@ -180,7 +179,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertex") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasVertices") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); *rv = gptr->HasVertices(vids); }); @@ -195,7 +194,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLMapSubgraphNID") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t src = args[1]; const dgl_id_t dst = args[2]; *rv = gptr->HasEdgeBetween(src, dst); @@ -204,7 +203,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgeBetween") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); *rv = gptr->HasEdgesBetween(src, dst); @@ -213,7 +212,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphHasEdgesBetween") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphPredecessors") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - 
const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t vid = args[1]; const uint64_t radius = args[2]; *rv = gptr->Predecessors(vid, radius); @@ -222,7 +221,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphPredecessors") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphSuccessors") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t vid = args[1]; const uint64_t radius = args[2]; *rv = gptr->Successors(vid, radius); @@ -231,7 +230,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphSuccessors") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeId") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t src = args[1]; const dgl_id_t dst = args[2]; *rv = gptr->EdgeId(src, dst); @@ -240,7 +239,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeId") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); *rv = ConvertEdgeArrayToPackedFunc(gptr->EdgeIds(src, dst)); @@ -249,7 +248,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeIds") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); *rv = ConvertEdgeArrayToPackedFunc(gptr->FindEdges(eids)); }); @@ -257,7 
+256,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFindEdges") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_1") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t vid = args[1]; *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vid)); }); @@ -265,7 +264,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_1") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vids)); }); @@ -273,7 +272,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInEdges_2") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_1") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t vid = args[1]; *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vid)); }); @@ -281,7 +280,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_1") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vids)); }); @@ -289,7 +288,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutEdges_2") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdges") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = 
static_cast(ghandle); const bool sorted = args[1]; *rv = ConvertEdgeArrayToPackedFunc(gptr->Edges(sorted)); }); @@ -297,7 +296,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdges") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t vid = args[1]; *rv = static_cast(gptr->InDegree(vid)); }); @@ -305,7 +304,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); *rv = gptr->InDegrees(vids); }); @@ -313,7 +312,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegrees") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegree") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const dgl_id_t vid = args[1]; *rv = static_cast(gptr->OutDegree(vid)); }); @@ -321,7 +320,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegree") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + const GraphInterface* gptr = static_cast(ghandle); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); *rv = gptr->OutDegrees(vids); }); @@ -329,7 +328,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphOutDegrees") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); + 
const GraphInterface* gptr = static_cast(ghandle); const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); *rv = ConvertSubgraphToPackedFunc(gptr->VertexSubgraph(vids)); }); @@ -337,7 +336,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphVertexSubgraph") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdgeSubgraph") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph *gptr = static_cast(ghandle); + const GraphInterface *gptr = static_cast(ghandle); const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); *rv = ConvertSubgraphToPackedFunc(gptr->EdgeSubgraph(eids)); }); @@ -349,6 +348,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion") int list_size = args[1]; std::vector graphs; for (int i = 0; i < list_size; ++i) { +// const GraphInterface *ptr = static_cast(inhandles[i]); const Graph* gr = static_cast(inhandles[i]); graphs.push_back(gr); } @@ -361,7 +361,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); +// const GraphInterface *ptr = static_cast(ghandle); + const Graph* gptr = static_cast(ghandle); int64_t num = args[1]; std::vector&& rst = GraphOp::DisjointPartitionByNum(gptr, num); // return the pointer array as an integer array @@ -379,7 +380,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; - const Graph* gptr = static_cast(ghandle); +// const GraphInterface *ptr = static_cast(ghandle); + const Graph* gptr = static_cast(ghandle); const IdArray sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); std::vector&& rst = GraphOp::DisjointPartitionBySizes(gptr, sizes); // return the pointer array as 
an integer array @@ -398,7 +400,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; bool backtracking = args[1]; - const Graph* gptr = static_cast(ghandle); +// const GraphInterface *ptr = static_cast(ghandle); + const Graph* gptr = static_cast(ghandle); Graph* lgptr = new Graph(); *lgptr = GraphOp::LineGraph(gptr, backtracking); GraphHandle lghandle = lgptr; @@ -418,205 +421,12 @@ DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphCreate") *rv = ghandle; }); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphFree") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - ImmutableGraph* gptr = static_cast(ghandle); - delete gptr; - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphIsMultigraph") -.set_body([] (DGLArgs args, DGLRetValue *rv) { - GraphHandle ghandle = args[0]; - // NOTE: not const since we have caches - const ImmutableGraph* gptr = static_cast(ghandle); - *rv = gptr->IsMultigraph(); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphNumVertices") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - *rv = static_cast(gptr->NumVertices()); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphNumEdges") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - *rv = static_cast(gptr->NumEdges()); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasVertex") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t vid = args[1]; - *rv = gptr->HasVertex(vid); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasVertices") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - 
const ImmutableGraph* gptr = static_cast(ghandle); - const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = gptr->HasVertices(vids); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLExpandIds") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - const IdArray ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); - const IdArray offsets = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = GraphOp::ExpandIds(ids, offsets); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasEdgeBetween") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t src = args[1]; - const dgl_id_t dst = args[2]; - *rv = gptr->HasEdgeBetween(src, dst); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphHasEdgesBetween") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); - *rv = gptr->HasEdgesBetween(src, dst); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphPredecessors") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t vid = args[1]; - const uint64_t radius = args[2]; - *rv = gptr->Predecessors(vid, radius); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphSuccessors") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t vid = args[1]; - const uint64_t radius = args[2]; - *rv = gptr->Successors(vid, radius); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdgeId") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - 
GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t src = args[1]; - const dgl_id_t dst = args[2]; - *rv = gptr->EdgeId(src, dst); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdgeIds") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const IdArray src = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - const IdArray dst = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); - *rv = ConvertEdgeArrayToPackedFunc(gptr->EdgeIds(src, dst)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphInEdges_1") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t vid = args[1]; - *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vid)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphInEdges_2") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = ConvertEdgeArrayToPackedFunc(gptr->InEdges(vids)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphOutEdges_1") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t vid = args[1]; - *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vid)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphOutEdges_2") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = ConvertEdgeArrayToPackedFunc(gptr->OutEdges(vids)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdges") 
-.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const bool sorted = args[1]; - *rv = ConvertEdgeArrayToPackedFunc(gptr->Edges(sorted)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphInDegree") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t vid = args[1]; - *rv = static_cast(gptr->InDegree(vid)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphInDegrees") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = gptr->InDegrees(vids); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphOutDegree") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const dgl_id_t vid = args[1]; - *rv = static_cast(gptr->OutDegree(vid)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphOutDegrees") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = gptr->OutDegrees(vids); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphVertexSubgraph") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph* gptr = static_cast(ghandle); - const IdArray vids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = ConvertSubgraphToPackedFunc(gptr->VertexSubgraph(vids)); - }); - -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphEdgeSubgraph") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - const ImmutableGraph *gptr = 
static_cast(ghandle); - const IdArray eids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - *rv = ConvertSubgraphToPackedFunc(gptr->EdgeSubgraph(eids)); - }); - DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphGetCSR") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; bool transpose = args[1]; - const ImmutableGraph *gptr = static_cast(ghandle); +// const GraphInterface *ptr = static_cast(ghandle); + const ImmutableGraph *gptr = static_cast(ghandle); ImmutableGraph::CSRArray csr; if (transpose) { csr = gptr->GetOutCSRArray(); @@ -636,7 +446,8 @@ void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { int num_hops = args[num_seeds + 2]; int num_neighbors = args[num_seeds + 3]; int num_valid_seeds = args[num_seeds + 4]; - const ImmutableGraph *gptr = static_cast(ghandle); +// const GraphInterface *ptr = static_cast(ghandle); + const ImmutableGraph *gptr = static_cast(ghandle); assert(num_valid_seeds <= num_seeds); std::vector subgs(seeds.size()); #pragma omp parallel for diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index ceb7721739b4..7f24521e3e74 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -901,9 +901,9 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, subg_csr->edge_ids[i] = i; if (neigh_type == "in") - subg.graph = ImmutableGraph(subg_csr, nullptr, IsMultigraph()); + subg.graph = GraphInterface::ptr(new ImmutableGraph(subg_csr, nullptr, IsMultigraph())); else - subg.graph = ImmutableGraph(nullptr, subg_csr, IsMultigraph()); + subg.graph = GraphInterface::ptr(new ImmutableGraph(nullptr, subg_csr, IsMultigraph())); return subg; } @@ -938,7 +938,7 @@ SampledSubgraph ImmutableGraph::NeighborUniformSample(IdArray seeds, neigh_type, num_hops, expand_factor); - ret.graph.CompactSubgraph(ret.induced_vertices); + std::static_pointer_cast(ret.graph)->CompactSubgraph(ret.induced_vertices); return ret; } From 
cfaa487ee1559f89322f37d689770424d7dbafa6 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Thu, 10 Jan 2019 23:21:05 +0800 Subject: [PATCH 25/75] fix compile. --- include/dgl/graph_interface.h | 46 +++++++++++++++++------------------ src/graph/graph_apis.cc | 24 +++++++++--------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 5a98c087ba62..bd31b4f28ecf 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -16,6 +16,28 @@ typedef dgl::runtime::NDArray IntArray; struct Subgraph; +class dgl_id_iters { + std::vector::const_iterator b, e; + public: + dgl_id_iters(std::vector::const_iterator begin, + std::vector::const_iterator end) { + this->b = begin; + this->e = end; + } + std::vector::const_iterator begin() const { + return this->b; + } + std::vector::const_iterator end() const { + return this->e; + } + dgl_id_t operator[](int64_t i) const { + return *(this->b + i); + } + size_t size() const { + return this->e - this->b; + } +}; + /*! * \brief dgl graph index interface. * @@ -31,28 +53,6 @@ class GraphInterface { typedef std::shared_ptr ptr; - class dgl_id_iters { - std::vector::const_iterator b, e; - public: - dgl_id_iters(std::vector::const_iterator begin, - std::vector::const_iterator end) { - this->b = begin; - this->e = end; - } - std::vector::const_iterator begin() const { - return this->b; - } - std::vector::const_iterator end() const { - return this->e; - } - dgl_id_t operator[](int64_t i) const { - return *(this->b + i); - } - size_t size() const { - return this->e - this->b; - } - }; - virtual ~GraphInterface() { } @@ -90,7 +90,7 @@ class GraphInterface { virtual bool IsMultigraph() const = 0; /*! \return the number of vertices in the graph.*/ - virtual uint64_t NumVertices() const; + virtual uint64_t NumVertices() const = 0; /*! 
\return the number of edges in the graph.*/ virtual uint64_t NumEdges() const = 0; diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index ae1d860e9af3..b948c3060342 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -348,8 +348,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion") int list_size = args[1]; std::vector graphs; for (int i = 0; i < list_size; ++i) { -// const GraphInterface *ptr = static_cast(inhandles[i]); - const Graph* gr = static_cast(inhandles[i]); + const GraphInterface *ptr = static_cast(inhandles[i]); + const Graph* gr = dynamic_cast(ptr); graphs.push_back(gr); } Graph* gptr = new Graph(); @@ -361,8 +361,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; -// const GraphInterface *ptr = static_cast(ghandle); - const Graph* gptr = static_cast(ghandle); + const GraphInterface *ptr = static_cast(ghandle); + const Graph* gptr = dynamic_cast(ptr); int64_t num = args[1]; std::vector&& rst = GraphOp::DisjointPartitionByNum(gptr, num); // return the pointer array as an integer array @@ -380,8 +380,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; -// const GraphInterface *ptr = static_cast(ghandle); - const Graph* gptr = static_cast(ghandle); + const GraphInterface *ptr = static_cast(ghandle); + const Graph* gptr = dynamic_cast(ptr); const IdArray sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); std::vector&& rst = GraphOp::DisjointPartitionBySizes(gptr, sizes); // return the pointer array as an integer array @@ -400,8 +400,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; bool 
backtracking = args[1]; -// const GraphInterface *ptr = static_cast(ghandle); - const Graph* gptr = static_cast(ghandle); + const GraphInterface *ptr = static_cast(ghandle); + const Graph* gptr = dynamic_cast(ptr); Graph* lgptr = new Graph(); *lgptr = GraphOp::LineGraph(gptr, backtracking); GraphHandle lghandle = lgptr; @@ -425,8 +425,8 @@ DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphGetCSR") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; bool transpose = args[1]; -// const GraphInterface *ptr = static_cast(ghandle); - const ImmutableGraph *gptr = static_cast(ghandle); + const GraphInterface *ptr = static_cast(ghandle); + const ImmutableGraph *gptr = dynamic_cast(ptr); ImmutableGraph::CSRArray csr; if (transpose) { csr = gptr->GetOutCSRArray(); @@ -446,8 +446,8 @@ void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { int num_hops = args[num_seeds + 2]; int num_neighbors = args[num_seeds + 3]; int num_valid_seeds = args[num_seeds + 4]; -// const GraphInterface *ptr = static_cast(ghandle); - const ImmutableGraph *gptr = static_cast(ghandle); + const GraphInterface *ptr = static_cast(ghandle); + const ImmutableGraph *gptr = dynamic_cast(ptr); assert(num_valid_seeds <= num_seeds); std::vector subgs(seeds.size()); #pragma omp parallel for From d06bda7d97015bcd4621f872ed0c77ffbd982098 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 00:05:14 +0800 Subject: [PATCH 26/75] move immutable graph. --- python/dgl/graph_index.py | 401 +++++++++++++- python/dgl/immutable_graph_index.py | 829 ---------------------------- src/graph/graph_apis.cc | 20 +- 3 files changed, 410 insertions(+), 840 deletions(-) delete mode 100644 python/dgl/immutable_graph_index.py diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 3fce05bf0b12..2e22210380af 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -11,7 +11,6 @@ from .base import DGLError from . import backend as F from . 
import utils -from .immutable_graph_index import create_immutable_graph_index GraphIndexHandle = ctypes.c_void_p @@ -936,4 +935,404 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): return gidx + +class ImmutableGraphIndex(GraphIndex): + """Graph index object on immutable graphs. + + Parameters + ---------- + backend_csr: a csr array provided by the backend framework. + """ + def __init__(self, handle): + self._handle = handle + self._cache = {} + + def init(self, src_ids, dst_ids, edge_ids, num_nodes): + """The actual init function""" + self._handle = _CAPI_DGLGraphCreateImmutable(src_ids.todgltensor(), dst_ids.todgltensor(), + edge_ids.todgltensor(), False, num_nodes) + + def add_nodes(self, num): + """Add nodes. + + Parameters + ---------- + num : int + Number of nodes to be added. + """ + raise DGLError('Immutable graph doesn\'t support adding nodes') + + def add_edge(self, u, v): + """Add one edge. + + Parameters + ---------- + u : int + The src node. + v : int + The dst node. + """ + raise DGLError('Immutable graph doesn\'t support adding an edge') + + def add_edges(self, u, v): + """Add many edges. + + Parameters + ---------- + u : utils.Index + The src nodes. + v : utils.Index + The dst nodes. + """ + raise DGLError('Immutable graph doesn\'t support adding edges') + + def clear(self): + """Clear the graph.""" + raise DGLError('Immutable graph doesn\'t support clearing up') + + def is_readonly(self): + """Indicate whether the graph index is read-only. + + Returns + ------- + bool + True if it is a read-only graph, False otherwise. + """ + return True + + def find_edges(self, eid): + """Return a triplet of arrays that contains the edge IDs. + + Parameters + ---------- + eid : utils.Index + The edge ids. + + Returns + ------- + utils.Index + The src nodes. + utils.Index + The dst nodes. + utils.Index + The edge ids. 
+ """ + raise NotImplementedError('immutable graph doesn\'t implement find_edges for now.') + + def edge_subgraph(self, e): + """Return the induced edge subgraph. + + Parameters + ---------- + e : utils.Index + The edges. + + Returns + ------- + SubgraphIndex + The subgraph index. + """ + raise NotImplementedError('immutable graph doesn\'t implement edge_subgraph for now.') + + def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob): + """Neighborhood sampling""" + if len(seed_ids) == 0: + return [] + + seed_ids = [v.todgltensor() for v in seed_ids] + num_subgs = len(seed_ids) + if node_prob is None: + rst = _uniform_sampling(self, seed_ids, neighbor_type, num_hops, expand_factor) + else: + rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, + expand_factor) + + return [ImmutableSubgraphIndex(rst(i), self, rst(num_subgs + i), + rst(num_subgs * 2 + i)) for i in range(num_subgs)] + + def adjacency_matrix(self, transpose=False, ctx=F.cpu()): + """Return the adjacency matrix representation of this graph. + + By default, a row of returned adjacency matrix represents the destination + of an edge and the column represents the source. + + When transpose is True, a row represents the source and a column represents + a destination. + + Parameters + ---------- + transpose : bool + A flag to tranpose the returned adjacency matrix. + + Returns + ------- + SparseTensor + The adjacency matrix. + utils.Index + A index for data shuffling due to sparse format change. Return None + if shuffle is not required. 
+ """ + rst = _CAPI_DGLGraphGetCSR(self._handle, transpose) + indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) + indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) + shuffle = utils.toindex(rst(2)) + dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) + return F.sparse_matrix(dat, ('csr', indices, indptr), + (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle + + def incidence_matrix(self, typestr, ctx): + """Return the incidence matrix representation of this graph. + + An incidence matrix is an n x m sparse matrix, where n is + the number of nodes and m is the number of edges. Each nnz + value indicating whether the edge is incident to the node + or not. + + There are three types of an incidence matrix `I`: + * "in": + - I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e); + - I[v, e] = 0 otherwise. + * "out": + - I[v, e] = 1 if e is the out-edge of v (or v is the src node of e); + - I[v, e] = 0 otherwise. + * "both": + - I[v, e] = 1 if e is the in-edge of v; + - I[v, e] = -1 if e is the out-edge of v; + - I[v, e] = 0 otherwise (including self-loop). + + Parameters + ---------- + typestr : str + Can be either "in", "out" or "both" + ctx : context + The context of returned incidence matrix. + + Returns + ------- + SparseTensor + The incidence matrix. + utils.Index + A index for data shuffling due to sparse format change. Return None + if shuffle is not required. + """ + raise NotImplementedError('immutable graph doesn\'t implement incidence_matrix for now.') + + def to_networkx(self): + """Convert to networkx graph. + + The edge id will be saved as the 'id' edge attribute. + + Returns + ------- + networkx.DiGraph + The nx graph + """ + src, dst, eid = self.edges() + ret = nx.DiGraph() + for u, v, e in zip(src, dst, eid): + ret.add_edge(u, v, id=e) + return ret + + def from_networkx(self, nx_graph): + """Convert from networkx graph. 
+ + If 'id' edge attribute exists, the edge will be added follows + the edge id order. Otherwise, order is undefined. + + Parameters + ---------- + nx_graph : networkx.DiGraph + The nx graph + """ + if not isinstance(nx_graph, nx.Graph): + nx_graph = nx.DiGraph(nx_graph) + else: + nx_graph = nx_graph.to_directed() + + assert nx_graph.number_of_edges() > 0, "can't create an empty immutable graph" + + # nx_graph.edges(data=True) returns src, dst, attr_dict + has_edge_id = 'id' in next(iter(nx_graph.edges(data=True)))[-1] + if has_edge_id: + num_edges = nx_graph.number_of_edges() + src = np.zeros((num_edges,), dtype=np.int64) + dst = np.zeros((num_edges,), dtype=np.int64) + for u, v, attr in nx_graph.edges(data=True): + eid = attr['id'] + src[eid] = u + dst[eid] = v + else: + src = [] + dst = [] + for e in nx_graph.edges: + src.append(e[0]) + dst.append(e[1]) + eid = np.arange(0, len(src), dtype=np.int64) + + num_nodes = nx_graph.number_of_nodes() + # We store edge Ids as an edge attribute. + eid = utils.toindex(eid) + src = utils.toindex(src) + dst = utils.toindex(dst) + self.init(src, dst, eid, num_nodes) + + def from_scipy_sparse_matrix(self, adj): + """Convert from scipy sparse matrix. + + NOTE: we assume the row is src nodes and the col is dst nodes. + + Parameters + ---------- + adj : scipy sparse matrix + """ + assert isinstance(adj, (scipy.sparse.csr_matrix, scipy.sparse.coo_matrix)), \ + "The input matrix has to be a SciPy sparse matrix." + num_nodes = max(adj.shape[0], adj.shape[1]) + out_mat = adj.tocoo() + src_ids = utils.toindex(out_mat.row) + dst_ids = utils.toindex(out_mat.col) + edge_ids = utils.toindex(F.arange(0, len(out_mat.row))) + self.init(src_ids, dst_ids, edge_ids, num_nodes) + + def from_edge_list(self, elist): + """Convert from an edge list. + + Paramters + --------- + elist : list + List of (u, v) edge tuple. 
+ """ + src, dst = zip(*elist) + src = np.array(src) + dst = np.array(dst) + src_ids = utils.toindex(src) + dst_ids = utils.toindex(dst) + num_nodes = max(src.max(), dst.max()) + 1 + edge_ids = utils.toindex(F.arange(0, len(src))) + # TODO we need to detect multigraph automatically. + self.init(src_ids, dst_ids, edge_ids, num_nodes) + + def line_graph(self, backtracking=True): + """Return the line graph of this graph. + + Parameters + ---------- + backtracking : bool, optional (default=False) + Whether (i, j) ~ (j, i) in L(G). + (i, j) ~ (j, i) is the behavior of networkx.line_graph. + + Returns + ------- + ImmutableGraphIndex + The line graph of this graph. + """ + raise NotImplementedError('immutable graph doesn\'t implement line_graph') + +class ImmutableSubgraphIndex(ImmutableGraphIndex): + """Graph index for an immutable subgraph. + + Parameters + ---------- + backend_sparse : a sparse matrix from the backend framework. + The sparse matrix that represents a subgraph. + paranet : GraphIndex + The parent graph index. + induced_nodes : tensor + The parent node ids in this subgraph. + induced_edges : a lambda function that returns a tensor + The parent edge ids in this subgraph. + """ + def __init__(self, handle, parent, induced_nodes, induced_edges): + super(ImmutableSubgraphIndex, self).__init__(handle) + + self._parent = parent + self._induced_nodes = induced_nodes + self._induced_edges = induced_edges + + @property + def induced_edges(self): + """Return parent edge ids. + + Returns + ------- + A lambda function that returns utils.Index + The parent edge ids. + """ + return utils.toindex(self._induced_edges) + + @property + def induced_nodes(self): + """Return parent node ids. + + Returns + ------- + utils.Index + The parent node ids. + """ + return utils.toindex(self._induced_nodes) + +def create_immutable_graph_index(graph_data=None): + """Create a graph index object. + + Parameters + ---------- + graph_data : graph data, optional + Data to initialize graph. 
Same as networkx's semantics. + """ + if isinstance(graph_data, ImmutableGraphIndex): + return graph_data + + # Let's create an empty graph index first. + gidx = ImmutableGraphIndex(None) + + # edge list + if isinstance(graph_data, (list, tuple)): + try: + gidx.from_edge_list(graph_data) + return gidx + except Exception: # pylint: disable=broad-except + raise DGLError('Graph data is not a valid edge list for immutable_graph_index.') + + # scipy format + if isinstance(graph_data, scipy.sparse.spmatrix): + try: + gidx.from_scipy_sparse_matrix(graph_data) + return gidx + except Exception as e: # pylint: disable=broad-except + print(e) + raise DGLError('Graph data is not a valid scipy sparse matrix.') + + # networkx - any format + try: + gidx.from_networkx(graph_data) + except Exception: # pylint: disable=broad-except + raise DGLError('Error while creating graph from input of type "%s".' + % type(graph_data)) + + return gidx + _init_api("dgl.graph_index") + +_NEIGHBOR_SAMPLING_APIS = { + 1: _CAPI_DGLGraphUniformSampling, + 2: _CAPI_DGLGraphUniformSampling2, + 4: _CAPI_DGLGraphUniformSampling4, + 8: _CAPI_DGLGraphUniformSampling8, + 16: _CAPI_DGLGraphUniformSampling16, + 32: _CAPI_DGLGraphUniformSampling32, + 64: _CAPI_DGLGraphUniformSampling64, + 128: _CAPI_DGLGraphUniformSampling128, +} + +_EMPTY_ARRAYS = [utils.toindex(F.ones(shape=(0), dtype=F.int64, ctx=F.cpu()))] + +def _uniform_sampling(gidx, seed_ids, neigh_type, num_hops, expand_factor): + num_seeds = len(seed_ids) + empty_ids = [] + if len(seed_ids) > 1 and len(seed_ids) not in _NEIGHBOR_SAMPLING_APIS.keys(): + remain = 2**int(math.ceil(math.log2(len(dgl_ids)))) - len(dgl_ids) + empty_ids = _EMPTY_ARRAYS[0:remain] + seed_ids.extend([empty.todgltensor() for empty in empty_ids]) + assert len(seed_ids) in _NEIGHBOR_SAMPLING_APIS.keys() + return _NEIGHBOR_SAMPLING_APIS[len(seed_ids)](gidx._handle, *seed_ids, neigh_type, + num_hops, expand_factor, num_seeds) diff --git a/python/dgl/immutable_graph_index.py 
b/python/dgl/immutable_graph_index.py deleted file mode 100644 index a90bd0fd2839..000000000000 --- a/python/dgl/immutable_graph_index.py +++ /dev/null @@ -1,829 +0,0 @@ -"""Module for immutable graph index. - -NOTE: this is currently a temporary solution. -""" -# pylint: disable=abstract-method,unused-argument - -from __future__ import absolute_import - -import numpy as np -import networkx as nx -import scipy.sparse as sp - -from ._ffi.function import _init_api -from . import backend as F -from . import utils -from .base import DGLError - -class ImmutableGraphIndex(object): - """Graph index object on immutable graphs. - - Parameters - ---------- - backend_csr: a csr array provided by the backend framework. - """ - def __init__(self, handle): - self._handle = handle - self._num_nodes = None - self._num_edges = None - self._cache = {} - - def init(self, src_ids, dst_ids, edge_ids, num_nodes): - """The actual init function""" - self._handle = _CAPI_DGLGraphCreate(src_ids.todgltensor(), dst_ids.todgltensor(), - edge_ids.todgltensor(), False, num_nodes) - self._num_nodes = num_nodes - self._num_edges = None - - def __del__(self): - """Free this graph index object.""" - if self._handle is not None: - _CAPI_DGLGraphFree(self._handle) - - def add_nodes(self, num): - """Add nodes. - - Parameters - ---------- - num : int - Number of nodes to be added. - """ - raise DGLError('Immutable graph doesn\'t support adding nodes') - - def add_edge(self, u, v): - """Add one edge. - - Parameters - ---------- - u : int - The src node. - v : int - The dst node. - """ - raise DGLError('Immutable graph doesn\'t support adding an edge') - - def add_edges(self, u, v): - """Add many edges. - - Parameters - ---------- - u : utils.Index - The src nodes. - v : utils.Index - The dst nodes. 
- """ - raise DGLError('Immutable graph doesn\'t support adding edges') - - def clear(self): - """Clear the graph.""" - raise DGLError('Immutable graph doesn\'t support clearing up') - - def is_multigraph(self): - """Return whether the graph is a multigraph - - Returns - ------- - bool - True if it is a multigraph, False otherwise. - """ - return bool(_CAPI_DGLGraphIsMultigraph(self._handle)) - - def is_readonly(self): - """Indicate whether the graph index is read-only. - - Returns - ------- - bool - True if it is a read-only graph, False otherwise. - """ - return True - - def number_of_nodes(self): - """Return the number of nodes. - - Returns - ------- - int - The number of nodes - """ - if self._num_nodes is None: - self._num_nodes = _CAPI_DGLGraphNumVertices(self._handle) - return self._num_nodes - - def number_of_edges(self): - """Return the number of edges. - - Returns - ------- - int - The number of edges - """ - if self._num_edges is None: - self._num_edges = _CAPI_DGLGraphNumEdges(self._handle) - return self._num_edges - - def has_node(self, vid): - """Return true if the node exists. - - Parameters - ---------- - vid : int - The nodes - - Returns - ------- - bool - True if the node exists - """ - return bool(_CAPI_DGLGraphHasVertex(self._handle, vid)) - - def has_nodes(self, vids): - """Return true if the nodes exist. - - Parameters - ---------- - vid : utils.Index - The nodes - - Returns - ------- - utils.Index - 0-1 array indicating existence - """ - vid_array = vids.todgltensor() - return utils.toindex(_CAPI_DGLGraphHasVertices(self._handle, vid_array)) - - def has_edge_between(self, u, v): - """Return true if the edge exists. - - Parameters - ---------- - u : int - The src node. - v : int - The dst node. - - Returns - ------- - bool - True if the edge exists - """ - return bool(_CAPI_DGLGraphHasEdgeBetween(self._handle, u, v)) - - def has_edges_between(self, u, v): - """Return true if the edge exists. 
- - Parameters - ---------- - u : utils.Index - The src nodes. - v : utils.Index - The dst nodes. - - Returns - ------- - utils.Index - 0-1 array indicating existence - """ - u_array = u.todgltensor() - v_array = v.todgltensor() - return utils.toindex(_CAPI_DGLGraphHasEdgesBetween(self._handle, u_array, v_array)) - - def predecessors(self, v, radius=1): - """Return the predecessors of the node. - - Parameters - ---------- - v : int - The node. - radius : int, optional - The radius of the neighborhood. - - Returns - ------- - utils.Index - Array of predecessors - """ - return utils.toindex(_CAPI_DGLGraphPredecessors(self._handle, v, radius)) - - def successors(self, v, radius=1): - """Return the successors of the node. - - Parameters - ---------- - v : int - The node. - radius : int, optional - The radius of the neighborhood. - - Returns - ------- - utils.Index - Array of successors - """ - return utils.toindex(_CAPI_DGLGraphSuccessors(self._handle, v, radius)) - - def edge_id(self, u, v): - """Return the id of the edge. - - Parameters - ---------- - u : int - The src node. - v : int - The dst node. - - Returns - ------- - int - The edge id. - """ - return utils.toindex(_CAPI_DGLGraphEdgeId(self._handle, u, v)) - - def edge_ids(self, u, v): - """Return the edge ids. - - Parameters - ---------- - u : utils.Index - The src nodes. - v : utils.Index - The dst nodes. - - Returns - ------- - utils.Index - The src nodes. - utils.Index - The dst nodes. - utils.Index - The edge ids. - """ - u_array = u.todgltensor() - v_array = v.todgltensor() - edge_array = _CAPI_DGLGraphEdgeIds(self._handle, u_array, v_array) - - src = utils.toindex(edge_array(0)) - dst = utils.toindex(edge_array(1)) - eid = utils.toindex(edge_array(2)) - - return src, dst, eid - - def find_edges(self, eid): - """Return a triplet of arrays that contains the edge IDs. - - Parameters - ---------- - eid : utils.Index - The edge ids. - - Returns - ------- - utils.Index - The src nodes. 
- utils.Index - The dst nodes. - utils.Index - The edge ids. - """ - raise NotImplementedError('immutable graph doesn\'t implement find_edges for now.') - - def in_edges(self, v): - """Return the in edges of the node(s). - - Parameters - ---------- - v : utils.Index - The node(s). - - Returns - ------- - utils.Index - The src nodes. - utils.Index - The dst nodes. - utils.Index - The edge ids. - """ - if len(v) == 1: - edge_array = _CAPI_DGLGraphInEdges_1(self._handle, v[0]) - else: - v_array = v.todgltensor() - edge_array = _CAPI_DGLGraphInEdges_2(self._handle, v_array) - src = utils.toindex(edge_array(0)) - dst = utils.toindex(edge_array(1)) - eid = utils.toindex(edge_array(2)) - return src, dst, eid - - def out_edges(self, v): - """Return the out edges of the node(s). - - Parameters - ---------- - v : utils.Index - The node(s). - - Returns - ------- - utils.Index - The src nodes. - utils.Index - The dst nodes. - utils.Index - The edge ids. - """ - if len(v) == 1: - edge_array = _CAPI_DGLGraphOutEdges_1(self._handle, v[0]) - else: - v_array = v.todgltensor() - edge_array = _CAPI_DGLGraphOutEdges_2(self._handle, v_array) - src = utils.toindex(edge_array(0)) - dst = utils.toindex(edge_array(1)) - eid = utils.toindex(edge_array(2)) - return src, dst, eid - - @utils.cached_member(cache='_cache', prefix='edges') - def edges(self, return_sorted=False): - """Return all the edges - - Parameters - ---------- - return_sorted : bool - True if the returned edges are sorted by their src and dst ids. - - Returns - ------- - utils.Index - The src nodes. - utils.Index - The dst nodes. - utils.Index - The edge ids. 
- """ - key = 'edges_s%d' % return_sorted - if key not in self._cache: - edge_array = _CAPI_DGLGraphEdges(self._handle, return_sorted) - src = utils.toindex(edge_array(0)) - dst = utils.toindex(edge_array(1)) - eid = utils.toindex(edge_array(2)) - self._cache[key] = (src, dst, eid) - return self._cache[key] - - def in_degree(self, v): - """Return the in degree of the node. - - Parameters - ---------- - v : int - The node. - - Returns - ------- - int - The in degree. - """ - return _CAPI_DGLGraphInDegree(self._handle, v) - - def in_degrees(self, v): - """Return the in degrees of the nodes. - - Parameters - ---------- - v : utils.Index - The nodes. - - Returns - ------- - int - The in degree array. - """ - v_array = v.todgltensor() - return utils.toindex(_CAPI_DGLGraphInDegrees(self._handle, v_array)) - - def out_degree(self, v): - """Return the out degree of the node. - - Parameters - ---------- - v : int - The node. - - Returns - ------- - int - The out degree. - """ - return _CAPI_DGLGraphOutDegree(self._handle, v) - - def out_degrees(self, v): - """Return the out degrees of the nodes. - - Parameters - ---------- - v : utils.Index - The nodes. - - Returns - ------- - int - The out degree array. - """ - v_array = v.todgltensor() - return utils.toindex(_CAPI_DGLGraphOutDegrees(self._handle, v_array)) - - def node_subgraph(self, v): - """Return the induced node subgraph. - - Parameters - ---------- - v : utils.Index - The nodes. - - Returns - ------- - ImmutableSubgraphIndex - The subgraph index. - """ - v_array = v.todgltensor() - rst = _CAPI_DGLGraphVertexSubgraph(self._handle, v_array) - induced_edges = utils.toindex(rst(2)) - return ImmutableSubgraphIndex(rst(0), self, v, induced_edges) - - def node_subgraphs(self, vs_arr): - """Return the induced node subgraphs. - - Parameters - ---------- - vs_arr : a vector of utils.Index - The nodes. - - Returns - ------- - a vector of ImmutableSubgraphIndex - The subgraph index. 
- """ - # TODO(zhengda) we should parallelize the computation here in CAPI. - return [self.node_subgraph(v) for v in vs_arr] - - def edge_subgraph(self, e): - """Return the induced edge subgraph. - - Parameters - ---------- - e : utils.Index - The edges. - - Returns - ------- - SubgraphIndex - The subgraph index. - """ - raise NotImplementedError('immutable graph doesn\'t implement edge_subgraph for now.') - - def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob): - """Neighborhood sampling""" - if len(seed_ids) == 0: - return [] - - seed_ids = [v.todgltensor() for v in seed_ids] - num_subgs = len(seed_ids) - if node_prob is None: - rst = _uniform_sampling(self, seed_ids, neighbor_type, num_hops, expand_factor) - else: - rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, - expand_factor) - - return [ImmutableSubgraphIndex(rst(i), self, rst(num_subgs + i), - rst(num_subgs * 2 + i)) for i in range(num_subgs)] - - def adjacency_matrix(self, transpose=False, ctx=F.cpu()): - """Return the adjacency matrix representation of this graph. - - By default, a row of returned adjacency matrix represents the destination - of an edge and the column represents the source. - - When transpose is True, a row represents the source and a column represents - a destination. - - Parameters - ---------- - transpose : bool - A flag to transpose the returned adjacency matrix. - - Returns - ------- - SparseTensor - The adjacency matrix. - utils.Index - A index for data shuffling due to sparse format change. Return None - if shuffle is not required. 
- """ - rst = _CAPI_DGLGraphGetCSR(self._handle, transpose) - indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) - indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) - shuffle = utils.toindex(rst(2)) - dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) - return F.sparse_matrix(dat, ('csr', indices, indptr), - (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle - - def incidence_matrix(self, typestr, ctx): - """Return the incidence matrix representation of this graph. - - An incidence matrix is an n x m sparse matrix, where n is - the number of nodes and m is the number of edges. Each nnz - value indicating whether the edge is incident to the node - or not. - - There are three types of an incidence matrix `I`: - * "in": - - I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e); - - I[v, e] = 0 otherwise. - * "out": - - I[v, e] = 1 if e is the out-edge of v (or v is the src node of e); - - I[v, e] = 0 otherwise. - * "both": - - I[v, e] = 1 if e is the in-edge of v; - - I[v, e] = -1 if e is the out-edge of v; - - I[v, e] = 0 otherwise (including self-loop). - - Parameters - ---------- - typestr : str - Can be either "in", "out" or "both" - ctx : context - The context of returned incidence matrix. - - Returns - ------- - SparseTensor - The incidence matrix. - utils.Index - A index for data shuffling due to sparse format change. Return None - if shuffle is not required. - """ - raise NotImplementedError('immutable graph doesn\'t implement incidence_matrix for now.') - - def to_networkx(self): - """Convert to networkx graph. - - The edge id will be saved as the 'id' edge attribute. - - Returns - ------- - networkx.DiGraph - The nx graph - """ - src, dst, eid = self.edges() - ret = nx.DiGraph() - for u, v, e in zip(src, dst, eid): - ret.add_edge(u, v, id=e) - return ret - - def from_networkx(self, nx_graph): - """Convert from networkx graph. 
- - If 'id' edge attribute exists, the edge will be added follows - the edge id order. Otherwise, order is undefined. - - Parameters - ---------- - nx_graph : networkx.DiGraph - The nx graph - """ - if not isinstance(nx_graph, nx.Graph): - nx_graph = nx.DiGraph(nx_graph) - else: - if not nx_graph.is_directed(): - # to_directed creates a deep copy of the networkx graph even if - # the original graph is already directed and we do not want to do it. - nx_graph = nx_graph.to_directed() - - assert nx_graph.number_of_edges() > 0, "can't create an empty immutable graph" - - # nx_graph.edges(data=True) returns src, dst, attr_dict - has_edge_id = 'id' in next(iter(nx_graph.edges(data=True)))[-1] - if has_edge_id: - num_edges = nx_graph.number_of_edges() - src = np.zeros((num_edges,), dtype=np.int64) - dst = np.zeros((num_edges,), dtype=np.int64) - for u, v, attr in nx_graph.edges(data=True): - eid = attr['id'] - src[eid] = u - dst[eid] = v - else: - src = [] - dst = [] - for e in nx_graph.edges: - src.append(e[0]) - dst.append(e[1]) - eid = np.arange(0, len(src), dtype=np.int64) - - num_nodes = nx_graph.number_of_nodes() - # We store edge Ids as an edge attribute. - eid = utils.toindex(eid) - src = utils.toindex(src) - dst = utils.toindex(dst) - self.init(src, dst, eid, num_nodes) - - def from_scipy_sparse_matrix(self, adj): - """Convert from scipy sparse matrix. - - NOTE: we assume the row is src nodes and the col is dst nodes. - - Parameters - ---------- - adj : scipy sparse matrix - """ - assert isinstance(adj, (sp.csr_matrix, sp.coo_matrix)), \ - "The input matrix has to be a SciPy sparse matrix." - num_nodes = max(adj.shape[0], adj.shape[1]) - out_mat = adj.tocoo() - src_ids = utils.toindex(out_mat.row) - dst_ids = utils.toindex(out_mat.col) - edge_ids = utils.toindex(F.arange(0, len(out_mat.row))) - self.init(src_ids, dst_ids, edge_ids, num_nodes) - - def from_edge_list(self, elist): - """Convert from an edge list. 
- - Paramters - --------- - elist : list - List of (u, v) edge tuple. - """ - src, dst = zip(*elist) - src = np.array(src) - dst = np.array(dst) - src_ids = utils.toindex(src) - dst_ids = utils.toindex(dst) - num_nodes = max(src.max(), dst.max()) + 1 - edge_ids = utils.toindex(F.arange(0, len(src))) - # TODO we need to detect multigraph automatically. - self.init(src_ids, dst_ids, edge_ids, num_nodes) - - def line_graph(self, backtracking=True): - """Return the line graph of this graph. - - Parameters - ---------- - backtracking : bool, optional (default=False) - Whether (i, j) ~ (j, i) in L(G). - (i, j) ~ (j, i) is the behavior of networkx.line_graph. - - Returns - ------- - ImmutableGraphIndex - The line graph of this graph. - """ - raise NotImplementedError('immutable graph doesn\'t implement line_graph') - -class ImmutableSubgraphIndex(ImmutableGraphIndex): - """Graph index for an immutable subgraph. - - Parameters - ---------- - backend_sparse : a sparse matrix from the backend framework. - The sparse matrix that represents a subgraph. - paranet : GraphIndex - The parent graph index. - induced_nodes : tensor - The parent node ids in this subgraph. - induced_edges : a lambda function that returns a tensor - The parent edge ids in this subgraph. - """ - def __init__(self, handle, parent, induced_nodes, induced_edges): - super(ImmutableSubgraphIndex, self).__init__(handle) - - self._parent = parent - self._induced_nodes = induced_nodes - self._induced_edges = induced_edges - - @property - def induced_edges(self): - """Return parent edge ids. - - Returns - ------- - A lambda function that returns utils.Index - The parent edge ids. - """ - return utils.toindex(self._induced_edges) - - @property - def induced_nodes(self): - """Return parent node ids. - - Returns - ------- - utils.Index - The parent node ids. - """ - return utils.toindex(self._induced_nodes) - -def disjoint_union(graphs): - """Return a disjoint union of the input graphs. 
- - The new graph will include all the nodes/edges in the given graphs. - Nodes/Edges will be relabeled by adding the cumsum of the previous graph sizes - in the given sequence order. For example, giving input [g1, g2, g3], where - they have 5, 6, 7 nodes respectively. Then node#2 of g2 will become node#7 - in the result graph. Edge ids are re-assigned similarly. - - Parameters - ---------- - graphs : iterable of GraphIndex - The input graphs - - Returns - ------- - GraphIndex - The disjoint union - """ - raise NotImplementedError('immutable graph doesn\'t implement disjoint_union for now.') - -def disjoint_partition(graph, num_or_size_splits): - """Partition the graph disjointly. - - This is a reverse operation of DisjointUnion. The graph will be partitioned - into num graphs. This requires the given number of partitions to evenly - divides the number of nodes in the graph. If the a size list is given, - the sum of the given sizes is equal. - - Parameters - ---------- - graph : GraphIndex - The graph to be partitioned - num_or_size_splits : int or utils.Index - The partition number of size splits - - Returns - ------- - list of GraphIndex - The partitioned graphs - """ - raise NotImplementedError('immutable graph doesn\'t implement disjoint_partition for now.') - -def create_immutable_graph_index(graph_data=None): - """Create a graph index object. - - Parameters - ---------- - graph_data : graph data, optional - Data to initialize graph. Same as networkx's semantics. - """ - if isinstance(graph_data, ImmutableGraphIndex): - return graph_data - - # Let's create an empty graph index first. 
- gidx = ImmutableGraphIndex(None) - - # edge list - if isinstance(graph_data, (list, tuple)): - try: - gidx.from_edge_list(graph_data) - return gidx - except Exception: # pylint: disable=broad-except - raise DGLError('Graph data is not a valid edge list for immutable_graph_index.') - - # scipy format - if isinstance(graph_data, sp.spmatrix): - try: - gidx.from_scipy_sparse_matrix(graph_data) - return gidx - except Exception: # pylint: disable=broad-except - raise DGLError('Graph data is not a valid scipy sparse matrix.') - - # networkx - any format - try: - gidx.from_networkx(graph_data) - except Exception: # pylint: disable=broad-except - raise DGLError('Error while creating graph from input of type "%s".' - % type(graph_data)) - - return gidx - -_init_api("dgl.immutable_graph_index") - -_NEIGHBOR_SAMPLING_APIS = { - 1: _CAPI_DGLGraphUniformSampling, - 2: _CAPI_DGLGraphUniformSampling2, - 4: _CAPI_DGLGraphUniformSampling4, - 8: _CAPI_DGLGraphUniformSampling8, - 16: _CAPI_DGLGraphUniformSampling16, - 32: _CAPI_DGLGraphUniformSampling32, - 64: _CAPI_DGLGraphUniformSampling64, - 128: _CAPI_DGLGraphUniformSampling128, -} - -_EMPTY_ARRAYS = [utils.toindex(F.ones(shape=(0), dtype=F.int64, ctx=F.cpu()))] - -def _uniform_sampling(gidx, seed_ids, neigh_type, num_hops, expand_factor): - num_seeds = len(seed_ids) - empty_ids = [] - if len(seed_ids) > 1 and len(seed_ids) not in _NEIGHBOR_SAMPLING_APIS.keys(): - remain = 2**int(math.ceil(math.log2(len(dgl_ids)))) - len(dgl_ids) - empty_ids = _EMPTY_ARRAYS[0:remain] - seed_ids.extend([empty.todgltensor() for empty in empty_ids]) - assert len(seed_ids) in _NEIGHBOR_SAMPLING_APIS.keys() - return _NEIGHBOR_SAMPLING_APIS[len(seed_ids)](gidx._handle, *seed_ids, neigh_type, - num_hops, expand_factor, num_seeds) diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index b948c3060342..0917ee2e791d 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -410,7 +410,7 @@ 
DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") ///////////////////////////// Immutable Graph API /////////////////////////////////// -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphCreate") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateImmutable") .set_body([] (DGLArgs args, DGLRetValue* rv) { const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); @@ -421,7 +421,7 @@ DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphCreate") *rv = ghandle; }); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphGetCSR") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetCSR") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; bool transpose = args[1]; @@ -457,21 +457,21 @@ void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { *rv = ConvertSubgraphToPackedFunc(subgs); } -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling") .set_body(CAPI_NeighborUniformSample<1>); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling2") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling2") .set_body(CAPI_NeighborUniformSample<2>); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling4") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling4") .set_body(CAPI_NeighborUniformSample<4>); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling8") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling8") .set_body(CAPI_NeighborUniformSample<8>); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling16") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling16") .set_body(CAPI_NeighborUniformSample<16>); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling32") 
+DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling32") .set_body(CAPI_NeighborUniformSample<32>); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling64") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling64") .set_body(CAPI_NeighborUniformSample<64>); -DGL_REGISTER_GLOBAL("immutable_graph_index._CAPI_DGLGraphUniformSampling128") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling128") .set_body(CAPI_NeighborUniformSample<128>); } // namespace dgl From 9e4781823019fdfed5513fef231a6a4234880d55 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 00:25:18 +0800 Subject: [PATCH 27/75] fix. --- src/graph/graph.cc | 2 ++ src/graph/graph_apis.cc | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/graph/graph.cc b/src/graph/graph.cc index f66024ea1b6f..0727a1c71b24 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -416,6 +416,7 @@ Subgraph Graph::VertexSubgraph(IdArray vids) const { oldv2newv[vid_data[i]] = i; } Subgraph rst; + rst.graph = std::make_shared(IsMultigraph()); rst.induced_vertices = vids; rst.graph->AddVertices(len); for (int64_t i = 0; i < len; ++i) { @@ -453,6 +454,7 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { } Subgraph rst; + rst.graph = std::make_shared(IsMultigraph()); rst.induced_edges = eids; rst.graph->AddVertices(nodes.size()); diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index 0917ee2e791d..c80902ec71e0 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -412,6 +412,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateImmutable") .set_body([] (DGLArgs args, DGLRetValue* rv) { + printf("create immutable\n"); const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); @@ -419,6 
+420,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateImmutable") int64_t num_nodes = static_cast(args[4]); GraphHandle ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph); *rv = ghandle; + printf("create immutable1\n"); }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetCSR") From c63516fa1d0185f71372d1053eea599f0d2eed72 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 00:26:58 +0800 Subject: [PATCH 28/75] remove print. --- src/graph/graph_apis.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index c80902ec71e0..0917ee2e791d 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -412,7 +412,6 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateImmutable") .set_body([] (DGLArgs args, DGLRetValue* rv) { - printf("create immutable\n"); const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); @@ -420,7 +419,6 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateImmutable") int64_t num_nodes = static_cast(args[4]); GraphHandle ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph); *rv = ghandle; - printf("create immutable1\n"); }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetCSR") From 550ceaa3e4c02c7a21d15e57141817d34bfcce7e Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 00:44:25 +0800 Subject: [PATCH 29/75] fix lint. 
--- include/dgl/graph.h | 1 - include/dgl/graph_interface.h | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/dgl/graph.h b/include/dgl/graph.h index 8cfad4d43790..5d12feb46101 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -10,7 +10,6 @@ #include #include #include -#include "runtime/ndarray.h" #include "graph_interface.h" diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index bd31b4f28ecf..c91cf77d42e6 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -6,6 +6,10 @@ #ifndef DGL_GRAPH_INTERFACE_H_ #define DGL_GRAPH_INTERFACE_H_ +#include +#include +#include "runtime/ndarray.h" + namespace dgl { typedef uint64_t dgl_id_t; @@ -268,7 +272,7 @@ class GraphInterface { * * \return the reversed graph */ - virtual GraphInterface::ptr Reverse() const = 0; + virtual GraphInterface::ptr Reverse() const = 0; /*! * \brief Return the successor vector From a5f6544e9d16ca33a90c94d666272fb9d7f0c4e8 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 00:50:37 +0800 Subject: [PATCH 30/75] fix --- python/dgl/graph_index.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 2e22210380af..a62b62548eb4 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -944,8 +944,7 @@ class ImmutableGraphIndex(GraphIndex): backend_csr: a csr array provided by the backend framework. """ def __init__(self, handle): - self._handle = handle - self._cache = {} + super(ImmutableGraphIndex, self).__init__(handle) def init(self, src_ids, dst_ids, edge_ids, num_nodes): """The actual init function""" From 0d3c3c5799e76b19607d55d4ed7aa18c1c669950 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 01:13:39 +0800 Subject: [PATCH 31/75] fix lint. 
--- python/dgl/graph_index.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index a62b62548eb4..e973484ee854 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -944,7 +944,7 @@ class ImmutableGraphIndex(GraphIndex): backend_csr: a csr array provided by the backend framework. """ def __init__(self, handle): - super(ImmutableGraphIndex, self).__init__(handle) + pass def init(self, src_ids, dst_ids, edge_ids, num_nodes): """The actual init function""" @@ -1248,6 +1248,15 @@ def __init__(self, handle, parent, induced_nodes, induced_edges): self._induced_nodes = induced_nodes self._induced_edges = induced_edges + def edge_subgraph(self, e): + raise NotImplementedError('immutable graph doesn\'t implement edge_subgraph for now.') + + def line_graph(self, backtracking=True): + raise NotImplementedError('immutable graph doesn\'t implement line_graph') + + def incidence_matrix(self, typestr, ctx): + raise NotImplementedError('immutable graph doesn\'t implement incidence_matrix for now.') + @property def induced_edges(self): """Return parent edge ids. From b9a260aecb69e4160dbc83076670326480d0ef04 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 01:22:00 +0800 Subject: [PATCH 32/75] fix lint. --- python/dgl/graph_index.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index e973484ee854..aaf67559830a 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -938,14 +938,7 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): class ImmutableGraphIndex(GraphIndex): """Graph index object on immutable graphs. - - Parameters - ---------- - backend_csr: a csr array provided by the backend framework. 
""" - def __init__(self, handle): - pass - def init(self, src_ids, dst_ids, edge_ids, num_nodes): """The actual init function""" self._handle = _CAPI_DGLGraphCreateImmutable(src_ids.todgltensor(), dst_ids.todgltensor(), From 16e1231a11563b3b1f0437cb5aabef4f90ef4ba5 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 19:33:10 +0800 Subject: [PATCH 33/75] fix test. --- tests/mxnet/test_graph_index.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/mxnet/test_graph_index.py b/tests/mxnet/test_graph_index.py index 37b5cc7d2192..d3f3d1c02d01 100644 --- a/tests/mxnet/test_graph_index.py +++ b/tests/mxnet/test_graph_index.py @@ -41,8 +41,11 @@ def test_graph_gen(): def sort_edges(edges): edges = [e.tousertensor() for e in edges] - idx = mx.nd.argsort(edges[2]) - return (edges[0][idx], edges[1][idx], edges[2][idx]) + if np.prod(edges[2].shape) > 0: + idx = mx.nd.argsort(edges[2]) + return (edges[0][idx], edges[1][idx], edges[2][idx]) + else: + return (edges[0], edges[1], edges[2]) def check_basics(g, ig): assert g.number_of_nodes() == ig.number_of_nodes() From 3ada031d1722536ef7771586dfa650874f2fe7a5 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Fri, 11 Jan 2019 19:38:25 +0800 Subject: [PATCH 34/75] fix comments. --- src/graph/immutable_graph.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 7f24521e3e74..5d0830b103ee 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -8,9 +8,7 @@ #include #ifdef _MSC_VER -// TODO(zhengda) it seems MS compiler doesn't have rand_r. Let's -// use rand for now. This may not have good performance because rand has -// a global variable shared by all threads. +// rand in MS compiler works well in multi-threading. 
int rand_r(unsigned *seed) { return rand(); } From 7301fa9d970d82c2c04be0af37fa4c03da5c41e9 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Sun, 13 Jan 2019 22:20:03 +0800 Subject: [PATCH 35/75] merge GraphIndex and ImmutableGraphIndex. --- include/dgl/graph.h | 31 ++ include/dgl/graph_interface.h | 52 +++ include/dgl/immutable_graph.h | 53 +-- python/dgl/backend/backend.py | 8 + python/dgl/backend/mxnet/tensor.py | 8 + python/dgl/backend/numpy/tensor.py | 8 + python/dgl/backend/pytorch/tensor.py | 8 + python/dgl/graph_index.py | 507 +++++---------------------- src/graph/graph.cc | 30 ++ src/graph/graph_apis.cc | 72 ++-- src/graph/immutable_graph.cc | 3 +- 11 files changed, 309 insertions(+), 471 deletions(-) diff --git a/include/dgl/graph.h b/include/dgl/graph.h index 5d12feb46101..b8e49d7cda9b 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -37,6 +37,9 @@ class Graph: public GraphInterface { /*! \brief default constructor */ explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {} + Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, + bool multigraph = false); + /*! \brief default copy constructor */ Graph(const Graph& other) = default; @@ -104,6 +107,13 @@ class Graph: public GraphInterface { return is_multigraph_; } + /*! + * \return whether the graph is read-only + */ + virtual bool IsReadonly() const { + return false; + } + /*! \return the number of vertices in the graph.*/ uint64_t NumVertices() const { return adjlist_.size(); @@ -346,6 +356,27 @@ class Graph: public GraphInterface { return gptr; } + /*! + * \brief Get the adjacency matrix of the graph. + * + * By default, a row of returned adjacency matrix represents the destination + * of an edge and the column represents the source. + * \param transpose A flag to transpose the returned adjacency matrix. + * \param fmt the format of the returned adjacency matrix. + * \return a vector of three IdArray. 
+ */ + virtual std::vector GetAdj(bool transpose, const std::string &fmt) const; + + /*! + * \brief Sample a subgraph from the seed vertices with neighbor sampling. + * The neighbors are sampled with a uniformly distribution. + * \return a subgraph + */ + virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, + int num_hops, int expand_factor) const { + throw NotImplemented("NeighborUniformSample"); + } + protected: friend class GraphOp; /*! \brief Internal edge list type */ diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index c91cf77d42e6..4e06336e7b34 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -12,6 +12,18 @@ namespace dgl { +class NotImplemented: public std::exception { + std::string msg; + public: + NotImplemented(const std::string &name) { + this->msg = name + " isn't implemented"; + } + + virtual const char* what() const noexcept { + return msg.c_str(); + } +}; + typedef uint64_t dgl_id_t; typedef dgl::runtime::NDArray IdArray; typedef dgl::runtime::NDArray DegreeArray; @@ -19,6 +31,7 @@ typedef dgl::runtime::NDArray BoolArray; typedef dgl::runtime::NDArray IntArray; struct Subgraph; +struct SampledSubgraph; class dgl_id_iters { std::vector::const_iterator b, e; @@ -93,6 +106,11 @@ class GraphInterface { */ virtual bool IsMultigraph() const = 0; + /*! + * \return whether the graph is read-only + */ + virtual bool IsReadonly() const = 0; + /*! \return the number of vertices in the graph.*/ virtual uint64_t NumVertices() const = 0; @@ -307,6 +325,25 @@ class GraphInterface { * \return a raw pointer to the graph object. */ virtual GraphInterface *Reset() = 0; + + /*! + * \brief Get the adjacency matrix of the graph. + * + * By default, a row of returned adjacency matrix represents the destination + * of an edge and the column represents the source. + * \param transpose A flag to transpose the returned adjacency matrix. 
+ * \param fmt the format of the returned adjacency matrix. + * \return a vector of three IdArray. + */ + virtual std::vector GetAdj(bool transpose, const std::string &fmt) const = 0; + + /*! + * \brief Sample a subgraph from the seed vertices with neighbor sampling. + * The neighbors are sampled with a uniformly distribution. + * \return a subgraph + */ + virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, + int num_hops, int expand_factor) const = 0; }; /*! \brief Subgraph data structure */ @@ -325,6 +362,21 @@ struct Subgraph { IdArray induced_edges; }; +/*! + * \brief When we sample a subgraph, we need to store extra information, + * such as the layer Ids of the vertices and the sampling probability. + */ +struct SampledSubgraph: public Subgraph { + /*! + * \brief the layer of a sampled vertex in the subgraph. + */ + IdArray layer_ids; + /*! + * \brief the probability that a vertex is sampled. + */ + runtime::NDArray sample_prob; +}; + } // namespace dgl #endif // DGL_GRAPH_INTERFACE_H_ diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index f9d325d31106..090a52921bc6 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -16,8 +16,6 @@ namespace dgl { -struct SampledSubgraph; - template bool binary_search(ForwardIt first, ForwardIt last, const T& value) { first = std::lower_bound(first, last, value); @@ -115,7 +113,7 @@ class ImmutableGraph: public GraphInterface { * \param num_vertices The number of vertices to be added. */ void AddVertices(uint64_t num_vertices) { - LOG(FATAL) << "Immutable graph doesn't support adding vertices"; + throw NotImplemented("AddVertices"); } /*! @@ -124,7 +122,7 @@ class ImmutableGraph: public GraphInterface { * \param dst The destination vertex. */ void AddEdge(dgl_id_t src, dgl_id_t dst) { - LOG(FATAL) << "Immutable graph doesn't support adding edge"; + throw NotImplemented("AddEdge"); } /*! 
@@ -133,14 +131,14 @@ class ImmutableGraph: public GraphInterface { * \param dst_ids The destination vertex id array. */ void AddEdges(IdArray src_ids, IdArray dst_ids) { - LOG(FATAL) << "Immutable graph doesn't support adding edges"; + throw NotImplemented("AddEdges"); } /*! * \brief Clear the graph. Remove all vertices/edges. */ void Clear() { - LOG(FATAL) << "Immutable graph doesn't support clearing vertices and edges"; + throw NotImplemented("Clear"); } /*! @@ -151,6 +149,13 @@ class ImmutableGraph: public GraphInterface { return is_multigraph_; } + /*! + * \return whether the graph is read-only + */ + virtual bool IsReadonly() const { + return true; + } + /*! \return the number of vertices in the graph.*/ uint64_t NumVertices() const { if (in_csr_) @@ -234,8 +239,7 @@ class ImmutableGraph: public GraphInterface { * \return a pair whose first element is the source and the second the destination. */ std::pair FindEdge(dgl_id_t eid) const { - LOG(FATAL) << "not implemented"; - return std::pair(); + throw NotImplemented("FindEdge"); } /*! @@ -244,8 +248,7 @@ class ImmutableGraph: public GraphInterface { * \return EdgeArray containing all edges with id in eid. The order is preserved. */ EdgeArray FindEdges(IdArray eids) const { - LOG(FATAL) << "not implemented"; - return EdgeArray(); + throw NotImplemented("FindEdges"); } /*! @@ -457,6 +460,21 @@ class ImmutableGraph: public GraphInterface { */ CSRArray GetOutCSRArray() const; + /*! + * \brief Get the adjacency matrix of the graph. + * + * By default, a row of returned adjacency matrix represents the destination + * of an edge and the column represents the source. + * \param transpose A flag to transpose the returned adjacency matrix. + * \param fmt the format of the returned adjacency matrix. + * \return a vector of three IdArray. + */ + virtual std::vector GetAdj(bool transpose, const std::string &fmt) const { + assert(fmt == "csr"); + CSRArray arrs = transpose ? 
this->GetOutCSRArray() : this->GetInCSRArray(); // in-edge CSR by default, out-edge CSR when transposed + return std::vector{arrs.indptr, arrs.indices, arrs.id}; + } + protected: std::pair GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; std::pair GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; @@ -502,21 +520,6 @@ class ImmutableGraph: public GraphInterface { bool is_multigraph_ = false; }; -/*! - * \brief When we sample a subgraph, we need to store extra information, - * such as the layer Ids of the vertices and the sampling probability. - */ -struct SampledSubgraph: public Subgraph { - /*! - * \brief the layer of a sampled vertex in the subgraph. - */ - IdArray layer_ids; - /*! - * \brief the probability that a vertex is sampled. - */ - runtime::NDArray sample_prob; -}; - } // namespace dgl #endif // DGL_IMMUTABLE_GRAPH_H_ diff --git a/python/dgl/backend/backend.py b/python/dgl/backend/backend.py index 1d05990171ff..096bce56a530 100644 --- a/python/dgl/backend/backend.py +++ b/python/dgl/backend/backend.py @@ -74,6 +74,14 @@ def tensor(data, dtype=None): """ pass +def get_preferred_sparse_format(): + """Get the preferred sparse matrix format supported by the backend. + + Different backends have their preferred backend. This info is useful when + constructing a sparse matrix. + """ + pass + def sparse_matrix(data, index, shape, force_format=False): """Create a sparse matrix. diff --git a/python/dgl/backend/mxnet/tensor.py b/python/dgl/backend/mxnet/tensor.py index eef2ca57f8b9..4cccec7f9d62 100644 --- a/python/dgl/backend/mxnet/tensor.py +++ b/python/dgl/backend/mxnet/tensor.py @@ -28,6 +28,14 @@ def tensor(data, dtype=None): dtype = np.float32 return nd.array(data, dtype=dtype) +def get_preferred_sparse_format(): + """Get the preferred sparse matrix format supported by the backend. + + Different backends have their preferred backend. This info is useful when + constructing a sparse matrix.
+ """ + return "csr" + def sparse_matrix(data, index, shape, force_format=False): fmt = index[0] if fmt == 'coo': diff --git a/python/dgl/backend/numpy/tensor.py b/python/dgl/backend/numpy/tensor.py index 3a36aba1d039..44b15a19d6c6 100644 --- a/python/dgl/backend/numpy/tensor.py +++ b/python/dgl/backend/numpy/tensor.py @@ -22,6 +22,14 @@ def cpu(): def tensor(data, dtype=None): return np.array(data, dtype) +def get_preferred_sparse_format(): + """Get the preferred sparse matrix format supported by the backend. + + Different backends have their preferred backend. This info is useful when + constructing a sparse matrix. + """ + return "csr" + def sparse_matrix(data, index, shape, force_format=False): fmt = index[0] if fmt == 'coo': diff --git a/python/dgl/backend/pytorch/tensor.py b/python/dgl/backend/pytorch/tensor.py index 622fdf425e2b..a23cf6a1aea9 100644 --- a/python/dgl/backend/pytorch/tensor.py +++ b/python/dgl/backend/pytorch/tensor.py @@ -23,6 +23,14 @@ def cpu(): def tensor(data, dtype=None): return th.tensor(data, dtype=dtype) +def get_preferred_sparse_format(): + """Get the preferred sparse matrix format supported by the backend. + + Different backends have their preferred backend. This info is useful when + constructing a sparse matrix. + """ + return "coo" + if TH_VERSION.version[0] == 0: def sparse_matrix(data, index, shape, force_format=False): fmt = index[0] diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index aaf67559830a..4fcfb0c7ab2c 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -22,8 +22,12 @@ class GraphIndex(object): handle : GraphIndexHandle Handler """ - def __init__(self, handle): - self._handle = handle + def __init__(self, multigraph, readonly): + # if the graph is readonly, we'll init later. 
+ if not readonly: + self._handle = _CAPI_DGLGraphCreateMutable(multigraph) + self._multigraph = multigraph + self._readonly = readonly self._cache = {} def __del__(self): @@ -43,13 +47,19 @@ def __setstate__(self, state): """ n_nodes, multigraph, src, dst = state - self._handle = _CAPI_DGLGraphCreate(multigraph) + self._handle = _CAPI_DGLGraphCreateMutable(multigraph) self._cache = {} self.clear() self.add_nodes(n_nodes) self.add_edges(src, dst) + def init(self, src_ids, dst_ids, edge_ids, num_nodes): + """The actual init function""" + self._handle = _CAPI_DGLGraphCreate(src_ids.todgltensor(), dst_ids.todgltensor(), + edge_ids.todgltensor(), self._multigraph, num_nodes, + self._readonly) + def add_nodes(self, num): """Add nodes. @@ -116,7 +126,7 @@ def is_readonly(self): bool True if it is a read-only graph, False otherwise. """ - return False + return self._readonly def number_of_nodes(self): """Return the number of nodes. @@ -533,25 +543,40 @@ def adjacency_matrix(self, transpose, ctx): A index for data shuffling due to sparse format change. Return None if shuffle is not required. """ - if not isinstance(transpose, bool): - raise DGLError('Expect bool value for "transpose" arg,' - ' but got %s.' % (type(transpose))) - src, dst, _ = self.edges(False) - src = src.tousertensor(ctx) # the index of the ctx will be cached - dst = dst.tousertensor(ctx) # the index of the ctx will be cached - src = F.unsqueeze(src, dim=0) - dst = F.unsqueeze(dst, dim=0) - if transpose: - idx = F.cat([src, dst], dim=0) - else: - idx = F.cat([dst, src], dim=0) - n = self.number_of_nodes() - m = self.number_of_edges() - # FIXME(minjie): data type - dat = F.ones((m,), dtype=F.float32, ctx=ctx) - adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n)) - shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None - return adj, shuffle_idx + #if not isinstance(transpose, bool): + # raise DGLError('Expect bool value for "transpose" arg,' + # ' but got %s.' 
% (type(transpose))) + #src, dst, _ = self.edges(False) + #src = src.tousertensor(ctx) # the index of the ctx will be cached + #dst = dst.tousertensor(ctx) # the index of the ctx will be cached + #src = F.unsqueeze(src, dim=0) + #dst = F.unsqueeze(dst, dim=0) + #if transpose: + # idx = F.cat([src, dst], dim=0) + #else: + # idx = F.cat([dst, src], dim=0) + #n = self.number_of_nodes() + #m = self.number_of_edges() + ## FIXME(minjie): data type + #dat = F.ones((m,), dtype=F.float32, ctx=ctx) + #adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n)) + #shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None + #return adj, shuffle_idx + fmt = F.get_preferred_sparse_format() + rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) + if fmt == "csr": + indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) + indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) + shuffle = utils.toindex(rst(2)) + dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) + return F.sparse_matrix(dat, ('csr', indices, indptr), + (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle + elif fmt == "coo": + rows = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) + cols = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) + dat = F.ones(rows.shape, dtype=F.float32, ctx=ctx) + return F.sparse_matrix(dat, ('coo', rows, cols), + (self.number_of_nodes(), self.number_of_nodes()))[0], None @utils.cached_member(cache='_cache', prefix='inc') def incidence_matrix(self, typestr, ctx): @@ -632,6 +657,22 @@ def incidence_matrix(self, typestr, ctx): shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None return inc, shuffle_idx + def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob): + """Neighborhood sampling""" + if len(seed_ids) == 0: + return [] + + seed_ids = [v.todgltensor() for v in seed_ids] + num_subgs = len(seed_ids) + if node_prob is None: + rst = _uniform_sampling(self, seed_ids, 
neighbor_type, num_hops, expand_factor) + else: + rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, + expand_factor) + + return [ImmutableSubgraphIndex(rst(i), self, rst(num_subgs + i), + rst(num_subgs * 2 + i)) for i in range(num_subgs)] + def to_networkx(self): """Convert to networkx graph. @@ -660,7 +701,8 @@ def from_networkx(self, nx_graph): nx_graph : networkx.DiGraph The nx graph """ - self.clear() + if not self.is_readonly(): + self.clear() if not isinstance(nx_graph, nx.Graph): nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph() @@ -672,9 +714,10 @@ def from_networkx(self, nx_graph): nx_graph = nx_graph.to_directed() num_nodes = nx_graph.number_of_nodes() - self.add_nodes(num_nodes) if nx_graph.number_of_edges() == 0: + if self.is_readonly(): + raise Exception("can't create an empty immutable graph") return # nx_graph.edges(data=True) returns src, dst, attr_dict @@ -693,9 +736,14 @@ def from_networkx(self, nx_graph): for e in nx_graph.edges: src.append(e[0]) dst.append(e[1]) + eid = np.arange(0, len(src), dtype=np.int64) + num_nodes = nx_graph.number_of_nodes() + # We store edge Ids as an edge attribute. + eid = utils.toindex(eid) src = utils.toindex(src) dst = utils.toindex(dst) - self.add_edges(src, dst) + self.init(src, dst, eid, num_nodes) + def from_scipy_sparse_matrix(self, adj): """Convert from scipy sparse matrix. @@ -704,13 +752,18 @@ def from_scipy_sparse_matrix(self, adj): ---------- adj : scipy sparse matrix """ - self.clear() + assert isinstance(adj, (scipy.sparse.csr_matrix, scipy.sparse.coo_matrix)), \ + "The input matrix has to be a SciPy sparse matrix." + if not self.is_readonly(): + self.clear() # what if the adj matrix isn't symmetric. 
- self.add_nodes(max(adj.shape[0], adj.shape[1])) + num_nodes = max(adj.shape[0], adj.shape[1]) adj_coo = adj.tocoo() src = utils.toindex(adj_coo.row) dst = utils.toindex(adj_coo.col) - self.add_edges(src, dst) + edge_ids = utils.toindex(F.arange(0, len(adj_coo.row))) + self.init(src, dst, edge_ids, num_nodes) + def from_edge_list(self, elist): """Convert from an edge list. @@ -720,16 +773,19 @@ def from_edge_list(self, elist): elist : list List of (u, v) edge tuple. """ - self.clear() + if not self.is_readonly(): + self.clear() src, dst = zip(*elist) src = np.array(src) dst = np.array(dst) + src_ids = utils.toindex(src) + dst_ids = utils.toindex(dst) num_nodes = max(src.max(), dst.max()) + 1 min_nodes = min(src.min(), dst.min()) if min_nodes != 0: raise DGLError('Invalid edge list. Nodes must start from 0.') - self.add_nodes(num_nodes) - self.add_edges(utils.toindex(src), utils.toindex(dst)) + edge_ids = utils.toindex(F.arange(0, len(src))) + self.init(src_ids, dst_ids, edge_ids, num_nodes) def line_graph(self, backtracking=True): """Return the line graph of this graph. @@ -901,13 +957,10 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): # FIXME(minjie): this return is not correct for mutable graph index return graph_data - if readonly: - return create_immutable_graph_index(graph_data) - - handle = _CAPI_DGLGraphCreate(multigraph) - gidx = GraphIndex(handle) - - if graph_data is None: + gidx = GraphIndex(multigraph, readonly) + if graph_data is None and readonly: + raise Exception("can't create an empty immutable graph") + elif graph_data is None: return gidx # edge list @@ -936,382 +989,6 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): return gidx -class ImmutableGraphIndex(GraphIndex): - """Graph index object on immutable graphs. 
- """ - def init(self, src_ids, dst_ids, edge_ids, num_nodes): - """The actual init function""" - self._handle = _CAPI_DGLGraphCreateImmutable(src_ids.todgltensor(), dst_ids.todgltensor(), - edge_ids.todgltensor(), False, num_nodes) - - def add_nodes(self, num): - """Add nodes. - - Parameters - ---------- - num : int - Number of nodes to be added. - """ - raise DGLError('Immutable graph doesn\'t support adding nodes') - - def add_edge(self, u, v): - """Add one edge. - - Parameters - ---------- - u : int - The src node. - v : int - The dst node. - """ - raise DGLError('Immutable graph doesn\'t support adding an edge') - - def add_edges(self, u, v): - """Add many edges. - - Parameters - ---------- - u : utils.Index - The src nodes. - v : utils.Index - The dst nodes. - """ - raise DGLError('Immutable graph doesn\'t support adding edges') - - def clear(self): - """Clear the graph.""" - raise DGLError('Immutable graph doesn\'t support clearing up') - - def is_readonly(self): - """Indicate whether the graph index is read-only. - - Returns - ------- - bool - True if it is a read-only graph, False otherwise. - """ - return True - - def find_edges(self, eid): - """Return a triplet of arrays that contains the edge IDs. - - Parameters - ---------- - eid : utils.Index - The edge ids. - - Returns - ------- - utils.Index - The src nodes. - utils.Index - The dst nodes. - utils.Index - The edge ids. - """ - raise NotImplementedError('immutable graph doesn\'t implement find_edges for now.') - - def edge_subgraph(self, e): - """Return the induced edge subgraph. - - Parameters - ---------- - e : utils.Index - The edges. - - Returns - ------- - SubgraphIndex - The subgraph index. 
- """ - raise NotImplementedError('immutable graph doesn\'t implement edge_subgraph for now.') - - def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, node_prob): - """Neighborhood sampling""" - if len(seed_ids) == 0: - return [] - - seed_ids = [v.todgltensor() for v in seed_ids] - num_subgs = len(seed_ids) - if node_prob is None: - rst = _uniform_sampling(self, seed_ids, neighbor_type, num_hops, expand_factor) - else: - rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, - expand_factor) - - return [ImmutableSubgraphIndex(rst(i), self, rst(num_subgs + i), - rst(num_subgs * 2 + i)) for i in range(num_subgs)] - - def adjacency_matrix(self, transpose=False, ctx=F.cpu()): - """Return the adjacency matrix representation of this graph. - - By default, a row of returned adjacency matrix represents the destination - of an edge and the column represents the source. - - When transpose is True, a row represents the source and a column represents - a destination. - - Parameters - ---------- - transpose : bool - A flag to tranpose the returned adjacency matrix. - - Returns - ------- - SparseTensor - The adjacency matrix. - utils.Index - A index for data shuffling due to sparse format change. Return None - if shuffle is not required. - """ - rst = _CAPI_DGLGraphGetCSR(self._handle, transpose) - indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) - indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) - shuffle = utils.toindex(rst(2)) - dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) - return F.sparse_matrix(dat, ('csr', indices, indptr), - (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle - - def incidence_matrix(self, typestr, ctx): - """Return the incidence matrix representation of this graph. - - An incidence matrix is an n x m sparse matrix, where n is - the number of nodes and m is the number of edges. Each nnz - value indicating whether the edge is incident to the node - or not. 
- - There are three types of an incidence matrix `I`: - * "in": - - I[v, e] = 1 if e is the in-edge of v (or v is the dst node of e); - - I[v, e] = 0 otherwise. - * "out": - - I[v, e] = 1 if e is the out-edge of v (or v is the src node of e); - - I[v, e] = 0 otherwise. - * "both": - - I[v, e] = 1 if e is the in-edge of v; - - I[v, e] = -1 if e is the out-edge of v; - - I[v, e] = 0 otherwise (including self-loop). - - Parameters - ---------- - typestr : str - Can be either "in", "out" or "both" - ctx : context - The context of returned incidence matrix. - - Returns - ------- - SparseTensor - The incidence matrix. - utils.Index - A index for data shuffling due to sparse format change. Return None - if shuffle is not required. - """ - raise NotImplementedError('immutable graph doesn\'t implement incidence_matrix for now.') - - def to_networkx(self): - """Convert to networkx graph. - - The edge id will be saved as the 'id' edge attribute. - - Returns - ------- - networkx.DiGraph - The nx graph - """ - src, dst, eid = self.edges() - ret = nx.DiGraph() - for u, v, e in zip(src, dst, eid): - ret.add_edge(u, v, id=e) - return ret - - def from_networkx(self, nx_graph): - """Convert from networkx graph. - - If 'id' edge attribute exists, the edge will be added follows - the edge id order. Otherwise, order is undefined. 
- - Parameters - ---------- - nx_graph : networkx.DiGraph - The nx graph - """ - if not isinstance(nx_graph, nx.Graph): - nx_graph = nx.DiGraph(nx_graph) - else: - nx_graph = nx_graph.to_directed() - - assert nx_graph.number_of_edges() > 0, "can't create an empty immutable graph" - - # nx_graph.edges(data=True) returns src, dst, attr_dict - has_edge_id = 'id' in next(iter(nx_graph.edges(data=True)))[-1] - if has_edge_id: - num_edges = nx_graph.number_of_edges() - src = np.zeros((num_edges,), dtype=np.int64) - dst = np.zeros((num_edges,), dtype=np.int64) - for u, v, attr in nx_graph.edges(data=True): - eid = attr['id'] - src[eid] = u - dst[eid] = v - else: - src = [] - dst = [] - for e in nx_graph.edges: - src.append(e[0]) - dst.append(e[1]) - eid = np.arange(0, len(src), dtype=np.int64) - - num_nodes = nx_graph.number_of_nodes() - # We store edge Ids as an edge attribute. - eid = utils.toindex(eid) - src = utils.toindex(src) - dst = utils.toindex(dst) - self.init(src, dst, eid, num_nodes) - - def from_scipy_sparse_matrix(self, adj): - """Convert from scipy sparse matrix. - - NOTE: we assume the row is src nodes and the col is dst nodes. - - Parameters - ---------- - adj : scipy sparse matrix - """ - assert isinstance(adj, (scipy.sparse.csr_matrix, scipy.sparse.coo_matrix)), \ - "The input matrix has to be a SciPy sparse matrix." - num_nodes = max(adj.shape[0], adj.shape[1]) - out_mat = adj.tocoo() - src_ids = utils.toindex(out_mat.row) - dst_ids = utils.toindex(out_mat.col) - edge_ids = utils.toindex(F.arange(0, len(out_mat.row))) - self.init(src_ids, dst_ids, edge_ids, num_nodes) - - def from_edge_list(self, elist): - """Convert from an edge list. - - Paramters - --------- - elist : list - List of (u, v) edge tuple. 
- """ - src, dst = zip(*elist) - src = np.array(src) - dst = np.array(dst) - src_ids = utils.toindex(src) - dst_ids = utils.toindex(dst) - num_nodes = max(src.max(), dst.max()) + 1 - edge_ids = utils.toindex(F.arange(0, len(src))) - # TODO we need to detect multigraph automatically. - self.init(src_ids, dst_ids, edge_ids, num_nodes) - - def line_graph(self, backtracking=True): - """Return the line graph of this graph. - - Parameters - ---------- - backtracking : bool, optional (default=False) - Whether (i, j) ~ (j, i) in L(G). - (i, j) ~ (j, i) is the behavior of networkx.line_graph. - - Returns - ------- - ImmutableGraphIndex - The line graph of this graph. - """ - raise NotImplementedError('immutable graph doesn\'t implement line_graph') - -class ImmutableSubgraphIndex(ImmutableGraphIndex): - """Graph index for an immutable subgraph. - - Parameters - ---------- - backend_sparse : a sparse matrix from the backend framework. - The sparse matrix that represents a subgraph. - paranet : GraphIndex - The parent graph index. - induced_nodes : tensor - The parent node ids in this subgraph. - induced_edges : a lambda function that returns a tensor - The parent edge ids in this subgraph. - """ - def __init__(self, handle, parent, induced_nodes, induced_edges): - super(ImmutableSubgraphIndex, self).__init__(handle) - - self._parent = parent - self._induced_nodes = induced_nodes - self._induced_edges = induced_edges - - def edge_subgraph(self, e): - raise NotImplementedError('immutable graph doesn\'t implement edge_subgraph for now.') - - def line_graph(self, backtracking=True): - raise NotImplementedError('immutable graph doesn\'t implement line_graph') - - def incidence_matrix(self, typestr, ctx): - raise NotImplementedError('immutable graph doesn\'t implement incidence_matrix for now.') - - @property - def induced_edges(self): - """Return parent edge ids. - - Returns - ------- - A lambda function that returns utils.Index - The parent edge ids. 
- """ - return utils.toindex(self._induced_edges) - - @property - def induced_nodes(self): - """Return parent node ids. - - Returns - ------- - utils.Index - The parent node ids. - """ - return utils.toindex(self._induced_nodes) - -def create_immutable_graph_index(graph_data=None): - """Create a graph index object. - - Parameters - ---------- - graph_data : graph data, optional - Data to initialize graph. Same as networkx's semantics. - """ - if isinstance(graph_data, ImmutableGraphIndex): - return graph_data - - # Let's create an empty graph index first. - gidx = ImmutableGraphIndex(None) - - # edge list - if isinstance(graph_data, (list, tuple)): - try: - gidx.from_edge_list(graph_data) - return gidx - except Exception: # pylint: disable=broad-except - raise DGLError('Graph data is not a valid edge list for immutable_graph_index.') - - # scipy format - if isinstance(graph_data, scipy.sparse.spmatrix): - try: - gidx.from_scipy_sparse_matrix(graph_data) - return gidx - except Exception as e: # pylint: disable=broad-except - print(e) - raise DGLError('Graph data is not a valid scipy sparse matrix.') - - # networkx - any format - try: - gidx.from_networkx(graph_data) - except Exception: # pylint: disable=broad-except - raise DGLError('Error while creating graph from input of type "%s".' 
- % type(graph_data)) - - return gidx - _init_api("dgl.graph_index") _NEIGHBOR_SAMPLING_APIS = { diff --git a/src/graph/graph.cc b/src/graph/graph.cc index 0727a1c71b24..1d63c191ee7b 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -12,6 +12,33 @@ #include "../c_api_common.h" namespace dgl { + +Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, + bool multigraph): is_multigraph_(multigraph) { + this->AddVertices(num_nodes); + num_edges_ = src_ids->shape[0]; + assert(num_edges_ == dst_ids->shape[0]); + assert(num_edges_ == edge_ids->shape[0]); + const dgl_id_t *src_data = static_cast(src_ids->data); + const dgl_id_t *dst_data = static_cast(dst_ids->data); + const dgl_id_t *edge_data = static_cast(edge_ids->data); + for (int64_t i = 0; i < num_edges_; i++) { + auto src = src_data[i]; + auto dst = dst_data[i]; + auto eid = edge_data[i]; + CHECK(HasVertex(src) && HasVertex(dst)) + << "Invalid vertices: src=" << src << " dst=" << dst; + + adjlist_[src].succ.push_back(dst); + adjlist_[src].edge_id.push_back(eid); + reverse_adjlist_[dst].succ.push_back(src); + reverse_adjlist_[dst].edge_id.push_back(eid); + + all_edges_src_.push_back(src); + all_edges_dst_.push_back(dst); + } +} + void Graph::AddVertices(uint64_t num_vertices) { CHECK(!read_only_) << "Graph is read-only. 
Mutations are not allowed."; adjlist_.resize(adjlist_.size() + num_vertices); @@ -471,6 +498,9 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { return rst; } +std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const { +} + GraphInterface::ptr Graph::Reverse() const { LOG(FATAL) << "not implemented"; return nullptr; diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index 0917ee2e791d..bca275365dc2 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -99,13 +99,29 @@ PackedFunc ConvertSubgraphToPackedFunc(const std::vector& sg) { ///////////////////////////// Graph API /////////////////////////////////// -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateMutable") .set_body([] (DGLArgs args, DGLRetValue* rv) { bool multigraph = static_cast(args[0]); GraphHandle ghandle = new Graph(multigraph); *rv = ghandle; }); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate") +.set_body([] (DGLArgs args, DGLRetValue* rv) { + const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); + const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); + const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); + bool multigraph = static_cast(args[3]); + int64_t num_nodes = static_cast(args[4]); + bool readonly = static_cast(args[5]); + GraphHandle ghandle; + if (readonly) + ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph); + else + ghandle = new Graph(src_ids, dst_ids, edge_ids, num_nodes, multigraph); + *rv = ghandle; + }); + DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphFree") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; @@ -154,6 +170,14 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsMultigraph") *rv = gptr->IsMultigraph(); }); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphIsReadonly") +.set_body([] (DGLArgs args, DGLRetValue *rv) { + 
GraphHandle ghandle = args[0]; + // NOTE: not const since we have caches + const GraphInterface* gptr = static_cast(ghandle); + *rv = gptr->IsReadonly(); + }); + DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNumVertices") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; @@ -408,34 +432,6 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") *rv = lghandle; }); -///////////////////////////// Immutable Graph API /////////////////////////////////// - -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreateImmutable") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); - const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); - const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); - bool multigraph = static_cast(args[3]); - int64_t num_nodes = static_cast(args[4]); - GraphHandle ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph); - *rv = ghandle; - }); - -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetCSR") -.set_body([] (DGLArgs args, DGLRetValue* rv) { - GraphHandle ghandle = args[0]; - bool transpose = args[1]; - const GraphInterface *ptr = static_cast(ghandle); - const ImmutableGraph *gptr = dynamic_cast(ptr); - ImmutableGraph::CSRArray csr; - if (transpose) { - csr = gptr->GetOutCSRArray(); - } else { - csr = gptr->GetInCSRArray(); - } - *rv = ConvertCSRArrayToPackedFunc(csr); - }); - template void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; @@ -474,4 +470,22 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling64") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling128") .set_body(CAPI_NeighborUniformSample<128>); +///////////////////////////// Immutable Graph API /////////////////////////////////// + +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj") +.set_body([] (DGLArgs args, DGLRetValue* 
rv) { + GraphHandle ghandle = args[0]; + bool transpose = args[1]; + std::string format = args[2]; + const GraphInterface *ptr = static_cast(ghandle); + const ImmutableGraph *gptr = dynamic_cast(ptr); + ImmutableGraph::CSRArray csr; + if (transpose) { + csr = gptr->GetOutCSRArray(); + } else { + csr = gptr->GetInCSRArray(); + } + *rv = ConvertCSRArrayToPackedFunc(csr); + }); + } // namespace dgl diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 5d0830b103ee..c37b79792821 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -473,8 +473,7 @@ Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { } Subgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const { - LOG(FATAL) << "not implemented"; - return Subgraph(); + throw NotImplemented("EdgeSubgraph"); } ImmutableGraph::CSRArray ImmutableGraph::GetInCSRArray() const { From 52075993ce0855cd795a2d98cb1c1fac16716cc5 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 10:35:32 +0800 Subject: [PATCH 36/75] temp fix. --- python/dgl/graph_index.py | 60 ++++++++++++++++++++------------------- src/graph/graph.cc | 16 +++++++++++ src/graph/graph_apis.cc | 23 ++++----------- 3 files changed, 52 insertions(+), 47 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 4fcfb0c7ab2c..39745c3a2d2a 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -543,40 +543,41 @@ def adjacency_matrix(self, transpose, ctx): A index for data shuffling due to sparse format change. Return None if shuffle is not required. """ - #if not isinstance(transpose, bool): - # raise DGLError('Expect bool value for "transpose" arg,' - # ' but got %s.' 
% (type(transpose))) - #src, dst, _ = self.edges(False) - #src = src.tousertensor(ctx) # the index of the ctx will be cached - #dst = dst.tousertensor(ctx) # the index of the ctx will be cached - #src = F.unsqueeze(src, dim=0) - #dst = F.unsqueeze(dst, dim=0) - #if transpose: - # idx = F.cat([src, dst], dim=0) - #else: - # idx = F.cat([dst, src], dim=0) - #n = self.number_of_nodes() - #m = self.number_of_edges() - ## FIXME(minjie): data type - #dat = F.ones((m,), dtype=F.float32, ctx=ctx) - #adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n)) - #shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None - #return adj, shuffle_idx - fmt = F.get_preferred_sparse_format() - rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) - if fmt == "csr": + if not isinstance(transpose, bool): + raise DGLError('Expect bool value for "transpose" arg,' + ' but got %s.' % (type(transpose))) + if self.is_readonly(): + #if fmt == "csr": + fmt = F.get_preferred_sparse_format() + rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) shuffle = utils.toindex(rst(2)) dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) return F.sparse_matrix(dat, ('csr', indices, indptr), (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle - elif fmt == "coo": - rows = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) - cols = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) - dat = F.ones(rows.shape, dtype=F.float32, ctx=ctx) - return F.sparse_matrix(dat, ('coo', rows, cols), - (self.number_of_nodes(), self.number_of_nodes()))[0], None + else: + #elif fmt == "coo": + #src, dst, _ = self.edges(False) + #src = src.tousertensor(ctx) # the index of the ctx will be cached + #dst = dst.tousertensor(ctx) # the index of the ctx will be cached + #src = F.unsqueeze(src, dim=0) + #dst = F.unsqueeze(dst, dim=0) + #if transpose: + # idx = 
F.cat([src, dst], dim=0) + #else: + # idx = F.cat([dst, src], dim=0) + #print(idx.shape) + ## FIXME(minjie): data type + rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, "coo") + idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) + m = self.number_of_edges() + idx = F.reshape(idx, (2, m)) + dat = F.ones((m,), dtype=F.float32, ctx=ctx) + n = self.number_of_nodes() + adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n)) + shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None + return adj, shuffle_idx @utils.cached_member(cache='_cache', prefix='inc') def incidence_matrix(self, typestr, ctx): @@ -819,7 +820,8 @@ class SubgraphIndex(GraphIndex): The parent edge ids in this subgraph. """ def __init__(self, handle, parent, induced_nodes, induced_edges): - super(SubgraphIndex, self).__init__(handle) + super(SubgraphIndex, self).__init__(parent.is_multigraph(), parent.is_readonly()) + self._handle = handle self._parent = parent self._induced_nodes = induced_nodes self._induced_edges = induced_edges diff --git a/src/graph/graph.cc b/src/graph/graph.cc index 1d63c191ee7b..ba734c92532a 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -499,6 +499,22 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { } std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const { + if (fmt == "coo") { + int64_t num_edges = num_edges_; + IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + int64_t *idx_data = static_cast(idx->data); + std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); + std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data + num_edges); + IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + int64_t *eid_data = static_cast(eid->data); + for (uint64_t eid = 0; eid < num_edges; ++eid) { + eid_data[eid] = eid; + } + return std::vector{idx, eid}; + } else { + + } + return std::vector(); } 
GraphInterface::ptr Graph::Reverse() const { diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index bca275365dc2..03781d7fde1c 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -36,16 +36,11 @@ PackedFunc ConvertEdgeArrayToPackedFunc(const EdgeArray& ea) { } // Convert CSRArray structure to PackedFunc. -template -PackedFunc ConvertCSRArrayToPackedFunc(const CSRArray& ea) { +PackedFunc ConvertAdjToPackedFunc(const std::vector& ea) { auto body = [ea] (DGLArgs args, DGLRetValue* rv) { const int which = args[0]; - if (which == 0) { - *rv = std::move(ea.indptr); - } else if (which == 1) { - *rv = std::move(ea.indices); - } else if (which == 2) { - *rv = std::move(ea.id); + if ((size_t) which < ea.size()) { + *rv = std::move(ea[which]); } else { LOG(FATAL) << "invalid choice"; } @@ -470,22 +465,14 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling64") DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling128") .set_body(CAPI_NeighborUniformSample<128>); -///////////////////////////// Immutable Graph API /////////////////////////////////// - DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; bool transpose = args[1]; std::string format = args[2]; const GraphInterface *ptr = static_cast(ghandle); - const ImmutableGraph *gptr = dynamic_cast(ptr); - ImmutableGraph::CSRArray csr; - if (transpose) { - csr = gptr->GetOutCSRArray(); - } else { - csr = gptr->GetInCSRArray(); - } - *rv = ConvertCSRArrayToPackedFunc(csr); + auto res = ptr->GetAdj(transpose, format); + *rv = ConvertAdjToPackedFunc(res); }); } // namespace dgl From 61ff441d70493722acf74614ffd21a1729e78eba Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 15:26:47 +0800 Subject: [PATCH 37/75] impl GetAdj. 
--- python/dgl/graph_index.py | 13 ++++++------ src/graph/graph.cc | 43 ++++++++++++++++++++++++++++++++------- 2 files changed, 42 insertions(+), 14 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 39745c3a2d2a..c1fcc2c86b4c 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -546,18 +546,16 @@ def adjacency_matrix(self, transpose, ctx): if not isinstance(transpose, bool): raise DGLError('Expect bool value for "transpose" arg,' ' but got %s.' % (type(transpose))) - if self.is_readonly(): - #if fmt == "csr": - fmt = F.get_preferred_sparse_format() - rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) + fmt = F.get_preferred_sparse_format() + rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) + if fmt == "csr": indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) shuffle = utils.toindex(rst(2)) dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) return F.sparse_matrix(dat, ('csr', indices, indptr), (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle - else: - #elif fmt == "coo": + elif fmt == "coo": #src, dst, _ = self.edges(False) #src = src.tousertensor(ctx) # the index of the ctx will be cached #dst = dst.tousertensor(ctx) # the index of the ctx will be cached @@ -569,7 +567,6 @@ def adjacency_matrix(self, transpose, ctx): # idx = F.cat([dst, src], dim=0) #print(idx.shape) ## FIXME(minjie): data type - rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, "coo") idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) m = self.number_of_edges() idx = F.reshape(idx, (2, m)) @@ -578,6 +575,8 @@ def adjacency_matrix(self, transpose, ctx): adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n)) shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None return adj, shuffle_idx + else: + raise Exception("unknown format") @utils.cached_member(cache='_cache', prefix='inc') def 
incidence_matrix(self, typestr, ctx): diff --git a/src/graph/graph.cc b/src/graph/graph.cc index ba734c92532a..0fb5acea2f99 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -499,22 +499,51 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { } std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const { + int64_t num_edges = num_edges_; + int64_t num_nodes = NumVertices(); if (fmt == "coo") { - int64_t num_edges = num_edges_; IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *idx_data = static_cast(idx->data); - std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); - std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data + num_edges); + if (transpose) { + std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data); + std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges); + } else { + std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); + std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data + num_edges); + } IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *eid_data = static_cast(eid->data); - for (uint64_t eid = 0; eid < num_edges; ++eid) { + for (uint64_t eid = 0; eid < num_edges; ++eid) eid_data[eid] = eid; - } return std::vector{idx, eid}; + } else if (fmt == "csr") { + IdArray indptr = IdArray::Empty({num_nodes + 1}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indices = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + int64_t *indptr_data = static_cast(indptr->data); + int64_t *indices_data = static_cast(indices->data); + int64_t *eid_data = static_cast(eid->data); + const AdjacencyList *adjlist; + if (transpose) { + // Out-edges. + adjlist = &adjlist_; + } else { + // In-edges. 
+ adjlist = &reverse_adjlist_; + } + indptr_data[0] = 0; + for (size_t i = 0; i < adjlist->size(); i++) { + indptr_data[i + 1] = indptr_data[i] + adjlist->at(i).succ.size(); + std::copy(adjlist->at(i).succ.begin(), adjlist->at(i).succ.end(), + indices_data + indptr_data[i]); + std::copy(adjlist->at(i).edge_id.begin(), adjlist->at(i).edge_id.end(), + eid_data + indptr_data[i]); + } + return std::vector{indptr, indices, eid}; } else { - + LOG(FATAL) << "unsupported format"; + return std::vector(); } - return std::vector(); } GraphInterface::ptr Graph::Reverse() const { From f6440bc7ed7db1c790fbc06117e187d4da3b4ee1 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 15:52:17 +0800 Subject: [PATCH 38/75] fix lint --- include/dgl/graph_interface.h | 3 ++- python/dgl/graph_index.py | 14 ++++++++------ src/graph/graph.cc | 3 ++- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 4e06336e7b34..342c0477ea38 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -6,6 +6,7 @@ #ifndef DGL_GRAPH_INTERFACE_H_ #define DGL_GRAPH_INTERFACE_H_ +#include #include #include #include "runtime/ndarray.h" @@ -15,7 +16,7 @@ namespace dgl { class NotImplemented: public std::exception { std::string msg; public: - NotImplemented(const std::string &name) { + explicit NotImplemented(const std::string &name) { this->msg = name + " isn't implemented"; } diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index c1fcc2c86b4c..0fca41cad9b9 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -22,10 +22,8 @@ class GraphIndex(object): handle : GraphIndexHandle Handler """ - def __init__(self, multigraph, readonly): - # if the graph is readonly, we'll init later. 
- if not readonly: - self._handle = _CAPI_DGLGraphCreateMutable(multigraph) + def __init__(self, handle=None, multigraph=None, readonly=None): + self._handle = handle self._multigraph = multigraph self._readonly = readonly self._cache = {} @@ -116,7 +114,9 @@ def is_multigraph(self): bool True if it is a multigraph, False otherwise. """ - return bool(_CAPI_DGLGraphIsMultigraph(self._handle)) + if self._multigraph is None: + self._multigraph = bool(_CAPI_DGLGraphIsMultigraph(self._handle)) + return self._multigraph def is_readonly(self): """Indicate whether the graph index is read-only. @@ -126,6 +126,8 @@ def is_readonly(self): bool True if it is a read-only graph, False otherwise. """ + if self._readonly is None: + self._readonly = bool(_CAPI_DGLGraphIsReadonly(self._handle)) return self._readonly def number_of_nodes(self): @@ -958,7 +960,7 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): # FIXME(minjie): this return is not correct for mutable graph index return graph_data - gidx = GraphIndex(multigraph, readonly) + gidx = GraphIndex(None, multigraph, readonly) if graph_data is None and readonly: raise Exception("can't create an empty immutable graph") elif graph_data is None: diff --git a/src/graph/graph.cc b/src/graph/graph.cc index 0fb5acea2f99..361f2cb05b1c 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -517,7 +517,8 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const eid_data[eid] = eid; return std::vector{idx, eid}; } else if (fmt == "csr") { - IdArray indptr = IdArray::Empty({num_nodes + 1}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indptr = IdArray::Empty({num_nodes + 1}, DLDataType{kDLInt, 64, 1}, + DLContext{kDLCPU, 0}); IdArray indices = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *indptr_data = static_cast(indptr->data); From 
a1743918c6f16ddb23684513ee91c001f684ece3 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 15:54:50 +0800 Subject: [PATCH 39/75] fix. --- include/dgl/graph.h | 18 +++++++++--------- include/dgl/graph_interface.h | 12 ++++++------ include/dgl/immutable_graph.h | 24 ++++++++++++------------ 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/include/dgl/graph.h b/include/dgl/graph.h index b8e49d7cda9b..ca7c170ae668 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -314,8 +314,8 @@ class Graph: public GraphInterface { * \param vid The vertex id. * \return the successor vector */ - dgl_id_iters SuccVec(dgl_id_t vid) const { - return dgl_id_iters(adjlist_[vid].succ.begin(), adjlist_[vid].succ.end()); + DGLIdIters SuccVec(dgl_id_t vid) const { + return DGLIdIters(adjlist_[vid].succ.begin(), adjlist_[vid].succ.end()); } /*! @@ -323,8 +323,8 @@ class Graph: public GraphInterface { * \param vid The vertex id. * \return the out edge id vector */ - dgl_id_iters OutEdgeVec(dgl_id_t vid) const { - return dgl_id_iters(adjlist_[vid].edge_id.begin(), adjlist_[vid].edge_id.end()); + DGLIdIters OutEdgeVec(dgl_id_t vid) const { + return DGLIdIters(adjlist_[vid].edge_id.begin(), adjlist_[vid].edge_id.end()); } /*! @@ -332,8 +332,8 @@ class Graph: public GraphInterface { * \param vid The vertex id. * \return the predecessor vector */ - dgl_id_iters PredVec(dgl_id_t vid) const { - return dgl_id_iters(reverse_adjlist_[vid].succ.begin(), reverse_adjlist_[vid].succ.end()); + DGLIdIters PredVec(dgl_id_t vid) const { + return DGLIdIters(reverse_adjlist_[vid].succ.begin(), reverse_adjlist_[vid].succ.end()); } /*! @@ -341,9 +341,9 @@ class Graph: public GraphInterface { * \param vid The vertex id. 
* \return the in edge id vector */ - dgl_id_iters InEdgeVec(dgl_id_t vid) const { - return dgl_id_iters(reverse_adjlist_[vid].edge_id.begin(), - reverse_adjlist_[vid].edge_id.end()); + DGLIdIters InEdgeVec(dgl_id_t vid) const { + return DGLIdIters(reverse_adjlist_[vid].edge_id.begin(), + reverse_adjlist_[vid].edge_id.end()); } /*! diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 342c0477ea38..b407155c58c0 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -34,10 +34,10 @@ typedef dgl::runtime::NDArray IntArray; struct Subgraph; struct SampledSubgraph; -class dgl_id_iters { +class DGLIdIters { std::vector::const_iterator b, e; public: - dgl_id_iters(std::vector::const_iterator begin, + DGLIdIters(std::vector::const_iterator begin, std::vector::const_iterator end) { this->b = begin; this->e = end; @@ -298,28 +298,28 @@ class GraphInterface { * \param vid The vertex id. * \return the successor vector iterator pair. */ - virtual dgl_id_iters SuccVec(dgl_id_t vid) const = 0; + virtual DGLIdIters SuccVec(dgl_id_t vid) const = 0; /*! * \brief Return the out edge id vector * \param vid The vertex id. * \return the out edge id vector iterator pair. */ - virtual dgl_id_iters OutEdgeVec(dgl_id_t vid) const = 0; + virtual DGLIdIters OutEdgeVec(dgl_id_t vid) const = 0; /*! * \brief Return the predecessor vector * \param vid The vertex id. * \return the predecessor vector iterator pair. */ - virtual dgl_id_iters PredVec(dgl_id_t vid) const = 0; + virtual DGLIdIters PredVec(dgl_id_t vid) const = 0; /*! * \brief Return the in edge id vector * \param vid The vertex id. * \return the in edge id vector iterator pair. */ - virtual dgl_id_iters InEdgeVec(dgl_id_t vid) const = 0; + virtual DGLIdIters InEdgeVec(dgl_id_t vid) const = 0; /*! * \brief Reset the data in the graph and move its data to the returned graph object. 
diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 090a52921bc6..50fba6c7eea9 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -395,9 +395,9 @@ class ImmutableGraph: public GraphInterface { * \param vid The vertex id. * \return the successor vector */ - dgl_id_iters SuccVec(dgl_id_t vid) const { - return dgl_id_iters(out_csr_->indices.begin() + out_csr_->indptr[vid], - out_csr_->indices.begin() + out_csr_->indptr[vid + 1]); + DGLIdIters SuccVec(dgl_id_t vid) const { + return DGLIdIters(out_csr_->indices.begin() + out_csr_->indptr[vid], + out_csr_->indices.begin() + out_csr_->indptr[vid + 1]); } /*! @@ -405,9 +405,9 @@ class ImmutableGraph: public GraphInterface { * \param vid The vertex id. * \return the out edge id vector */ - dgl_id_iters OutEdgeVec(dgl_id_t vid) const { - return dgl_id_iters(out_csr_->edge_ids.begin() + out_csr_->indptr[vid], - out_csr_->edge_ids.begin() + out_csr_->indptr[vid + 1]); + DGLIdIters OutEdgeVec(dgl_id_t vid) const { + return DGLIdIters(out_csr_->edge_ids.begin() + out_csr_->indptr[vid], + out_csr_->edge_ids.begin() + out_csr_->indptr[vid + 1]); } /*! @@ -415,9 +415,9 @@ class ImmutableGraph: public GraphInterface { * \param vid The vertex id. * \return the predecessor vector */ - dgl_id_iters PredVec(dgl_id_t vid) const { - return dgl_id_iters(in_csr_->indices.begin() + in_csr_->indptr[vid], - in_csr_->indices.begin() + in_csr_->indptr[vid + 1]); + DGLIdIters PredVec(dgl_id_t vid) const { + return DGLIdIters(in_csr_->indices.begin() + in_csr_->indptr[vid], + in_csr_->indices.begin() + in_csr_->indptr[vid + 1]); } /*! @@ -425,9 +425,9 @@ class ImmutableGraph: public GraphInterface { * \param vid The vertex id. 
* \return the in edge id vector */ - dgl_id_iters InEdgeVec(dgl_id_t vid) const { - return dgl_id_iters(in_csr_->edge_ids.begin() + in_csr_->indptr[vid], - in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]); + DGLIdIters InEdgeVec(dgl_id_t vid) const { + return DGLIdIters(in_csr_->edge_ids.begin() + in_csr_->indptr[vid], + in_csr_->edge_ids.begin() + in_csr_->indptr[vid + 1]); } /*! From c483664bf40ef0846bb7f56506e46dd5316b871b Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 16:14:52 +0800 Subject: [PATCH 40/75] fix. --- python/dgl/graph_index.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 0fca41cad9b9..7a7607c9dca4 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -54,6 +54,8 @@ def __setstate__(self, state): def init(self, src_ids, dst_ids, edge_ids, num_nodes): """The actual init function""" + assert(len(src_ids) == len(dst_ids)) + assert(len(src_ids) == len(edge_ids)) self._handle = _CAPI_DGLGraphCreate(src_ids.todgltensor(), dst_ids.todgltensor(), edge_ids.todgltensor(), self._multigraph, num_nodes, self._readonly) @@ -138,6 +140,7 @@ def number_of_nodes(self): int The number of nodes """ + assert self._handle is not None return _CAPI_DGLGraphNumVertices(self._handle) def number_of_edges(self): @@ -738,7 +741,7 @@ def from_networkx(self, nx_graph): for e in nx_graph.edges: src.append(e[0]) dst.append(e[1]) - eid = np.arange(0, len(src), dtype=np.int64) + eid = np.arange(0, len(src), dtype=np.int64) num_nodes = nx_graph.number_of_nodes() # We store edge Ids as an edge attribute. eid = utils.toindex(eid) @@ -964,6 +967,9 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): if graph_data is None and readonly: raise Exception("can't create an empty immutable graph") elif graph_data is None: + # This is a mutable graph. 
+ handle = _CAPI_DGLGraphCreateMutable(multigraph) + gidx = GraphIndex(handle, multigraph, readonly) return gidx # edge list From 5e347780edc9c16e40c2aa61b9ae5c8a13765cc9 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 16:16:41 +0800 Subject: [PATCH 41/75] fix. --- python/dgl/graph_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 7a7607c9dca4..4115b95b13d0 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -54,8 +54,8 @@ def __setstate__(self, state): def init(self, src_ids, dst_ids, edge_ids, num_nodes): """The actual init function""" - assert(len(src_ids) == len(dst_ids)) - assert(len(src_ids) == len(edge_ids)) + assert len(src_ids) == len(dst_ids) + assert len(src_ids) == len(edge_ids) self._handle = _CAPI_DGLGraphCreate(src_ids.todgltensor(), dst_ids.todgltensor(), edge_ids.todgltensor(), self._multigraph, num_nodes, self._readonly) From 2a60e45647729dc5f3cee18384f7ba0bf860cd61 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 16:20:02 +0800 Subject: [PATCH 42/75] fix. --- python/dgl/graph_index.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 4115b95b13d0..36ce43965f00 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -963,13 +963,15 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): # FIXME(minjie): this return is not correct for mutable graph index return graph_data - gidx = GraphIndex(None, multigraph, readonly) + if readonly: + gidx = GraphIndex(None, multigraph, readonly) + else: + handle = _CAPI_DGLGraphCreateMutable(multigraph) + gidx = GraphIndex(handle, multigraph, readonly) + if graph_data is None and readonly: raise Exception("can't create an empty immutable graph") elif graph_data is None: - # This is a mutable graph. 
- handle = _CAPI_DGLGraphCreateMutable(multigraph) - gidx = GraphIndex(handle, multigraph, readonly) return gidx # edge list From 4042b3df59cd8593c5defc103d48b2dae95273f0 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 17:38:42 +0800 Subject: [PATCH 43/75] fix. --- include/dgl/immutable_graph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 50fba6c7eea9..f8a13d8f1f0e 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -471,7 +471,7 @@ class ImmutableGraph: public GraphInterface { */ virtual std::vector GetAdj(bool transpose, const std::string &fmt) const { assert(fmt == "csr"); - CSRArray arrs = transpose ? this->GetOutCSRArray() : this->GetOutCSRArray(); + CSRArray arrs = transpose ? this->GetOutCSRArray() : this->GetInCSRArray(); return std::vector{arrs.indptr, arrs.indices, arrs.id}; } From 8e24bb033af8504531b22849de5b7567b168e0d5 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 17:39:49 +0800 Subject: [PATCH 44/75] use csr only for readonly graph. --- python/dgl/graph_index.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 36ce43965f00..07c42702bf4c 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -551,37 +551,37 @@ def adjacency_matrix(self, transpose, ctx): if not isinstance(transpose, bool): raise DGLError('Expect bool value for "transpose" arg,' ' but got %s.' 
% (type(transpose))) - fmt = F.get_preferred_sparse_format() - rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) - if fmt == "csr": + if self.is_readonly(): + fmt = F.get_preferred_sparse_format() + rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) shuffle = utils.toindex(rst(2)) dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) return F.sparse_matrix(dat, ('csr', indices, indptr), (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle - elif fmt == "coo": - #src, dst, _ = self.edges(False) - #src = src.tousertensor(ctx) # the index of the ctx will be cached - #dst = dst.tousertensor(ctx) # the index of the ctx will be cached - #src = F.unsqueeze(src, dim=0) - #dst = F.unsqueeze(dst, dim=0) - #if transpose: - # idx = F.cat([src, dst], dim=0) - #else: - # idx = F.cat([dst, src], dim=0) - #print(idx.shape) + else: + src, dst, _ = self.edges(False) + src = src.tousertensor(ctx) # the index of the ctx will be cached + dst = dst.tousertensor(ctx) # the index of the ctx will be cached + src = F.unsqueeze(src, dim=0) + dst = F.unsqueeze(dst, dim=0) + if transpose: + idx = F.cat([src, dst], dim=0) + else: + idx = F.cat([dst, src], dim=0) + ## FIXME(minjie): data type - idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) + #idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) m = self.number_of_edges() - idx = F.reshape(idx, (2, m)) + #idx = F.reshape(idx, (2, m)) dat = F.ones((m,), dtype=F.float32, ctx=ctx) n = self.number_of_nodes() adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n)) shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None return adj, shuffle_idx - else: - raise Exception("unknown format") + #else: + # raise Exception("unknown format") @utils.cached_member(cache='_cache', prefix='inc') def incidence_matrix(self, typestr, ctx): From 
6ef6b7a6762f5364e68570ee523d9087972c903f Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 17:41:41 +0800 Subject: [PATCH 45/75] Revert "use csr only for readonly graph." This reverts commit 8e24bb033af8504531b22849de5b7567b168e0d5. --- python/dgl/graph_index.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 07c42702bf4c..36ce43965f00 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -551,37 +551,37 @@ def adjacency_matrix(self, transpose, ctx): if not isinstance(transpose, bool): raise DGLError('Expect bool value for "transpose" arg,' ' but got %s.' % (type(transpose))) - if self.is_readonly(): - fmt = F.get_preferred_sparse_format() - rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) + fmt = F.get_preferred_sparse_format() + rst = _CAPI_DGLGraphGetAdj(self._handle, transpose, fmt) + if fmt == "csr": indptr = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) indices = F.copy_to(utils.toindex(rst(1)).tousertensor(), ctx) shuffle = utils.toindex(rst(2)) dat = F.ones(indices.shape, dtype=F.float32, ctx=ctx) return F.sparse_matrix(dat, ('csr', indices, indptr), (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle - else: - src, dst, _ = self.edges(False) - src = src.tousertensor(ctx) # the index of the ctx will be cached - dst = dst.tousertensor(ctx) # the index of the ctx will be cached - src = F.unsqueeze(src, dim=0) - dst = F.unsqueeze(dst, dim=0) - if transpose: - idx = F.cat([src, dst], dim=0) - else: - idx = F.cat([dst, src], dim=0) - + elif fmt == "coo": + #src, dst, _ = self.edges(False) + #src = src.tousertensor(ctx) # the index of the ctx will be cached + #dst = dst.tousertensor(ctx) # the index of the ctx will be cached + #src = F.unsqueeze(src, dim=0) + #dst = F.unsqueeze(dst, dim=0) + #if transpose: + # idx = F.cat([src, dst], dim=0) + #else: + # idx = F.cat([dst, src], dim=0) + 
#print(idx.shape) ## FIXME(minjie): data type - #idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) + idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) m = self.number_of_edges() - #idx = F.reshape(idx, (2, m)) + idx = F.reshape(idx, (2, m)) dat = F.ones((m,), dtype=F.float32, ctx=ctx) n = self.number_of_nodes() adj, shuffle_idx = F.sparse_matrix(dat, ('coo', idx), (n, n)) shuffle_idx = utils.toindex(shuffle_idx) if shuffle_idx is not None else None return adj, shuffle_idx - #else: - # raise Exception("unknown format") + else: + raise Exception("unknown format") @utils.cached_member(cache='_cache', prefix='inc') def incidence_matrix(self, typestr, ctx): From c02a1eaede8393b3285a69f3487098556ca5b996 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 17:42:27 +0800 Subject: [PATCH 46/75] remove code. --- python/dgl/graph_index.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 36ce43965f00..c5aabac44f32 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -561,16 +561,6 @@ def adjacency_matrix(self, transpose, ctx): return F.sparse_matrix(dat, ('csr', indices, indptr), (self.number_of_nodes(), self.number_of_nodes()))[0], shuffle elif fmt == "coo": - #src, dst, _ = self.edges(False) - #src = src.tousertensor(ctx) # the index of the ctx will be cached - #dst = dst.tousertensor(ctx) # the index of the ctx will be cached - #src = F.unsqueeze(src, dim=0) - #dst = F.unsqueeze(dst, dim=0) - #if transpose: - # idx = F.cat([src, dst], dim=0) - #else: - # idx = F.cat([dst, src], dim=0) - #print(idx.shape) ## FIXME(minjie): data type idx = F.copy_to(utils.toindex(rst(0)).tousertensor(), ctx) m = self.number_of_edges() From 0dbf525f8666ddea2ca3921357228a255e3c848f Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 17:46:13 +0800 Subject: [PATCH 47/75] fix. 
--- python/dgl/graph_index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index c5aabac44f32..f9b1e24cfcfb 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -696,9 +696,6 @@ def from_networkx(self, nx_graph): nx_graph : networkx.DiGraph The nx graph """ - if not self.is_readonly(): - self.clear() - if not isinstance(nx_graph, nx.Graph): nx_graph = (nx.MultiDiGraph(nx_graph) if self.is_multigraph() else nx.DiGraph(nx_graph)) @@ -709,6 +706,9 @@ def from_networkx(self, nx_graph): nx_graph = nx_graph.to_directed() num_nodes = nx_graph.number_of_nodes() + if not self.is_readonly(): + self.clear() + self.add_nodes(num_nodes) if nx_graph.number_of_edges() == 0: if self.is_readonly(): From 4ba49b50a884cac25bab18932a9ade94cf433459 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 17:49:16 +0800 Subject: [PATCH 48/75] fix. --- python/dgl/graph_index.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index f9b1e24cfcfb..88ad4904123d 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -665,8 +665,8 @@ def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, no rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, expand_factor) - return [ImmutableSubgraphIndex(rst(i), self, rst(num_subgs + i), - rst(num_subgs * 2 + i)) for i in range(num_subgs)] + return [SubgraphIndex(rst(i), self, rst(num_subgs + i), + rst(num_subgs * 2 + i)) for i in range(num_subgs)] def to_networkx(self): """Convert to networkx graph. 
@@ -867,7 +867,7 @@ def map_to_subgraph_nid(subgraph, parent_nids): Parameters ---------- - subgraph: SubgraphIndex or ImmutableSubgraphIndex + subgraph: SubgraphIndex the graph index of a subgraph parent_nids: utils.Index From 7da4512006c7f30fe290fcf4f25b40862140f2c8 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 17:50:16 +0800 Subject: [PATCH 49/75] fix. --- python/dgl/contrib/sampling/sampler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/dgl/contrib/sampling/sampler.py b/python/dgl/contrib/sampling/sampler.py index 88079120b11f..fdceb0d9e475 100644 --- a/python/dgl/contrib/sampling/sampler.py +++ b/python/dgl/contrib/sampling/sampler.py @@ -1,5 +1,6 @@ # This file contains subgraph samplers. +import sys import numpy as np import threading import random From 1eea27de84c6e3dc102c5e6ba20c880eabbdcda4 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 18:46:51 +0800 Subject: [PATCH 50/75] fix. --- python/dgl/graph_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 88ad4904123d..f5f1234118eb 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -665,8 +665,8 @@ def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, no rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, expand_factor) - return [SubgraphIndex(rst(i), self, rst(num_subgs + i), - rst(num_subgs * 2 + i)) for i in range(num_subgs)] + return [SubgraphIndex(rst(i), self, utils.toindex(rst(num_subgs + i)), + utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] def to_networkx(self): """Convert to networkx graph. From c8436260e55076d6e3996cbfdcac11ccb575f3b5 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 18:59:18 +0800 Subject: [PATCH 51/75] fix. 
--- src/graph/graph.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/graph/graph.cc b/src/graph/graph.cc index 361f2cb05b1c..af8395e6c5fe 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -505,11 +505,11 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *idx_data = static_cast(idx->data); if (transpose) { - std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data); - std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges); - } else { std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data + num_edges); + } else { + std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data); + std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges); } IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *eid_data = static_cast(eid->data); From fb8e8a0d161d9ce3440544b6c9bb6a18e6eb1899 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Mon, 14 Jan 2019 19:35:17 +0800 Subject: [PATCH 52/75] fix. 
--- include/dgl/immutable_graph.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index f8a13d8f1f0e..3303f4cb910d 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -97,6 +97,11 @@ class ImmutableGraph: public GraphInterface { ImmutableGraph(ImmutableGraph&& other) = default; #else ImmutableGraph(ImmutableGraph&& other) { + this->in_csr_ = other.in_csr_; + this->out_csr_ = other.out_csr_; + this->is_multigraph_ = other.is_multigraph_; + other.in_csr_ = nullptr; + other.out_csr_ = nullptr; } #endif // _MSC_VER From 52b8022d17febd8afd7d5226d1c29a002ab3ffc8 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 11:27:47 +0800 Subject: [PATCH 53/75] address comments. --- include/dgl/graph.h | 3 ++- include/dgl/graph_interface.h | 31 +++++++++++++++++++------------ include/dgl/immutable_graph.h | 4 ++-- src/graph/graph.cc | 2 +- src/graph/immutable_graph.cc | 8 ++++---- 5 files changed, 28 insertions(+), 20 deletions(-) diff --git a/include/dgl/graph.h b/include/dgl/graph.h index ca7c170ae668..ec8e9b33181c 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -37,6 +37,7 @@ class Graph: public GraphInterface { /*! \brief default constructor */ explicit Graph(bool multigraph = false) : is_multigraph_(multigraph) {} + /*! \brief construct a graph from the coo format. */ Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, bool multigraph = false); @@ -307,7 +308,7 @@ class Graph: public GraphInterface { * * \return the reversed graph */ - GraphInterface::ptr Reverse() const; + GraphPtr Reverse() const; /*! * \brief Return the successor vector diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index b407155c58c0..200a60a4f28f 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -34,28 +34,37 @@ typedef dgl::runtime::NDArray IntArray; struct Subgraph; struct SampledSubgraph; +/*! 
+ * \brief This class references data in std::vector. + * + * This isn't a STL-style iterator. It provides a STL data container interface. + * but it doesn't own data itself. instead, it only references data in std::vector. + */ class DGLIdIters { - std::vector::const_iterator b, e; + std::vector::const_iterator begin_, end_; public: DGLIdIters(std::vector::const_iterator begin, - std::vector::const_iterator end) { - this->b = begin; - this->e = end; + std::vector::const_iterator end) { + this->begin_ = begin; + this->end_ = end; } std::vector::const_iterator begin() const { - return this->b; + return this->begin_; } std::vector::const_iterator end() const { - return this->e; + return this->end_; } dgl_id_t operator[](int64_t i) const { - return *(this->b + i); + return *(this->begin_ + i); } size_t size() const { - return this->e - this->b; + return this->end_ - this->begin_; } }; +class GraphInterface; +typedef std::shared_ptr GraphPtr; + /*! * \brief dgl graph index interface. * @@ -69,8 +78,6 @@ class GraphInterface { IdArray src, dst, id; } EdgeArray; - typedef std::shared_ptr ptr; - virtual ~GraphInterface() { } @@ -291,7 +298,7 @@ class GraphInterface { * * \return the reversed graph */ - virtual GraphInterface::ptr Reverse() const = 0; + virtual GraphPtr Reverse() const = 0; /*! * \brief Return the successor vector @@ -350,7 +357,7 @@ class GraphInterface { /*! \brief Subgraph data structure */ struct Subgraph { /*! \brief The graph. */ - GraphInterface::ptr graph; + GraphPtr graph; /*! * \brief The induced vertex ids. * \note This is also a map from the new vertex id to the vertex id in the parent graph. 
diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 3303f4cb910d..59a011f80c1f 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -391,8 +391,8 @@ class ImmutableGraph: public GraphInterface { * * \return the reversed graph */ - GraphInterface::ptr Reverse() const { - return GraphInterface::ptr(new ImmutableGraph(out_csr_, in_csr_, is_multigraph_)); + GraphPtr Reverse() const { + return GraphPtr(new ImmutableGraph(out_csr_, in_csr_, is_multigraph_)); } /*! diff --git a/src/graph/graph.cc b/src/graph/graph.cc index af8395e6c5fe..e8fe788759ca 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -547,7 +547,7 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const } } -GraphInterface::ptr Graph::Reverse() const { +GraphPtr Graph::Reverse() const { LOG(FATAL) << "not implemented"; return nullptr; } diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index c37b79792821..072f38bdfac0 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -459,13 +459,13 @@ Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { // We prefer to generate a subgraph for out-csr first. if (out_csr_) { ret = out_csr_->VertexSubgraph(vids); - subg.graph = GraphInterface::ptr(new ImmutableGraph(nullptr, ret.first, IsMultigraph())); + subg.graph = GraphPtr(new ImmutableGraph(nullptr, ret.first, IsMultigraph())); } else { assert(in_csr_); ret = in_csr_->VertexSubgraph(vids); // When we generate a subgraph, it may be used by only accessing in-edges or out-edges. // We don't need to generate both. 
- subg.graph = GraphInterface::ptr(new ImmutableGraph(ret.first, nullptr, IsMultigraph())); + subg.graph = GraphPtr(new ImmutableGraph(ret.first, nullptr, IsMultigraph())); } subg.induced_vertices = vids; subg.induced_edges = ret.second; @@ -898,9 +898,9 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, subg_csr->edge_ids[i] = i; if (neigh_type == "in") - subg.graph = GraphInterface::ptr(new ImmutableGraph(subg_csr, nullptr, IsMultigraph())); + subg.graph = GraphPtr(new ImmutableGraph(subg_csr, nullptr, IsMultigraph())); else - subg.graph = GraphInterface::ptr(new ImmutableGraph(nullptr, subg_csr, IsMultigraph())); + subg.graph = GraphPtr(new ImmutableGraph(nullptr, subg_csr, IsMultigraph())); return subg; } From 6350408a0b04eebdd4a855fbaff746c5e707507c Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 12:28:22 +0800 Subject: [PATCH 54/75] fix for comments. --- include/dgl/graph.h | 2 +- include/dgl/graph_interface.h | 5 ++- include/dgl/immutable_graph.h | 66 +++++++++++++++++++---------------- python/dgl/backend/backend.py | 5 +++ python/dgl/graph_index.py | 30 ++++++++++------ src/graph/immutable_graph.cc | 42 +++++++++++----------- 6 files changed, 83 insertions(+), 67 deletions(-) diff --git a/include/dgl/graph.h b/include/dgl/graph.h index ec8e9b33181c..21c2dcbe2106 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -370,7 +370,7 @@ class Graph: public GraphInterface { /*! * \brief Sample a subgraph from the seed vertices with neighbor sampling. - * The neighbors are sampled with a uniformly distribution. + * The neighbors are sampled with a uniform distribution. 
* \return a subgraph */ virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 200a60a4f28f..36cd2c1082bb 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -78,8 +78,7 @@ class GraphInterface { IdArray src, dst, id; } EdgeArray; - virtual ~GraphInterface() { - } + virtual ~GraphInterface() = default; /*! * \brief Add vertices to the graph. @@ -341,7 +340,7 @@ class GraphInterface { * of an edge and the column represents the source. * \param transpose A flag to transpose the returned adjacency matrix. * \param fmt the format of the returned adjacency matrix. - * \return a vector of three IdArray. + * \return a vector of IdArrays. */ virtual std::vector GetAdj(bool transpose, const std::string &fmt) const = 0; diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 59a011f80c1f..6d7061dd966e 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -23,8 +23,9 @@ bool binary_search(ForwardIt first, ForwardIt last, const T& value) { } /*! - * \brief Base dgl immutable graph index class. + * \brief DGL immutable graph index class. * + * DGL's graph is directed. Vertices are integers enumerated from zero. 
*/ class ImmutableGraph: public GraphInterface { public: @@ -32,18 +33,18 @@ class ImmutableGraph: public GraphInterface { IdArray indptr, indices, id; } CSRArray; - struct edge { + struct Edge { dgl_id_t end_points[2]; dgl_id_t edge_id; }; - struct csr { - typedef std::shared_ptr ptr; + struct CSR { + typedef std::shared_ptr Ptr; std::vector indptr; std::vector indices; std::vector edge_ids; - csr(int64_t num_vertices, int64_t expected_num_edges) { + CSR(int64_t num_vertices, int64_t expected_num_edges) { indptr.resize(num_vertices + 1); indices.reserve(expected_num_edges); edge_ids.reserve(expected_num_edges); @@ -68,19 +69,19 @@ class ImmutableGraph: public GraphInterface { EdgeArray GetEdges(dgl_id_t vid) const; EdgeArray GetEdges(IdArray vids) const; std::pair GetIndexRef(dgl_id_t v) const { - int64_t start = indptr[v]; - int64_t end = indptr[v + 1]; + const int64_t start = indptr[v]; + const int64_t end = indptr[v + 1]; return std::pair(&indices[start], &indices[end]); } - csr::ptr Transpose() const; - std::pair VertexSubgraph(IdArray vids) const; - static csr::ptr from_edges(std::vector *edges, int sort_on, int64_t num_nodes); + CSR::Ptr Transpose() const; + std::pair VertexSubgraph(IdArray vids) const; + static CSR::Ptr FromEdges(std::vector *edges, int sort_on, int64_t num_nodes); }; ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, bool multigraph = false); - ImmutableGraph(csr::ptr in_csr, csr::ptr out_csr, + ImmutableGraph(CSR::Ptr in_csr, CSR::Ptr out_csr, bool multigraph = false) : is_multigraph_(multigraph) { this->in_csr_ = in_csr; this->out_csr_ = out_csr; @@ -362,8 +363,6 @@ class ImmutableGraph: public GraphInterface { */ Subgraph VertexSubgraph(IdArray vids) const; - std::vector VertexSubgraphs(const std::vector &vids) const; - /*! * \brief Construct the induced edge subgraph of the given edges. 
* @@ -382,8 +381,6 @@ class ImmutableGraph: public GraphInterface { */ Subgraph EdgeSubgraph(IdArray eids) const; - std::vector EdgeSubgraphs(std::vector eids) const; - /*! * \brief Return a new graph with all the edges reversed. * @@ -453,18 +450,6 @@ class ImmutableGraph: public GraphInterface { SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, int num_hops, int expand_factor) const; - /*! - * \brief Get the CSR array that represents the in-edges. - * \return the CSR array. - */ - CSRArray GetInCSRArray() const; - - /*! - * \brief Get the CSR array that represents the out-edges. - * \return the CSR array. - */ - CSRArray GetOutCSRArray() const; - /*! * \brief Get the adjacency matrix of the graph. * @@ -488,7 +473,7 @@ class ImmutableGraph: public GraphInterface { * When we get in csr or out csr, we try to get the one cached in the structure. * If not, we transpose the other one to get the one we need. */ - csr::ptr GetInCSR() const { + CSR::Ptr GetInCSR() const { if (in_csr_) { return in_csr_; } else { @@ -497,7 +482,7 @@ class ImmutableGraph: public GraphInterface { return in_csr_; } } - csr::ptr GetOutCSR() const { + CSR::Ptr GetOutCSR() const { if (out_csr_) { return out_csr_; } else { @@ -507,16 +492,35 @@ class ImmutableGraph: public GraphInterface { } } + /*! + * \brief Get the CSR array that represents the in-edges. + * This method copies data from std::vector to IdArray. + * \return the CSR array. + */ + CSRArray GetInCSRArray() const; + + /*! + * \brief Get the CSR array that represents the out-edges. + * This method copies data from std::vector to IdArray. + * \return the CSR array. + */ + CSRArray GetOutCSRArray() const; + SampledSubgraph SampleSubgraph(IdArray seed_arr, const float* probability, const std::string &neigh_type, int num_hops, size_t num_neighbor) const; + /*! + * \brief Compact a subgraph. + * In a sampled subgraph, the vertex Id is still in the ones in the original graph. 
+ * We want to convert them to the subgraph Ids. + */ void CompactSubgraph(IdArray induced_vertices); // Store the in-edges. - csr::ptr in_csr_; + CSR::Ptr in_csr_; // Store the out-edges. - csr::ptr out_csr_; + CSR::Ptr out_csr_; /*! * \brief Whether if this is a multigraph. * diff --git a/python/dgl/backend/backend.py b/python/dgl/backend/backend.py index 096bce56a530..bca54d319205 100644 --- a/python/dgl/backend/backend.py +++ b/python/dgl/backend/backend.py @@ -79,6 +79,11 @@ def get_preferred_sparse_format(): Different backends have their preferred backend. This info is useful when constructing a sparse matrix. + + Returns + ------- + string + the name of the preferred sparse matrix format. """ pass diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 0ae69c0d70ca..3de8cd4b0cb2 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -36,21 +36,27 @@ def __getstate__(self): src, dst, _ = self.edges() n_nodes = self.number_of_nodes() multigraph = self.is_multigraph() + readonly = self.is_readonly() - return n_nodes, multigraph, src, dst + return n_nodes, multigraph, readonly, src, dst def __setstate__(self, state): """The pickle state of GraphIndex is defined as a triplet - (number_of_nodes, multigraph, src_nodes, dst_nodes) + (number_of_nodes, multigraph, readonly, src_nodes, dst_nodes) """ - n_nodes, multigraph, src, dst = state + n_nodes, multigraph, readonly, src, dst = state - self._handle = _CAPI_DGLGraphCreateMutable(multigraph) - self._cache = {} + if readonly: + self._readonly = readonly + self._multigraph = multigraph + self.init(src, dst, F.arange(0, len(src)), n_nodes) + else: + self._handle = _CAPI_DGLGraphCreateMutable(multigraph) + self._cache = {} - self.clear() - self.add_nodes(n_nodes) - self.add_edges(src, dst) + self.clear() + self.add_nodes(n_nodes) + self.add_edges(src, dst) def init(self, src_ids, dst_ids, edge_ids, num_nodes): """The actual init function""" @@ -140,7 +146,6 @@ def 
number_of_nodes(self): int The number of nodes """ - assert self._handle is not None return _CAPI_DGLGraphNumVertices(self._handle) def number_of_edges(self): @@ -749,8 +754,9 @@ def from_scipy_sparse_matrix(self, adj): "The input matrix has to be a SciPy sparse matrix." if not self.is_readonly(): self.clear() - # what if the adj matrix isn't symmetric. - num_nodes = max(adj.shape[0], adj.shape[1]) + if adj.shape[0] != adj.shape[1]: + raise ValueError("we don't support a rectangle matrix") + num_nodes = adj.shape[0] adj_coo = adj.tocoo() src = utils.toindex(adj_coo.row) dst = utils.toindex(adj_coo.col) @@ -952,6 +958,7 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): return graph_data if readonly: + # FIXME(zhengda): we should construct a C graph index before constructing GraphIndex. gidx = GraphIndex(None, multigraph, readonly) else: handle = _CAPI_DGLGraphCreateMutable(multigraph) @@ -990,6 +997,7 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): _init_api("dgl.graph_index") +# TODO(zhengda): we'll support variable-length inputs. 
_NEIGHBOR_SAMPLING_APIS = { 1: _CAPI_DGLGraphUniformSampling, 2: _CAPI_DGLGraphUniformSampling2, diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 072f38bdfac0..1cae1af864a3 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -19,7 +19,7 @@ int rand_r(unsigned *seed) { namespace dgl { -ImmutableGraph::EdgeArray ImmutableGraph::csr::GetEdges(dgl_id_t vid) const { +ImmutableGraph::EdgeArray ImmutableGraph::CSR::GetEdges(dgl_id_t vid) const { CHECK(HasVertex(vid)) << "invalid vertex: " << vid; int64_t off = this->indptr[vid]; const int64_t len = this->GetDegree(vid); @@ -37,7 +37,7 @@ ImmutableGraph::EdgeArray ImmutableGraph::csr::GetEdges(dgl_id_t vid) const { return ImmutableGraph::EdgeArray{src, dst, eid}; } -ImmutableGraph::EdgeArray ImmutableGraph::csr::GetEdges(IdArray vids) const { +ImmutableGraph::EdgeArray ImmutableGraph::CSR::GetEdges(IdArray vids) const { CHECK(IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; const dgl_id_t* vid_data = static_cast(vids->data); @@ -68,7 +68,7 @@ ImmutableGraph::EdgeArray ImmutableGraph::csr::GetEdges(IdArray vids) const { return ImmutableGraph::EdgeArray{src, dst, eid}; } -DegreeArray ImmutableGraph::csr::GetDegrees(IdArray vids) const { +DegreeArray ImmutableGraph::CSR::GetDegrees(IdArray vids) const { CHECK(IsValidIdArray(vids)) << "Invalid vertex id array."; const auto len = vids->shape[0]; const dgl_id_t* vid_data = static_cast(vids->data); @@ -144,7 +144,7 @@ class HashTableChecker { } }; -std::pair ImmutableGraph::csr::VertexSubgraph( +std::pair ImmutableGraph::CSR::VertexSubgraph( IdArray vids) const { const dgl_id_t* vid_data = static_cast(vids->data); const int64_t len = vids->shape[0]; @@ -156,7 +156,7 @@ std::pair ImmutableGraph::csr::VertexSubgraph // Collect the non-zero entries in from the original graph. 
std::vector orig_edge_ids; orig_edge_ids.reserve(len * 50); - auto sub_csr = std::make_shared(len, len * 50); + auto sub_csr = std::make_shared(len, len * 50); sub_csr->indptr[0] = 0; for (int64_t i = 0; i < len; ++i) { const dgl_id_t oldvid = vid_data[i]; @@ -179,11 +179,11 @@ std::pair ImmutableGraph::csr::VertexSubgraph dgl_id_t* eid_data = static_cast(rst_eids->data); std::copy(orig_edge_ids.begin(), orig_edge_ids.end(), eid_data); - return std::pair, IdArray>(sub_csr, rst_eids); + return std::pair(sub_csr, rst_eids); } -ImmutableGraph::csr::ptr ImmutableGraph::csr::from_edges(std::vector *edges, - int sort_on, int64_t num_nodes) { +ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector *edges, + int sort_on, int64_t num_nodes) { assert(sort_on == 0 || sort_on == 1); int other_end = sort_on == 1 ? 0 : 1; // TODO(zhengda) we should sort in parallel. @@ -194,7 +194,7 @@ ImmutableGraph::csr::ptr ImmutableGraph::csr::from_edges(std::vector *edge this->sort_on = sort_on; this->other_end = other_end; } - bool operator()(const edge &e1, const edge &e2) { + bool operator()(const Edge &e1, const Edge &e2) { if (e1.end_points[sort_on] == e2.end_points[sort_on]) return e1.end_points[other_end] < e2.end_points[other_end]; else @@ -202,7 +202,7 @@ ImmutableGraph::csr::ptr ImmutableGraph::csr::from_edges(std::vector *edge } }; std::sort(edges->begin(), edges->end(), compare(sort_on, other_end)); - auto t = std::make_shared(0, 0); + auto t = std::make_shared(0, 0); t->indices.resize(edges->size()); t->edge_ids.resize(edges->size()); for (size_t i = 0; i < edges->size(); i++) { @@ -221,20 +221,20 @@ ImmutableGraph::csr::ptr ImmutableGraph::csr::from_edges(std::vector *edge return t; } -std::shared_ptr ImmutableGraph::csr::Transpose() const { - std::vector edges(NumEdges()); +ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const { + std::vector edges(NumEdges()); for (size_t i = 0; i < NumVertices(); i++) { const dgl_id_t *indices_begin = 
&indices[indptr[i]]; const dgl_id_t *eid_begin = &edge_ids[indptr[i]]; for (size_t j = 0; j < GetDegree(i); j++) { - edge e; + Edge e; e.end_points[0] = i; e.end_points[1] = indices_begin[j]; e.edge_id = eid_begin[j]; edges[indptr[i] + j] = e; } } - return from_edges(&edges, 1, NumVertices()); + return FromEdges(&edges, 1, NumVertices()); } ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, @@ -245,16 +245,16 @@ ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_id const dgl_id_t *src_data = static_cast(src_ids->data); const dgl_id_t *dst_data = static_cast(dst_ids->data); const dgl_id_t *edge_data = static_cast(edge_ids->data); - std::vector edges(len); + std::vector edges(len); for (size_t i = 0; i < edges.size(); i++) { - edge e; + Edge e; e.end_points[0] = src_data[i]; e.end_points[1] = dst_data[i]; e.edge_id = edge_data[i]; edges[i] = e; } - in_csr_ = csr::from_edges(&edges, 1, num_nodes); - out_csr_ = csr::from_edges(&edges, 0, num_nodes); + in_csr_ = CSR::FromEdges(&edges, 1, num_nodes); + out_csr_ = CSR::FromEdges(&edges, 0, num_nodes); } BoolArray ImmutableGraph::HasVertices(IdArray vids) const { @@ -455,7 +455,7 @@ ImmutableGraph::EdgeArray ImmutableGraph::Edges(bool sorted) const { Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { Subgraph subg; - std::pair ret; + std::pair ret; // We prefer to generate a subgraph for out-csr first. 
if (out_csr_) { ret = out_csr_->VertexSubgraph(vids); @@ -856,7 +856,7 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, } // Construct sub_csr_graph - auto subg_csr = std::make_shared(num_vertices, num_edges); + auto subg_csr = std::make_shared(num_vertices, num_edges); subg_csr->indices.resize(num_edges); subg_csr->edge_ids.resize(num_edges); dgl_id_t* val_list_out = static_cast(subg.induced_edges->data); @@ -905,7 +905,7 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, return subg; } -void CompactSubgraph(ImmutableGraph::csr *subg, +void CompactSubgraph(ImmutableGraph::CSR *subg, const std::unordered_map &id_map) { for (size_t i = 0; i < subg->indices.size(); i++) { auto it = id_map.find(subg->indices[i]); From 439a7eb4dc772ebaa4fed53967bfd355a4cc6286 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 12:33:20 +0800 Subject: [PATCH 55/75] fix comments. --- include/dgl/graph_interface.h | 2 +- include/dgl/immutable_graph.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 36cd2c1082bb..a7837b9d930e 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -346,7 +346,7 @@ class GraphInterface { /*! * \brief Sample a subgraph from the seed vertices with neighbor sampling. - * The neighbors are sampled with a uniformly distribution. + * The neighbors are sampled with a uniform distribution. * \return a subgraph */ virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 6d7061dd966e..faa201c0d12d 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -444,7 +444,7 @@ class ImmutableGraph: public GraphInterface { /*! * \brief Sample a subgraph from the seed vertices with neighbor sampling. - * The neighbors are sampled with a uniformly distribution. 
+ * The neighbors are sampled with a uniform distribution. * \return a subgraph */ SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, From 2cffc7c52b88035b230b3c8872bfb25b0aa3c8cd Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 12:37:48 +0800 Subject: [PATCH 56/75] revert. --- python/dgl/graph_index.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 3de8cd4b0cb2..0c9aa4eb2fe5 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -754,9 +754,7 @@ def from_scipy_sparse_matrix(self, adj): "The input matrix has to be a SciPy sparse matrix." if not self.is_readonly(): self.clear() - if adj.shape[0] != adj.shape[1]: - raise ValueError("we don't support a rectangle matrix") - num_nodes = adj.shape[0] + num_nodes = max(adj.shape[0], adj.shape[1]) adj_coo = adj.tocoo() src = utils.toindex(adj_coo.row) dst = utils.toindex(adj_coo.col) From 18b2ea856d66fe9e833652136d951699ffbc9a61 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 12:52:27 +0800 Subject: [PATCH 57/75] move test_graph_index to compute. 
--- tests/{mxnet => compute}/test_graph_index.py | 40 ++++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) rename tests/{mxnet => compute}/test_graph_index.py (76%) diff --git a/tests/mxnet/test_graph_index.py b/tests/compute/test_graph_index.py similarity index 76% rename from tests/mxnet/test_graph_index.py rename to tests/compute/test_graph_index.py index d3f3d1c02d01..f725645b3d5e 100644 --- a/tests/mxnet/test_graph_index.py +++ b/tests/compute/test_graph_index.py @@ -1,7 +1,5 @@ -import os -os.environ['DGLBACKEND'] = 'mxnet' +import backend as F import networkx as nx -import mxnet as mx import numpy as np import scipy as sp import dgl @@ -29,9 +27,9 @@ def generate_rand_graph(n): return g, ig def check_graph_equal(g1, g2): - adj1 = g1.adjacency_matrix(False, mx.cpu())[0] != 0 - adj2 = g2.adjacency_matrix(False, mx.cpu())[0] != 0 - assert mx.nd.sum(adj1 - adj2).asnumpy() == 0 + adj1 = g1.adjacency_matrix(False, F.cpu())[0] != 0 + adj2 = g2.adjacency_matrix(False, F.cpu())[0] != 0 + assert F.allclose(adj1, adj2) def test_graph_gen(): g, ig = generate_from_edgelist() @@ -42,7 +40,7 @@ def test_graph_gen(): def sort_edges(edges): edges = [e.tousertensor() for e in edges] if np.prod(edges[2].shape) > 0: - idx = mx.nd.argsort(edges[2]) + val, idx = F.sort_1d(edges[2]) return (edges[0][idx], edges[1][idx], edges[2][idx]) else: return (edges[0], edges[1], edges[2]) @@ -61,28 +59,28 @@ def check_basics(g, ig): assert g.has_node(i) == ig.has_node(i) for i in range(g.number_of_nodes()): - assert mx.nd.sum(g.predecessors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.predecessors(i).tousertensor()).asnumpy() - assert mx.nd.sum(g.successors(i).tousertensor()).asnumpy() == mx.nd.sum(ig.successors(i).tousertensor()).asnumpy() + assert F.asnumpy(F.sum(g.predecessors(i).tousertensor(), 0)) == F.asnumpy(F.sum(ig.predecessors(i).tousertensor(), 0)) + assert F.asnumpy(F.sum(g.successors(i).tousertensor(), 0)) == F.asnumpy(F.sum(ig.successors(i).tousertensor(), 0)) 
randv = np.random.randint(0, g.number_of_nodes(), 10) randv = utils.toindex(randv) in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv)) in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv)) nnz = in_src2.shape[0] - assert mx.nd.sum(in_src1 == in_src2).asnumpy() == nnz - assert mx.nd.sum(in_dst1 == in_dst2).asnumpy() == nnz - assert mx.nd.sum(in_eids1 == in_eids2).asnumpy() == nnz + assert F.asnumpy(F.sum(in_src1 == in_src2, 0)) == nnz + assert F.asnumpy(F.sum(in_dst1 == in_dst2, 0)) == nnz + assert F.asnumpy(F.sum(in_eids1 == in_eids2, 0)) == nnz out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv)) out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv)) nnz = out_dst2.shape[0] - assert mx.nd.sum(out_dst1 == out_dst2).asnumpy() == nnz - assert mx.nd.sum(out_src1 == out_src2).asnumpy() == nnz - assert mx.nd.sum(out_eids1 == out_eids2).asnumpy() == nnz + assert F.asnumpy(F.sum(out_dst1 == out_dst2, 0)) == nnz + assert F.asnumpy(F.sum(out_src1 == out_src2, 0)) == nnz + assert F.asnumpy(F.sum(out_eids1 == out_eids2, 0)) == nnz num_v = len(randv) - assert mx.nd.sum(g.in_degrees(randv).tousertensor() == ig.in_degrees(randv).tousertensor()).asnumpy() == num_v - assert mx.nd.sum(g.out_degrees(randv).tousertensor() == ig.out_degrees(randv).tousertensor()).asnumpy() == num_v + assert F.asnumpy(F.sum(g.in_degrees(randv).tousertensor() == ig.in_degrees(randv).tousertensor(), 0)) == num_v + assert F.asnumpy(F.sum(g.out_degrees(randv).tousertensor() == ig.out_degrees(randv).tousertensor(), 0)) == num_v randv = randv.tousertensor() for v in randv.asnumpy(): assert g.in_degree(v) == ig.in_degree(v) @@ -95,8 +93,8 @@ def check_basics(g, ig): assert g.has_edge_between(u, v) == ig.has_edge_between(u, v) randv = utils.toindex(randv) ids = g.edge_ids(randv, randv)[2].tonumpy() - assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids) == len(ids) - assert sum(g.has_edges_between(randv, randv).tonumpy() == ig.has_edges_between(randv, randv).tonumpy()) == 
len(randv) + assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids, 0) == len(ids) + assert sum(g.has_edges_between(randv, randv).tonumpy() == ig.has_edges_between(randv, randv).tonumpy(), 0) == len(randv) def test_basics(): @@ -118,8 +116,8 @@ def test_node_subgraph(): subig = ig.node_subgraph(utils.toindex(randv)) check_basics(subg, subig) check_graph_equal(subg, subig) - assert mx.nd.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor() - == map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor()) == 10 + assert F.sum(map_to_subgraph_nid(subg, utils.toindex(randv1[0:10])).tousertensor() + == map_to_subgraph_nid(subig, utils.toindex(randv1[0:10])).tousertensor(), 0) == 10 # node_subgraphs randvs = [] From 95fc7999f6ca6ec21245b07cc17c8b9dd05786af Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 13:00:40 +0800 Subject: [PATCH 58/75] fix. --- tests/compute/test_graph_index.py | 32 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/compute/test_graph_index.py b/tests/compute/test_graph_index.py index f725645b3d5e..c60a6f57b3f7 100644 --- a/tests/compute/test_graph_index.py +++ b/tests/compute/test_graph_index.py @@ -51,43 +51,43 @@ def check_basics(g, ig): edges = g.edges(True) iedges = ig.edges(True) - assert np.all(edges[0].tousertensor().asnumpy() == iedges[0].tousertensor().asnumpy()) - assert np.all(edges[1].tousertensor().asnumpy() == iedges[1].tousertensor().asnumpy()) - assert np.all(edges[2].tousertensor().asnumpy() == iedges[2].tousertensor().asnumpy()) + assert F.allclose(edges[0].tousertensor(), iedges[0].tousertensor()) + assert F.allclose(edges[1].tousertensor(), iedges[1].tousertensor()) + assert F.allclose(edges[2].tousertensor(), iedges[2].tousertensor()) for i in range(g.number_of_nodes()): assert g.has_node(i) == ig.has_node(i) for i in range(g.number_of_nodes()): - assert F.asnumpy(F.sum(g.predecessors(i).tousertensor(), 0)) == 
F.asnumpy(F.sum(ig.predecessors(i).tousertensor(), 0)) - assert F.asnumpy(F.sum(g.successors(i).tousertensor(), 0)) == F.asnumpy(F.sum(ig.successors(i).tousertensor(), 0)) + assert F.allclose(g.predecessors(i).tousertensor(), ig.predecessors(i).tousertensor()) + assert F.allclose(g.successors(i).tousertensor(), ig.successors(i).tousertensor()) randv = np.random.randint(0, g.number_of_nodes(), 10) randv = utils.toindex(randv) in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv)) in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv)) nnz = in_src2.shape[0] - assert F.asnumpy(F.sum(in_src1 == in_src2, 0)) == nnz - assert F.asnumpy(F.sum(in_dst1 == in_dst2, 0)) == nnz - assert F.asnumpy(F.sum(in_eids1 == in_eids2, 0)) == nnz + assert F.allclose(in_src1, in_src2) + assert F.allclose(in_dst1, in_dst2) + assert F.allclose(in_eids1, in_eids2) out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv)) out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv)) nnz = out_dst2.shape[0] - assert F.asnumpy(F.sum(out_dst1 == out_dst2, 0)) == nnz - assert F.asnumpy(F.sum(out_src1 == out_src2, 0)) == nnz - assert F.asnumpy(F.sum(out_eids1 == out_eids2, 0)) == nnz + assert F.allclose(out_dst1, out_dst2) + assert F.allclose(out_src1, out_src2) + assert F.allclose(out_eids1, out_eids2) num_v = len(randv) - assert F.asnumpy(F.sum(g.in_degrees(randv).tousertensor() == ig.in_degrees(randv).tousertensor(), 0)) == num_v - assert F.asnumpy(F.sum(g.out_degrees(randv).tousertensor() == ig.out_degrees(randv).tousertensor(), 0)) == num_v + assert F.allclose(g.in_degrees(randv).tousertensor(), ig.in_degrees(randv).tousertensor()) + assert F.allclose(g.out_degrees(randv).tousertensor(), ig.out_degrees(randv).tousertensor()) randv = randv.tousertensor() - for v in randv.asnumpy(): + for v in F.asnumpy(randv): assert g.in_degree(v) == ig.in_degree(v) assert g.out_degree(v) == ig.out_degree(v) - for u in randv.asnumpy(): - for v in randv.asnumpy(): + for u in F.asnumpy(randv): + 
for v in F.asnumpy(randv): if len(g.edge_id(u, v)) == 1: assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy() assert g.has_edge_between(u, v) == ig.has_edge_between(u, v) From 582b677b16b0d16c89a6ea050ed846f47d6f4a7e Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 13:08:48 +0800 Subject: [PATCH 59/75] fix. --- tests/compute/test_graph_index.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/compute/test_graph_index.py b/tests/compute/test_graph_index.py index c60a6f57b3f7..31feb5ccb5b8 100644 --- a/tests/compute/test_graph_index.py +++ b/tests/compute/test_graph_index.py @@ -27,8 +27,8 @@ def generate_rand_graph(n): return g, ig def check_graph_equal(g1, g2): - adj1 = g1.adjacency_matrix(False, F.cpu())[0] != 0 - adj2 = g2.adjacency_matrix(False, F.cpu())[0] != 0 + adj1 = g1.adjacency_matrix(False, F.cpu())[0] + adj2 = g2.adjacency_matrix(False, F.cpu())[0] assert F.allclose(adj1, adj2) def test_graph_gen(): From fd3c512aca5e357c16836112e82c35cb544ce1ca Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 16:01:18 +0800 Subject: [PATCH 60/75] impl GetAdj for coo. --- include/dgl/immutable_graph.h | 6 +--- src/graph/immutable_graph.cc | 64 ++++++++++++++++++++++++----------- 2 files changed, 46 insertions(+), 24 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index faa201c0d12d..a17c22845bf2 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -459,11 +459,7 @@ class ImmutableGraph: public GraphInterface { * \param fmt the format of the returned adjacency matrix. * \return a vector of three IdArray. */ - virtual std::vector GetAdj(bool transpose, const std::string &fmt) const { - assert(fmt == "csr"); - CSRArray arrs = transpose ? 
this->GetOutCSRArray() : this->GetInCSRArray(); - return std::vector{arrs.indptr, arrs.indices, arrs.id}; - } + virtual std::vector GetAdj(bool transpose, const std::string &fmt) const; protected: std::pair GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 1cae1af864a3..0dfdbb7b5ca9 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -107,27 +107,10 @@ class Bitmap { */ class HashTableChecker { std::unordered_map oldv2newv; + // This bitmap is used as a bloom filter to remove some lookups. + // Hashtable is very slow. Using bloom filter can significantly speed up lookups. Bitmap map; - public: - HashTableChecker(const dgl_id_t *vid_data, int64_t len): map(vid_data, len) { - oldv2newv.reserve(len); - for (int64_t i = 0; i < len; ++i) { - oldv2newv[vid_data[i]] = i; - } - } - - void CollectOnRow(const dgl_id_t col_idx[], const dgl_id_t eids[], size_t row_len, - std::vector *new_col_idx, - std::vector *orig_eids) { - // TODO(zhengda) I need to make sure the column index in each row is sorted. - for (size_t j = 0; j < row_len; ++j) { - const dgl_id_t oldsucc = col_idx[j]; - const dgl_id_t eid = eids[j]; - Collect(oldsucc, eid, new_col_idx, orig_eids); - } - } - void Collect(const dgl_id_t old_id, const dgl_id_t old_eid, std::vector *col_idx, std::vector *orig_eids) { @@ -142,6 +125,25 @@ class HashTableChecker { orig_eids->push_back(old_eid); } } + + public: + HashTableChecker(const dgl_id_t *vid_data, int64_t len): map(vid_data, len) { + oldv2newv.reserve(len); + for (int64_t i = 0; i < len; ++i) { + oldv2newv[vid_data[i]] = i; + } + } + + void CollectOnRow(const dgl_id_t neigh_idx[], const dgl_id_t eids[], size_t row_len, + std::vector *new_neigh_idx, + std::vector *orig_eids) { + // TODO(zhengda) I need to make sure the column index in each row is sorted. 
+ for (size_t j = 0; j < row_len; ++j) { + const dgl_id_t oldsucc = neigh_idx[j]; + const dgl_id_t eid = eids[j]; + Collect(oldsucc, eid, new_neigh_idx, orig_eids); + } + } }; std::pair ImmutableGraph::CSR::VertexSubgraph( @@ -510,6 +512,30 @@ ImmutableGraph::CSRArray ImmutableGraph::GetOutCSRArray() const { return CSRArray{indptr, indices, eids}; } +std::vector ImmutableGraph::GetAdj(bool transpose, const std::string &fmt) const { + if (fmt == "csr") { + CSRArray arrs = transpose ? this->GetOutCSRArray() : this->GetInCSRArray(); + return std::vector{arrs.indptr, arrs.indices, arrs.id}; + } else if (fmt == "coo") { + int64_t num_edges = this->NumEdges(); + IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + CSR::Ptr csr = transpose ? GetOutCSR() : GetInCSR(); + int64_t *idx_data = static_cast(idx->data); + dgl_id_t *eid_data = static_cast(eid->data); + for (size_t i = 0; i < csr->indptr.size() - 1; i++) { + for (int64_t j = csr->indptr[i]; j < csr->indptr[i + 1]; j++) + idx_data[j] = i; + } + std::copy(csr->indices.begin(), csr->indices.end(), idx_data + num_edges); + std::copy(csr->edge_ids.begin(), csr->edge_ids.end(), eid_data); + return std::vector{idx, eid}; + } else { + LOG(FATAL) << "unsupported adjacency matrix format"; + return std::vector(); + } +} + ////////////////////////////// Graph Sampling /////////////////////////////// /* From b15a19e58c6b2a888842834a6cee10ed7bd4df0a Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 16:05:34 +0800 Subject: [PATCH 61/75] fix. 
--- tests/compute/test_graph_index.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/compute/test_graph_index.py b/tests/compute/test_graph_index.py index 31feb5ccb5b8..64fe4c1c2543 100644 --- a/tests/compute/test_graph_index.py +++ b/tests/compute/test_graph_index.py @@ -29,7 +29,7 @@ def generate_rand_graph(n): def check_graph_equal(g1, g2): adj1 = g1.adjacency_matrix(False, F.cpu())[0] adj2 = g2.adjacency_matrix(False, F.cpu())[0] - assert F.allclose(adj1, adj2) + assert F.array_equal(adj1, adj2) def test_graph_gen(): g, ig = generate_from_edgelist() @@ -51,36 +51,36 @@ def check_basics(g, ig): edges = g.edges(True) iedges = ig.edges(True) - assert F.allclose(edges[0].tousertensor(), iedges[0].tousertensor()) - assert F.allclose(edges[1].tousertensor(), iedges[1].tousertensor()) - assert F.allclose(edges[2].tousertensor(), iedges[2].tousertensor()) + assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor()) + assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor()) + assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor()) for i in range(g.number_of_nodes()): assert g.has_node(i) == ig.has_node(i) for i in range(g.number_of_nodes()): - assert F.allclose(g.predecessors(i).tousertensor(), ig.predecessors(i).tousertensor()) - assert F.allclose(g.successors(i).tousertensor(), ig.successors(i).tousertensor()) + assert F.array_equal(g.predecessors(i).tousertensor(), ig.predecessors(i).tousertensor()) + assert F.array_equal(g.successors(i).tousertensor(), ig.successors(i).tousertensor()) randv = np.random.randint(0, g.number_of_nodes(), 10) randv = utils.toindex(randv) in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv)) in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv)) nnz = in_src2.shape[0] - assert F.allclose(in_src1, in_src2) - assert F.allclose(in_dst1, in_dst2) - assert F.allclose(in_eids1, in_eids2) + assert F.array_equal(in_src1, in_src2) + assert 
F.array_equal(in_dst1, in_dst2) + assert F.array_equal(in_eids1, in_eids2) out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv)) out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv)) nnz = out_dst2.shape[0] - assert F.allclose(out_dst1, out_dst2) - assert F.allclose(out_src1, out_src2) - assert F.allclose(out_eids1, out_eids2) + assert F.array_equal(out_dst1, out_dst2) + assert F.array_equal(out_src1, out_src2) + assert F.array_equal(out_eids1, out_eids2) num_v = len(randv) - assert F.allclose(g.in_degrees(randv).tousertensor(), ig.in_degrees(randv).tousertensor()) - assert F.allclose(g.out_degrees(randv).tousertensor(), ig.out_degrees(randv).tousertensor()) + assert F.array_equal(g.in_degrees(randv).tousertensor(), ig.in_degrees(randv).tousertensor()) + assert F.array_equal(g.out_degrees(randv).tousertensor(), ig.out_degrees(randv).tousertensor()) randv = randv.tousertensor() for v in F.asnumpy(randv): assert g.in_degree(v) == ig.in_degree(v) From 905c82088045c2d582f1bf94c497b258e47075b4 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 16:26:16 +0800 Subject: [PATCH 62/75] fix tests. 
--- python/dgl/backend/pytorch/tensor.py | 5 ++++- tests/compute/test_graph_index.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/dgl/backend/pytorch/tensor.py b/python/dgl/backend/pytorch/tensor.py index a23cf6a1aea9..a709e229f95e 100644 --- a/python/dgl/backend/pytorch/tensor.py +++ b/python/dgl/backend/pytorch/tensor.py @@ -72,7 +72,10 @@ def astype(input, ty): return input.type(ty) def asnumpy(input): - return input.cpu().numpy() + if isinstance(input, th.sparse.FloatTensor): + return input.to_dense().cpu().numpy() + else: + return input.cpu().numpy() def copy_to(input, ctx): if ctx.type == 'cpu': diff --git a/tests/compute/test_graph_index.py b/tests/compute/test_graph_index.py index 64fe4c1c2543..ff53f04d599a 100644 --- a/tests/compute/test_graph_index.py +++ b/tests/compute/test_graph_index.py @@ -29,7 +29,7 @@ def generate_rand_graph(n): def check_graph_equal(g1, g2): adj1 = g1.adjacency_matrix(False, F.cpu())[0] adj2 = g2.adjacency_matrix(False, F.cpu())[0] - assert F.array_equal(adj1, adj2) + assert np.all(F.asnumpy(adj1) == F.asnumpy(adj2)) def test_graph_gen(): g, ig = generate_from_edgelist() From 224122e1cd0f2570abeef7de7c20b63827d840c1 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 16:45:17 +0800 Subject: [PATCH 63/75] address comments. --- src/graph/immutable_graph.cc | 55 ++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 0dfdbb7b5ca9..369a5c3b042b 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -619,6 +619,9 @@ class ArrayHeap { std::vector heap_; }; +/* + * Uniformly sample integers from [0, set_size) without replacement. 
+ */ static void RandomSample(size_t set_size, size_t num, std::vector* out, @@ -633,30 +636,34 @@ static void RandomSample(size_t set_size, } } -static void NegateSet(const std::vector &idxs, - size_t set_size, - std::vector* out) { - // idxs must have been sorted. - auto it = idxs.begin(); +/* + * For a sparse array whose non-zeros are represented by nz_idxs, + * negate the sparse array and outputs the non-zeros in the negated array. + */ +static void NegateArray(const std::vector &nz_idxs, + size_t arr_size, + std::vector* out) { + // nz_idxs must have been sorted. + auto it = nz_idxs.begin(); size_t i = 0; - CHECK_GT(set_size, idxs.back()); - for (; i < set_size && it != idxs.end(); i++) { + CHECK_GT(arr_size, nz_idxs.back()); + for (; i < arr_size && it != nz_idxs.end(); i++) { if (*it == i) { it++; continue; } out->push_back(i); } - for (; i < set_size; i++) { + for (; i < arr_size; i++) { out->push_back(i); } } /* - * Uniform sample + * Uniform sample vertices from a list of vertices. */ static void GetUniformSample(const dgl_id_t* val_list, - const dgl_id_t* col_list, + const dgl_id_t* ver_list, const size_t ver_len, const size_t max_num_neighbor, std::vector* out_ver, @@ -665,7 +672,7 @@ static void GetUniformSample(const dgl_id_t* val_list, // Copy ver_list to output if (ver_len <= max_num_neighbor) { for (size_t i = 0; i < ver_len; ++i) { - out_ver->push_back(col_list[i]); + out_ver->push_back(ver_list[i]); out_edge->push_back(val_list[i]); } return; @@ -682,7 +689,7 @@ static void GetUniformSample(const dgl_id_t* val_list, RandomSample(ver_len, ver_len - max_num_neighbor, &negate, seed); std::sort(negate.begin(), negate.end()); - NegateSet(negate, ver_len, &sorted_idxs); + NegateArray(negate, ver_len, &sorted_idxs); } // verify the result. 
CHECK_EQ(sorted_idxs.size(), max_num_neighbor); @@ -690,7 +697,7 @@ static void GetUniformSample(const dgl_id_t* val_list, CHECK_GT(sorted_idxs[i], sorted_idxs[i - 1]); } for (auto idx : sorted_idxs) { - out_ver->push_back(col_list[idx]); + out_ver->push_back(ver_list[idx]); out_edge->push_back(val_list[idx]); } } @@ -700,7 +707,7 @@ static void GetUniformSample(const dgl_id_t* val_list, */ static void GetNonUniformSample(const float* probability, const dgl_id_t* val_list, - const dgl_id_t* col_list, + const dgl_id_t* ver_list, const size_t ver_len, const size_t max_num_neighbor, std::vector* out_ver, @@ -709,7 +716,7 @@ static void GetNonUniformSample(const float* probability, // Copy ver_list to output if (ver_len <= max_num_neighbor) { for (size_t i = 0; i < ver_len; ++i) { - out_ver->push_back(col_list[i]); + out_ver->push_back(ver_list[i]); out_edge->push_back(val_list[i]); } return; @@ -718,7 +725,7 @@ static void GetNonUniformSample(const float* probability, std::vector sp_index(max_num_neighbor); std::vector sp_prob(ver_len); for (size_t i = 0; i < ver_len; ++i) { - sp_prob[i] = probability[col_list[i]]; + sp_prob[i] = probability[ver_list[i]]; } ArrayHeap arrayHeap(sp_prob); arrayHeap.SampleWithoutReplacement(max_num_neighbor, &sp_index, seed); @@ -726,7 +733,7 @@ static void GetNonUniformSample(const float* probability, out_edge->resize(max_num_neighbor); for (size_t i = 0; i < max_num_neighbor; ++i) { size_t idx = sp_index[i]; - out_ver->at(i) = col_list[idx]; + out_ver->at(i) = ver_list[idx]; out_edge->at(i) = val_list[idx]; } sort(out_ver->begin(), out_ver->end()); @@ -759,12 +766,12 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, // BFS traverse the graph and sample vertices // - std::unordered_set sub_ver_mp; + std::unordered_set sub_ver_map; std::vector > sub_vers; sub_vers.reserve(num_seeds * 10); // add seed vertices for (size_t i = 0; i < num_seeds; ++i) { - auto ret = sub_ver_mp.insert(seed[i]); + auto ret = 
sub_ver_map.insert(seed[i]); // If the vertex is inserted successfully. if (ret.second) { sub_vers.emplace_back(seed[i], 0); @@ -831,7 +838,7 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, // We need to add the neighbor in the hashtable here. This ensures that // the vertex in the queue is unique. If we see a vertex before, we don't // need to add it to the queue again. - auto ret = sub_ver_mp.insert(tmp_sampled_src_list[i]); + auto ret = sub_ver_map.insert(tmp_sampled_src_list[i]); // If the sampled neighbor is inserted to the map successfully. if (ret.second) sub_vers.emplace_back(tmp_sampled_src_list[i], cur_node_level + 1); @@ -847,9 +854,9 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, } } - // Copy sub_ver_mp to output[0] + // Copy sub_ver_map to output[0] // Copy layer - int64_t num_vertices = sub_ver_mp.size(); + int64_t num_vertices = sub_ver_map.size(); std::sort(sub_vers.begin(), sub_vers.end(), [](const std::pair &a1, const std::pair &a2) { return a1.first < a2.first; @@ -875,7 +882,7 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, // Copy sub_probability float *sub_prob = static_cast(subg.sample_prob->data); if (probability != nullptr) { - for (size_t i = 0; i < sub_ver_mp.size(); ++i) { + for (size_t i = 0; i < sub_ver_map.size(); ++i) { dgl_id_t idx = out[i]; sub_prob[i] = probability[idx]; } @@ -899,7 +906,7 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, size_t idx_with_neigh = 0; for (size_t i = 0; i < num_vertices; i++) { dgl_id_t dst_id = *(out + i); - // If a vertex is in sub_ver_mp but not in neigh_pos, this vertex must not + // If a vertex is in sub_ver_map but not in neigh_pos, this vertex must not // have edges. 
size_t edge_size = 0; if (idx_with_neigh < neigh_pos.size() && dst_id == neigh_pos[idx_with_neigh].first) { From d6d5f1ebf142a87627d079ecca664beb24f7fc50 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Tue, 15 Jan 2019 16:50:58 +0800 Subject: [PATCH 64/75] address comments. --- include/dgl/immutable_graph.h | 6 +++--- src/graph/graph.cc | 4 ++-- src/graph/graph_apis.cc | 2 +- src/graph/immutable_graph.cc | 26 +++++++++++++------------- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index a17c22845bf2..14ce6be534da 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -193,7 +193,7 @@ class ImmutableGraph: public GraphInterface { auto pred = this->in_csr_->GetIndexRef(dst); return binary_search(pred.first, pred.second, src); } else { - assert(this->out_csr_); + CHECK(this->out_csr_) << "one of the CSRs must exist"; auto succ = this->out_csr_->GetIndexRef(src); return binary_search(succ.first, succ.second, dst); } @@ -473,7 +473,7 @@ class ImmutableGraph: public GraphInterface { if (in_csr_) { return in_csr_; } else { - assert(out_csr_ != nullptr); + CHECK(out_csr_ != nullptr) << "one of the CSRs must exist"; const_cast(this)->in_csr_ = out_csr_->Transpose(); return in_csr_; } @@ -482,7 +482,7 @@ class ImmutableGraph: public GraphInterface { if (out_csr_) { return out_csr_; } else { - assert(in_csr_ != nullptr); + CHECK(in_csr_ != nullptr) << "one of the CSRs must exist"; const_cast(this)->out_csr_ = in_csr_->Transpose(); return out_csr_; } diff --git a/src/graph/graph.cc b/src/graph/graph.cc index e8fe788759ca..42d4e3ecb784 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -17,8 +17,8 @@ Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_node bool multigraph): is_multigraph_(multigraph) { this->AddVertices(num_nodes); num_edges_ = src_ids->shape[0]; - assert(num_edges_ == dst_ids->shape[0]); - assert(num_edges_ == 
edge_ids->shape[0]); + CHECK(num_edges_ == dst_ids->shape[0]) << "vectors in COO must have the same length"; + CHECK(num_edges_ == edge_ids->shape[0]) << "vectors in COO must have the same length"; const dgl_id_t *src_data = static_cast(src_ids->data); const dgl_id_t *dst_data = static_cast(dst_ids->data); const dgl_id_t *edge_data = static_cast(edge_ids->data); diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index 03781d7fde1c..bd6d9aa9436c 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -439,7 +439,7 @@ void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { int num_valid_seeds = args[num_seeds + 4]; const GraphInterface *ptr = static_cast(ghandle); const ImmutableGraph *gptr = dynamic_cast(ptr); - assert(num_valid_seeds <= num_seeds); + CHECK(num_valid_seeds <= num_seeds); std::vector subgs(seeds.size()); #pragma omp parallel for for (int i = 0; i < num_valid_seeds; i++) { diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 369a5c3b042b..03587a13a1fc 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -186,7 +186,7 @@ std::pair ImmutableGraph::CSR::VertexSubgraph ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector *edges, int sort_on, int64_t num_nodes) { - assert(sort_on == 0 || sort_on == 1); + CHECK(sort_on == 0 || sort_on == 1) << "we must sort on the first or the second vector"; int other_end = sort_on == 1 ? 0 : 1; // TODO(zhengda) we should sort in parallel. 
struct compare { @@ -209,17 +209,17 @@ ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector *edges t->edge_ids.resize(edges->size()); for (size_t i = 0; i < edges->size(); i++) { t->indices[i] = edges->at(i).end_points[other_end]; - assert(t->indices[i] < num_nodes); + CHECK(t->indices[i] < num_nodes); t->edge_ids[i] = edges->at(i).edge_id; dgl_id_t vid = edges->at(i).end_points[sort_on]; - assert(vid < num_nodes); + CHECK(vid < num_nodes); while (vid > 0 && t->indptr.size() <= static_cast(vid)) t->indptr.push_back(i); - assert(t->indptr.size() == vid + 1); + CHECK(t->indptr.size() == vid + 1); } while (t->indptr.size() < num_nodes + 1) t->indptr.push_back(edges->size()); - assert(t->indptr.size() == num_nodes + 1); + CHECK(t->indptr.size() == num_nodes + 1); return t; } @@ -242,8 +242,8 @@ ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const { ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, bool multigraph) : is_multigraph_(multigraph) { int64_t len = src_ids->shape[0]; - assert(len == dst_ids->shape[0]); - assert(len == edge_ids->shape[0]); + CHECK(len == dst_ids->shape[0]); + CHECK(len == edge_ids->shape[0]); const dgl_id_t *src_data = static_cast(src_ids->data); const dgl_id_t *dst_data = static_cast(dst_ids->data); const dgl_id_t *edge_data = static_cast(edge_ids->data); @@ -330,7 +330,7 @@ IdArray ImmutableGraph::Successors(dgl_id_t vid, uint64_t radius) const { std::pair ImmutableGraph::GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const { - assert(this->in_csr_); + CHECK(this->in_csr_); auto pred = this->in_csr_->GetIndexRef(dst); auto it = std::lower_bound(pred.first, pred.second, src); // If there doesn't exist edges between the two nodes. 
@@ -338,7 +338,7 @@ std::pair ImmutableGraph::GetInEdgeIdRef(dgl return std::pair(nullptr, nullptr); size_t off = it - in_csr_->indices.data(); - assert(off < in_csr_->indices.size()); + CHECK(off < in_csr_->indices.size()); const dgl_id_t *start = &in_csr_->edge_ids[off]; int64_t len = 0; // There are edges between the source and the destination. @@ -348,7 +348,7 @@ std::pair ImmutableGraph::GetInEdgeIdRef(dgl std::pair ImmutableGraph::GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const { - assert(this->out_csr_); + CHECK(this->out_csr_); auto succ = this->out_csr_->GetIndexRef(src); auto it = std::lower_bound(succ.first, succ.second, dst); // If there doesn't exist edges between the two nodes. @@ -356,7 +356,7 @@ std::pair ImmutableGraph::GetOutEdgeIdRef(dg return std::pair(nullptr, nullptr); size_t off = it - out_csr_->indices.data(); - assert(off < out_csr_->indices.size()); + CHECK(off < out_csr_->indices.size()); const dgl_id_t *start = &out_csr_->edge_ids[off]; int64_t len = 0; // There are edges between the source and the destination. @@ -463,7 +463,7 @@ Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { ret = out_csr_->VertexSubgraph(vids); subg.graph = GraphPtr(new ImmutableGraph(nullptr, ret.first, IsMultigraph())); } else { - assert(in_csr_); + CHECK(in_csr_); ret = in_csr_->VertexSubgraph(vids); // When we generate a subgraph, it may be used by only accessing in-edges or out-edges. // We don't need to generate both. @@ -942,7 +942,7 @@ void CompactSubgraph(ImmutableGraph::CSR *subg, const std::unordered_map &id_map) { for (size_t i = 0; i < subg->indices.size(); i++) { auto it = id_map.find(subg->indices[i]); - assert(it != id_map.end()); + CHECK(it != id_map.end()); subg->indices[i] = it->second; } } From a18b61d9206df4eff3af7f79f77b2c755ff44610 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 00:06:28 +0800 Subject: [PATCH 65/75] fix comment. 
--- python/dgl/contrib/sampling/sampler.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/dgl/contrib/sampling/sampler.py b/python/dgl/contrib/sampling/sampler.py index fdceb0d9e475..65ba70fd4372 100644 --- a/python/dgl/contrib/sampling/sampler.py +++ b/python/dgl/contrib/sampling/sampler.py @@ -199,9 +199,6 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, return_seed_id=False, prefetch=False): '''Create a sampler that samples neighborhood. - .. note:: This method currently only supports MXNet backend. Set - "DGLBACKEND" environment variable to "mxnet". - This creates a subgraph data loader that samples subgraphs from the input graph with neighbor sampling. This sampling method is implemented in C and can perform sampling very efficiently. From 67587daed755200baa2627bb3b65f3727ffc874e Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 12:32:41 +0800 Subject: [PATCH 66/75] address comments. --- include/dgl/immutable_graph.h | 43 ++++++++++++--------- src/graph/graph.cc | 10 ++++- src/graph/graph_apis.cc | 23 +++++++++--- src/graph/immutable_graph.cc | 71 +++++++++++++++++++++++++++-------- 4 files changed, 106 insertions(+), 41 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 14ce6be534da..c89afe6fdf9c 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -16,12 +16,6 @@ namespace dgl { -template -bool binary_search(ForwardIt first, ForwardIt last, const T& value) { - first = std::lower_bound(first, last, value); - return (!(first == last) && !(value < *first)); -} - /*! * \brief DGL immutable graph index class. * @@ -68,6 +62,7 @@ class ImmutableGraph: public GraphInterface { DegreeArray GetDegrees(IdArray vids) const; EdgeArray GetEdges(dgl_id_t vid) const; EdgeArray GetEdges(IdArray vids) const; + /* \brief this returns the start and end position of the column indices corresponding v. 
*/ std::pair GetIndexRef(dgl_id_t v) const { const int64_t start = indptr[v]; const int64_t end = indptr[v + 1]; @@ -75,16 +70,39 @@ class ImmutableGraph: public GraphInterface { } CSR::Ptr Transpose() const; std::pair VertexSubgraph(IdArray vids) const; + /* + * Construct a CSR from a list of edges. + * + * When constructing a CSR, we need to sort the edge list. To reduce the overhead, + * we simply sort on the input edge list. We allow sorting on both end points of an edge, + * which is specified by `sort_on`. + */ static CSR::Ptr FromEdges(std::vector *edges, int sort_on, int64_t num_nodes); }; + /*! \brief Construct an immutable graph from the COO format. */ ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, bool multigraph = false); + /*! + * \brief Construct an immutable graph from the CSR format. + * + * For a single graph, we need two CSRs, one stores the in-edges of vertices and + * the other stores the out-edges of vertices. These two CSRs stores the same edges. + * The reason we need both is that some operators are faster on in-edge CSR and + * the other operators are faster on out-edge CSR. + * + * However, not both CSRs are required. Technically, one CSR contains all information. + * Thus, when we construct a temporary graphs (e.g., the sampled subgraphs), we only + * construct one of the CSRs that runs fast for some operations we expect and construct + * the other CSR on demand. + */ ImmutableGraph(CSR::Ptr in_csr, CSR::Ptr out_csr, bool multigraph = false) : is_multigraph_(multigraph) { this->in_csr_ = in_csr; this->out_csr_ = out_csr; + CHECK(this->in_csr_ != nullptr || this->out_csr_ != nullptr) + << "there must exist one of the CSRs"; } /*! \brief default constructor */ @@ -187,17 +205,7 @@ class ImmutableGraph: public GraphInterface { BoolArray HasVertices(IdArray vids) const; /*! 
\return true if the given edge is in the graph.*/ - bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const { - if (!HasVertex(src) || !HasVertex(dst)) return false; - if (this->in_csr_) { - auto pred = this->in_csr_->GetIndexRef(dst); - return binary_search(pred.first, pred.second, src); - } else { - CHECK(this->out_csr_) << "one of the CSRs must exist"; - auto succ = this->out_csr_->GetIndexRef(src); - return binary_search(succ.first, succ.second, dst); - } - } + bool HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const; /*! \return a 0-1 array indicating whether the given edges are in the graph.*/ BoolArray HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const; @@ -466,6 +474,7 @@ class ImmutableGraph: public GraphInterface { std::pair GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; /* + * The immutable graph may only contain one of the CSRs (e.g., the sampled subgraphs). * When we get in csr or out csr, we try to get the one cached in the structure. * If not, we transpose the other one to get the one we need. 
*/ diff --git a/src/graph/graph.cc b/src/graph/graph.cc index 42d4e3ecb784..f991573136e9 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -15,6 +15,9 @@ namespace dgl { Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, bool multigraph): is_multigraph_(multigraph) { + CHECK(IsValidIdArray(src_ids)); + CHECK(IsValidIdArray(dst_ids)); + CHECK(IsValidIdArray(edge_ids)); this->AddVertices(num_nodes); num_edges_ = src_ids->shape[0]; CHECK(num_edges_ == dst_ids->shape[0]) << "vectors in COO must have the same length"; @@ -22,6 +25,8 @@ Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_node const dgl_id_t *src_data = static_cast(src_ids->data); const dgl_id_t *dst_data = static_cast(dst_ids->data); const dgl_id_t *edge_data = static_cast(edge_ids->data); + all_edges_src_.reserve(num_edges_); + all_edges_dst_.reserve(num_edges_); for (int64_t i = 0; i < num_edges_; i++) { auto src = src_data[i]; auto dst = dst_data[i]; @@ -499,7 +504,7 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { } std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const { - int64_t num_edges = num_edges_; + int64_t num_edges = NumEdges(); int64_t num_nodes = NumVertices(); if (fmt == "coo") { IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); @@ -513,8 +518,9 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const } IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *eid_data = static_cast(eid->data); - for (uint64_t eid = 0; eid < num_edges; ++eid) + for (uint64_t eid = 0; eid < num_edges; ++eid) { eid_data[eid] = eid; + } return std::vector{idx, eid}; } else if (fmt == "csr") { IdArray indptr = IdArray::Empty({num_nodes + 1}, DLDataType{kDLInt, 64, 1}, diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index bd6d9aa9436c..b10731d1202a 100644 --- a/src/graph/graph_apis.cc +++ 
b/src/graph/graph_apis.cc @@ -87,6 +87,7 @@ PackedFunc ConvertSubgraphToPackedFunc(const std::vector& sg) { LOG(FATAL) << "invalid choice"; } }; + // TODO(minjie): figure out a better way of returning a complex results. return PackedFunc(body); } @@ -106,9 +107,9 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphCreate") const IdArray src_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[0])); const IdArray dst_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); const IdArray edge_ids = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[2])); - bool multigraph = static_cast(args[3]); - int64_t num_nodes = static_cast(args[4]); - bool readonly = static_cast(args[5]); + const bool multigraph = static_cast(args[3]); + const int64_t num_nodes = static_cast(args[4]); + const bool readonly = static_cast(args[5]); GraphHandle ghandle; if (readonly) ghandle = new ImmutableGraph(src_ids, dst_ids, edge_ids, num_nodes, multigraph); @@ -369,6 +370,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion") for (int i = 0; i < list_size; ++i) { const GraphInterface *ptr = static_cast(inhandles[i]); const Graph* gr = dynamic_cast(ptr); + if (gr == nullptr) + throw NotImplemented("_CAPI_DGLDisjointUnion for immutable graph"); graphs.push_back(gr); } Graph* gptr = new Graph(); @@ -382,6 +385,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum") GraphHandle ghandle = args[0]; const GraphInterface *ptr = static_cast(ghandle); const Graph* gptr = dynamic_cast(ptr); + if (gptr == nullptr) + throw NotImplemented("_CAPI_DGLDisjointPartitionByNum for immutable graph"); int64_t num = args[1]; std::vector&& rst = GraphOp::DisjointPartitionByNum(gptr, num); // return the pointer array as an integer array @@ -401,6 +406,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes") GraphHandle ghandle = args[0]; const GraphInterface *ptr = static_cast(ghandle); const Graph* gptr = dynamic_cast(ptr); + if (gptr == nullptr) + throw 
NotImplemented("_CAPI_DGLDisjointPartitionBySizes for immutable graph"); const IdArray sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); std::vector&& rst = GraphOp::DisjointPartitionBySizes(gptr, sizes); // return the pointer array as an integer array @@ -421,6 +428,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") bool backtracking = args[1]; const GraphInterface *ptr = static_cast(ghandle); const Graph* gptr = dynamic_cast(ptr); + if (gptr == nullptr) + throw NotImplemented("_CAPI_DGLGraphLineGraph for immutable graph"); Graph* lgptr = new Graph(); *lgptr = GraphOp::LineGraph(gptr, backtracking); GraphHandle lghandle = lgptr; @@ -434,11 +443,13 @@ void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { for (size_t i = 0; i < seeds.size(); i++) seeds[i] = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[i + 1])); std::string neigh_type = args[num_seeds + 1]; - int num_hops = args[num_seeds + 2]; - int num_neighbors = args[num_seeds + 3]; - int num_valid_seeds = args[num_seeds + 4]; + const int num_hops = args[num_seeds + 2]; + const int num_neighbors = args[num_seeds + 3]; + const int num_valid_seeds = args[num_seeds + 4]; const GraphInterface *ptr = static_cast(ghandle); const ImmutableGraph *gptr = dynamic_cast(ptr); + if (gptr == nullptr) + throw NotImplemented("sampling isn't supported in mutable graph"); CHECK(num_valid_seeds <= num_seeds); std::vector subgs(seeds.size()); #pragma omp parallel for diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 03587a13a1fc..d08d4cdf6456 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -19,9 +19,15 @@ int rand_r(unsigned *seed) { namespace dgl { +template +bool binary_search(ForwardIt first, ForwardIt last, const T& value) { + first = std::lower_bound(first, last, value); + return (!(first == last) && !(value < *first)); +} + ImmutableGraph::EdgeArray ImmutableGraph::CSR::GetEdges(dgl_id_t vid) const { CHECK(HasVertex(vid)) << 
"invalid vertex: " << vid; - int64_t off = this->indptr[vid]; + const int64_t off = this->indptr[vid]; const int64_t len = this->GetDegree(vid); IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); @@ -111,6 +117,12 @@ class HashTableChecker { // Hashtable is very slow. Using bloom filter can significantly speed up lookups. Bitmap map; + /* + * This is to test if a vertex is in the induced subgraph. + * If it is, the edge on this vertex and the source vertex will be collected. + * `old_id` is the vertex we test, `old_eid` is the edge Id between the `old_id` + * and the source vertex. `col_idx` and `orig_eids` store the collected edges. + */ void Collect(const dgl_id_t old_id, const dgl_id_t old_eid, std::vector *col_idx, std::vector *orig_eids) { @@ -134,6 +146,11 @@ class HashTableChecker { } } + /* + * This is to collect edges from the neighborhood of a vertex. + * `neigh_idx`, `eids` and `row_len` indicates the neighbor list of the vertex. + * The collected edges are stored in `new_neigh_idx` and `orig_eids`. 
+ */ void CollectOnRow(const dgl_id_t neigh_idx[], const dgl_id_t eids[], size_t row_len, std::vector *new_neigh_idx, std::vector *orig_eids) { @@ -148,6 +165,7 @@ class HashTableChecker { std::pair ImmutableGraph::CSR::VertexSubgraph( IdArray vids) const { + CHECK(IsValidIdArray(vids)) << "Invalid vertex id array."; const dgl_id_t* vid_data = static_cast(vids->data); const int64_t len = vids->shape[0]; @@ -197,10 +215,11 @@ ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector *edges this->other_end = other_end; } bool operator()(const Edge &e1, const Edge &e2) { - if (e1.end_points[sort_on] == e2.end_points[sort_on]) + if (e1.end_points[sort_on] == e2.end_points[sort_on]) { return e1.end_points[other_end] < e2.end_points[other_end]; - else + } else { return e1.end_points[sort_on] < e2.end_points[sort_on]; + } } }; std::sort(edges->begin(), edges->end(), compare(sort_on, other_end)); @@ -213,12 +232,14 @@ ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector *edges t->edge_ids[i] = edges->at(i).edge_id; dgl_id_t vid = edges->at(i).end_points[sort_on]; CHECK(vid < num_nodes); - while (vid > 0 && t->indptr.size() <= static_cast(vid)) + while (vid > 0 && t->indptr.size() <= static_cast(vid)) { t->indptr.push_back(i); + } CHECK(t->indptr.size() == vid + 1); } - while (t->indptr.size() < num_nodes + 1) + while (t->indptr.size() < num_nodes + 1) { t->indptr.push_back(edges->size()); + } CHECK(t->indptr.size() == num_nodes + 1); return t; } @@ -241,7 +262,10 @@ ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const { ImmutableGraph::ImmutableGraph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_nodes, bool multigraph) : is_multigraph_(multigraph) { - int64_t len = src_ids->shape[0]; + CHECK(IsValidIdArray(src_ids)) << "Invalid vertex id array."; + CHECK(IsValidIdArray(dst_ids)) << "Invalid vertex id array."; + CHECK(IsValidIdArray(edge_ids)) << "Invalid vertex id array."; + const int64_t len = src_ids->shape[0]; 
CHECK(len == dst_ids->shape[0]); CHECK(len == edge_ids->shape[0]); const dgl_id_t *src_data = static_cast(src_ids->data); @@ -272,6 +296,18 @@ BoolArray ImmutableGraph::HasVertices(IdArray vids) const { return rst; } +bool ImmutableGraph::HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const { + if (!HasVertex(src) || !HasVertex(dst)) return false; + if (this->in_csr_) { + auto pred = this->in_csr_->GetIndexRef(dst); + return binary_search(pred.first, pred.second, src); + } else { + CHECK(this->out_csr_) << "one of the CSRs must exist"; + auto succ = this->out_csr_->GetIndexRef(src); + return binary_search(succ.first, succ.second, dst); + } +} + BoolArray ImmutableGraph::HasEdgesBetween(IdArray src_ids, IdArray dst_ids) const { CHECK(IsValidIdArray(src_ids)) << "Invalid src id array."; CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array."; @@ -334,8 +370,9 @@ std::pair ImmutableGraph::GetInEdgeIdRef(dgl auto pred = this->in_csr_->GetIndexRef(dst); auto it = std::lower_bound(pred.first, pred.second, src); // If there doesn't exist edges between the two nodes. - if (it == pred.second || *it != src) + if (it == pred.second || *it != src) { return std::pair(nullptr, nullptr); + } size_t off = it - in_csr_->indices.data(); CHECK(off < in_csr_->indices.size()); @@ -352,8 +389,9 @@ std::pair ImmutableGraph::GetOutEdgeIdRef(dg auto succ = this->out_csr_->GetIndexRef(src); auto it = std::lower_bound(succ.first, succ.second, dst); // If there doesn't exist edges between the two nodes. 
- if (it == succ.second || *it != dst) + if (it == succ.second || *it != dst) { return std::pair(nullptr, nullptr); + } size_t off = it - out_csr_->indices.data(); CHECK(off < out_csr_->indices.size()); @@ -368,15 +406,17 @@ IdArray ImmutableGraph::EdgeId(dgl_id_t src, dgl_id_t dst) const { CHECK(HasVertex(src) && HasVertex(dst)) << "invalid edge: " << src << " -> " << dst; std::pair edge_ids; - if (in_csr_) + if (in_csr_) { edge_ids = GetInEdgeIdRef(src, dst); - else + } else { edge_ids = GetOutEdgeIdRef(src, dst); + } int64_t len = edge_ids.second - edge_ids.first; IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); dgl_id_t* rst_data = static_cast(rst->data); - if (len > 0) + if (len > 0) { std::copy(edge_ids.first, edge_ids.second, rst_data); + } return rst; } @@ -386,7 +426,6 @@ ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_i CHECK(IsValidIdArray(dst_ids)) << "Invalid dst id array."; const auto srclen = src_ids->shape[0]; const auto dstlen = dst_ids->shape[0]; - int64_t i, j; CHECK((srclen == dstlen) || (srclen == 1) || (dstlen == 1)) << "Invalid src and dst id array."; @@ -398,7 +437,7 @@ ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_i std::vector src, dst, eid; - for (i = 0, j = 0; i < srclen && j < dstlen; i += src_stride, j += dst_stride) { + for (int64_t i = 0, j = 0; i < srclen && j < dstlen; i += src_stride, j += dst_stride) { const dgl_id_t src_id = src_data[i], dst_id = dst_data[j]; CHECK(HasVertex(src_id) && HasVertex(dst_id)) << "invalid edge: " << src_id << " -> " << dst_id; @@ -410,14 +449,14 @@ ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_i edges = this->GetOutEdgeIdRef(src_id, dst_id); size_t len = edges.second - edges.first; - for (size_t i = 0; i < len; i++) { + for (size_t k = 0; k < len; k++) { src.push_back(src_id); dst.push_back(dst_id); - eid.push_back(edges.first[i]); + eid.push_back(edges.first[k]); } } - 
int64_t rstlen = src.size(); + const int64_t rstlen = src.size(); IdArray rst_src = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); IdArray rst_dst = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); IdArray rst_eid = IdArray::Empty({rstlen}, src_ids->dtype, src_ids->ctx); From ecf49803f568ca2e1e417814755625d4a811072d Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 13:32:00 +0800 Subject: [PATCH 67/75] use lambda. --- src/graph/immutable_graph.cc | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index d08d4cdf6456..a6b74b83345c 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -207,22 +207,13 @@ ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector *edges CHECK(sort_on == 0 || sort_on == 1) << "we must sort on the first or the second vector"; int other_end = sort_on == 1 ? 0 : 1; // TODO(zhengda) we should sort in parallel. 
- struct compare { - int sort_on; - int other_end; - compare(int sort_on, int other_end) { - this->sort_on = sort_on; - this->other_end = other_end; + std::sort(edges->begin(), edges->end(), [sort_on, other_end](const Edge &e1, const Edge &e2) { + if (e1.end_points[sort_on] == e2.end_points[sort_on]) { + return e1.end_points[other_end] < e2.end_points[other_end]; + } else { + return e1.end_points[sort_on] < e2.end_points[sort_on]; } - bool operator()(const Edge &e1, const Edge &e2) { - if (e1.end_points[sort_on] == e2.end_points[sort_on]) { - return e1.end_points[other_end] < e2.end_points[other_end]; - } else { - return e1.end_points[sort_on] < e2.end_points[sort_on]; - } - } - }; - std::sort(edges->begin(), edges->end(), compare(sort_on, other_end)); + }); auto t = std::make_shared(0, 0); t->indices.resize(edges->size()); t->edge_ids.resize(edges->size()); From 2b0eb6d9f5bc107a943586de544bd8de7f75e354 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 13:34:48 +0800 Subject: [PATCH 68/75] other comments. --- src/graph/immutable_graph.cc | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index a6b74b83345c..10eba82f9f67 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -433,14 +433,8 @@ ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_i CHECK(HasVertex(src_id) && HasVertex(dst_id)) << "invalid edge: " << src_id << " -> " << dst_id; - std::pair edges; - if (this->in_csr_) - edges = this->GetInEdgeIdRef(src_id, dst_id); - else - edges = this->GetOutEdgeIdRef(src_id, dst_id); - - size_t len = edges.second - edges.first; - for (size_t k = 0; k < len; k++) { + auto edges = this->in_csr_ ? 
GetInEdgeIdRef(src_id, dst_id) : GetOutEdgeIdRef(src_id, dst_id); + for (size_t k = 0; k < edges.size(); k++) { src.push_back(src_id); dst.push_back(dst_id); eid.push_back(edges.first[k]); From 43392d293090778efadf4fada1b2a63c64c4e56f Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 13:44:48 +0800 Subject: [PATCH 69/75] address comments. --- src/graph/immutable_graph.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 10eba82f9f67..ec05b724493d 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -175,8 +175,8 @@ std::pair ImmutableGraph::CSR::VertexSubgraph // Collect the non-zero entries in from the original graph. std::vector orig_edge_ids; - orig_edge_ids.reserve(len * 50); - auto sub_csr = std::make_shared(len, len * 50); + orig_edge_ids.reserve(len); + auto sub_csr = std::make_shared(len, len); sub_csr->indptr[0] = 0; for (int64_t i = 0; i < len; ++i) { const dgl_id_t oldvid = vid_data[i]; From edd1b82a981ff4b28558d294d6f4602ccf785c0a Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 16:58:07 +0800 Subject: [PATCH 70/75] modify the semantics of edges. 
--- include/dgl/graph.h | 2 +- include/dgl/graph_interface.h | 9 +++--- include/dgl/immutable_graph.h | 6 +++- python/dgl/graph.py | 12 +++++--- python/dgl/graph_index.py | 16 +++++++---- src/graph/graph.cc | 4 +-- src/graph/graph_apis.cc | 4 +-- src/graph/immutable_graph.cc | 47 ++++++++++++++++++++++--------- src/scheduler/scheduler_apis.cc | 2 +- tests/compute/test_graph_index.py | 10 +++++-- 10 files changed, 75 insertions(+), 37 deletions(-) diff --git a/include/dgl/graph.h b/include/dgl/graph.h index 21c2dcbe2106..a1908773c399 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -229,7 +229,7 @@ class Graph: public GraphInterface { * \param sorted Whether the returned edge list is sorted by their src and dst ids * \return the id arrays of the two endpoints of the edges. */ - EdgeArray Edges(bool sorted = false) const; + EdgeArray Edges(const std::string &order = "") const; /*! * \brief Get the in degree of the given vertex. diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index a7837b9d930e..805d1aed043e 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -219,12 +219,13 @@ class GraphInterface { /*! * \brief Get all the edges in the graph. - * \note If sorted is true, the returned edges list is sorted by their src and - * dst ids. Otherwise, they are in their edge id order. - * \param sorted Whether the returned edge list is sorted by their src and dst ids + * \note If order is "srcdst", the returned edges list is sorted by their src and + * dst ids. If order is "eid", they are in their edge id order. + * Otherwise, in the arbitrary order. + * \param order The order of the returned edge list. * \return the id arrays of the two endpoints of the edges. */ - virtual EdgeArray Edges(bool sorted = false) const = 0; + virtual EdgeArray Edges(const std::string &order = "") const = 0; /*! * \brief Get the in degree of the given vertex. 
diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index c89afe6fdf9c..20c3997c1e73 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -68,6 +68,10 @@ class ImmutableGraph: public GraphInterface { const int64_t end = indptr[v + 1]; return std::pair(&indices[start], &indices[end]); } + /* + * Read all edges and store them in the vector. + */ + void ReadAllEdges(std::vector *edges) const; CSR::Ptr Transpose() const; std::pair VertexSubgraph(IdArray vids) const; /* @@ -313,7 +317,7 @@ class ImmutableGraph: public GraphInterface { * \param sorted Whether the returned edge list is sorted by their src and dst ids * \return the id arrays of the two endpoints of the edges. */ - EdgeArray Edges(bool sorted = false) const; + EdgeArray Edges(const std::string &order = "") const; /*! * \brief Get the in degree of the given vertex. diff --git a/python/dgl/graph.py b/python/dgl/graph.py index 287bb17f4b4a..b26d36d73cb0 100644 --- a/python/dgl/graph.py +++ b/python/dgl/graph.py @@ -915,7 +915,7 @@ def out_edges(self, v, form='uv'): else: raise DGLError('Invalid form:', form) - def all_edges(self, form='uv', return_sorted=False): + def all_edges(self, form='uv', order=None): """Return all the edges. Parameters @@ -926,8 +926,12 @@ def all_edges(self, form='uv', return_sorted=False): - 'all' : a tuple (u, v, eid) - 'uv' : a pair (u, v), default - 'eid' : one eid tensor - return_sorted : bool - True if the returned edges are sorted by their src and dst ids. + order : string + The order of the returned edges. Currently support: + + - 'srcdst' : sorted by their src and dst ids. + - 'eid' : sorted by edge Ids. + - None : the arbitrary order. 
Returns ------- @@ -953,7 +957,7 @@ def all_edges(self, form='uv', return_sorted=False): >>> G.all_edges('all') (tensor([0, 0, 1]), tensor([1, 2, 2]), tensor([0, 1, 2])) """ - src, dst, eid = self._graph.edges(return_sorted) + src, dst, eid = self._graph.edges(order) if form == 'all': return (src.tousertensor(), dst.tousertensor(), eid.tousertensor()) elif form == 'uv': diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 0c9aa4eb2fe5..cb427587881e 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -386,13 +386,17 @@ def out_edges(self, v): return src, dst, eid @utils.cached_member(cache='_cache', prefix='edges') - def edges(self, return_sorted=False): + def edges(self, order=None): """Return all the edges Parameters ---------- - return_sorted : bool - True if the returned edges are sorted by their src and dst ids. + order : string + The order of the returned edges. Currently support: + + - 'srcdst' : sorted by their src and dst ids. + - 'eid' : sorted by edge Ids. + - None : the arbitrary order. Returns ------- @@ -403,9 +407,9 @@ def edges(self, return_sorted=False): utils.Index The edge ids. """ - key = 'edges_s%d' % return_sorted + key = 'edges_s%s' % order if key not in self._cache: - edge_array = _CAPI_DGLGraphEdges(self._handle, return_sorted) + edge_array = _CAPI_DGLGraphEdges(self._handle, order) src = utils.toindex(edge_array(0)) dst = utils.toindex(edge_array(1)) eid = utils.toindex(edge_array(2)) @@ -614,7 +618,7 @@ def incidence_matrix(self, typestr, ctx): A index for data shuffling due to sparse format change. Return None if shuffle is not required. 
""" - src, dst, eid = self.edges(False) + src, dst, eid = self.edges() src = src.tousertensor(ctx) # the index of the ctx will be cached dst = dst.tousertensor(ctx) # the index of the ctx will be cached eid = eid.tousertensor(ctx) # the index of the ctx will be cached diff --git a/src/graph/graph.cc b/src/graph/graph.cc index f991573136e9..b64b7aa59625 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -365,13 +365,13 @@ Graph::EdgeArray Graph::OutEdges(IdArray vids) const { } // O(E*log(E)) if sort is required; otherwise, O(E) -Graph::EdgeArray Graph::Edges(bool sorted) const { +Graph::EdgeArray Graph::Edges(const std::string &order) const { const int64_t len = num_edges_; IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray eid = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - if (sorted) { + if (order == "srcdst") { typedef std::tuple Tuple; std::vector tuples; tuples.reserve(len); diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index b10731d1202a..a3628cdec442 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -309,8 +309,8 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphEdges") .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; const GraphInterface* gptr = static_cast(ghandle); - const bool sorted = args[1]; - *rv = ConvertEdgeArrayToPackedFunc(gptr->Edges(sorted)); + std::string order = args[1]; + *rv = ConvertEdgeArrayToPackedFunc(gptr->Edges(order)); }); DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphInDegree") diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index ec05b724493d..82eb5685bda8 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -4,7 +4,7 @@ * \brief DGL immutable graph index implementation */ -#include +#include #include #ifdef _MSC_VER @@ -235,8 +235,8 @@ 
ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector *edges return t; } -ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const { - std::vector edges(NumEdges()); +void ImmutableGraph::CSR::ReadAllEdges(std::vector *edges) const { + edges->resize(NumEdges()); for (size_t i = 0; i < NumVertices(); i++) { const dgl_id_t *indices_begin = &indices[indptr[i]]; const dgl_id_t *eid_begin = &edge_ids[indptr[i]]; @@ -245,9 +245,14 @@ ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const { e.end_points[0] = i; e.end_points[1] = indices_begin[j]; e.edge_id = eid_begin[j]; - edges[indptr[i] + j] = e; + (*edges)[indptr[i] + j] = e; } } +} + +ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const { + std::vector edges; + ReadAllEdges(&edges); return FromEdges(&edges, 1, NumVertices()); } @@ -456,7 +461,7 @@ ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_i return ImmutableGraph::EdgeArray{rst_src, rst_dst, rst_eid}; } -ImmutableGraph::EdgeArray ImmutableGraph::Edges(bool sorted) const { +ImmutableGraph::EdgeArray ImmutableGraph::Edges(const std::string &order) const { int64_t rstlen = NumEdges(); IdArray rst_src = IdArray::Empty({rstlen}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); IdArray rst_dst = IdArray::Empty({rstlen}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); @@ -465,16 +470,30 @@ ImmutableGraph::EdgeArray ImmutableGraph::Edges(bool sorted) const { dgl_id_t* rst_dst_data = static_cast(rst_dst->data); dgl_id_t* rst_eid_data = static_cast(rst_eid->data); - auto out_csr = GetOutCSR(); - // If sorted, the returned edges are sorted by the source Id and dest Id. - for (size_t i = 0; i < out_csr->indptr.size() - 1; i++) { - std::fill(rst_src_data + out_csr->indptr[i], rst_src_data + out_csr->indptr[i + 1], - static_cast(i)); + if (order.empty() || order == "srcdst") { + auto out_csr = GetOutCSR(); + // If sorted, the returned edges are sorted by the source Id and dest Id. 
+ for (size_t i = 0; i < out_csr->indptr.size() - 1; i++) { + std::fill(rst_src_data + out_csr->indptr[i], rst_src_data + out_csr->indptr[i + 1], + static_cast(i)); + } + std::copy(out_csr->indices.begin(), out_csr->indices.end(), rst_dst_data); + std::copy(out_csr->edge_ids.begin(), out_csr->edge_ids.end(), rst_eid_data); + } else if (order == "eid") { + std::vector edges; + auto out_csr = GetOutCSR(); + out_csr->ReadAllEdges(&edges); + std::sort(edges.begin(), edges.end(), [](const Edge &e1, const Edge &e2) { + return e1.edge_id < e2.edge_id; + }); + for (size_t i = 0; i < edges.size(); i++) { + rst_src_data[i] = edges[i].end_points[0]; + rst_dst_data[i] = edges[i].end_points[1]; + rst_eid_data[i] = edges[i].edge_id; + } + } else { + LOG(FATAL) << "unsupported order " << order; } - std::copy(out_csr->indices.begin(), out_csr->indices.end(), rst_dst_data); - std::copy(out_csr->edge_ids.begin(), out_csr->edge_ids.end(), rst_eid_data); - - // TODO(zhengda) do I need to sort the edges if sorted = false? 
return ImmutableGraph::EdgeArray{rst_src, rst_dst, rst_eid}; } diff --git a/src/scheduler/scheduler_apis.cc b/src/scheduler/scheduler_apis.cc index 3ab72b3e6380..b79e7ce92838 100644 --- a/src/scheduler/scheduler_apis.cc +++ b/src/scheduler/scheduler_apis.cc @@ -48,7 +48,7 @@ DGL_REGISTER_GLOBAL("runtime.degree_bucketing._CAPI_DGLDegreeBucketingForFullGra .set_body([] (DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; const Graph* gptr = static_cast(ghandle); - const auto& edges = gptr->Edges(false); + const auto& edges = gptr->Edges(""); int64_t n_vertices = gptr->NumVertices(); IdArray nids = IdArray::Empty({n_vertices}, edges.dst->dtype, edges.dst->ctx); int64_t* nid_data = static_cast(nids->data); diff --git a/tests/compute/test_graph_index.py b/tests/compute/test_graph_index.py index ff53f04d599a..76b90b946769 100644 --- a/tests/compute/test_graph_index.py +++ b/tests/compute/test_graph_index.py @@ -49,8 +49,14 @@ def check_basics(g, ig): assert g.number_of_nodes() == ig.number_of_nodes() assert g.number_of_edges() == ig.number_of_edges() - edges = g.edges(True) - iedges = ig.edges(True) + edges = g.edges("srcdst") + iedges = ig.edges("srcdst") + assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor()) + assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor()) + assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor()) + + edges = g.edges("eid") + iedges = ig.edges("eid") assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor()) assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor()) assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor()) From 5c1a947b0dec80c89ac97df7a6a2c20ec7866404 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 17:06:55 +0800 Subject: [PATCH 71/75] fix order. 
--- python/dgl/graph_index.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index cb427587881e..f3326d338a14 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -409,6 +409,8 @@ def edges(self, order=None): """ key = 'edges_s%s' % order if key not in self._cache: + if order is None: + order = "" edge_array = _CAPI_DGLGraphEdges(self._handle, order) src = utils.toindex(edge_array(0)) dst = utils.toindex(edge_array(1)) From 697a7e8dba38aac7bb6606b56b4e07ab1284960f Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 13:29:41 +0800 Subject: [PATCH 72/75] use DGLIdIter --- include/dgl/graph_interface.h | 2 ++ include/dgl/immutable_graph.h | 8 ++--- src/graph/immutable_graph.cc | 59 +++++++++++++++-------------------- 3 files changed, 32 insertions(+), 37 deletions(-) diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 805d1aed043e..4d9f7b1625a3 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -43,6 +43,8 @@ struct SampledSubgraph; class DGLIdIters { std::vector::const_iterator begin_, end_; public: + DGLIdIters() { + } DGLIdIters(std::vector::const_iterator begin, std::vector::const_iterator end) { this->begin_ = begin; diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 20c3997c1e73..0a57375c03c0 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -63,10 +63,10 @@ class ImmutableGraph: public GraphInterface { EdgeArray GetEdges(dgl_id_t vid) const; EdgeArray GetEdges(IdArray vids) const; /* \brief this returns the start and end position of the column indices corresponding v. 
*/ - std::pair GetIndexRef(dgl_id_t v) const { + DGLIdIters GetIndexRef(dgl_id_t v) const { const int64_t start = indptr[v]; const int64_t end = indptr[v + 1]; - return std::pair(&indices[start], &indices[end]); + return DGLIdIters(indices.begin() + start, indices.begin() + end); } /* * Read all edges and store them in the vector. @@ -474,8 +474,8 @@ class ImmutableGraph: public GraphInterface { virtual std::vector GetAdj(bool transpose, const std::string &fmt) const; protected: - std::pair GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; - std::pair GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; + DGLIdIters GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; + DGLIdIters GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const; /* * The immutable graph may only contain one of the CSRs (e.g., the sampled subgraphs). diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 82eb5685bda8..0d7db6cc4cff 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -296,11 +296,11 @@ bool ImmutableGraph::HasEdgeBetween(dgl_id_t src, dgl_id_t dst) const { if (!HasVertex(src) || !HasVertex(dst)) return false; if (this->in_csr_) { auto pred = this->in_csr_->GetIndexRef(dst); - return binary_search(pred.first, pred.second, src); + return dgl::binary_search(pred.begin(), pred.end(), src); } else { CHECK(this->out_csr_) << "one of the CSRs must exist"; auto succ = this->out_csr_->GetIndexRef(src); - return binary_search(succ.first, succ.second, dst); + return dgl::binary_search(succ.begin(), succ.end(), dst); } } @@ -339,11 +339,11 @@ IdArray ImmutableGraph::Predecessors(dgl_id_t vid, uint64_t radius) const { CHECK(radius >= 1) << "invalid radius: " << radius; auto pred = this->GetInCSR()->GetIndexRef(vid); - const int64_t len = pred.second - pred.first; + const int64_t len = pred.size(); IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); dgl_id_t* rst_data = static_cast(rst->data); - std::copy(pred.first, 
pred.second, rst_data); + std::copy(pred.begin(), pred.end(), rst_data); return rst; } @@ -352,66 +352,59 @@ IdArray ImmutableGraph::Successors(dgl_id_t vid, uint64_t radius) const { CHECK(radius >= 1) << "invalid radius: " << radius; auto succ = this->GetOutCSR()->GetIndexRef(vid); - const int64_t len = succ.second - succ.first; + const int64_t len = succ.size(); IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); dgl_id_t* rst_data = static_cast(rst->data); - std::copy(succ.first, succ.second, rst_data); + std::copy(succ.begin(), succ.end(), rst_data); return rst; } -std::pair ImmutableGraph::GetInEdgeIdRef(dgl_id_t src, - dgl_id_t dst) const { +DGLIdIters ImmutableGraph::GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const { CHECK(this->in_csr_); auto pred = this->in_csr_->GetIndexRef(dst); - auto it = std::lower_bound(pred.first, pred.second, src); + auto it = std::lower_bound(pred.begin(), pred.end(), src); // If there doesn't exist edges between the two nodes. - if (it == pred.second || *it != src) { - return std::pair(nullptr, nullptr); + if (it == pred.end() || *it != src) { + return DGLIdIters(); } - size_t off = it - in_csr_->indices.data(); + size_t off = it - in_csr_->indices.begin(); CHECK(off < in_csr_->indices.size()); - const dgl_id_t *start = &in_csr_->edge_ids[off]; + auto start = in_csr_->edge_ids.begin() + off; int64_t len = 0; // There are edges between the source and the destination. 
- for (auto it1 = it; it1 != pred.second && *it1 == src; it1++, len++) {} - return std::pair(start, start + len); + for (auto it1 = it; it1 != pred.end() && *it1 == src; it1++, len++) {} + return DGLIdIters(start, start + len); } -std::pair ImmutableGraph::GetOutEdgeIdRef(dgl_id_t src, - dgl_id_t dst) const { +DGLIdIters ImmutableGraph::GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const { CHECK(this->out_csr_); auto succ = this->out_csr_->GetIndexRef(src); - auto it = std::lower_bound(succ.first, succ.second, dst); + auto it = std::lower_bound(succ.begin(), succ.end(), dst); // If there doesn't exist edges between the two nodes. - if (it == succ.second || *it != dst) { - return std::pair(nullptr, nullptr); + if (it == succ.end() || *it != dst) { + return DGLIdIters(); } - size_t off = it - out_csr_->indices.data(); + size_t off = it - out_csr_->indices.begin(); CHECK(off < out_csr_->indices.size()); - const dgl_id_t *start = &out_csr_->edge_ids[off]; + auto start = out_csr_->edge_ids.begin() + off; int64_t len = 0; // There are edges between the source and the destination. - for (auto it1 = it; it1 != succ.second && *it1 == dst; it1++, len++) {} - return std::pair(start, start + len); + for (auto it1 = it; it1 != succ.end() && *it1 == dst; it1++, len++) {} + return DGLIdIters(start, start + len); } IdArray ImmutableGraph::EdgeId(dgl_id_t src, dgl_id_t dst) const { CHECK(HasVertex(src) && HasVertex(dst)) << "invalid edge: " << src << " -> " << dst; - std::pair edge_ids; - if (in_csr_) { - edge_ids = GetInEdgeIdRef(src, dst); - } else { - edge_ids = GetOutEdgeIdRef(src, dst); - } - int64_t len = edge_ids.second - edge_ids.first; + auto edge_ids = in_csr_ ? 
GetInEdgeIdRef(src, dst) : GetOutEdgeIdRef(src, dst); + int64_t len = edge_ids.size(); IdArray rst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); dgl_id_t* rst_data = static_cast(rst->data); if (len > 0) { - std::copy(edge_ids.first, edge_ids.second, rst_data); + std::copy(edge_ids.begin(), edge_ids.end(), rst_data); } return rst; @@ -442,7 +435,7 @@ ImmutableGraph::EdgeArray ImmutableGraph::EdgeIds(IdArray src_ids, IdArray dst_i for (size_t k = 0; k < edges.size(); k++) { src.push_back(src_id); dst.push_back(dst_id); - eid.push_back(edges.first[k]); + eid.push_back(edges[k]); } } From e43b917fb00439755e9ceb6fbdef33bdcf3e5655 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 18:13:07 +0800 Subject: [PATCH 73/75] fix. --- include/dgl/graph_interface.h | 2 -- src/graph/immutable_graph.cc | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 4d9f7b1625a3..805d1aed043e 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -43,8 +43,6 @@ struct SampledSubgraph; class DGLIdIters { std::vector::const_iterator begin_, end_; public: - DGLIdIters() { - } DGLIdIters(std::vector::const_iterator begin, std::vector::const_iterator end) { this->begin_ = begin; diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 0d7db6cc4cff..723d35136056 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -366,7 +366,7 @@ DGLIdIters ImmutableGraph::GetInEdgeIdRef(dgl_id_t src, dgl_id_t dst) const { auto it = std::lower_bound(pred.begin(), pred.end(), src); // If there doesn't exist edges between the two nodes. 
if (it == pred.end() || *it != src) { - return DGLIdIters(); + return DGLIdIters(it, it); } size_t off = it - in_csr_->indices.begin(); @@ -384,7 +384,7 @@ DGLIdIters ImmutableGraph::GetOutEdgeIdRef(dgl_id_t src, dgl_id_t dst) const { auto it = std::lower_bound(succ.begin(), succ.end(), dst); // If there doesn't exist edges between the two nodes. if (it == succ.end() || *it != dst) { - return DGLIdIters(); + return DGLIdIters(it, it); } size_t off = it - out_csr_->indices.begin(); From 5ed4ff1995c3bc673f8521fa3b69e2140324cdd0 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 18:33:06 +0800 Subject: [PATCH 74/75] remove NotImplemented. --- include/dgl/graph.h | 3 ++- include/dgl/graph_interface.h | 12 ------------ include/dgl/immutable_graph.h | 14 ++++++++------ python/dgl/graph_index.py | 6 +++++- src/graph/graph_apis.cc | 15 +++++---------- src/graph/immutable_graph.cc | 3 ++- 6 files changed, 22 insertions(+), 31 deletions(-) diff --git a/include/dgl/graph.h b/include/dgl/graph.h index a1908773c399..3e0c0c000a21 100644 --- a/include/dgl/graph.h +++ b/include/dgl/graph.h @@ -375,7 +375,8 @@ class Graph: public GraphInterface { */ virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, int num_hops, int expand_factor) const { - throw NotImplemented("NeighborUniformSample"); + LOG(FATAL) << "NeighborUniformSample isn't supported in mutable graph"; + return SampledSubgraph(); } protected: diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 805d1aed043e..7e617ac9d5c5 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -13,18 +13,6 @@ namespace dgl { -class NotImplemented: public std::exception { - std::string msg; - public: - explicit NotImplemented(const std::string &name) { - this->msg = name + " isn't implemented"; - } - - virtual const char* what() const noexcept { - return msg.c_str(); - } -}; - typedef uint64_t dgl_id_t; typedef dgl::runtime::NDArray 
IdArray; typedef dgl::runtime::NDArray DegreeArray; diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 0a57375c03c0..849b160847e7 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -141,7 +141,7 @@ class ImmutableGraph: public GraphInterface { * \param num_vertices The number of vertices to be added. */ void AddVertices(uint64_t num_vertices) { - throw NotImplemented("AddVertices"); + LOG(FATAL) << "AddVertices isn't supported in ImmutableGraph"; } /*! @@ -150,7 +150,7 @@ class ImmutableGraph: public GraphInterface { * \param dst The destination vertex. */ void AddEdge(dgl_id_t src, dgl_id_t dst) { - throw NotImplemented("AddEdge"); + LOG(FATAL) << "AddEdge isn't supported in ImmutableGraph"; } /*! @@ -159,14 +159,14 @@ class ImmutableGraph: public GraphInterface { * \param dst_ids The destination vertex id array. */ void AddEdges(IdArray src_ids, IdArray dst_ids) { - throw NotImplemented("AddEdges"); + LOG(FATAL) << "AddEdges isn't supported in ImmutableGraph"; } /*! * \brief Clear the graph. Remove all vertices/edges. */ void Clear() { - throw NotImplemented("Clear"); + LOG(FATAL) << "Clear isn't supported in ImmutableGraph"; } /*! @@ -257,7 +257,8 @@ class ImmutableGraph: public GraphInterface { * \return a pair whose first element is the source and the second the destination. */ std::pair FindEdge(dgl_id_t eid) const { - throw NotImplemented("FindEdge"); + LOG(FATAL) << "FindEdge isn't supported in ImmutableGraph"; + return std::pair(); } /*! @@ -266,7 +267,8 @@ class ImmutableGraph: public GraphInterface { * \return EdgeArray containing all edges with id in eid. The order is preserved. */ EdgeArray FindEdges(IdArray eids) const { - throw NotImplemented("FindEdges"); + LOG(FATAL) << "FindEdges isn't supported in ImmutableGraph"; + return EdgeArray(); } /*! 
diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index f3326d338a14..34d5af11c864 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -102,7 +102,11 @@ def add_edges(self, u, v): """ u_array = u.todgltensor() v_array = v.todgltensor() - _CAPI_DGLGraphAddEdges(self._handle, u_array, v_array) + try: + _CAPI_DGLGraphAddEdges(self._handle, u_array, v_array) + except Exception as e: + print(e) + pass self.clear_cache() def clear(self): diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index a3628cdec442..dac42cd10b5b 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -370,8 +370,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointUnion") for (int i = 0; i < list_size; ++i) { const GraphInterface *ptr = static_cast(inhandles[i]); const Graph* gr = dynamic_cast(ptr); - if (gr == nullptr) - throw NotImplemented("_CAPI_DGLDisjointUnion for immutable graph"); + CHECK(gr) << "_CAPI_DGLDisjointUnion isn't implemented in immutable graph"; graphs.push_back(gr); } Graph* gptr = new Graph(); @@ -385,8 +384,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionByNum") GraphHandle ghandle = args[0]; const GraphInterface *ptr = static_cast(ghandle); const Graph* gptr = dynamic_cast(ptr); - if (gptr == nullptr) - throw NotImplemented("_CAPI_DGLDisjointPartitionByNum for immutable graph"); + CHECK(gptr) << "_CAPI_DGLDisjointPartitionByNum isn't implemented in immutable graph"; int64_t num = args[1]; std::vector&& rst = GraphOp::DisjointPartitionByNum(gptr, num); // return the pointer array as an integer array @@ -406,8 +404,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes") GraphHandle ghandle = args[0]; const GraphInterface *ptr = static_cast(ghandle); const Graph* gptr = dynamic_cast(ptr); - if (gptr == nullptr) - throw NotImplemented("_CAPI_DGLDisjointPartitionBySizes for immutable graph"); + CHECK(gptr) << "_CAPI_DGLDisjointPartitionBySizes isn't implemented in immutable 
graph"; const IdArray sizes = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[1])); std::vector&& rst = GraphOp::DisjointPartitionBySizes(gptr, sizes); // return the pointer array as an integer array @@ -428,8 +425,7 @@ DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph") bool backtracking = args[1]; const GraphInterface *ptr = static_cast(ghandle); const Graph* gptr = dynamic_cast(ptr); - if (gptr == nullptr) - throw NotImplemented("_CAPI_DGLGraphLineGraph for immutable graph"); + CHECK(gptr) << "_CAPI_DGLGraphLineGraph isn't implemented in immutable graph"; Graph* lgptr = new Graph(); *lgptr = GraphOp::LineGraph(gptr, backtracking); GraphHandle lghandle = lgptr; @@ -448,8 +444,7 @@ void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { const int num_valid_seeds = args[num_seeds + 4]; const GraphInterface *ptr = static_cast(ghandle); const ImmutableGraph *gptr = dynamic_cast(ptr); - if (gptr == nullptr) - throw NotImplemented("sampling isn't supported in mutable graph"); + CHECK(gptr) << "sampling isn't implemented in mutable graph"; CHECK(num_valid_seeds <= num_seeds); std::vector subgs(seeds.size()); #pragma omp parallel for diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 723d35136056..5edd8d1b6cd0 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -511,7 +511,8 @@ Subgraph ImmutableGraph::VertexSubgraph(IdArray vids) const { } Subgraph ImmutableGraph::EdgeSubgraph(IdArray eids) const { - throw NotImplemented("EdgeSubgraph"); + LOG(FATAL) << "EdgeSubgraph isn't implemented in immutable graph"; + return Subgraph(); } ImmutableGraph::CSRArray ImmutableGraph::GetInCSRArray() const { From cefb8e963eff8c5c7f602e9bba1f3e217ed29543 Mon Sep 17 00:00:00 2001 From: Da Zheng Date: Wed, 16 Jan 2019 18:35:08 +0800 Subject: [PATCH 75/75] revert some code. 
--- python/dgl/graph_index.py | 6 +----- src/graph/immutable_graph.cc | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 34d5af11c864..f3326d338a14 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -102,11 +102,7 @@ def add_edges(self, u, v): """ u_array = u.todgltensor() v_array = v.todgltensor() - try: - _CAPI_DGLGraphAddEdges(self._handle, u_array, v_array) - except Exception as e: - print(e) - pass + _CAPI_DGLGraphAddEdges(self._handle, u_array, v_array) self.clear_cache() def clear(self): diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 5edd8d1b6cd0..0fafc1797898 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -4,8 +4,8 @@ * \brief DGL immutable graph index implementation */ -#include #include +#include #ifdef _MSC_VER // rand in MS compiler works well in multi-threading.