From 25f6d30611f827f8509a20b9793acbcd5689f112 Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Fri, 18 Jan 2019 12:52:02 +0800 Subject: [PATCH 01/10] signed and unsigned integer conversion --- include/dgl/immutable_graph.h | 4 ++-- src/c_api_common.cc | 2 +- src/graph/graph.cc | 20 ++++++++++---------- src/graph/graph_apis.cc | 2 +- src/graph/immutable_graph.cc | 18 +++++++++--------- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 849b160847e7..35133b2eb9f1 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -56,7 +56,7 @@ class ImmutableGraph: public GraphInterface { return indices.size(); } - int64_t GetDegree(dgl_id_t vid) const { + uint64_t GetDegree(dgl_id_t vid) const { return indptr[vid + 1] - indptr[vid]; } DegreeArray GetDegrees(IdArray vids) const; @@ -81,7 +81,7 @@ class ImmutableGraph: public GraphInterface { * we simply sort on the input edge list. We allow sorting on both end points of an edge, * which is specified by `sort_on`. */ - static CSR::Ptr FromEdges(std::vector *edges, int sort_on, int64_t num_nodes); + static CSR::Ptr FromEdges(std::vector *edges, int sort_on, uint64_t num_nodes); }; /*! \brief Construct an immutable graph from the COO format. 
*/ diff --git a/src/c_api_common.cc b/src/c_api_common.cc index 0326f87a9b8b..63184615b82c 100644 --- a/src/c_api_common.cc +++ b/src/c_api_common.cc @@ -24,7 +24,7 @@ DLManagedTensor* CreateTmpDLManagedTensor(const DGLArgValue& arg) { PackedFunc ConvertNDArrayVectorToPackedFunc(const std::vector& vec) { auto body = [vec](DGLArgs args, DGLRetValue* rv) { - const int which = args[0]; + const size_t which = args[0]; if (which >= vec.size()) { LOG(FATAL) << "invalid choice"; } else { diff --git a/src/graph/graph.cc b/src/graph/graph.cc index b64b7aa59625..f5a69ea5c8ed 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -20,14 +20,14 @@ Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_node CHECK(IsValidIdArray(edge_ids)); this->AddVertices(num_nodes); num_edges_ = src_ids->shape[0]; - CHECK(num_edges_ == dst_ids->shape[0]) << "vectors in COO must have the same length"; - CHECK(num_edges_ == edge_ids->shape[0]) << "vectors in COO must have the same length"; + CHECK(static_cast(num_edges_) == dst_ids->shape[0]) << "vectors in COO must have the same length"; + CHECK(static_cast(num_edges_) == edge_ids->shape[0]) << "vectors in COO must have the same length"; const dgl_id_t *src_data = static_cast(src_ids->data); const dgl_id_t *dst_data = static_cast(dst_ids->data); const dgl_id_t *edge_data = static_cast(edge_ids->data); all_edges_src_.reserve(num_edges_); all_edges_dst_.reserve(num_edges_); - for (int64_t i = 0; i < num_edges_; i++) { + for (uint64_t i = 0; i < num_edges_; i++) { auto src = src_data[i]; auto dst = dst_data[i]; auto eid = edge_data[i]; @@ -504,10 +504,10 @@ Subgraph Graph::EdgeSubgraph(IdArray eids) const { } std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const { - int64_t num_edges = NumEdges(); - int64_t num_nodes = NumVertices(); + uint64_t num_edges = NumEdges(); + uint64_t num_nodes = NumVertices(); if (fmt == "coo") { - IdArray idx = IdArray::Empty({2 * num_edges}, DLDataType{kDLInt, 64, 1}, 
DLContext{kDLCPU, 0}); + IdArray idx = IdArray::Empty({2 * static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *idx_data = static_cast(idx->data); if (transpose) { std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); @@ -516,17 +516,17 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data); std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges); } - IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *eid_data = static_cast(eid->data); for (uint64_t eid = 0; eid < num_edges; ++eid) { eid_data[eid] = eid; } return std::vector{idx, eid}; } else if (fmt == "csr") { - IdArray indptr = IdArray::Empty({num_nodes + 1}, DLDataType{kDLInt, 64, 1}, + IdArray indptr = IdArray::Empty({static_cast(num_nodes) + 1}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - IdArray indices = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - IdArray eid = IdArray::Empty({num_edges}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indices = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *indptr_data = static_cast(indptr->data); int64_t *indices_data = static_cast(indices->data); int64_t *eid_data = static_cast(eid->data); diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index dac42cd10b5b..58726437460f 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -70,7 +70,7 @@ PackedFunc ConvertSubgraphToPackedFunc(const Subgraph& sg) { // Convert Sampled Subgraph structures to PackedFunc. 
PackedFunc ConvertSubgraphToPackedFunc(const std::vector& sg) { auto body = [sg] (DGLArgs args, DGLRetValue* rv) { - const int which = args[0]; + const size_t which = args[0]; if (which < sg.size()) { GraphInterface* gptr = sg[which].graph->Reset(); GraphHandle ghandle = gptr; diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 0fafc1797898..108c2994b6f6 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -191,7 +191,7 @@ std::pair ImmutableGraph::CSR::VertexSubgraph // Store the non-zeros in a subgraph with edge attributes of new edge ids. sub_csr->edge_ids.resize(sub_csr->indices.size()); - for (int64_t i = 0; i < sub_csr->edge_ids.size(); i++) + for (size_t i = 0; i < sub_csr->edge_ids.size(); i++) sub_csr->edge_ids[i] = i; IdArray rst_eids = IdArray::Empty({static_cast(orig_edge_ids.size())}, @@ -203,7 +203,7 @@ std::pair ImmutableGraph::CSR::VertexSubgraph } ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::FromEdges(std::vector *edges, - int sort_on, int64_t num_nodes) { + int sort_on, uint64_t num_nodes) { CHECK(sort_on == 0 || sort_on == 1) << "we must sort on the first or the second vector"; int other_end = sort_on == 1 ? 0 : 1; // TODO(zhengda) we should sort in parallel. @@ -285,7 +285,7 @@ BoolArray ImmutableGraph::HasVertices(IdArray vids) const { BoolArray rst = BoolArray::Empty({len}, vids->dtype, vids->ctx); const dgl_id_t* vid_data = static_cast(vids->data); dgl_id_t* rst_data = static_cast(rst->data); - const int64_t nverts = NumVertices(); + const uint64_t nverts = NumVertices(); for (int64_t i = 0; i < len; ++i) { rst_data[i] = (vid_data[i] < nverts)? 
1 : 0; } @@ -804,7 +804,7 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, // BFS traverse the graph and sample vertices // std::unordered_set sub_ver_map; - std::vector > sub_vers; + std::vector > sub_vers; sub_vers.reserve(num_seeds * 10); // add seed vertices for (size_t i = 0; i < num_seeds; ++i) { @@ -893,20 +893,20 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, // Copy sub_ver_map to output[0] // Copy layer - int64_t num_vertices = sub_ver_map.size(); + uint64_t num_vertices = sub_ver_map.size(); std::sort(sub_vers.begin(), sub_vers.end(), [](const std::pair &a1, const std::pair &a2) { return a1.first < a2.first; }); SampledSubgraph subg; - subg.induced_vertices = IdArray::Empty({num_vertices}, + subg.induced_vertices = IdArray::Empty({static_cast(num_vertices)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.induced_edges = IdArray::Empty({num_edges}, + subg.induced_edges = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.layer_ids = IdArray::Empty({num_vertices}, + subg.layer_ids = IdArray::Empty({static_cast(num_vertices)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.sample_prob = runtime::NDArray::Empty({num_vertices}, + subg.sample_prob = runtime::NDArray::Empty({static_cast(num_vertices)}, DLDataType{kDLFloat, 32, 1}, DLContext{kDLCPU, 0}); dgl_id_t *out = static_cast(subg.induced_vertices->data); From 6255aab73e1fb1c633e2944f423ceac47d3c5502 Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Fri, 18 Jan 2019 13:41:21 +0800 Subject: [PATCH 02/10] replace long with uint64_t --- src/graph/graph.cc | 14 +++++++------- src/graph/immutable_graph.cc | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/graph/graph.cc b/src/graph/graph.cc index f5a69ea5c8ed..0f6f6877ab26 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -20,8 +20,8 @@ Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_node 
CHECK(IsValidIdArray(edge_ids)); this->AddVertices(num_nodes); num_edges_ = src_ids->shape[0]; - CHECK(static_cast(num_edges_) == dst_ids->shape[0]) << "vectors in COO must have the same length"; - CHECK(static_cast(num_edges_) == edge_ids->shape[0]) << "vectors in COO must have the same length"; + CHECK(static_cast(num_edges_) == dst_ids->shape[0]) << "vectors in COO must have the same length"; + CHECK(static_cast(num_edges_) == edge_ids->shape[0]) << "vectors in COO must have the same length"; const dgl_id_t *src_data = static_cast(src_ids->data); const dgl_id_t *dst_data = static_cast(dst_ids->data); const dgl_id_t *edge_data = static_cast(edge_ids->data); @@ -507,7 +507,7 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const uint64_t num_edges = NumEdges(); uint64_t num_nodes = NumVertices(); if (fmt == "coo") { - IdArray idx = IdArray::Empty({2 * static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray idx = IdArray::Empty({2 * static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *idx_data = static_cast(idx->data); if (transpose) { std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); @@ -516,17 +516,17 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data); std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges); } - IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *eid_data = static_cast(eid->data); for (uint64_t eid = 0; eid < num_edges; ++eid) { eid_data[eid] = eid; } return std::vector{idx, eid}; } else if (fmt == "csr") { - IdArray indptr = IdArray::Empty({static_cast(num_nodes) + 1}, DLDataType{kDLInt, 64, 1}, + IdArray indptr = IdArray::Empty({static_cast(num_nodes) + 1}, 
DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - IdArray indices = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indices = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *indptr_data = static_cast(indptr->data); int64_t *indices_data = static_cast(indices->data); int64_t *eid_data = static_cast(eid->data); diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 108c2994b6f6..e3d9337a2239 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -900,13 +900,13 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, }); SampledSubgraph subg; - subg.induced_vertices = IdArray::Empty({static_cast(num_vertices)}, + subg.induced_vertices = IdArray::Empty({static_cast(num_vertices)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.induced_edges = IdArray::Empty({static_cast(num_edges)}, + subg.induced_edges = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.layer_ids = IdArray::Empty({static_cast(num_vertices)}, + subg.layer_ids = IdArray::Empty({static_cast(num_vertices)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.sample_prob = runtime::NDArray::Empty({static_cast(num_vertices)}, + subg.sample_prob = runtime::NDArray::Empty({static_cast(num_vertices)}, DLDataType{kDLFloat, 32, 1}, DLContext{kDLCPU, 0}); dgl_id_t *out = static_cast(subg.induced_vertices->data); From 353f622a7af247470f34215c3eb1b6f7ad8914b9 Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Fri, 18 Jan 2019 13:50:45 +0800 Subject: [PATCH 03/10] replace uint64_t with int64_t --- src/graph/graph.cc | 14 +++++++------- src/graph/immutable_graph.cc | 8 ++++---- 2 files 
changed, 11 insertions(+), 11 deletions(-) diff --git a/src/graph/graph.cc b/src/graph/graph.cc index 0f6f6877ab26..3928f505fc72 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -20,8 +20,8 @@ Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_node CHECK(IsValidIdArray(edge_ids)); this->AddVertices(num_nodes); num_edges_ = src_ids->shape[0]; - CHECK(static_cast(num_edges_) == dst_ids->shape[0]) << "vectors in COO must have the same length"; - CHECK(static_cast(num_edges_) == edge_ids->shape[0]) << "vectors in COO must have the same length"; + CHECK(static_cast(num_edges_) == dst_ids->shape[0]) << "vectors in COO must have the same length"; + CHECK(static_cast(num_edges_) == edge_ids->shape[0]) << "vectors in COO must have the same length"; const dgl_id_t *src_data = static_cast(src_ids->data); const dgl_id_t *dst_data = static_cast(dst_ids->data); const dgl_id_t *edge_data = static_cast(edge_ids->data); @@ -507,7 +507,7 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const uint64_t num_edges = NumEdges(); uint64_t num_nodes = NumVertices(); if (fmt == "coo") { - IdArray idx = IdArray::Empty({2 * static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray idx = IdArray::Empty({2 * static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *idx_data = static_cast(idx->data); if (transpose) { std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); @@ -516,17 +516,17 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data); std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges); } - IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *eid_data = static_cast(eid->data); for (uint64_t 
eid = 0; eid < num_edges; ++eid) { eid_data[eid] = eid; } return std::vector{idx, eid}; } else if (fmt == "csr") { - IdArray indptr = IdArray::Empty({static_cast(num_nodes) + 1}, DLDataType{kDLInt, 64, 1}, + IdArray indptr = IdArray::Empty({static_cast(num_nodes) + 1}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - IdArray indices = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indices = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *indptr_data = static_cast(indptr->data); int64_t *indices_data = static_cast(indices->data); int64_t *eid_data = static_cast(eid->data); diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index e3d9337a2239..4fe3e17877c3 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -900,13 +900,13 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, }); SampledSubgraph subg; - subg.induced_vertices = IdArray::Empty({static_cast(num_vertices)}, + subg.induced_vertices = IdArray::Empty({static_cast(num_vertices)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.induced_edges = IdArray::Empty({static_cast(num_edges)}, + subg.induced_edges = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.layer_ids = IdArray::Empty({static_cast(num_vertices)}, + subg.layer_ids = IdArray::Empty({static_cast(num_vertices)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.sample_prob = runtime::NDArray::Empty({static_cast(num_vertices)}, + subg.sample_prob = runtime::NDArray::Empty({static_cast(num_vertices)}, DLDataType{kDLFloat, 32, 1}, DLContext{kDLCPU, 0}); dgl_id_t *out = static_cast(subg.induced_vertices->data); 
From 020964d6596f0e766266257a71ff39c2faee3b30 Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Mon, 21 Jan 2019 11:04:47 +0800 Subject: [PATCH 04/10] layer sampler --- include/dgl/graph_interface.h | 8 ++ src/graph/immutable_graph.cc | 146 +++++++++++++++++++++++++++++++++- 2 files changed, 151 insertions(+), 3 deletions(-) diff --git a/include/dgl/graph_interface.h b/include/dgl/graph_interface.h index 7e617ac9d5c5..f7e0895e1730 100644 --- a/include/dgl/graph_interface.h +++ b/include/dgl/graph_interface.h @@ -340,6 +340,14 @@ class GraphInterface { */ virtual SampledSubgraph NeighborUniformSample(IdArray seeds, const std::string &neigh_type, int num_hops, int expand_factor) const = 0; + + /*! + * \brief Sample a subgraph from the seed vertices with layer sampling. + * The layers are sampled with a uniform distribution. + * \return a subgraph + */ + /* virtual SampledSubgraph LayerUniformSample(IdArray seeds, const std::string &neigh_type, + int n_layers, size_t layer_size) const = 0; */ }; /*! 
\brief Subgraph data structure */ diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 4fe3e17877c3..4b9b90e3672c 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -788,7 +788,7 @@ struct neigh_list { : neighs(_neighs), edges(_edges) {} }; -SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, +SampledSubgraph ImmutableGraph::NeighborSample(IdArray seed_arr, const float* probability, const std::string &neigh_type, int num_hops, @@ -907,7 +907,7 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, subg.layer_ids = IdArray::Empty({static_cast(num_vertices)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); subg.sample_prob = runtime::NDArray::Empty({static_cast(num_vertices)}, - DLDataType{kDLFloat, 32, 1}, DLContext{kDLCPU, 0}); + DLDataType{kDLFloat, 8 * sizeof(float), 1}, DLContext{kDLCPU, 0}); dgl_id_t *out = static_cast(subg.induced_vertices->data); dgl_id_t *out_layer = static_cast(subg.layer_ids->data); @@ -975,6 +975,134 @@ SampledSubgraph ImmutableGraph::SampleSubgraph(IdArray seed_arr, return subg; } +SampledSubgraph ImmutableGraph::LayerSample(IdArray seed_array, + const float* probability, + const std::string &neigh_type, + int n_layers, size_t layer_size) const { + unsigned int rand_seed = time(nullptr); + auto g_csr = neigh_type == "in" ? 
GetInCSR() : GetOutCSR(); + const int64_t* indptr = g_csr->indptr.data(); + const dgl_id_t* indices = g_csr->indices.data(); + const dgl_id_t* eids = g_csr->edge_ids.data(); + + std::vector nodes; + std::vector layer_ids; + std::vector probabilities; + std::vector edges; + + std::unordered_set candidate_set; + std::vector candidates; + std::map n_times; + + size_t n_seeds = seed_array->shape[0]; + const dgl_id_t* seed_data = static_cast(seed_array->data); + candidate_set.insert(seed_data, seed_data + n_seeds); + std::copy(candidate_set.begin(), candidate_set.end(), std::back_inserter(nodes)); + layer_ids.insert(layer_ids.end(), nodes.size(), 0); + probabilities.insert(probabilities.end(), nodes.size(), 0); + + std::vector positions {0, nodes.size()}; + for (int i = 0; i != n_layers; i++) { + candidate_set.clear(); + for (auto j = positions.end()[-2]; j != positions.back(); ++j) { + auto src = nodes[j]; + candidate_set.insert(indices + indptr[src], indices + indptr[src + 1]); + } + + candidates.clear(); + std::copy(candidate_set.begin(), candidate_set.end(), std::back_inserter(candidates)); + + auto n_candidates = candidates.size(); + n_times.clear(); + for (size_t j = 0; j != layer_size; ++j) { + auto dst = rand_r(&rand_seed) % n_candidates; + if (!n_times.insert(std::make_pair(dst, 1)).second) { + ++n_times[dst]; + } + } + + auto n_nodes = static_cast(NumVertices()); + for (auto const &pair : n_times) { + nodes.push_back(candidates[pair.first]); + layer_ids.push_back(i + 1); + probabilities.push_back(pair.second / n_nodes); + } + + positions.push_back(nodes.size()); + } + + /* + std::vector idx(nodes.size()); + iota(idx.begin(), idx.end(), 0); + auto less = [&nodes](size_t i, size_t j) { + return nodes[i] < nodes[j]; + }; + sort(idx.begin(), idx.end(), less); + + for (size_t i = 0; i < n_nodes; i++) { + subg.induced_vertices[i] = nodes[idx[i]]; + subg.layer_ids[i] = node_info[i].first; + subg.sample_prob[i] = node_info[i].second; + } + */ + + std::vector ret_indices; + std::vector ret_eids; + 
std::vector new_indptr; + std::vector new_indices; + std::vector new_eids; + new_indptr.push_back(0); + for (size_t i = 0; i < positions.size() - 2; ++i) { + auto curr_begin = positions[i]; + auto curr_end = positions[i + 1]; + // auto curr_len = curr_end - curr_begin; + auto next_begin = curr_end; + auto next_end = positions[i + 2]; + auto next_len = next_end - next_begin; + HashTableChecker checker(&nodes[next_begin], next_len); + for (size_t j = curr_begin; j != curr_end; ++j) { + ret_indices.clear(); + ret_eids.clear(); + auto src = nodes[j]; + auto dis = indptr[src]; + auto len = indptr[src + 1] - indptr[src]; + checker.CollectOnRow(indices + dis, eids + dis, len, &ret_indices, &ret_eids); + new_indptr.push_back(ret_indices.size()); + new_indices.insert(new_indices.end(), ret_indices.begin(), ret_indices.end()); + new_eids.insert(new_eids.end(), ret_eids.begin(), ret_eids.end()); + } + } + + long n_nodes = nodes.size(); + long n_edges = new_eids.size(); + auto subg_csr = std::make_shared(n_nodes, n_edges); + subg_csr->indptr.resize(n_nodes + 1); + subg_csr->indices.resize(n_edges); + subg_csr->edge_ids.resize(n_edges); + std::copy(new_indptr.begin(), new_indptr.end(), subg_csr->indptr.data()); + std::copy(new_indices.begin(), new_indices.end(), subg_csr->indices.data()); + std::copy(new_eids.begin(), new_eids.end(), subg_csr->edge_ids.data()); + + SampledSubgraph subg; + subg.induced_vertices = IdArray::Empty({n_nodes}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.induced_edges = IdArray::Empty({n_edges}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.layer_ids = IdArray::Empty({n_nodes}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + subg.sample_prob = runtime::NDArray::Empty({n_nodes}, + DLDataType{kDLFloat, 8 * sizeof(float), 1}, DLContext{kDLCPU, 0}); + std::copy(nodes.begin(), nodes.end(), static_cast(subg.induced_vertices->data)); + std::copy(layer_ids.begin(), layer_ids.end(), static_cast(subg.layer_ids->data)); + 
std::copy(probabilities.begin(), probabilities.end(), static_cast(subg.sample_prob->data)); + + if (neigh_type == "in") + subg.graph = GraphPtr(new ImmutableGraph(subg_csr, nullptr, IsMultigraph())); + else + subg.graph = GraphPtr(new ImmutableGraph(nullptr, subg_csr, IsMultigraph())); + + return subg; +} void CompactSubgraph(ImmutableGraph::CSR *subg, const std::unordered_map &id_map) { for (size_t i = 0; i < subg->indices.size(); i++) { @@ -1000,7 +1128,7 @@ void ImmutableGraph::CompactSubgraph(IdArray induced_vertices) { SampledSubgraph ImmutableGraph::NeighborUniformSample(IdArray seeds, const std::string &neigh_type, int num_hops, int expand_factor) const { - auto ret = SampleSubgraph(seeds, // seed vector + auto ret = NeighborSample(seeds, // seed vector nullptr, // sample_id_probability neigh_type, num_hops, @@ -1009,4 +1137,16 @@ SampledSubgraph ImmutableGraph::NeighborUniformSample(IdArray seeds, return ret; } +SampledSubgraph ImmutableGraph::LayerUniformSample(IdArray seeds, + const std::string &neigh_type, + int n_layers, size_t layer_size) const { + auto ret = LayerSample(seeds, + nullptr, + neigh_type, + n_layers, + layer_size); + std::static_pointer_cast(ret.graph)->CompactSubgraph(ret.induced_vertices); + return ret; +} + } // namespace dgl From bb78184618dd0277bd2d4543ab4b9051bc2f237d Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Mon, 21 Jan 2019 11:50:28 +0800 Subject: [PATCH 05/10] c api interface --- include/dgl/immutable_graph.h | 14 ++- python/dgl/contrib/sampling/sampler.py | 128 ++++++++++++++++++++++++- python/dgl/graph_index.py | 61 +++++++++--- src/graph/graph_apis.cc | 55 +++++++++-- 4 files changed, 236 insertions(+), 22 deletions(-) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 35133b2eb9f1..4693936d1ded 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -464,6 +464,14 @@ class ImmutableGraph: public GraphInterface { SampledSubgraph NeighborUniformSample(IdArray seeds, 
const std::string &neigh_type, int num_hops, int expand_factor) const; + /*! + * \brief Sample a subgraph from the seed vertices with layer sampling. + * The layers are sampled with a uniform distribution. + * \return a subgraph + */ + SampledSubgraph LayerUniformSample(IdArray seeds, const std::string &neigh_type, + int n_layers, size_t layer_size) const; + /*! * \brief Get the adjacency matrix of the graph. * @@ -517,10 +525,14 @@ class ImmutableGraph: public GraphInterface { */ CSRArray GetOutCSRArray() const; - SampledSubgraph SampleSubgraph(IdArray seed_arr, const float* probability, + SampledSubgraph NeighborSample(IdArray seed_arr, const float* probability, const std::string &neigh_type, int num_hops, size_t num_neighbor) const; + SampledSubgraph LayerSample(IdArray seed_arr, const float* probability, + const std::string &neigh_type, + int n_layers, size_t layer_size) const; + /*! * \brief Compact a subgraph. * In a sampled subgraph, the vertex Id is still in the ones in the original graph. 
diff --git a/python/dgl/contrib/sampling/sampler.py b/python/dgl/contrib/sampling/sampler.py index 65ba70fd4372..62296cf1d83a 100644 --- a/python/dgl/contrib/sampling/sampler.py +++ b/python/dgl/contrib/sampling/sampler.py @@ -14,7 +14,7 @@ except ImportError: import queue -__all__ = ['NeighborSampler'] +__all__ = ['NeighborSampler', 'LayerSampler'] class NSSubgraphLoader(object): def __init__(self, g, batch_size, expand_factor, num_hops=1, @@ -79,6 +79,69 @@ def __next__(self): aux_infos['seeds'] = self._seed_ids.pop(0).tousertensor() return self._subgraphs.pop(0), aux_infos +class LSSubgraphLoader(object): + def __init__(self, g, batch_size, layer_size, n_layers=1, + neighbor_type='in', node_prob=None, seed_nodes=None, + shuffle=False, num_workers=1, return_seed_id=False): + self._g = g + if not g._graph.is_readonly(): + raise NotImplementedError("subgraph loader only support read-only graphs.") + self._batch_size = batch_size + self._layer_size = layer_size + self._n_layers = n_layers + self._node_prob = node_prob + self._return_seed_id = return_seed_id + if self._node_prob is not None: + assert self._node_prob.shape[0] == g.number_of_nodes(), \ + "We need to know the sampling probability of every node" + if seed_nodes is None: + self._seed_nodes = F.arange(0, g.number_of_nodes()) + else: + self._seed_nodes = seed_nodes + if shuffle: + self._seed_nodes = F.rand_shuffle(self._seed_nodes) + self._num_workers = num_workers + self._neighbor_type = neighbor_type + self._subgraphs = [] + self._seed_ids = [] + self._subgraph_idx = 0 + + def _prefetch(self): + seed_ids = [] + num_nodes = len(self._seed_nodes) + for i in range(self._num_workers): + start = self._subgraph_idx * self._batch_size + # if we have visited all nodes, don't do anything. 
+ if start >= num_nodes: + break + end = min((self._subgraph_idx + 1) * self._batch_size, num_nodes) + seed_ids.append(utils.toindex(self._seed_nodes[start:end])) + self._subgraph_idx += 1 + sgi = self._g._graph.layer_sampling(seed_ids, self._layer_size, + self._n_layers, self._neighbor_type, + self._node_prob) + subgraphs = [DGLSubGraph(self._g, i.induced_nodes, i.induced_edges, \ + i) for i in sgi] + self._subgraphs.extend(subgraphs) + if self._return_seed_id: + self._seed_ids.extend(seed_ids) + + def __iter__(self): + return self + + def __next__(self): + # If we don't have prefetched subgraphs, let's prefetch them. + if len(self._subgraphs) == 0: + self._prefetch() + # At this point, if we still don't have subgraphs, we must have + # iterate all subgraphs and we should stop the iterator now. + if len(self._subgraphs) == 0: + raise StopIteration + aux_infos = {} + if self._return_seed_id: + aux_infos['seeds'] = self._seed_ids.pop(0).tousertensor() + return self._subgraphs.pop(0), aux_infos + class _Prefetcher(object): """Internal shared prefetcher logic. It can be sub-classed by a Thread-based implementation or Process-based implementation.""" @@ -255,3 +318,66 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, return loader else: return _PrefetchingLoader(loader, num_prefetch=num_workers*2) + +def LayerSampler(g, batch_size, layer_size, n_layers=1, + neighbor_type='in', node_prob=None, seed_nodes=None, + shuffle=False, num_workers=1, + return_seed_id=False, prefetch=False): + '''Create a sampler that samples neighborhood. + + This creates a subgraph data loader that samples subgraphs from the input graph + with neighbor sampling. This sampling method is implemented in C and can perform + sampling very efficiently. + + A subgraph grows from a seed vertex. It contains sampled neighbors + of the seed vertex as well as the edges that connect neighbor nodes with + seed nodes. 
When the number of hops is k (>1), the neighbors are sampled + from the k-hop neighborhood. In this case, the sampled edges are the ones + that connect the source nodes and the sampled neighbor nodes of the source + nodes. + + The subgraph loader returns a list of subgraphs and a dictionary of additional + information about the subgraphs. The size of the subgraph list is the number of workers. + + The dictionary contains: + + - seeds: a list of 1D tensors of seed Ids, if return_seed_id is True. + + Parameters + ---------- + g: the DGLGraph where we sample subgraphs. + batch_size: The number of subgraphs in a batch. + layer_size: the number of neighbors sampled from the neighbor list + of a vertex. The value of this parameter can be + an integer: indicates the number of neighbors sampled from a neighbor list. + a floating-point: indicates the ratio of the sampled neighbors in a neighbor list. + string: indicates some common ways of calculating the number of sampled neighbors, + e.g., 'sqrt(deg)'. + n_layers: The size of the neighborhood where we sample vertices. + neighbor_type: indicates the neighbors on different types of edges. + "in" means the neighbors on the in-edges, "out" means the neighbors on + the out-edges and "both" means neighbors on both types of edges. + node_prob: the probability that a neighbor node is sampled. + 1D Tensor. None means uniform sampling. Otherwise, the number of elements + should be the same as the number of vertices in the graph. + seed_nodes: a list of nodes where we sample subgraphs from. + If it's None, the seed vertices are all vertices in the graph. + shuffle: indicates the sampled subgraphs are shuffled. + num_workers: the number of worker threads that sample subgraphs in parallel. + return_seed_id: indicates whether to return seed ids along with the subgraphs. + The seed Ids are in the parent graph. + prefetch : bool, default False + Whether to prefetch the samples in the next batch. 
+ + Returns + ------- + A subgraph iterator + The iterator returns a list of batched subgraphs and a dictionary of additional + information about the subgraphs. + ''' + loader = LSSubgraphLoader(g, batch_size, layer_size, n_layers, neighbor_type, node_prob, + seed_nodes, shuffle, num_workers, return_seed_id) + if not prefetch: + return loader + else: + return _PrefetchingLoader(loader, num_prefetch=num_workers*2) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index f3326d338a14..5516a510b4f1 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -669,10 +669,25 @@ def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, no seed_ids = [v.todgltensor() for v in seed_ids] num_subgs = len(seed_ids) if node_prob is None: - rst = _uniform_sampling(self, seed_ids, neighbor_type, num_hops, expand_factor) + rst = _uniform_neighbor_sampling(self, seed_ids, neighbor_type, num_hops, + expand_factor) else: - rst = _nonuniform_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, - expand_factor) + rst = _nonuniform_neighbor_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, expand_factor) + + return [SubgraphIndex(rst(i), self, utils.toindex(rst(num_subgs + i)), + utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] + + def layer_sampling(self, seed_ids, layer_size, n_layers, layer_type, node_prob): + """Neighborhood sampling""" + if len(seed_ids) == 0: + return [] + + seed_ids = [v.todgltensor() for v in seed_ids] + num_subgs = len(seed_ids) + if node_prob is None: + rst = _uniform_layer_sampling(self, seed_ids, layer_type, n_layers, layer_size) + else: + rst = _nonuniform_layer_sampling(self, node_prob, seed_ids, layer_type, n_layers, layer_size) return [SubgraphIndex(rst(i), self, utils.toindex(rst(num_subgs + i)), utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] @@ -1003,19 +1018,19 @@ def create_graph_index(graph_data=None, multigraph=False, readonly=False): # 
TODO(zhengda): we'll support variable-length inputs. _NEIGHBOR_SAMPLING_APIS = { - 1: _CAPI_DGLGraphUniformSampling, - 2: _CAPI_DGLGraphUniformSampling2, - 4: _CAPI_DGLGraphUniformSampling4, - 8: _CAPI_DGLGraphUniformSampling8, - 16: _CAPI_DGLGraphUniformSampling16, - 32: _CAPI_DGLGraphUniformSampling32, - 64: _CAPI_DGLGraphUniformSampling64, - 128: _CAPI_DGLGraphUniformSampling128, + 1: _CAPI_DGLGraphNeighborUniformSampling, + 2: _CAPI_DGLGraphNeighborUniformSampling2, + 4: _CAPI_DGLGraphNeighborUniformSampling4, + 8: _CAPI_DGLGraphNeighborUniformSampling8, + 16: _CAPI_DGLGraphNeighborUniformSampling16, + 32: _CAPI_DGLGraphNeighborUniformSampling32, + 64: _CAPI_DGLGraphNeighborUniformSampling64, + 128: _CAPI_DGLGraphNeighborUniformSampling128, } _EMPTY_ARRAYS = [utils.toindex(F.ones(shape=(0), dtype=F.int64, ctx=F.cpu()))] -def _uniform_sampling(gidx, seed_ids, neigh_type, num_hops, expand_factor): +def _uniform_neighbor_sampling(gidx, seed_ids, neigh_type, num_hops, expand_factor): num_seeds = len(seed_ids) empty_ids = [] if len(seed_ids) > 1 and len(seed_ids) not in _NEIGHBOR_SAMPLING_APIS.keys(): @@ -1025,3 +1040,25 @@ def _uniform_sampling(gidx, seed_ids, neigh_type, num_hops, expand_factor): assert len(seed_ids) in _NEIGHBOR_SAMPLING_APIS.keys() return _NEIGHBOR_SAMPLING_APIS[len(seed_ids)](gidx._handle, *seed_ids, neigh_type, num_hops, expand_factor, num_seeds) + +_LAYER_SAMPLING_APIS = { + 1: _CAPI_DGLGraphLayerUniformSampling, + 2: _CAPI_DGLGraphLayerUniformSampling2, + 4: _CAPI_DGLGraphLayerUniformSampling4, + 8: _CAPI_DGLGraphLayerUniformSampling8, + 16: _CAPI_DGLGraphLayerUniformSampling16, + 32: _CAPI_DGLGraphLayerUniformSampling32, + 64: _CAPI_DGLGraphLayerUniformSampling64, + 128: _CAPI_DGLGraphLayerUniformSampling128, +} + +def _uniform_layer_sampling(gidx, seed_ids, neigh_type, n_layers, layer_size): + num_seeds = len(seed_ids) + empty_ids = [] + if len(seed_ids) > 1 and len(seed_ids) not in _LAYER_SAMPLING_APIS.keys(): + remain = 
2**int(math.ceil(math.log2(len(seed_ids)))) - len(seed_ids) + empty_ids = _EMPTY_ARRAYS[0:remain] + seed_ids.extend([empty.todgltensor() for empty in empty_ids]) + assert len(seed_ids) in _LAYER_SAMPLING_APIS.keys() + return _LAYER_SAMPLING_APIS[len(seed_ids)](gidx._handle, *seed_ids, neigh_type, + n_layers, layer_size, num_seeds) diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index 58726437460f..4b9dbf6f5f12 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -454,23 +454,62 @@ void CAPI_NeighborUniformSample(DGLArgs args, DGLRetValue* rv) { *rv = ConvertSubgraphToPackedFunc(subgs); } -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNeighborUniformSampling") .set_body(CAPI_NeighborUniformSample<1>); -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling2") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNeighborUniformSampling2") .set_body(CAPI_NeighborUniformSample<2>); -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling4") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNeighborUniformSampling4") .set_body(CAPI_NeighborUniformSample<4>); -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling8") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNeighborUniformSampling8") .set_body(CAPI_NeighborUniformSample<8>); -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling16") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNeighborUniformSampling16") .set_body(CAPI_NeighborUniformSample<16>); -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling32") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNeighborUniformSampling32") .set_body(CAPI_NeighborUniformSample<32>); -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling64") +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNeighborUniformSampling64") .set_body(CAPI_NeighborUniformSample<64>); -DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphUniformSampling128") 
+DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphNeighborUniformSampling128") .set_body(CAPI_NeighborUniformSample<128>); +template +void CAPI_LayerUniformSample(DGLArgs args, DGLRetValue* rv) { + GraphHandle ghandle = args[0]; + std::vector seeds(num_seeds); + for (size_t i = 0; i < seeds.size(); i++) + seeds[i] = IdArray::FromDLPack(CreateTmpDLManagedTensor(args[i + 1])); + std::string neigh_type = args[num_seeds + 1]; + const int num_hops = args[num_seeds + 2]; + const int num_neighbors = args[num_seeds + 3]; + const int num_valid_seeds = args[num_seeds + 4]; + const GraphInterface *ptr = static_cast(ghandle); + const ImmutableGraph *gptr = dynamic_cast(ptr); + CHECK(gptr) << "sampling isn't implemented in mutable graph"; + CHECK(num_valid_seeds <= num_seeds); + std::vector subgs(seeds.size()); +#pragma omp parallel for + for (int i = 0; i < num_valid_seeds; i++) { + subgs[i] = gptr->LayerUniformSample(seeds[i], neigh_type, num_hops, num_neighbors); + } + *rv = ConvertSubgraphToPackedFunc(subgs); +} + +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLayerUniformSampling") +.set_body(CAPI_LayerUniformSample<1>); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLayerUniformSampling2") +.set_body(CAPI_LayerUniformSample<2>); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLayerUniformSampling4") +.set_body(CAPI_LayerUniformSample<4>); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLayerUniformSampling8") +.set_body(CAPI_LayerUniformSample<8>); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLayerUniformSampling16") +.set_body(CAPI_LayerUniformSample<16>); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLayerUniformSampling32") +.set_body(CAPI_LayerUniformSample<32>); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLayerUniformSampling64") +.set_body(CAPI_LayerUniformSample<64>); +DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLayerUniformSampling128") +.set_body(CAPI_LayerUniformSample<128>); + DGL_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphGetAdj") .set_body([] 
(DGLArgs args, DGLRetValue* rv) { GraphHandle ghandle = args[0]; From 358f7462b71ba974a0f2339c9c719854a91c8ace Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Mon, 21 Jan 2019 12:07:50 +0800 Subject: [PATCH 06/10] test layer sampler with sse --- examples/mxnet/sse/sse_batch.py | 36 ++++++++++++++++++------- python/dgl/contrib/sampling/__init__.py | 1 + python/dgl/contrib/sampling/sampler.py | 8 +++--- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/examples/mxnet/sse/sse_batch.py b/examples/mxnet/sse/sse_batch.py index 808800ec2389..f964e9cdd8fe 100644 --- a/examples/mxnet/sse/sse_batch.py +++ b/examples/mxnet/sse/sse_batch.py @@ -266,9 +266,16 @@ def main(args, data): neigh_expand = args.neigh_expand # initialize graph dur = [] - sampler = dgl.contrib.sampling.NeighborSampler(g, args.batch_size, neigh_expand, - neighbor_type='in', num_workers=args.num_parallel_subgraphs, seed_nodes=train_vs, - shuffle=True, return_seed_id=True) + if args.sampler == 'neighbor': + sampler = dgl.contrib.sampling.NeighborSampler(g, args.batch_size, neigh_expand, + neighbor_type='in', num_workers=args.num_parallel_subgraphs, seed_nodes=train_vs, + shuffle=True, return_seed_id=True) + elif args.sampler == 'layer': + sampler = dgl.contrib.sampling.LayerSampler(g, args.batch_size, neigh_expand, + neighbor_type='in', num_workers=args.num_parallel_subgraphs, seed_nodes=train_vs, + shuffle=True, return_seed_id=True) + else: + raise RuntimeError("Unsupported sampler!") if args.cache_subgraph: sampler = CachedSubgraphLoader(sampler, shuffle=True) for epoch in range(args.n_epochs): @@ -313,11 +320,20 @@ def main(args, data): if args.cache_subgraph: sampler.restart() else: - sampler = dgl.contrib.sampling.NeighborSampler(g, args.batch_size, neigh_expand, - neighbor_type='in', - num_workers=args.num_parallel_subgraphs, - seed_nodes=train_vs, shuffle=True, - return_seed_id=True) + if args.sampler == 'neighbor': + sampler = dgl.contrib.sampling.NeighborSampler(g, args.batch_size, 
neigh_expand, + neighbor_type='in', + num_workers=args.num_parallel_subgraphs, + seed_nodes=train_vs, shuffle=True, + return_seed_id=True) + elif args.sampler == 'layer': + sampler = dgl.contrib.sampling.LayerSampler(g, args.batch_size, neigh_expand, + neighbor_type='in', + num_workers=args.num_parallel_subgraphs, + seed_nodes=train_vs, shuffle=True, + return_seed_id=True) + else: + raise RuntimeError("Unsupported sampler!") # test set accuracy logits = model_infer(g, eval_vs) @@ -368,7 +384,7 @@ def __init__(self, csr, num_feats): self.train_mask = None if __name__ == '__main__': - parser = argparse.ArgumentParser(description='GCN') + parser = argparse.ArgumentParser(description='SSE') register_data_args(parser) parser.add_argument("--graph-file", type=str, default="", help="graph file") @@ -400,6 +416,8 @@ def __init__(self, csr, num_feats): help="the number of subgraphs to construct in parallel.") parser.add_argument("--neigh-expand", type=int, default=16, help="the number of neighbors to sample.") + parser.add_argument("--sampler", type=str, default="neighbor", + help="neighbor/layer sampler") args = parser.parse_args() print("cache: " + str(args.cache_subgraph)) diff --git a/python/dgl/contrib/sampling/__init__.py b/python/dgl/contrib/sampling/__init__.py index 1999bf619390..18ad3f0031be 100644 --- a/python/dgl/contrib/sampling/__init__.py +++ b/python/dgl/contrib/sampling/__init__.py @@ -1 +1,2 @@ from .sampler import NeighborSampler +from .sampler import LayerSampler diff --git a/python/dgl/contrib/sampling/sampler.py b/python/dgl/contrib/sampling/sampler.py index 62296cf1d83a..dc93d5710361 100644 --- a/python/dgl/contrib/sampling/sampler.py +++ b/python/dgl/contrib/sampling/sampler.py @@ -319,10 +319,10 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, else: return _PrefetchingLoader(loader, num_prefetch=num_workers*2) -def NeighborSampler(g, batch_size, layer_size, n_layers=1, - neighbor_type='in', node_prob=None, seed_nodes=None, - 
shuffle=False, num_workers=1, - return_seed_id=False, prefetch=False): +def LayerSampler(g, batch_size, layer_size, n_layers=1, + neighbor_type='in', node_prob=None, seed_nodes=None, + shuffle=False, num_workers=1, + return_seed_id=False, prefetch=False): '''Create a sampler that samples neighborhood. This creates a subgraph data loader that samples subgraphs from the input graph From 49ea85700cfaa9d36c8deef337765ca70878285f Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Mon, 21 Jan 2019 15:24:17 +0800 Subject: [PATCH 07/10] test with multi-layer gcn --- examples/pytorch/gcn/gcn_ls.py | 281 +++++++++++++++++++++++++++++++++ 1 file changed, 281 insertions(+) create mode 100644 examples/pytorch/gcn/gcn_ls.py diff --git a/examples/pytorch/gcn/gcn_ls.py b/examples/pytorch/gcn/gcn_ls.py new file mode 100644 index 000000000000..760f33176b78 --- /dev/null +++ b/examples/pytorch/gcn/gcn_ls.py @@ -0,0 +1,281 @@ +''' +Adapted from Ziyue's code. +''' + + +import argparse, time, math +import numpy as np +import mxnet as mx +from mxnet import gluon +import dgl +from dgl import DGLGraph +import dgl.function as fn +from dgl.data import register_data_args, load_data +import scipy as sp +from dgl import utils + + +def generate_rand_graph(n): + arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64) + return dgl.DGLGraph(arr, readonly=True) + + +def sample_subgraph(g, seed_nodes, n_layers, layer_size): + induced_nodes = [] + seeds = seed_nodes + parent_uv_edges_per_hop = [] + for _ in range(n_layers): + for subg, aux in dgl.contrib.sampling.LayerSampler(g, 1000000, layer_size, + neighbor_type='in', + seed_nodes=np.array(seeds), + return_seed_id=True): + #seed_ids = aux['seeds'] + #print(g.in_edges(seeds, form='all')) + subg_src, subg_dst = subg.edges() + parent_nid = subg.parent_nid + src = parent_nid[subg_src] + dst = parent_nid[subg_dst] + parent_uv_edges_per_hop.append((src.asnumpy(), dst.asnumpy())) + #print((src, dst)) + seeds = 
list(np.unique(src.asnumpy())) + induced_nodes.extend(list(parent_nid.asnumpy())) + + subgraph = g.subgraph(list(np.unique(np.array(induced_nodes)))) + #print(subgraph.parent_nid) + #print(parent_uv_edges_per_hop) + subg_uv_edges_per_hop = [(subgraph.map_to_subgraph_nid(src).asnumpy(), + subgraph.map_to_subgraph_nid(dst).asnumpy()) + for src, dst in parent_uv_edges_per_hop] + #print(subg_uv_edges_per_hop) + return subgraph, subg_uv_edges_per_hop + + +def test_sample(): + g = generate_rand_graph(100) + n_layers = 3 + seeds = [10, 20] + layer_size = 2 + subgraph, subg_uv_edges_per_hop = sample_subgraph(g, seeds, n_layers, layer_size) + + +class GCNLayer(gluon.Block): + def __init__(self, + in_feats, + out_feats, + activation, + dropout, + bias=True): + super(GCNLayer, self).__init__() + with self.name_scope(): + stdv = 1. / math.sqrt(out_feats) + self.weight = self.params.get('weight', shape=(in_feats, out_feats), + init=mx.init.Uniform(stdv)) + if bias: + self.bias = self.params.get('bias', shape=(out_feats,), + init=mx.init.Uniform(stdv)) + else: + self.bias = None + self.activation = activation + self.dropout = dropout + + def forward(self, h, subg, subg_edges): + if self.dropout: + h = mx.nd.Dropout(h, p=self.dropout) + h = mx.nd.dot(h, self.weight.data(h.context)) + # TODO: the normalization term need to be tuned. + # temporarilly normalized by sqrt(layer_size). + # for an unbiased estimator, it should be normalized by n(v)/D(v), + # but might be bad in practice + h = h / math.sqrt(3.) + subg.ndata['h'] = h + if subg_edges is not None: + subg.send_and_recv(subg_edges, fn.copy_src(src='h', out='m'), + fn.sum(msg='m', out='h')) + else: + subg.update_all(fn.copy_src(src='h', out='m'), + fn.sum(msg='m', out='h')) + h = subg.ndata.pop('h') + # same as TODO above + h = h / math.sqrt(3.) 
+ # bias + if self.bias is not None: + h = h + self.bias.data(h.context) + if self.activation: + h = self.activation(h) + return h + + +class GCN(gluon.Block): + def __init__(self, + in_feats, + n_hidden, + n_classes, + n_layers, + activation, + dropout, + normalization): + super(GCN, self).__init__() + self.n_layers = n_layers + self.layers = gluon.nn.Sequential() + # input layer + self.layers.add(GCNLayer(in_feats, n_hidden, activation, 0.)) + # hidden layers + for i in range(n_layers - 1): + self.layers.add(GCNLayer(n_hidden, n_hidden, activation, dropout)) + # output layer + self.layers.add(GCNLayer(n_hidden, n_classes, None, dropout)) + + + def forward(self, subg, subg_edges_per_hop): + h = subg.ndata['in'] + + for i, layer in enumerate(self.layers): + h = layer(h, subg, subg_edges_per_hop[self.n_layers-i]) + + return h + + +def evaluate(model, g, n_layers, labels, mask): + pred = model(g, [None for i in range(n_layers)]).argmax(axis=1) + accuracy = ((pred == labels) * mask).sum() / mask.sum().asscalar() + return accuracy.asscalar() + + +def main(args): + # load and preprocess dataset + data = load_data(args) + + if args.self_loop: + data.graph.add_edges_from([(i,i) for i in range(len(data.graph))]) + + features = mx.nd.array(data.features) + labels = mx.nd.array(data.labels) + train_mask = mx.nd.array(data.train_mask) + val_mask = mx.nd.array(data.val_mask) + test_mask = mx.nd.array(data.test_mask) + in_feats = features.shape[1] + n_classes = data.num_labels + n_nodes = data.graph.number_of_nodes() + n_edges = data.graph.number_of_edges() + print("""----Data statistics------' + #Nodes %d + #Edges %d + #Classes %d + #Train samples %d + #Val samples %d + #Test samples %d""" % + (n_nodes, n_edges, n_classes, + train_mask.sum().asscalar(), + val_mask.sum().asscalar(), + test_mask.sum().asscalar())) + + if args.gpu < 0: + cuda = False + ctx = mx.cpu(0) + else: + cuda = True + ctx = mx.gpu(args.gpu) + + features = features.as_in_context(ctx) + labels = 
labels.as_in_context(ctx) + train_mask = train_mask.as_in_context(ctx) + val_mask = val_mask.as_in_context(ctx) + test_mask = test_mask.as_in_context(ctx) + + layer_size = args.layer_size + # create GCN model + g = DGLGraph(data.graph, readonly=True) + # normalization + degs = g.in_degrees().astype('float32') + degs[degs > layer_size] = layer_size + norm = mx.nd.power(degs, -0.5) + if cuda: + norm = norm.as_in_context(ctx) + g.ndata['norm'] = mx.nd.expand_dims(norm, 1) + g.ndata['in'] = features + + num_data = len(train_mask.asnumpy()) + full_idx = np.arange(0, num_data) + train_idx = full_idx[train_mask.asnumpy() == 1] + + seed_nodes = list(train_idx) + n_layers = args.n_layers + 1 + + model = GCN(in_feats, + args.n_hidden, + n_classes, + args.n_layers, + mx.nd.relu, + args.dropout, + args.normalization) + model.initialize(ctx=ctx) + n_train_samples = train_mask.sum().asscalar() + loss_fcn = gluon.loss.SoftmaxCELoss() + + # use optimizer + print(model.collect_params()) + trainer = gluon.Trainer(model.collect_params(), 'adam', + {'learning_rate': args.lr, 'wd': args.weight_decay}) + + # initialize graph + dur = [] + for epoch in range(args.n_epochs): + if epoch >= 3: + t0 = time.time() + + subg, subg_edges_per_hop = sample_subgraph(g, seed_nodes, n_layers, layer_size) + subg_train_mask = subg.map_to_subgraph_nid(train_idx) + subg.copy_from_parent() + + # forward + smask = np.zeros((len(subg.parent_nid),)) + smask[subg_train_mask.asnumpy()] = 1 + + with mx.autograd.record(): + pred = model(subg, subg_edges_per_hop) + loss = loss_fcn(pred, labels[subg.parent_nid], mx.nd.expand_dims(mx.nd.array(smask), 1)) + loss = loss.sum() / n_train_samples + + print(loss.asnumpy()) + loss.backward() + trainer.step(batch_size=1) + + if epoch >= 3: + dur.append(time.time() - t0) + acc = evaluate(model, g, n_layers, labels, val_mask) + print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " + "ETputs(KTEPS) {:.2f}". 
format( + epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000)) + + acc = evaluate(model, g, n_layers, labels, test_mask) + print("Test accuracy {:.2%}".format(acc)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='GCN') + register_data_args(parser) + parser.add_argument("--dropout", type=float, default=0.5, + help="dropout probability") + parser.add_argument("--gpu", type=int, default=-1, + help="gpu") + parser.add_argument("--lr", type=float, default=3e-2, + help="learning rate") + parser.add_argument("--n-epochs", type=int, default=200, + help="number of training epochs") + parser.add_argument("--n-hidden", type=int, default=16, + help="number of hidden gcn units") + parser.add_argument("--n-layers", type=int, default=1, + help="number of hidden gcn layers") + parser.add_argument("--layer-size", type=int, default=128, + help="number of neighbors to be sampled") + parser.add_argument("--normalization", + choices=['sym','left'], default=None, + help="graph normalization types (default=None)") + parser.add_argument("--self-loop", action='store_true', + help="graph self-loop (default=False)") + parser.add_argument("--weight-decay", type=float, default=5e-4, + help="Weight for L2 loss") + args = parser.parse_args() + + print(args) + + main(args) From 9a81b6bfc1221b6a167ce05d8bef767496264499 Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Mon, 21 Jan 2019 15:38:07 +0800 Subject: [PATCH 08/10] lint --- python/dgl/graph_index.py | 6 ++++-- src/graph/graph.cc | 22 ++++++++++++++-------- src/graph/immutable_graph.cc | 19 ++++++++++++------- 3 files changed, 30 insertions(+), 17 deletions(-) diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 5516a510b4f1..903468c0ae92 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -672,7 +672,8 @@ def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, no rst = _uniform_neighbor_sampling(self, seed_ids, neighbor_type, 
num_hops, expand_factor) else: - rst = _nonuniform_neighbor_sampling(self, node_prob, seed_ids, neighbor_type, num_hops, expand_factor) + rst = _nonuniform_neighbor_sampling(self, node_prob, seed_ids, neighbor_type, + num_hops, expand_factor) return [SubgraphIndex(rst(i), self, utils.toindex(rst(num_subgs + i)), utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] @@ -687,7 +688,8 @@ def layer_sampling(self, seed_ids, layer_size, n_layers, layer_type, node_prob): if node_prob is None: rst = _uniform_layer_sampling(self, seed_ids, layer_type, n_layers, layer_size) else: - rst = _nonuniform_layer_sampling(self, node_prob, seed_ids, layer_type, n_layers, layer_size) + rst = _nonuniform_layer_sampling(self, node_prob, seed_ids, layer_type, + n_layers, layer_size) return [SubgraphIndex(rst(i), self, utils.toindex(rst(num_subgs + i)), utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] diff --git a/src/graph/graph.cc b/src/graph/graph.cc index 3928f505fc72..4865895400bc 100644 --- a/src/graph/graph.cc +++ b/src/graph/graph.cc @@ -20,8 +20,10 @@ Graph::Graph(IdArray src_ids, IdArray dst_ids, IdArray edge_ids, size_t num_node CHECK(IsValidIdArray(edge_ids)); this->AddVertices(num_nodes); num_edges_ = src_ids->shape[0]; - CHECK(static_cast(num_edges_) == dst_ids->shape[0]) << "vectors in COO must have the same length"; - CHECK(static_cast(num_edges_) == edge_ids->shape[0]) << "vectors in COO must have the same length"; + CHECK(static_cast(num_edges_) == dst_ids->shape[0]) + << "vectors in COO must have the same length"; + CHECK(static_cast(num_edges_) == edge_ids->shape[0]) + << "vectors in COO must have the same length"; const dgl_id_t *src_data = static_cast(src_ids->data); const dgl_id_t *dst_data = static_cast(dst_ids->data); const dgl_id_t *edge_data = static_cast(edge_ids->data); @@ -507,7 +509,8 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const uint64_t num_edges = NumEdges(); uint64_t num_nodes = NumVertices(); if 
(fmt == "coo") { - IdArray idx = IdArray::Empty({2 * static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray idx = IdArray::Empty({2 * static_cast(num_edges)}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *idx_data = static_cast(idx->data); if (transpose) { std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data); @@ -516,17 +519,20 @@ std::vector Graph::GetAdj(bool transpose, const std::string &fmt) const std::copy(all_edges_dst_.begin(), all_edges_dst_.end(), idx_data); std::copy(all_edges_src_.begin(), all_edges_src_.end(), idx_data + num_edges); } - IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({static_cast(num_edges)}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *eid_data = static_cast(eid->data); for (uint64_t eid = 0; eid < num_edges; ++eid) { eid_data[eid] = eid; } return std::vector{idx, eid}; } else if (fmt == "csr") { - IdArray indptr = IdArray::Empty({static_cast(num_nodes) + 1}, DLDataType{kDLInt, 64, 1}, - DLContext{kDLCPU, 0}); - IdArray indices = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - IdArray eid = IdArray::Empty({static_cast(num_edges)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indptr = IdArray::Empty({static_cast(num_nodes) + 1}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray indices = IdArray::Empty({static_cast(num_edges)}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + IdArray eid = IdArray::Empty({static_cast(num_edges)}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); int64_t *indptr_data = static_cast(indptr->data); int64_t *indices_data = static_cast(indices->data); int64_t *eid_data = static_cast(eid->data); diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index a3562a2f25aa..dd92cc2c9fe6 100644 --- a/src/graph/immutable_graph.cc +++ 
b/src/graph/immutable_graph.cc @@ -908,7 +908,8 @@ SampledSubgraph ImmutableGraph::NeighborSample(IdArray seed_arr, subg.layer_ids = IdArray::Empty({static_cast(num_vertices)}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); subg.sample_prob = runtime::NDArray::Empty({static_cast(num_vertices)}, - DLDataType{kDLFloat, 8 * sizeof(float), 1}, DLContext{kDLCPU, 0}); + DLDataType{kDLFloat, 8 * sizeof(float), 1}, + DLContext{kDLCPU, 0}); dgl_id_t *out = static_cast(subg.induced_vertices->data); dgl_id_t *out_layer = static_cast(subg.layer_ids->data); @@ -1074,8 +1075,8 @@ SampledSubgraph ImmutableGraph::LayerSample(IdArray seed_array, } } - long n_nodes = nodes.size(); - long n_edges = new_eids.size(); + int64_t n_nodes = nodes.size(); + int64_t n_edges = new_eids.size(); auto subg_csr = std::make_shared(n_nodes, n_edges); subg_csr->indptr.resize(n_nodes + 1); subg_csr->indices.resize(n_edges); @@ -1092,10 +1093,14 @@ SampledSubgraph ImmutableGraph::LayerSample(IdArray seed_array, subg.layer_ids = IdArray::Empty({n_nodes}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); subg.sample_prob = runtime::NDArray::Empty({n_nodes}, - DLDataType{kDLFloat, 8 * sizeof(float), 1}, DLContext{kDLCPU, 0}); - std::copy(nodes.begin(), nodes.end(), static_cast(subg.induced_vertices->data)); - std::copy(layer_ids.begin(), layer_ids.end(), static_cast(subg.layer_ids->data)); - std::copy(probabilities.begin(), probabilities.end(), static_cast(subg.sample_prob->data)); + DLDataType{kDLFloat, 8 * sizeof(float), 1}, + DLContext{kDLCPU, 0}); + std::copy(nodes.begin(), nodes.end(), + static_cast(subg.induced_vertices->data)); + std::copy(layer_ids.begin(), layer_ids.end(), + static_cast(subg.layer_ids->data)); + std::copy(probabilities.begin(), probabilities.end(), + static_cast(subg.sample_prob->data)); if (neigh_type == "in") subg.graph = GraphPtr(new ImmutableGraph(subg_csr, nullptr, IsMultigraph())); From 707af9ba84c9fbcada90c15233b324b810d2d410 Mon Sep 17 00:00:00 2001 From: Yu Gai Date: 
Mon, 28 Jan 2019 16:26:38 +0800 Subject: [PATCH 09/10] convergence issue in layer-wise sampling --- examples/mxnet/gcn/gcn_ls.py | 190 +++++++++++++++++ examples/pytorch/gcn/gcn_ls.py | 281 ------------------------- include/dgl/immutable_graph.h | 4 + python/dgl/contrib/sampling/sampler.py | 20 +- python/dgl/graph_index.py | 13 +- src/graph/graph_apis.cc | 1 + src/graph/immutable_graph.cc | 138 ++++++------ 7 files changed, 281 insertions(+), 366 deletions(-) create mode 100644 examples/mxnet/gcn/gcn_ls.py delete mode 100644 examples/pytorch/gcn/gcn_ls.py diff --git a/examples/mxnet/gcn/gcn_ls.py b/examples/mxnet/gcn/gcn_ls.py new file mode 100644 index 000000000000..a63183f9e184 --- /dev/null +++ b/examples/mxnet/gcn/gcn_ls.py @@ -0,0 +1,190 @@ +import argparse, time +import numpy as np +import mxnet as mx +import mxnet.ndarray as nd +from mxnet import gluon +import dgl +from dgl import DGLGraph +from dgl.contrib.sampling import LayerSampler +from dgl.data import register_data_args, load_data +import dgl.function as fn +from dgl.subgraph import DGLSubGraph + +class GCNLayer(gluon.Block): + def __init__(self, in_feats, out_feats, activation, dropout=0): + super(GCNLayer, self).__init__() + self.dropout = dropout + with self.name_scope(): + self.dense = mx.gluon.nn.Dense(out_feats, activation) + + def forward(self, sub_g, src, dst): + if self.dropout > 0: + sub_g.apply_nodes(lambda nodes: {'h' : nd.Dropout(nodes.data['h'], + p=self.dropout)}) + # TODO normalization + if src is None: + sub_g.update_all(fn.copy_src(src='h', out='m'), fn.sum(msg='m', out='h')) + else: + sub_g.send_and_recv((src, dst), + fn.copy_src(src='h', out='m'), + fn.sum(msg='m', out='h')) + sub_g.ndata['h'] = self.dense(sub_g.ndata['h']) + +class GCN(gluon.Block): + def __init__(self, in_feats, n_hidden, n_classes, n_layers, + activation, dropout, normalization): + super(GCN, self).__init__() + self.n_layers = n_layers + self.layers = gluon.nn.Sequential() + # input layer + 
self.layers.add(GCNLayer(in_feats, n_hidden, activation, 0.)) + # hidden layers + for i in range(n_layers - 1): + self.layers.add(GCNLayer(n_hidden, n_hidden, activation, dropout)) + # output layer + self.dense = mx.gluon.nn.Dense(n_classes) + + def forward(self, sub_g): + sub_g.ndata['h'] = sub_g.ndata['x'] + if isinstance(sub_g, DGLSubGraph): + n = sub_g.number_of_nodes() + nid = np.arange(n) + src, dst, eid = sub_g.edge_ids(nid, nid) + src = src.asnumpy() + dst = dst.asnumpy() + eid = eid.asnumpy() + for i, layer in enumerate(self.layers): + mask = eid == i + src = src[mask] + dst = dst[mask] + h = sub_g.ndata['h'] + sample_prob = sub_g.sample_prob.asnumpy() + p = np.expand_dims(np.where(np.isin(nid, src), sample_prob, np.ones(n)), axis=1) + sub_g.ndata['h'] = h * nd.array(p).as_in_context(h.context) + layer(sub_g, src, dst) + else: + for layer in self.layers: + layer(sub_g, None, None) + return self.dense(sub_g.pop_n_repr('h')) + +def evaluate(model, g): + y = g.ndata['y'] + y_bar = nd.argmax(model(g), axis=1) + mask = g.ndata['val_mask'] + accuracy = nd.sum(mask * (y == y_bar)) / nd.sum(mask) + return accuracy.asscalar() + +def main(args): + # load and preprocess dataset + data = load_data(args) + if args.self_loop: + data.graph.add_edges_from([(i, i) for i in range(len(data.graph))]) + n_nodes = data.graph.number_of_nodes() + n_edges = data.graph.number_of_edges() + features = nd.array(data.features) + in_feats = features.shape[1] + labels = nd.array(data.labels) + n_classes = data.num_labels + train_mask = nd.array(data.train_mask) + val_mask = nd.array(data.val_mask) + test_mask = nd.array(data.test_mask) + print("""-----Data statistics----- + # Nodes %d + # Edges %d + # Features %d + # Classes %d + # Train samples %d + # Val samples %d + # Test samples %d""" % (n_nodes, n_edges, in_feats, n_classes, + train_mask.sum().asscalar(), + val_mask.sum().asscalar(), + test_mask.sum().asscalar())) + + train_nid = 
np.arange(n_nodes)[data.train_mask.astype(bool)].tolist() + + ctx = mx.cpu(0) if args.gpu < 0 else mx.gpu(args.gpu) + features = features.as_in_context(ctx) + labels = labels.as_in_context(ctx) + train_mask = train_mask.as_in_context(ctx) + val_mask = val_mask.as_in_context(ctx) + test_mask = test_mask.as_in_context(ctx) + + g = DGLGraph(data.graph, readonly=True) + g.ndata['x'] = features + g.ndata['y'] = labels + g.ndata['train_mask'] = train_mask + g.ndata['val_mask'] = val_mask + g.ndata['test_mask'] = test_mask + deg = g.in_degrees().astype('float32').as_in_context(ctx) + g.ndata['normalizer'] = nd.expand_dims(nd.power(deg, -0.5), 1) + assert not g.is_multigraph + + model = GCN(in_feats, args.n_hidden, n_classes, args.n_layers, + 'relu', args.dropout, args.normalization) + model.initialize(ctx=ctx) + print(model.collect_params()) + + trainer = gluon.Trainer(model.collect_params(), 'adam', + {'learning_rate': args.lr, 'wd': args.weight_decay}) + + def sampler(): + for x in LayerSampler(g, 1000000, args.layer_size, args.n_layers, + neighbor_type='in', seed_nodes=train_nid, + return_prob=True): + yield x + + dur = [] + for epoch in range(args.n_epochs): + t0 = time.time() + + sub_g, _ = next(sampler()) + sub_g.copy_from_parent() + +# print(sub_g.number_of_nodes(), sub_g.number_of_edges()) + + with mx.autograd.record(): + y = sub_g.ndata['y'] + y_bar = model(sub_g) + loss = nd.mean(gluon.loss.SoftmaxCELoss()(y_bar, y)) + + loss.backward() + trainer.step(batch_size=1) + + dur.append(time.time() - t0) + acc = evaluate(model, g) + print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " + "ETputs(KTEPS) {:.2f}".format( + epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000)) + + acc = evaluate(model, g) + print("Test accuracy {:.2%}".format(acc)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='GCN') + register_data_args(parser) + parser.add_argument("--dropout", type=float, default=0.5, + help="dropout 
probability") + parser.add_argument("--gpu", type=int, default=-1, + help="gpu") + parser.add_argument("--lr", type=float, default=3e-2, + help="learning rate") + parser.add_argument("--n-epochs", type=int, default=200, + help="number of training epochs") + parser.add_argument("--n-hidden", type=int, default=16, + help="number of hidden gcn units") + parser.add_argument("--n-layers", type=int, default=1, + help="number of hidden gcn layers") + parser.add_argument("--layer-size", type=int, default=128, + help="number of neighbors to be sampled") + parser.add_argument("--normalization", + choices=['sym'], default=None, + help="graph normalization types (default=None)") + parser.add_argument("--self-loop", action='store_true', + help="graph self-loop (default=False)") + parser.add_argument("--weight-decay", type=float, default=5e-4, + help="Weight for L2 loss") + args = parser.parse_args() + + print(args) + + main(args) diff --git a/examples/pytorch/gcn/gcn_ls.py b/examples/pytorch/gcn/gcn_ls.py deleted file mode 100644 index 760f33176b78..000000000000 --- a/examples/pytorch/gcn/gcn_ls.py +++ /dev/null @@ -1,281 +0,0 @@ -''' -Adapted from Ziyue's code. 
-''' - - -import argparse, time, math -import numpy as np -import mxnet as mx -from mxnet import gluon -import dgl -from dgl import DGLGraph -import dgl.function as fn -from dgl.data import register_data_args, load_data -import scipy as sp -from dgl import utils - - -def generate_rand_graph(n): - arr = (sp.sparse.random(n, n, density=0.1, format='coo') != 0).astype(np.int64) - return dgl.DGLGraph(arr, readonly=True) - - -def sample_subgraph(g, seed_nodes, n_layers, layer_size): - induced_nodes = [] - seeds = seed_nodes - parent_uv_edges_per_hop = [] - for _ in range(n_layers): - for subg, aux in dgl.contrib.sampling.LayerSampler(g, 1000000, layer_size, - neighbor_type='in', - seed_nodes=np.array(seeds), - return_seed_id=True): - #seed_ids = aux['seeds'] - #print(g.in_edges(seeds, form='all')) - subg_src, subg_dst = subg.edges() - parent_nid = subg.parent_nid - src = parent_nid[subg_src] - dst = parent_nid[subg_dst] - parent_uv_edges_per_hop.append((src.asnumpy(), dst.asnumpy())) - #print((src, dst)) - seeds = list(np.unique(src.asnumpy())) - induced_nodes.extend(list(parent_nid.asnumpy())) - - subgraph = g.subgraph(list(np.unique(np.array(induced_nodes)))) - #print(subgraph.parent_nid) - #print(parent_uv_edges_per_hop) - subg_uv_edges_per_hop = [(subgraph.map_to_subgraph_nid(src).asnumpy(), - subgraph.map_to_subgraph_nid(dst).asnumpy()) - for src, dst in parent_uv_edges_per_hop] - #print(subg_uv_edges_per_hop) - return subgraph, subg_uv_edges_per_hop - - -def test_sample(): - g = generate_rand_graph(100) - n_layers = 3 - seeds = [10, 20] - layer_size = 2 - subgraph, subg_uv_edges_per_hop = sample_subgraph(g, seeds, n_layers, layer_size) - - -class GCNLayer(gluon.Block): - def __init__(self, - in_feats, - out_feats, - activation, - dropout, - bias=True): - super(GCNLayer, self).__init__() - with self.name_scope(): - stdv = 1. 
/ math.sqrt(out_feats) - self.weight = self.params.get('weight', shape=(in_feats, out_feats), - init=mx.init.Uniform(stdv)) - if bias: - self.bias = self.params.get('bias', shape=(out_feats,), - init=mx.init.Uniform(stdv)) - else: - self.bias = None - self.activation = activation - self.dropout = dropout - - def forward(self, h, subg, subg_edges): - if self.dropout: - h = mx.nd.Dropout(h, p=self.dropout) - h = mx.nd.dot(h, self.weight.data(h.context)) - # TODO: the normalization term need to be tuned. - # temporarilly normalized by sqrt(layer_size). - # for an unbiased estimator, it should be normalized by n(v)/D(v), - # but might be bad in practice - h = h / math.sqrt(3.) - subg.ndata['h'] = h - if subg_edges is not None: - subg.send_and_recv(subg_edges, fn.copy_src(src='h', out='m'), - fn.sum(msg='m', out='h')) - else: - subg.update_all(fn.copy_src(src='h', out='m'), - fn.sum(msg='m', out='h')) - h = subg.ndata.pop('h') - # same as TODO above - h = h / math.sqrt(3.) - # bias - if self.bias is not None: - h = h + self.bias.data(h.context) - if self.activation: - h = self.activation(h) - return h - - -class GCN(gluon.Block): - def __init__(self, - in_feats, - n_hidden, - n_classes, - n_layers, - activation, - dropout, - normalization): - super(GCN, self).__init__() - self.n_layers = n_layers - self.layers = gluon.nn.Sequential() - # input layer - self.layers.add(GCNLayer(in_feats, n_hidden, activation, 0.)) - # hidden layers - for i in range(n_layers - 1): - self.layers.add(GCNLayer(n_hidden, n_hidden, activation, dropout)) - # output layer - self.layers.add(GCNLayer(n_hidden, n_classes, None, dropout)) - - - def forward(self, subg, subg_edges_per_hop): - h = subg.ndata['in'] - - for i, layer in enumerate(self.layers): - h = layer(h, subg, subg_edges_per_hop[self.n_layers-i]) - - return h - - -def evaluate(model, g, n_layers, labels, mask): - pred = model(g, [None for i in range(n_layers)]).argmax(axis=1) - accuracy = ((pred == labels) * mask).sum() / 
mask.sum().asscalar() - return accuracy.asscalar() - - -def main(args): - # load and preprocess dataset - data = load_data(args) - - if args.self_loop: - data.graph.add_edges_from([(i,i) for i in range(len(data.graph))]) - - features = mx.nd.array(data.features) - labels = mx.nd.array(data.labels) - train_mask = mx.nd.array(data.train_mask) - val_mask = mx.nd.array(data.val_mask) - test_mask = mx.nd.array(data.test_mask) - in_feats = features.shape[1] - n_classes = data.num_labels - n_nodes = data.graph.number_of_nodes() - n_edges = data.graph.number_of_edges() - print("""----Data statistics------' - #Nodes %d - #Edges %d - #Classes %d - #Train samples %d - #Val samples %d - #Test samples %d""" % - (n_nodes, n_edges, n_classes, - train_mask.sum().asscalar(), - val_mask.sum().asscalar(), - test_mask.sum().asscalar())) - - if args.gpu < 0: - cuda = False - ctx = mx.cpu(0) - else: - cuda = True - ctx = mx.gpu(args.gpu) - - features = features.as_in_context(ctx) - labels = labels.as_in_context(ctx) - train_mask = train_mask.as_in_context(ctx) - val_mask = val_mask.as_in_context(ctx) - test_mask = test_mask.as_in_context(ctx) - - layer_size = args.layer_size - # create GCN model - g = DGLGraph(data.graph, readonly=True) - # normalization - degs = g.in_degrees().astype('float32') - degs[degs > layer_size] = layer_size - norm = mx.nd.power(degs, -0.5) - if cuda: - norm = norm.as_in_context(ctx) - g.ndata['norm'] = mx.nd.expand_dims(norm, 1) - g.ndata['in'] = features - - num_data = len(train_mask.asnumpy()) - full_idx = np.arange(0, num_data) - train_idx = full_idx[train_mask.asnumpy() == 1] - - seed_nodes = list(train_idx) - n_layers = args.n_layers + 1 - - model = GCN(in_feats, - args.n_hidden, - n_classes, - args.n_layers, - mx.nd.relu, - args.dropout, - args.normalization) - model.initialize(ctx=ctx) - n_train_samples = train_mask.sum().asscalar() - loss_fcn = gluon.loss.SoftmaxCELoss() - - # use optimizer - print(model.collect_params()) - trainer = 
gluon.Trainer(model.collect_params(), 'adam', - {'learning_rate': args.lr, 'wd': args.weight_decay}) - - # initialize graph - dur = [] - for epoch in range(args.n_epochs): - if epoch >= 3: - t0 = time.time() - - subg, subg_edges_per_hop = sample_subgraph(g, seed_nodes, n_layers, layer_size) - subg_train_mask = subg.map_to_subgraph_nid(train_idx) - subg.copy_from_parent() - - # forward - smask = np.zeros((len(subg.parent_nid),)) - smask[subg_train_mask.asnumpy()] = 1 - - with mx.autograd.record(): - pred = model(subg, subg_edges_per_hop) - loss = loss_fcn(pred, labels[subg.parent_nid], mx.nd.expand_dims(mx.nd.array(smask), 1)) - loss = loss.sum() / n_train_samples - - print(loss.asnumpy()) - loss.backward() - trainer.step(batch_size=1) - - if epoch >= 3: - dur.append(time.time() - t0) - acc = evaluate(model, g, n_layers, labels, val_mask) - print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " - "ETputs(KTEPS) {:.2f}". format( - epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000)) - - acc = evaluate(model, g, n_layers, labels, test_mask) - print("Test accuracy {:.2%}".format(acc)) - -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='GCN') - register_data_args(parser) - parser.add_argument("--dropout", type=float, default=0.5, - help="dropout probability") - parser.add_argument("--gpu", type=int, default=-1, - help="gpu") - parser.add_argument("--lr", type=float, default=3e-2, - help="learning rate") - parser.add_argument("--n-epochs", type=int, default=200, - help="number of training epochs") - parser.add_argument("--n-hidden", type=int, default=16, - help="number of hidden gcn units") - parser.add_argument("--n-layers", type=int, default=1, - help="number of hidden gcn layers") - parser.add_argument("--layer-size", type=int, default=128, - help="number of neighbors to be sampled") - parser.add_argument("--normalization", - choices=['sym','left'], default=None, - help="graph normalization types 
(default=None)") - parser.add_argument("--self-loop", action='store_true', - help="graph self-loop (default=False)") - parser.add_argument("--weight-decay", type=float, default=5e-4, - help="Weight for L2 loss") - args = parser.parse_args() - - print(args) - - main(args) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 4693936d1ded..5a5b55a4324b 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -14,6 +14,10 @@ #include "runtime/ndarray.h" #include "graph_interface.h" +// TODO remove +#include +#include + namespace dgl { /*! diff --git a/python/dgl/contrib/sampling/sampler.py b/python/dgl/contrib/sampling/sampler.py index dc93d5710361..09abf6f381ee 100644 --- a/python/dgl/contrib/sampling/sampler.py +++ b/python/dgl/contrib/sampling/sampler.py @@ -82,7 +82,7 @@ def __next__(self): class LSSubgraphLoader(object): def __init__(self, g, batch_size, layer_size, n_layers=1, neighbor_type='in', node_prob=None, seed_nodes=None, - shuffle=False, num_workers=1, return_seed_id=False): + shuffle=False, num_workers=1, return_seed_id=False, return_prob=False): self._g = g if not g._graph.is_readonly(): raise NotImplementedError("subgraph loader only support read-only graphs.") @@ -91,6 +91,7 @@ def __init__(self, g, batch_size, layer_size, n_layers=1, self._n_layers = n_layers self._node_prob = node_prob self._return_seed_id = return_seed_id + self._return_prob = return_prob if self._node_prob is not None: assert self._node_prob.shape[0] == g.number_of_nodes(), \ "We need to know the sampling probability of every node" @@ -117,11 +118,14 @@ def _prefetch(self): end = min((self._subgraph_idx + 1) * self._batch_size, num_nodes) seed_ids.append(utils.toindex(self._seed_nodes[start:end])) self._subgraph_idx += 1 - sgi = self._g._graph.neighbor_sampling(seed_ids, self._layer_size, - self._n_layers, self._neighbor_type, - self._node_prob) - subgraphs = [DGLSubGraph(self._g, i.induced_nodes, i.induced_edges, \ - i) for i 
in sgi] + sgi = self._g._graph.layer_sampling(seed_ids, self._layer_size, + self._n_layers, self._neighbor_type, + self._node_prob, self._return_prob) + subgraphs = [DGLSubGraph(self._g, i.induced_nodes, i.induced_edges, i) for i in sgi] + if self._return_prob: + for sg, i in zip(subgraphs, sgi): + setattr(sg, 'layer_ids', i.layer_ids) + setattr(sg, 'sample_prob', i.sample_prob) self._subgraphs.extend(subgraphs) if self._return_seed_id: self._seed_ids.extend(seed_ids) @@ -322,7 +326,7 @@ def NeighborSampler(g, batch_size, expand_factor, num_hops=1, def LayerSampler(g, batch_size, layer_size, n_layers=1, neighbor_type='in', node_prob=None, seed_nodes=None, shuffle=False, num_workers=1, - return_seed_id=False, prefetch=False): + return_seed_id=False, prefetch=False, return_prob=False): '''Create a sampler that samples neighborhood. This creates a subgraph data loader that samples subgraphs from the input graph @@ -376,7 +380,7 @@ def LayerSampler(g, batch_size, layer_size, n_layers=1, information about the subgraphs. 
''' loader = LSSubgraphLoader(g, batch_size, layer_size, n_layers, neighbor_type, node_prob, - seed_nodes, shuffle, num_workers, return_seed_id) + seed_nodes, shuffle, num_workers, return_seed_id, return_prob) if not prefetch: return loader else: diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 903468c0ae92..8d56a44d8f55 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -678,7 +678,7 @@ def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, no return [SubgraphIndex(rst(i), self, utils.toindex(rst(num_subgs + i)), utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] - def layer_sampling(self, seed_ids, layer_size, n_layers, layer_type, node_prob): + def layer_sampling(self, seed_ids, layer_size, n_layers, layer_type, node_prob, return_prob=False): """Neighborhood sampling""" if len(seed_ids) == 0: return [] @@ -691,8 +691,15 @@ def layer_sampling(self, seed_ids, layer_size, n_layers, layer_type, node_prob): rst = _nonuniform_layer_sampling(self, node_prob, seed_ids, layer_type, n_layers, layer_size) - return [SubgraphIndex(rst(i), self, utils.toindex(rst(num_subgs + i)), - utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] + ret = [SubgraphIndex(rst(i), self, utils.toindex(rst(num_subgs + i)), + utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] + if return_prob: + for i, si in enumerate(ret): + l = rst(num_subgs * 3 + i).to_dlpack() + setattr(si, 'layer_ids', F.unsqueeze(F.zerocopy_from_dlpack(l), 1)) + p = rst(num_subgs * 4 + i).to_dlpack() + setattr(si, 'sample_prob', F.zerocopy_from_dlpack(p)) + return ret def to_networkx(self): """Convert to networkx graph. 
diff --git a/src/graph/graph_apis.cc b/src/graph/graph_apis.cc index 4b9dbf6f5f12..2136c27a7e21 100644 --- a/src/graph/graph_apis.cc +++ b/src/graph/graph_apis.cc @@ -7,6 +7,7 @@ #include #include #include "../c_api_common.h" +#include using dgl::runtime::DGLArgs; using dgl::runtime::DGLArgValue; diff --git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index dd92cc2c9fe6..499d111a74f1 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -7,6 +7,8 @@ #include #include #include +#include +#include #ifdef _MSC_VER // rand in MS compiler works well in multi-threading. @@ -249,6 +251,7 @@ void ImmutableGraph::CSR::ReadAllEdges(std::vector *edges) const { (*edges)[indptr[i] + j] = e; } } + edges->resize(NumEdges()); } ImmutableGraph::CSR::Ptr ImmutableGraph::CSR::Transpose() const { @@ -992,6 +995,7 @@ SampledSubgraph ImmutableGraph::LayerSample(IdArray seed_array, std::vector probabilities; std::vector edges; + std::queue sizes; std::unordered_set candidate_set; std::vector candidates; std::map n_times; @@ -999,116 +1003,102 @@ SampledSubgraph ImmutableGraph::LayerSample(IdArray seed_array, size_t n_seeds = seed_array->shape[0]; const dgl_id_t* seed_data = static_cast(seed_array->data); candidate_set.insert(seed_data, seed_data + n_seeds); - std::copy(candidate_set.begin(), candidate_set.end(), nodes.begin()); - layer_ids.insert(layer_ids.end(), nodes.size(), 0); - probabilities.insert(probabilities.end(), nodes.size(), 0); + std::copy(candidate_set.begin(), candidate_set.end(), std::back_inserter(nodes)); + sizes.push(nodes.size()); - std::vector positions {0, nodes.size()}; + size_t curr = 0; for (int i = 0; i != n_layers; i++) { candidate_set.clear(); - for (auto j = positions.end()[-2]; j != positions.back(); ++j) { + for (auto j = curr; j != sizes.back(); ++j) { auto src = nodes[j]; candidate_set.insert(indices + indptr[src], indices + indptr[src + 1]); } candidates.clear(); - std::copy(candidate_set.begin(), 
candidate_set.end(), candidates.begin()); + std::copy(candidate_set.begin(), candidate_set.end(), std::back_inserter(candidates)); - auto n_candidates = candidates.size(); n_times.clear(); + auto n_candidates = candidates.size(); for (size_t j = 0; j != layer_size; ++j) { - auto dst = rand_r(&rand_seed) % n_candidates; + auto dst = candidates[rand_r(&rand_seed) % n_candidates]; if (!n_times.insert(std::make_pair(dst, 1)).second) { ++n_times[dst]; } } - auto n_nodes = static_cast(NumVertices()); for (auto const &pair : n_times) { nodes.push_back(pair.first); - layer_ids.push_back(i + 1); - probabilities.push_back(pair.second / n_nodes); + probabilities.push_back(pair.second * n_candidates / static_cast(layer_size)); } - positions.push_back(nodes.size()); + curr = sizes.back(); + sizes.push(nodes.size()); } - /* - std::vector idx(nodes.size()); - iota(idx.begin(), idx.end(), 0); - auto less = [&nodes](size_t i, size_t j) { - return nodes[i] < nodes[j]; - }; - sort(idx.begin(), idx.end(), less); - - for (size_t i = 0; i < n_nodes; i++) { - subg.induced_vertices[i] = nodes[idx[i]]; - subg.layer_ids[i] = node_info[i].first; - subg.sample_prob[i] = node_info[i].second; - } - */ - - std::vector ret_indices; - std::vector ret_eids; - std::vector new_indptr; - std::vector new_indices; - std::vector new_eids; - new_indptr.push_back(0); - for (size_t i = 0; i < positions.size() - 2; ++i) { - auto curr_begin = positions[i]; - auto curr_end = positions[i + 1]; - // auto curr_len = curr_end - curr_begin; - auto next_begin = curr_end; - auto next_end = positions[i + 2]; - auto next_len = next_end - next_begin; - HashTableChecker checker(&nodes[next_begin], next_len); - for (size_t j = curr_begin; j != curr_end; ++j) { - ret_indices.clear(); - ret_eids.clear(); + std::vector ret_idx; + std::vector ret_eid; + std::vector sub_indptr; + std::vector sub_indices; + std::vector sub_eids; + sub_indptr.push_back(0); + curr = 0; + int i = n_layers - 1; + while (sizes.size() > 1) { + auto 
next = sizes.front(); + sizes.pop(); + auto next_size = sizes.front() - next; + + HashTableChecker checker(nodes.data() + next, next_size); + for (size_t j = curr; j != next; ++j) { + ret_idx.clear(); + ret_eid.clear(); auto src = nodes[j]; - auto dis = indptr[src]; + auto d = indptr[src]; auto len = indptr[src + 1] - indptr[src]; - checker.CollectOnRow(indices + dis, eids + dis, len, &ret_indices, &ret_eids); - new_indptr.push_back(ret_indices.size()); - new_indices.insert(new_indices.end(), ret_indices.begin(), ret_indices.end()); - new_eids.insert(new_eids.end(), ret_eids.begin(), ret_eids.end()); + checker.CollectOnRow(indices + d, eids + d, len, &ret_idx, &ret_eid); + sub_indptr.push_back(sub_indptr.back() + ret_idx.size()); + for (auto k : ret_idx) { + sub_indices.push_back(nodes[next + k]); + layer_ids.push_back(); + } + std::copy(ret_eid.begin(), ret_eid.end(), std::back_inserter(sub_eids)); } + curr = next; } + sub_indptr.insert(sub_indptr.end(), nodes.size() - sub_indptr.size() + 1, sub_indptr.back()); int64_t n_nodes = nodes.size(); - int64_t n_edges = new_eids.size(); - auto subg_csr = std::make_shared(n_nodes, n_edges); - subg_csr->indptr.resize(n_nodes + 1); - subg_csr->indices.resize(n_edges); - subg_csr->edge_ids.resize(n_edges); - std::copy(new_indptr.begin(), new_indptr.end(), subg_csr->indptr.data()); - std::copy(new_indices.begin(), new_indices.end(), subg_csr->indices.data()); - std::copy(new_eids.begin(), new_eids.end(), subg_csr->edge_ids.data()); - - SampledSubgraph subg; - subg.induced_vertices = IdArray::Empty({n_nodes}, - DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.induced_edges = IdArray::Empty({n_edges}, - DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.layer_ids = IdArray::Empty({n_nodes}, - DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); - subg.sample_prob = runtime::NDArray::Empty({n_nodes}, - DLDataType{kDLFloat, 8 * sizeof(float), 1}, - DLContext{kDLCPU, 0}); + int64_t n_edges = sub_eids.size(); + auto 
sub_csr = std::make_shared(n_nodes, n_edges); + std::copy(sub_indptr.begin(), sub_indptr.end(), sub_csr->indptr.begin()); + std::copy(sub_indices.begin(), sub_indices.end(), std::back_inserter(sub_csr->indices)); + std::copy(sub_eids.begin(), sub_eids.end(), std::back_inserter(sub_csr->edge_ids)); + + SampledSubgraph sub_g; + sub_g.induced_vertices = IdArray::Empty({n_nodes}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + sub_g.induced_edges = IdArray::Empty({n_edges}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + sub_g.layer_ids = IdArray::Empty({n_nodes}, + DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0}); + sub_g.sample_prob = runtime::NDArray::Empty({n_nodes}, + DLDataType{kDLFloat, 8 * sizeof(float), 1}, + DLContext{kDLCPU, 0}); std::copy(nodes.begin(), nodes.end(), - static_cast(subg.induced_vertices->data)); + static_cast(sub_g.induced_vertices->data)); std::copy(layer_ids.begin(), layer_ids.end(), - static_cast(subg.layer_ids->data)); + static_cast(sub_g.layer_ids->data)); std::copy(probabilities.begin(), probabilities.end(), - static_cast(subg.sample_prob->data)); + static_cast(sub_g.sample_prob->data)); if (neigh_type == "in") - subg.graph = GraphPtr(new ImmutableGraph(subg_csr, nullptr, IsMultigraph())); + sub_g.graph = GraphPtr(new ImmutableGraph(sub_csr, nullptr, IsMultigraph())); else - subg.graph = GraphPtr(new ImmutableGraph(nullptr, subg_csr, IsMultigraph())); + sub_g.graph = GraphPtr(new ImmutableGraph(nullptr, sub_csr, IsMultigraph())); - return subg; + return sub_g; } + void CompactSubgraph(ImmutableGraph::CSR *subg, const std::unordered_map &id_map) { for (size_t i = 0; i < subg->indices.size(); i++) { From e3385db1de84f5ba6e06672488bffcaf875f496b Mon Sep 17 00:00:00 2001 From: Yu Gai Date: Sat, 9 Feb 2019 01:12:48 +0800 Subject: [PATCH 10/10] sampler checker --- examples/mxnet/gcn/gcn_ls.py | 71 +++++++++++++++------ include/dgl/immutable_graph.h | 1 + python/dgl/graph_index.py | 3 +- src/graph/immutable_graph.cc | 117 
+++++++++++++++++++++++----------- 4 files changed, 134 insertions(+), 58 deletions(-) diff --git a/examples/mxnet/gcn/gcn_ls.py b/examples/mxnet/gcn/gcn_ls.py index a63183f9e184..0a9b5f0d3f2e 100644 --- a/examples/mxnet/gcn/gcn_ls.py +++ b/examples/mxnet/gcn/gcn_ls.py @@ -1,5 +1,6 @@ import argparse, time import numpy as np +import numpy.random as npr import mxnet as mx import mxnet.ndarray as nd from mxnet import gluon @@ -10,6 +11,33 @@ import dgl.function as fn from dgl.subgraph import DGLSubGraph +def check(g, sub_g, n_layers, layer_size, train_nid): + n = sub_g.number_of_nodes() + m = sub_g.number_of_edges() +# print('# nodes: %d, # edges: %d, # seeds: %d' % (n, m, len(train_nid))) + nid = np.arange(n) + src, dst = sub_g.edges() + src = src.asnumpy() + dst = dst.asnumpy() + lid = sub_g.layer_ids.asnumpy() +# print('np.unique(lid)', np.unique(lid)) + for i in range(n_layers + 1): + nmask = lid == i + src_mask = np.isin(src, nid[nmask]) + dst_mask = np.isin(dst, nid[nmask]) + nn = np.sum(nmask) + mm_src = np.sum(src_mask) + mm_dst = np.sum(dst_mask) + assert nn <= layer_size + if i == 0: + assert mm_dst == 0 + if i == n_layers + 1: + assert mm_src == 0 +# print('[layer %d]# nodes: %d, # src: %d, # dst: %d' % (i, nn, mm_src, mm_dst)) + src_lid = lid[src] + dst_lid = lid[dst] + assert np.all(dst_lid - src_lid == 1) + class GCNLayer(gluon.Block): def __init__(self, in_feats, out_feats, activation, dropout=0): super(GCNLayer, self).__init__() @@ -19,16 +47,18 @@ def __init__(self, in_feats, out_feats, activation, dropout=0): def forward(self, sub_g, src, dst): if self.dropout > 0: - sub_g.apply_nodes(lambda nodes: {'h' : nd.Dropout(nodes.data['h'], - p=self.dropout)}) - # TODO normalization + dropout = lambda nodes: {'h' : nd.Dropout(nodes.data['h'], p=self.dropout)} + sub_g.apply_nodes(dropout) + # normalize = lambda nodes : {'h' : nodes.data['h'] * nodes.data['normalizer']} + # sub_g.apply_nodes(normalize) if src is None: sub_g.update_all(fn.copy_src(src='h', 
out='m'), fn.sum(msg='m', out='h')) else: sub_g.send_and_recv((src, dst), fn.copy_src(src='h', out='m'), fn.sum(msg='m', out='h')) - sub_g.ndata['h'] = self.dense(sub_g.ndata['h']) + # sub_g.apply_nodes(normalize) + sub_g.apply_nodes(lambda nodes : {'h' : self.dense(nodes.data['h'])}) class GCN(gluon.Block): def __init__(self, in_feats, n_hidden, n_classes, n_layers, @@ -49,28 +79,30 @@ def forward(self, sub_g): if isinstance(sub_g, DGLSubGraph): n = sub_g.number_of_nodes() nid = np.arange(n) - src, dst, eid = sub_g.edge_ids(nid, nid) + src, dst = sub_g.edges() src = src.asnumpy() dst = dst.asnumpy() - eid = eid.asnumpy() + layer_ids = sub_g.layer_ids.asnumpy() + sample_prob = sub_g.sample_prob.asnumpy() for i, layer in enumerate(self.layers): - mask = eid == i - src = src[mask] - dst = dst[mask] + nmask = layer_ids == i + emask = np.isin(src, nid[nmask]) + src = src[emask] + dst = dst[emask] h = sub_g.ndata['h'] - sample_prob = sub_g.sample_prob.asnumpy() - p = np.expand_dims(np.where(np.isin(nid, src), sample_prob, np.ones(n)), axis=1) - sub_g.ndata['h'] = h * nd.array(p).as_in_context(h.context) + p = np.expand_dims(np.where(nmask, sample_prob, np.ones(n)), axis=1) + sub_g.ndata['h'] = h +# sub_g.ndata['h'] = h * nd.array(p).as_in_context(h.context) layer(sub_g, src, dst) else: for layer in self.layers: layer(sub_g, None, None) return self.dense(sub_g.pop_n_repr('h')) -def evaluate(model, g): +def evaluate(model, g, val=False): y = g.ndata['y'] y_bar = nd.argmax(model(g), axis=1) - mask = g.ndata['val_mask'] + mask = g.ndata['val_mask'] if val else g.ndata['test_mask'] accuracy = nd.sum(mask * (y == y_bar)) / nd.sum(mask) return accuracy.asscalar() @@ -128,8 +160,9 @@ def main(args): {'learning_rate': args.lr, 'wd': args.weight_decay}) def sampler(): + seed_nodes = npr.choice(train_nid, 32, replace=False) for x in LayerSampler(g, 1000000, args.layer_size, args.n_layers, - neighbor_type='in', seed_nodes=train_nid, + neighbor_type='in', seed_nodes=seed_nodes, 
return_prob=True): yield x @@ -139,19 +172,19 @@ def sampler(): sub_g, _ = next(sampler()) sub_g.copy_from_parent() - -# print(sub_g.number_of_nodes(), sub_g.number_of_edges()) + # check(g, sub_g, args.n_layers, args.layer_size, train_nid) with mx.autograd.record(): y = sub_g.ndata['y'] y_bar = model(sub_g) - loss = nd.mean(gluon.loss.SoftmaxCELoss()(y_bar, y)) + mask = sub_g.layer_ids.as_in_context(y) == args.n_layers + loss = nd.sum(mask * gluon.loss.SoftmaxCELoss()(y_bar, y)) / nd.sum(mask) loss.backward() trainer.step(batch_size=1) dur.append(time.time() - t0) - acc = evaluate(model, g) + acc = evaluate(model, g, val=True) print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | Accuracy {:.4f} | " "ETputs(KTEPS) {:.2f}".format( epoch, np.mean(dur), loss.asscalar(), acc, n_edges / np.mean(dur) / 1000)) diff --git a/include/dgl/immutable_graph.h b/include/dgl/immutable_graph.h index 5a5b55a4324b..7501af4a69ea 100644 --- a/include/dgl/immutable_graph.h +++ b/include/dgl/immutable_graph.h @@ -16,6 +16,7 @@ // TODO remove #include +#include #include namespace dgl { diff --git a/python/dgl/graph_index.py b/python/dgl/graph_index.py index 8d56a44d8f55..3c231ae6c26c 100644 --- a/python/dgl/graph_index.py +++ b/python/dgl/graph_index.py @@ -679,7 +679,6 @@ def neighbor_sampling(self, seed_ids, expand_factor, num_hops, neighbor_type, no utils.toindex(rst(num_subgs * 2 + i))) for i in range(num_subgs)] def layer_sampling(self, seed_ids, layer_size, n_layers, layer_type, node_prob, return_prob=False): - """Neighborhood sampling""" if len(seed_ids) == 0: return [] @@ -696,7 +695,7 @@ def layer_sampling(self, seed_ids, layer_size, n_layers, layer_type, node_prob, if return_prob: for i, si in enumerate(ret): l = rst(num_subgs * 3 + i).to_dlpack() - setattr(si, 'layer_ids', F.unsqueeze(F.zerocopy_from_dlpack(l), 1)) + setattr(si, 'layer_ids', F.zerocopy_from_dlpack(l)) p = rst(num_subgs * 4 + i).to_dlpack() setattr(si, 'sample_prob', F.zerocopy_from_dlpack(p)) return ret diff 
--git a/src/graph/immutable_graph.cc b/src/graph/immutable_graph.cc index 499d111a74f1..fd773ca3bdd8 100644 --- a/src/graph/immutable_graph.cc +++ b/src/graph/immutable_graph.cc @@ -991,82 +991,109 @@ SampledSubgraph ImmutableGraph::LayerSample(IdArray seed_array, const dgl_id_t* eids = g_csr->edge_ids.data(); std::vector nodes; - std::vector layer_ids; - std::vector probabilities; std::vector edges; + std::vector layer_ids; + std::vector sample_prob; - std::queue sizes; + std::queue size_queue; std::unordered_set candidate_set; - std::vector candidates; - std::map n_times; + std::vector candidate_vector; + std::unordered_map n_map; size_t n_seeds = seed_array->shape[0]; const dgl_id_t* seed_data = static_cast(seed_array->data); candidate_set.insert(seed_data, seed_data + n_seeds); std::copy(candidate_set.begin(), candidate_set.end(), std::back_inserter(nodes)); - sizes.push(nodes.size()); + layer_ids.insert(layer_ids.end(), nodes.size(), n_layers); + size_queue.push(nodes.size()); size_t curr = 0; - for (int i = 0; i != n_layers; i++) { + for (int i = 0; i != n_layers; ++i) { candidate_set.clear(); - for (auto j = curr; j != sizes.back(); ++j) { + for (auto j = curr; j != size_queue.back(); ++j) { auto src = nodes[j]; candidate_set.insert(indices + indptr[src], indices + indptr[src + 1]); } - candidates.clear(); - std::copy(candidate_set.begin(), candidate_set.end(), std::back_inserter(candidates)); + candidate_vector.clear(); + std::copy(candidate_set.begin(), candidate_set.end(), std::back_inserter(candidate_vector)); - n_times.clear(); - auto n_candidates = candidates.size(); + n_map.clear(); + auto n_candidates = candidate_vector.size(); for (size_t j = 0; j != layer_size; ++j) { - auto dst = candidates[rand_r(&rand_seed) % n_candidates]; - if (!n_times.insert(std::make_pair(dst, 1)).second) { - ++n_times[dst]; + auto dst = candidate_vector[rand_r(&rand_seed) % n_candidates]; + if (!n_map.insert(std::make_pair(dst, 1)).second) { + ++n_map[dst]; } } - for (auto 
const &pair : n_times) { + layer_ids.insert(layer_ids.end(), n_map.size(), n_layers - i - 1); + for (auto const &pair : n_map) { nodes.push_back(pair.first); - probabilities.push_back(pair.second * n_candidates / static_cast(layer_size)); + float sp = pair.second * n_candidates / static_cast(layer_size); + sample_prob.push_back(sp); } - curr = sizes.back(); - sizes.push(nodes.size()); + curr = size_queue.back(); + size_queue.push(nodes.size()); } - std::vector ret_idx; - std::vector ret_eid; - std::vector sub_indptr; + std::vector sub_indptr = {0}; std::vector sub_indices; std::vector sub_eids; - sub_indptr.push_back(0); curr = 0; - int i = n_layers - 1; - while (sizes.size() > 1) { - auto next = sizes.front(); - sizes.pop(); - auto next_size = sizes.front() - next; - - HashTableChecker checker(nodes.data() + next, next_size); - for (size_t j = curr; j != next; ++j) { - ret_idx.clear(); - ret_eid.clear(); + while (size_queue.size() > 1) { + auto next = size_queue.front(); + size_queue.pop(); + auto next_size = size_queue.front() - next; + + // HashTableChecker checker(nodes.data() + next, next_size); + std::unordered_map checker; + for (dgl_id_t j = next; j < size_queue.front(); ++j) { + checker.insert(std::make_pair(nodes[j], j)); + } + // std::cout << __FILE__ << ' ' << __LINE__ << ' ' << checker.size() << std::endl; + for (size_t j = curr; j < next; ++j) { + auto src = nodes[j]; + for (int64_t k = indptr[src]; k < indptr[src + 1]; ++k) { + auto iterator = checker.find(indices[k]); + if (iterator != checker.end()) { + // std::cout << __FILE__ << ' ' << __LINE__ << ' ' << indices[k] << std::endl; + sub_indices.push_back(iterator->second); + sub_eids.push_back(eids[k]); + } + } + sub_indptr.push_back(sub_indices.size()); + /* auto src = nodes[j]; auto d = indptr[src]; auto len = indptr[src + 1] - indptr[src]; + std::vector ret_idx; + std::vector ret_eid; checker.CollectOnRow(indices + d, eids + d, len, &ret_idx, &ret_eid); sub_indptr.push_back(sub_indptr.back() + 
ret_idx.size()); - for (auto k : ret_idx) { + for (const auto &k : ret_idx) { sub_indices.push_back(nodes[next + k]); - layer_ids.push_back(); } std::copy(ret_eid.begin(), ret_eid.end(), std::back_inserter(sub_eids)); + + // CHECK + for (const auto &k : ret_idx) { + CHECK(layer_ids[next + k] + 1 == layer_ids[j]) + << layer_ids[next + k] << ' ' << layer_ids[j]; + } + */ } curr = next; } sub_indptr.insert(sub_indptr.end(), nodes.size() - sub_indptr.size() + 1, sub_indptr.back()); + /* + std::cout << __FILE__ << ' ' << __LINE__ << ' ' + << *std::min_element(sub_indptr.begin(), sub_indptr.end()) << ' ' + << *std::max_element(sub_indptr.begin(), sub_indptr.end()) << std::endl; + */ + int64_t n_nodes = nodes.size(); int64_t n_edges = sub_eids.size(); auto sub_csr = std::make_shared(n_nodes, n_edges); @@ -1088,7 +1115,7 @@ SampledSubgraph ImmutableGraph::LayerSample(IdArray seed_array, static_cast(sub_g.induced_vertices->data)); std::copy(layer_ids.begin(), layer_ids.end(), static_cast(sub_g.layer_ids->data)); - std::copy(probabilities.begin(), probabilities.end(), + std::copy(sample_prob.begin(), sample_prob.end(), static_cast(sub_g.sample_prob->data)); if (neigh_type == "in") @@ -1096,6 +1123,22 @@ SampledSubgraph ImmutableGraph::LayerSample(IdArray seed_array, else sub_g.graph = GraphPtr(new ImmutableGraph(nullptr, sub_csr, IsMultigraph())); + // CHECK + for (size_t i = 0; i < sub_indptr.size() - 1; ++i) { + for (size_t j = sub_indptr[i]; j < sub_indptr[i + 1]; ++j) { + /* + CHECK(layer_ids[i] - 1 == layer_ids[old_nid2new_nid[sub_indices[j]]]) + << layer_ids[i] << ' ' << layer_ids[old_nid2new_nid[sub_indices[j]]]; + */ + } + } + + auto p = static_cast(sub_g.sample_prob->data); + std::cout << __FILE__ << ' ' << __LINE__ << ' ' + << *std::min_element(p, p + sample_prob.size()) << ' ' + << *std::max_element(p, p + sample_prob.size()) << ' ' + << n_nodes << ' ' << sub_g.sample_prob->shape[0] << std::endl; + return sub_g; } @@ -1141,7 +1184,7 @@ SampledSubgraph 
ImmutableGraph::LayerUniformSample(IdArray seeds, neigh_type, n_layers, layer_size); - std::static_pointer_cast(ret.graph)->CompactSubgraph(ret.induced_vertices); + // std::static_pointer_cast(ret.graph)->CompactSubgraph(ret.induced_vertices); return ret; }