gunrock · neoblizz · Dec 8, 2021 · Jan 13, 2021 · Jan 14, 2021 · Jan 14, 2021
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -20,7 +20,6 @@ set(PROJECT_DEPS_DIR externals)
 # end /* Dependencies directory */
 
 # begin /* Include cmake modules */
-# include(${PROJECT_SOURCE_DIR}/cmake/FetchRapidJSON.cmake)
 include(${PROJECT_SOURCE_DIR}/cmake/FetchThrustCUB.cmake)
 include(${PROJECT_SOURCE_DIR}/cmake/FetchModernGPU.cmake)
 # end /* Include cmake modules */
@@ -57,7 +56,7 @@ set_target_properties(essentials
         CUDA_EXTENSIONS OFF
         CUDA_RESOLVE_DEVICE_SYMBOLS ON
         CUDA_SEPARABLE_COMPILATION ON
-        CUDA_ARCHITECTURES 70 # Set required architecture.
+        CUDA_ARCHITECTURES 61 # Set required architecture.
         # CUDA_PTX_COMPILATION ON # Can only be applied to OBJ.
 )
 
@@ -99,6 +98,7 @@ target_include_directories(essentials
 ####################################################
 target_link_libraries(essentials
     INTERFACE curand
+    INTERFACE cuda
 )
 
 ####################################################
@@ -114,17 +114,26 @@ target_sources(essentials
 ####################################################
 set(CXX_FLAGS 
   -Wall 
-  -Wno-unused-local-typedefs 
-  -Wno-strict-aliasing 
-  -Wno-unused-function 
+  # -Wextra
+  # -Werror
+  -Wno-unused-result
+  -Wno-unused-local-typedefs
+  -Wno-strict-aliasing
+  -Wno-unused-function
   -Wno-format-security
+  # -vvv
 )
 
 set(CUDA_FLAGS
   --expt-extended-lambda 
-  --expt-relaxed-constexpr 
+  --expt-relaxed-constexpr
   --use_fast_math
   --ptxas-options -v
+  # --verbose
+  --optimize 3 # Host optimize-level
+  # --debug # Host debug
+  # --device-debug # Device debug
+  --generate-line-info
 )
 
 ####################################################

diff --git a/README.md b/README.md
@@ -3,25 +3,27 @@
 
 ## Quick Start Guide
 
-Before building Gunrock make sure you have **CUDA Toolkit 11 or higher** installed on your system. Other external dependencies such as `NVIDIA/thrust`, `NVIDIA/cub`, etc. are automatically fetched using `cmake`.
+Before building Gunrock make sure you have **CUDA Toolkit 11 or higher**[^1] installed on your system. Other external dependencies such as `NVIDIA/thrust`, `NVIDIA/cub`, etc. are automatically fetched using `cmake`.
 
 ```shell
 git clone https://github.com/gunrock/essentials.git
 cd essentials
 mkdir build && cd build
 cmake .. 
 make sssp # or for all algorithms, use: make -j$(nproc)
-bin/sssp ../datasets/chesapeake.mtx
+bin/sssp ../datasets/chesapeake/chesapeake.mtx
 ```
-
-##### Preferred **CUDA v11.2.1** due to support for stream ordered memory allocators (e.g. `cudaFreeAsync()`).
+[^1]: Preferred **CUDA v11.2.1 or higher** due to support for stream-ordered memory allocators (e.g. `cudaFreeAsync()`).
 
 ## Getting Started with Gunrock
 
-- [Gunrock's programming model]()
-- [API reference documentation]()
-- [Performance analysis]()
-- [Publications](https://gunrock.github.io/docs/#/gunrock/publications_and_presentations) and [presentations](https://gunrock.github.io/docs/#/gunrock/publications_and_presentations?id=presentations)
+- [Gunrock's Overview](https://github.com/gunrock/essentials/wiki/Overview)
+- [Gunrock's programming model](https://github.com/gunrock/essentials/wiki/Programming-Model)
+- [Gunrock's documentation](https://github.com/gunrock/essentials/wiki)
+- [Publications](https://github.com/gunrock/essentials/wiki/Publications) and [presentations](https://github.com/gunrock/essentials/wiki/Presentations)
+
+## Essentials vs. Gunrock
+Essentials is the future of Gunrock. The idea is to take the lessons learned from Gunrock and apply them to a new design, which simplifies the effort it takes to **(1)** implement graph algorithms, **(2)** add internal optimizations, and **(3)** conduct future research. For example, in Gunrock, SSSP is implemented in 4-5 files with 1000s of lines of code, whereas in essentials it is a single file with ~170 lines of code. Our end goal with essentials is to release it as `v2.0` of Gunrock.
 
 ## How to Cite Gunrock
 Thank you for citing our work.
@@ -50,4 +52,4 @@ Thank you for citing our work.
 
 ## Copyright and License
 
-Gunrock is copyright The Regents of the University of California, 2021. The library, examples, and all source code are released under [Apache 2.0](https://github.com/gunrock/essentials/blob/master/LICENSE).
+Gunrock is copyright The Regents of the University of California, 2021. The library, examples, and all source code are released under [Apache 2.0](https://github.com/gunrock/essentials/blob/master/LICENSE).
diff --git a/cmake/FetchRapidJSON.cmake b/cmake/FetchRapidJSON.cmake
diff --git a/cmake/FetchThrustCUB.cmake b/cmake/FetchThrustCUB.cmake
@@ -9,7 +9,7 @@ set(FETCHCONTENT_BASE_DIR ${FC_BASE})
 FetchContent_Declare(
     thrust
     GIT_REPOSITORY https://github.com/thrust/thrust.git
-    GIT_TAG        1.12.0
+    GIT_TAG        1.15.0
 )
 
 FetchContent_GetProperties(thrust)

diff --git a/datasets/bips98_606.mtx → datasets/bips98_606/bips98_606.mtx b/datasets/bips98_606.mtx → datasets/bips98_606/bips98_606.mtx
diff --git a/datasets/chesapeake.mtx → datasets/chesapeake/chesapeake.mtx b/datasets/chesapeake.mtx → datasets/chesapeake/chesapeake.mtx
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -6,4 +6,10 @@ add_subdirectory(geo)
 add_subdirectory(pr)
 add_subdirectory(ppr)
 add_subdirectory(bc)
+add_subdirectory(hits)
+add_subdirectory(kcore)
+add_subdirectory(spmv)
 # end /* Add examples' subdirectories */
+
+# begin /* Add experimental examples' subdirectories */
+add_subdirectory(experimental/async)
diff --git a/examples/bfs/bfs.cu b/examples/bfs/bfs.cu
@@ -17,17 +17,24 @@ void test_bfs(int num_arguments, char** argument_array) {
   using edge_t = int;
   using weight_t = float;
 
+  using csr_t =
+      format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
+
   // --
   // IO
 
+  csr_t csr;
   std::string filename = argument_array[1];
 
-  io::matrix_market_t<vertex_t, edge_t, weight_t> mm;
-
-  using csr_t =
-      format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
-  csr_t csr;
-  csr.from_coo(mm.load(filename));
+  if (util::is_market(filename)) {
+    io::matrix_market_t<vertex_t, edge_t, weight_t> mm;
+    csr.from_coo(mm.load(filename));
+  } else if (util::is_binary_csr(filename)) {
+    csr.read_binary(filename);
+  } else {
+    std::cerr << "Unknown file format: " << filename << std::endl;
+    exit(1);
+  }
 
   thrust::device_vector<vertex_t> row_indices(csr.number_of_nonzeros);
   thrust::device_vector<vertex_t> column_indices(csr.number_of_nonzeros);
@@ -78,20 +85,11 @@ void test_bfs(int num_arguments, char** argument_array) {
   // --
   // Log
 
-  std::cout << "GPU Distances (output) = ";
-  thrust::copy(distances.begin(),
-               (distances.size() < 40) ? distances.begin() + distances.size()
-                                       : distances.begin() + 40,
-               std::ostream_iterator<vertex_t>(std::cout, " "));
-  std::cout << std::endl;
-
-  std::cout << "CPU Distances (output) = ";
-  thrust::copy(h_distances.begin(),
-               (h_distances.size() < 40)
-                   ? h_distances.begin() + h_distances.size()
-                   : h_distances.begin() + 40,
-               std::ostream_iterator<vertex_t>(std::cout, " "));
-  std::cout << std::endl;
+  std::cout << "GPU distances[:40] = ";
+  gunrock::print::head<weight_t>(distances, 40);
+
+  std::cout << "CPU Distances[:40] = ";
+  gunrock::print::head<weight_t>(h_distances, 40);
 
   std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl;
   std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl;

diff --git a/examples/bfs/bfs_cpu.hxx b/examples/bfs/bfs_cpu.hxx
@@ -22,8 +22,12 @@ float run(csr_t& csr,
           vertex_t& single_source,
           vertex_t* distances,
           vertex_t* predecessors) {
-  thrust::host_vector<edge_t> row_offsets(csr.row_offsets);  // Copy data to CPU
-  thrust::host_vector<vertex_t> column_indices(csr.column_indices);
+
+  thrust::host_vector<edge_t> _row_offsets(csr.row_offsets);  // Copy data to CPU
+  thrust::host_vector<vertex_t> _column_indices(csr.column_indices);
+
+  edge_t* row_offsets = _row_offsets.data();
+  vertex_t* column_indices = _column_indices.data();
 
   for (vertex_t i = 0; i < csr.number_of_rows; i++)
     distances[i] = std::numeric_limits<vertex_t>::max();
@@ -71,6 +75,7 @@ int compute_error(thrust::device_vector<val_t> _gpu_result,
   int n_errors = 0;
   for (int i = 0; i < cpu_result.size(); i++) {
     if (gpu_result[i] != cpu_result[i]) {
+      std::cout << "gpu, cpu : " << gpu_result[i] << ", " << cpu_result[i] << std::endl;
       n_errors++;
     }
   }

diff --git a/examples/color/color_cpu.hxx b/examples/color/color_cpu.hxx
@@ -23,9 +23,8 @@ float run(csr_t& csr, vertex_t* colors) {
   for (vertex_t i = 0; i < n_vertices; i++)
     colors[i] = -1;
 
-  thrust::host_vector<vertex_t> randoms(n_vertices);
-  gunrock::generate::random::uniform_distribution(0, n_vertices,
-                                                  randoms.begin());
+  thrust::host_vector<weight_t> randoms(n_vertices);
+  gunrock::generate::random::uniform_distribution(randoms);
 
   int color = 0;
   int n_left = n_vertices;

diff --git a/examples/experimental/async/CMakeLists.txt b/examples/experimental/async/CMakeLists.txt
@@ -0,0 +1,21 @@
+# begin /* Set the application name. */
+set(APPLICATION_NAME async_bfs)  # reused below as the target name and the .cu file stem
+# end /* Set the application name. */
+
+# begin /* Add CUDA executables */
+add_executable(${APPLICATION_NAME})  # created without sources; they are attached by target_sources below
+
+set(SOURCE_LIST 
+    ${APPLICATION_NAME}.cu
+)
+
+target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST})
+target_link_libraries(${APPLICATION_NAME} PRIVATE essentials)
+get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES)  # read the value set on the essentials target
+set_target_properties(${APPLICATION_NAME} 
+    PROPERTIES 
+        CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
+) # XXX: Find a better way to inherit essentials properties (the value is copied by hand above).
+
+message("-- Example Added: ${APPLICATION_NAME}")
+# end /* Add CUDA executables */
diff --git a/examples/experimental/async/async_bfs.cu b/examples/experimental/async/async_bfs.cu
@@ -0,0 +1,87 @@
+#include <gunrock/algorithms/experimental/async/bfs.hxx>
+#include "bfs_cpu.hxx"
+
+using namespace gunrock;
+using namespace experimental;
+using namespace memory;
+
+void test_async_bfs(int num_arguments, char** argument_array) {  // runs async BFS on the GPU, then a CPU BFS, and prints both
+  if (num_arguments != 2) {  // exactly one user argument (the input file) is required
+    std::cerr << "usage: ./bin/<program-name> filename.mtx" << std::endl;
+    exit(1);
+  }
+
+  // --
+  // Define types: int vertex and edge indices, float edge weights
+
+  using vertex_t = int;
+  using edge_t = int;
+  using weight_t = float;
+
+  // --
+  // IO: load argv[1] with the Matrix Market loader (the only loader used here) and convert its COO result to CSR
+
+  std::string filename = argument_array[1];
+
+  io::matrix_market_t<vertex_t, edge_t, weight_t> mm;
+
+  using csr_t =
+      format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
+  csr_t csr;
+  csr.from_coo(mm.load(filename));
+
+  // --
+  // Build graph from raw pointers into csr's vectors (presumably non-owning, so csr must outlive G -- TODO confirm)
+
+  auto G = graph::build::from_csr<memory_space_t::device, graph::view_t::csr>(
+      csr.number_of_rows,               // rows
+      csr.number_of_columns,            // columns
+      csr.number_of_nonzeros,           // nonzeros
+      csr.row_offsets.data().get(),     // row_offsets
+      csr.column_indices.data().get(),  // column_indices
+      csr.nonzero_values.data().get()   // values
+  );  // supports row_indices and column_offsets (default = nullptr)
+
+  // --
+  // Params and memory allocation: the source vertex is hard-coded to 0
+
+  vertex_t n_vertices = G.get_number_of_vertices();
+  vertex_t single_source = 0;
+  std::cout << "Single Source = " << single_source << std::endl;
+
+  // --
+  // GPU Run: one depth slot per vertex; the returned float is printed below as milliseconds
+
+  thrust::device_vector<vertex_t> depth(n_vertices);
+
+  float gpu_elapsed = async::bfs::run(G, single_source, depth.data().get());
+  cudaDeviceSynchronize();  // block until outstanding device work completes; the returned status is ignored
+
+  // --
+  // CPU Run: compute a host-side result from the same csr, then compare it with the GPU output
+
+  thrust::host_vector<vertex_t> h_depth(n_vertices);
+
+  float cpu_elapsed =
+      bfs_cpu::run<csr_t, vertex_t, edge_t>(csr, single_source, h_depth.data());
+
+  int n_errors = bfs_cpu::compute_error(depth, h_depth);  // presumably the number of mismatching entries -- verify in bfs_cpu.hxx
+
+  // --
+  // Log + Validate: results are only printed; n_errors is not returned or turned into a failure here
+
+  std::cout << "GPU depth[:40] = ";
+  gunrock::print::head<vertex_t>(depth, 40);
+
+  std::cout << "CPU depth[:40] = ";
+  gunrock::print::head<vertex_t>(h_depth, 40);
+
+  std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl;
+  std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl;
+  std::cout << "Number of errors : " << n_errors << std::endl;
+}
+
+int main(int argc, char** argv) {  // entry point: forwards the command line to test_async_bfs
+  test_async_bfs(argc, argv);
+  return EXIT_SUCCESS;  // unconditional: the error count printed by the test does not affect the exit status
+}