Skip to content
This repository has been archived by the owner on Dec 22, 2022. It is now read-only.

Improvements, Clean-up, Experimental Async Support #87

Merged
merged 44 commits into from
Dec 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
ffc9b3b
testing async
Jan 13, 2021
0a790c6
working w/ graph
Jan 14, 2021
ff3eefb
async bfs working
Jan 14, 2021
1261bcb
worknig async
Jan 14, 2021
9e807a6
async bfs
Jan 14, 2021
e829093
k-core application
angeil May 10, 2021
c7745d7
add k-core cpu comparison
angeil May 24, 2021
215bac5
merge changes and update kcore advance
angeil May 24, 2021
36e69be
SSSP, Cached load, Uniquify/Filter.
neoblizz May 25, 2021
836831f
Merge branch 'master' into dev/async_bfs
May 26, 2021
abd375c
cleaning up async_bfs; cleaning style on bfs+sssp a little
May 26, 2021
b1d42fa
async_enactor.hxx
May 26, 2021
9477dd0
testing
May 26, 2021
bf43d99
Merge pull request #77 from angeil/master
neoblizz May 27, 2021
13a3c39
Cleaner version, but compiler segfaults. No idea why.
neoblizz May 27, 2021
ab70d25
HITS
li-yi-dong Jun 7, 2021
58c706f
Modify CMakeLists
li-yi-dong Jun 7, 2021
b14dcd2
Compile issue
li-yi-dong Jun 8, 2021
b7486e7
Merge remote-tracking branch 'upstream/dev' into dev
li-yi-dong Jun 8, 2021
f578d19
Merge branch 'dev' into dev/async_bfs_202105
neoblizz Jun 9, 2021
8c0bc9c
Merge pull request #78 from bkj/dev/async_bfs_202105
neoblizz Jun 9, 2021
e3537b2
Not completely fixed but your weird segfault is gone
crozhon Jun 11, 2021
622fcf0
kcore cleaned up and fixed.
neoblizz Jun 11, 2021
b72b779
Merge pull request #81 from gunrock/seg_fault
neoblizz Jun 11, 2021
84ba9ae
Restructure
li-yi-dong Jun 12, 2021
635fa12
Simplify files
li-yi-dong Jun 16, 2021
0bc1658
Merge branch 'dev' into dev
li-yi-dong Jun 16, 2021
62947fc
Cleaning up asynchronous interface. Moved to experimental. experiment…
neoblizz Jun 17, 2021
6cb8f82
Merge branch 'dev' into dev
neoblizz Jun 23, 2021
6a864c2
Merge pull request #83 from li-yi-dong/dev
neoblizz Oct 5, 2021
65fef97
Thrust bump to 1.14.0, tests passed.
neoblizz Oct 5, 2021
b9f454d
Fix virtual_memory unittest build
porumbes Oct 27, 2021
6ad40c1
Merge pull request #85 from porumbes/dev
neoblizz Oct 27, 2021
5fda32c
Made BFS GPU and CPU version equivalent, now the validation passes.
neoblizz Oct 30, 2021
30a523c
spmv WIP. does NOT compile
DanLoran Nov 3, 2021
e76495a
Merge branch 'dev' of https://github.com/DanLoran/essentials into dev
DanLoran Nov 3, 2021
c8ad315
Merge branch 'DanLoran-dev' into dev
neoblizz Nov 3, 2021
073b07e
Cleanup, narrowed segfault.
neoblizz Nov 3, 2021
0667449
Removed implicit constructor call
neoblizz Nov 3, 2021
de1a9a3
re-commit datasets, some docs and removed rJSON.
neoblizz Nov 4, 2021
f15c992
Barebone neighborreduce implementation + Pull/Push SpMV.
neoblizz Nov 5, 2021
244fd26
Better random number generation and code clean-up.
neoblizz Nov 5, 2021
be4fe94
TIL: You can use footnotes in github markdown.
neoblizz Dec 7, 2021
680fb83
Update README.md
neoblizz Dec 8, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ set(PROJECT_DEPS_DIR externals)
# end /* Dependencies directory */

# begin /* Include cmake modules */
# include(${PROJECT_SOURCE_DIR}/cmake/FetchRapidJSON.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FetchThrustCUB.cmake)
include(${PROJECT_SOURCE_DIR}/cmake/FetchModernGPU.cmake)
# end /* Include cmake modules */
Expand Down Expand Up @@ -57,7 +56,7 @@ set_target_properties(essentials
CUDA_EXTENSIONS OFF
CUDA_RESOLVE_DEVICE_SYMBOLS ON
CUDA_SEPARABLE_COMPILATION ON
CUDA_ARCHITECTURES 70 # Set required architecture.
CUDA_ARCHITECTURES 61 # Set required architecture.
# CUDA_PTX_COMPILATION ON # Can only be applied to OBJ.
)

Expand Down Expand Up @@ -99,6 +98,7 @@ target_include_directories(essentials
####################################################
target_link_libraries(essentials
INTERFACE curand
INTERFACE cuda
)

####################################################
Expand All @@ -114,17 +114,26 @@ target_sources(essentials
####################################################
set(CXX_FLAGS
-Wall
-Wno-unused-local-typedefs
-Wno-strict-aliasing
-Wno-unused-function
# -Wextra
# -Werror
-Wno-unused-result
-Wno-unused-local-typedefs
-Wno-strict-aliasing
-Wno-unused-function
-Wno-format-security
# -vvv
)

set(CUDA_FLAGS
--expt-extended-lambda
--expt-relaxed-constexpr
--expt-relaxed-constexpr
--use_fast_math
--ptxas-options -v
# --verbose
--optimize 3 # Host optimize-level
# --debug # Host debug
# --device-debug # Device debug
--generate-line-info
)

####################################################
Expand Down
20 changes: 11 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,27 @@

## Quick Start Guide

Before building Gunrock make sure you have **CUDA Toolkit 11 or higher** installed on your system. Other external dependencies such as `NVIDIA/thrust`, `NVIDIA/cub`, etc. are automatically fetched using `cmake`.
Before building Gunrock make sure you have **CUDA Toolkit 11 or higher**[^1] installed on your system. Other external dependencies such as `NVIDIA/thrust`, `NVIDIA/cub`, etc. are automatically fetched using `cmake`.

```shell
git clone https://github.com/gunrock/essentials.git
cd essentials
mkdir build && cd build
cmake ..
make sssp # or for all algorithms, use: make -j$(nproc)
bin/sssp ../datasets/chesapeake.mtx
bin/sssp ../datasets/chesapeake/chesapeake.mtx
```

##### Preferred **CUDA v11.2.1** due to support for stream ordered memory allocators (e.g. `cudaFreeAsync()`).
[^1]: Preferred **CUDA v11.2.1 or higher** due to support for stream ordered memory allocators (e.g. `cudaFreeAsync()`).

## Getting Started with Gunrock

- [Gunrock's programming model]()
- [API reference documentation]()
- [Performance analysis]()
- [Publications](https://gunrock.github.io/docs/#/gunrock/publications_and_presentations) and [presentations](https://gunrock.github.io/docs/#/gunrock/publications_and_presentations?id=presentations)
- [Gunrock's Overview](https://github.com/gunrock/essentials/wiki/Overview)
- [Gunrock's programming model](https://github.com/gunrock/essentials/wiki/Programming-Model)
- [Gunrock's documentation](https://github.com/gunrock/essentials/wiki)
- [Publications](https://github.com/gunrock/essentials/wiki/Publications) and [presentations](https://github.com/gunrock/essentials/wiki/Presentations)

## Essentials vs. Gunrock
Essentials is the future of Gunrock. The idea is to take the lessons learned from Gunrock and apply them to a new design, which simplifies the effort it takes to **(1)** implement graph algorithms, **(2)** add internal optimizations, and **(3)** conduct future research. For example, in Gunrock SSSP is implemented in 4-5 files with 1000s of lines of code, whereas in essentials it is a single file with ~170 lines of code. Our end goal with essentials is to release it as `v2.0` of Gunrock.

## How to Cite Gunrock
Thank you for citing our work.
Expand Down Expand Up @@ -50,4 +52,4 @@ Thank you for citing our work.

## Copyright and License

Gunrock is copyright The Regents of the University of California, 2021. The library, examples, and all source code are released under [Apache 2.0](https://github.com/gunrock/essentials/blob/master/LICENSE).
Gunrock is copyright The Regents of the University of California, 2021. The library, examples, and all source code are released under [Apache 2.0](https://github.com/gunrock/essentials/blob/master/LICENSE).
21 changes: 0 additions & 21 deletions cmake/FetchRapidJSON.cmake

This file was deleted.

2 changes: 1 addition & 1 deletion cmake/FetchThrustCUB.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ set(FETCHCONTENT_BASE_DIR ${FC_BASE})
FetchContent_Declare(
thrust
GIT_REPOSITORY https://github.com/thrust/thrust.git
GIT_TAG 1.12.0
GIT_TAG 1.15.0
)

FetchContent_GetProperties(thrust)
Expand Down
6 changes: 6 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,10 @@ add_subdirectory(geo)
add_subdirectory(pr)
add_subdirectory(ppr)
add_subdirectory(bc)
add_subdirectory(hits)
add_subdirectory(kcore)
add_subdirectory(spmv)
# end /* Add examples' subdirectories */

# begin /* Add experimental examples' subdirectories */
add_subdirectory(experimental/async)
38 changes: 18 additions & 20 deletions examples/bfs/bfs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,24 @@ void test_bfs(int num_arguments, char** argument_array) {
using edge_t = int;
using weight_t = float;

using csr_t =
format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;

// --
// IO

csr_t csr;
std::string filename = argument_array[1];

io::matrix_market_t<vertex_t, edge_t, weight_t> mm;

using csr_t =
format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
csr_t csr;
csr.from_coo(mm.load(filename));
if (util::is_market(filename)) {
io::matrix_market_t<vertex_t, edge_t, weight_t> mm;
csr.from_coo(mm.load(filename));
} else if (util::is_binary_csr(filename)) {
csr.read_binary(filename);
} else {
std::cerr << "Unknown file format: " << filename << std::endl;
exit(1);
}

thrust::device_vector<vertex_t> row_indices(csr.number_of_nonzeros);
thrust::device_vector<vertex_t> column_indices(csr.number_of_nonzeros);
Expand Down Expand Up @@ -78,20 +85,11 @@ void test_bfs(int num_arguments, char** argument_array) {
// --
// Log

std::cout << "GPU Distances (output) = ";
thrust::copy(distances.begin(),
(distances.size() < 40) ? distances.begin() + distances.size()
: distances.begin() + 40,
std::ostream_iterator<vertex_t>(std::cout, " "));
std::cout << std::endl;

std::cout << "CPU Distances (output) = ";
thrust::copy(h_distances.begin(),
(h_distances.size() < 40)
? h_distances.begin() + h_distances.size()
: h_distances.begin() + 40,
std::ostream_iterator<vertex_t>(std::cout, " "));
std::cout << std::endl;
std::cout << "GPU distances[:40] = ";
gunrock::print::head<weight_t>(distances, 40);

std::cout << "CPU Distances[:40] = ";
gunrock::print::head<weight_t>(h_distances, 40);

std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl;
std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl;
Expand Down
9 changes: 7 additions & 2 deletions examples/bfs/bfs_cpu.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ float run(csr_t& csr,
vertex_t& single_source,
vertex_t* distances,
vertex_t* predecessors) {
thrust::host_vector<edge_t> row_offsets(csr.row_offsets); // Copy data to CPU
thrust::host_vector<vertex_t> column_indices(csr.column_indices);

thrust::host_vector<edge_t> _row_offsets(csr.row_offsets); // Copy data to CPU
thrust::host_vector<vertex_t> _column_indices(csr.column_indices);

edge_t* row_offsets = _row_offsets.data();
vertex_t* column_indices = _column_indices.data();

for (vertex_t i = 0; i < csr.number_of_rows; i++)
distances[i] = std::numeric_limits<vertex_t>::max();
Expand Down Expand Up @@ -71,6 +75,7 @@ int compute_error(thrust::device_vector<val_t> _gpu_result,
int n_errors = 0;
for (int i = 0; i < cpu_result.size(); i++) {
if (gpu_result[i] != cpu_result[i]) {
std::cout << "gpu, cpu : " << gpu_result[i] << ", " << cpu_result[i] << std::endl;
n_errors++;
}
}
Expand Down
5 changes: 2 additions & 3 deletions examples/color/color_cpu.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ float run(csr_t& csr, vertex_t* colors) {
for (vertex_t i = 0; i < n_vertices; i++)
colors[i] = -1;

thrust::host_vector<vertex_t> randoms(n_vertices);
gunrock::generate::random::uniform_distribution(0, n_vertices,
randoms.begin());
thrust::host_vector<weight_t> randoms(n_vertices);
gunrock::generate::random::uniform_distribution(randoms);

int color = 0;
int n_left = n_vertices;
Expand Down
21 changes: 21 additions & 0 deletions examples/experimental/async/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Build script for the experimental asynchronous BFS example.
# Defines one executable named after APPLICATION_NAME, compiled from
# <APPLICATION_NAME>.cu and linked against the `essentials` target.

# begin /* Set the application name. */
set(APPLICATION_NAME async_bfs)
# end /* Set the application name. */

# begin /* Add CUDA executables */
# The executable is declared without sources; they are attached below via
# target_sources().
add_executable(${APPLICATION_NAME})

# Source files for this example; the single entry expands to async_bfs.cu.
set(SOURCE_LIST
${APPLICATION_NAME}.cu
)

target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST})
target_link_libraries(${APPLICATION_NAME} PRIVATE essentials)
# Read CUDA_ARCHITECTURES from the `essentials` target and apply the same
# value to this executable so both are built for the same architecture(s).
get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES)
set_target_properties(${APPLICATION_NAME}
PROPERTIES
CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
) # XXX: Find a better way to inherit essentials properties.

# Printed at configure time to report that this example was registered.
message("-- Example Added: ${APPLICATION_NAME}")
# end /* Add CUDA executables */
87 changes: 87 additions & 0 deletions examples/experimental/async/async_bfs.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#include <gunrock/algorithms/experimental/async/bfs.hxx>
#include "bfs_cpu.hxx"

using namespace gunrock;
using namespace experimental;
using namespace memory;

void test_async_bfs(int num_arguments, char** argument_array) {
if (num_arguments != 2) {
std::cerr << "usage: ./bin/<program-name> filename.mtx" << std::endl;
exit(1);
}

// --
// Define types

using vertex_t = int;
using edge_t = int;
using weight_t = float;

// --
// IO

std::string filename = argument_array[1];

io::matrix_market_t<vertex_t, edge_t, weight_t> mm;

using csr_t =
format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
csr_t csr;
csr.from_coo(mm.load(filename));

// --
// Build graph

auto G = graph::build::from_csr<memory_space_t::device, graph::view_t::csr>(
csr.number_of_rows, // rows
csr.number_of_columns, // columns
csr.number_of_nonzeros, // nonzeros
csr.row_offsets.data().get(), // row_offsets
csr.column_indices.data().get(), // column_indices
csr.nonzero_values.data().get() // values
); // supports row_indices and column_offsets (default = nullptr)

// --
// Params and memory allocation

vertex_t n_vertices = G.get_number_of_vertices();
vertex_t single_source = 0;
std::cout << "Single Source = " << single_source << std::endl;

// --
// GPU Run

thrust::device_vector<vertex_t> depth(n_vertices);

float gpu_elapsed = async::bfs::run(G, single_source, depth.data().get());
cudaDeviceSynchronize();

// --
// CPU Run

thrust::host_vector<vertex_t> h_depth(n_vertices);

float cpu_elapsed =
bfs_cpu::run<csr_t, vertex_t, edge_t>(csr, single_source, h_depth.data());

int n_errors = bfs_cpu::compute_error(depth, h_depth);

// --
// Log + Validate

std::cout << "GPU depth[:40] = ";
gunrock::print::head<vertex_t>(depth, 40);

std::cout << "CPU depth[:40] = ";
gunrock::print::head<vertex_t>(h_depth, 40);

std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl;
std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl;
std::cout << "Number of errors : " << n_errors << std::endl;
}

/**
 * @brief Program entry point: forwards the command-line arguments to the
 * asynchronous BFS test and reports success once it returns.
 */
int main(int argument_count, char* argument_values[]) {
  test_async_bfs(argument_count, argument_values);

  return EXIT_SUCCESS;
}
Loading