From e8290938e157ddd13b70e6757e43e0f24b512067 Mon Sep 17 00:00:00 2001 From: Afton Geil Date: Mon, 10 May 2021 10:15:08 -0700 Subject: [PATCH 1/2] k-core application --- CMakeLists.txt | 3 +- examples/CMakeLists.txt | 1 + examples/kcore/CMakeLists.txt | 21 ++ examples/kcore/kcore.cu | 92 ++++++++ include/gunrock/applications/kcore.hxx | 294 +++++++++++++++++++++++++ 5 files changed, 410 insertions(+), 1 deletion(-) create mode 100644 examples/kcore/CMakeLists.txt create mode 100644 examples/kcore/kcore.cu create mode 100644 include/gunrock/applications/kcore.hxx diff --git a/CMakeLists.txt b/CMakeLists.txt index d0d8eb2f..6f5a1685 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,4 +157,5 @@ option(ESSENTIALS_BUILD_TESTS # Subdirectories for examples, testing and documentation if(ESSENTIALS_BUILD_TESTS) add_subdirectory(unittests) -endif(ESSENTIALS_BUILD_TESTS) \ No newline at end of file +endif(ESSENTIALS_BUILD_TESTS) + diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 02a9e70d..732ca18a 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,4 +4,5 @@ add_subdirectory(bfs) add_subdirectory(color) add_subdirectory(geo) add_subdirectory(pr) +add_subdirectory(kcore) # end /* Add examples' subdirectories */ diff --git a/examples/kcore/CMakeLists.txt b/examples/kcore/CMakeLists.txt new file mode 100644 index 00000000..7cd1ce0b --- /dev/null +++ b/examples/kcore/CMakeLists.txt @@ -0,0 +1,21 @@ +# begin /* Set the application name. */ +set(APPLICATION_NAME kcore) +# end /* Set the application name. */ + +# begin /* Add CUDA executables */ +add_executable(${APPLICATION_NAME}) + +set(SOURCE_LIST + ${APPLICATION_NAME}.cu +) + +target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST}) +target_link_libraries(${APPLICATION_NAME} PRIVATE essentials) +get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES) +set_target_properties(${APPLICATION_NAME} + PROPERTIES + CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} +) # XXX: Find a better way to inherit essentials properties. + +message("-- Example Added: ${APPLICATION_NAME}") +# end /* Add CUDA executables */ diff --git a/examples/kcore/kcore.cu b/examples/kcore/kcore.cu new file mode 100644 index 00000000..d72cd237 --- /dev/null +++ b/examples/kcore/kcore.cu @@ -0,0 +1,92 @@ +#include +#include + +using namespace gunrock; +using namespace memory; + +void test_kcore(int num_arguments, char** argument_array) { + if (num_arguments != 2) { + std::cerr << "usage: ./bin/ filename.mtx" << std::endl; + exit(1); + } + + // -- + // Define types + // Specify the types that will be used for + // - vertex ids (vertex_t) + // - edge offsets (edge_t) + // - edge weights (weight_t) + + using vertex_t = int; + using edge_t = int; + using weight_t = float; + + // -- + // IO + + // Filename to be read + std::string filename = argument_array[1]; + + // Load the matrix-market dataset into csr format. + io::matrix_market_t mm; + using csr_t = format::csr_t; + csr_t csr; + csr.from_coo(mm.load(filename)); + + // -- + // Build graph + + // Convert the dataset you loaded into an `essentials` graph. + // `memory_space_t::device` -> the graph will be created on the GPU. + // `graph::view_t::csr` -> your input data is in `csr` format. + // + // Note that `graph::build::from_csr` expects pointers, but the `csr` data arrays + // are `thrust` vectors, so we need to unwrap them w/ `.data().get()`. 
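An editorial aside, not part of the patch: `.data().get()` is simply how a raw device pointer is pulled out of a `thrust::device_vector`, which is what `graph::build::from_csr` consumes. A minimal sketch of the unwrapping, with hypothetical data:

```cpp
#include <thrust/device_vector.h>

int main() {
  // Row offsets of a tiny 3-vertex CSR graph (hypothetical values).
  thrust::device_vector<int> row_offsets(4);
  row_offsets[0] = 0; row_offsets[1] = 2; row_offsets[2] = 4; row_offsets[3] = 6;

  // data() yields a thrust::device_ptr<int>; get() unwraps it to a raw int*
  // suitable for APIs that expect plain device pointers.
  int* raw_offsets = row_offsets.data().get();
  (void)raw_offsets;
  return 0;
}
```

Passing raw pointers keeps the builder agnostic to how the CSR arrays were allocated, at the cost of the manual unwrapping shown here.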
+ auto G = graph::build::from_csr( + csr.number_of_rows, + csr.number_of_columns, + csr.number_of_nonzeros, + csr.row_offsets.data().get(), + csr.column_indices.data().get(), + csr.nonzero_values.data().get() + ); + + std::cout << "G.get_number_of_vertices() : " << G.get_number_of_vertices() << std::endl; + std::cout << "G.get_number_of_edges() : " << G.get_number_of_edges() << std::endl; + + // -- + // Params and memory allocation + + // Initialize a `thrust::device_vector` of length `n_vertices` for k-core values + vertex_t n_vertices = G.get_number_of_vertices(); + thrust::device_vector k_cores(n_vertices); + + // -- + // GPU Run + + float gpu_elapsed = gunrock::kcore::run(G, k_cores.data().get()); + + // -- + // CPU Run + + + // -- + // Log + Validate + + // Use a fancy thrust function to print the results to the command line + // Note, if your graph is big you might not want to print this whole thing + std::cout << "GPU k-Core Values (output) = "; + thrust::copy(k_cores.begin(), k_cores.end(), std::ostream_iterator(std::cout, " ")); + std::cout << std::endl; + + // Print runtime returned by `gunrock::my_sssp::run` + // This will just be the GPU runtime of the "region of interest", and will ignore any + // setup/teardown code. + std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl; + +} + +// Main method, wrapping test function +int main(int argc, char** argv) { + test_kcore(argc, argv); +} diff --git a/include/gunrock/applications/kcore.hxx b/include/gunrock/applications/kcore.hxx new file mode 100644 index 00000000..a3069f0e --- /dev/null +++ b/include/gunrock/applications/kcore.hxx @@ -0,0 +1,294 @@ +/** + * @file kcore.hxx + * @author Afton Geil (angeil@ucdavis.edu) + * @brief Vertex k-core decomposition algorithm. + * @version 0.1 + * @date 2021-05-03 + * + * @copyright Copyright (c) 2021 + * + */ + +#pragma once + +#include +#include + +namespace gunrock { +namespace kcore { + +//template +//struct param_t { + // No parameters for this algorithm +//}; + +template +struct result_t { + int* k_cores; + result_t(int* _k_cores) : k_cores(_k_cores) {} +}; + +//template +template +struct problem_t : gunrock::problem_t { + //param_type param; + result_type result; + + problem_t( + graph_t& G, + //param_type& _param, + result_type& _result, + std::shared_ptr _context + ) : gunrock::problem_t(G, _context), result(_result) {} + //) : gunrock::problem_t(G, _context), param(_param), result(_result) {} + + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + // Create a data structure that is internal to the application, and will not be returned to + // the user. + thrust::device_vector degrees; + thrust::device_vector deleted; + thrust::device_vector to_be_deleted; + + // `init` function, described above. This should be called once, when `problem` gets instantiated. + void init() { + // Get the graph + auto g = this->get_graph(); + + // Get number of vertices from the graph + auto n_vertices = g.get_number_of_vertices(); + + // Set the size of `degrees`, `deleted`, and `to_be_deleted` (`thrust` function) + degrees.resize(n_vertices); + deleted.resize(n_vertices); + to_be_deleted.resize(n_vertices); + } + + // `reset` function, described above. Should be called + // - after init, when `problem` is instantiated + // - between subsequent application runs, e.g. 
when you change the parameters + void reset() { + auto g = this->get_graph(); + + auto k_cores = this->result.k_cores; + auto n_vertices = g.get_number_of_vertices(); + + // set `k_cores`, `deleted`, and `to_be_deleted` to 0 for all vertices + thrust::fill( + thrust::device, + k_cores + 0, + k_cores + n_vertices, + 0 + ); + + thrust::fill( + thrust::device, + deleted.begin(), + deleted.end(), + 0 + ); + + thrust::fill( + thrust::device, + to_be_deleted.begin(), + to_be_deleted.end(), + 0 + ); + + //set initial `degrees` values to be vertices' actual degree + //will reduce these as vertices are removed from k-cores with increasing k value + auto get_degree = [=] __device__(const int& i) -> int { + return g.get_number_of_neighbors(i); + }; + + thrust::transform(thrust::counting_iterator(0), + thrust::counting_iterator(n_vertices), + degrees.begin(), get_degree); + } +}; + +template +struct enactor_t : gunrock::enactor_t { + using gunrock::enactor_t::enactor_t; + + using vertex_t = typename problem_t::vertex_t; + using edge_t = typename problem_t::edge_t; + using weight_t = typename problem_t::weight_t; + + // How to initialize the frontier at the beginning of the application. + // In this case, we want all vertices in the initial frontier + void prepare_frontier(frontier_t* f, cuda::multi_context_t& context) override { + // get pointer to the problem + auto P = this->get_problem(); + auto n_vertices = P->get_graph().get_number_of_vertices(); + + // Fill the frontier with a sequence of vertices from 0 -> n_vertices. + f->sequence((vertex_t)0, n_vertices, context.get_context(0)->stream()); + } + + // One iteration of the application + void loop(cuda::multi_context_t& context) override { + + auto E = this->get_enactor(); + auto P = this->get_problem(); + auto G = P->get_graph(); + + // Get parameters and data structures + // Note that `P->visited` is a thrust vector, so we need to unwrap again + //auto single_source = P->param.single_source; + //auto distances = P->result.distances; + //auto visited = P->visited.data().get(); + + auto k_cores = P->result.k_cores; + auto degrees = P->degrees.data().get(); + auto deleted = P->deleted.data().get(); + auto to_be_deleted = P->to_be_deleted.data().get(); + + auto n_vertices = G.get_number_of_vertices(); + auto f = this->get_input_frontier(); + + // Get current iteration of application + auto k = this->iteration + 1; + + // Mark vertices with degree <= k for deletion and output their neighbors + auto advance_op = [degrees, k_cores, k, deleted, to_be_deleted] __host__ __device__( + vertex_t const& source, // source of edge + vertex_t const& neighbor, // destination of edge + edge_t const& edge, // id of edge + weight_t const& weight // weight of edge + ) -> bool { + + if (deleted[source] == true) { + return false; + } + + if (degrees[source] > k) { + return false; + } + + else{ + k_cores[source] = k; + to_be_deleted[source] = true; + if (deleted[neighbor] == true) { + return false; + } + return true; + } + + }; + + // Reduce degrees of deleted vertices' neighbors + // Check updated degree against k + auto filter_op = [degrees, k_cores, k, deleted] __host__ __device__( + vertex_t const& vertex + ) -> bool { + + if (deleted[vertex] == true) { + return false; + } + + int old_degrees = math::atomic::add(°rees[vertex], -1); + + if (old_degrees != (k + 1)) { + return false; + } + + else { + return true; + } + + }; + + while(!f->is_empty()) { + + printf("frontier size before advance, iteration %u: %u\n", k, f->get_number_of_elements()); + + // Execute advance 
operator + operators::advance::execute( + G, E, advance_op, context); + + printf("frontier size after advance, iteration %u: %u\n", k, f->get_number_of_elements()); + + //Mark to-be-deleted vertices as deleted + auto mark_deleted = [=] __device__(const int& i) -> bool { + return deleted[i] | to_be_deleted[i]; + }; + + thrust::transform(thrust::device, thrust::counting_iterator(0), + thrust::counting_iterator(n_vertices), + P->deleted.begin(), mark_deleted); + + // Execute filter operator + operators::filter::execute( + G, E, filter_op, context); + + printf("frontier size after filter, iteration %u: %u\n", k, f->get_number_of_elements()); + + } + + } + + virtual bool is_converged(cuda::multi_context_t& context) { + auto P = this->get_problem(); + auto G = P->get_graph(); + auto n_vertices = G.get_number_of_vertices(); + auto f = this->get_input_frontier(); + + // Check if all vertices have been removed from graph + bool graph_empty = thrust::all_of(thrust::device, P->deleted.begin(), P->deleted.end(), thrust::identity()); + printf("no vertices remaining? %u\n", graph_empty); + + // Fill the frontier with a sequence of vertices from 0 -> n_vertices. + f->sequence((vertex_t)0, n_vertices, context.get_context(0)->stream()); + + bool timeout = (this->iteration >= 10); //TODO: remove + + return graph_empty || timeout; + } +}; + +template +float run(graph_t& G, + int* k_cores // Output +) { + using vertex_t = typename graph_t::vertex_type; + using weight_t = typename graph_t::weight_type; + + // instantiate `param` and `result` templates + //using param_type = param_t; + using result_type = result_t; + + // initialize `param` and `result` w/ the appropriate parameters / data structures + //param_type param(); + result_type result(k_cores); + + // This code probably should be the same across all applications, + // unless maybe you're doing something like multi-gpu / concurrent function calls + + // Context for application (eg, GPU + CUDA stream it will be executed on) + auto multi_context = + std::shared_ptr(new cuda::multi_context_t(0)); + + // instantiate `problem` and `enactor` templates. 
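A note on the `filter_op` above (editorial, not part of the patch): `math::atomic::add(&degrees[vertex], -1)` returns the value of `degrees[vertex]` *before* the decrement. A vertex can appear in the frontier once per deleted neighbor, so several threads may decrement its counter concurrently; comparing the returned old value against `k + 1` means exactly one of them (the one whose decrement drops the remaining degree to `k`) keeps the vertex, so it re-enters the frontier at most once. A minimal CUDA sketch of that pattern, with hypothetical names:

```cpp
#include <cstdio>
#include <cuda_runtime.h>

// One vertex's degree counter, decremented once per deleted neighbor.
__global__ void decrement_and_test(int* degree, int k, int* times_kept) {
  int old_value = atomicAdd(degree, -1);  // returns the pre-decrement value
  if (old_value == k + 1) {
    // Only the decrement that drops the degree to exactly k takes this branch.
    atomicAdd(times_kept, 1);
  }
}

int main() {
  int *degree, *times_kept;
  cudaMallocManaged(&degree, sizeof(int));
  cudaMallocManaged(&times_kept, sizeof(int));
  *degree = 5;      // vertex starts with degree 5
  *times_kept = 0;
  decrement_and_test<<<1, 4>>>(degree, /*k=*/2, times_kept);  // 4 neighbors deleted
  cudaDeviceSynchronize();
  printf("remaining degree = %d, kept %d time(s)\n", *degree, *times_kept);  // 1, 1
  return 0;
}
```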
+ //using problem_type = problem_t; + using problem_type = problem_t; + using enactor_type = enactor_t; + + // initialize problem; call `init` and `reset` to prepare data structures + //problem_type problem(G, param, result, multi_context); + problem_type problem(G, result, multi_context); + problem.init(); + problem.reset(); + + // initialize enactor; call enactor, returning GPU elapsed time + enactor_type enactor(&problem, multi_context); + return enactor.enact(); + // +} + +} // namespace kcore +} // namespace gunrock From c7745d79326e10b298752c9fe0266a9f20fff6fd Mon Sep 17 00:00:00 2001 From: Afton Geil Date: Mon, 24 May 2021 13:31:39 -0700 Subject: [PATCH 2/2] add k-core cpu comparison --- examples/kcore/kcore.cu | 22 ++++++-- examples/kcore/kcore_cpu.hxx | 77 ++++++++++++++++++++++++++ include/gunrock/applications/kcore.hxx | 60 ++++++++------------ 3 files changed, 116 insertions(+), 43 deletions(-) create mode 100644 examples/kcore/kcore_cpu.hxx diff --git a/examples/kcore/kcore.cu b/examples/kcore/kcore.cu index d72cd237..e2505b7e 100644 --- a/examples/kcore/kcore.cu +++ b/examples/kcore/kcore.cu @@ -1,5 +1,6 @@ #include #include +#include "kcore_cpu.hxx" using namespace gunrock; using namespace memory; @@ -53,6 +54,7 @@ void test_kcore(int num_arguments, char** argument_array) { std::cout << "G.get_number_of_vertices() : " << G.get_number_of_vertices() << std::endl; std::cout << "G.get_number_of_edges() : " << G.get_number_of_edges() << std::endl; + std::cout << "G.is_directed() : " << G.is_directed() << std::endl; // -- // Params and memory allocation @@ -69,20 +71,28 @@ void test_kcore(int num_arguments, char** argument_array) { // -- // CPU Run + thrust::host_vector h_k_cores(n_vertices); + + float cpu_elapsed = kcore_cpu::run(csr, h_k_cores.data()); + + int n_errors = kcore_cpu::compute_error(k_cores, h_k_cores); // -- // Log + Validate - // Use a fancy thrust function to print the results to the command line - // Note, if your graph is big you might not want to print this whole thing - std::cout << "GPU k-Core Values (output) = "; - thrust::copy(k_cores.begin(), k_cores.end(), std::ostream_iterator(std::cout, " ")); - std::cout << std::endl; + std::cout << "GPU k-core values[:40] = "; + gunrock::print::head(k_cores, 40); + + std::cout << "CPU k-core values[:40] = "; + gunrock::print::head(h_k_cores, 40); + - // Print runtime returned by `gunrock::my_sssp::run` + // Print runtime returned by `gunrock::kcore::run` // This will just be the GPU runtime of the "region of interest", and will ignore any // setup/teardown code. 
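An editorial aside that may help with validation (not part of the patch): on a small hand-checkable input the expected k-core values can be enumerated directly. A hypothetical toy graph in the same undirected CSR layout used above:

```cpp
// Toy input: triangle 0-1-2 plus a pendant vertex 3 attached to vertex 0,
// with every undirected edge stored in both directions.
int row_offsets[]    = {0, 3, 5, 7, 8};
int column_indices[] = {1, 2, 3,   // neighbors of 0
                        0, 2,      // neighbors of 1
                        0, 1,      // neighbors of 2
                        0};        // neighbors of 3

// Peeling by hand: k = 1 removes vertex 3 (degree 1); the triangle survives
// until k = 2, when its three vertices are removed together.
// Expected output: k_cores = {2, 2, 2, 1}.
```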
std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl; + std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl; + std::cout << "Number of errors : " << n_errors << std::endl; } diff --git a/examples/kcore/kcore_cpu.hxx b/examples/kcore/kcore_cpu.hxx new file mode 100644 index 00000000..ca79b253 --- /dev/null +++ b/examples/kcore/kcore_cpu.hxx @@ -0,0 +1,77 @@ +#pragma once + +#include + +namespace kcore_cpu { + +template +float run(csr_t& csr, + int* k_cores) { + thrust::host_vector row_offsets(csr.row_offsets); // Copy data to CPU + thrust::host_vector column_indices(csr.column_indices); + thrust::host_vector nonzero_values(csr.nonzero_values); + + //Initialize data + std::vector remaining; + std::vector remaining_buff; + std::vector to_be_deleted; + thrust::fill(k_cores, k_cores + csr.number_of_rows, 0); + thrust::host_vector degrees(csr.number_of_rows); + for (int v = 0; v < csr.number_of_rows; v++) { + degrees[v] = row_offsets[v + 1] - row_offsets[v]; + if (degrees[v] != 0) { + remaining.push_back(v); + } + } + + auto t_start = std::chrono::high_resolution_clock::now(); + + for (int k = 1; k < csr.number_of_rows; k++) { + while (true) { + //delete vertices with degree <= k + for (auto v : remaining) { + if (degrees[v] <= k) { + k_cores[v] = k; + to_be_deleted.push_back(v); + } + else { + remaining_buff.push_back(v); + } + } + remaining.swap(remaining_buff); + remaining_buff.clear(); + if (to_be_deleted.empty()) break; //increment k when all vertices have degree > k + //decrement degree of deleted vertices' neighbors + for (auto v : to_be_deleted) { + vertex_t start = row_offsets[v]; + vertex_t end = row_offsets[v + 1]; + for (vertex_t i = start; i < end; i++) { + degrees[column_indices[i]]--; + } + } + to_be_deleted.clear(); + } + if (remaining.empty()) break; //stop when graph is empty + } + + auto t_stop = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(t_stop - t_start).count(); + return (float)elapsed / 1000; + +} + +template +int compute_error(thrust::device_vector _gpu_result, + thrust::host_vector cpu_result) { + thrust::host_vector gpu_result(_gpu_result); + + int n_errors = 0; + for (int i = 0; i < cpu_result.size(); i++) { + if (gpu_result[i] != cpu_result[i]) { + n_errors++; + } + } + return n_errors; +} + +} //namespace kcore_cpu diff --git a/include/gunrock/applications/kcore.hxx b/include/gunrock/applications/kcore.hxx index a3069f0e..b0165b6a 100644 --- a/include/gunrock/applications/kcore.hxx +++ b/include/gunrock/applications/kcore.hxx @@ -28,19 +28,15 @@ struct result_t { result_t(int* _k_cores) : k_cores(_k_cores) {} }; -//template template struct problem_t : gunrock::problem_t { - //param_type param; result_type result; problem_t( graph_t& G, - //param_type& _param, result_type& _result, std::shared_ptr _context ) : gunrock::problem_t(G, _context), result(_result) {} - //) : gunrock::problem_t(G, _context), param(_param), result(_result) {} using vertex_t = typename graph_t::vertex_type; using edge_t = typename graph_t::edge_type; @@ -83,13 +79,6 @@ struct problem_t : gunrock::problem_t { 0 ); - thrust::fill( - thrust::device, - deleted.begin(), - deleted.end(), - 0 - ); - thrust::fill( thrust::device, to_be_deleted.begin(), @@ -106,6 +95,22 @@ struct problem_t : gunrock::problem_t { thrust::transform(thrust::counting_iterator(0), thrust::counting_iterator(n_vertices), degrees.begin(), get_degree); + + //mark zero degree vertices as deleted + auto degrees_data = degrees.data().get(); + 
auto mark_zero_degrees = [=] __device__(const int& i) -> bool { + if ((degrees_data[i]) == 0) { + return true; + } + else { + return false; + } + }; + + thrust::transform(thrust::device, thrust::counting_iterator(0), + thrust::counting_iterator(n_vertices), + deleted.begin(), mark_zero_degrees); + } }; @@ -136,16 +141,10 @@ struct enactor_t : gunrock::enactor_t { auto G = P->get_graph(); // Get parameters and data structures - // Note that `P->visited` is a thrust vector, so we need to unwrap again - //auto single_source = P->param.single_source; - //auto distances = P->result.distances; - //auto visited = P->visited.data().get(); - auto k_cores = P->result.k_cores; auto degrees = P->degrees.data().get(); auto deleted = P->deleted.data().get(); auto to_be_deleted = P->to_be_deleted.data().get(); - auto n_vertices = G.get_number_of_vertices(); auto f = this->get_input_frontier(); @@ -203,16 +202,12 @@ struct enactor_t : gunrock::enactor_t { while(!f->is_empty()) { - printf("frontier size before advance, iteration %u: %u\n", k, f->get_number_of_elements()); - // Execute advance operator operators::advance::execute( G, E, advance_op, context); - printf("frontier size after advance, iteration %u: %u\n", k, f->get_number_of_elements()); - //Mark to-be-deleted vertices as deleted auto mark_deleted = [=] __device__(const int& i) -> bool { return deleted[i] | to_be_deleted[i]; @@ -226,8 +221,6 @@ struct enactor_t : gunrock::enactor_t { operators::filter::execute( G, E, filter_op, context); - printf("frontier size after filter, iteration %u: %u\n", k, f->get_number_of_elements()); - } } @@ -240,14 +233,15 @@ struct enactor_t : gunrock::enactor_t { // Check if all vertices have been removed from graph bool graph_empty = thrust::all_of(thrust::device, P->deleted.begin(), P->deleted.end(), thrust::identity()); - printf("no vertices remaining? %u\n", graph_empty); + + if (graph_empty) { + printf("degeneracy = %u\n", this->iteration); + } // Fill the frontier with a sequence of vertices from 0 -> n_vertices. f->sequence((vertex_t)0, n_vertices, context.get_context(0)->stream()); - bool timeout = (this->iteration >= 10); //TODO: remove - - return graph_empty || timeout; + return graph_empty; } }; @@ -258,28 +252,21 @@ float run(graph_t& G, using vertex_t = typename graph_t::vertex_type; using weight_t = typename graph_t::weight_type; - // instantiate `param` and `result` templates - //using param_type = param_t; + // instantiate `result` template using result_type = result_t; - // initialize `param` and `result` w/ the appropriate parameters / data structures - //param_type param(); + // initialize `result` w/ the appropriate parameters / data structures result_type result(k_cores); - // This code probably should be the same across all applications, - // unless maybe you're doing something like multi-gpu / concurrent function calls - // Context for application (eg, GPU + CUDA stream it will be executed on) auto multi_context = std::shared_ptr(new cuda::multi_context_t(0)); // instantiate `problem` and `enactor` templates. 
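For reference (an editorial aside, not part of the patch): the graph's degeneracy, the quantity `is_converged` reports above, is the largest k-core value assigned to any vertex, so a caller can also recover it from the output array after the run. A small sketch, assuming the caller still holds the `k_cores` device vector and the graph has at least one vertex:

```cpp
#include <thrust/device_vector.h>
#include <thrust/extrema.h>

// Hypothetical caller-side helper: the degeneracy is the maximum coreness
// over all vertices.
int degeneracy_of(const thrust::device_vector<int>& k_cores) {
  return *thrust::max_element(k_cores.begin(), k_cores.end());
}
```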
- //using problem_type = problem_t; using problem_type = problem_t; using enactor_type = enactor_t; // initialize problem; call `init` and `reset` to prepare data structures - //problem_type problem(G, param, result, multi_context); problem_type problem(G, result, multi_context); problem.init(); problem.reset(); @@ -287,7 +274,6 @@ float run(graph_t& G, // initialize enactor; call enactor, returning GPU elapsed time enactor_type enactor(&problem, multi_context); return enactor.enact(); - // } } // namespace kcore
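A closing editorial note, not part of the patch: beyond counting mismatches against the CPU reference, the output can also be checked against a basic property any correct k-core decomposition must satisfy, namely that every vertex with core value c has at least c neighbors whose core value is also at least c. A host-side sketch, assuming the CSR arrays and k-core values have already been copied to host vectors:

```cpp
#include <thrust/host_vector.h>

// Hypothetical consistency check: returns true if every vertex v has at least
// k_cores[v] neighbors u with k_cores[u] >= k_cores[v].
bool coreness_is_consistent(const thrust::host_vector<int>& row_offsets,
                            const thrust::host_vector<int>& column_indices,
                            const thrust::host_vector<int>& k_cores) {
  int n_vertices = static_cast<int>(row_offsets.size()) - 1;
  for (int v = 0; v < n_vertices; v++) {
    int support = 0;
    for (int e = row_offsets[v]; e < row_offsets[v + 1]; e++)
      if (k_cores[column_indices[e]] >= k_cores[v]) support++;
    if (support < k_cores[v]) return false;
  }
  return true;
}
```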