diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index a2bcda31..cede80f3 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -6,4 +6,5 @@ add_subdirectory(geo) add_subdirectory(pr) add_subdirectory(ppr) add_subdirectory(bc) +add_subdirectory(kcore) # end /* Add examples' subdirectories */ diff --git a/examples/kcore/CMakeLists.txt b/examples/kcore/CMakeLists.txt new file mode 100644 index 00000000..7cd1ce0b --- /dev/null +++ b/examples/kcore/CMakeLists.txt @@ -0,0 +1,21 @@ +# begin /* Set the application name. */ +set(APPLICATION_NAME kcore) +# end /* Set the application name. */ + +# begin /* Add CUDA executables */ +add_executable(${APPLICATION_NAME}) + +set(SOURCE_LIST + ${APPLICATION_NAME}.cu +) + +target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST}) +target_link_libraries(${APPLICATION_NAME} PRIVATE essentials) +get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES) +set_target_properties(${APPLICATION_NAME} + PROPERTIES + CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES} +) # XXX: Find a better way to inherit essentials properties. + +message("-- Example Added: ${APPLICATION_NAME}") +# end /* Add CUDA executables */ diff --git a/examples/kcore/kcore.cu b/examples/kcore/kcore.cu new file mode 100644 index 00000000..bb89e5b8 --- /dev/null +++ b/examples/kcore/kcore.cu @@ -0,0 +1,102 @@ +#include +#include +#include "kcore_cpu.hxx" + +using namespace gunrock; +using namespace memory; + +void test_kcore(int num_arguments, char** argument_array) { + if (num_arguments != 2) { + std::cerr << "usage: ./bin/ filename.mtx" << std::endl; + exit(1); + } + + // -- + // Define types + // Specify the types that will be used for + // - vertex ids (vertex_t) + // - edge offsets (edge_t) + // - edge weights (weight_t) + + using vertex_t = int; + using edge_t = int; + using weight_t = float; + + // -- + // IO + + // Filename to be read + std::string filename = argument_array[1]; + + // Load the matrix-market dataset into csr format. + io::matrix_market_t mm; + using csr_t = format::csr_t; + csr_t csr; + csr.from_coo(mm.load(filename)); + + // -- + // Build graph + + // Convert the dataset you loaded into an `essentials` graph. + // `memory_space_t::device` -> the graph will be created on the GPU. + // `graph::view_t::csr` -> your input data is in `csr` format. + // + // Note that `graph::build::from_csr` expects pointers, but the `csr` data arrays + // are `thrust` vectors, so we need to unwrap them w/ `.data().get()`. + auto G = graph::build::from_csr( + csr.number_of_rows, + csr.number_of_columns, + csr.number_of_nonzeros, + csr.row_offsets.data().get(), + csr.column_indices.data().get(), + csr.nonzero_values.data().get() + ); + + std::cout << "G.get_number_of_vertices() : " << G.get_number_of_vertices() << std::endl; + std::cout << "G.get_number_of_edges() : " << G.get_number_of_edges() << std::endl; + std::cout << "G.is_directed() : " << G.is_directed() << std::endl; + + // -- + // Params and memory allocation + + // Initialize a `thrust::device_vector` of length `n_vertices` for k-core values + vertex_t n_vertices = G.get_number_of_vertices(); + thrust::device_vector k_cores(n_vertices); + + // -- + // GPU Run + + float gpu_elapsed = gunrock::kcore::run(G, k_cores.data().get()); + + // -- + // CPU Run + + thrust::host_vector h_k_cores(n_vertices); + + float cpu_elapsed = kcore_cpu::run(csr, h_k_cores.data()); + + int n_errors = kcore_cpu::compute_error(k_cores, h_k_cores); + + // -- + // Log + Validate + + std::cout << "GPU k-core values[:40] = "; + gunrock::print::head(k_cores, 40); + + std::cout << "CPU k-core values[:40] = "; + gunrock::print::head(h_k_cores, 40); + + + // Print runtime returned by `gunrock::kcore::run` + // This will just be the GPU runtime of the "region of interest", and will ignore any + // setup/teardown code. + std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl; + std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl; + std::cout << "Number of errors : " << n_errors << std::endl; + +} + +// Main method, wrapping test function +int main(int argc, char** argv) { + test_kcore(argc, argv); +} diff --git a/examples/kcore/kcore_cpu.hxx b/examples/kcore/kcore_cpu.hxx new file mode 100644 index 00000000..ca79b253 --- /dev/null +++ b/examples/kcore/kcore_cpu.hxx @@ -0,0 +1,77 @@ +#pragma once + +#include + +namespace kcore_cpu { + +template +float run(csr_t& csr, + int* k_cores) { + thrust::host_vector row_offsets(csr.row_offsets); // Copy data to CPU + thrust::host_vector column_indices(csr.column_indices); + thrust::host_vector nonzero_values(csr.nonzero_values); + + //Initialize data + std::vector remaining; + std::vector remaining_buff; + std::vector to_be_deleted; + thrust::fill(k_cores, k_cores + csr.number_of_rows, 0); + thrust::host_vector degrees(csr.number_of_rows); + for (int v = 0; v < csr.number_of_rows; v++) { + degrees[v] = row_offsets[v + 1] - row_offsets[v]; + if (degrees[v] != 0) { + remaining.push_back(v); + } + } + + auto t_start = std::chrono::high_resolution_clock::now(); + + for (int k = 1; k < csr.number_of_rows; k++) { + while (true) { + //delete vertices with degree <= k + for (auto v : remaining) { + if (degrees[v] <= k) { + k_cores[v] = k; + to_be_deleted.push_back(v); + } + else { + remaining_buff.push_back(v); + } + } + remaining.swap(remaining_buff); + remaining_buff.clear(); + if (to_be_deleted.empty()) break; //increment k when all vertices have degree > k + //decrement degree of deleted vertices' neighbors + for (auto v : to_be_deleted) { + vertex_t start = row_offsets[v]; + vertex_t end = row_offsets[v + 1]; + for (vertex_t i = start; i < end; i++) { + degrees[column_indices[i]]--; + } + } + to_be_deleted.clear(); + } + if (remaining.empty()) break; //stop when graph is empty + } + + auto t_stop = std::chrono::high_resolution_clock::now(); + auto elapsed = std::chrono::duration_cast(t_stop - t_start).count(); + return (float)elapsed / 1000; + +} + +template +int compute_error(thrust::device_vector _gpu_result, + thrust::host_vector cpu_result) { + thrust::host_vector gpu_result(_gpu_result); + + int n_errors = 0; + for (int i = 0; i < cpu_result.size(); i++) { + if (gpu_result[i] != cpu_result[i]) { + n_errors++; + } + } + return n_errors; +} + +} //namespace kcore_cpu diff --git a/include/gunrock/algorithms/kcore.hxx b/include/gunrock/algorithms/kcore.hxx new file mode 100644 index 00000000..b5d4a5b6 --- /dev/null +++ b/include/gunrock/algorithms/kcore.hxx @@ -0,0 +1,282 @@ +/** + * @file kcore.hxx + * @author Afton Geil (angeil@ucdavis.edu) + * @brief Vertex k-core decomposition algorithm. + * @version 0.1 + * @date 2021-05-03 + * + * @copyright Copyright (c) 2021 + * + */ + +#pragma once + +#include +#include + +namespace gunrock { +namespace kcore { + +//template +//struct param_t { + // No parameters for this algorithm +//}; + +template +struct result_t { + int* k_cores; + result_t(int* _k_cores) : k_cores(_k_cores) {} +}; + +template +struct problem_t : gunrock::problem_t { + result_type result; + + problem_t( + graph_t& G, + result_type& _result, + std::shared_ptr _context + ) : gunrock::problem_t(G, _context), result(_result) {} + + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + // Create a data structure that is internal to the application, and will not be returned to + // the user. + thrust::device_vector degrees; + thrust::device_vector deleted; + thrust::device_vector to_be_deleted; + + // `init` function, described above. This should be called once, when `problem` gets instantiated. + void init() { + // Get the graph + auto g = this->get_graph(); + + // Get number of vertices from the graph + auto n_vertices = g.get_number_of_vertices(); + + // Set the size of `degrees`, `deleted`, and `to_be_deleted` (`thrust` function) + degrees.resize(n_vertices); + deleted.resize(n_vertices); + to_be_deleted.resize(n_vertices); + } + + // `reset` function, described above. Should be called + // - after init, when `problem` is instantiated + // - between subsequent application runs, e.g. when you change the parameters + void reset() { + auto g = this->get_graph(); + + auto k_cores = this->result.k_cores; + auto n_vertices = g.get_number_of_vertices(); + + // set `k_cores`, `deleted`, and `to_be_deleted` to 0 for all vertices + thrust::fill( + thrust::device, + k_cores + 0, + k_cores + n_vertices, + 0 + ); + + thrust::fill( + thrust::device, + to_be_deleted.begin(), + to_be_deleted.end(), + 0 + ); + + //set initial `degrees` values to be vertices' actual degree + //will reduce these as vertices are removed from k-cores with increasing k value + auto get_degree = [=] __device__(const int& i) -> int { + return g.get_number_of_neighbors(i); + }; + + thrust::transform(thrust::counting_iterator(0), + thrust::counting_iterator(n_vertices), + degrees.begin(), get_degree); + + //mark zero degree vertices as deleted + auto degrees_data = degrees.data().get(); + auto mark_zero_degrees = [=] __device__(const int& i) -> bool { + if ((degrees_data[i]) == 0) { + return true; + } + else { + return false; + } + }; + + thrust::transform(thrust::device, thrust::counting_iterator(0), + thrust::counting_iterator(n_vertices), + deleted.begin(), mark_zero_degrees); + + } +}; + +template +struct enactor_t : gunrock::enactor_t { + using gunrock::enactor_t::enactor_t; + + using vertex_t = typename problem_t::vertex_t; + using edge_t = typename problem_t::edge_t; + using weight_t = typename problem_t::weight_t; + + // How to initialize the frontier at the beginning of the application. + // In this case, we want all vertices in the initial frontier + void prepare_frontier(frontier_t* f, cuda::multi_context_t& context) override { + // get pointer to the problem + auto P = this->get_problem(); + auto n_vertices = P->get_graph().get_number_of_vertices(); + + // Fill the frontier with a sequence of vertices from 0 -> n_vertices. + f->sequence((vertex_t)0, n_vertices, context.get_context(0)->stream()); + } + + // One iteration of the application + void loop(cuda::multi_context_t& context) override { + + auto E = this->get_enactor(); + auto P = this->get_problem(); + auto G = P->get_graph(); + + // Get parameters and data structures + auto k_cores = P->result.k_cores; + auto degrees = P->degrees.data().get(); + auto deleted = P->deleted.data().get(); + auto to_be_deleted = P->to_be_deleted.data().get(); + auto n_vertices = G.get_number_of_vertices(); + auto f = this->get_input_frontier(); + + // Get current iteration of application + auto k = this->iteration + 1; + + // Mark vertices with degree <= k for deletion and output their neighbors + auto advance_op = [degrees, k_cores, k, deleted, to_be_deleted] __host__ __device__( + vertex_t const& source, // source of edge + vertex_t const& neighbor, // destination of edge + edge_t const& edge, // id of edge + weight_t const& weight // weight of edge + ) -> bool { + + if (deleted[source] == true) { + return false; + } + + if (degrees[source] > k) { + return false; + } + + else{ + k_cores[source] = k; + to_be_deleted[source] = true; + if (deleted[neighbor] == true) { + return false; + } + return true; + } + + }; + + // Reduce degrees of deleted vertices' neighbors + // Check updated degree against k + auto filter_op = [degrees, k_cores, k, deleted] __host__ __device__( + vertex_t const& vertex + ) -> bool { + + if (deleted[vertex] == true) { + return false; + } + + int old_degrees = math::atomic::add(°rees[vertex], -1); + + if (old_degrees != (k + 1)) { + return false; + } + + else { + return true; + } + + }; + + while(!f->is_empty()) { + + // Execute advance operator + // + operators::advance::execute( + G, E, advance_op, context); + + //Mark to-be-deleted vertices as deleted + auto mark_deleted = [=] __device__(const int& i) -> bool { + return deleted[i] | to_be_deleted[i]; + }; + + thrust::transform(thrust::device, thrust::counting_iterator(0), + thrust::counting_iterator(n_vertices), + P->deleted.begin(), mark_deleted); + + // Execute filter operator + operators::filter::execute( + G, E, filter_op, context); + + } + + } + + virtual bool is_converged(cuda::multi_context_t& context) { + auto P = this->get_problem(); + auto G = P->get_graph(); + auto n_vertices = G.get_number_of_vertices(); + auto f = this->get_input_frontier(); + + // Check if all vertices have been removed from graph + bool graph_empty = thrust::all_of(thrust::device, P->deleted.begin(), P->deleted.end(), thrust::identity()); + + if (graph_empty) { + printf("degeneracy = %u\n", this->iteration); + } + + // Fill the frontier with a sequence of vertices from 0 -> n_vertices. + f->sequence((vertex_t)0, n_vertices, context.get_context(0)->stream()); + + return graph_empty; + } +}; + +template +float run(graph_t& G, + int* k_cores // Output +) { + using vertex_t = typename graph_t::vertex_type; + using weight_t = typename graph_t::weight_type; + + // instantiate `result` template + using result_type = result_t; + + // initialize `result` w/ the appropriate parameters / data structures + result_type result(k_cores); + + // Context for application (eg, GPU + CUDA stream it will be executed on) + auto multi_context = + std::shared_ptr(new cuda::multi_context_t(0)); + + // instantiate `problem` and `enactor` templates. + using problem_type = problem_t; + using enactor_type = enactor_t; + + // initialize problem; call `init` and `reset` to prepare data structures + problem_type problem(G, result, multi_context); + problem.init(); + problem.reset(); + + // initialize enactor; call enactor, returning GPU elapsed time + enactor_type enactor(&problem, multi_context); + return enactor.enact(); +} + +} // namespace kcore +} // namespace gunrock