diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index a2bcda31..cede80f3 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -6,4 +6,5 @@ add_subdirectory(geo)
 add_subdirectory(pr)
 add_subdirectory(ppr)
 add_subdirectory(bc)
+add_subdirectory(kcore)
 # end /* Add examples' subdirectories */
diff --git a/examples/kcore/CMakeLists.txt b/examples/kcore/CMakeLists.txt
new file mode 100644
index 00000000..7cd1ce0b
--- /dev/null
+++ b/examples/kcore/CMakeLists.txt
@@ -0,0 +1,21 @@
+# begin /* Set the application name. */
+set(APPLICATION_NAME kcore)
+# end /* Set the application name. */
+
+# begin /* Add CUDA executables */
+add_executable(${APPLICATION_NAME})
+
+set(SOURCE_LIST 
+    ${APPLICATION_NAME}.cu
+)
+
+target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST})
+target_link_libraries(${APPLICATION_NAME} PRIVATE essentials)
+get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES)
+set_target_properties(${APPLICATION_NAME} 
+    PROPERTIES 
+        CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
+) # XXX: Find a better way to inherit essentials properties.
+
+message("-- Example Added: ${APPLICATION_NAME}")
+# end /* Add CUDA executables */
diff --git a/examples/kcore/kcore.cu b/examples/kcore/kcore.cu
new file mode 100644
index 00000000..bb89e5b8
--- /dev/null
+++ b/examples/kcore/kcore.cu
@@ -0,0 +1,102 @@
+#include <gunrock/algorithms/algorithms.hxx>
+#include <gunrock/algorithms/kcore.hxx>
+#include "kcore_cpu.hxx"
+
+using namespace gunrock;
+using namespace memory;
+
+void test_kcore(int num_arguments, char** argument_array) {
+  if (num_arguments != 2) {
+    std::cerr << "usage: ./bin/<program-name> filename.mtx" << std::endl;
+    exit(1);
+  }
+
+  // --
+  // Define types
+  // Specify the types that will be used for
+  // - vertex ids (vertex_t)
+  // - edge offsets (edge_t)
+  // - edge weights (weight_t)
+  
+  using vertex_t = int;
+  using edge_t   = int;
+  using weight_t = float;
+
+  // --
+  // IO
+  
+  // Filename to be read
+  std::string filename = argument_array[1];
+
+  // Load the matrix-market dataset into csr format.
+  io::matrix_market_t<vertex_t, edge_t, weight_t> mm;
+  using csr_t = format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
+  csr_t csr;
+  csr.from_coo(mm.load(filename));
+
+  // --
+  // Build graph
+
+  // Convert the dataset you loaded into an `essentials` graph.
+  // `memory_space_t::device` -> the graph will be created on the GPU.
+  // `graph::view_t::csr`     -> your input data is in `csr` format.
+  //
+  // Note that `graph::build::from_csr` expects pointers, but the `csr` data arrays
+  // are `thrust` vectors, so we need to unwrap them w/ `.data().get()`.
+  auto G = graph::build::from_csr<memory_space_t::device, graph::view_t::csr>(
+      csr.number_of_rows,
+      csr.number_of_columns,
+      csr.number_of_nonzeros,
+      csr.row_offsets.data().get(),
+      csr.column_indices.data().get(),
+      csr.nonzero_values.data().get() 
+  );
+
+  std::cout << "G.get_number_of_vertices() : " << G.get_number_of_vertices() << std::endl;
+  std::cout << "G.get_number_of_edges()    : " << G.get_number_of_edges()    << std::endl;
+  std::cout << "G.is_directed()    : " << G.is_directed()    << std::endl;
+
+  // --
+  // Params and memory allocation
+  
+  // Initialize a `thrust::device_vector` of length `n_vertices` for k-core values
+  vertex_t n_vertices = G.get_number_of_vertices();
+  thrust::device_vector<int> k_cores(n_vertices);
+
+  // --
+  // GPU Run
+
+  float gpu_elapsed = gunrock::kcore::run(G, k_cores.data().get());
+
+  // --
+  // CPU Run
+
+  thrust::host_vector<int> h_k_cores(n_vertices);
+ 
+  float cpu_elapsed = kcore_cpu::run<csr_t, vertex_t, edge_t, weight_t>(csr, h_k_cores.data());
+
+  int n_errors = kcore_cpu::compute_error(k_cores, h_k_cores);
+
+  // --
+  // Log + Validate
+
+  std::cout << "GPU k-core values[:40] = ";
+  gunrock::print::head<int>(k_cores, 40);
+
+  std::cout << "CPU k-core values[:40] = ";
+  gunrock::print::head<int>(h_k_cores, 40);
+
+
+  // Print runtime returned by `gunrock::kcore::run`
+  // This will just be the GPU runtime of the "region of interest", and will ignore any 
+  // setup/teardown code.
+  std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl;
+  std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl;
+  std::cout << "Number of errors : " << n_errors << std::endl;
+
+}
+
+// Main method, wrapping test function
+int main(int argc, char** argv) {
+  test_kcore(argc, argv);
+}
diff --git a/examples/kcore/kcore_cpu.hxx b/examples/kcore/kcore_cpu.hxx
new file mode 100644
index 00000000..ca79b253
--- /dev/null
+++ b/examples/kcore/kcore_cpu.hxx
@@ -0,0 +1,77 @@
+#pragma once
+
+#include <chrono>
+
+namespace kcore_cpu {
+
+template <typename csr_t, typename vertex_t, typename edge_t, typename weight_t>
+float run(csr_t& csr,
+	  int* k_cores) {
+  thrust::host_vector<edge_t> row_offsets(csr.row_offsets);  // Copy data to CPU
+  thrust::host_vector<vertex_t> column_indices(csr.column_indices);
+  thrust::host_vector<weight_t> nonzero_values(csr.nonzero_values);
+
+  //Initialize data
+  std::vector<int> remaining;
+  std::vector<int> remaining_buff;
+  std::vector<int> to_be_deleted;
+  thrust::fill(k_cores, k_cores + csr.number_of_rows, 0);
+  thrust::host_vector<int> degrees(csr.number_of_rows);
+  for (int v = 0; v < csr.number_of_rows; v++) {
+    degrees[v] = row_offsets[v + 1] - row_offsets[v];
+    if (degrees[v] != 0) {
+      remaining.push_back(v);
+    }
+  }
+
+  auto t_start = std::chrono::high_resolution_clock::now();
+
+  for (int k = 1; k < csr.number_of_rows; k++) {
+    while (true) {
+      //delete vertices with degree <= k
+      for (auto v : remaining) {
+        if (degrees[v] <= k) {
+          k_cores[v] = k;
+          to_be_deleted.push_back(v);
+        }
+        else {
+          remaining_buff.push_back(v);
+        }
+      }
+      remaining.swap(remaining_buff);
+      remaining_buff.clear();
+      if (to_be_deleted.empty()) break; //increment k when all vertices have degree > k
+      //decrement degree of deleted vertices' neighbors
+      for (auto v : to_be_deleted) {
+        vertex_t start = row_offsets[v];
+        vertex_t end = row_offsets[v + 1];
+        for (vertex_t i = start; i < end; i++) {
+          degrees[column_indices[i]]--;
+        }
+      }
+      to_be_deleted.clear();
+    }
+    if (remaining.empty()) break;   //stop when graph is empty
+  }
+
+  auto t_stop = std::chrono::high_resolution_clock::now();
+  auto elapsed = std::chrono::duration_cast<std::chrono::microseconds>(t_stop - t_start).count();
+  return (float)elapsed / 1000;
+
+}
+
+template <typename val_t>
+int compute_error(thrust::device_vector<val_t> _gpu_result,
+                  thrust::host_vector<val_t> cpu_result) {
+  thrust::host_vector<val_t> gpu_result(_gpu_result);
+
+  int n_errors = 0;
+  for (int i = 0; i < cpu_result.size(); i++) {
+    if (gpu_result[i] != cpu_result[i]) {
+      n_errors++;
+    }
+  }
+  return n_errors;
+}
+
+}   //namespace kcore_cpu
diff --git a/include/gunrock/algorithms/kcore.hxx b/include/gunrock/algorithms/kcore.hxx
new file mode 100644
index 00000000..b5d4a5b6
--- /dev/null
+++ b/include/gunrock/algorithms/kcore.hxx
@@ -0,0 +1,282 @@
+/**
+ * @file kcore.hxx
+ * @author Afton Geil (angeil@ucdavis.edu)
+ * @brief Vertex k-core decomposition algorithm.
+ * @version 0.1
+ * @date 2021-05-03
+ *
+ * @copyright Copyright (c) 2021
+ *
+ */
+
+#pragma once
+
+#include <gunrock/algorithms/algorithms.hxx>
+#include <thrust/logical.h>
+
+namespace gunrock {
+namespace kcore {
+
+//template <typename vertex_t>
+//struct param_t {
+  // No parameters for this algorithm
+//};
+
+template <typename vertex_t>
+struct result_t {
+  int* k_cores;
+  result_t(int* _k_cores) : k_cores(_k_cores) {}
+};
+
+template <typename graph_t, typename result_type>
+struct problem_t : gunrock::problem_t<graph_t> {
+  result_type result;
+
+  problem_t(
+    graph_t& G,
+    result_type& _result,
+    std::shared_ptr<cuda::multi_context_t> _context
+  ) : gunrock::problem_t<graph_t>(G, _context), result(_result) {}
+
+  using vertex_t = typename graph_t::vertex_type;
+  using edge_t = typename graph_t::edge_type;
+  using weight_t = typename graph_t::weight_type;
+
+  // Create a data structure that is internal to the application, and will not be returned to
+  // the user.
+  thrust::device_vector<int> degrees;
+  thrust::device_vector<bool> deleted;
+  thrust::device_vector<bool> to_be_deleted;
+
+  // `init` function, described above.  This should be called once, when `problem` gets instantiated.
+  void init() {
+    // Get the graph
+    auto g = this->get_graph();
+    
+    // Get number of vertices from the graph
+    auto n_vertices = g.get_number_of_vertices();   
+    
+    // Set the size of `degrees`, `deleted`, and `to_be_deleted` (`thrust` function)
+    degrees.resize(n_vertices);
+    deleted.resize(n_vertices);
+    to_be_deleted.resize(n_vertices);
+  }
+
+  // `reset` function, described above.  Should be called
+  // - after init, when `problem` is instantiated
+  // - between subsequent application runs, e.g. when you change the parameters
+  void reset() {
+    auto g = this->get_graph();
+    
+    auto k_cores  = this->result.k_cores;
+    auto n_vertices = g.get_number_of_vertices();
+    
+    // set `k_cores`, `deleted`, and `to_be_deleted` to 0 for all vertices
+    thrust::fill(
+      thrust::device, 
+      k_cores + 0, 
+      k_cores + n_vertices,
+      0
+    );
+
+    thrust::fill(
+      thrust::device, 
+      to_be_deleted.begin(), 
+      to_be_deleted.end(),
+      0
+    );
+
+    //set initial `degrees` values to be vertices' actual degree
+    //will reduce these as vertices are removed from k-cores with increasing k value
+    auto get_degree = [=] __device__(const int& i) -> int {
+      return g.get_number_of_neighbors(i);
+    };
+
+    thrust::transform(thrust::counting_iterator<vertex_t>(0),
+                      thrust::counting_iterator<vertex_t>(n_vertices),
+                      degrees.begin(), get_degree);
+
+    //mark zero degree vertices as deleted
+    auto degrees_data = degrees.data().get();
+    auto mark_zero_degrees = [=] __device__(const int& i) -> bool {
+      if ((degrees_data[i]) == 0) {
+        return true;
+      }
+      else {
+        return false;
+      }
+    };
+
+    thrust::transform(thrust::device, thrust::counting_iterator<vertex_t>(0),
+                      thrust::counting_iterator<vertex_t>(n_vertices),
+                      deleted.begin(), mark_zero_degrees);
+
+  }
+};
+
+template <typename problem_t>
+struct enactor_t : gunrock::enactor_t<problem_t> {
+  using gunrock::enactor_t<problem_t>::enactor_t;
+
+  using vertex_t = typename problem_t::vertex_t;
+  using edge_t   = typename problem_t::edge_t;
+  using weight_t = typename problem_t::weight_t;
+
+  // How to initialize the frontier at the beginning of the application.
+  // In this case, we want all vertices in the initial frontier
+  void prepare_frontier(frontier_t<vertex_t>* f, cuda::multi_context_t& context) override {
+    // get pointer to the problem
+    auto P = this->get_problem();   
+    auto n_vertices = P->get_graph().get_number_of_vertices();
+
+    // Fill the frontier with a sequence of vertices from 0 -> n_vertices.
+    f->sequence((vertex_t)0, n_vertices, context.get_context(0)->stream());
+  }
+
+  // One iteration of the application
+  void loop(cuda::multi_context_t& context) override {
+
+    auto E = this->get_enactor();
+    auto P = this->get_problem();
+    auto G = P->get_graph();
+
+    // Get parameters and data structures
+    auto k_cores = P->result.k_cores;
+    auto degrees = P->degrees.data().get();
+    auto deleted = P->deleted.data().get();
+    auto to_be_deleted = P->to_be_deleted.data().get();
+    auto n_vertices = G.get_number_of_vertices();
+    auto f = this->get_input_frontier();
+
+    // Get current iteration of application
+    auto k = this->iteration + 1;
+    
+    // Mark vertices with degree <= k for deletion and output their neighbors
+    auto advance_op = [degrees, k_cores, k, deleted, to_be_deleted] __host__ __device__(
+      vertex_t const& source,    // source of edge
+      vertex_t const& neighbor,  // destination of edge
+      edge_t const& edge,        // id of edge
+      weight_t const& weight     // weight of edge
+    ) -> bool {
+
+      if (deleted[source] == true) {
+        return false;
+      }
+
+      if (degrees[source] > k) {
+        return false;
+      }
+
+      else{
+        k_cores[source] = k;
+        to_be_deleted[source] = true;
+        if (deleted[neighbor] == true) {
+            return false;
+        }
+        return true;
+      }
+
+    };
+
+    // Reduce degrees of deleted vertices' neighbors
+    // Check updated degree against k
+    auto filter_op = [degrees, k_cores, k, deleted] __host__ __device__(
+      vertex_t const& vertex
+    ) -> bool {
+
+      if (deleted[vertex] == true) {
+        return false;
+      }
+
+      int old_degrees = math::atomic::add(&degrees[vertex], -1);
+
+      if (old_degrees != (k + 1)) {
+        return false;
+      }
+
+      else {
+        return true;
+      }
+
+    };
+
+    while(!f->is_empty()) {
+
+      // Execute advance operator
+      //
+        operators::advance::execute<operators::load_balance_t::merge_path,
+                                    operators::advance_direction_t::forward,
+                                    operators::advance_io_type_t::vertices,
+                                    operators::advance_io_type_t::vertices>(
+		G, E, advance_op, context);
+
+      //Mark to-be-deleted vertices as deleted
+      auto mark_deleted = [=] __device__(const int& i) -> bool {
+        return deleted[i] | to_be_deleted[i];
+      };
+
+      thrust::transform(thrust::device, thrust::counting_iterator<vertex_t>(0),
+                        thrust::counting_iterator<vertex_t>(n_vertices),
+                        P->deleted.begin(), mark_deleted);
+
+      // Execute filter operator
+      operators::filter::execute<operators::filter_algorithm_t::predicated>(
+          G, E, filter_op, context);
+
+    }
+
+  }
+
+  virtual bool is_converged(cuda::multi_context_t& context) {
+    auto P = this->get_problem();
+    auto G = P->get_graph();
+    auto n_vertices = G.get_number_of_vertices();
+    auto f = this->get_input_frontier();
+
+    //  Check if all vertices have been removed from graph
+    bool graph_empty = thrust::all_of(thrust::device, P->deleted.begin(), P->deleted.end(), thrust::identity<bool>());
+
+    if (graph_empty) {
+      printf("degeneracy = %u\n", this->iteration);
+    }
+
+    // Fill the frontier with a sequence of vertices from 0 -> n_vertices.
+    f->sequence((vertex_t)0, n_vertices, context.get_context(0)->stream());
+
+    return graph_empty;
+  }
+};
+
+template <typename graph_t>
+float run(graph_t& G,
+          int* k_cores       // Output
+) {
+  using vertex_t = typename graph_t::vertex_type;
+  using weight_t = typename graph_t::weight_type;
+
+  // instantiate `result` template
+  using result_type = result_t<int>;
+
+  // initialize `result` w/ the appropriate parameters / data structures
+  result_type result(k_cores);
+
+  // Context for application (eg, GPU + CUDA stream it will be executed on)
+  auto multi_context =
+      std::shared_ptr<cuda::multi_context_t>(new cuda::multi_context_t(0));
+
+  // instantiate `problem` and `enactor` templates.
+  using problem_type = problem_t<graph_t, result_type>;
+  using enactor_type = enactor_t<problem_type>;
+
+  // initialize problem; call `init` and `reset` to prepare data structures
+  problem_type problem(G, result, multi_context);
+  problem.init();
+  problem.reset();
+
+  // initialize enactor; call enactor, returning GPU elapsed time
+  enactor_type enactor(&problem, multi_context);
+  return enactor.enact();
+}
+
+}  // namespace kcore
+}  // namespace gunrock