From f06a8fbb9f659991203cccdde9e52036f441e21e Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 17:07:04 -0800
Subject: [PATCH 01/14] Improving launch_box; removed non-static to static
 conversion, added a launch method.

---
 include/gunrock/cuda/detail/launch_box.hxx    |  6 +-
 include/gunrock/cuda/launch_box.hxx           | 73 ++++++++++++++-----
 .../gunrock/framework/frontier/frontier.hxx   |  3 +
 .../operators/advance/block_mapped.hxx        | 44 ++++++-----
 .../operators/advance/thread_mapped.hxx       | 20 +++--
 unittests/launch_box/test_launch_box.cu       |  5 +-
 6 files changed, 98 insertions(+), 53 deletions(-)
diff --git a/include/gunrock/cuda/detail/launch_box.hxx b/include/gunrock/cuda/detail/launch_box.hxx
index a364a493..1c672ce5 100644
--- a/include/gunrock/cuda/detail/launch_box.hxx
+++ b/include/gunrock/cuda/detail/launch_box.hxx
@@ -24,11 +24,11 @@ namespace detail {
  * @tparam sm_flags_ Bitwise flags indicating SM versions (`sm_flag_t` enum).
  */
 template <sm_flag_t sm_flags_>
-struct launch_params_abc_t {
+struct launch_params_base_t {
   enum : unsigned { sm_flags = sm_flags_ };
 
  protected:
-  launch_params_abc_t() {}
+  launch_params_base_t() {}
 };
 
 /**
@@ -60,7 +60,7 @@ struct raise_not_found_error_t {
 
 /**
  * @brief Subsets a pack of launch parameters (children of
- * `launch_params_abc_t`), selecting the ones that match the architecture being
+ * `launch_params_base_t`), selecting the ones that match the architecture being
  * compiled for, stored in a tuple type.
  *
  * @par Overview
diff --git a/include/gunrock/cuda/launch_box.hxx b/include/gunrock/cuda/launch_box.hxx
index 513708f9..4c5f0411 100644
--- a/include/gunrock/cuda/launch_box.hxx
+++ b/include/gunrock/cuda/launch_box.hxx
@@ -25,9 +25,22 @@
 
 namespace gunrock {
 namespace cuda {
-
 namespace launch_box {
 
+struct dimensions_t {
+  unsigned int x, y, z;
+
+  __host__ __device__ constexpr dimensions_t(const unsigned int _x = 1,
+                                             const unsigned int _y = 1,
+                                             const unsigned int _z = 1)
+      : x(_x), y(_y), z(_z) {}
+
+  __host__ __device__ constexpr unsigned int size() const { return x * y * z; }
+  __host__ __device__ constexpr operator dim3(void) const {
+    return uint3{x, y, z};
+  }
+};
+
 /**
  * @brief CUDA dim3 template representation, since dim3 cannot be used as a
  * template argument.
@@ -36,10 +49,14 @@ namespace launch_box {
  * @tparam y_ (default = `1`) Dimension in the Y direction.
  * @tparam z_ (default = `1`) Dimension in the Z direction.
  */
-template <unsigned int x_, unsigned int y_ = 1, unsigned int z_ = 1>
+template <unsigned int x_ = 1, unsigned int y_ = 1, unsigned int z_ = 1>
 struct dim3_t {
-  enum : unsigned int { x = x_, y = y_, z = z_, size = x_ * y_ * z_ };
-  static constexpr dim3 get_dim3() { return dim3(x, y, z); }
+  enum : unsigned int { x = x_, y = y_, z = z_ };
+  static constexpr unsigned int size() { return x * y * z; }
+  static constexpr dimensions_t dimensions() { return {x, y, z}; }
+
+  // Convertors must be non-static members.
+  constexpr operator dimensions_t(void) { return {x, y, z}; }
 };
 
 /**
@@ -97,13 +114,15 @@ template <sm_flag_t sm_flags_,
           typename block_dimensions_,
           typename grid_dimensions_,
           size_t shared_memory_bytes_ = 0>
-struct launch_params_t : detail::launch_params_abc_t<sm_flags_> {
+struct launch_params_t : detail::launch_params_base_t<sm_flags_> {
   typedef block_dimensions_ block_dimensions_t;
   typedef grid_dimensions_ grid_dimensions_t;
   enum : size_t { shared_memory_bytes = shared_memory_bytes_ };
 
-  static constexpr dim3 block_dimensions = block_dimensions_t::get_dim3();
-  static constexpr dim3 grid_dimensions = grid_dimensions_t::get_dim3();
+  static constexpr dimensions_t block_dimensions =
+      block_dimensions_t::dimensions();
+  static constexpr dimensions_t grid_dimensions =
+      grid_dimensions_t::dimensions();
   standard_context_t& context;
 
   launch_params_t(standard_context_t& context_) : context(context_) {}
@@ -121,15 +140,17 @@ struct launch_params_t : detail::launch_params_abc_t<sm_flags_> {
 template <sm_flag_t sm_flags_,
           typename grid_dimensions_,
           size_t shared_memory_bytes_ = 0>
-struct launch_params_dynamic_block_t : detail::launch_params_abc_t<sm_flags_> {
+struct launch_params_dynamic_block_t : detail::launch_params_base_t<sm_flags_> {
   typedef grid_dimensions_ grid_dimensions_t;
   enum : size_t { shared_memory_bytes = shared_memory_bytes_ };
 
-  dim3 block_dimensions;
-  static constexpr dim3 grid_dimensions = grid_dimensions_t::get_dim3();
+  dimensions_t block_dimensions;
+  static constexpr dimensions_t grid_dimensions =
+      grid_dimensions_t::dimensions();
+
   standard_context_t& context;
 
-  launch_params_dynamic_block_t(dim3 block_dimensions_,
+  launch_params_dynamic_block_t(dimensions_t block_dimensions_,
                                 standard_context_t& context_)
       : block_dimensions(block_dimensions_), context(context_) {}
 };
@@ -146,17 +167,33 @@ struct launch_params_dynamic_block_t : detail::launch_params_abc_t<sm_flags_> {
 template <sm_flag_t sm_flags_,
           typename block_dimensions_,
           size_t shared_memory_bytes_ = 0>
-struct launch_params_dynamic_grid_t : detail::launch_params_abc_t<sm_flags_> {
+struct launch_params_dynamic_grid_t : detail::launch_params_base_t<sm_flags_> {
   typedef block_dimensions_ block_dimensions_t;
   enum : size_t { shared_memory_bytes = shared_memory_bytes_ };
 
-  static constexpr dim3 block_dimensions = block_dimensions_t::get_dim3();
-  dim3 grid_dimensions;
+  static constexpr dimensions_t block_dimensions =
+      block_dimensions_t::dimensions();
+
+  dimensions_t grid_dimensions;
   standard_context_t& context;
 
-  launch_params_dynamic_grid_t(dim3 grid_dimensions_,
-                               standard_context_t& context_)
-      : grid_dimensions(grid_dimensions_), context(context_) {}
+  launch_params_dynamic_grid_t(standard_context_t& context_)
+      : context(context_) {}
+
+  void calculate_grid_dimensions(std::size_t num_elements) {
+    grid_dimensions = dimensions_t(
+        (num_elements + block_dimensions.x - 1) / block_dimensions.x, 1, 1);
+  }
+
+  /**
+   * @brief Launch a kernel within the given launch box.
+   */
+  template <typename func_t, typename... args_t>
+  void launch(func_t __, args_t&&... args) {
+    __<<<  // kernel function.
+        grid_dimensions, block_dimensions, shared_memory_bytes,
+        context.stream()>>>(std::forward<args_t>(args)...);
+  }
 };
 
 /**
@@ -169,7 +206,7 @@ struct launch_params_dynamic_grid_t : detail::launch_params_abc_t<sm_flags_> {
 template <typename launch_box_t, typename func_t>
 inline float occupancy(func_t kernel) {
   int max_active_blocks;
-  int block_size = launch_box_t::block_dimensions_t::size;
+  int block_size = launch_box_t::block_dimensions_t::size();
   int device;
   cudaDeviceProp props;
 
diff --git a/include/gunrock/framework/frontier/frontier.hxx b/include/gunrock/framework/frontier/frontier.hxx
index 2b8b2702..3e2083c4 100644
--- a/include/gunrock/framework/frontier/frontier.hxx
+++ b/include/gunrock/framework/frontier/frontier.hxx
@@ -38,6 +38,9 @@ class frontier_t : public frontier::vector_frontier_t<vertex_t, edge_t, _kind> {
  public:
   using vertex_type = vertex_t;
   using edge_type = edge_t;
+  using type_t = std::conditional_t<_kind == frontier_kind_t::vertex_frontier,
+                                    vertex_t,
+                                    edge_t>;
   using frontier_type = frontier_t<vertex_t, edge_t, _kind, _view>;
 
   /// TODO: This is a more permenant solution.
diff --git a/include/gunrock/framework/operators/advance/block_mapped.hxx b/include/gunrock/framework/operators/advance/block_mapped.hxx
index e5c847d5..98872bfb 100644
--- a/include/gunrock/framework/operators/advance/block_mapped.hxx
+++ b/include/gunrock/framework/operators/advance/block_mapped.hxx
@@ -27,8 +27,8 @@ namespace operators {
 namespace advance {
 namespace block_mapped {
 
-template <int THREADS_PER_BLOCK,
-          int ITEMS_PER_THREAD,
+template <unsigned int THREADS_PER_BLOCK,
+          unsigned int ITEMS_PER_THREAD,
           advance_io_type_t input_type,
           advance_io_type_t output_type,
           typename graph_t,
@@ -175,23 +175,29 @@ void execute(graph_t& G,
     output->set_number_of_elements(size_of_output);
   }
 
-  std::size_t work_size = (input_type == advance_io_type_t::graph)
-                              ? G.get_number_of_vertices()
-                              : input->get_number_of_elements();
-
-  using namespace gunrock::cuda::launch_box;
-  using launch_t = launch_box_t<launch_params_dynamic_grid_t<fallback, dim3_t<128>>>;
-
-  launch_t launch_box(dim3((work_size + launch_t::block_dimensions_t::x - 1) / launch_t::block_dimensions_t::x), context);
-
-  // Launch blocked-mapped advance kernel.
-  block_mapped_kernel<launch_t::block_dimensions_t::x, 1, input_type, output_type>
-      <<<launch_box.grid_dimensions,     // grid dimensions
-         launch_box.block_dimensions,    // block dimensions
-         launch_box.shared_memory_bytes, // shared memory
-         launch_box.context.stream()     // context
-         >>>(G, op, input->data(), output->data(), work_size,
-             segments.data().get());
+  std::size_t num_elements = (input_type == advance_io_type_t::graph)
+                                 ? G.get_number_of_vertices()
+                                 : input->get_number_of_elements();
+
+  // Set-up and launch block-mapped advance.
+  using namespace cuda::launch_box;
+  using launch_t =
+      launch_box_t<launch_params_dynamic_grid_t<fallback, dim3_t<128>>>;
+
+  launch_t launch_box(context);
+
+  launch_box.calculate_grid_dimensions(num_elements);
+  auto __bm = block_mapped_kernel<        // kernel
+      launch_box.block_dimensions.x,      // threas per block
+      1,                                  // items per thread
+      input_type, output_type,            // i/o parameters
+      graph_t,                            // graph type
+      typename frontier_t::type_t,        // frontier value type
+      typename work_tiles_t::value_type,  // segments value type
+      operator_t                          // lambda type
+      >;
+  launch_box.launch(__bm, G, op, input->data(), output->data(), num_elements,
+                    segments.data().get());
   context.synchronize();
 }
 
diff --git a/include/gunrock/framework/operators/advance/thread_mapped.hxx b/include/gunrock/framework/operators/advance/thread_mapped.hxx
index 888fcf5d..d5caefe7 100644
--- a/include/gunrock/framework/operators/advance/thread_mapped.hxx
+++ b/include/gunrock/framework/operators/advance/thread_mapped.hxx
@@ -97,17 +97,15 @@ void execute(graph_t& G,
                                  ? G.get_number_of_vertices()
                                  : input.get_number_of_elements();
 
-  using namespace gunrock::cuda::launch_box;
-  using launch_t = launch_box_t<launch_params_dynamic_grid_t<fallback, dim3_t<128>>>;
-
-  launch_t launch_box(dim3((num_elements + launch_t::block_dimensions_t::x - 1) / launch_t::block_dimensions_t::x, 1, 1), context);
-
-  // Launch blocked-mapped advance kernel.
-  thread_mapped_kernel<<<launch_box.grid_dimensions,     // grid dimensions
-                         launch_box.block_dimensions,    // block dimensions
-                         launch_box.shared_memory_bytes, // shared memory
-                         launch_box.context.stream()     // context
-                         >>>(neighbors_expand, num_elements);
+  // Set-up and launch thread-mapped advance.
+  using namespace cuda::launch_box;
+  using launch_t =
+      launch_box_t<launch_params_dynamic_grid_t<fallback, dim3_t<128>>>;
+
+  launch_t launch_box(context);
+  launch_box.calculate_grid_dimensions(num_elements);
+  auto __tm = thread_mapped_kernel<decltype(neighbors_expand)>;
+  launch_box.launch(__tm, neighbors_expand, num_elements);
   context.synchronize();
 }
 }  // namespace thread_mapped
diff --git a/unittests/launch_box/test_launch_box.cu b/unittests/launch_box/test_launch_box.cu
index 0945a9a5..0f611b6e 100644
--- a/unittests/launch_box/test_launch_box.cu
+++ b/unittests/launch_box/test_launch_box.cu
@@ -50,8 +50,9 @@ void test_fallback() {
 }
 
 void test_define() {
-  dim3 block_dimensions = launch_t::block_dimensions_t::get_dim3();
-  dim3 grid_dimensions = launch_t::grid_dimensions_t::get_dim3();
+  dimensions_t block_dimensions = launch_t::block_dimensions_t::dimensions();
+  dimensions_t grid_dimensions = launch_t::grid_dimensions_t::dimensions();
+  dim3 conversion_test = block_dimensions;
   size_t smem = launch_t::shared_memory_bytes;
 
   std::cout << "block_dimensions:    " << block_dimensions.x << ", "

From 92cc778596ecaa9fb4cfbddfb9689e74e27efc72 Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 17:34:42 -0800
Subject: [PATCH 02/14] MSVC is being stupid.

---
 include/gunrock/cuda/launch_box.hxx | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/gunrock/cuda/launch_box.hxx b/include/gunrock/cuda/launch_box.hxx
index 4c5f0411..802fe696 100644
--- a/include/gunrock/cuda/launch_box.hxx
+++ b/include/gunrock/cuda/launch_box.hxx
@@ -36,9 +36,14 @@ struct dimensions_t {
       : x(_x), y(_y), z(_z) {}
 
   __host__ __device__ constexpr unsigned int size() const { return x * y * z; }
+
+#ifdef _MSC_VER
+  __host__ __device__ operator dim3(void) const { return uint3{x, y, z}; }
+#else
   __host__ __device__ constexpr operator dim3(void) const {
     return uint3{x, y, z};
   }
+#endif
 };
 
 /**

From d4b73a4704707a9b9a55bb14e0bec6d59d668bbc Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 18:04:01 -0800
Subject: [PATCH 03/14] changing unnamed to named class.

---
 include/gunrock/cuda/device_properties.hxx | 114 ++++++++++-----------
 1 file changed, 57 insertions(+), 57 deletions(-)

diff --git a/include/gunrock/cuda/device_properties.hxx b/include/gunrock/cuda/device_properties.hxx
index c192dc43..fd62eb9d 100644
--- a/include/gunrock/cuda/device_properties.hxx
+++ b/include/gunrock/cuda/device_properties.hxx
@@ -18,7 +18,7 @@ namespace cuda {
 
 typedef cudaDeviceProp device_properties_t;
 
-typedef struct {
+struct compute_capability_t {
   unsigned major;
   unsigned minor;
   constexpr unsigned as_combined_number() const { return major * 10 + minor; }
@@ -28,7 +28,7 @@ typedef struct {
   constexpr bool operator<(int i) { return (int)as_combined_number() < i; }
   constexpr bool operator>=(int i) { return (int)as_combined_number() >= i; }
   constexpr bool operator<=(int i) { return (int)as_combined_number() <= i; }
-} compute_capability_t;
+};
 
 /**
  * @brief Make compute capability from major and minor versions.
@@ -76,18 +76,13 @@ enum : size_t { KiB = 1024, K = 1024 };
  * \return const char* architecture name or nullptr if capability is invalid
  */
 inline constexpr const char* arch_name(compute_capability_t capability) {
-  return (capability.major == 8)
-             ? "Ampere"
-             : (capability.major == 7 && capability.minor == 5)
-                   ? "Turing"
-                   : (capability.major == 7)
-                         ? "Volta"
-                         : (capability.major == 6)
-                               ? "Pascal"
-                               : (capability.major == 5)
-                                     ? "Maxwell"
-                                     : (capability.major == 3) ? "Kepler"
-                                                               : nullptr;
+  return (capability.major == 8)                            ? "Ampere"
+         : (capability.major == 7 && capability.minor == 5) ? "Turing"
+         : (capability.major == 7)                          ? "Volta"
+         : (capability.major == 6)                          ? "Pascal"
+         : (capability.major == 5)                          ? "Maxwell"
+         : (capability.major == 3)                          ? "Kepler"
+                                                            : nullptr;
 }
 
 // Device properties retrieved from:
@@ -115,11 +110,14 @@ inline constexpr unsigned warp_max_threads() {
  * \return unsigned
  */
 inline constexpr unsigned sm_max_ctas(compute_capability_t capability) {
-  return (capability >= 86) ? 16 :              // SM86+
-             (capability >= 80) ? 32 :          // SM80
-                 (capability >= 75) ? 16 :      // SM75
-                     (capability >= 50) ? 32 :  // SM50-SM72
-                         16;                    // SM30-SM37
+  return (capability >= 86) ? 16 :  // SM86+
+             (capability >= 80) ? 32
+                                :  // SM80
+             (capability >= 75) ? 16
+                                :  // SM75
+             (capability >= 50) ? 32
+                                :  // SM50-SM72
+             16;                   // SM30-SM37
 }
 
 /**
@@ -128,10 +126,12 @@ inline constexpr unsigned sm_max_ctas(compute_capability_t capability) {
  * \return unsigned
  */
 inline constexpr unsigned sm_max_threads(compute_capability_t capability) {
-  return (capability >= 86) ? 1536 :          // SM86+
-             (capability >= 80) ? 2048 :      // SM80
-                 (capability >= 75) ? 1024 :  // SM75
-                     2048;                    // SM30-SM72
+  return (capability >= 86) ? 1536 :  // SM86+
+             (capability >= 80) ? 2048
+                                :  // SM80
+             (capability >= 75) ? 1024
+                                :  // SM75
+             2048;                 // SM30-SM72
 }
 
 /**
@@ -140,9 +140,10 @@ inline constexpr unsigned sm_max_threads(compute_capability_t capability) {
  * \return unsigned
  */
 inline constexpr unsigned sm_registers(compute_capability_t capability) {
-  return (capability >= 50) ? 64 * K :       // SM50+
-             (capability >= 37) ? 128 * K :  // SM37
-                 64 * K;                     // SM30-SM35
+  return (capability >= 50) ? 64 * K :  // SM50+
+             (capability >= 37) ? 128 * K
+                                :  // SM37
+             64 * K;               // SM30-SM35
 }
 
 /**
@@ -158,30 +159,32 @@ template <enum cudaFuncCache sm3XCacheConfig = cudaFuncCachePreferNone>
 inline constexpr unsigned sm_max_shared_memory_bytes(
     compute_capability_t capability) {
   unsigned sm3XConfiguredSmem =
-      (sm3XCacheConfig == cudaFuncCachePreferNone)
-          ? 48 * KiB
-          : (sm3XCacheConfig == cudaFuncCachePreferShared)
-                ? 48 * KiB
-                : (sm3XCacheConfig == cudaFuncCachePreferL1)
-                      ? 16 * KiB
-                      : (sm3XCacheConfig == cudaFuncCachePreferEqual)
-                            ? 32 * KiB
-                            : 48 * KiB;
-
-  return (capability >= 86) ? 100 * KiB :                         // SM86+
-             (capability >= 80) ? 164 * KiB :                     // SM80
-                 (capability >= 75) ? 64 * KiB :                  // SM75
-                     (capability >= 70) ? 96 * KiB :              // SM70-SM72
-                         (capability >= 62) ? 64 * KiB :          // SM62
-                             (capability >= 61) ? 96 * KiB :      // SM61
-                                 (capability >= 53) ? 64 * KiB :  // SM53
-                                     (capability >= 52) ? 96 * KiB :  // SM52
-                                         (capability >= 50) ? 64 * KiB
-                                                            :  // SM50
-                                             (capability >= 37)
-                                                 ? 64 * KiB + sm3XConfiguredSmem
-                                                 :                    // SM37
-                                                 sm3XConfiguredSmem;  // SM30-SM35
+      (sm3XCacheConfig == cudaFuncCachePreferNone)     ? 48 * KiB
+      : (sm3XCacheConfig == cudaFuncCachePreferShared) ? 48 * KiB
+      : (sm3XCacheConfig == cudaFuncCachePreferL1)     ? 16 * KiB
+      : (sm3XCacheConfig == cudaFuncCachePreferEqual)  ? 32 * KiB
+                                                       : 48 * KiB;
+
+  return (capability >= 86) ? 100 * KiB :  // SM86+
+             (capability >= 80) ? 164 * KiB
+                                :  // SM80
+             (capability >= 75) ? 64 * KiB
+                                :  // SM75
+             (capability >= 70) ? 96 * KiB
+                                :  // SM70-SM72
+             (capability >= 62) ? 64 * KiB
+                                :  // SM62
+             (capability >= 61) ? 96 * KiB
+                                :  // SM61
+             (capability >= 53) ? 64 * KiB
+                                :  // SM53
+             (capability >= 52) ? 96 * KiB
+                                :  // SM52
+             (capability >= 50) ? 64 * KiB
+                                :  // SM50
+             (capability >= 37) ? 64 * KiB + sm3XConfiguredSmem
+                                :  // SM37
+             sm3XConfiguredSmem;   // SM30-SM35
 }
 
 /**
@@ -204,13 +207,10 @@ template <
 inline constexpr unsigned shared_memory_bank_stride() {
   // The default config on 3.x is the same constant value for later archs
   // Only let 3.x be configurable if stride later becomes dependent on arch
-  return (sm3XSmemConfig == cudaSharedMemBankSizeDefault)
-             ? 1 << 2
-             : (sm3XSmemConfig == cudaSharedMemBankSizeFourByte)
-                   ? 1 << 2
-                   : (sm3XSmemConfig == cudaSharedMemBankSizeEightByte)
-                         ? 1 << 3
-                         : 1 << 2;
+  return (sm3XSmemConfig == cudaSharedMemBankSizeDefault)     ? 1 << 2
+         : (sm3XSmemConfig == cudaSharedMemBankSizeFourByte)  ? 1 << 2
+         : (sm3XSmemConfig == cudaSharedMemBankSizeEightByte) ? 1 << 3
+                                                              : 1 << 2;
 }
 
 void print(device_properties_t& prop) {

From b381f2573fe773ff893054a43e4476ea723a669c Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 18:57:21 -0800
Subject: [PATCH 04/14] Better type limit test?

---
 include/gunrock/util/type_limits.hxx | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/include/gunrock/util/type_limits.hxx b/include/gunrock/util/type_limits.hxx
index aaad813e..f46d7d87 100644
--- a/include/gunrock/util/type_limits.hxx
+++ b/include/gunrock/util/type_limits.hxx
@@ -57,16 +57,14 @@ namespace util {
 namespace limits {
 
 template <typename type_t>
-__host__ __device__ __forceinline__ bool is_valid(type_t value) {
+constexpr __host__ __device__ __forceinline__ bool is_valid(type_t value) {
   static_assert((std::is_integral<type_t>::value ||
                  std::is_floating_point<type_t>::value),
                 "type_t must be an arithmetic type.");
-  if constexpr (std::is_integral<type_t>::value)
+  if (std::is_integral<type_t>::value)
     return (value != gunrock::numeric_limits<type_t>::invalid());
-
-  // else if constexpr (std::is_floating_point<type_t>::value)
-  // just putting else doesn't work (gives a warning), even though it should...
-  return isnan(value) ? false : true;  // XXX: test this on device
+  else
+    return isnan(value) ? false : true;
 }
 
 }  // namespace limits

From f92b0cbf48f3f778dc317ee436b7abced4c49eeb Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 18:57:36 -0800
Subject: [PATCH 05/14] CMAKE status message ++.

---
 unittests/CMakeLists.txt                   |  1 +
 unittests/copy_ctor/CMakeLists.txt         |  2 +-
 unittests/device_properties/CMakeLists.txt |  2 +-
 unittests/for/CMakeLists.txt               |  2 +-
 unittests/launch_box/CMakeLists.txt        |  2 +-
 unittests/type_limits/CMakeLists.txt       | 21 +++++++++++++++++++++
 unittests/type_limits/test_type_limits.cu  |  2 +-
 7 files changed, 27 insertions(+), 5 deletions(-)
 create mode 100644 unittests/type_limits/CMakeLists.txt

diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index 60fe72b3..4a0b3fe1 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -9,4 +9,5 @@ add_subdirectory(copy_ctor)
 add_subdirectory(launch_box)
 add_subdirectory(device_properties)
 add_subdirectory(for)
+add_subdirectory(type_limits)
 # end /* Add unit tests' subdirectories */
diff --git a/unittests/copy_ctor/CMakeLists.txt b/unittests/copy_ctor/CMakeLists.txt
index 63080bbc..b7520fce 100644
--- a/unittests/copy_ctor/CMakeLists.txt
+++ b/unittests/copy_ctor/CMakeLists.txt
@@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME}
         CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
 ) # XXX: Find a better way to inherit essentials properties.
 
-message("-- Example Added: ${APPLICATION_NAME}")
+message(STATUS "Example Added: ${APPLICATION_NAME}")
 # end /* Add CUDA executables */
\ No newline at end of file
diff --git a/unittests/device_properties/CMakeLists.txt b/unittests/device_properties/CMakeLists.txt
index 8ee08474..9b85db0e 100644
--- a/unittests/device_properties/CMakeLists.txt
+++ b/unittests/device_properties/CMakeLists.txt
@@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME}
         CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
 ) # XXX: Find a better way to inherit essentials properties.
 
-message("-- Example Added: ${APPLICATION_NAME}")
+message(STATUS "Example Added: ${APPLICATION_NAME}")
 # end /* Add CUDA executables */
diff --git a/unittests/for/CMakeLists.txt b/unittests/for/CMakeLists.txt
index 49092b06..7f3edc86 100644
--- a/unittests/for/CMakeLists.txt
+++ b/unittests/for/CMakeLists.txt
@@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME}
         CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
 ) # XXX: Find a better way to inherit essentials properties.
 
-message("-- Example Added: ${APPLICATION_NAME}")
+message(STATUS "Example Added: ${APPLICATION_NAME}")
 # end /* Add CUDA executables */
\ No newline at end of file
diff --git a/unittests/launch_box/CMakeLists.txt b/unittests/launch_box/CMakeLists.txt
index c0dbb267..02efe74b 100644
--- a/unittests/launch_box/CMakeLists.txt
+++ b/unittests/launch_box/CMakeLists.txt
@@ -17,5 +17,5 @@ set_target_properties(${APPLICATION_NAME}
         CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
 ) # XXX: Find a better way to inherit essentials properties.
 
-message("-- Example Added: ${APPLICATION_NAME}")
+message(STATUS "Example Added: ${APPLICATION_NAME}")
 # end /* Add CUDA executables */
\ No newline at end of file
diff --git a/unittests/type_limits/CMakeLists.txt b/unittests/type_limits/CMakeLists.txt
new file mode 100644
index 00000000..4fdbd7df
--- /dev/null
+++ b/unittests/type_limits/CMakeLists.txt
@@ -0,0 +1,21 @@
+# begin /* Set the application name. */
+set(APPLICATION_NAME type_limits)
+# end /* Set the application name. */
+
+# begin /* Add CUDA executables */
+add_executable(${APPLICATION_NAME})
+
+set(SOURCE_LIST 
+    test_${APPLICATION_NAME}.cu
+)
+
+target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST})
+target_link_libraries(${APPLICATION_NAME} PRIVATE essentials)
+get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES)
+set_target_properties(${APPLICATION_NAME} 
+    PROPERTIES 
+        CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
+) # XXX: Find a better way to inherit essentials properties.
+
+message(STATUS "Example Added: ${APPLICATION_NAME}")
+# end /* Add CUDA executables */
\ No newline at end of file
diff --git a/unittests/type_limits/test_type_limits.cu b/unittests/type_limits/test_type_limits.cu
index ab268e4c..91d29491 100644
--- a/unittests/type_limits/test_type_limits.cu
+++ b/unittests/type_limits/test_type_limits.cu
@@ -4,7 +4,7 @@
 #include <gunrock/util/type_limits.hxx>
 
 void test_type_limits() {
-  using type_t = short unsigned int;
+  using type_t = unsigned int;
   type_t i = gunrock::numeric_limits<type_t>::invalid();
   std::cout << "i = " << i << " (is valid? " << std::boolalpha
             << gunrock::util::limits::is_valid(i) << ")" << std::endl;

From 17e84753cf894dcf8525775ddcd76cef5c529269 Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 19:22:26 -0800
Subject: [PATCH 06/14] Trying a fix for isnan(int).

---
 include/gunrock/util/type_limits.hxx      |  2 +-
 unittests/type_limits/test_type_limits.cu | 27 +++++++++++++++++++----
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/include/gunrock/util/type_limits.hxx b/include/gunrock/util/type_limits.hxx
index f46d7d87..6d7becef 100644
--- a/include/gunrock/util/type_limits.hxx
+++ b/include/gunrock/util/type_limits.hxx
@@ -64,7 +64,7 @@ constexpr __host__ __device__ __forceinline__ bool is_valid(type_t value) {
   if (std::is_integral<type_t>::value)
     return (value != gunrock::numeric_limits<type_t>::invalid());
   else
-    return isnan(value) ? false : true;
+    return isnan(static_cast<double>(value)) ? false : true;
 }
 
 }  // namespace limits
diff --git a/unittests/type_limits/test_type_limits.cu b/unittests/type_limits/test_type_limits.cu
index 91d29491..8e228ee9 100644
--- a/unittests/type_limits/test_type_limits.cu
+++ b/unittests/type_limits/test_type_limits.cu
@@ -4,10 +4,29 @@
 #include <gunrock/util/type_limits.hxx>
 
 void test_type_limits() {
-  using type_t = unsigned int;
-  type_t i = gunrock::numeric_limits<type_t>::invalid();
-  std::cout << "i = " << i << " (is valid? " << std::boolalpha
-            << gunrock::util::limits::is_valid(i) << ")" << std::endl;
+  std::cout << "invalid = " << gunrock::numeric_limits<int>::invalid()
+            << " (is valid? " << std::boolalpha
+            << gunrock::util::limits::is_valid(
+                   gunrock::numeric_limits<int>::invalid())
+            << ")" << std::endl;
+
+  std::cout << "invalid = " << gunrock::numeric_limits<float>::invalid()
+            << " (is valid? " << std::boolalpha
+            << gunrock::util::limits::is_valid(
+                   gunrock::numeric_limits<float>::invalid())
+            << ")" << std::endl;
+
+  std::cout << "invalid = " << gunrock::numeric_limits<double>::invalid()
+            << " (is valid? " << std::boolalpha
+            << gunrock::util::limits::is_valid(
+                   gunrock::numeric_limits<double>::invalid())
+            << ")" << std::endl;
+
+  std::cout << "invalid = " << gunrock::numeric_limits<unsigned int>::invalid()
+            << " (is valid? " << std::boolalpha
+            << gunrock::util::limits::is_valid(
+                   gunrock::numeric_limits<unsigned int>::invalid())
+            << ")" << std::endl;
 }
 
 int main(int argc, char** argv) {

From 4eb6fff7afa362afd6549aa323f64a5fc829934c Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 19:23:27 -0800
Subject: [PATCH 07/14] Trying a fix in new branch

---
 .github/workflows/windows.yml        | 2 +-
 include/gunrock/util/type_limits.hxx | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 9d32c2a2..84408592 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -5,7 +5,7 @@ name: Windows
 on:
   # Triggers the workflow on push or pull request events but only for the master and dev branch
   push:
-    branches: [ master, dev ]
+    branches: [ master, dev, windows ]
   pull_request:
     branches: [ master, dev ]
 
diff --git a/include/gunrock/util/type_limits.hxx b/include/gunrock/util/type_limits.hxx
index 6d7becef..351c8ead 100644
--- a/include/gunrock/util/type_limits.hxx
+++ b/include/gunrock/util/type_limits.hxx
@@ -61,6 +61,9 @@ constexpr __host__ __device__ __forceinline__ bool is_valid(type_t value) {
   static_assert((std::is_integral<type_t>::value ||
                  std::is_floating_point<type_t>::value),
                 "type_t must be an arithmetic type.");
+
+  // Trying:
+  // https://stackoverflow.com/questions/61646166/how-to-resolve-fpclassify-ambiguous-call-to-overloaded-function
   if (std::is_integral<type_t>::value)
     return (value != gunrock::numeric_limits<type_t>::invalid());
   else

From 8f1e38d55a5446927613703f7c8ba941d1154585 Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 19:59:50 -0800
Subject: [PATCH 08/14] Need verbosity to debug geo.cu, bump CUDA version as
 well.

---
 .github/workflows/windows.yml | 2 +-
 CMakeLists.txt                | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 84408592..e53c7bc1 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -26,7 +26,7 @@ jobs:
         uses: Jimver/cuda-toolkit@v0.2.5
         id: cuda-toolkit
         with:
-          cuda: '11.4.0' # Build fails @ 11.5.1, 11.3.1
+          cuda: '11.5.1' # Build fails @ 11.5.1, 11.3.1 (11.4.0 passes except geo.cu)
           linux-local-args: '["--toolkit"]'
 
       # Runs a single command using the runners shell
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 634c9776..aac44448 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,6 +14,9 @@ project(essentials
   LANGUAGES CXX CUDA
 )
 
+# Increase verbosity.
+set(CMAKE_VERBOSE_MAKEFILE ON)
+
 # begin /* Dependencies directory */
 set(PROJECT_DEPS_DIR externals)
 # end /* Dependencies directory */
@@ -114,16 +117,16 @@ target_sources(essentials
 ####################################################
 set(CXX_FLAGS 
   $<$<CXX_COMPILER_ID:MSVC>:
-    -D_SCL_SECURE_NO_WARNINGS
+    /W4
   >
   $<$<CXX_COMPILER_ID:GNU>:
     -Wall 
+    -Wextra
     -Wno-unused-result
     -Wno-unused-local-typedefs
     -Wno-strict-aliasing
     -Wno-unused-function
     -Wno-format-security
-    # -Wextra
     # -Werror
     # -vvv
   >

From d9efb32bcda35263a5c1813e3b81d2a160a75dfd Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 20:29:39 -0800
Subject: [PATCH 09/14] For MSC host platform/dialect, an extended lambda
 cannot be defined inside the \'if\' or \'else\' block of a constexpr if
 statement.

---
 .github/workflows/windows.yml                 |  2 +-
 CMakeLists.txt                                |  4 +--
 .../gunrock/framework/operators/for/for.hxx   | 36 +++++++++++--------
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index e53c7bc1..772ff4b6 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -48,7 +48,7 @@ jobs:
         
       - name: Build all applications
         # Build your program with the given configuration
-        run: cmake --build ${{github.workspace}}/build # --target sssp
+        run: cmake --build ${{github.workspace}}/build --verbose # --target sssp
         
         # TODO: Use the following once we have BUILD_TYPE and envrionment variables set-up:
         # run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
diff --git a/CMakeLists.txt b/CMakeLists.txt
index aac44448..f27d6725 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,7 +15,7 @@ project(essentials
 )
 
 # Increase verbosity.
-set(CMAKE_VERBOSE_MAKEFILE ON)
+# set(CMAKE_VERBOSE_MAKEFILE ON)
 
 # begin /* Dependencies directory */
 set(PROJECT_DEPS_DIR externals)
@@ -121,7 +121,7 @@ set(CXX_FLAGS
   >
   $<$<CXX_COMPILER_ID:GNU>:
     -Wall 
-    -Wextra
+    # -Wextra
     -Wno-unused-result
     -Wno-unused-local-typedefs
     -Wno-strict-aliasing
diff --git a/include/gunrock/framework/operators/for/for.hxx b/include/gunrock/framework/operators/for/for.hxx
index 106e5618..ec1ba21a 100644
--- a/include/gunrock/framework/operators/for/for.hxx
+++ b/include/gunrock/framework/operators/for/for.hxx
@@ -11,6 +11,19 @@ namespace gunrock {
 namespace operators {
 namespace parallel_for {
 
+namespace detail {
+template <typename operator_t, typename index_t>
+void for_each(operator_t apply, index_t size, cuda::multi_context_t& context) {
+  auto single_context = context.get_context(0);
+  thrust::for_each(
+      thrust::cuda::par.on(single_context->stream()),
+      thrust::make_counting_iterator<index_t>(0),     // Begin: 0
+      thrust::make_counting_iterator<index_t>(size),  // End: # of V/E
+      apply                                           // Unary Operator
+  );
+}
+}  // namespace detail
+
 template <parallel_for_each_t type, typename graph_t, typename operator_t>
 void execute(graph_t& G, operator_t op, cuda::multi_context_t& context) {
   using index_t = std::conditional_t<type == parallel_for_each_t::vertex,
@@ -23,22 +36,15 @@ void execute(graph_t& G, operator_t op, cuda::multi_context_t& context) {
                          ? G.get_number_of_vertices()
                          : G.get_number_of_edges();
 
-  if constexpr (type == parallel_for_each_t::weight) {
-    auto apply = [=] __device__(index_t const& x) { op(G.get_edge_weight(x)); };
-    thrust::for_each(
-        thrust::cuda::par.on(single_context->stream()),
-        thrust::make_counting_iterator<index_t>(0),     // Begin: 0
-        thrust::make_counting_iterator<index_t>(size),  // End: # of V/E
-        apply                                           // Unary Operator
-    );
+  /// Note: For certain host platform/dialect, an extended lambda cannot be
+  /// defined inside the 'if' or 'else' block of a constexpr if statement.
+  if /* constexpr */ (type == parallel_for_each_t::weight) {
+    detail::for_each(
+        [=] __device__(index_t const& x) { op(G.get_edge_weight(x)); }, size,
+        context);
   } else {
-    auto apply = [=] __device__(index_t const& x) { op(x); };
-    thrust::for_each(
-        thrust::cuda::par.on(single_context->stream()),
-        thrust::make_counting_iterator<index_t>(0),     // Begin: 0
-        thrust::make_counting_iterator<index_t>(size),  // End: # of V/E
-        apply                                           // Unary Operator
-    );
+    detail::for_each([=] __device__(index_t const& x) { op(x); }, size,
+                     context);
   }
 }
 

From 79f0b703cfe9c96f6f34517724cd50324ffdb6ed Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 21:40:28 -0800
Subject: [PATCH 10/14] Let's debug geo.

---
 examples/geo/geo.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/geo/geo.cu b/examples/geo/geo.cu
index 38416b72..b2606a46 100644
--- a/examples/geo/geo.cu
+++ b/examples/geo/geo.cu
@@ -164,8 +164,8 @@ void test_geo(int num_arguments, char** argument_array) {
   // --
   // GPU Run
 
-  float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(),
-                                        total_iterations, spatial_iterations);
+  // float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(),
+  // total_iterations, spatial_iterations);
 
   // --
   // Log + Validate

From fe702e1c7b0c2cd3d9e0575152791dea5448bbac Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 21:41:12 -0800
Subject: [PATCH 11/14] Let's debug geo.

---
 examples/geo/geo.cu | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/examples/geo/geo.cu b/examples/geo/geo.cu
index b2606a46..f7ca1f2d 100644
--- a/examples/geo/geo.cu
+++ b/examples/geo/geo.cu
@@ -163,8 +163,9 @@ void test_geo(int num_arguments, char** argument_array) {
 
   // --
   // GPU Run
+  float gpu_elapsed = 0;
 
-  // float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(),
+  // gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(),
   // total_iterations, spatial_iterations);
 
   // --

From ad563d0cd6e3754a399b7984199beaa24fd9c82c Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 21:57:21 -0800
Subject: [PATCH 12/14] Testing if the issue is type limits on device

---
 unittests/type_limits/test_type_limits.cu | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/unittests/type_limits/test_type_limits.cu b/unittests/type_limits/test_type_limits.cu
index 8e228ee9..afa56a05 100644
--- a/unittests/type_limits/test_type_limits.cu
+++ b/unittests/type_limits/test_type_limits.cu
@@ -3,6 +3,10 @@
 
 #include <gunrock/util/type_limits.hxx>
 
+#include <thrust/execution_policy.h>
+#include <thrust/for_each.h>
+#include <thrust/iterator/counting_iterator.h>
+
 void test_type_limits() {
   std::cout << "invalid = " << gunrock::numeric_limits<int>::invalid()
             << " (is valid? " << std::boolalpha
@@ -27,6 +31,20 @@ void test_type_limits() {
             << gunrock::util::limits::is_valid(
                    gunrock::numeric_limits<unsigned int>::invalid())
             << ")" << std::endl;
+
+  auto apply = [=] __device__(int const& x) {
+    auto y = gunrock::numeric_limits<float>::invalid();
+    bool v = gunrock::util::limits::is_valid(x);
+    bool inv = gunrock::util::limits::is_valid(y);
+
+    printf("%f\n", y);
+  };
+
+  thrust::for_each(thrust::device,
+                   thrust::make_counting_iterator<int>(0),  // Begin: 0
+                   thrust::make_counting_iterator<int>(1),  // End: 1
+                   apply                                    // Unary Operator
+  );
 }
 
 int main(int argc, char** argv) {

From 16322f7a345d6c94b33cde9fa5ed055f3e14580a Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 22:08:21 -0800
Subject: [PATCH 13/14] More fixes for geo, GCC conditionals, and a weird ref.

---
 examples/geo/geo.cu                |  6 +++---
 include/gunrock/algorithms/geo.hxx | 12 ++++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/examples/geo/geo.cu b/examples/geo/geo.cu
index f7ca1f2d..2480bc2b 100644
--- a/examples/geo/geo.cu
+++ b/examples/geo/geo.cu
@@ -163,10 +163,10 @@ void test_geo(int num_arguments, char** argument_array) {
 
   // --
   // GPU Run
-  float gpu_elapsed = 0;
+  // float gpu_elapsed = 0;
 
-  // gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(),
-  // total_iterations, spatial_iterations);
+  float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(),
+                                        total_iterations, spatial_iterations);
 
   // --
   // Log + Validate
diff --git a/include/gunrock/algorithms/geo.hxx b/include/gunrock/algorithms/geo.hxx
index 7fdc0657..4e6bda9f 100644
--- a/include/gunrock/algorithms/geo.hxx
+++ b/include/gunrock/algorithms/geo.hxx
@@ -210,7 +210,11 @@ __device__ __host__ void spatial_median(graph_t& G,
       R.latitude = (T.latitude - y.latitude) * Dinvs;
       R.longitude = (T.longitude - y.longitude) * Dinvs;
       r = sqrt(R.latitude * R.latitude + R.longitude * R.longitude);
-      rinv = r == 0 ?: num_zeros / r;
+
+      // Was rinv = (r == 0) ?: (num_zeros / r);
+      // https://gcc.gnu.org/onlinedocs/gcc/Conditionals.html
+      // ... I hate myself too.
+      rinv = (r == 0) ? 0 : (num_zeros / r);
 
       y1.latitude = max(0.0f, 1 - rinv) * T.latitude +
                     min(1.0f, rinv) * y.latitude;  // latitude
@@ -394,9 +398,9 @@ struct enactor_t : gunrock::enactor_t<problem_t> {
 
 template <typename graph_t>
 float run(graph_t& G,
-          coordinates_t* coordinates,              // Input/Output
-          unsigned int& total_iterations,          // Parameter
-          unsigned int& spatial_iterations = 1000  // Parameter
+          coordinates_t* coordinates,                   // Input/Output
+          const unsigned int total_iterations,          // Parameter
+          const unsigned int spatial_iterations = 1000  // Parameter
 ) {
   // <user-defined>
   using param_type = param_t;

From 8c6bf81e77f078106fd4f25f7461b91dd4d549ab Mon Sep 17 00:00:00 2001
From: neoblizz <osama94@gmail.com>
Date: Sun, 19 Dec 2021 22:45:54 -0800
Subject: [PATCH 14/14] final cleanup

---
 .github/workflows/windows.yml | 6 +++---
 examples/geo/geo.cu           | 1 -
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 772ff4b6..d0010770 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -5,7 +5,7 @@ name: Windows
 on:
   # Triggers the workflow on push or pull request events but only for the master and dev branch
   push:
-    branches: [ master, dev, windows ]
+    branches: [ master, dev ]
   pull_request:
     branches: [ master, dev ]
 
@@ -26,7 +26,7 @@ jobs:
         uses: Jimver/cuda-toolkit@v0.2.5
         id: cuda-toolkit
         with:
-          cuda: '11.5.1' # Build fails @ 11.5.1, 11.3.1 (11.4.0 passes except geo.cu)
+          cuda: '11.5.1'
           linux-local-args: '["--toolkit"]'
 
       # Runs a single command using the runners shell
@@ -48,7 +48,7 @@ jobs:
         
       - name: Build all applications
         # Build your program with the given configuration
-        run: cmake --build ${{github.workspace}}/build --verbose # --target sssp
+        run: cmake --build ${{github.workspace}}/build # --verbose # --target sssp
         
         # TODO: Use the following once we have BUILD_TYPE and envrionment variables set-up:
         # run: cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
diff --git a/examples/geo/geo.cu b/examples/geo/geo.cu
index 2480bc2b..38416b72 100644
--- a/examples/geo/geo.cu
+++ b/examples/geo/geo.cu
@@ -163,7 +163,6 @@ void test_geo(int num_arguments, char** argument_array) {
 
   // --
   // GPU Run
-  // float gpu_elapsed = 0;
 
   float gpu_elapsed = gunrock::geo::run(G, coordinates.data().get(),
                                         total_iterations, spatial_iterations);