diff --git a/CMakePresets.json b/CMakePresets.json
index b7d119840fe..1b7409a8278 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -89,6 +89,7 @@
         "cudax_ENABLE_EXAMPLES": true,
         "cudax_ENABLE_CUDASTF": true,
         "cudax_ENABLE_CUDASTF_BOUNDSCHECK": false,
+        "cudax_ENABLE_CUDASTF_CODE_GENERATION": true,
         "cudax_ENABLE_CUDASTF_DEBUG": false,
         "cudax_ENABLE_CUDASTF_MATHLIBS": false,
         "cudax_ENABLE_DIALECT_CPP17": true,
@@ -329,6 +330,7 @@
         "cudax_ENABLE_EXAMPLES": true,
         "cudax_ENABLE_CUDASTF": true,
         "cudax_ENABLE_CUDASTF_BOUNDSCHECK": false,
+        "cudax_ENABLE_CUDASTF_CODE_GENERATION": true,
         "cudax_ENABLE_CUDASTF_MATHLIBS": false,
         "cudax_ENABLE_DIALECT_CPP17": false,
         "cudax_ENABLE_DIALECT_CPP20": false
diff --git a/cudax/CMakeLists.txt b/cudax/CMakeLists.txt
index cb928391b8b..a9725331592 100644
--- a/cudax/CMakeLists.txt
+++ b/cudax/CMakeLists.txt
@@ -21,6 +21,7 @@ option(cudax_ENABLE_HEADER_TESTING "Test that CUDA Experimental's public headers
 option(cudax_ENABLE_TESTING "Build CUDA Experimental's tests." ON)
 option(cudax_ENABLE_EXAMPLES "Build CUDA Experimental's examples." ON)
 option(cudax_ENABLE_CUDASTF "Enable CUDASTF subproject" ON)
+option(cudax_ENABLE_CUDASTF_CODE_GENERATION "Enable code generation using STF's parallel_for or launch with CUDA compiler." ON)
 option(cudax_ENABLE_CUDASTF_BOUNDSCHECK "Enable bounds checks for STF targets. Requires debug build." OFF)
 option(cudax_ENABLE_CUDASTF_DEBUG "Enable additional debugging for STF targets. Requires debug build." OFF)
 option(cudax_ENABLE_CUDASTF_MATHLIBS "Enable STF tests/examples that use cublas/cusolver." OFF)
diff --git a/cudax/cmake/cudaxSTFConfigureTarget.cmake b/cudax/cmake/cudaxSTFConfigureTarget.cmake
index 74765af8ff3..1d504bab969 100644
--- a/cudax/cmake/cudaxSTFConfigureTarget.cmake
+++ b/cudax/cmake/cudaxSTFConfigureTarget.cmake
@@ -11,10 +11,18 @@ function(cudax_stf_configure_target target_name)
     CUDA::curand
     CUDA::cuda_driver
   )
+
+  if (cudax_ENABLE_CUDASTF_CODE_GENERATION)
+    target_compile_options(${target_name} PRIVATE
+      $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--extended-lambda>
+    )
+    target_compile_definitions(${target_name} PRIVATE "CUDASTF_CODE_GENERATION")
+  endif()
+
   target_compile_options(${target_name} PRIVATE
-    $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--extended-lambda>
     $<$<COMPILE_LANG_AND_ID:CUDA,NVIDIA>:--expt-relaxed-constexpr>
   )
+
   set_target_properties(${target_name} PROPERTIES
     CUDA_RUNTIME_LIBRARY Static
     CUDA_SEPARABLE_COMPILATION ON
diff --git a/cudax/examples/stf/CMakeLists.txt b/cudax/examples/stf/CMakeLists.txt
index 586a3fe4be4..d8953beae51 100644
--- a/cudax/examples/stf/CMakeLists.txt
+++ b/cudax/examples/stf/CMakeLists.txt
@@ -2,24 +2,30 @@ set(stf_example_sources
   01-axpy.cu
   01-axpy-cuda_kernel.cu
   01-axpy-cuda_kernel_chain.cu
-  01-axpy-parallel_for.cu
-  01-axpy-launch.cu
   02-axpy-host_launch.cu
   03-temporary-data.cu
   04-fibonacci.cu
   04-fibonacci-run_once.cu
   08-cub-reduce.cu
   axpy-annotated.cu
+  void_data_interface.cu
+  explicit_data_places.cu
+  thrust_zip_iterator.cu
+  1f1b.cu
+)
+
+# Examples which rely on code generation (parallel_for or launch)
+set(stf_example_codegen_sources
+  01-axpy-parallel_for.cu
+  01-axpy-launch.cu
   binary_fhe.cu
   cfd.cu
   custom_data_interface.cu
-  void_data_interface.cu
   frozen_data_init.cu
   graph_algorithms/degree_centrality.cu
   graph_algorithms/pagerank.cu
   graph_algorithms/tricount.cu
   graph_algorithms/jaccard.cu
-  explicit_data_places.cu
   fdtd_mgpu.cu
   heat.cu
   heat_mgpu.cu
@@ -33,9 +39,7 @@ set(stf_example_sources
   scan.cu
   mandelbrot.cu
   standalone-launches.cu
-  thrust_zip_iterator.cu
   word_count.cu
-  1f1b.cu
 )
 
 # Examples using CUBLAS, CUSOLVER...
@@ -110,6 +114,12 @@ foreach(cn_target IN LISTS cudax_TARGETS)
     cudax_add_stf_example(example_target "${source}" ${cn_target})
   endforeach()
 
+  if (cudax_ENABLE_CUDASTF_CODE_GENERATION)
+     foreach(source IN LISTS stf_example_codegen_sources)
+         cudax_add_stf_example(example_target "${source}" ${cn_target})
+     endforeach()
+  endif()
+
   if (cudax_ENABLE_CUDASTF_MATHLIBS)
     foreach(source IN LISTS stf_example_mathlib_sources)
       cudax_add_stf_example(example_target "${source}" ${cn_target} LINK_MATHLIBS)
diff --git a/cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh b/cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh
index fe6b8a59f11..5e78fa336b7 100644
--- a/cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh
+++ b/cudax/include/cuda/experimental/__stf/graph/graph_ctx.cuh
@@ -763,7 +763,7 @@ UNITTEST("set_symbol on graph_task and graph_task<>")
   ctx.finalize();
 };
 
-#  ifdef __CUDACC__
+#  if defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 namespace reserved
 {
 
@@ -1028,7 +1028,7 @@ UNITTEST("create many graph ctxs")
 
 } // end namespace reserved
 
-#  endif // __CUDACC__
+#  endif // defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 
 #endif // UNITTESTED_FILE
 } // end namespace cuda::experimental::stf
diff --git a/cudax/include/cuda/experimental/__stf/internal/launch.cuh b/cudax/include/cuda/experimental/__stf/internal/launch.cuh
index 9d9abad3bf5..7d96933f39a 100644
--- a/cudax/include/cuda/experimental/__stf/internal/launch.cuh
+++ b/cudax/include/cuda/experimental/__stf/internal/launch.cuh
@@ -31,7 +31,7 @@ namespace cuda::experimental::stf
 {
 
 // This feature requires a CUDA compiler
-#ifdef __CUDACC__
+#if defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 
 class stream_ctx;
 template <typename...>
@@ -505,5 +505,5 @@ private:
 
 } // namespace reserved
 
-#endif // __CUDACC__
+#endif // defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 } // end namespace cuda::experimental::stf
diff --git a/cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh b/cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh
index 497dac051eb..0bf646c4b90 100644
--- a/cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh
+++ b/cudax/include/cuda/experimental/__stf/internal/parallel_for_scope.cuh
@@ -29,7 +29,7 @@
 namespace cuda::experimental::stf
 {
 
-#ifdef __CUDACC__
+#if defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 
 class stream_ctx;
 class graph_ctx;
@@ -468,6 +468,6 @@ private:
 };
 } // end namespace reserved
 
-#endif // __CUDACC__
+#endif // defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 
 } // end namespace cuda::experimental::stf
diff --git a/cudax/include/cuda/experimental/__stf/internal/thread_hierarchy.cuh b/cudax/include/cuda/experimental/__stf/internal/thread_hierarchy.cuh
index cbe2e779791..99dbf07081a 100644
--- a/cudax/include/cuda/experimental/__stf/internal/thread_hierarchy.cuh
+++ b/cudax/include/cuda/experimental/__stf/internal/thread_hierarchy.cuh
@@ -419,7 +419,7 @@ private:
 };
 
 #ifdef UNITTESTED_FILE
-#  ifdef __CUDACC__
+#  if defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 namespace reserved
 {
 template <auto... spec>
@@ -525,7 +525,7 @@ UNITTEST("thread hierarchy inner sync")
   cuda_safe_call(cudaDeviceSynchronize());
 };
 
-#  endif // __CUDACC__
+#  endif // defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 #endif // UNITTESTED_FILE
 
 } // end namespace cuda::experimental::stf
diff --git a/cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh b/cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh
index c3c8ac2a82b..029fd45b824 100644
--- a/cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh
+++ b/cudax/include/cuda/experimental/__stf/stream/stream_ctx.cuh
@@ -1125,7 +1125,7 @@ UNITTEST("get logical_data from a task_dep")
   ctx.finalize();
 };
 
-#  ifdef __CUDACC__
+#  if defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 namespace reserved
 {
 inline void unit_test_pfor()
@@ -1313,8 +1313,8 @@ UNITTEST("basic launch test")
 };
 
 } // end namespace reserved
-#  endif // UNITTESTED_FILE
+#  endif // defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 
-#endif // __CUDACC__
+#endif // UNITTESTED_FILE
 
 } // namespace cuda::experimental::stf
diff --git a/cudax/include/cuda/experimental/stf.cuh b/cudax/include/cuda/experimental/stf.cuh
index f5ce8f2e4dc..3bfdb5a743a 100644
--- a/cudax/include/cuda/experimental/stf.cuh
+++ b/cudax/include/cuda/experimental/stf.cuh
@@ -357,18 +357,13 @@ public:
     return task(default_exec_place(), mv(deps)...);
   }
 
+#if defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
   /*
    * parallel_for : apply an operation over a shaped index space
    */
   template <typename S, typename... Deps>
   auto parallel_for(exec_place e_place, S shape, task_dep<Deps>... deps)
   {
-#ifndef __CUDACC__
-    // We want a test that always fail, but is only triggered when the
-    // function is instantiated so the test relies on actual templated
-    // types.
-    static_assert(::std::is_same_v<S, ::std::false_type>, "parallel_for is only supported with CUDA compilers.");
-#endif
     EXPECT(payload.index() != ::std::variant_npos, "Context is not initialized.");
     using result_t = unified_scope<reserved::parallel_for_scope<stream_ctx, S, null_partition, Deps...>,
                                    reserved::parallel_for_scope<graph_ctx, S, null_partition, Deps...>>;
@@ -382,12 +377,6 @@ public:
   template <typename partitioner_t, typename S, typename... Deps>
   auto parallel_for(partitioner_t p, exec_place e_place, S shape, task_dep<Deps>... deps)
   {
-#ifndef __CUDACC__
-    // We want a test that always fail, but is only triggered when the
-    // function is instantiated so the test relies on actual templated
-    // types.
-    static_assert(::std::is_same_v<S, ::std::false_type>, "parallel_for is only supported with CUDA compilers.");
-#endif
     EXPECT(payload.index() != ::std::variant_npos, "Context is not initialized.");
     using result_t = unified_scope<reserved::parallel_for_scope<stream_ctx, S, partitioner_t, Deps...>,
                                    reserved::parallel_for_scope<graph_ctx, S, partitioner_t, Deps...>>;
@@ -403,6 +392,7 @@ public:
   {
     return parallel_for(default_exec_place(), mv(shape), mv(deps)...);
   }
+#endif // defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 
   template <typename... Deps>
   auto host_launch(task_dep<Deps>... deps)
@@ -473,17 +463,10 @@ public:
       payload);
   }
 
+#if defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
   template <typename thread_hierarchy_spec_t, typename... Deps>
   auto launch(thread_hierarchy_spec_t spec, exec_place e_place, task_dep<Deps>... deps)
   {
-#ifndef __CUDACC__
-    // We want a test that always fail, but is only triggered when the
-    // function is instantiated so the test relies on actual templated
-    // types.
-    static_assert(::std::is_same_v<thread_hierarchy_spec_t, ::std::false_type>,
-                  "launch is only supported with CUDA compilers.");
-#endif
-
     using result_t = unified_scope<reserved::launch_scope<stream_ctx, thread_hierarchy_spec_t, Deps...>,
                                    reserved::launch_scope<graph_ctx, thread_hierarchy_spec_t, Deps...>>;
     return ::std::visit(
@@ -514,6 +497,7 @@ public:
   {
     return launch(mv(ths), default_exec_place(), mv(deps)...);
   }
+#endif // defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 
   auto repeat(size_t count)
   {
@@ -802,7 +786,7 @@ UNITTEST("context with arguments")
   cuda_safe_call(cudaStreamDestroy(stream));
 };
 
-#  ifdef __CUDACC__
+#  if defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 namespace reserved
 {
 inline void unit_test_context_pfor()
@@ -1251,7 +1235,7 @@ UNITTEST("unit_test_partitioner_product")
 };
 
 } // namespace reserved
-#  endif // __CUDACC__
+#  endif // defined(CUDASTF_CODE_GENERATION) && defined(__CUDACC__)
 
 UNITTEST("make_tuple_indexwise")
 {
diff --git a/cudax/test/stf/CMakeLists.txt b/cudax/test/stf/CMakeLists.txt
index 0b238da03d3..65082348ae8 100644
--- a/cudax/test/stf/CMakeLists.txt
+++ b/cudax/test/stf/CMakeLists.txt
@@ -1,23 +1,13 @@
 set(stf_test_sources
-  algorithm/graph_algorithms.cu
-  algorithm/in_graph_ctx.cu
-  algorithm/nested.cu
-  algorithm/algorithm_with_read.cu
-  allocators/adapter.cu
   allocators/buddy_allocator.cu
-  allocators/cap_tmp_buffers.cu
-  cpp/read_const.cu
+  cpp/concurrency_test.cu
   cpp/redundant_data.cu
   cpp/redundant_data_different_modes.cu
-  cpp/reuse_computation.cu
-  cpp/reuse_computation_2.cu
   cpp/scoped_graph_task.cu
   cpp/user_streams.cu
-  cpp/concurrency_test.cu
   dot/basic.cu
-  dot/with_events.cu
   dot/graph_print_to_dot.cu
-  tools/auto_dump/auto_dump.cu
+  dot/with_events.cu
   error_checks/ctx_mismatch.cu
   error_checks/data_interface_mismatch.cu
   error_checks/double_finalize.cu
@@ -25,107 +15,120 @@ set(stf_test_sources
   error_checks/misformed_tasks_dbl_end.cu
   error_checks/misformed_tasks_dbl_start.cu
   error_checks/non_managed_data.cu
-  error_checks/slice_check_bounds.cu
   error_checks/uninitialized_data.cu
-  error_checks/unsatisfiable_spec.cu
   error_checks/write_frozen.cu
-  examples/01-axpy-launch-ranges-cg.cu
-  examples/01-axpy-places.cu
-  examples/05-stencil.cu
   examples/05-stencil-no-copy.cu
   examples/05-stencil-places.cu
+  examples/05-stencil.cu
   examples/05-stencil2d-places.cu
-  examples/09-nbody.cu
+  fhe/parse_arctyrex.cu
+  gnu/include_only.cpp
+  graph/concurrency_test.cu
+  graph/graph_ctx_low_level.cu
+  graph/static_graph_ctx.cu
+  hashtable/test.cu
+  interface/data_from_device_async.cu
+  interface/move_operator.cu
+  local_stf/legacy_to_stf.cu
+  places/managed.cu
+  places/managed_from_user.cu
+  places/non_current_device.cu
+  places/place_partition.cu
+  places/recursion.cu
+  reclaiming/graph.cu
+  reclaiming/graph_2.cu
+  reclaiming/graph_real_oom.cu
+  reclaiming/stream.cu
+  reductions/many_inc.cu
+  reductions/redux_test.cu
+  reductions/redux_test2.cu
+  reductions/slice2d_reduction.cu
+  reductions/slice_custom_op.cu
+  reductions/successive_reductions.cu
+  reductions/sum.cu
+  reductions/sum_array.cu
+  reductions/sum_multiple_places_no_refvalue.cu
+  slice/pinning.cu
+  stencil/stencil-1D.cu
+  stress/empty_tasks.cu
+  stress/empty_tasks_alloc.cu
+  stress/kernel_chain.cu
+  stress/kernel_chain_fused.cu
+  stress/many_read.cu
+  stress/task_bench.cu
+  threads/axpy-threads-2.cu
+  # threads/axpy-threads.cu
+  utility/timing_with_fences.cu
+)
+
+set(stf_test_codegen_sources
+  algorithm/algorithm_with_read.cu
+  algorithm/graph_algorithms.cu
+  algorithm/in_graph_ctx.cu
+  algorithm/nested.cu
+  allocators/adapter.cu
+  allocators/cap_tmp_buffers.cu
+  cpp/read_const.cu
+  cpp/reuse_computation.cu
+  cpp/reuse_computation_2.cu
+  error_checks/slice_check_bounds.cu
+  error_checks/unsatisfiable_spec.cu
+  examples/01-axpy-launch-ranges-cg.cu
+  examples/01-axpy-places.cu
   examples/09-nbody-algorithm.cu
   examples/09-nbody-blocked.cu
-  fhe/parse_arctyrex.cu
+  examples/09-nbody.cu
   freeze/constant_logical_data.cu
   freeze/freeze.cu
   freeze/freeze_rw.cu
   freeze/task_fence.cu
-  graph/concurrency_test.cu
   graph/epoch.cu
   graph/for_each_batched.cu
   graph/for_each_batched_write.cu
   graph/freeze_for_graph.cu
-  graph/graph_ctx_low_level.cu
   graph/graph_composition.cu
+  graph/graph_tmp_data.cu
   graph/many.cu
   graph/multiple_graph_ctx.cu
-  graph/static_graph_ctx.cu
-  graph/graph_tmp_data.cu
   green_context/axpy_gc.cu
   green_context/cuda_graph.cu
   green_context/gc_grid.cu
-  gnu/include_only.cpp
-  hash/logical_data.cu
   hash/ctx_hash.cu
+  hash/logical_data.cu
   hashtable/fusion.cu
   hashtable/fusion_reduction.cu
-  hashtable/test.cu
   hashtable/parallel_for.cu
   hashtable/parallel_for_shape.cu
-  interface/mix_stream_and_graph.cu
-  interface/mix_stream_and_graph_2.cu
   interface/data_from_device.cu
   interface/data_from_device_2.cu
   interface/data_from_device_wb.cu
-  interface/data_from_device_async.cu
   interface/graph_use_device_data.cu
-  interface/stream_add_callback.cu
-  interface/move_operator.cu
+  interface/mix_stream_and_graph.cu
+  interface/mix_stream_and_graph_2.cu
   interface/scal.cu
   interface/scalar_div.cu
+  interface/stream_add_callback.cu
   local_stf/interop_cuda.cu
-  local_stf/legacy_to_stf.cu
   loop_dispatch/dispatch_on_streams.cu
-  loop_dispatch/nested_loop_dispatch.cu
   loop_dispatch/loop_dispatch.cu
+  loop_dispatch/nested_loop_dispatch.cu
+  parallel_for/fdtd.cu
+  parallel_for/parallel_for_all_devs.cu
+  parallel_for/parallel_for_box.cu
   parallel_for/parallel_for_repeat.cu
-  parallel_for/test_parallel_for.cu
   parallel_for/test2_parallel_for_context.cu
-  parallel_for/parallel_for_box.cu
-  parallel_for/parallel_for_all_devs.cu
-  parallel_for/fdtd.cu
+  parallel_for/test_parallel_for.cu
   parallel_for/tiled_loops.cu
   places/cuda_stream_place.cu
-  places/managed.cu
   places/managed_from_shape.cu
-  places/managed_from_user.cu
-  places/non_current_device.cu
-  places/place_partition.cu
-  places/recursion.cu
-  reclaiming/graph.cu
-  reclaiming/graph_2.cu
-  reclaiming/graph_real_oom.cu
-  reclaiming/stream.cu
-  reductions/many_inc.cu
-  reductions/redux_test.cu
-  reductions/redux_test2.cu
-  reductions/slice2d_reduction.cu
-  reductions/slice_custom_op.cu
-  reductions/successive_reductions.cu
   reductions/successive_reductions_pfor.cu
-  reductions/sum.cu
-  reductions/sum_array.cu
   reductions/sum_multiple_places.cu
-  reductions/sum_multiple_places_no_refvalue.cu
   reductions/write_back_after_redux.cu
-  slice/pinning.cu
-  stencil/stencil-1D.cu
-  stress/empty_tasks.cu
-  stress/empty_tasks_alloc.cu
-  stress/kernel_chain.cu
-  stress/kernel_chain_fused.cu
   stress/launch_overhead.cu
   stress/launch_vs_parallelfor.cu
-  stress/many_read.cu
   stress/parallel_for_overhead.cu
-  stress/task_bench.cu
-  # threads/axpy-threads.cu
-  threads/axpy-threads-2.cu
   # threads/axpy-threads-pfor.cu   # Currently has a difficult-to-reproduce concurrency problem
-  utility/timing_with_fences.cu
+  tools/auto_dump/auto_dump.cu
 )
 
 # Examples using CUBLAS, CUSOLVER...
@@ -266,6 +269,12 @@ foreach(cn_target IN LISTS cudax_TARGETS)
     cudax_add_stf_test(test_target "${source}" ${cn_target})
   endforeach()
 
+  if (cudax_ENABLE_CUDASTF_CODE_GENERATION)
+    foreach(source IN LISTS stf_test_codegen_sources)
+      cudax_add_stf_test(test_target "${source}" ${cn_target})
+    endforeach()
+  endif()
+
   # Tests with mathlib deps:
   if (cudax_ENABLE_CUDASTF_MATHLIBS)
     foreach(source IN LISTS stf_test_mathlib_sources)
diff --git a/cudax/test/stf/static_error_checks/CMakeLists.txt b/cudax/test/stf/static_error_checks/CMakeLists.txt
index 576cb992b68..8e055cb0430 100644
--- a/cudax/test/stf/static_error_checks/CMakeLists.txt
+++ b/cudax/test/stf/static_error_checks/CMakeLists.txt
@@ -1,6 +1,4 @@
 set(static_assert_tests
-  gnu_launch.cpp
-  gnu_parallel_for.cpp
   static.cu
   task_prototype.cu
   stream_task_prototype.cu
diff --git a/cudax/test/stf/static_error_checks/gnu_launch.cpp b/cudax/test/stf/static_error_checks/gnu_launch.cpp
deleted file mode 100644
index 8c39ad6d4d9..00000000000
--- a/cudax/test/stf/static_error_checks/gnu_launch.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of CUDASTF in CUDA C++ Core Libraries,
-// under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES.
-//
-//===----------------------------------------------------------------------===//
-
-#include <cuda/experimental/stf.cuh>
-
-using namespace cuda::experimental::stf;
-
-int main()
-{
-  context ctx;
-
-  auto A = ctx.logical_data<int>(size_t(128));
-
-  // We cannot do a launch on a device lambda without a CUDA compiler
-  // as we normally expect an extended lambda (which is also missing here)
-  ctx.launch(par(128), A.write())->*[](auto th, auto A) { A(th.rank(i) = 0;
-  };
-
-  ctx.finalize();
-}
diff --git a/cudax/test/stf/static_error_checks/gnu_parallel_for.cpp b/cudax/test/stf/static_error_checks/gnu_parallel_for.cpp
deleted file mode 100644
index fcc485d01de..00000000000
--- a/cudax/test/stf/static_error_checks/gnu_parallel_for.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of CUDASTF in CUDA C++ Core Libraries,
-// under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-// SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES.
-//
-//===----------------------------------------------------------------------===//
-
-#include <cuda/experimental/stf.cuh>
-
-using namespace cuda::experimental::stf;
-
-int main()
-{
-  context ctx;
-
-  auto A = ctx.logical_data<int>(size_t(1));
-
-  // We cannot do a parallel for on a device lambda without a CUDA compiler
-  // as we normally expect an extended lambda (which is also missing here)
-  ctx.parallel_for(A.shape(), A.write())->*[](size_t i, auto A) {
-    A(i) = 0;
-  };
-
-  ctx.finalize();
-}