diff --git a/test/conformance/CMakeLists.txt b/test/conformance/CMakeLists.txt
index 9a122e36b4..1c717dd3cb 100644
--- a/test/conformance/CMakeLists.txt
+++ b/test/conformance/CMakeLists.txt
@@ -21,13 +21,13 @@ function(add_test_adapter name adapter backend)
     function(do_add_test tname env)
         if(${UR_CONFORMANCE_ENABLE_MATCH_FILES} AND EXISTS ${MATCH_FILE})
             add_test(NAME ${tname}
-                COMMAND ${CMAKE_COMMAND}
-                -D TEST_FILE=${Python3_EXECUTABLE}
-                -D TEST_ARGS="${UR_CONFORMANCE_TEST_DIR}/cts_exe.py --test_command ${TEST_COMMAND}"
-                -D MODE=stdout
-                -D MATCH_FILE=${MATCH_FILE}
-                -P ${PROJECT_SOURCE_DIR}/cmake/match.cmake
-                DEPENDS ${TEST_TARGET_NAME}
+                COMMAND ${Python3_EXECUTABLE} ${UR_CONFORMANCE_TEST_DIR}/cts_exe.py
+                --failslist ${MATCH_FILE}
+                --test_command ${PROJECT_BINARY_DIR}/bin/${TEST_TARGET_NAME}
+                --
+                --backend=${backend}
+                --devices_count=${UR_TEST_DEVICES_COUNT}
+                --platforms_count=${UR_TEST_PLATFORMS_COUNT}
                 WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
             )
         else()
@@ -40,7 +40,7 @@ function(add_test_adapter name adapter backend)
         endif()
 
         if(UR_CONFORMANCE_ENABLE_MATCH_FILES)
-            list(APPEND env GTEST_COLOR=no)
+            list(APPEND env GTEST_COLOR=yes)
         endif()
         set_tests_properties(${tname} PROPERTIES
            ENVIRONMENT "${env}"
diff --git a/test/conformance/cts_exe.py b/test/conformance/cts_exe.py
old mode 100644
new mode 100755
index 8b2e33d03b..b183b55d6e
--- a/test/conformance/cts_exe.py
+++ b/test/conformance/cts_exe.py
@@ -1,6 +1,6 @@
-#! /usr/bin/env python3
+#!/usr/bin/env python3
 """
- Copyright (C) 2023 Intel Corporation
+ Copyright (C) 2024 Intel Corporation
 
 Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
 See LICENSE.TXT
@@ -11,68 +11,171 @@
 # The match files contain tests that are expected to fail.
 
 import os
-import shlex
 import sys
-from argparse import ArgumentParser
+import argparse
 import subprocess  # nosec B404
-import signal
-import re
-from collections import OrderedDict
 
 
-def _print_cmdline(cmd_args, env, cwd, file=sys.stderr):
-    cwd = shlex.quote(cwd)
-    env_args = " ".join(
-        "%s=%s" % (shlex.quote(k), shlex.quote(v)) for k, v in env.items()
+def _ci():
+    return os.environ.get("CI") is not None
+
+
+def _color():
+    # Default to "" so an unset GTEST_COLOR does not raise AttributeError
+    return sys.stdout.isatty() or os.environ.get("GTEST_COLOR", "").lower() == "yes"
+
+
+def _print_header(header, *args):
+    if _ci():
+        # GitHub CI interprets this as a "group header" and will provide buttons to fold/unfold it
+        print("##[group]{}".format(header.format(*args)))
+    elif _color():
+        # Inverse color
+        print("\033[7m{}\033[27m".format(header.format(*args)))
+    else:
+        print("### {}".format(header.format(*args)))
+
+
+def _print_end_header():
+    if _ci():
+        print("##[endgroup]")
+
+
+def _print_error(header, *args):
+    if _color():
+        # "!!!" on a red background
+        print("\033[41m!!!\033[0m {}".format(header.format(*args)))
+    else:
+        print("!!! {}".format(header.format(*args)))
+
+
+def _print_format(msg, *args):
+    print(msg.format(*args))
+
+
+def _print_environ(env):
+    _print_header("Environment")
+    for k, v in env.items():
+        _print_format("> {} = {}", k, v)
+    _print_end_header()
+
+
+def _check_filter(cmd, filter):
+    """
+    Checks that the filter matches at least one test for the given cmd
+    """
+    sys.stdout.flush()
+    check = subprocess.Popen(  # nosec B603
+        cmd + ["--gtest_list_tests"],
+        stdout=subprocess.PIPE,
+        stderr=subprocess.DEVNULL,
+        env=(os.environ | {"GTEST_FILTER": filter}),
     )
-    cmd_str = " ".join(map(shlex.quote, cmd_args))
-    print(f"### env -C {cwd} -i {env_args} {cmd_str}", file=file)
+    if not check.stdout.read(1):
+        return False
+    return True
 
 
-if __name__ == "__main__":
+def _run_cmd(cmd, comment, filter):
+    _print_header("Running suite for: {}", comment)
+    _print_format("### {}", " ".join(cmd))
+
+    # Check tests are found
+    if not _check_filter(cmd, filter):
+        _print_end_header()
+        _print_error("Could not find any tests with this filter")
+        return 2
 
-    parser = ArgumentParser()
+    sys.stdout.flush()
+    result = subprocess.call(  # nosec B603
+        cmd,
+        stdout=sys.stdout,
+        stderr=sys.stdout,
+        env=(os.environ | {"GTEST_FILTER": filter}),
+    )
+    _print_end_header()
+    return result
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
     parser.add_argument("--test_command", help="Ctest test case")
-    parser.add_argument("--devices_count", type=str, help="Number of devices on which tests will be run")
-    parser.add_argument("--platforms_count", type=str, help="Number of platforms on which tests will be run")
-    parser.add_argument("--backend", type=str, help="Number of platforms on which tests will be run")
+    parser.add_argument("--failslist", type=str, help="Failure list")
+    parser.add_argument("--", dest="ignored", action="store_true")
+    parser.add_argument("rest", nargs=argparse.REMAINDER)
     args = parser.parse_args()
 
-    invocation = [
-        args.test_command,
-        "--gtest_brief=1",
-        f"--devices_count={args.devices_count}",
-        f"--platforms_count={args.platforms_count}",
-        f"--backend={args.backend}",
-    ]
-    _print_cmdline(invocation, os.environ, os.getcwd())
-
-    result = subprocess.Popen(  # nosec B603
-        invocation, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True
-    )
-    pat = re.compile(r'\[( )*FAILED( )*\]')
-    output_list = []
-    test_cases = []
-    for line in result.stdout:
-        output_list.append(line)
-        if pat.search(line):
-            test_case = line.split(" ")[5]
-            test_case = test_case.rstrip(',')
-            test_cases.append(test_case)
-
-    # Every fail has a single corresponding match line but if there are multiple
-    # devices being tested there will be multiple lines with the same failure
-    # message. To avoid matching mismatch, remove lines that differ only by device ID.
-    test_cases = [re.sub(r'ID[0-9]ID', 'X', tc) for tc in test_cases]
-    test_cases = list(OrderedDict.fromkeys(test_cases))
-
-    for tc in test_cases:
-        print(tc)
-
-    rc = result.wait()
-    if rc < 0:
-        print(signal.strsignal(abs(rc)))
-
-    print("#### GTEST_OUTPUT ####", file=sys.stderr)
-    print(''.join(output_list), file=sys.stderr)
-    print("#### GTEST_OUTPUT_END ####", file=sys.stderr)
+    base_invocation = [args.test_command] + args.rest
+
+    if os.environ.get("GTEST_OUTPUT") is not None:
+        # We are being run purely to generate an output file (likely for ctest_parser.py); falling back to just using
+        # one test execution
+        sys.exit(
+            subprocess.call(  # nosec B603
+                base_invocation, stdout=sys.stdout, stderr=sys.stderr
+            )
+        )
+
+    _print_environ(os.environ)
+
+    # Parse fails list
+    _print_format("Loading fails from {}", args.failslist)
+    fail_patterns = []
+    expected_fail = False
+    with open(args.failslist) as f:
+        for l in f:
+            optional = "{{OPT}}" in l
+            l = l.replace("{{OPT}}", "")
+            l = l.replace("{{.*}}", "*")
+
+            if l.startswith("{{Segmentation fault"):
+                expected_fail = True
+                continue
+            if l.startswith("#"):
+                continue
+            if l.startswith("{{NONDETERMINISTIC}}"):
+                continue
+            if l.strip() == "":
+                continue
+
+            fail_patterns.append(
+                {
+                    "pattern": l.strip(),
+                    "optional": optional,
+                }
+            )
+
+    _print_header("Known failing tests")
+    for fail in fail_patterns:
+        _print_format("> {}", fail)
+    _print_end_header()
+    if len(fail_patterns) == 0:
+        _print_error(
+            "Fail list is empty, if there are no more failures, please remove the file"
+        )
+        sys.exit(2)
+
+    final_result = 0
+
+    # First, run all the known good tests
+    gtest_filter = "-" + (":".join(map(lambda x: x["pattern"], fail_patterns)))
+    if _check_filter(base_invocation, gtest_filter):
+        result = _run_cmd(base_invocation, "known good tests", gtest_filter)
+        if result != 0 and not expected_fail:
+            _print_error("Tests we expected to pass have failed")
+            final_result = result
+    else:
+        _print_format("Note: No tests in this suite are expected to pass")
+
+    # Then run each known failing test
+    for fail in fail_patterns:
+        result = _run_cmd(
+            base_invocation, "failing test {}".format(fail["pattern"]), fail["pattern"]
+        )
+
+        if result == 0 and not fail["optional"]:
+            _print_error(
+                "Test {} is passing when we expect it to fail!", fail["pattern"]
+            )
+            final_result = 1
+
+    sys.exit(final_result)
diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_cuda.match
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_hip.match
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match
index e69646e18b..2ccc267535 100644
--- a/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match
+++ b/test/conformance/exp_command_buffer/exp_command_buffer_adapter_native_cpu.match
@@ -13,7 +13,7 @@
 {{OPT}}BufferFillCommandTest.OverrideArgList/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}InvalidUpdateTest.NotFinalizedCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}InvalidUpdateTest.NotUpdatableCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
-{{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__X_
+{{OPT}}InvalidUpdateTest.InvalidDimensions/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}USMFillCommandTest.UpdateParameters/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}USMFillCommandTest.UpdateBeforeEnqueue/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}USMMultipleFillCommandTest.UpdateAllKernels/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
@@ -33,6 +33,6 @@
 {{OPT}}KernelCommandEventSyncTest.InterCommandBuffer/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}KernelCommandEventSyncTest.SignalWaitBeforeEnqueue/SYCL_NATIVE_CPU__{{.*}}
 {{OPT}}KernelCommandEventSyncUpdateTest.Basic/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
-{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__X_{{.*}}
+{{OPT}}KernelCommandEventSyncUpdateTest.TwoWaitEvents/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}KernelCommandEventSyncUpdateTest.InvalidWaitUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
 {{OPT}}KernelCommandEventSyncUpdateTest.InvalidSignalUpdate/SYCL_NATIVE_CPU___SYCL_Native_CPU__{{.*}}
diff --git a/test/conformance/queue/queue_adapter_level_zero.match b/test/conformance/queue/queue_adapter_level_zero.match
deleted file mode 100644
index 8b13789179..0000000000
--- a/test/conformance/queue/queue_adapter_level_zero.match
+++ /dev/null
@@ -1 +0,0 @@
-