diff --git a/src/v/kafka/server/server.cc b/src/v/kafka/server/server.cc
index b81a85382f0e2..951b0ea012576 100644
--- a/src/v/kafka/server/server.cc
+++ b/src/v/kafka/server/server.cc
@@ -101,7 +101,7 @@ server::server(
   ss::sharded& tx_gateway_frontend,
   ss::sharded& tx_registry_frontend,
   std::optional qdc_config,
-  ssx::thread_worker& tw,
+  ssx::singleton_thread_worker& tw,
   const std::unique_ptr& sr) noexcept
   : net::server(cfg, klog)
   , _smp_group(smp)
diff --git a/src/v/kafka/server/server.h b/src/v/kafka/server/server.h
index afa0d3fbfe5e4..c2fcddd495c86 100644
--- a/src/v/kafka/server/server.h
+++ b/src/v/kafka/server/server.h
@@ -65,7 +65,7 @@ class server final
       ss::sharded&,
       ss::sharded&,
       std::optional,
-      ssx::thread_worker&,
+      ssx::singleton_thread_worker&,
       const std::unique_ptr&) noexcept;

     ~server() noexcept override = default;
@@ -161,7 +161,7 @@ class server final

     latency_probe& latency_probe() { return *_probe; }

-    ssx::thread_worker& thread_worker() { return _thread_worker; }
+    ssx::singleton_thread_worker& thread_worker() { return _thread_worker; }

     const std::unique_ptr& schema_registry() {
         return _schema_registry;
@@ -222,7 +222,7 @@ class server final
     ssx::metrics::metric_groups _metrics
       = ssx::metrics::metric_groups::make_internal();
     std::unique_ptr _probe;
-    ssx::thread_worker& _thread_worker;
+    ssx::singleton_thread_worker& _thread_worker;
     std::unique_ptr _replica_selector;
     const std::unique_ptr& _schema_registry;
 };
diff --git a/src/v/redpanda/application.cc b/src/v/redpanda/application.cc
index 05192fe3fae7d..c1d61f9ef9122 100644
--- a/src/v/redpanda/application.cc
+++ b/src/v/redpanda/application.cc
@@ -2132,7 +2132,7 @@ void application::start_runtime_services(
         });
     });

-    thread_worker->start().get();
+    thread_worker->start({.name = "worker"}).get();

     // single instance
     node_status_backend.invoke_on_all(&cluster::node_status_backend::start)
diff --git a/src/v/redpanda/application.h b/src/v/redpanda/application.h
index 3aa0ae8ac49a4..0a54199c4d814 100644
--- a/src/v/redpanda/application.h
+++ b/src/v/redpanda/application.h
@@ -143,7 +143,7 @@ class application {
     std::unique_ptr controller;

-    std::unique_ptr<ssx::thread_worker> thread_worker;
+    std::unique_ptr<ssx::singleton_thread_worker> thread_worker;

     ss::sharded _kafka_server;
diff --git a/src/v/security/gssapi_authenticator.cc b/src/v/security/gssapi_authenticator.cc
index 873ce420efbe9..09d62e69f9156 100644
--- a/src/v/security/gssapi_authenticator.cc
+++ b/src/v/security/gssapi_authenticator.cc
@@ -169,7 +169,7 @@ class gssapi_authenticator::impl {
 };

 gssapi_authenticator::gssapi_authenticator(
-  ssx::thread_worker& thread_worker,
+  ssx::singleton_thread_worker& thread_worker,
   std::vector rules,
   ss::sstring principal,
   ss::sstring keytab)
diff --git a/src/v/security/gssapi_authenticator.h b/src/v/security/gssapi_authenticator.h
index 36a62a6943c15..3908009dedbb0 100644
--- a/src/v/security/gssapi_authenticator.h
+++ b/src/v/security/gssapi_authenticator.h
@@ -21,7 +21,7 @@ class gssapi_authenticator final : public sasl_mechanism {
     static constexpr const char* name = "GSSAPI";

     gssapi_authenticator(
-      ssx::thread_worker& thread_worker,
+      ssx::singleton_thread_worker& thread_worker,
       std::vector rules,
       ss::sstring principal,
       ss::sstring keytab);
@@ -40,7 +40,7 @@ class gssapi_authenticator final : public sasl_mechanism {
     friend std::ostream&
     operator<<(std::ostream& os, gssapi_authenticator::state const s);

-    ssx::thread_worker& _worker;
+    ssx::singleton_thread_worker& _worker;
     security::acl_principal _principal;
     state _state{state::init};
     class impl;
diff --git a/src/v/ssx/fwd.h b/src/v/ssx/fwd.h
index 04717c769f53d..2decf6c83de28 100644
--- a/src/v/ssx/fwd.h
+++ b/src/v/ssx/fwd.h
@@ -13,6 +13,6 @@
 namespace ssx {

-class thread_worker;
+class singleton_thread_worker;

 } // namespace ssx
diff --git a/src/v/ssx/tests/thread_worker.cc b/src/v/ssx/tests/thread_worker.cc
index 95d1d7736c3ca..cb33c31e107d7 100644
--- a/src/v/ssx/tests/thread_worker.cc
+++ b/src/v/ssx/tests/thread_worker.cc
@@ -34,8 +34,8 @@ template
 auto thread_worker_test() {
     BOOST_REQUIRE_GT(ss::smp::count, 1);

-    auto w = ssx::thread_worker{};
-    w.start().get();
+    auto w = ssx::singleton_thread_worker{};
+    w.start({}).get();

     std::vector>> all_results(ss::smp::count);
@@ -73,6 +73,13 @@ auto thread_worker_test() {
     }
 }

+SEASTAR_THREAD_TEST_CASE(thread_worker_can_be_stopped_before_its_started) {
+    // During application startup, we register a defer for stopping before the
+    // worker is started.
+    auto w = ssx::singleton_thread_worker{};
+    w.stop().get();
+}
+
 SEASTAR_THREAD_TEST_CASE(thread_worker_single_cancel_after) {
     // cancel thread worker at the after all of the gets
     thread_worker_test<1, 1>();
diff --git a/src/v/ssx/tests/thread_worker_bench.cc b/src/v/ssx/tests/thread_worker_bench.cc
index e3167eda103a3..ba9850fe5f1ee 100644
--- a/src/v/ssx/tests/thread_worker_bench.cc
+++ b/src/v/ssx/tests/thread_worker_bench.cc
@@ -13,8 +13,8 @@
 #include 

 ss::future<> run_test(size_t data_size) {
-    auto w = ssx::thread_worker{};
-    co_await w.start();
+    auto w = ssx::singleton_thread_worker{};
+    co_await w.start({});

     std::vector> vec;
     vec.reserve(data_size);
diff --git a/src/v/ssx/thread_worker.h b/src/v/ssx/thread_worker.h
index 357cb10dfaf5a..5f79fa6bfbcc7 100644
--- a/src/v/ssx/thread_worker.h
+++ b/src/v/ssx/thread_worker.h
@@ -21,14 +21,20 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
+#include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
@@ -44,13 +50,36 @@ class task_base {
     task_base& operator=(task_base&&) = delete;
     task_base& operator=(task_base const&) = delete;

-    virtual void process(ss::alien::instance&, ss::shard_id) = 0;
+    virtual ss::stop_iteration process(ss::alien::instance&, ss::shard_id) = 0;

     virtual void set_exception(
       ss::alien::instance&, ss::shard_id, std::exception_ptr) = 0;

     virtual ~task_base() = default;
 };

+class stop_task final : public task_base {
+public:
+    ss::future<> get_future() noexcept { return _promise.get_future(); }
+
+    ss::stop_iteration
+    process(ss::alien::instance& alien, ss::shard_id shard) final {
+        ss::alien::run_on(
+          alien, shard, [this]() noexcept { _promise.set_value(); });
+        return ss::stop_iteration::yes;
+    }
+
+    void set_exception(
+      ss::alien::instance& alien,
+      ss::shard_id shard,
+      std::exception_ptr p) final {
+        ss::alien::run_on(
+          alien, shard, [this, p]() noexcept { _promise.set_exception(p); });
+    }
+
+private:
+    ss::promise<> _promise;
+};
+
 template<typename Func>
 class worker_task final : public task_base {
     using value_type = std::invoke_result_t<Func>;
@@ -63,7 +92,8 @@
         return _promise.get_future();
     }

-    void process(ss::alien::instance& alien, ss::shard_id shard) final {
+    ss::stop_iteration
+    process(ss::alien::instance& alien, ss::shard_id shard) final {
         try {
             if constexpr (std::is_void_v<value_type>) {
                 _func();
@@ -79,6 +109,7 @@
         } catch (...) {
             set_exception(alien, shard, std::current_exception());
         };
+        return ss::stop_iteration::no;
     }

     void set_exception(
@@ -95,88 +126,136 @@
 };

 class thread_worker {
-    static constexpr eventfd_t RUNNABLE_INIT = 0;
-    static constexpr eventfd_t RUNNABLE_READY = 1;
-    static constexpr eventfd_t RUNNABLE_STOP = 2;
-
 public:
-    thread_worker()
-      : _alien{ss::engine().alien()}
+    struct config {
+        // If the thread should be pinned to the same core as the shard it
+        // was created on.
+        bool pin_to_shard_core = false;
+        // NOTE: pthread names must be less than 12 bytes
+        //
+        // Why 12 bytes? pthread only supports names of length 16 and we want
+        // to suffix threads with the core they're associated with via `-xxx`.
+        ss::sstring name = "thread";
+    };
+
+    explicit thread_worker()
+      : _alien(&ss::engine().alien())
+      , _cpu_id(::sched_getcpu())
       , _shard_id(ss::this_shard_id()) {}

-    void start() {
-        _worker = std::thread([this]() { return run(); });
+    void start(config c) {
+        _worker = std::thread([this, c = std::move(c)]() mutable {
+            configure_thread(std::move(c));
+            run();
+        });
     }

     ss::future<> stop() {
         co_await _gate.close();
-        _ready.signal(RUNNABLE_STOP);
         if (_worker.joinable()) {
+            auto task = impl::stop_task();
+            vassert(
+              _queue.push(&task),
+              "expected to be able to push a task onto the queue");
+            _ready.signal(1);
+            co_await task.get_future();
             _worker.join();
         }
     }

     template<typename Func>
-    auto submit(Func func) {
+    auto submit(Func func) ->
+      typename ss::futurize<std::invoke_result_t<Func>>::type {
         auto gh = _gate.hold();
-        return _mutex.get_units().then(
-          [this, func{std::move(func)}, gh{std::move(gh)}](auto units) mutable {
-              vassert(_task == nullptr, "Only one task supported at a time");
-              auto task = std::make_unique>(
-                std::move(func));
-              auto f = task->get_future().finally(
-                [this, gh{std::move(gh)}, units{std::move(units)}] {
-                    _task.reset();
-                });
-              _task = std::move(task);
-              _ready.signal(RUNNABLE_READY);
-              return f;
-          });
+        auto units = co_await ss::get_units(_semaphore, 1);
+        auto task = impl::worker_task(std::move(func));
+        vassert(
+          _queue.push(&task),
+          "expected to be able to push a task onto the queue");
+        _ready.signal(1);
+        co_return co_await task.get_future();
     }

 private:
-    void configure_thread() {
-        ::pthread_setname_np(::pthread_self(), "ssx::thread_worker");
+    void configure_thread(config c) {
+        auto name = ss::format("{}-{}", c.name, _shard_id);
+        ss::throw_pthread_error(
+          ::pthread_setname_np(::pthread_self(), name.c_str()));
+        if (c.pin_to_shard_core) {
+            cpu_set_t cs;
+            CPU_ZERO(&cs);
+            CPU_SET(_cpu_id, &cs);
+            ss::throw_pthread_error(
+              pthread_setaffinity_np(pthread_self(), sizeof(cs), &cs));
+        }
         // Ignore all signals - let seastar handle them
         sigset_t mask;
         ::sigfillset(&mask);
         ss::throw_pthread_error(::pthread_sigmask(SIG_BLOCK, &mask, nullptr));
     }
-    size_t run() {
-        configure_thread();
+
+    void run() {
+        std::array<task_base*, max_length> items{};
         while (true) {
             eventfd_t result = 0;
             auto r = ::eventfd_read(_ready.get_read_fd(), &result);
-            if (r < 0 || result != RUNNABLE_READY) {
-                if (_task) {
-                    _task->set_exception(
-                      _alien,
-                      _shard_id,
-                      std::make_exception_ptr(ss::abort_requested_exception{}));
+            while (!_queue.empty()) {
+                size_t num_tasks = _queue.pop(items.data(), items.size());
+                if (process_tasks({items.data(), num_tasks})) {
+                    // stop was requested, we can be done
+                    vassert(
+                      _queue.empty(),
+                      "expected a stop task to be the last task in the task "
+                      "queue, but there were {} available to process.",
+                      _queue.read_available());
+                    return;
                 }
             }
             if (r < 0) {
-                return r;
-            } else if (result == RUNNABLE_STOP) {
-                return 0;
+                return;
             }
-            _task->process(_alien, _shard_id);
         }
     }

-    ss::alien::instance& _alien;
+    ss::stop_iteration process_tasks(std::span<task_base*> tasks) {
+        for (size_t i = 0; i < tasks.size(); ++i) {
+            if (tasks[i]->process(*_alien, _shard_id)) {
+                fail_tasks(tasks.subspan(i + 1));
+                return ss::stop_iteration::yes;
+            }
+        }
+        return ss::stop_iteration::no;
+    }
+
+    void fail_tasks(std::span<task_base*> tasks) {
+        for (auto* task : tasks) {
+            task->set_exception(
+              *_alien,
+              _shard_id,
+              std::make_exception_ptr(ss::abort_requested_exception{}));
+        }
+    }
+
+    ss::alien::instance* _alien;
+    unsigned _cpu_id;
     ss::shard_id _shard_id;
     std::thread _worker;
     ss::gate _gate;
-    mutex _mutex;
-    seastar::writeable_eventfd _ready{RUNNABLE_INIT};
-    std::unique_ptr _task{nullptr};
+    ss::writeable_eventfd _ready;
+
+    constexpr static size_t max_length = 128;
+    ss::semaphore _semaphore = ss::semaphore(max_length);
+    boost::lockfree::
+      spsc_queue<task_base*, boost::lockfree::capacity<max_length>>
+        _queue;
 };

 } // namespace impl

 /**
- * thread_worker runs tasks in a std::thread.
+ * singleton_thread_worker runs tasks in a std::thread. It is expected that
+ * only a single core manages the lifetime of this worker, and that all other
+ * shards submit their requests through that shard.
  *
  * By running in a std::thread, it's possible to make blocking calls such as
  * file I/O and posix thread primitives without blocking a reactor.
@@ -186,21 +265,27 @@ class thread_worker {
  * class is most suited to run for the lifetime of an application, rather than
  * short-lived.
  */
-class thread_worker {
+class singleton_thread_worker {
 public:
     static constexpr ss::shard_id shard_id = 0;

-    thread_worker() = default;
+    struct config {
+        // NOTE: pthread names must be less than 12 bytes
+        //
+        // Why 12 bytes? pthread only supports names of length 16 and we want
+        // to suffix threads with the core they're associated with via `-xxx`.
+        ss::sstring name = "thread";
+    };

     /**
      * start the background thread.
      */
-    ss::future<> start() {
+    ss::future<> start(config c) {
         vassert(
           ss::this_shard_id() == shard_id,
           "thread_worker must be started on shard ",
           shard_id);
         co_await _gate.start();
-        _impl.start();
+        _impl.start({.name = std::move(c.name)});
     }

     /**
@@ -240,4 +325,57 @@ class thread_worker {
     impl::thread_worker _impl;
 };

+/**
+ * sharded_thread_worker runs tasks in a std::thread, and creates a thread for
+ * each reactor core so that there is a twin std::thread for each reactor core.
+ *
+ * By running in a std::thread, it's possible to make blocking calls such as
+ * file I/O and posix thread primitives without blocking a reactor.
+ *
+ * The thread worker will drain all operations before joining the thread in
+ * stop(), but it should be noted that joining a thread may block. As such, this
+ * class is most suited to run for the lifetime of an application, rather than
+ * short-lived.
+ */
+class sharded_thread_worker {
+public:
+    struct config {
+        // NOTE: pthread names must be less than 12 bytes
+        //
+        // Why 12 bytes? pthread only supports names of length 16 and we want
+        // to suffix threads with the core they're associated with via `-xxx`.
+        ss::sstring name = "thread";
+    };
+
+    /**
+     * start the background thread.
+     */
+    ss::future<> start(config c) {
+        co_await _impl.start();
+        co_await _impl.invoke_on_all([&c](impl::thread_worker& w) {
+            return w.start({.pin_to_shard_core = true, .name = c.name});
+        });
+    }
+
+    /**
+     * stop and join the background thread.
+     *
+     * Although the work has completed, it should be noted that joining a thread
+     * may block the reactor.
+     */
+    ss::future<> stop() { co_await _impl.stop(); }
+
+    /**
+     * submit a task to the background thread
+     */
+    template<typename Func>
+    auto submit(Func func) ->
+      typename ss::futurize<std::invoke_result_t<Func>>::type {
+        return _impl.local().submit(std::move(func));
+    }
+
+private:
+    ss::sharded<impl::thread_worker> _impl;
+};
+
 } // namespace ssx
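
A rough usage sketch of the sharded_thread_worker API introduced above (not part of the patch). It assumes the patched ssx/thread_worker.h is on the include path; the names read_config_file, load_config, the example namespace, and the already-started workers instance are hypothetical stand-ins for any blocking call made from a seastar service.

// Usage sketch only; helper names below are illustrative, not from the patch.
#include "ssx/thread_worker.h"

#include <seastar/core/coroutine.hh>
#include <seastar/core/future.hh>

#include <fstream>
#include <iterator>
#include <string>
#include <utility>

namespace example {

// Plain blocking file I/O: acceptable here because it runs on the worker's
// std::thread, never on a reactor thread.
inline std::string read_config_file(const std::string& path) {
    std::ifstream in(path);
    return std::string(
      std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>());
}

// Each reactor core has its own twin std::thread, so submit() stays local to
// the calling shard and the returned future resolves with the lambda's result.
inline seastar::future<std::string>
load_config(ssx::sharded_thread_worker& workers, std::string path) {
    co_return co_await workers.submit(
      [p = std::move(path)] { return read_config_file(p); });
}

} // namespace example

The worker itself would be started once per application with something like co_await workers.start({.name = "blocking"}); (a short name, because of the 12-byte pthread naming limit noted in the header) and torn down with co_await workers.stop(); during shutdown.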
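Below is a stripped-down, seastar-free sketch of the wake-up pattern the reworked impl::thread_worker appears to use: a lock-free single-producer/single-consumer queue carries the work items, an eventfd acts only as a doorbell, and the stop request travels in-band through the queue (as impl::stop_task does in the patch) so everything already queued is drained before the thread joins. The back-pressure semaphore and the alien calls back into the reactor are omitted; a nullptr entry stands in for the stop task.

// Pattern sketch only; not the patch's implementation.
#include <boost/lockfree/spsc_queue.hpp>

#include <sys/eventfd.h>
#include <unistd.h>

#include <functional>
#include <thread>

int main() {
    boost::lockfree::spsc_queue<
      std::function<void()>*, boost::lockfree::capacity<128>>
      queue;
    int efd = ::eventfd(0, 0);

    std::thread consumer([&] {
        while (true) {
            eventfd_t value = 0;
            ::eventfd_read(efd, &value); // block until the producer rings
            std::function<void()>* task = nullptr;
            while (queue.pop(task)) {
                if (task == nullptr) { // nullptr plays the role of stop_task
                    return;
                }
                (*task)(); // blocking work runs here, off the reactor
            }
        }
    });

    std::function<void()> work = [] { /* blocking call goes here */ };
    queue.push(&work);       // producer side: enqueue the task...
    ::eventfd_write(efd, 1); // ...then ring the doorbell

    queue.push(nullptr);     // in-band stop request drains the queue first
    ::eventfd_write(efd, 1);
    consumer.join();
    ::close(efd);
    return 0;
}

The SPSC queue fits because each impl::thread_worker has exactly one producer (the owning shard, or shard 0 for the singleton wrapper) and one consumer (its twin std::thread); the semaphore in the patch presumably bounds in-flight tasks to the queue's fixed capacity so the asserted push cannot fail.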