msm: kgsl: Dispatch commands using a master kthread
Instead of coordinating with a worker when dispatching commands and
abusing a mutex for synchronization, it's faster to keep a single
kthread that dispatches commands whenever needed. This reduces GPU
processing latency.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
[@0ctobot: Adapted for msm-4.9, this reverts commit:
2eb74d7 ("msm: kgsl: Defer issue commands to worker thread")]
Signed-off-by: Adam W. Willis <return.of.octobot@gmail.com>
Signed-off-by: Raphiel Rollerscaperers <raphielscape@outlook.com>
kerneltoast authored and TogoFire committed Jul 22, 2021
1 parent 21b102a commit b771109
Showing 5 changed files with 52 additions and 70 deletions.
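
The change reduces to a well-known pattern: a dedicated kthread parked on a waitqueue, woken through an atomic flag so that any number of wakeup requests issued while the thread is busy coalesce into a single extra dispatch pass. A minimal sketch of that handoff, using hypothetical demo_* names rather than the driver's own:

#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);
static atomic_t demo_pending = ATOMIC_INIT(0);

static int demo_thread_fn(void *data)
{
	while (1) {
		bool should_stop;

		/* Sleep until there is work to do or we are told to exit. */
		wait_event(demo_waitq,
			   (should_stop = kthread_should_stop()) ||
			   atomic_cmpxchg(&demo_pending, 1, 0));

		if (should_stop)
			break;

		/* ... drain all pending commands here ... */
	}

	return 0;
}

/* Producer side: wake the thread only on the 0 -> 1 transition. */
static void demo_schedule(void)
{
	if (!atomic_cmpxchg(&demo_pending, 0, 1))
		wake_up(&demo_waitq);
}

The consumer clears the flag with atomic_cmpxchg(..., 1, 0) as part of the wait condition, so a schedule request that arrives mid-dispatch simply re-arms the flag and triggers one more pass; this is the behavior adreno_dispatcher_schedule() below depends on.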
84 changes: 45 additions & 39 deletions drivers/gpu/msm/adreno_dispatch.c
@@ -16,6 +16,11 @@
 #include <linux/sched.h>
 #include <linux/jiffies.h>
 #include <linux/err.h>
+#include <linux/version.h>
+/* The sched_param struct is located elsewhere in newer kernels */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#include <uapi/linux/sched/types.h>
+#endif
 
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
@@ -986,13 +991,6 @@ static void _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
 	spin_unlock(&dispatcher->plist_lock);
 }
 
-static inline void _decrement_submit_now(struct kgsl_device *device)
-{
-	spin_lock(&device->submit_lock);
-	device->submit_now--;
-	spin_unlock(&device->submit_lock);
-}
-
 /**
  * adreno_dispatcher_issuecmds() - Issue commmands from pending contexts
  * @adreno_dev: Pointer to the adreno device struct
@@ -1001,30 +999,7 @@ static inline void _decrement_submit_now(struct kgsl_device *device)
  */
 static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
 {
-	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
-	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
-
-	spin_lock(&device->submit_lock);
-	/* If state transition to SLUMBER, schedule the work for later */
-	if (device->slumber == true) {
-		spin_unlock(&device->submit_lock);
-		goto done;
-	}
-	device->submit_now++;
-	spin_unlock(&device->submit_lock);
-
-	/* If the dispatcher is busy then schedule the work for later */
-	if (!mutex_trylock(&dispatcher->mutex)) {
-		_decrement_submit_now(device);
-		goto done;
-	}
-
-	_adreno_dispatcher_issuecmds(adreno_dev);
-	mutex_unlock(&dispatcher->mutex);
-	_decrement_submit_now(device);
-	return;
-done:
-	adreno_dispatcher_schedule(device);
+	adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
 }
 
 /**
@@ -2500,12 +2475,9 @@ static void _dispatcher_power_down(struct adreno_device *adreno_dev)
 	mutex_unlock(&device->mutex);
 }
 
-static void adreno_dispatcher_work(struct kthread_work *work)
+static void adreno_dispatcher_work(struct adreno_device *adreno_dev)
 {
-	struct adreno_dispatcher *dispatcher =
-			container_of(work, struct adreno_dispatcher, work);
-	struct adreno_device *adreno_dev =
-			container_of(dispatcher, struct adreno_device, dispatcher);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
 	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
 	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
 	int count = 0;
@@ -2555,12 +2527,39 @@ static void adreno_dispatcher_work(struct kthread_work *work)
 	mutex_unlock(&dispatcher->mutex);
 }
 
+static int adreno_dispatcher_thread(void *data)
+{
+	static const struct sched_param sched_rt_prio = {
+		.sched_priority = 16
+	};
+	struct adreno_device *adreno_dev = data;
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sched_rt_prio);
+
+	while (1) {
+		bool should_stop;
+
+		wait_event(dispatcher->cmd_waitq,
+			   (should_stop = kthread_should_stop()) ||
+			   atomic_cmpxchg(&dispatcher->send_cmds, 1, 0));
+
+		if (should_stop)
+			break;
+
+		adreno_dispatcher_work(adreno_dev);
+	}
+
+	return 0;
+}
+
 void adreno_dispatcher_schedule(struct kgsl_device *device)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
 
-	kthread_queue_work(&kgsl_driver.worker, &dispatcher->work);
+	if (!atomic_cmpxchg(&dispatcher->send_cmds, 0, 1))
+		wake_up(&dispatcher->cmd_waitq);
 }
 
 /**
@@ -2680,6 +2679,8 @@ void adreno_dispatcher_close(struct adreno_device *adreno_dev)
 	int i;
 	struct adreno_ringbuffer *rb;
 
+	kthread_stop(dispatcher->thread);
+
 	mutex_lock(&dispatcher->mutex);
 	del_timer_sync(&dispatcher->timer);
 	del_timer_sync(&dispatcher->fault_timer);
@@ -2856,14 +2857,19 @@ int adreno_dispatcher_init(struct adreno_device *adreno_dev)
 	setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer,
 		(unsigned long) adreno_dev);
 
-	kthread_init_work(&dispatcher->work, adreno_dispatcher_work);
-
 	init_completion(&dispatcher->idle_gate);
 	complete_all(&dispatcher->idle_gate);
 
 	plist_head_init(&dispatcher->pending);
 	spin_lock_init(&dispatcher->plist_lock);
 
+	init_waitqueue_head(&dispatcher->cmd_waitq);
+	dispatcher->send_cmds = (atomic_t)ATOMIC_INIT(0);
+	dispatcher->thread = kthread_run(adreno_dispatcher_thread, adreno_dev,
+					 "adreno_dispatch");
+	if (IS_ERR(dispatcher->thread))
+		return PTR_ERR(dispatcher->thread);
+
 	ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher,
 		&device->dev->kobj, "dispatch");
8 changes: 6 additions & 2 deletions drivers/gpu/msm/adreno_dispatch.h
@@ -75,12 +75,14 @@ struct adreno_dispatcher_drawqueue {
  * @fault: Non-zero if a fault was detected.
  * @pending: Priority list of contexts waiting to submit drawobjs
  * @plist_lock: Spin lock to protect the pending queue
- * @work: work_struct to put the dispatcher in a work queue
  * @kobj: kobject for the dispatcher directory in the device sysfs node
  * @idle_gate: Gate to wait on for dispatcher to idle
  * @disp_preempt_fair_sched: If set then dispatcher will try to be fair to
  * starving RB's by scheduling them in and enforcing a minimum time slice
  * for every RB that is scheduled to run on the device
+ * @thread: Kthread for the command dispatcher
+ * @cmd_waitq: Waitqueue for the command dispatcher
+ * @send_cmds: Atomic boolean indicating that commands should be dispatched
  */
 struct adreno_dispatcher {
 	struct mutex mutex;
@@ -91,10 +93,12 @@ struct adreno_dispatcher {
 	atomic_t fault;
 	struct plist_head pending;
 	spinlock_t plist_lock;
-	struct kthread_work work;
 	struct kobject kobj;
 	struct completion idle_gate;
 	unsigned int disp_preempt_fair_sched;
+	struct task_struct *thread;
+	wait_queue_head_t cmd_waitq;
+	atomic_t send_cmds;
 };
 
 enum adreno_dispatcher_flags {
1 change: 0 additions & 1 deletion drivers/gpu/msm/kgsl.c
@@ -4906,7 +4906,6 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
 		device->id, device->reg_phys, device->reg_len);
 
 	rwlock_init(&device->context_lock);
-	spin_lock_init(&device->submit_lock);
 
 	setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device);
5 changes: 0 additions & 5 deletions drivers/gpu/msm/kgsl_device.h
@@ -280,11 +280,6 @@ struct kgsl_device {
 	struct kgsl_pwrctrl pwrctrl;
 	int open_count;
 
-	/* For GPU inline submission */
-	uint32_t submit_now;
-	spinlock_t submit_lock;
-	bool slumber;
-
 	struct mutex mutex;
 	uint32_t state;
 	uint32_t requested_state;
24 changes: 1 addition & 23 deletions drivers/gpu/msm/kgsl_pwrctrl.c
@@ -2359,24 +2359,9 @@ void kgsl_idle_check(struct work_struct *work)
 			|| device->state == KGSL_STATE_NAP)) {
 
 		if (!atomic_read(&device->active_cnt)) {
-			spin_lock(&device->submit_lock);
-			if (device->submit_now) {
-				spin_unlock(&device->submit_lock);
-				goto done;
-			}
-			/* Don't allow GPU inline submission in SLUMBER */
-			if (requested_state == KGSL_STATE_SLUMBER)
-				device->slumber = true;
-			spin_unlock(&device->submit_lock);
-
 			ret = kgsl_pwrctrl_change_state(device,
 					device->requested_state);
 			if (ret == -EBUSY) {
-				if (requested_state == KGSL_STATE_SLUMBER) {
-					spin_lock(&device->submit_lock);
-					device->slumber = false;
-					spin_unlock(&device->submit_lock);
-				}
 				/*
 				 * If the GPU is currently busy, restore
 				 * the requested state and reschedule
@@ -2387,7 +2372,7 @@ void kgsl_idle_check(struct work_struct *work)
 			kgsl_schedule_work(&device->idle_check_ws);
 		}
 	}
-done:
+
 	if (!ret)
 		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
 
@@ -2912,13 +2897,6 @@ static void kgsl_pwrctrl_set_state(struct kgsl_device *device,
 	trace_kgsl_pwr_set_state(device, state);
 	device->state = state;
 	device->requested_state = KGSL_STATE_NONE;
-
-	spin_lock(&device->submit_lock);
-	if (state == KGSL_STATE_SLUMBER || state == KGSL_STATE_SUSPEND)
-		device->slumber = true;
-	else
-		device->slumber = false;
-	spin_unlock(&device->submit_lock);
 }
 
 static void kgsl_pwrctrl_request_state(struct kgsl_device *device,
