msm: kgsl: Dispatch commands using a master kthread
Instead of coordinating with a worker when dispatching commands and
abusing a mutex for synchronization, it's faster to keep a single
kthread that dispatches commands whenever needed. This reduces GPU
processing latency.

Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
[@0ctobot: Adapted for msm-4.9, this reverts commit:
2eb74d7 ("msm: kgsl: Defer issue commands to worker thread")]
Signed-off-by: Adam W. Willis <return.of.octobot@gmail.com>
Signed-off-by: Raphiel Rollerscaperers <raphielscape@outlook.com>
kerneltoast authored and TogoFire committed Jul 22, 2021
1 parent 21b102a commit b771109
Showing 5 changed files with 52 additions and 70 deletions.
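
The change reduces to a well-known pattern: a dedicated kthread parked on a waitqueue, woken through an atomic flag so that any number of wakeup requests issued while the thread is busy coalesce into a single extra dispatch pass. A minimal sketch of that handoff, using hypothetical demo_* names rather than the driver's own:

#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);
static atomic_t demo_pending = ATOMIC_INIT(0);

static int demo_thread_fn(void *data)
{
	while (1) {
		bool should_stop;

		/* Sleep until there is work to do or we are told to exit. */
		wait_event(demo_waitq,
			   (should_stop = kthread_should_stop()) ||
			   atomic_cmpxchg(&demo_pending, 1, 0));

		if (should_stop)
			break;

		/* ... drain all pending commands here ... */
	}

	return 0;
}

/* Producer side: wake the thread only on the 0 -> 1 transition. */
static void demo_schedule(void)
{
	if (!atomic_cmpxchg(&demo_pending, 0, 1))
		wake_up(&demo_waitq);
}

The consumer clears the flag with atomic_cmpxchg(..., 1, 0) as part of the wait condition, so a schedule request that arrives mid-dispatch simply re-arms the flag and triggers one more pass; this is the behavior adreno_dispatcher_schedule() below depends on.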
84 changes: 45 additions & 39 deletions drivers/gpu/msm/adreno_dispatch.c
@@ -16,6 +16,11 @@
 #include <linux/sched.h>
 #include <linux/jiffies.h>
 #include <linux/err.h>
+#include <linux/version.h>
+/* The sched_param struct is located elsewhere in newer kernels */
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
+#include <uapi/linux/sched/types.h>
+#endif
 
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
@@ -986,13 +991,6 @@ static void _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
 	spin_unlock(&dispatcher->plist_lock);
 }
 
-static inline void _decrement_submit_now(struct kgsl_device *device)
-{
-	spin_lock(&device->submit_lock);
-	device->submit_now--;
-	spin_unlock(&device->submit_lock);
-}
-
 /**
  * adreno_dispatcher_issuecmds() - Issue commmands from pending contexts
  * @adreno_dev: Pointer to the adreno device struct
@@ -1001,30 +999,7 @@ static inline void _decrement_submit_now(struct kgsl_device *device)
  */
 static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
 {
-	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
-	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
-
-	spin_lock(&device->submit_lock);
-	/* If state transition to SLUMBER, schedule the work for later */
-	if (device->slumber == true) {
-		spin_unlock(&device->submit_lock);
-		goto done;
-	}
-	device->submit_now++;
-	spin_unlock(&device->submit_lock);
-
-	/* If the dispatcher is busy then schedule the work for later */
-	if (!mutex_trylock(&dispatcher->mutex)) {
-		_decrement_submit_now(device);
-		goto done;
-	}
-
-	_adreno_dispatcher_issuecmds(adreno_dev);
-	mutex_unlock(&dispatcher->mutex);
-	_decrement_submit_now(device);
-	return;
-done:
-	adreno_dispatcher_schedule(device);
+	adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
 }
 
 /**
@@ -2500,12 +2475,9 @@ static void _dispatcher_power_down(struct adreno_device *adreno_dev)
 	mutex_unlock(&device->mutex);
 }
 
-static void adreno_dispatcher_work(struct kthread_work *work)
+static void adreno_dispatcher_work(struct adreno_device *adreno_dev)
 {
-	struct adreno_dispatcher *dispatcher =
-			container_of(work, struct adreno_dispatcher, work);
-	struct adreno_device *adreno_dev =
-			container_of(dispatcher, struct adreno_device, dispatcher);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
 	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
 	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
 	int count = 0;
@@ -2555,12 +2527,39 @@ static void adreno_dispatcher_work(struct kthread_work *work)
 	mutex_unlock(&dispatcher->mutex);
 }
 
+static int adreno_dispatcher_thread(void *data)
+{
+	static const struct sched_param sched_rt_prio = {
+		.sched_priority = 16
+	};
+	struct adreno_device *adreno_dev = data;
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	sched_setscheduler_nocheck(current, SCHED_FIFO, &sched_rt_prio);
+
+	while (1) {
+		bool should_stop;
+
+		wait_event(dispatcher->cmd_waitq,
+			   (should_stop = kthread_should_stop()) ||
+			   atomic_cmpxchg(&dispatcher->send_cmds, 1, 0));
+
+		if (should_stop)
+			break;
+
+		adreno_dispatcher_work(adreno_dev);
+	}
+
+	return 0;
+}
+
 void adreno_dispatcher_schedule(struct kgsl_device *device)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
 
-	kthread_queue_work(&kgsl_driver.worker, &dispatcher->work);
+	if (!atomic_cmpxchg(&dispatcher->send_cmds, 0, 1))
+		wake_up(&dispatcher->cmd_waitq);
 }
 
 /**
@@ -2680,6 +2679,8 @@ void adreno_dispatcher_close(struct adreno_device *adreno_dev)
 	int i;
 	struct adreno_ringbuffer *rb;
 
+	kthread_stop(dispatcher->thread);
+
 	mutex_lock(&dispatcher->mutex);
 	del_timer_sync(&dispatcher->timer);
 	del_timer_sync(&dispatcher->fault_timer);
@@ -2856,14 +2857,19 @@ int adreno_dispatcher_init(struct adreno_device *adreno_dev)
 	setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer,
 		(unsigned long) adreno_dev);
 
-	kthread_init_work(&dispatcher->work, adreno_dispatcher_work);
-
 	init_completion(&dispatcher->idle_gate);
 	complete_all(&dispatcher->idle_gate);
 
 	plist_head_init(&dispatcher->pending);
 	spin_lock_init(&dispatcher->plist_lock);
 
+	init_waitqueue_head(&dispatcher->cmd_waitq);
+	dispatcher->send_cmds = (atomic_t)ATOMIC_INIT(0);
+	dispatcher->thread = kthread_run(adreno_dispatcher_thread, adreno_dev,
+					 "adreno_dispatch");
+	if (IS_ERR(dispatcher->thread))
+		return PTR_ERR(dispatcher->thread);
+
 	ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher,
 		&device->dev->kobj, "dispatch");
8 changes: 6 additions & 2 deletions drivers/gpu/msm/adreno_dispatch.h
@@ -75,12 +75,14 @@ struct adreno_dispatcher_drawqueue {
  * @fault: Non-zero if a fault was detected.
  * @pending: Priority list of contexts waiting to submit drawobjs
  * @plist_lock: Spin lock to protect the pending queue
- * @work: work_struct to put the dispatcher in a work queue
  * @kobj: kobject for the dispatcher directory in the device sysfs node
  * @idle_gate: Gate to wait on for dispatcher to idle
  * @disp_preempt_fair_sched: If set then dispatcher will try to be fair to
  * starving RB's by scheduling them in and enforcing a minimum time slice
  * for every RB that is scheduled to run on the device
+ * @thread: Kthread for the command dispatcher
+ * @cmd_waitq: Waitqueue for the command dispatcher
+ * @send_cmds: Atomic boolean indicating that commands should be dispatched
  */
 struct adreno_dispatcher {
 	struct mutex mutex;
@@ -91,10 +93,12 @@ struct adreno_dispatcher {
 	atomic_t fault;
 	struct plist_head pending;
 	spinlock_t plist_lock;
-	struct kthread_work work;
 	struct kobject kobj;
 	struct completion idle_gate;
 	unsigned int disp_preempt_fair_sched;
+	struct task_struct *thread;
+	wait_queue_head_t cmd_waitq;
+	atomic_t send_cmds;
 };
 
 enum adreno_dispatcher_flags {
1 change: 0 additions & 1 deletion drivers/gpu/msm/kgsl.c
@@ -4906,7 +4906,6 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
 		device->id, device->reg_phys, device->reg_len);
 
 	rwlock_init(&device->context_lock);
-	spin_lock_init(&device->submit_lock);
 
 	setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device);
5 changes: 0 additions & 5 deletions drivers/gpu/msm/kgsl_device.h
@@ -280,11 +280,6 @@ struct kgsl_device {
 	struct kgsl_pwrctrl pwrctrl;
 	int open_count;
 
-	/* For GPU inline submission */
-	uint32_t submit_now;
-	spinlock_t submit_lock;
-	bool slumber;
-
 	struct mutex mutex;
 	uint32_t state;
 	uint32_t requested_state;
24 changes: 1 addition & 23 deletions drivers/gpu/msm/kgsl_pwrctrl.c
@@ -2359,24 +2359,9 @@ void kgsl_idle_check(struct work_struct *work)
 			|| device->state == KGSL_STATE_NAP)) {
 
 		if (!atomic_read(&device->active_cnt)) {
-			spin_lock(&device->submit_lock);
-			if (device->submit_now) {
-				spin_unlock(&device->submit_lock);
-				goto done;
-			}
-			/* Don't allow GPU inline submission in SLUMBER */
-			if (requested_state == KGSL_STATE_SLUMBER)
-				device->slumber = true;
-			spin_unlock(&device->submit_lock);
-
 			ret = kgsl_pwrctrl_change_state(device,
 					device->requested_state);
 			if (ret == -EBUSY) {
-				if (requested_state == KGSL_STATE_SLUMBER) {
-					spin_lock(&device->submit_lock);
-					device->slumber = false;
-					spin_unlock(&device->submit_lock);
-				}
 				/*
 				 * If the GPU is currently busy, restore
 				 * the requested state and reschedule
@@ -2387,7 +2372,7 @@ void kgsl_idle_check(struct work_struct *work)
 			kgsl_schedule_work(&device->idle_check_ws);
 		}
 	}
-done:
+
 	if (!ret)
 		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
 
@@ -2912,13 +2897,6 @@ static void kgsl_pwrctrl_set_state(struct kgsl_device *device,
 	trace_kgsl_pwr_set_state(device, state);
 	device->state = state;
 	device->requested_state = KGSL_STATE_NONE;
-
-	spin_lock(&device->submit_lock);
-	if (state == KGSL_STATE_SLUMBER || state == KGSL_STATE_SUSPEND)
-		device->slumber = true;
-	else
-		device->slumber = false;
-	spin_unlock(&device->submit_lock);
 }
 
 static void kgsl_pwrctrl_request_state(struct kgsl_device *device,
