4.8/wb buf throttle #376

Open · wants to merge 8 commits into base: master
13 changes: 13 additions & 0 deletions Documentation/block/queue-sysfs.txt
@@ -169,5 +169,18 @@ This is the number of bytes the device can write in a single write-same
command. A value of '0' means write-same is not supported by this
device.

wb_lat_usec (RW)
----------------
If the device is registered for writeback throttling, then this file shows
the target minimum read latency. If this latency is exceeded in a given
window of time (see wb_window_usec), then the writeback throttling will start
scaling back writes.

wb_window_usec (RW)
-------------------
If the device is registered for writeback throttling, then this file shows
the value of the monitoring window in which we'll look at the target
latency. See wb_lat_usec.


Jens Axboe <jens.axboe@oracle.com>, February 2009
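
As an aside for reviewers, here is a minimal userspace sketch of how the two new knobs could be tuned once the series is applied. The device name and the latency/window values below are made-up examples, and the files only exist for queues registered with wbt:

#include <stdio.h>

/*
 * Illustrative only: cap the target read latency at 75 msec and use a
 * 100 msec monitoring window on /dev/sda. Paths and values are example
 * assumptions, not part of this patch.
 */
static int write_sysfs(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	int ret = 0;

	ret |= write_sysfs("/sys/block/sda/queue/wb_lat_usec", "75000");
	ret |= write_sysfs("/sys/block/sda/queue/wb_window_usec", "100000");
	return ret ? 1 : 0;
}
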
1 change: 1 addition & 0 deletions block/Kconfig
@@ -4,6 +4,7 @@
menuconfig BLOCK
bool "Enable the block layer" if EXPERT
default y
select WBT
help
Provide block layer support for the kernel.

2 changes: 1 addition & 1 deletion block/Makefile
@@ -5,7 +5,7 @@
obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
badblocks.o partitions/
21 changes: 19 additions & 2 deletions block/blk-core.c
@@ -33,6 +33,7 @@
#include <linux/ratelimit.h>
#include <linux/pm_runtime.h>
#include <linux/blk-cgroup.h>
#include <linux/wbt.h>

#define CREATE_TRACE_POINTS
#include <trace/events/block.h>
@@ -882,6 +883,8 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,

fail:
blk_free_flush_queue(q->fq);
wbt_exit(q->rq_wb);
q->rq_wb = NULL;
return NULL;
}
EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1346,6 +1349,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_delete_timer(rq);
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
wbt_requeue(q->rq_wb, &rq->wb_stat);

if (rq->cmd_flags & REQ_QUEUED)
blk_queue_end_tag(q, rq);
@@ -1436,6 +1440,8 @@ void __blk_put_request(struct request_queue *q, struct request *req)
/* this is a bio leak */
WARN_ON(req->bio != NULL);

wbt_done(q->rq_wb, &req->wb_stat);

/*
* Request may not have originated from ll_rw_blk. if not,
* it didn't come out of our reserved rq pools
@@ -1667,6 +1673,7 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
int el_ret, rw_flags = 0, where = ELEVATOR_INSERT_SORT;
struct request *req;
unsigned int request_count = 0;
unsigned int wb_acct;

/*
* low level driver can indicate that it wants pages above a
@@ -1719,6 +1726,8 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
}

get_rq:
wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, q->queue_lock);

/*
* This sync check and mask will be re-done in init_request_from_bio(),
* but we need to set it earlier to expose the sync flag to the
@@ -1738,11 +1747,14 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio)
*/
req = get_request(q, bio_data_dir(bio), rw_flags, bio, GFP_NOIO);
if (IS_ERR(req)) {
__wbt_done(q->rq_wb, wb_acct);
bio->bi_error = PTR_ERR(req);
bio_endio(bio);
goto out_unlock;
}

wbt_track(&req->wb_stat, wb_acct);

/*
* After dropping the lock and possibly sleeping here, our request
* may now be mergeable after it had proven unmergeable (above).
@@ -2475,6 +2487,8 @@ void blk_start_request(struct request *req)
{
blk_dequeue_request(req);

wbt_issue(req->q->rq_wb, &req->wb_stat);

/*
* We are now handing the request to the hardware, initialize
* resid_len to full count and add the timeout handler.
@@ -2542,6 +2556,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)

trace_block_rq_complete(req->q, req, nr_bytes);

blk_stat_add(&req->q->rq_stats[rq_data_dir(req)], req);

if (!req->bio)
return false;

@@ -2709,9 +2725,10 @@ void blk_finish_request(struct request *req, int error)

blk_account_io_done(req);

if (req->end_io)
if (req->end_io) {
wbt_done(req->q->rq_wb, &req->wb_stat);
req->end_io(req, error);
else {
} else {
if (blk_bidi_rq(req))
__blk_put_request(req->next_rq->q, req->next_rq);

47 changes: 47 additions & 0 deletions block/blk-mq-sysfs.c
@@ -247,6 +247,47 @@ static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
return ret;
}

static void blk_mq_stat_clear(struct blk_mq_hw_ctx *hctx)
{
struct blk_mq_ctx *ctx;
unsigned int i;

hctx_for_each_ctx(hctx, ctx, i) {
blk_stat_init(&ctx->stat[0]);
blk_stat_init(&ctx->stat[1]);
}
}

static ssize_t blk_mq_hw_sysfs_stat_store(struct blk_mq_hw_ctx *hctx,
const char *page, size_t count)
{
blk_mq_stat_clear(hctx);
return count;
}

static ssize_t print_stat(char *page, struct blk_rq_stat *stat, const char *pre)
{
return sprintf(page, "%s samples=%llu, mean=%lld, min=%lld, max=%lld\n",
pre, (long long) stat->nr_samples,
(long long) stat->mean, (long long) stat->min,
(long long) stat->max);
}

static ssize_t blk_mq_hw_sysfs_stat_show(struct blk_mq_hw_ctx *hctx, char *page)
{
struct blk_rq_stat stat[2];
ssize_t ret;

blk_stat_init(&stat[0]);
blk_stat_init(&stat[1]);

blk_hctx_stat_get(hctx, stat);

ret = print_stat(page, &stat[0], "read :");
ret += print_stat(page + ret, &stat[1], "write:");
return ret;
}

static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = {
.attr = {.name = "dispatched", .mode = S_IRUGO },
.show = blk_mq_sysfs_dispatched_show,
@@ -304,6 +345,11 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_poll = {
.attr = {.name = "io_poll", .mode = S_IRUGO },
.show = blk_mq_hw_sysfs_poll_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_stat = {
.attr = {.name = "stats", .mode = S_IRUGO | S_IWUSR },
.show = blk_mq_hw_sysfs_stat_show,
.store = blk_mq_hw_sysfs_stat_store,
};

static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_queued.attr,
@@ -314,6 +360,7 @@ static struct attribute *default_hw_ctx_attrs[] = {
&blk_mq_hw_sysfs_cpus.attr,
&blk_mq_hw_sysfs_active.attr,
&blk_mq_hw_sysfs_poll.attr,
&blk_mq_hw_sysfs_stat.attr,
NULL,
};

40 changes: 38 additions & 2 deletions block/blk-mq.c
@@ -22,13 +22,15 @@
#include <linux/sched/sysctl.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/wbt.h>

#include <trace/events/block.h>

#include <linux/blk-mq.h>
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
#include "blk-stat.h"

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);
@@ -330,6 +332,8 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,

if (rq->cmd_flags & REQ_MQ_INFLIGHT)
atomic_dec(&hctx->nr_active);

wbt_done(q->rq_wb, &rq->wb_stat);
rq->cmd_flags = 0;

clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
@@ -362,6 +366,7 @@ inline void __blk_mq_end_request(struct request *rq, int error)
blk_account_io_done(rq);

if (rq->end_io) {
wbt_done(rq->q->rq_wb, &rq->wb_stat);
rq->end_io(rq, error);
} else {
if (unlikely(blk_bidi_rq(rq)))
@@ -412,10 +417,19 @@ static void blk_mq_ipi_complete_request(struct request *rq)
put_cpu();
}

static void blk_mq_stat_add(struct request *rq)
{
struct blk_rq_stat *stat = &rq->mq_ctx->stat[rq_data_dir(rq)];

blk_stat_add(stat, rq);
}

static void __blk_mq_complete_request(struct request *rq)
{
struct request_queue *q = rq->q;

blk_mq_stat_add(rq);

if (!q->softirq_done_fn)
blk_mq_end_request(rq, rq->errors);
else
@@ -459,6 +473,8 @@ void blk_mq_start_request(struct request *rq)
if (unlikely(blk_bidi_rq(rq)))
rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq);

wbt_issue(q->rq_wb, &rq->wb_stat);

blk_add_timer(rq);

/*
@@ -494,6 +510,7 @@ static void __blk_mq_requeue_request(struct request *rq)
struct request_queue *q = rq->q;

trace_block_rq_requeue(q, rq);
wbt_requeue(q->rq_wb, &rq->wb_stat);

if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
if (q->dma_drain_size && blk_rq_bytes(rq))
@@ -1312,6 +1329,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
struct blk_plug *plug;
struct request *same_queue_rq = NULL;
blk_qc_t cookie;
unsigned int wb_acct;

blk_queue_bounce(q, &bio);

@@ -1326,9 +1344,15 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
return BLK_QC_T_NONE;

wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, NULL);

rq = blk_mq_map_request(q, bio, &data);
if (unlikely(!rq))
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
return BLK_QC_T_NONE;
}

wbt_track(&rq->wb_stat, wb_acct);

cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);

@@ -1405,6 +1429,7 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
struct blk_map_ctx data;
struct request *rq;
blk_qc_t cookie;
unsigned int wb_acct;

blk_queue_bounce(q, &bio);

@@ -1421,9 +1446,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio)
} else
request_count = blk_plug_queued_count(q);

wb_acct = wbt_wait(q->rq_wb, bio->bi_opf, NULL);

rq = blk_mq_map_request(q, bio, &data);
if (unlikely(!rq))
if (unlikely(!rq)) {
__wbt_done(q->rq_wb, wb_acct);
return BLK_QC_T_NONE;
}

wbt_track(&rq->wb_stat, wb_acct);

cookie = blk_tag_to_qc_t(rq->tag, data.hctx->queue_num);

@@ -1807,6 +1838,8 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
spin_lock_init(&__ctx->lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
blk_stat_init(&__ctx->stat[0]);
blk_stat_init(&__ctx->stat[1]);

/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpu_online(i))
@@ -2145,6 +2178,9 @@ void blk_mq_free_queue(struct request_queue *q)
list_del_init(&q->all_q_node);
mutex_unlock(&all_q_mutex);

wbt_exit(q->rq_wb);
q->rq_wb = NULL;

blk_mq_del_queue_tag_set(q);

blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
3 changes: 3 additions & 0 deletions block/blk-mq.h
@@ -1,6 +1,8 @@
#ifndef INT_BLK_MQ_H
#define INT_BLK_MQ_H

#include "blk-stat.h"

struct blk_mq_tag_set;

struct blk_mq_ctx {
@@ -20,6 +22,7 @@ struct blk_mq_ctx {

/* incremented at completion time */
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
struct blk_rq_stat stat[2];

struct request_queue *queue;
struct kobject kobj;
15 changes: 15 additions & 0 deletions block/blk-settings.c
@@ -831,6 +831,19 @@ void blk_queue_flush_queueable(struct request_queue *q, bool queueable)
}
EXPORT_SYMBOL_GPL(blk_queue_flush_queueable);

/**
* blk_set_queue_depth - tell the block layer about the device queue depth
* @q: the request queue for the device
* @depth: queue depth
*
*/
void blk_set_queue_depth(struct request_queue *q, unsigned int depth)
{
q->queue_depth = depth;
wbt_set_queue_depth(q->rq_wb, depth);
}
EXPORT_SYMBOL(blk_set_queue_depth);

/**
* blk_queue_write_cache - configure queue's write cache
* @q: the request queue for the device
@@ -851,6 +864,8 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua)
else
queue_flag_clear(QUEUE_FLAG_FUA, q);
spin_unlock_irq(q->queue_lock);

wbt_set_write_cache(q->rq_wb, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
}
EXPORT_SYMBOL_GPL(blk_queue_write_cache);
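
To show how the new helper is meant to be consumed, a rough driver-side sketch follows. The function name and the depth value of 64 are hypothetical; only blk_set_queue_depth() (added by this series) and the existing blk_queue_write_cache() are real interfaces:

#include <linux/blkdev.h>

/*
 * Hypothetical driver setup path: report the device's effective queue
 * depth so wbt can scale its inflight limits against it, and tell the
 * block layer about the volatile write cache. The depth of 64 is an
 * invented example value.
 */
static void exampledrv_configure_queue(struct request_queue *q)
{
	blk_set_queue_depth(q, 64);
	blk_queue_write_cache(q, true, false);
}
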
