Skip to content

Commit

Permalink
hw/nvme: reimplement zone reset to allow cancellation
Browse files Browse the repository at this point in the history
Prior to this patch, the aios associated with zone reset are submitted
anonymously (no reference saved to the aiocb from the blk_aio call), so
they cannot be cancelled.

Fix this by resetting the zones one after another, saving a reference to
the aiocb for each reset.

Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
  • Loading branch information
birkelund committed Jun 29, 2021
1 parent 796d206 commit 63d96e4
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 121 deletions.
288 changes: 168 additions & 120 deletions hw/nvme/ctrl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1691,6 +1691,29 @@ static uint16_t nvme_zrm_close(NvmeNamespace *ns, NvmeZone *zone)
}
}

/*
 * Zone resource management: transition @zone to the Empty state.
 *
 * Depending on the current zone state, nvme_aor_dec_open() and/or
 * nvme_aor_dec_active() are called on @ns before the write pointer is
 * rewound to the zone start LBA and the zone is assigned the Empty state.
 * A zone that is already Empty is left untouched.
 *
 * Returns NVME_SUCCESS, or NVME_ZONE_INVAL_TRANSITION if the zone is in a
 * state other than Empty, Explicitly/Implicitly Open, Closed or Full.
 */
static uint16_t nvme_zrm_reset(NvmeNamespace *ns, NvmeZone *zone)
{
    bool was_open = false;
    bool was_active = false;

    /* First classify the current state, then act on it below. */
    switch (nvme_get_zone_state(zone)) {
    case NVME_ZONE_STATE_EMPTY:
        return NVME_SUCCESS;

    case NVME_ZONE_STATE_EXPLICITLY_OPEN:
    case NVME_ZONE_STATE_IMPLICITLY_OPEN:
        was_open = true;
        was_active = true;
        break;

    case NVME_ZONE_STATE_CLOSED:
        was_active = true;
        break;

    case NVME_ZONE_STATE_FULL:
        break;

    default:
        return NVME_ZONE_INVAL_TRANSITION;
    }

    if (was_open) {
        nvme_aor_dec_open(ns);
    }

    if (was_active) {
        nvme_aor_dec_active(ns);
    }

    /* Rewind both copies of the write pointer to the start of the zone. */
    zone->d.wp = zone->w_ptr = zone->d.zslba;
    nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);

    return NVME_SUCCESS;
}

static void nvme_zrm_auto_transition_zone(NvmeNamespace *ns)
{
NvmeZone *zone;
Expand Down Expand Up @@ -2020,79 +2043,6 @@ static void nvme_verify_mdata_in_cb(void *opaque, int ret)
nvme_verify_cb(ctx, ret);
}

/*
 * Per-zone context passed as the opaque pointer to the zone reset AIO
 * callbacks. It is allocated by nvme_reset_zone() and freed by
 * nvme_aio_zone_reset_complete_cb().
 */
struct nvme_zone_reset_ctx {
/* request on whose behalf the zone is being reset */
NvmeRequest *req;
/* zone being reset */
NvmeZone *zone;
};

/*
 * Final completion callback for the reset of a single zone.
 *
 * @opaque is the struct nvme_zone_reset_ctx for the zone; @ret is the result
 * of the preceding AIO. A non-zero @ret is reported through nvme_aio_err();
 * otherwise an Open, Closed or Full zone is rewound and assigned the Empty
 * state. In both cases the context is freed and the counter of outstanding
 * resets stored in req->opaque is decremented; the request is completed once
 * that counter reaches zero.
 */
static void nvme_aio_zone_reset_complete_cb(void *opaque, int ret)
{
    struct nvme_zone_reset_ctx *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeZone *zone = ctx->zone;
    NvmeNamespace *ns = req->ns;
    /* req->opaque is reused as the count of resets still in flight */
    uintptr_t *pending = (uintptr_t *)&req->opaque;

    if (ret) {
        nvme_aio_err(req, ret);
    } else {
        switch (nvme_get_zone_state(zone)) {
        case NVME_ZONE_STATE_EXPLICITLY_OPEN:
        case NVME_ZONE_STATE_IMPLICITLY_OPEN:
            nvme_aor_dec_open(ns);
            /* fall through */
        case NVME_ZONE_STATE_CLOSED:
            nvme_aor_dec_active(ns);
            /* fall through */
        case NVME_ZONE_STATE_FULL:
            zone->w_ptr = zone->d.zslba;
            zone->d.wp = zone->w_ptr;
            nvme_assign_zone_state(ns, zone, NVME_ZONE_STATE_EMPTY);
            /* fall through */
        default:
            break;
        }
    }

    g_free(ctx);

    if (--(*pending)) {
        /* other zone resets for this request have not completed yet */
        return;
    }

    nvme_enqueue_req_completion(nvme_cq(req), req);
}

/*
 * Completion callback for the write-zeroes request issued by
 * nvme_reset_zone().
 *
 * If that request succeeded and the namespace format has a non-zero metadata
 * size (ns->lbaf.ms), a second write-zeroes request is issued at the offset
 * given by nvme_moff() for the zone, completing in
 * nvme_aio_zone_reset_complete_cb(). Otherwise that callback is invoked
 * directly with @ret.
 */
static void nvme_aio_zone_reset_cb(void *opaque, int ret)
{
    struct nvme_zone_reset_ctx *ctx = opaque;
    NvmeRequest *req = ctx->req;
    NvmeZone *zone = ctx->zone;
    NvmeNamespace *ns = req->ns;

    trace_pci_nvme_aio_zone_reset_cb(nvme_cid(req), zone->d.zslba);

    if (!ret && ns->lbaf.ms) {
        int64_t moff = nvme_moff(ns, zone->d.zslba);

        blk_aio_pwrite_zeroes(ns->blkconf.blk, moff,
                              nvme_m2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
                              nvme_aio_zone_reset_complete_cb, ctx);
        return;
    }

    /* error, or no metadata to clear: finish this zone right away */
    nvme_aio_zone_reset_complete_cb(opaque, ret);
}

struct nvme_compare_ctx {
struct {
QEMUIOVector iov;
Expand Down Expand Up @@ -3395,41 +3345,6 @@ static uint16_t nvme_finish_zone(NvmeNamespace *ns, NvmeZone *zone,
return nvme_zrm_finish(ns, zone);
}

/*
 * Start resetting a single zone on behalf of @req.
 *
 * Returns NVME_SUCCESS immediately for a zone whose @state is Empty and
 * NVME_ZONE_INVAL_TRANSITION for any state other than Empty, Open, Closed or
 * Full. Otherwise a write-zeroes request covering the zone is submitted, the
 * counter of outstanding resets kept in req->opaque is incremented, and
 * NVME_NO_COMPLETE is returned.
 */
static uint16_t nvme_reset_zone(NvmeNamespace *ns, NvmeZone *zone,
                                NvmeZoneState state, NvmeRequest *req)
{
    uintptr_t *pending = (uintptr_t *)&req->opaque;
    struct nvme_zone_reset_ctx *ctx;

    if (state == NVME_ZONE_STATE_EMPTY) {
        return NVME_SUCCESS;
    }

    if (state != NVME_ZONE_STATE_EXPLICITLY_OPEN &&
        state != NVME_ZONE_STATE_IMPLICITLY_OPEN &&
        state != NVME_ZONE_STATE_CLOSED &&
        state != NVME_ZONE_STATE_FULL) {
        return NVME_ZONE_INVAL_TRANSITION;
    }

    /*
     * The completion callbacks must know which zone to transition, so hand
     * them a small context carrying both the request and the zone.
     */
    ctx = g_new(struct nvme_zone_reset_ctx, 1);
    ctx->zone = zone;
    ctx->req = req;

    ++(*pending);

    blk_aio_pwrite_zeroes(ns->blkconf.blk, nvme_l2b(ns, zone->d.zslba),
                          nvme_l2b(ns, ns->zone_size), BDRV_REQ_MAY_UNMAP,
                          nvme_aio_zone_reset_cb, ctx);

    return NVME_NO_COMPLETE;
}

static uint16_t nvme_offline_zone(NvmeNamespace *ns, NvmeZone *zone,
NvmeZoneState state, NvmeRequest *req)
{
Expand Down Expand Up @@ -3558,12 +3473,144 @@ static uint16_t nvme_do_zone_op(NvmeNamespace *ns, NvmeZone *zone,
return status;
}

/*
 * State of one (possibly multi-zone) zone reset operation. Zones are reset
 * one after another; a reference to the write-zeroes request currently in
 * flight is kept so that it can be cancelled.
 */
typedef struct NvmeZoneResetAIOCB {
/* embedded AIOCB; nvme_zone_reset_cancel() recovers the iocb from it */
BlockAIOCB common;
/* write-zeroes request currently in flight, NULL when there is none */
BlockAIOCB *aiocb;
/* NVMe request that triggered the reset */
NvmeRequest *req;
/* bottom half running nvme_zone_reset_bh(), scheduled when done */
QEMUBH *bh;
/* result handed to the completion callback; 0 or a negative errno */
int ret;

/* true when all zones are to be reset rather than a single one */
bool all;
/* index into ns->zone_array of the next zone to examine */
int idx;
/* zone whose reset is in flight; NULL until the first one is picked */
NvmeZone *zone;
} NvmeZoneResetAIOCB;

/*
 * cancel_async handler for the zone reset operation.
 *
 * Records -ECANCELED as the result, advances the zone index past the last
 * zone so that nvme_zone_reset_cb() picks no further zones, and asks the
 * block layer to cancel the request currently in flight, if there is one.
 */
static void nvme_zone_reset_cancel(BlockAIOCB *aiocb)
{
    NvmeZoneResetAIOCB *iocb = container_of(aiocb, NvmeZoneResetAIOCB, common);
    NvmeNamespace *ns = iocb->req->ns;

    iocb->ret = -ECANCELED;
    iocb->idx = ns->num_zones;

    if (!iocb->aiocb) {
        return;
    }

    blk_aio_cancel_async(iocb->aiocb);
    iocb->aiocb = NULL;
}

/*
 * AIOCB description for zone reset operations; passed to blk_aio_get() when
 * the NvmeZoneResetAIOCB is allocated.
 */
static const AIOCBInfo nvme_zone_reset_aiocb_info = {
.aiocb_size = sizeof(NvmeZoneResetAIOCB),
.cancel_async = nvme_zone_reset_cancel,
};

/*
 * Bottom half that finishes a zone reset operation; scheduled by
 * nvme_zone_reset_cb() once no more zones are to be processed.
 *
 * @opaque is the NvmeZoneResetAIOCB.
 */
static void nvme_zone_reset_bh(void *opaque)
{
NvmeZoneResetAIOCB *iocb = opaque;

/*
 * Report the final result; cb/opaque are presumably the pair that was
 * passed to blk_aio_get() when the iocb was created.
 */
iocb->common.cb(iocb->common.opaque, iocb->ret);

/* the callback has run; tear down the bottom half, then the iocb */
qemu_bh_delete(iocb->bh);
iocb->bh = NULL;
qemu_aio_unref(iocb);
}

static void nvme_zone_reset_cb(void *opaque, int ret);

static void nvme_zone_reset_epilogue_cb(void *opaque, int ret)
{
NvmeZoneResetAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;
int64_t moff;
int count;

if (ret < 0) {
nvme_zone_reset_cb(iocb, ret);
return;
}

if (!ns->lbaf.ms) {
nvme_zone_reset_cb(iocb, 0);
return;
}

moff = nvme_moff(ns, iocb->zone->d.zslba);
count = nvme_m2b(ns, ns->zone_size);

iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk, moff, count,
BDRV_REQ_MAY_UNMAP,
nvme_zone_reset_cb, iocb);
return;
}

static void nvme_zone_reset_cb(void *opaque, int ret)
{
NvmeZoneResetAIOCB *iocb = opaque;
NvmeRequest *req = iocb->req;
NvmeNamespace *ns = req->ns;

if (ret < 0) {
iocb->ret = ret;
goto done;
}

if (iocb->zone) {
nvme_zrm_reset(ns, iocb->zone);

if (!iocb->all) {
goto done;
}
}

while (iocb->idx < ns->num_zones) {
NvmeZone *zone = &ns->zone_array[iocb->idx++];

switch (nvme_get_zone_state(zone)) {
case NVME_ZONE_STATE_EMPTY:
if (!iocb->all) {
goto done;
}

continue;

case NVME_ZONE_STATE_EXPLICITLY_OPEN:
case NVME_ZONE_STATE_IMPLICITLY_OPEN:
case NVME_ZONE_STATE_CLOSED:
case NVME_ZONE_STATE_FULL:
iocb->zone = zone;
break;

default:
continue;
}

trace_pci_nvme_zns_zone_reset(zone->d.zslba);

iocb->aiocb = blk_aio_pwrite_zeroes(ns->blkconf.blk,
nvme_l2b(ns, zone->d.zslba),
nvme_l2b(ns, ns->zone_size),
BDRV_REQ_MAY_UNMAP,
nvme_zone_reset_epilogue_cb,
iocb);
return;
}

done:
iocb->aiocb = NULL;
if (iocb->bh) {
qemu_bh_schedule(iocb->bh);
}
}

static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
{
NvmeCmd *cmd = (NvmeCmd *)&req->cmd;
NvmeNamespace *ns = req->ns;
NvmeZone *zone;
uintptr_t *resets;
NvmeZoneResetAIOCB *iocb;
uint8_t *zd_ext;
uint32_t dw13 = le32_to_cpu(cmd->cdw13);
uint64_t slba = 0;
Expand All @@ -3574,7 +3621,7 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
enum NvmeZoneProcessingMask proc_mask = NVME_PROC_CURRENT_ZONE;

action = dw13 & 0xff;
all = dw13 & 0x100;
all = !!(dw13 & 0x100);

req->status = NVME_SUCCESS;

Expand Down Expand Up @@ -3618,21 +3665,22 @@ static uint16_t nvme_zone_mgmt_send(NvmeCtrl *n, NvmeRequest *req)
break;

case NVME_ZONE_ACTION_RESET:
resets = (uintptr_t *)&req->opaque;

if (all) {
proc_mask = NVME_PROC_OPENED_ZONES | NVME_PROC_CLOSED_ZONES |
NVME_PROC_FULL_ZONES;
}
trace_pci_nvme_reset_zone(slba, zone_idx, all);

*resets = 1;
iocb = blk_aio_get(&nvme_zone_reset_aiocb_info, ns->blkconf.blk,
nvme_misc_cb, req);

status = nvme_do_zone_op(ns, zone, proc_mask, nvme_reset_zone, req);
iocb->req = req;
iocb->bh = qemu_bh_new(nvme_zone_reset_bh, iocb);
iocb->ret = 0;
iocb->all = all;
iocb->idx = zone_idx;
iocb->zone = NULL;

(*resets)--;
req->aiocb = &iocb->common;
nvme_zone_reset_cb(iocb, 0);

return *resets ? NVME_NO_COMPLETE : req->status;
return NVME_NO_COMPLETE;

case NVME_ZONE_ACTION_OFFLINE:
if (all) {
Expand Down
2 changes: 1 addition & 1 deletion hw/nvme/trace-events
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ pci_nvme_compare_data_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_compare_mdata_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_discard_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_copy_in_cb(uint16_t cid) "cid %"PRIu16""
pci_nvme_aio_zone_reset_cb(uint16_t cid, uint64_t zslba) "cid %"PRIu16" zslba 0x%"PRIx64""
pci_nvme_aio_flush_cb(uint16_t cid, const char *blkname) "cid %"PRIu16" blk '%s'"
pci_nvme_create_sq(uint64_t addr, uint16_t sqid, uint16_t cqid, uint16_t qsize, uint16_t qflags) "create submission queue, addr=0x%"PRIx64", sqid=%"PRIu16", cqid=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16""
pci_nvme_create_cq(uint64_t addr, uint16_t cqid, uint16_t vector, uint16_t size, uint16_t qflags, int ien) "create completion queue, addr=0x%"PRIx64", cqid=%"PRIu16", vector=%"PRIu16", qsize=%"PRIu16", qflags=%"PRIu16", ien=%d"
Expand Down Expand Up @@ -100,6 +99,7 @@ pci_nvme_open_zone(uint64_t slba, uint32_t zone_idx, int all) "open zone, slba=%
pci_nvme_close_zone(uint64_t slba, uint32_t zone_idx, int all) "close zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_finish_zone(uint64_t slba, uint32_t zone_idx, int all) "finish zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_reset_zone(uint64_t slba, uint32_t zone_idx, int all) "reset zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_zns_zone_reset(uint64_t zslba) "zslba 0x%"PRIx64""
pci_nvme_offline_zone(uint64_t slba, uint32_t zone_idx, int all) "offline zone, slba=%"PRIu64", idx=%"PRIu32", all=%"PRIi32""
pci_nvme_set_descriptor_extension(uint64_t slba, uint32_t zone_idx) "set zone descriptor extension, slba=%"PRIu64", idx=%"PRIu32""
pci_nvme_zd_extension_set(uint32_t zone_idx) "set descriptor extension for zone_idx=%"PRIu32""
Expand Down

0 comments on commit 63d96e4

Please sign in to comment.