From e205673b8565172da9ce6d449cea8c92f65f9a2c Mon Sep 17 00:00:00 2001 From: Kai Date: Thu, 24 Nov 2022 08:00:10 +0000 Subject: [PATCH] anolis: RDMA/erdma: Update and bugfix ANBZ: #293 This patch includes the following updates: 1. Add atomic operations support. 2. More support for eadm tools. 3. Fix inline mtt count threshold. 4. Code style fixes. 5. Limit error print rate. 6. Flush support when modifying QP to error. Signed-off-by: Kai Reviewed-by: Cheng You Link: https://gitee.com/anolis/cloud-kernel/pulls/900 --- drivers/infiniband/hw/erdma/erdma.h | 20 +- drivers/infiniband/hw/erdma/erdma_cm.c | 19 +- drivers/infiniband/hw/erdma/erdma_cm.h | 5 +- drivers/infiniband/hw/erdma/erdma_cmdq.c | 101 ++-- drivers/infiniband/hw/erdma/erdma_cq.c | 45 +- drivers/infiniband/hw/erdma/erdma_eq.c | 27 +- drivers/infiniband/hw/erdma/erdma_hw.h | 188 ++++++- drivers/infiniband/hw/erdma/erdma_ioctl.c | 579 ++++++++++++++++++---- drivers/infiniband/hw/erdma/erdma_ioctl.h | 134 ++++- drivers/infiniband/hw/erdma/erdma_main.c | 359 ++++++++------ drivers/infiniband/hw/erdma/erdma_qp.c | 192 ++----- drivers/infiniband/hw/erdma/erdma_stats.c | 47 +- drivers/infiniband/hw/erdma/erdma_stats.h | 15 +- drivers/infiniband/hw/erdma/erdma_verbs.c | 346 ++++++++----- drivers/infiniband/hw/erdma/erdma_verbs.h | 65 ++- include/uapi/rdma/erdma-abi.h | 19 +- 16 files changed, 1426 insertions(+), 735 deletions(-) diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 326bba9555189f..e111541486cbea 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Authors: Cheng Xu */ /* Kai Shen */ @@ -9,10 +9,13 @@ #include #include +#include #include #include +#include "erdma_debug.h" #include "erdma_hw.h" +#include "erdma_ioctl.h" #include "erdma_stats.h" #ifndef RDMA_DRIVER_ERDMA @@ -21,7 +24,7 @@ #define ERDMA_MAJOR_VER 0 #define ERDMA_MEDIUM_VER 2 -#define ERDMA_MINOR_VER 14 +#define ERDMA_MINOR_VER 35 #define DRV_MODULE_NAME "erdma" #define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack" @@ -46,7 +49,7 @@ struct erdma_eq { atomic64_t event_num; atomic64_t notify_num; - u64 __iomem *db_addr; + void __iomem *db; u64 *db_record; }; @@ -143,6 +146,7 @@ struct erdma_devattr { int numa_node; enum erdma_cc_alg cc; + u8 flags; u32 grp_num; int irq_num; @@ -205,6 +209,7 @@ struct erdma_dev { struct net_device *netdev; struct pci_dev *pdev; struct notifier_block netdev_nb; + struct workqueue_struct *reflush_wq; resource_size_t func_bar_addr; resource_size_t func_bar_len; @@ -213,6 +218,7 @@ struct erdma_dev { struct erdma_devattr attrs; /* physical port state (only one port per device) */ enum ib_port_state state; + u32 mtu; /* cmdq and aeq use the same msix vector */ struct erdma_irq comm_irq; @@ -234,14 +240,16 @@ struct erdma_dev { DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT); /* * We provide max 496 uContexts that each has one SQ normal Db, - * and one directWQE db。 + * and one directWQE db.
*/ DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT); atomic_t num_ctx; atomic_t num_cep; struct list_head cep_list; - bool is_registered; + + struct dma_pool *db_pool; + struct dma_pool *resp_pool; }; static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift) @@ -289,7 +297,7 @@ void erdma_finish_cmdq_init(struct erdma_dev *dev); void erdma_cmdq_destroy(struct erdma_dev *dev); void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op); -int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size, +int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, u64 *resp0, u64 *resp1); void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq); diff --git a/drivers/infiniband/hw/erdma/erdma_cm.c b/drivers/infiniband/hw/erdma/erdma_cm.c index 1a62189430773f..1b76506c80769c 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.c +++ b/drivers/infiniband/hw/erdma/erdma_cm.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ @@ -10,23 +10,9 @@ /* Copyright (c) 2008-2019, IBM Corporation */ /* Copyright (c) 2017, Open Grid Computing, Inc. */ -#include -#include -#include -#include -#include -#include -#include #include -#include - -#include -#include -#include -#include #include "erdma.h" -#include "erdma_debug.h" #include "erdma_cm.h" #include "erdma_verbs.h" @@ -320,11 +306,9 @@ void erdma_qp_cm_drop(struct erdma_qp *qp) erdma_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); break; - case ERDMA_EPSTATE_RDMA_MODE: erdma_cm_upcall(cep, IW_CM_EVENT_CLOSE, 0); break; - case ERDMA_EPSTATE_IDLE: case ERDMA_EPSTATE_LISTENING: case ERDMA_EPSTATE_CONNECTING: @@ -360,7 +344,6 @@ void erdma_cep_put(struct erdma_cep *cep) kref_read(&cep->ref) - 1); WARN_ON(kref_read(&cep->ref) < 1); - kref_put(&cep->ref, __erdma_cep_dealloc); } diff --git a/drivers/infiniband/hw/erdma/erdma_cm.h b/drivers/infiniband/hw/erdma/erdma_cm.h index b87c53b83e10ed..6d5db98e1b88b7 100644 --- a/drivers/infiniband/hw/erdma/erdma_cm.h +++ b/drivers/infiniband/hw/erdma/erdma_cm.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Authors: Cheng Xu */ /* Kai Shen */ @@ -12,9 +12,8 @@ #ifndef __ERDMA_CM_H__ #define __ERDMA_CM_H__ -#include #include - +#include #include /* iWarp MPA protocol defs */ diff --git a/drivers/infiniband/hw/erdma/erdma_cmdq.c b/drivers/infiniband/hw/erdma/erdma_cmdq.c index cd8b8071e5fd33..dcb185f5465ea3 100644 --- a/drivers/infiniband/hw/erdma/erdma_cmdq.c +++ b/drivers/infiniband/hw/erdma/erdma_cmdq.c @@ -1,16 +1,10 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. 
*/ -#include -#include -#include - #include "erdma.h" -#include "erdma_hw.h" -#include "erdma_verbs.h" static void arm_cmdq_cq(struct erdma_cmdq *cmdq) { @@ -47,7 +41,7 @@ static struct erdma_comp_wait *get_comp_wait(struct erdma_cmdq *cmdq) return ERR_PTR(-ENOMEM); } - set_bit(comp_idx, cmdq->comp_wait_bitmap); + __set_bit(comp_idx, cmdq->comp_wait_bitmap); spin_unlock(&cmdq->lock); return &cmdq->wait_pool[comp_idx]; @@ -60,7 +54,7 @@ static void put_comp_wait(struct erdma_cmdq *cmdq, cmdq->wait_pool[comp_wait->ctx_id].cmd_status = ERDMA_CMD_STATUS_INIT; spin_lock(&cmdq->lock); - used = test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap); + used = __test_and_clear_bit(comp_wait->ctx_id, cmdq->comp_wait_bitmap); spin_unlock(&cmdq->lock); WARN_ON(!used); @@ -127,6 +121,16 @@ static int erdma_cmdq_sq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_sq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->sq.depth << SQEBB_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); +} + static int erdma_cmdq_cq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; @@ -158,6 +162,16 @@ static int erdma_cmdq_cq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_cq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->cq.depth << CQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); +} + static int erdma_cmdq_eq_init(struct erdma_dev *dev) { struct erdma_cmdq *cmdq = &dev->cmdq; @@ -176,8 +190,7 @@ static int erdma_cmdq_eq_init(struct erdma_dev *dev) spin_lock_init(&eq->lock); atomic64_set(&eq->event_num, 0); - eq->db_addr = - (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG); + eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG; eq->db_record = (u64 *)(eq->qbuf + buf_size); erdma_reg_write32(dev, ERDMA_REGS_CMDQ_EQ_ADDR_H_REG, @@ -191,11 +204,20 @@ static int erdma_cmdq_eq_init(struct erdma_dev *dev) return 0; } +static void erdma_cmdq_eq_destroy(struct erdma_dev *dev) +{ + struct erdma_cmdq *cmdq = &dev->cmdq; + + dma_free_coherent(&dev->pdev->dev, + (cmdq->eq.depth << EQE_SHIFT) + + ERDMA_EXTRA_BUFFER_SIZE, + cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); +} + int erdma_cmdq_init(struct erdma_dev *dev) { - int err, i; struct erdma_cmdq *cmdq = &dev->cmdq; - u32 status, ctrl; + int err; cmdq->max_outstandings = ERDMA_CMDQ_MAX_OUTSTANDING; cmdq->use_event = false; @@ -218,46 +240,14 @@ int erdma_cmdq_init(struct erdma_dev *dev) if (err) goto err_destroy_cq; - ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1); - erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, ctrl); - - for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) { - status = - erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG, - ERDMA_REG_DEV_ST_INIT_DONE_MASK); - if (status) - break; - - msleep(ERDMA_REG_ACCESS_WAIT_MS); - } - - if (i == ERDMA_WAIT_DEV_DONE_CNT) { - dev_err(&dev->pdev->dev, "wait init done failed.\n"); - err = -ETIMEDOUT; - goto err_destroy_eq; - } - set_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); return 0; -err_destroy_eq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->eq.depth << EQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); - err_destroy_cq: - dma_free_coherent(&dev->pdev->dev, - (cmdq->cq.depth << CQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); - + erdma_cmdq_cq_destroy(dev); err_destroy_sq: - 
dma_free_coherent(&dev->pdev->dev, - (cmdq->sq.depth << SQEBB_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); + erdma_cmdq_sq_destroy(dev); return err; } @@ -275,18 +265,9 @@ void erdma_cmdq_destroy(struct erdma_dev *dev) clear_bit(ERDMA_CMDQ_STATE_OK_BIT, &cmdq->state); - dma_free_coherent(&dev->pdev->dev, - (cmdq->eq.depth << EQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->eq.qbuf, cmdq->eq.qbuf_dma_addr); - dma_free_coherent(&dev->pdev->dev, - (cmdq->sq.depth << SQEBB_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->sq.qbuf, cmdq->sq.qbuf_dma_addr); - dma_free_coherent(&dev->pdev->dev, - (cmdq->cq.depth << CQE_SHIFT) + - ERDMA_EXTRA_BUFFER_SIZE, - cmdq->cq.qbuf, cmdq->cq.qbuf_dma_addr); + erdma_cmdq_eq_destroy(dev); + erdma_cmdq_cq_destroy(dev); + erdma_cmdq_sq_destroy(dev); } static void *get_next_valid_cmdq_cqe(struct erdma_cmdq *cmdq) @@ -449,7 +430,7 @@ void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op) FIELD_PREP(ERDMA_CMD_HDR_OPCODE_MASK, op); } -int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size, +int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, void *req, u32 req_size, u64 *resp0, u64 *resp1) { struct erdma_comp_wait *comp_wait; diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index d29886324592ea..d9eae90e94cf74 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -1,12 +1,9 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. */ -#include - -#include "erdma_hw.h" #include "erdma_verbs.h" static void *get_next_valid_cqe(struct erdma_cq *cq) @@ -36,18 +33,26 @@ static void notify_cq(struct erdma_cq *cq, u8 solcitied) int erdma_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct erdma_cq *cq = to_ecq(ibcq); + u16 dim_timeout = cq->dim.timeout; unsigned long irq_flags; int ret = 0; spin_lock_irqsave(&cq->kern_cq.lock, irq_flags); - notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); - - if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq)) + if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && get_next_valid_cqe(cq)) { ret = 1; + goto unlock; + } - cq->kern_cq.notify_cnt++; - + if (!dim_timeout) { + notify_cq(cq, (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + cq->kern_cq.notify_cnt++; + } else { + cq->dim.flags |= flags; + hrtimer_start(&cq->dim.timer, ns_to_ktime(dim_timeout * NSEC_PER_USEC), + HRTIMER_MODE_REL_PINNED); + } +unlock: spin_unlock_irqrestore(&cq->kern_cq.lock, irq_flags); return ret; @@ -62,7 +67,6 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { [ERDMA_OP_RECV_IMM] = IB_WC_RECV_RDMA_WITH_IMM, [ERDMA_OP_RECV_INV] = IB_WC_RECV, [ERDMA_OP_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, - [ERDMA_OP_INVALIDATE] = IB_WC_LOCAL_INV, [ERDMA_OP_RSP_SEND_IMM] = IB_WC_RECV, [ERDMA_OP_SEND_WITH_INV] = IB_WC_SEND, [ERDMA_OP_REG_MR] = IB_WC_REG_MR, @@ -203,3 +207,24 @@ int erdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) return npolled; } + +enum hrtimer_restart cq_timer_fn(struct hrtimer *t) +{ + struct erdma_cq *cq = container_of(t, struct erdma_cq, dim.timer); + + notify_cq(cq, (cq->dim.flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + cq->kern_cq.notify_cnt++; + cq->dim.flags = 0; + + return HRTIMER_NORESTART; +} + +#define DIM_OFF_THRESHOLD 3 +int erdma_modify_cq(struct ib_cq *ibcq, u16 cq_count, u16 cq_period) +{ + 
struct erdma_cq *cq = to_ecq(ibcq); + + cq->dim.timeout = cq_period >= DIM_OFF_THRESHOLD ? cq_period : 0; + + return 0; +} diff --git a/drivers/infiniband/hw/erdma/erdma_eq.c b/drivers/infiniband/hw/erdma/erdma_eq.c index 51ce06bc390927..0ed611bfb6e38d 100644 --- a/drivers/infiniband/hw/erdma/erdma_eq.c +++ b/drivers/infiniband/hw/erdma/erdma_eq.c @@ -1,20 +1,9 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. */ -#include -#include -#include - -#include -#include -#include - -#include "erdma.h" -#include "erdma_cm.h" -#include "erdma_hw.h" #include "erdma_verbs.h" #define MAX_POLL_CHUNK_SIZE 16 @@ -25,7 +14,7 @@ void notify_eq(struct erdma_eq *eq) FIELD_PREP(ERDMA_EQDB_ARM_MASK, 1); *eq->db_record = db_data; - writeq(db_data, eq->db_addr); + writeq(db_data, eq->db); atomic64_inc(&eq->notify_num); } @@ -109,7 +98,7 @@ int erdma_aeq_init(struct erdma_dev *dev) atomic64_set(&eq->event_num, 0); atomic64_set(&eq->notify_num, 0); - eq->db_addr = (u64 __iomem *)(dev->func_bar + ERDMA_REGS_AEQ_DB_REG); + eq->db = dev->func_bar + ERDMA_REGS_AEQ_DB_REG; eq->db_record = (u64 *)(eq->qbuf + buf_size); erdma_reg_write32(dev, ERDMA_REGS_AEQ_ADDR_H_REG, @@ -234,7 +223,7 @@ static int create_eq_cmd(struct erdma_dev *dev, u32 eqn, struct erdma_eq *eq) req.db_dma_addr_l = lower_32_bits(db_info_dma_addr); req.db_dma_addr_h = upper_32_bits(db_info_dma_addr); - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(struct erdma_cmdq_create_eq_req), NULL, NULL); } @@ -256,9 +245,8 @@ static int erdma_ceq_init_one(struct erdma_dev *dev, u16 ceqn) atomic64_set(&eq->notify_num, 0); eq->depth = ERDMA_DEFAULT_EQ_DEPTH; - eq->db_addr = - (u64 __iomem *)(dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG + - (ceqn + 1) * ERDMA_DB_SIZE); + eq->db = dev->func_bar + ERDMA_REGS_CEQ_DB_BASE_REG + + (ceqn + 1) * ERDMA_DB_SIZE; eq->db_record = (u64 *)(eq->qbuf + buf_size); eq->ci = 0; dev->ceqs[ceqn].dev = dev; @@ -286,8 +274,7 @@ static void erdma_ceq_uninit_one(struct erdma_dev *dev, u16 ceqn) req.qtype = ERDMA_EQ_TYPE_CEQ; req.vector_idx = ceqn + 1; - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) return; diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index bb028712eb961d..087aae76dd9515 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Authors: Cheng Xu */ /* Kai Shen */ @@ -10,6 +10,9 @@ #include #include +#define ERDMA_HW_PAGE_SHIFT 12 +#define ERDMA_HW_PAGE_SIZE 4096 + /* PCIe device related definition. */ #define PCI_VENDOR_ID_ALIBABA 0x1ded @@ -75,11 +78,11 @@ #define ERDMA_BAR_SQDB_SPACE_OFFSET ERDMA_BAR_DB_SPACE_BASE #define ERDMA_BAR_SQDB_SPACE_SIZE (384 * 1024) -#define ERDMA_BAR_RQDB_SPACE_OFFSET \ +#define ERDMA_BAR_RQDB_SPACE_OFFSET \ (ERDMA_BAR_SQDB_SPACE_OFFSET + ERDMA_BAR_SQDB_SPACE_SIZE) #define ERDMA_BAR_RQDB_SPACE_SIZE (96 * 1024) -#define ERDMA_BAR_CQDB_SPACE_OFFSET \ +#define ERDMA_BAR_CQDB_SPACE_OFFSET \ (ERDMA_BAR_RQDB_SPACE_OFFSET + ERDMA_BAR_RQDB_SPACE_SIZE) /* Doorbell page resources related. 
*/ @@ -145,14 +148,20 @@ enum CMDQ_RDMA_OPCODE { CMDQ_OPCODE_MODIFY_QP = 3, CMDQ_OPCODE_CREATE_CQ = 4, CMDQ_OPCODE_DESTROY_CQ = 5, + CMDQ_OPCODE_REFLUSH = 6, CMDQ_OPCODE_REG_MR = 8, - CMDQ_OPCODE_DEREG_MR = 9 + CMDQ_OPCODE_DEREG_MR = 9, + CMDQ_OPCODE_QUERY_QPC = 11, + CMDQ_OPCODE_QUERY_CQC = 12, }; enum CMDQ_COMMON_OPCODE { CMDQ_OPCODE_CREATE_EQ = 0, CMDQ_OPCODE_DESTROY_EQ = 1, CMDQ_OPCODE_QUERY_FW_INFO = 2, + CMDQ_OPCODE_CONF_MTU = 3, + CMDQ_OPCODE_GET_STATS = 4, + CMDQ_OPCODE_QUERY_EQC = 6, }; /* cmdq-SQE HDR */ @@ -190,6 +199,11 @@ struct erdma_cmdq_destroy_eq_req { u8 qtype; }; +struct erdma_cmdq_config_mtu_req { + u64 hdr; + u32 mtu; +}; + /* create_cq cfg0 */ #define ERDMA_CMD_CREATE_CQ_DEPTH_MASK GENMASK(31, 24) #define ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK GENMASK(23, 20) @@ -218,8 +232,8 @@ struct erdma_cmdq_create_cq_req { /* regmr cfg1 */ #define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12) #define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6) -#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2) -#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0) +#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1) +#define ERDMA_CMD_REGMR_ACC_MODE_MASK BIT(0) /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) @@ -299,8 +313,16 @@ struct erdma_cmdq_destroy_qp_req { u32 qpn; }; +struct erdma_cmdq_reflush_req { + u64 hdr; + u32 qpn; + u32 sq_pi; + u32 rq_pi; +}; + /* cap qword 0 definition */ #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) +#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) #define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16) #define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0) @@ -312,6 +334,11 @@ struct erdma_cmdq_destroy_qp_req { #define ERDMA_NQP_PER_QBLOCK 1024 +enum { + ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, + ERDMA_DEV_CAP_FLAGS_QUERY_QC = 1 << 6, +}; + #define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0) /* CQE hdr */ @@ -367,8 +394,8 @@ struct erdma_rqe { #define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) /* REG MR attrs */ -#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0) -#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2) +#define ERDMA_SQE_MR_MODE_MASK BIT(0) +#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1) #define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6) #define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12) @@ -417,7 +444,7 @@ struct erdma_reg_mr_sqe { }; /* EQ related. 
*/ -#define ERDMA_DEFAULT_EQ_DEPTH 256 +#define ERDMA_DEFAULT_EQ_DEPTH 4096 /* ceqe */ #define ERDMA_CEQE_HDR_DB_MASK BIT_ULL(63) @@ -453,13 +480,13 @@ enum erdma_opcode { ERDMA_OP_RECV_IMM = 5, ERDMA_OP_RECV_INV = 6, - ERDMA_OP_REQ_ERR = 7, - ERDMA_OP_READ_RESPONSE = 8, + ERDMA_OP_RSVD0 = 7, + ERDMA_OP_RSVD1 = 8, ERDMA_OP_WRITE_WITH_IMM = 9, - ERDMA_OP_RECV_ERR = 10, + ERDMA_OP_RSVD2 = 10, + ERDMA_OP_RSVD3 = 11, - ERDMA_OP_INVALIDATE = 11, ERDMA_OP_RSP_SEND_IMM = 12, ERDMA_OP_SEND_WITH_INV = 13, @@ -508,4 +535,139 @@ enum erdma_vendor_err { ERDMA_WC_VENDOR_SQE_WARP_ERR = 0x34 }; +/* Response Definitions for Query Command Category */ +#define ERDMA_HW_RESP_SIZE 256 + +struct erdma_cmdq_query_req { + u64 hdr; + u32 rsvd; + u32 index; + + u64 target_addr; + u32 target_length; +}; + +struct erdma_cmdq_query_resp_hdr { + u16 magic; + u8 ver; + u8 length; + + u32 index; + u32 rsvd[2]; +}; + +struct erdma_cmdq_query_stats_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u64 tx_req_cnt; + u64 tx_packets_cnt; + u64 tx_bytes_cnt; + u64 tx_drop_packets_cnt; + u64 tx_bps_meter_drop_packets_cnt; + u64 tx_pps_meter_drop_packets_cnt; + u64 rx_packets_cnt; + u64 rx_bytes_cnt; + u64 rx_drop_packets_cnt; + u64 rx_bps_meter_drop_packets_cnt; + u64 rx_pps_meter_drop_packets_cnt; +}; + +struct erdma_cmdq_query_qpc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + struct{ + u8 status; /* 0 - disabled, 1 - enabled. */ + u8 qbuf_page_offset; + u8 qbuf_page_size; + u8 qbuf_depth; + + u16 hw_pi; + u16 hw_ci; + } qpc[2]; + + /* hardware io stat */ + u16 last_comp_sqe_idx; + u16 last_comp_rqe_idx; + u16 scqe_counter; + u16 rcqe_counter; + + u16 tx_pkts_cnt; + u16 rx_pkts_cnt; + u16 rx_error_drop_cnt; + u16 rx_invalid_drop_cnt; + + u32 rto_retrans_cnt; + //qp sw info + u32 rqpn; + + u32 pd; + u16 fw_sq_pi; + u16 fw_sq_ci; + + u16 fw_rq_ci; + u8 sq_in_flush; + u8 rq_in_flush; + u16 sq_flushed_pi; + u16 rq_flushed_pi; + + u32 scqn; + u32 rcqn; + + u64 sqbuf_addr; + u64 rqbuf_addr; + u64 sdbrec_addr; + u64 rdbrec_addr; + + u64 sdbrec_cur; + u64 rdbrec_cur; + + u32 ip_src; + u32 ip_dst; + u16 srcport; + u16 dstport; +}; + +struct erdma_cmdq_query_cqc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + + u32 pi; + u8 q_en; + u8 log_depth; + u8 cq_cur_ownership; + u8 last_errdb_type; /* 0,dup db;1,out-order db */ + + u32 last_errdb_ci; + u8 out_order_db_cnt; + u8 dup_db_cnt; + u16 rsvd; + + u64 cn_cq_db_addr; + u64 cq_db_record; +}; + +struct erdma_cmdq_query_eqc_resp { + struct erdma_cmdq_query_resp_hdr hdr; + u16 depth; + u16 vector; + + u8 int_suppression; + u8 tail_owner; + u8 head_owner; + u8 overflow; + + u32 head; + u32 tail; + + u64 cn_addr; + u64 cn_db_addr; + u64 eq_db_record; +}; + +struct erdma_cmdq_dump_addr_req { + u64 hdr; + u64 dump_addr; + u64 target_addr; + u32 target_length; +}; + #endif diff --git a/drivers/infiniband/hw/erdma/erdma_ioctl.c b/drivers/infiniband/hw/erdma/erdma_ioctl.c index 47d375ec8b2c86..6352a00c92b9c4 100644 --- a/drivers/infiniband/hw/erdma/erdma_ioctl.c +++ b/drivers/infiniband/hw/erdma/erdma_ioctl.c @@ -5,14 +5,14 @@ #include #include +#include +#include +#include #include "erdma.h" +#include "erdma_cm.h" #include "erdma_ioctl.h" #include "erdma_verbs.h" -#include "erdma_debug.h" -#include -#include -#include static struct class *erdma_chrdev_class; static struct cdev erdma_cdev; @@ -21,7 +21,69 @@ static dev_t erdma_char_dev; #define ERDMA_CHRDEV_NAME "erdma" -static int erdma_ioctl_conf_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) +static int 
erdma_query_resource(struct erdma_dev *dev, u32 mod, u32 op, + u32 index, void *out, u32 len) +{ + struct erdma_cmdq_query_req req; + dma_addr_t dma_addr; + void *resp; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, mod, op); + + resp = dma_pool_alloc(dev->resp_pool, GFP_KERNEL, &dma_addr); + if (!resp) + return -ENOMEM; + + req.index = index; + req.target_addr = dma_addr; + req.target_length = ERDMA_HW_RESP_SIZE; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + goto out; + + if (out) + memcpy(out, resp, len); + +out: + dma_pool_free(dev->resp_pool, resp, dma_addr); + + return err; +} + +static int erdma_query_qpc(struct erdma_dev *dev, u32 qpn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_qpc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_QPC, qpn, out, + sizeof(struct erdma_cmdq_query_qpc_resp)); +} + +static int erdma_query_cqc(struct erdma_dev *dev, u32 cqn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_cqc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_QUERY_CQC, cqn, out, + sizeof(struct erdma_cmdq_query_cqc_resp)); +} + +static int erdma_query_eqc(struct erdma_dev *dev, u32 eqn, void *out) +{ + BUILD_BUG_ON(sizeof(struct erdma_cmdq_query_eqc_resp) > + ERDMA_HW_RESP_SIZE); + + return erdma_query_resource(dev, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_QUERY_EQC, eqn, out, + sizeof(struct erdma_cmdq_query_eqc_resp)); +} + +static int erdma_ioctl_conf_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) { int ret = 0; @@ -41,14 +103,36 @@ static int erdma_ioctl_conf_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg * return ret; } -static void fill_eq_info(struct erdma_eq_info *info, struct erdma_eq *eq) +static void fill_eq_info(struct erdma_dev *dev, struct erdma_eq_info *info, + struct erdma_eq *eq) { + struct erdma_cmdq_query_eqc_resp resp; + int ret; + info->event_cnt = atomic64_read(&eq->event_num); info->notify_cnt = atomic64_read(&eq->notify_num); info->depth = eq->depth; info->ci = eq->ci; info->qbuf_dma = eq->qbuf_dma_addr; info->qbuf_va = (u64)eq->qbuf; + info->hw_info_valid = 0; + + ret = erdma_query_eqc(dev, info->eqn, &resp); + if (ret) + return; + + info->hw_info_valid = 1; + info->hw_depth = resp.depth; + info->vector = resp.vector; + info->int_suppression = resp.int_suppression; + info->tail_owner = resp.tail_owner; + info->head_owner = resp.head_owner; + info->overflow = resp.overflow; + info->head = resp.head; + info->tail = resp.tail; + info->cn_addr = resp.cn_addr; + info->cn_db_addr = resp.cn_db_addr; + info->eq_db_record = resp.eq_db_record; } static void show_cep_info(struct erdma_dev *edev) @@ -61,115 +145,308 @@ static void show_cep_info(struct erdma_dev *edev) if (!num_cep) return; - pr_info("%-20s%-6s%-6s%-7s%-3s%-3s%-4s%-21s%-9s\n", - "CEP", "State", "Ref's", "QP-ID", "LQ", "LC", "U", "Sock", "CM-ID"); + pr_info("%-20s%-6s%-6s%-7s%-3s%-3s%-4s%-21s%-9s\n", "CEP", "State", + "Ref's", "QP-ID", "LQ", "LC", "U", "Sock", "CM-ID"); list_for_each_safe(pos, tmp, &edev->cep_list) { struct erdma_cep *cep = list_entry(pos, struct erdma_cep, devq); - pr_info("0x%-18p%-6d%-6d%-7d%-3s%-3s%-4d0x%-18p 0x%-16p\n", - cep, cep->state, kref_read(&cep->ref), + pr_info("0x%-18p%-6d%-6d%-7d%-3s%-3s%-4d0x%-18p 0x%-16p\n", cep, + cep->state, kref_read(&cep->ref), cep->qp ? QP_ID(cep->qp) : -1, list_empty(&cep->listenq) ? "n" : "y", - cep->listen_cep ? 
"y" : "n", cep->in_use, - cep->sock, cep->cm_id); + cep->listen_cep ? "y" : "n", cep->in_use, cep->sock, + cep->cm_id); + } +} + +static int fill_cq_info(struct erdma_dev *dev, u32 cqn, + struct erdma_ioctl_msg *msg) +{ + struct erdma_cq_info *info = &msg->out.cq_info; + struct erdma_cmdq_query_cqc_resp resp; + struct rdma_restrack_entry *res; + struct erdma_cq *cq; + int ret; + + if (cqn == 0) { + info->cqn = 0; + info->depth = dev->cmdq.cq.depth; + info->assoc_eqn = 0; + info->qbuf_dma_addr = dev->cmdq.cq.qbuf_dma_addr; + info->ci = dev->cmdq.cq.ci; + info->cmdsn = dev->cmdq.cq.cmdsn; + info->notify_cnt = atomic64_read(&dev->cmdq.cq.armed_num); + + goto query_hw_cqc; + } + + cq = find_cq_by_cqn(dev, cqn); + if (!cq) + return -EINVAL; + + info->cqn = cq->cqn; + info->depth = cq->depth; + info->assoc_eqn = cq->assoc_eqn; + + res = &cq->ibcq.res; + info->is_user = !rdma_is_kernel_res(res); + + if (info->is_user) { + info->mtt.page_size = cq->user_cq.qbuf_mtt.page_size; + info->mtt.page_offset = cq->user_cq.qbuf_mtt.page_offset; + info->mtt.page_cnt = cq->user_cq.qbuf_mtt.page_cnt; + info->mtt.mtt_nents = cq->user_cq.qbuf_mtt.mtt_nents; + memcpy(info->mtt.mtt_entry, cq->user_cq.qbuf_mtt.mtt_entry, + ERDMA_MAX_INLINE_MTT_ENTRIES * sizeof(__u64)); + info->mtt.va = cq->user_cq.qbuf_mtt.va; + info->mtt.len = cq->user_cq.qbuf_mtt.len; + info->mtt_type = cq->user_cq.qbuf_mtt.mtt_type; + } else { + info->qbuf_dma_addr = cq->kern_cq.qbuf_dma_addr; + info->ci = cq->kern_cq.ci; + info->cmdsn = cq->kern_cq.cmdsn; + info->notify_cnt = cq->kern_cq.notify_cnt; } + + info->hw_info_valid = 0; + +query_hw_cqc: + ret = erdma_query_cqc(dev, cqn, &resp); + if (ret) + return 0; + + info->hw_info_valid = 1; + info->hw_pi = resp.pi; + info->enable = resp.q_en; + info->log_depth = resp.log_depth; + info->cq_cur_ownership = resp.cq_cur_ownership; + info->last_errdb_type = resp.last_errdb_type; + info->last_errdb_ci = resp.last_errdb_ci; + info->out_order_db_cnt = resp.out_order_db_cnt; + info->dup_db_cnt = resp.dup_db_cnt; + info->cn_cq_db_addr = resp.cn_cq_db_addr; + info->cq_db_record = resp.cq_db_record; + + return 0; } -static int erdma_ioctl_ver_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) +static int erdma_ioctl_ver_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) { - msg->out.version = ERDMA_MAJOR_VER << 16 | - ERDMA_MEDIUM_VER << 8 | - ERDMA_MINOR_VER; + msg->out.version = + ERDMA_MAJOR_VER << 16 | ERDMA_MEDIUM_VER << 8 | ERDMA_MINOR_VER; return 0; } -static int erdma_ioctl_info_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) +static int erdma_fill_qp_info(struct erdma_dev *dev, u32 qpn, + struct erdma_qp_info *qp_info) { - int ret = 0; - struct erdma_qp *qp; - struct erdma_qp_info *qp_info; + struct erdma_cmdq_query_qpc_resp resp; struct rdma_restrack_entry *res; - int count = 0; struct erdma_mem *mtt; - int i; + struct erdma_qp *qp; + int i, ret; + + if (qpn == 0) + goto query_hw_qpc; + + qp = find_qp_by_qpn(dev, qpn); + if (!qp) + return -EINVAL; + erdma_qp_get(qp); + + qp_info->hw_info_valid = 0; + qp_info->qpn = qp->ibqp.qp_num; + qp_info->qp_state = qp->attrs.state; + qp_info->ref_cnt = kref_read(&qp->ref); + qp_info->qtype = qp->attrs.qp_type; + qp_info->sq_depth = qp->attrs.sq_size; + qp_info->rq_depth = qp->attrs.rq_size; + qp_info->cookie = qp->attrs.remote_cookie; + qp_info->cc = qp->attrs.cc; + qp_info->assoc_scqn = qp->scq->cqn; + qp_info->assoc_rcqn = qp->rcq->cqn; + + if (qp->cep && qp->cep->cm_id) { + struct erdma_cep *cep = qp->cep; + struct iw_cm_id *id = 
cep->cm_id; + struct sockaddr_storage remote_addr; + struct sockaddr_storage local_addr; + + qp_info->sip = + ntohl(to_sockaddr_in(id->local_addr).sin_addr.s_addr); + qp_info->dip = + ntohl(to_sockaddr_in(id->remote_addr).sin_addr.s_addr); + qp_info->sport = ntohs(to_sockaddr_in(id->local_addr).sin_port); + qp_info->dport = + ntohs(to_sockaddr_in(id->remote_addr).sin_port); + + if (cep->sock) { + getname_local(cep->sock, &local_addr); + getname_peer(cep->sock, &remote_addr); + qp_info->origin_sport = + ntohs(to_sockaddr_in(local_addr).sin_port); + qp_info->sip = ntohl( + to_sockaddr_in(local_addr).sin_addr.s_addr); + } + } + + res = &qp->ibqp.res; + qp_info->is_user = !rdma_is_kernel_res(res); + if (qp_info->is_user) { + qp_info->pid = res->task->pid; + get_task_comm(qp_info->buf, res->task); + mtt = &qp->user_qp.sq_mtt; + qp_info->sq_mtt_type = mtt->mtt_type; + qp_info->sq_mtt.page_size = mtt->page_size; + qp_info->sq_mtt.page_offset = mtt->page_offset; + qp_info->sq_mtt.page_cnt = mtt->page_cnt; + qp_info->sq_mtt.mtt_nents = mtt->mtt_nents; + qp_info->sq_mtt.va = mtt->va; + qp_info->sq_mtt.len = mtt->len; + for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) + qp_info->sq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; + + mtt = &qp->user_qp.rq_mtt; + qp_info->rq_mtt_type = mtt->mtt_type; + qp_info->rq_mtt.page_size = mtt->page_size; + qp_info->rq_mtt.page_offset = mtt->page_offset; + qp_info->rq_mtt.page_cnt = mtt->page_cnt; + qp_info->rq_mtt.mtt_nents = mtt->mtt_nents; + qp_info->rq_mtt.va = mtt->va; + qp_info->rq_mtt.len = mtt->len; + for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) + qp_info->rq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; + } else { + qp_info->sqci = qp->kern_qp.sq_ci; + qp_info->sqpi = qp->kern_qp.sq_pi; + qp_info->rqci = qp->kern_qp.rq_ci; + qp_info->rqpi = qp->kern_qp.rq_pi; + + qp_info->sqbuf_dma = qp->kern_qp.sq_buf_dma_addr; + qp_info->rqbuf_dma = qp->kern_qp.rq_buf_dma_addr; + qp_info->sqdbrec_dma = qp->kern_qp.sq_db_info_dma_addr; + qp_info->rqdbrec_dma = qp->kern_qp.rq_db_info_dma_addr; + } + + erdma_qp_put(qp); + +query_hw_qpc: + ret = erdma_query_qpc(dev, qpn, &resp); + if (ret) + return 0; + + qp_info->hw_info_valid = 1; + qp_info->sq_enable = resp.qpc[0].status; + qp_info->sqbuf_page_offset = resp.qpc[0].qbuf_page_offset; + qp_info->sqbuf_page_size = resp.qpc[0].qbuf_page_size; + qp_info->sqbuf_depth = resp.qpc[0].qbuf_depth; + qp_info->hw_sq_ci = resp.qpc[0].hw_ci; + qp_info->hw_sq_pi = resp.qpc[0].hw_pi; + + qp_info->rq_enable = resp.qpc[1].status; + qp_info->rqbuf_page_offset = resp.qpc[1].qbuf_page_offset; + qp_info->rqbuf_page_size = resp.qpc[1].qbuf_page_size; + qp_info->rqbuf_depth = resp.qpc[1].qbuf_depth; + qp_info->hw_rq_ci = resp.qpc[1].hw_ci; + qp_info->hw_rq_pi = resp.qpc[1].hw_pi; + qp_info->last_comp_sqe_idx = resp.last_comp_sqe_idx; + qp_info->last_comp_rqe_idx = resp.last_comp_rqe_idx; + qp_info->scqe_counter = resp.scqe_counter; + qp_info->rcqe_counter = resp.rcqe_counter; + qp_info->tx_pkts_cnt = resp.tx_pkts_cnt; + qp_info->rx_pkts_cnt = resp.rx_pkts_cnt; + qp_info->rx_error_drop_cnt = resp.rx_error_drop_cnt; + qp_info->rx_invalid_drop_cnt = resp.rx_invalid_drop_cnt; + qp_info->rto_retrans_cnt = resp.rto_retrans_cnt; + qp_info->pd = resp.pd; + qp_info->fw_sq_pi = resp.fw_sq_pi; + qp_info->fw_sq_ci = resp.fw_sq_ci; + qp_info->fw_rq_ci = resp.fw_rq_ci; + qp_info->sq_in_flush = resp.sq_in_flush; + qp_info->rq_in_flush = resp.rq_in_flush; + qp_info->sq_flushed_pi = resp.sq_flushed_pi; + qp_info->rq_flushed_pi = resp.rq_flushed_pi; + qp_info->sqbuf_addr 
= resp.sqbuf_addr; + qp_info->rqbuf_addr = resp.rqbuf_addr; + qp_info->sdbrec_addr = resp.sdbrec_addr; + qp_info->rdbrec_addr = resp.rdbrec_addr; + qp_info->ip_src = resp.ip_src; + qp_info->ip_dst = resp.ip_dst; + qp_info->srcport = resp.srcport; + qp_info->dstport = resp.dstport; + qp_info->sdbrec_val = resp.sdbrec_cur; + qp_info->rdbrec_val = resp.rdbrec_cur; + + if (qpn != 0 && resp.scqn != qp_info->assoc_scqn) + ibdev_info(&dev->ibdev, "hw scqn(%u) != drv scqn(%u)\n", + resp.scqn, qp_info->assoc_scqn); + + if (qpn != 0 && resp.rcqn != qp_info->assoc_rcqn) + ibdev_info(&dev->ibdev, "hw rcqn(%u) != drv rcqn(%u)\n", + resp.rcqn, qp_info->assoc_rcqn); + + return 0; +} + +static int erdma_ioctl_info_cmd(struct erdma_dev *edev, + struct erdma_ioctl_msg *msg) +{ + struct erdma_qp_info *qp_info; + int ret = 0, count = 0, i; + struct erdma_qp *qp; + struct erdma_cq *cq; unsigned long index; switch (msg->in.opcode) { case ERDMA_INFO_TYPE_QP: - qp = find_qp_by_qpn(edev, msg->in.info_req.qn); - if (!qp) - return -EINVAL; - erdma_qp_get(qp); - qp_info = &msg->out.qp_info; - - qp_info->qpn = qp->ibqp.qp_num; - qp_info->qp_state = qp->attrs.state; - qp_info->ref_cnt = kref_read(&qp->ref); - qp_info->qtype = qp->attrs.qp_type; - qp_info->sq_depth = qp->attrs.sq_size; - qp_info->rq_depth = qp->attrs.rq_size; - qp_info->cookie = qp->attrs.cookie; - qp_info->cc = qp->attrs.cc; - res = &qp->ibqp.res; - qp_info->is_user = !rdma_is_kernel_res(res); - if (qp_info->is_user) { - qp_info->pid = res->task->pid; - get_task_comm(qp_info->buf, res->task); - mtt = &qp->user_qp.sq_mtt; - qp_info->sq_mtt_type = mtt->mtt_type; - qp_info->sq_mtt.page_size = mtt->page_size; - qp_info->sq_mtt.page_offset = mtt->page_offset; - qp_info->sq_mtt.page_cnt = mtt->page_cnt; - qp_info->sq_mtt.mtt_nents = mtt->mtt_nents; - qp_info->sq_mtt.va = mtt->va; - qp_info->sq_mtt.len = mtt->len; - for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) - qp_info->sq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; - - mtt = &qp->user_qp.rq_mtt; - qp_info->rq_mtt_type = mtt->mtt_type; - qp_info->rq_mtt.page_size = mtt->page_size; - qp_info->rq_mtt.page_offset = mtt->page_offset; - qp_info->rq_mtt.page_cnt = mtt->page_cnt; - qp_info->rq_mtt.mtt_nents = mtt->mtt_nents; - qp_info->rq_mtt.va = mtt->va; - qp_info->rq_mtt.len = mtt->len; - for (i = 0; i < ERDMA_MAX_INLINE_MTT_ENTRIES; i++) - qp_info->rq_mtt.mtt_entry[i] = mtt->mtt_entry[i]; - } - - erdma_qp_put(qp); + ret = erdma_fill_qp_info(edev, msg->in.info_req.qn, qp_info); break; case ERDMA_INFO_TYPE_ALLOCED_QP: - xa_for_each_start(&edev->qp_xa, index, qp, msg->in.info_req.qn) { + xa_for_each_start(&edev->qp_xa, index, qp, + msg->in.info_req.qn) { msg->out.allocted_qpn[count++] = index; if (count == msg->in.info_req.max_result_cnt) break; } msg->out.length = count * 4; + break; + case ERDMA_INFO_TYPE_ALLOCED_CQ: + xa_for_each_start(&edev->cq_xa, index, cq, + msg->in.info_req.qn) { + msg->out.allocted_cqn[count++] = index; + if (count == msg->in.info_req.max_result_cnt) + break; + } + msg->out.length = count * 4; + break; case ERDMA_INFO_TYPE_EQ: msg->out.eq_info[0].ready = 1; msg->out.eq_info[0].eqn = 0; - fill_eq_info(&msg->out.eq_info[0], &edev->aeq); + fill_eq_info(edev, &msg->out.eq_info[0], &edev->aeq); msg->out.eq_info[1].ready = 1; msg->out.eq_info[1].eqn = 1; - fill_eq_info(&msg->out.eq_info[1], &edev->cmdq.eq); + fill_eq_info(edev, &msg->out.eq_info[1], &edev->cmdq.eq); for (i = 0; i < 31; i++) { msg->out.eq_info[i + 2].ready = edev->ceqs[i].ready; msg->out.eq_info[i + 2].eqn = i + 2; - 
fill_eq_info(&msg->out.eq_info[i + 2], &edev->ceqs[i].eq); + fill_eq_info(edev, &msg->out.eq_info[i + 2], + &edev->ceqs[i].eq); } break; case ERDMA_INFO_TYPE_CEP: show_cep_info(edev); break; + case ERDMA_INFO_TYPE_CQ: + ret = fill_cq_info(edev, msg->in.info_req.qn, msg); + break; default: pr_info("unknown opcode:%u\n", msg->in.opcode); return -EINVAL; @@ -180,29 +457,24 @@ static int erdma_ioctl_info_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg * int erdma_ioctl_stat_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) { - __u64 *stats_data; + int ret; switch (msg->in.opcode) { case ERDMA_STAT_TYPE_QP: case ERDMA_STAT_TYPE_CQ: break; case ERDMA_STAT_TYPE_DEV: - stats_data = (__u64 *)msg->out.data; - stats_data[0] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TSO_IN_PKTS_REG); - stats_data[1] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TSO_OUT_PKTS_REG); - stats_data[2] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TSO_OUT_BYTES_REG); - stats_data[3] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TX_DROP_PKTS_REG); - stats_data[4] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TX_BPS_METER_DROP_PKTS_REG); - stats_data[5] = erdma_reg_read64(edev, ERDMA_REGS_STATS_TX_PPS_METER_DROP_PKTS_REG); - - stats_data[6] = erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_PKTS_REG); - stats_data[7] = erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_BYTES_REG); - stats_data[8] = erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_DROP_PKTS_REG); - stats_data[9] = erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_BPS_METER_DROP_PKTS_REG); - stats_data[10] = - erdma_reg_read64(edev, ERDMA_REGS_STATS_RX_PPS_METER_DROP_PKTS_REG); - - msg->out.length = 256; + ret = erdma_query_hw_stats(edev); + if (ret) + return ret; + + /* Make sure that no overflow happens. */ + BUILD_BUG_ON(ERDMA_STATS_MAX > 512); + + memcpy(msg->out.stats, &edev->stats, + sizeof(__u64) * ERDMA_STATS_MAX); + + msg->out.length = ERDMA_STATS_MAX * sizeof(__u64); break; default: pr_err("unknown stat opcode %d.\n", msg->in.opcode); @@ -214,39 +486,138 @@ int erdma_ioctl_stat_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) int erdma_ioctl_dump_cmd(struct erdma_dev *edev, struct erdma_ioctl_msg *msg) { - u32 qe_idx = msg->in.dump_req.qe_idx; - u32 qn = msg->in.dump_req.qn; + u32 qe_idx = msg->in.dump_req.qe_idx; + u32 qn = msg->in.dump_req.qn; struct erdma_qp *qp; + struct erdma_cq *cq; + struct erdma_eq *eq; int ret = 0; u64 address; u32 wqe_idx; switch (msg->in.opcode) { case ERDMA_DUMP_TYPE_SQE: + + /* CMDQ-SQ */ + if (qn == 0) { + wqe_idx = qe_idx & (edev->cmdq.sq.depth - 1); + memcpy(msg->out.data, + edev->cmdq.sq.qbuf + (wqe_idx << SQEBB_SHIFT), + SQEBB_SIZE); + } else { + qp = find_qp_by_qpn(edev, qn); + if (!qp) + return -EINVAL; + erdma_qp_get(qp); + + if (!rdma_is_kernel_res(&qp->ibqp.res)) { + address = qp->user_qp.sq_mtt.umem->address; + wqe_idx = qe_idx & (qp->attrs.sq_size - 1); + address += wqe_idx << SQEBB_SHIFT; + ret = access_process_vm(qp->ibqp.res.task, + address, msg->out.data, + SQEBB_SIZE, FOLL_FORCE); + if (ret != SQEBB_SIZE) { + pr_info("access address with error (%d)\n", + ret); + erdma_qp_put(qp); + return -EIO; + } + ret = 0; + } else { + wqe_idx = qe_idx & (qp->attrs.sq_size - 1); + memcpy(msg->out.data, + qp->kern_qp.sq_buf + + (wqe_idx << SQEBB_SHIFT), + SQEBB_SIZE); + } + erdma_qp_put(qp); + } + msg->out.length = SQEBB_SIZE; + break; + case ERDMA_DUMP_TYPE_RQE: qp = find_qp_by_qpn(edev, qn); if (!qp) return -EINVAL; erdma_qp_get(qp); if (!rdma_is_kernel_res(&qp->ibqp.res)) { - - address = qp->user_qp.sq_mtt.umem->address; - wqe_idx = qe_idx & 
(qp->attrs.sq_size - 1); - address += wqe_idx << SQEBB_SHIFT; - ret = access_process_vm(qp->ibqp.res.task, - address, msg->out.data, SQEBB_SIZE, FOLL_FORCE); - if (ret != SQEBB_SIZE) { - pr_info("access address with error (%d)\n", ret); + address = qp->user_qp.rq_mtt.umem->address; + wqe_idx = qe_idx & (qp->attrs.rq_size - 1); + address += wqe_idx << RQE_SHIFT; + ret = access_process_vm(qp->ibqp.res.task, address, + msg->out.data, RQE_SIZE, + FOLL_FORCE); + if (ret != RQE_SIZE) { + pr_info("access address with error (%d)\n", + ret); erdma_qp_put(qp); return -EIO; } + ret = 0; } else { - + wqe_idx = qe_idx & (qp->attrs.rq_size - 1); + memcpy(msg->out.data, + qp->kern_qp.rq_buf + (wqe_idx << RQE_SHIFT), + RQE_SIZE); } erdma_qp_put(qp); - msg->out.length = 256; + msg->out.length = RQE_SIZE; break; - case ERDMA_DUMP_TYPE_RQE: + case ERDMA_DUMP_TYPE_CQE: + if (qn == 0) { + /* CMDQ-CQ */ + wqe_idx = qe_idx & (edev->cmdq.cq.depth - 1); + memcpy(msg->out.data, + edev->cmdq.cq.qbuf + (wqe_idx << CQE_SHIFT), + CQE_SIZE); + } else { + cq = find_cq_by_cqn(edev, qn); + if (!cq) + return -EINVAL; + + if (!rdma_is_kernel_res(&cq->ibcq.res)) { + address = cq->user_cq.qbuf_mtt.umem->address; + wqe_idx = qe_idx & (cq->depth - 1); + address += wqe_idx << CQE_SHIFT; + ret = access_process_vm(cq->ibcq.res.task, + address, msg->out.data, + CQE_SIZE, FOLL_FORCE); + if (ret != CQE_SIZE) { + pr_info("access address with error (%d)\n", + ret); + return -EIO; + } + ret = 0; + } else { + wqe_idx = qe_idx & (cq->depth - 1); + memcpy(msg->out.data, + cq->kern_cq.qbuf + + (wqe_idx << CQE_SHIFT), + CQE_SIZE); + } + } + msg->out.length = CQE_SIZE; + break; + + case ERDMA_DUMP_TYPE_EQE: + /* 0: AEQ, 1: CMD-EQ, 2 - 33: CEQ */ + if (qn == 0) { /* AEQ */ + eq = &edev->aeq; + } else if (qn == 1) { + eq = &edev->cmdq.eq; + } else if (qn > 1 && qn <= 33) { + if (edev->ceqs[qn - 2].ready == 0) + return -EINVAL; + eq = &edev->ceqs[qn - 2].eq; + } else { + return -EINVAL; + } + + wqe_idx = qe_idx & (eq->depth - 1); + memcpy(msg->out.data, eq->qbuf + (wqe_idx << EQE_SHIFT), + EQE_SIZE); + msg->out.length = EQE_SIZE; break; default: break; @@ -320,7 +691,9 @@ long do_ioctl(unsigned int cmd, unsigned long arg) out: if (!bypass_dev) ib_device_put(ibdev); - return -EOPNOTSUPP; + + kfree(msg); + return ret; } long chardev_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) @@ -351,6 +724,7 @@ static int chardev_close(struct inode *inode, struct file *filp) return 0; } +/* clang-format off */ static const struct file_operations chardev_fops = { .owner = THIS_MODULE, .open = chardev_open, @@ -358,6 +732,7 @@ static const struct file_operations chardev_fops = { .read = chardev_read, .unlocked_ioctl = chardev_ioctl }; +/* clang-format on */ void erdma_chrdev_destroy(void) { @@ -395,8 +770,8 @@ int erdma_chrdev_init(void) goto destroy_class; } - erdma_chrdev = device_create(erdma_chrdev_class, - NULL, erdma_char_dev, NULL, ERDMA_CHRDEV_NAME); + erdma_chrdev = device_create(erdma_chrdev_class, NULL, erdma_char_dev, + NULL, ERDMA_CHRDEV_NAME); if (IS_ERR(erdma_chrdev)) { pr_err("create_device failed.\n"); goto delete_cdev; diff --git a/drivers/infiniband/hw/erdma/erdma_ioctl.h b/drivers/infiniband/hw/erdma/erdma_ioctl.h index 57e7d29eef4eed..c0f4a2cb0789ca 100644 --- a/drivers/infiniband/hw/erdma/erdma_ioctl.h +++ b/drivers/infiniband/hw/erdma/erdma_ioctl.h @@ -49,9 +49,10 @@ enum erdma_stat_type { enum erdma_info_type { ERDMA_INFO_TYPE_DEV = 0, + ERDMA_INFO_TYPE_ALLOCED_QP, ERDMA_INFO_TYPE_QP, + ERDMA_INFO_TYPE_ALLOCED_CQ, 
ERDMA_INFO_TYPE_CQ, - ERDMA_INFO_TYPE_ALLOCED_QP, ERDMA_INFO_TYPE_EQ, ERDMA_INFO_TYPE_CEP, ERDMA_INFO_TYPE_MAX, @@ -67,12 +68,8 @@ enum erdma_dump_type { ERDMA_DUMP_TYPE_SQE = 0, ERDMA_DUMP_TYPE_RQE, ERDMA_DUMP_TYPE_CQE, - ERDMA_DUMP_MAX = ERDMA_DUMP_TYPE_CQE + 1, -}; - -struct erdma_dev_info { - __u32 devid; - __u64 node_guid; + ERDMA_DUMP_TYPE_EQE, + ERDMA_DUMP_MAX = ERDMA_DUMP_TYPE_EQE + 1, }; struct erdma_qp_info { @@ -96,8 +93,22 @@ struct erdma_qp_info { __u8 sq_mtt_type; __u8 rq_mtt_type; + __u32 assoc_scqn; + __u32 assoc_rcqn; + + __u16 sqci; + __u16 sqpi; + __u16 rqci; + __u16 rqpi; + __u64 sqbuf_dma; + __u64 rqbuf_dma; + __u64 sqdbrec_dma; + __u64 rqdbrec_dma; + __u32 pid; char buf[TASK_COMM_LEN]; + __u8 rsvd0[15]; + __u8 hw_info_valid; struct { __u32 page_size; @@ -108,12 +119,100 @@ struct erdma_qp_info { __u64 va; __u64 len; } sq_mtt, rq_mtt; + + __u8 sq_enable; + __u8 sqbuf_page_offset; + __u8 sqbuf_page_size; + __u8 sqbuf_depth; + __u16 hw_sq_ci; + __u16 hw_sq_pi; + + __u8 rq_enable; + __u8 rqbuf_page_offset; + __u8 rqbuf_page_size; + __u8 rqbuf_depth; + __u16 hw_rq_ci; + __u16 hw_rq_pi; + + __u16 last_comp_sqe_idx; + __u16 last_comp_rqe_idx; + __u16 scqe_counter; + __u16 rcqe_counter; + __u16 tx_pkts_cnt; + __u16 rx_pkts_cnt; + __u16 rx_error_drop_cnt; + __u16 rx_invalid_drop_cnt; + __u32 rto_retrans_cnt; + + __u32 pd; + __u16 fw_sq_pi; + __u16 fw_sq_ci; + __u16 fw_rq_ci; + __u8 sq_in_flush; + __u8 rq_in_flush; + + __u16 sq_flushed_pi; + __u16 rq_flushed_pi; + + __u64 sqbuf_addr; + __u64 rqbuf_addr; + __u64 sdbrec_addr; + __u64 rdbrec_addr; + __u64 sdbrec_val; + __u64 rdbrec_val; + + __u32 ip_src; + __u32 ip_dst; + __u16 srcport; + __u16 dstport; +}; + +struct erdma_cq_info { + __u32 cqn; + __u32 depth; + + __u32 assoc_eqn; + __u8 is_user; + __u8 rsvd0; + __u8 mtt_type; + __u8 hw_info_valid; + + __u64 qbuf_dma_addr; + __u32 ci; + __u32 cmdsn; + __u32 notify_cnt; + __u32 rsvd1; + + struct { + __u32 page_size; + __u32 page_offset; + __u32 page_cnt; + __u32 mtt_nents; + __u64 mtt_entry[4]; + __u64 va; + __u64 len; + } mtt; + + __u32 hw_pi; + __u8 enable; + __u8 log_depth; + __u8 cq_cur_ownership; + __u8 last_errdb_type; /* 0,dup db;1,out-order db */ + + __u32 last_errdb_ci; + __u8 out_order_db_cnt; + __u8 dup_db_cnt; + __u16 rsvd; + + __u64 cn_cq_db_addr; + __u64 cq_db_record; }; struct erdma_eq_info { __u32 eqn; __u8 ready; - __u8 rsvd[3]; + __u8 rsvd[2]; + __u8 hw_info_valid; __u64 event_cnt; __u64 notify_cnt; @@ -122,6 +221,22 @@ struct erdma_eq_info { __u32 ci; __u64 qbuf_dma; __u64 qbuf_va; + + __u16 hw_depth; + __u16 vector; + + __u8 int_suppression; + __u8 tail_owner; + __u8 head_owner; + __u8 overflow; + + __u32 head; + __u32 tail; + + __u64 cn_addr; + __u64 cn_db_addr; + __u64 eq_db_record; + }; struct erdma_ioctl_inbuf { @@ -157,12 +272,15 @@ struct erdma_ioctl_outbuf { } config_resp; __u32 allocted_qpn[1024]; + __u32 allocted_cqn[1024]; struct erdma_qp_info qp_info; /* 0: AEQ, 1: Cmd-EQ, 2-32: Completion-EQ */ struct erdma_eq_info eq_info[33]; + struct erdma_cq_info cq_info; __u32 version; + __u64 stats[512]; }; }; diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 4650fbc9562403..443442ad0e445d 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -1,39 +1,22 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. 
*/ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include - +#include #include -#include - -#include -#include #include #include "erdma.h" #include "erdma_cm.h" -#include "erdma_debug.h" -#include "erdma_hw.h" -#include "erdma_ioctl.h" -#include "erdma_stats.h" #include "erdma_verbs.h" MODULE_AUTHOR("Cheng Xu "); -MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver"); +MODULE_AUTHOR("Kai Shen "); +MODULE_DESCRIPTION("Alibaba elasticRDMA adapter driver (preview)"); MODULE_LICENSE("Dual BSD/GPL"); __u32 dprint_mask; @@ -48,6 +31,83 @@ static unsigned int vector_num = ERDMA_NUM_MSIX_VEC; module_param(vector_num, uint, 0444); MODULE_PARM_DESC(vector_num, "number of compeletion vectors"); +static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, + void *arg) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(arg); + struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); + + dprint(DBG_CTRL, " netdev:%s,ns:%p: Event %lu to erdma_dev %p\n", + netdev->name, dev_net(netdev), event, dev); + + if (dev->netdev == NULL || dev->netdev != netdev) + goto done; + + switch (event) { + case NETDEV_UP: + dev->state = IB_PORT_ACTIVE; + erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); + break; + case NETDEV_DOWN: + dev->state = IB_PORT_DOWN; + erdma_port_event(dev, IB_EVENT_PORT_ERR); + break; + case NETDEV_CHANGEMTU: + if (dev->mtu != netdev->mtu) { + erdma_set_mtu(dev, netdev->mtu); + dev->mtu = netdev->mtu; + } + break; + case NETDEV_REGISTER: + case NETDEV_UNREGISTER: + case NETDEV_CHANGEADDR: + case NETDEV_GOING_DOWN: + case NETDEV_CHANGE: + default: + break; + } + +done: + return NOTIFY_OK; +} + +static int erdma_enum_and_get_netdev(struct erdma_dev *dev) +{ + struct net_device *netdev; + int ret = -ENODEV; + + /* Already binded to a net_device, so we skip. */ + if (dev->netdev) + return 0; + + rtnl_lock(); + for_each_netdev(&init_net, netdev) { + /* + * In erdma, the paired netdev and ibdev should have the same + * MAC address. erdma can get the value from its PCIe bar + * registers. Since erdma can not get the paired netdev + * reference directly, we do a traverse here to get the paired + * netdev. 
+ */ + if (ether_addr_equal_unaligned(netdev->perm_addr, + dev->attrs.peer_addr)) { + ret = ib_device_set_netdev(&dev->ibdev, netdev, 1); + if (ret) { + rtnl_unlock(); + ibdev_warn(&dev->ibdev, + "failed (%d) to link netdev", ret); + return ret; + } + dev->netdev = netdev; + break; + } + } + + rtnl_unlock(); + + return ret; +} + static int erdma_device_register(struct erdma_dev *dev) { struct ib_device *ibdev = &dev->ibdev; @@ -65,12 +125,13 @@ static int erdma_device_register(struct erdma_dev *dev) if (ret < 0) return ret; + ret = erdma_enum_and_get_netdev(dev); + if (ret) + return -EPROBE_DEFER; + + dev->mtu = dev->netdev->mtu; addrconf_addr_eui48((u8 *)&ibdev->node_guid, dev->netdev->dev_addr); - ibdev->phys_port_cnt = 1; - ret = ib_device_set_netdev(ibdev, dev->netdev, 1); - if (ret) - return ret; ret = ib_register_device(ibdev, ibdev->name, &dev->pdev->dev); if (ret) { dev_err(&dev->pdev->dev, @@ -79,6 +140,14 @@ static int erdma_device_register(struct erdma_dev *dev) return ret; } + dev->netdev_nb.notifier_call = erdma_netdev_event; + ret = register_netdevice_notifier(&dev->netdev_nb); + if (ret) { + ibdev_err(&dev->ibdev, "failed to register notifier.\n"); + ib_unregister_device(ibdev); + return ret; + } + dprint(DBG_DM, " Registered '%s' for interface '%s',HWaddr=%02x.%02x.%02x.%02x.%02x.%02x\n", ibdev->name, dev->netdev->name, *(__u8 *)dev->netdev->dev_addr, @@ -88,105 +157,9 @@ static int erdma_device_register(struct erdma_dev *dev) *((__u8 *)dev->netdev->dev_addr + 4), *((__u8 *)dev->netdev->dev_addr + 5)); - dev->is_registered = 1; - return 0; } -int erdma_find_netdev_and_register_ibdev(struct erdma_dev *dev) -{ - struct net *net; - struct net_device *ndev; - - rtnl_lock(); - down_read(&net_rwsem); - for_each_net(net) - for_each_netdev(net, ndev) { - if (ether_addr_equal_unaligned(ndev->perm_addr, dev->attrs.peer_addr)) { - dev->netdev = ndev; - break; - } - } - up_read(&net_rwsem); - rtnl_unlock(); - - if (dev->netdev) - return erdma_device_register(dev); - - return -ENODEV; -} - -static void erdma_device_deregister(struct erdma_dev *edev) -{ - int i; - - ib_unregister_device(&edev->ibdev); - - WARN_ON(atomic_read(&edev->num_ctx)); - WARN_ON(atomic_read(&edev->num_cep)); - i = 0; - - while (!list_empty(&edev->cep_list)) { - struct erdma_cep *cep = - list_entry(edev->cep_list.next, struct erdma_cep, devq); - list_del(&cep->devq); - dprint(DBG_ON, ": Free CEP (0x%p), state: %d\n", cep, - cep->state); - kfree(cep); - i++; - } - if (i) - pr_warn("erdma device deregister: free'd %d CEPs\n", i); -} - -static int erdma_netdev_event(struct notifier_block *nb, unsigned long event, - void *arg) -{ - struct net_device *netdev = netdev_notifier_info_to_dev(arg); - struct erdma_dev *dev = container_of(nb, struct erdma_dev, netdev_nb); - - dprint(DBG_CTRL, " netdev:%s,ns:%p: Event %lu to erdma_dev %p\n", - netdev->name, dev_net(netdev), event, dev); - - if ((dev->netdev == NULL && event != NETDEV_REGISTER) || - (dev->netdev != NULL && dev->netdev != netdev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_UP: - dev->state = IB_PORT_ACTIVE; - erdma_port_event(dev, IB_EVENT_PORT_ACTIVE); - break; - case NETDEV_DOWN: - dev->state = IB_PORT_DOWN; - erdma_port_event(dev, IB_EVENT_PORT_ERR); - break; - case NETDEV_REGISTER: - if (!compat_mode && - ether_addr_equal_unaligned(netdev->perm_addr, - dev->attrs.peer_addr)) { - dev->netdev = netdev; - dev->state = IB_PORT_INIT; - if (!dev->is_registered) { - dprint(DBG_DM, - ": new erdma lowlevel device for %s\n", - netdev->name); - 
erdma_device_register(dev); - } - } - break; - case NETDEV_UNREGISTER: - case NETDEV_CHANGEADDR: - case NETDEV_CHANGEMTU: - case NETDEV_GOING_DOWN: - case NETDEV_CHANGE: - default: - break; - } - - return NOTIFY_OK; -} - static irqreturn_t erdma_comm_irq_handler(int irq, void *data) { struct erdma_dev *dev = data; @@ -223,7 +196,9 @@ static void erdma_dwqe_resource_init(struct erdma_dev *dev) dev->attrs.dwqe_pages = type0; dev->attrs.dwqe_entries = type1 * ERDMA_DWQE_TYPE1_CNT_PER_PAGE; - dev_info(&dev->pdev->dev, "grp_num:%d, total pages:%d, type0:%d, type1:%d, type1_db_cnt:%d\n", + dev_info( + &dev->pdev->dev, + "grp_num:%d, total pages:%d, type0:%d, type1:%d, type1_db_cnt:%d\n", dev->attrs.grp_num, total_pages, type0, type1, type1 * 16); } @@ -263,16 +238,37 @@ static void erdma_comm_irq_uninit(struct erdma_dev *dev) free_irq(dev->comm_irq.msix_vector, dev); } +static int erdma_hw_resp_pool_init(struct erdma_dev *dev) +{ + dev->resp_pool = + dma_pool_create("erdma_resp_pool", &dev->pdev->dev, + ERDMA_HW_RESP_SIZE, ERDMA_HW_RESP_SIZE, 0); + if (!dev->resp_pool) + return -ENOMEM; + + return 0; +} + +static void erdma_hw_resp_pool_destroy(struct erdma_dev *dev) +{ + dma_pool_destroy(dev->resp_pool); +} + static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) { int ret; erdma_dwqe_resource_init(dev); + ret = erdma_hw_resp_pool_init(dev); + if (ret) + return ret; ret = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(ERDMA_PCI_WIDTH)); - if (ret) + DMA_BIT_MASK(ERDMA_PCI_WIDTH)); + if (ret) { + erdma_hw_resp_pool_destroy(dev); return ret; + } dma_set_max_seg_size(&pdev->dev, UINT_MAX); @@ -280,6 +276,34 @@ static int erdma_device_init(struct erdma_dev *dev, struct pci_dev *pdev) } static void erdma_device_uninit(struct erdma_dev *dev) +{ + erdma_hw_resp_pool_destroy(dev); +} + +static int erdma_wait_hw_init_done(struct erdma_dev *dev) +{ + int i; + + erdma_reg_write32(dev, ERDMA_REGS_DEV_CTRL_REG, + FIELD_PREP(ERDMA_REG_DEV_CTRL_INIT_MASK, 1)); + + for (i = 0; i < ERDMA_WAIT_DEV_DONE_CNT; i++) { + if (erdma_reg_read32_filed(dev, ERDMA_REGS_DEV_ST_REG, + ERDMA_REG_DEV_ST_INIT_DONE_MASK)) + break; + + msleep(ERDMA_REG_ACCESS_WAIT_MS); + } + + if (i == ERDMA_WAIT_DEV_DONE_CNT) { + dev_err(&dev->pdev->dev, "wait init done failed.\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static void erdma_hw_stop(struct erdma_dev *dev) { u32 ctrl = FIELD_PREP(ERDMA_REG_DEV_CTRL_RESET_MASK, 1); @@ -293,10 +317,9 @@ static const struct pci_device_id erdma_pci_tbl[] = { static int erdma_probe_dev(struct pci_dev *pdev) { - int err; struct erdma_dev *dev; + int bars, err; u32 version; - int bars; err = pci_enable_device(pdev); if (err) { @@ -348,7 +371,7 @@ static int erdma_probe_dev(struct pci_dev *pdev) err = erdma_request_vectors(dev); if (err) - goto err_iounmap_func_bar; + goto err_uninit_device; err = erdma_comm_irq_init(dev); if (err) @@ -362,18 +385,24 @@ static int erdma_probe_dev(struct pci_dev *pdev) if (err) goto err_uninit_aeq; - err = erdma_ceqs_init(dev); + err = erdma_wait_hw_init_done(dev); if (err) goto err_uninit_cmdq; + err = erdma_ceqs_init(dev); + if (err) + goto err_stop_hw; + msleep(500); erdma_finish_cmdq_init(dev); return 0; +err_stop_hw: + erdma_hw_stop(dev); + err_uninit_cmdq: - erdma_device_uninit(dev); erdma_cmdq_destroy(dev); err_uninit_aeq: @@ -385,6 +414,9 @@ static int erdma_probe_dev(struct pci_dev *pdev) err_free_vectors: pci_free_irq_vectors(dev->pdev); +err_uninit_device: + erdma_device_uninit(dev); + err_iounmap_func_bar: devm_iounmap(&pdev->dev, 
dev->func_bar); @@ -405,19 +437,15 @@ static void erdma_remove_dev(struct pci_dev *pdev) struct erdma_dev *dev = pci_get_drvdata(pdev); erdma_ceqs_uninit(dev); - - erdma_device_uninit(dev); - + erdma_hw_stop(dev); erdma_cmdq_destroy(dev); erdma_aeq_destroy(dev); erdma_comm_irq_uninit(dev); pci_free_irq_vectors(dev->pdev); - + erdma_device_uninit(dev); devm_iounmap(&pdev->dev, dev->func_bar); pci_release_selected_regions(pdev, ERDMA_BAR_MASK); - ib_dealloc_device(&dev->ibdev); - pci_disable_device(pdev); } @@ -435,7 +463,9 @@ static int erdma_check_version(struct erdma_dev *dev) u8 fw_major = (dev->attrs.fw_version >> 16); u8 fw_medium = (dev->attrs.fw_version >> 8); - return (fw_major != ERDMA_MAJOR_VER || fw_medium != ERDMA_MEDIUM_VER) ? -1 : 0; + return (fw_major != ERDMA_MAJOR_VER || fw_medium != ERDMA_MEDIUM_VER) ? + -1 : + 0; } #define ERDMA_GET_CAP(name, cap) FIELD_GET(ERDMA_CMD_DEV_CAP_##name##_MASK, cap) @@ -462,6 +492,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); dev->attrs.max_mr = dev->attrs.max_qp << 1; dev->attrs.max_cq = dev->attrs.max_qp << 1; + dev->attrs.flags = ERDMA_GET_CAP(FLAGS, cap0); dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR; dev->attrs.max_ord = ERDMA_MAX_ORD; @@ -496,7 +527,6 @@ static int erdma_res_cb_init(struct erdma_dev *dev) dev->res_cb[i].bitmap = kcalloc(BITS_TO_LONGS(dev->res_cb[i].max_cap), sizeof(unsigned long), GFP_KERNEL); - /* We will free the memory in erdma_res_cb_free */ if (!dev->res_cb[i].bitmap) goto err; } @@ -559,9 +589,8 @@ static const struct ib_device_ops erdma_device_ops = { .req_notify_cq = erdma_req_notify_cq, .reg_user_mr = erdma_reg_user_mr, .get_netdev = erdma_get_netdev, - .drain_sq = erdma_drain_sq, - .drain_rq = erdma_drain_rq, .query_pkey = erdma_query_pkey, + .modify_cq = erdma_modify_cq, INIT_RDMA_OBJ_SIZE(ib_cq, erdma_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, erdma_pd, ibpd), @@ -570,7 +599,6 @@ static const struct ib_device_ops erdma_device_ops = { static const struct ib_device_ops erdma_compat_ops = { .get_link_layer = erdma_get_link_layer, - .query_pkey = erdma_query_pkey }; static int erdma_ib_device_add(struct pci_dev *pdev) @@ -578,9 +606,7 @@ static int erdma_ib_device_add(struct pci_dev *pdev) struct erdma_dev *dev = pci_get_drvdata(pdev); struct ib_device *ibdev = &dev->ibdev; u64 mac; - int ret = 0; - - dprint(DBG_INIT, "init erdma_dev(%p)\n", dev); + int ret; erdma_stats_init(dev); @@ -641,33 +667,40 @@ static int erdma_ib_device_add(struct pci_dev *pdev) atomic_set(&dev->num_ctx, 0); - dprint(DBG_INIT, "ib device create ok.\n"); - mac = erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_L_REG); mac |= (u64)erdma_reg_read32(dev, ERDMA_REGS_NETDEV_MAC_H_REG) << 32; - dev_info(&dev->pdev->dev, "assoc netdev mac addr is 0x%llx.\n", - mac); + dev_info(&dev->pdev->dev, "assoc netdev mac addr is 0x%llx.\n", mac); u64_to_ether_addr(mac, dev->attrs.peer_addr); - dev->netdev = NULL; - if (compat_mode) { - ret = erdma_find_netdev_and_register_ibdev(dev); - if (ret) - goto err_out; + dev->db_pool = dma_pool_create("erdma_db", &pdev->dev, ERDMA_DB_SIZE, + ERDMA_DB_SIZE, 0); + if (!dev->db_pool) { + ret = -ENOMEM; + goto err_out; } - dev->netdev_nb.notifier_call = erdma_netdev_event; - ret = register_netdevice_notifier(&dev->netdev_nb); + dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!dev->reflush_wq) { + ret = -ENOMEM; + goto free_pool; + } + + ret = erdma_device_register(dev); if (ret) - goto err_out; + 
goto free_wq; + + dev->ibdev.use_cq_dim = true; return 0; +free_wq: + destroy_workqueue(dev->reflush_wq); +free_pool: + dma_pool_destroy(dev->db_pool); err_out: - if (dev->is_registered && compat_mode) - ib_unregister_device(&dev->ibdev); xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); @@ -681,16 +714,20 @@ static void erdma_ib_device_remove(struct pci_dev *pdev) { struct erdma_dev *dev = pci_get_drvdata(pdev); + unregister_netdevice_notifier(&dev->netdev_nb); - if (dev->is_registered) { - erdma_device_deregister(dev); - dev->is_registered = 0; - } + ib_unregister_device(&dev->ibdev); + + WARN_ON(atomic_read(&dev->num_ctx)); + WARN_ON(atomic_read(&dev->num_cep)); + WARN_ON(!list_empty(&dev->cep_list)); erdma_res_cb_free(dev); xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); + dma_pool_destroy(dev->db_pool); + destroy_workqueue(dev->reflush_wq); } static int erdma_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 243bb0666226f8..886fcd58ce8c8b 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* Authors: Cheng Xu */ /* Kai Shen */ @@ -6,16 +6,6 @@ /* Authors: Bernard Metzler */ /* Copyright (c) 2008-2019, IBM Corporation */ -#include -#include -#include -#include - -#include -#include -#include - -#include "erdma.h" #include "erdma_cm.h" #include "erdma_verbs.h" @@ -32,7 +22,7 @@ void erdma_qp_llp_close(struct erdma_qp *qp) case ERDMA_QP_STATE_RTR: case ERDMA_QP_STATE_IDLE: case ERDMA_QP_STATE_TERMINATE: - qp_attrs.state = ERDMA_QP_STATE_CLOSING; + qp_attrs.state = ERDMA_QP_STATE_ERROR; erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); break; case ERDMA_QP_STATE_CLOSING: @@ -97,6 +87,8 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, tp = tcp_sk(qp->cep->sock->sk); + qp->attrs.remote_cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); + req.cookie = be32_to_cpu(qp->cep->mpa.ext_data.cookie); req.dip = to_sockaddr_in(remote_addr).sin_addr.s_addr; req.sip = to_sockaddr_in(local_addr).sin_addr.s_addr; @@ -120,8 +112,7 @@ static int erdma_modify_qp_state_to_rts(struct erdma_qp *qp, FIELD_PREP(ERDMA_CMD_MODIFY_QP_CC_MASK, qp->attrs.cc) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int erdma_modify_qp_state_to_rts_compat(struct erdma_qp *qp, @@ -145,17 +136,17 @@ static int erdma_modify_qp_state_to_rts_compat(struct erdma_qp *qp, req.sip = qp->attrs.laddr.in.sin_addr.s_addr; if (req.dip < req.sip) { - req.dport = COMPAT_PORT_BASE + - ((QP_ID(qp) >> 16) & 0xF); + req.dport = COMPAT_PORT_BASE + ((QP_ID(qp) >> 16) & 0xF); req.sport = QP_ID(qp); - } else if (req.dip == req.sip) { /* if dip == sip, must have lqpn != rqpn */ + } else if (req.dip == + req.sip) { /* if dip == sip, must have lqpn != rqpn */ if (QP_ID(qp) < qp->attrs.remote_qp_num) { - req.dport = COMPAT_PORT_BASE + - ((QP_ID(qp) >> 16) & 0xF); + req.dport = + COMPAT_PORT_BASE + ((QP_ID(qp) >> 16) & 0xF); req.sport = QP_ID(qp); } else { req.sport = COMPAT_PORT_BASE + - ((qp->attrs.remote_qp_num >> 16) & 0xF); + ((qp->attrs.remote_qp_num >> 16) & 0xF); req.dport = qp->attrs.remote_qp_num; } } else { @@ -167,8 +158,7 @@ static int 
erdma_modify_qp_state_to_rts_compat(struct erdma_qp *qp, req.send_nxt = req.sport * 4; req.recv_nxt = req.dport * 4; - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, @@ -186,14 +176,14 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, req.cfg = FIELD_PREP(ERDMA_CMD_MODIFY_QP_STATE_MASK, attrs->state) | FIELD_PREP(ERDMA_CMD_MODIFY_QP_QPN_MASK, QP_ID(qp)); - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, enum erdma_qp_attr_mask mask) { int drop_conn, ret = 0; + bool need_reflush = false; if (!mask) return 0; @@ -206,9 +196,11 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, case ERDMA_QP_STATE_RTR: if (attrs->state == ERDMA_QP_STATE_RTS) { if (compat_mode) - ret = erdma_modify_qp_state_to_rts_compat(qp, attrs, mask); + ret = erdma_modify_qp_state_to_rts_compat( + qp, attrs, mask); else - ret = erdma_modify_qp_state_to_rts(qp, attrs, mask); + ret = erdma_modify_qp_state_to_rts(qp, attrs, + mask); } else if (attrs->state == ERDMA_QP_STATE_ERROR) { qp->attrs.state = ERDMA_QP_STATE_ERROR; if (qp->cep) { @@ -217,6 +209,9 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, } ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + /* We apply to kernel qp first. */ + if (rdma_is_kernel_res(&qp->ibqp.res)) + need_reflush = true; } break; case ERDMA_QP_STATE_RTS: @@ -227,7 +222,11 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, attrs->state == ERDMA_QP_STATE_ERROR) { drop_conn = 1; if (!(qp->attrs.flags & ERDMA_QP_IN_DESTROY)) - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); + ret = erdma_modify_qp_state_to_stop(qp, attrs, + mask); + /* We apply to kernel qp first. 
*/ + if (rdma_is_kernel_res(&qp->ibqp.res)) + need_reflush = true; } if (drop_conn) @@ -252,6 +251,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, break; } + if (need_reflush && !ret) { + qp->flags |= ERDMA_QP_IN_FLUSHING; + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + } + return ret; } @@ -311,7 +316,7 @@ static int fill_inline_data(struct erdma_qp *qp, qp->attrs.sq_size, SQEBB_SHIFT); if (!remain_size) break; - }; + } i++; } @@ -474,7 +479,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, mr->mem.mtt_nents); - if (mr->mem.mtt_nents < ERDMA_MAX_INLINE_MTT_ENTRIES) { + if (mr->mem.mtt_nents <= ERDMA_MAX_INLINE_MTT_ENTRIES) { attrs |= FIELD_PREP(ERDMA_SQE_MR_MTT_TYPE_MASK, 0); /* Copy SGLs to SQE content to accelerate */ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1, @@ -536,8 +541,8 @@ static void kick_sq_db(struct erdma_qp *qp, u16 pi) writeq(db_data, qp->kern_qp.hw_sq_db); } -static int erdma_post_send_internal(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, - const struct ib_send_wr **bad_send_wr, bool is_last) +int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, + const struct ib_send_wr **bad_send_wr) { struct erdma_qp *qp = to_eqp(ibqp); int ret = 0; @@ -549,14 +554,6 @@ static int erdma_post_send_internal(struct ib_qp *ibqp, const struct ib_send_wr return -EINVAL; spin_lock_irqsave(&qp->kern_qp.sq_lock, flags); - if (unlikely(qp->kern_qp.sq_shutdown)) { - *bad_send_wr = send_wr; - ret = -EINVAL; - goto out; - } - if (unlikely(is_last)) - qp->kern_qp.sq_shutdown = true; - sq_pi = qp->kern_qp.sq_pi; while (wr) { @@ -576,16 +573,13 @@ static int erdma_post_send_internal(struct ib_qp *ibqp, const struct ib_send_wr wr = wr->next; } -out: spin_unlock_irqrestore(&qp->kern_qp.sq_lock, flags); - return ret; -} + if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); -int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, - const struct ib_send_wr **bad_send_wr) -{ - return erdma_post_send_internal(ibqp, send_wr, bad_send_wr, false); + return ret; } static int erdma_post_recv_one(struct erdma_qp *qp, @@ -618,8 +612,8 @@ static int erdma_post_recv_one(struct erdma_qp *qp, return 0; } -static int erdma_post_recv_internal(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, - const struct ib_recv_wr **bad_recv_wr, bool is_last) +int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, + const struct ib_recv_wr **bad_recv_wr) { const struct ib_recv_wr *wr = recv_wr; struct erdma_qp *qp = to_eqp(ibqp); @@ -627,13 +621,6 @@ static int erdma_post_recv_internal(struct ib_qp *ibqp, const struct ib_recv_wr int ret = 0; spin_lock_irqsave(&qp->kern_qp.rq_lock, flags); - if (unlikely(qp->kern_qp.rq_shutdown)) { - *bad_recv_wr = recv_wr; - ret = -EINVAL; - goto out; - } - if (unlikely(is_last)) - qp->kern_qp.rq_shutdown = true; while (wr) { ret = erdma_post_recv_one(qp, wr); @@ -643,97 +630,12 @@ static int erdma_post_recv_internal(struct ib_qp *ibqp, const struct ib_recv_wr } wr = wr->next; } -out: - spin_unlock_irqrestore(&qp->kern_qp.rq_lock, flags); - return ret; -} -int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, - const struct ib_recv_wr **bad_recv_wr) -{ - return erdma_post_recv_internal(ibqp, recv_wr, bad_recv_wr, false); -} - -struct ib_drain_cqe { - struct ib_cqe cqe; - struct completion done; 
-}; - -static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc) -{ - struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe, - cqe); - - complete(&cqe->done); -} - -static void erdma_drain_qp(struct ib_qp *qp) -{ - struct ib_drain_cqe sdrain, rdrain; - const struct ib_send_wr *bad_swr; - const struct ib_recv_wr *bad_rwr; - struct ib_recv_wr rwr = {}; - struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; - struct ib_rdma_wr swr = { - .wr = { - .next = NULL, - { .wr_cqe = &sdrain.cqe, }, - .opcode = IB_WR_RDMA_WRITE, - .send_flags = IB_SEND_SIGNALED, - }, - }; - int ret, cnt; - - rwr.wr_cqe = &rdrain.cqe; - rdrain.cqe.done = ib_drain_qp_done; - init_completion(&rdrain.done); - - ret = erdma_post_recv_internal(qp, &rwr, &bad_rwr, true); - if (ret) { - WARN_ONCE(ret, "failed to drain recv queue: %d", ret); - return; - } - - sdrain.cqe.done = ib_drain_qp_done; - init_completion(&sdrain.done); - - ret = erdma_post_send_internal(qp, &swr.wr, &bad_swr, true); - if (ret) { - WARN_ONCE(ret, "failed to drain send queue: %d", ret); - return; - } - - ret = ib_modify_qp(qp, &attr, IB_QP_STATE); - if (ret) { - WARN_ONCE(ret, "failed to modify qp to ERR: %d", ret); - return; - } - - cnt = 0; - while (wait_for_completion_timeout(&sdrain.done, HZ / 10) <= 0 && cnt < 50) { - ib_process_cq_direct(qp->send_cq, -1); - cnt++; - } - - cnt = 0; - while (wait_for_completion_timeout(&rdrain.done, HZ / 10) <= 0 && cnt < 50) { - ib_process_cq_direct(qp->recv_cq, -1); - cnt++; - } -} - -void erdma_drain_rq(struct ib_qp *ibqp) -{ - struct erdma_qp *qp = to_eqp(ibqp); + spin_unlock_irqrestore(&qp->kern_qp.rq_lock, flags); - if (qp->attrs.state != ERDMA_QP_STATE_ERROR) - erdma_drain_qp(ibqp); -} + if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); -void erdma_drain_sq(struct ib_qp *ibqp) -{ - struct erdma_qp *qp = to_eqp(ibqp); - - if (qp->attrs.state != ERDMA_QP_STATE_ERROR) - erdma_drain_qp(ibqp); + return ret; } diff --git a/drivers/infiniband/hw/erdma/erdma_stats.c b/drivers/infiniband/hw/erdma/erdma_stats.c index 4df2290291bf89..b8442cdd4261a8 100644 --- a/drivers/infiniband/hw/erdma/erdma_stats.c +++ b/drivers/infiniband/hw/erdma/erdma_stats.c @@ -3,12 +3,11 @@ /* Authors: Cheng Xu */ /* Kai Shen */ /* Copyright (c) 2020-2022, Alibaba Group. 
*/ -//#include "kcompat.h" #include "erdma.h" -#include "erdma_stats.h" -static const char * const erdma_stats_names[] = { + +static const char *const erdma_stats_names[] = { [ERDMA_STATS_IW_LISTEN_CREATE] = "listen_create_cnt", [ERDMA_STATS_IW_LISTEN_IPV6] = "listen_ipv6_cnt", [ERDMA_STATS_IW_LISTEN_SUCCESS] = "listen_success_cnt", @@ -62,32 +61,48 @@ static const char * const erdma_stats_names[] = { [ERDMA_STATS_CMD_REG_USR_MR] = "verbs_reg_usr_mr_cnt", [ERDMA_STATS_CMD_REG_USR_MR_FAILED] = "verbs_reg_usr_mr_failed_cnt", + [ERDMA_STATS_TX_REQS_CNT] = "hw_tx_reqs_cnt", + [ERDMA_STATS_TX_PACKETS_CNT] = "hw_tx_packets_cnt", + [ERDMA_STATS_TX_BYTES_CNT] = "hw_tx_bytes_cnt", + [ERDMA_STATS_TX_DISABLE_DROP_CNT] = "hw_disable_drop_cnt", + [ERDMA_STATS_TX_BPS_METER_DROP_CNT] = "hw_bps_limit_drop_cnt", + [ERDMA_STATS_TX_PPS_METER_DROP_CNT] = "hw_pps_limit_drop_cnt", + [ERDMA_STATS_RX_PACKETS_CNT] = "hw_rx_packets_cnt", + [ERDMA_STATS_RX_BYTES_CNT] = "hw_rx_bytes_cnt", + [ERDMA_STATS_RX_DISABLE_DROP_CNT] = "hw_rx_disable_drop_cnt", + [ERDMA_STATS_RX_BPS_METER_DROP_CNT] = "hw_rx_bps_limit_drop_cnt", + [ERDMA_STATS_RX_PPS_METER_DROP_CNT] = "hw_rx_pps_limit_drop_cnt", }; -struct rdma_hw_stats *erdma_alloc_hw_stats(struct ib_device *ibdev, port_t port_num) +struct rdma_hw_stats *erdma_alloc_hw_stats(struct ib_device *ibdev, + port_t port_num) { - return rdma_alloc_hw_stats_struct(erdma_stats_names, - ERDMA_STATS_MAX, RDMA_HW_STATS_DEFAULT_LIFESPAN); + return rdma_alloc_hw_stats_struct(erdma_stats_names, ERDMA_STATS_MAX, + RDMA_HW_STATS_DEFAULT_LIFESPAN); } -int erdma_get_hw_stats(struct ib_device *ibdev, - struct rdma_hw_stats *stats, port_t port_num, int index) +int erdma_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, + port_t port_num, int index) { struct erdma_dev *dev = to_edev(ibdev); - atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_SUBMITTED], dev->cmdq.sq.total_cmds); - atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_COMP], dev->cmdq.sq.total_comp_cmds); + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_SUBMITTED], + dev->cmdq.sq.total_cmds); + atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_COMP], + dev->cmdq.sq.total_comp_cmds); atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_EQ_NOTIFY], - atomic64_read(&dev->cmdq.eq.notify_num)); + atomic64_read(&dev->cmdq.eq.notify_num)); atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_EQ_EVENT], - atomic64_read(&dev->cmdq.eq.event_num)); + atomic64_read(&dev->cmdq.eq.event_num)); atomic64_set(&dev->stats.value[ERDMA_STATS_CMDQ_CQ_ARMED], - atomic64_read(&dev->cmdq.cq.armed_num)); - atomic64_set(&dev->stats.value[ERDMA_STATS_AEQ_EVENT], atomic64_read(&dev->aeq.event_num)); + atomic64_read(&dev->cmdq.cq.armed_num)); + atomic64_set(&dev->stats.value[ERDMA_STATS_AEQ_EVENT], + atomic64_read(&dev->aeq.event_num)); atomic64_set(&dev->stats.value[ERDMA_STATS_AEQ_NOTIFY], - atomic64_read(&dev->aeq.notify_num)); + atomic64_read(&dev->aeq.notify_num)); - memcpy(&stats->value[0], &dev->stats.value[0], sizeof(u64) * ERDMA_STATS_MAX); + memcpy(&stats->value[0], &dev->stats.value[0], + sizeof(u64) * ERDMA_STATS_MAX); return stats->num_counters; } diff --git a/drivers/infiniband/hw/erdma/erdma_stats.h b/drivers/infiniband/hw/erdma/erdma_stats.h index 2bbfd437bd81f0..d2fcf25ddb75b9 100644 --- a/drivers/infiniband/hw/erdma/erdma_stats.h +++ b/drivers/infiniband/hw/erdma/erdma_stats.h @@ -7,10 +7,10 @@ #ifndef __ERDMA_STATS_H__ #define __ERDMA_STATS_H__ -//#include "kcompat.h" #include typedef u8 port_t; + #define ERDMA_INC_CNT(dev, name) \ 
atomic64_inc(&dev->stats.value[ERDMA_STATS_##name]) @@ -68,6 +68,19 @@ enum erdma_hw_stats_index { ERDMA_STATS_CMD_REG_USR_MR, ERDMA_STATS_CMD_REG_USR_MR_FAILED, + ERDMA_STATS_TX_REQS_CNT, + ERDMA_STATS_TX_PACKETS_CNT, + ERDMA_STATS_TX_BYTES_CNT, + ERDMA_STATS_TX_DISABLE_DROP_CNT, + ERDMA_STATS_TX_BPS_METER_DROP_CNT, + ERDMA_STATS_TX_PPS_METER_DROP_CNT, + + ERDMA_STATS_RX_PACKETS_CNT, + ERDMA_STATS_RX_BYTES_CNT, + ERDMA_STATS_RX_DISABLE_DROP_CNT, + ERDMA_STATS_RX_BPS_METER_DROP_CNT, + ERDMA_STATS_RX_PPS_METER_DROP_CNT, + ERDMA_STATS_MAX }; diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 377baf9a6be475..986dbb136048f9 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -9,32 +9,21 @@ /* Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. */ -//#include "kcompat.h" - -#include -#include -#include -#include +#include #include #include -#include -#include -#include #include -#include #include -#include #include + #include "erdma.h" #include "erdma_cm.h" -#include "erdma_hw.h" #include "erdma_verbs.h" -#include "erdma_debug.h" - extern bool compat_mode; -static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_user) +static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, + bool is_user) { struct erdma_cmdq_create_qp_req req; struct erdma_pd *pd = to_epd(qp->ibqp.pd); @@ -53,7 +42,7 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_use FIELD_PREP(ERDMA_CMD_CREATE_QP_PD_MASK, pd->pdn); if (!is_user) { - u32 pgsz_range = ilog2(SZ_1M) - PAGE_SHIFT; + u32 pgsz_range = ilog2(SZ_1M) - ERDMA_HW_PAGE_SHIFT; req.sq_cqn_mtt_cfg = FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, @@ -73,21 +62,19 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_use req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr; req.sq_buf_addr = qp->kern_qp.sq_buf_dma_addr; - req.sq_db_info_dma_addr = qp->kern_qp.sq_buf_dma_addr + - (qp->attrs.sq_size << SQEBB_SHIFT); - req.rq_db_info_dma_addr = qp->kern_qp.rq_buf_dma_addr + - (qp->attrs.rq_size << RQE_SHIFT); + req.sq_db_info_dma_addr = qp->kern_qp.sq_db_info_dma_addr; + req.rq_db_info_dma_addr = qp->kern_qp.rq_db_info_dma_addr; } else { user_qp = &qp->user_qp; req.sq_cqn_mtt_cfg = FIELD_PREP( ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->sq_mtt.page_size) - PAGE_SHIFT); + ilog2(user_qp->sq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); req.sq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); req.rq_cqn_mtt_cfg = FIELD_PREP( ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->rq_mtt.page_size) - PAGE_SHIFT); + ilog2(user_qp->rq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); req.rq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); @@ -118,12 +105,11 @@ static int create_qp_cmd(struct erdma_dev *dev, struct erdma_qp *qp, bool is_use req.rq_mtt_entry[2] = user_qp->rq_mtt.mtt_entry[3]; } - req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; } - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), &resp0, + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), &resp0, &resp1); if (err) { dev_err(&dev->pdev->dev, @@ -178,11 +164,11 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) } post_cmd: - return erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + return erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); } -static 
int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_user) +static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, + bool is_user) { int err; struct erdma_cmdq_create_cq_req req; @@ -199,7 +185,7 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_use if (!is_user) { page_size = SZ_32M; req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(page_size) - PAGE_SHIFT); + ilog2(page_size) - ERDMA_HW_PAGE_SHIFT); req.qbuf_addr_l = lower_32_bits(cq->kern_cq.qbuf_dma_addr); req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr); @@ -212,8 +198,9 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_use cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT); } else { mtt = &cq->user_cq.qbuf_mtt; - req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(mtt->page_size) - PAGE_SHIFT); + req.cfg0 |= + FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, + ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT); if (mtt->mtt_nents == 1) { req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf); req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf); @@ -230,8 +217,7 @@ static int create_cq_cmd(struct erdma_dev *dev, struct erdma_cq *cq, bool is_use req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; } - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) { dev_err(&dev->pdev->dev, "ERROR: err code = %d, cmd of create cq failed.\n", @@ -272,15 +258,14 @@ static inline void erdma_free_idx(struct erdma_resource_cb *res_cb, u32 idx) u32 used; spin_lock_irqsave(&res_cb->lock, flags); - used = test_and_clear_bit(idx, res_cb->bitmap); + used = __test_and_clear_bit(idx, res_cb->bitmap); spin_unlock_irqrestore(&res_cb->lock, flags); WARN_ON(!used); } - static struct rdma_user_mmap_entry * -erdma_user_mmap_entry_insert(struct ib_ucontext *uctx, u64 address, - u32 size, u8 mmap_flag, u64 *mmap_offset) +erdma_user_mmap_entry_insert(struct ib_ucontext *uctx, u64 address, u32 size, + u8 mmap_flag, u64 *mmap_offset) { struct erdma_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL); @@ -294,8 +279,7 @@ erdma_user_mmap_entry_insert(struct ib_ucontext *uctx, u64 address, size = PAGE_ALIGN(size); - ret = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, - size); + ret = rdma_user_mmap_entry_insert(uctx, &entry->rdma_entry, size); if (ret) { kfree(entry); return NULL; @@ -322,7 +306,8 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_qp_rd_atom = dev->attrs.max_ord; attr->max_qp_init_rd_atom = dev->attrs.max_ird; attr->max_res_rd_atom = dev->attrs.max_qp * dev->attrs.max_ird; - attr->device_cap_flags = IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + attr->device_cap_flags = + IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; ibdev->local_dma_lkey = dev->attrs.local_dma_key; attr->max_send_sge = dev->attrs.max_send_sge; attr->max_recv_sge = dev->attrs.max_recv_sge; @@ -334,6 +319,12 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_mw = dev->attrs.max_mw; attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT; + + if (dev->attrs.flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) { + attr->atomic_cap = IB_ATOMIC_GLOB; + attr->masked_atomic_cap = IB_ATOMIC_GLOB; + } + attr->fw_ver = ((u64)(dev->attrs.fw_version >> 16) << 32) | (((dev->attrs.fw_version >> 8) & 0xFF) << 16) | 
((dev->attrs.fw_version & 0xFF)); @@ -375,6 +366,7 @@ int erdma_query_port(struct ib_device *ibdev, port_t port, attr->gid_tbl_len = 16; else attr->gid_tbl_len = 1; + attr->pkey_tbl_len = 1; attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_DEVICE_MGMT_SUP; attr->max_msg_sz = -1; if (dev->state == IB_PORT_ACTIVE) @@ -390,19 +382,18 @@ int erdma_get_port_immutable(struct ib_device *ibdev, port_t port, { if (compat_mode) { port_immutable->gid_tbl_len = 16; - port_immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + port_immutable->core_cap_flags = + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; port_immutable->max_mad_size = IB_MGMT_MAD_SIZE; } else { port_immutable->gid_tbl_len = 1; port_immutable->core_cap_flags = RDMA_CORE_PORT_IWARP; - } return 0; } -int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, - u16 *pkey) +int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, u16 *pkey) { if (index > 0) return -EINVAL; @@ -447,6 +438,21 @@ int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } +static void erdma_flush_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct erdma_qp *qp = + container_of(dwork, struct erdma_qp, reflush_dwork); + struct erdma_cmdq_reflush_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_REFLUSH); + req.qpn = QP_ID(qp); + req.sq_pi = qp->kern_qp.sq_pi; + req.rq_pi = qp->kern_qp.rq_pi; + erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL); +} + static int erdma_qp_validate_cap(struct erdma_dev *dev, struct ib_qp_init_attr *attrs) { @@ -485,16 +491,24 @@ static void free_kernel_qp(struct erdma_qp *qp) vfree(qp->kern_qp.rwr_tbl); if (qp->kern_qp.sq_buf) - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), - qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + dma_free_coherent(&dev->pdev->dev, + qp->attrs.sq_size << SQEBB_SHIFT, + qp->kern_qp.sq_buf, + qp->kern_qp.sq_buf_dma_addr); if (qp->kern_qp.rq_buf) - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), - qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); + dma_free_coherent(&dev->pdev->dev, + qp->attrs.rq_size << RQE_SHIFT, + qp->kern_qp.rq_buf, + qp->kern_qp.rq_buf_dma_addr); + + if (qp->kern_qp.sq_db_info) + dma_pool_free(dev->db_pool, qp->kern_qp.sq_db_info, + qp->kern_qp.sq_db_info_dma_addr); + + if (qp->kern_qp.rq_db_info) + dma_pool_free(dev->db_pool, qp->kern_qp.rq_db_info, + qp->kern_qp.rq_db_info_dma_addr); } static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, @@ -502,7 +516,6 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, { struct erdma_kqp *kqp = &qp->kern_qp; int ret = -ENOMEM; - int size; if (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) kqp->sig_all = 1; @@ -511,8 +524,8 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, kqp->sq_ci = 0; kqp->rq_pi = 0; kqp->rq_ci = 0; - kqp->hw_sq_db = - dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT); + kqp->hw_sq_db = dev->func_bar + + (ERDMA_SDB_SHARED_PAGE_INDEX << ERDMA_HW_PAGE_SHIFT); kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET; kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64)); @@ -520,30 +533,40 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, if (!kqp->swr_tbl || !kqp->rwr_tbl) goto err_out; - size = (qp->attrs.sq_size << SQEBB_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; - kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, size, + 
kqp->sq_buf = dma_alloc_coherent(&dev->pdev->dev, + qp->attrs.sq_size << SQEBB_SHIFT, &kqp->sq_buf_dma_addr, GFP_KERNEL); if (!kqp->sq_buf) goto err_out; - size = (qp->attrs.rq_size << RQE_SHIFT) + ERDMA_EXTRA_BUFFER_SIZE; - kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, size, + kqp->rq_buf = dma_alloc_coherent(&dev->pdev->dev, + qp->attrs.rq_size << RQE_SHIFT, &kqp->rq_buf_dma_addr, GFP_KERNEL); if (!kqp->rq_buf) goto err_out; - kqp->sq_db_info = kqp->sq_buf + (qp->attrs.sq_size << SQEBB_SHIFT); - kqp->rq_db_info = kqp->rq_buf + (qp->attrs.rq_size << RQE_SHIFT); + kqp->sq_db_info = dma_pool_alloc(dev->db_pool, GFP_KERNEL, + &kqp->sq_db_info_dma_addr); + if (!kqp->sq_db_info) + goto err_out; + + kqp->rq_db_info = dma_pool_alloc(dev->db_pool, GFP_KERNEL, + &kqp->rq_db_info_dma_addr); + if (!kqp->rq_db_info) + goto err_out; if (attrs->create_flags & IB_QP_CREATE_IWARP_WITHOUT_CM) { - struct iw_ext_conn_param *param = (struct iw_ext_conn_param *)(attrs->qp_context); + struct iw_ext_conn_param *param = + (struct iw_ext_conn_param *)(attrs->qp_context); if (param == NULL) { ret = -EINVAL; goto err_out; } if (param->sk_addr.family != PF_INET) { - ibdev_err(&dev->ibdev, "IPv4 address is required for connection without CM.\n"); + ibdev_err_ratelimited( + &dev->ibdev, + "IPv4 address is required for connection without CM.\n"); ret = -EINVAL; goto err_out; } @@ -563,11 +586,10 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, return ret; } -static int -get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, struct erdma_mem *mem, - u64 start, u64 len, int access, u64 virt, - unsigned long req_page_size, u8 force_indirect_mtt, - bool is_mr) +static int get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, + struct erdma_mem *mem, u64 start, u64 len, + int access, u64 virt, unsigned long req_page_size, + u8 force_indirect_mtt, bool is_mr) { struct erdma_dev *dev = to_edev(ctx->ibucontext.device); struct ib_block_iter biter; @@ -584,7 +606,6 @@ get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, struct erdma mem->va = virt; mem->len = len; mem->page_size = ib_umem_find_best_pgsz(mem->umem, req_page_size, virt); - mem->page_offset = start & (mem->page_size - 1); mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size); mem->page_cnt = mem->mtt_nents; @@ -603,6 +624,7 @@ get_mtt_entries(struct ib_udata *udata, struct erdma_ucontext *ctx, struct erdma mem->mtt_type = ERDMA_MR_INLINE_MTT; phy_addr = mem->mtt_entry; } + rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) { *phy_addr = rdma_block_iter_dma_address(&biter); phy_addr++; @@ -644,7 +666,8 @@ static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem) } } -static int erdma_map_user_dbrecords(struct ib_udata *udata, struct erdma_ucontext *uctx, +static int erdma_map_user_dbrecords(struct ib_udata *udata, + struct erdma_ucontext *uctx, u64 dbrecords_va, struct erdma_user_dbrecords_page **dbr_page, dma_addr_t *dma_addr) @@ -706,15 +729,15 @@ erdma_unmap_user_dbrecords(struct erdma_ucontext *ctx, mutex_unlock(&ctx->dbrecords_page_mutex); } -static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, struct erdma_ucontext *uctx, - u64 va, u32 len, u64 db_info_va) +static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, + struct erdma_ucontext *uctx, u64 va, u32 len, + u64 db_info_va) { - dma_addr_t db_info_dma_addr; u32 rq_offset; int ret; - if (len < (PAGE_ALIGN(qp->attrs.sq_size * SQEBB_SIZE) + + if (len < 
(ALIGN(qp->attrs.sq_size * SQEBB_SIZE, ERDMA_HW_PAGE_SIZE) + qp->attrs.rq_size * RQE_SIZE)) return -EINVAL; @@ -724,7 +747,7 @@ static int init_user_qp(struct erdma_qp *qp, struct ib_udata *udata, struct erdm if (ret) return ret; - rq_offset = PAGE_ALIGN(qp->attrs.sq_size << SQEBB_SHIFT); + rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE); qp->user_qp.rq_offset = rq_offset; ret = get_mtt_entries(udata, uctx, &qp->user_qp.rq_mtt, va + rq_offset, @@ -763,11 +786,11 @@ static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx) int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, struct ib_udata *udata) { - struct erdma_qp *qp = to_eqp(ibqp); struct erdma_dev *dev = to_edev(ibqp->device); - struct erdma_ucontext *uctx; - struct erdma_ureq_create_qp ureq; struct erdma_uresp_create_qp uresp; + struct erdma_qp *qp = to_eqp(ibqp); + struct erdma_ureq_create_qp ureq; + struct erdma_ucontext *uctx; int ret; uctx = rdma_udata_to_drv_context(udata, struct erdma_ucontext, @@ -831,6 +854,8 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, goto err_out_xa; } + INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); + qp->attrs.max_send_sge = attrs->cap.max_send_sge; qp->attrs.max_recv_sge = attrs->cap.max_recv_sge; qp->attrs.state = ERDMA_QP_STATE_IDLE; @@ -880,7 +905,7 @@ struct ib_qp *erdma_kzalloc_qp(struct ib_pd *ibpd, if (ret) goto err_free; - /* clear the field, otherwise core code will have problems. */ + /* clear the field, otherwise core code will have problems. */ qp->ibqp.res.task = NULL; return &qp->ibqp; err_free: @@ -1135,8 +1160,7 @@ int erdma_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) req.cfg = FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, ibmr->lkey >> 8) | FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, ibmr->lkey & 0xFF); - ret = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + ret = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (ret) { ERDMA_INC_CNT(dev, CMD_DEREG_MR_FAILED); dev_err(&dev->pdev->dev, @@ -1163,12 +1187,13 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) ERDMA_INC_CNT(dev, CMD_DESTROY_CQ); + hrtimer_cancel(&cq->dim.timer); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_DESTROY_CQ); req.cqn = cq->cqn; - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) { dev_err(&dev->pdev->dev, "ERROR: err code = %d, cmd of destroy cq failed.\n", @@ -1189,12 +1214,13 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) return 0; } -static void erdma_ib_lock_cqs(struct erdma_cq *send_cq, struct erdma_cq *recv_cq) +static void erdma_ib_lock_cqs(struct erdma_cq *send_cq, + struct erdma_cq *recv_cq) __acquires(&send_cq->kern_cq.lock) __acquires(&recv_cq->kern_cq.lock) { if (send_cq) { if (recv_cq) { - if (send_cq->cqn < recv_cq->cqn) { + if (send_cq->cqn < recv_cq->cqn) { spin_lock(&send_cq->kern_cq.lock); spin_lock_nested(&recv_cq->kern_cq.lock, SINGLE_DEPTH_NESTING); @@ -1219,12 +1245,13 @@ static void erdma_ib_lock_cqs(struct erdma_cq *send_cq, struct erdma_cq *recv_cq } } -static void erdma_ib_unlock_cqs(struct erdma_cq *send_cq, struct erdma_cq *recv_cq) +static void erdma_ib_unlock_cqs(struct erdma_cq *send_cq, + struct erdma_cq *recv_cq) __releases(&send_cq->kern_cq.lock) __releases(&recv_cq->kern_cq.lock) { if (send_cq) { if (recv_cq) { - if (send_cq->cqn < recv_cq->cqn) { + if (send_cq->cqn < 
recv_cq->cqn) { spin_unlock(&recv_cq->kern_cq.lock); spin_unlock(&send_cq->kern_cq.lock); } else if (send_cq->cqn == recv_cq->cqn) { @@ -1273,12 +1300,13 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); up_write(&qp->state_lock); + cancel_delayed_work_sync(&qp->reflush_dwork); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_DESTROY_QP); req.qpn = QP_ID(qp); - err = erdma_post_cmd_wait(&dev->cmdq, (u64 *)&req, sizeof(req), NULL, - NULL); + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); if (err) { dev_err(&dev->pdev->dev, "ERROR: err code = %d, cmd of destroy qp failed.\n", @@ -1291,16 +1319,7 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) wait_for_completion(&qp->safe_free); if (rdma_is_kernel_res(&qp->ibqp.res)) { - vfree(qp->kern_qp.swr_tbl); - vfree(qp->kern_qp.rwr_tbl); - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.rq_size << RQE_SHIFT), - qp->kern_qp.rq_buf, qp->kern_qp.rq_buf_dma_addr); - dma_free_coherent( - &dev->pdev->dev, - WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), - qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); + free_kernel_qp(qp); } else { put_mtt_entries(dev, &qp->user_qp.sq_mtt); put_mtt_entries(dev, &qp->user_qp.rq_mtt); @@ -1368,8 +1387,8 @@ void erdma_mmap_free(struct rdma_user_mmap_entry *rdma_entry) static void alloc_db_resources(struct erdma_dev *dev, struct erdma_ucontext *ctx) { - u32 bitmap_idx; struct erdma_devattr *attrs = &dev->attrs; + u32 bitmap_idx, hw_page_idx; if (attrs->disable_dwqe) goto alloc_normal_db; @@ -1382,11 +1401,9 @@ static void alloc_db_resources(struct erdma_dev *dev, spin_unlock(&dev->db_bitmap_lock); ctx->sdb_type = ERDMA_SDB_PAGE; - ctx->sdb_idx = bitmap_idx; - ctx->sdb_page_idx = bitmap_idx; + ctx->sdb_bitmap_idx = bitmap_idx; ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + - (bitmap_idx << PAGE_SHIFT); - ctx->sdb_page_off = 0; + (bitmap_idx << ERDMA_HW_PAGE_SHIFT); return; } @@ -1397,13 +1414,12 @@ static void alloc_db_resources(struct erdma_dev *dev, spin_unlock(&dev->db_bitmap_lock); ctx->sdb_type = ERDMA_SDB_ENTRY; - ctx->sdb_idx = bitmap_idx; - ctx->sdb_page_idx = attrs->dwqe_pages + - bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; - ctx->sdb_page_off = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE; - + ctx->sdb_bitmap_idx = bitmap_idx; + hw_page_idx = attrs->dwqe_pages + + bitmap_idx / ERDMA_DWQE_TYPE1_CNT_PER_PAGE; + ctx->sdb_entid = bitmap_idx % ERDMA_DWQE_TYPE1_CNT_PER_PAGE; ctx->sdb = dev->func_bar_addr + ERDMA_BAR_SQDB_SPACE_OFFSET + - (ctx->sdb_page_idx << PAGE_SHIFT); + (hw_page_idx << ERDMA_HW_PAGE_SHIFT); return; } @@ -1412,11 +1428,8 @@ static void alloc_db_resources(struct erdma_dev *dev, alloc_normal_db: ctx->sdb_type = ERDMA_SDB_SHARED; - ctx->sdb_idx = 0; - ctx->sdb_page_idx = ERDMA_SDB_SHARED_PAGE_INDEX; - ctx->sdb_page_off = 0; - - ctx->sdb = dev->func_bar_addr + (ctx->sdb_page_idx << PAGE_SHIFT); + ctx->sdb = dev->func_bar_addr + + (ERDMA_SDB_SHARED_PAGE_INDEX << ERDMA_HW_PAGE_SHIFT); } static void erdma_uctx_user_mmap_entries_remove(struct erdma_ucontext *uctx) @@ -1448,11 +1461,6 @@ int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) ctx->rdb = dev->func_bar_addr + ERDMA_BAR_RQDB_SPACE_OFFSET; ctx->cdb = dev->func_bar_addr + ERDMA_BAR_CQDB_SPACE_OFFSET; - if (udata->outlen < sizeof(uresp)) { - ret = -EINVAL; - goto err_out; - } - ctx->sq_db_mmap_entry = erdma_user_mmap_entry_insert( ibctx, (u64)ctx->sdb, 
PAGE_SIZE, ERDMA_MMAP_IO_NC, &uresp.sdb); if (!ctx->sq_db_mmap_entry) { @@ -1476,9 +1484,13 @@ int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *udata) uresp.dev_id = dev->pdev->device; uresp.sdb_type = ctx->sdb_type; - uresp.sdb_offset = ctx->sdb_page_off; + uresp.sdb_entid = ctx->sdb_entid; + uresp.sdb_off = ctx->sdb & ~PAGE_MASK; + uresp.rdb_off = ctx->rdb & ~PAGE_MASK; + uresp.cdb_off = ctx->cdb & ~PAGE_MASK; - ret = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + ret = ib_copy_to_udata(udata, &uresp, + min(sizeof(uresp), udata->outlen)); if (ret) goto err_out; @@ -1503,9 +1515,9 @@ void erdma_dealloc_ucontext(struct ib_ucontext *ibctx) spin_lock(&dev->db_bitmap_lock); if (ctx->sdb_type == ERDMA_SDB_PAGE) - clear_bit(ctx->sdb_idx, dev->sdb_page); + clear_bit(ctx->sdb_bitmap_idx, dev->sdb_page); else if (ctx->sdb_type == ERDMA_SDB_ENTRY) - clear_bit(ctx->sdb_idx, dev->sdb_entry); + clear_bit(ctx->sdb_bitmap_idx, dev->sdb_entry); erdma_uctx_user_mmap_entries_remove(ctx); spin_unlock(&dev->db_bitmap_lock); @@ -1533,9 +1545,10 @@ int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct rdma_ah_attr *ah_attr; const struct ib_gid_attr *sgid_attr; + if (compat_mode) { - dprint(DBG_QP, "attr mask: %x, av: %d, state:%d\n", - attr_mask, attr_mask & IB_QP_AV, attr_mask & IB_QP_STATE); + dprint(DBG_QP, "attr mask: %x, av: %d, state:%d\n", attr_mask, + attr_mask & IB_QP_AV, attr_mask & IB_QP_STATE); if (attr_mask & IB_QP_AV) { ah_attr = &attr->ah_attr; @@ -1543,18 +1556,24 @@ int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, dprint(DBG_QP, "attr_type:%d\n", ah_attr->type); dprint(DBG_QP, "gid type:%u, sgid: %pI6\n", - rdma_gid_attr_network_type(sgid_attr), sgid_attr->gid.raw); - dprint(DBG_QP, "dgid: %pI6\n", rdma_ah_read_grh(ah_attr)->dgid.raw); + rdma_gid_attr_network_type(sgid_attr), + sgid_attr->gid.raw); + dprint(DBG_QP, "dgid: %pI6\n", + rdma_ah_read_grh(ah_attr)->dgid.raw); - rdma_gid2ip((struct sockaddr *)&qp->attrs.laddr, &sgid_attr->gid); + rdma_gid2ip((struct sockaddr *)&qp->attrs.laddr, + &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&qp->attrs.raddr, - &rdma_ah_read_grh(ah_attr)->dgid); - dprint(DBG_QP, "laddr:0x%x\n", ntohl(qp->attrs.laddr.in.sin_addr.s_addr)); - dprint(DBG_QP, "raddr:0x%x\n", ntohl(qp->attrs.raddr.in.sin_addr.s_addr)); + &rdma_ah_read_grh(ah_attr)->dgid); + dprint(DBG_QP, "laddr:0x%x\n", + ntohl(qp->attrs.laddr.in.sin_addr.s_addr)); + dprint(DBG_QP, "raddr:0x%x\n", + ntohl(qp->attrs.raddr.in.sin_addr.s_addr)); } if (attr_mask & IB_QP_DEST_QPN) { - dprint(DBG_QP, "get remote qpn %u\n", attr->dest_qp_num); + dprint(DBG_QP, "get remote qpn %u\n", + attr->dest_qp_num); qp->attrs.remote_qp_num = attr->dest_qp_num; } @@ -1645,15 +1664,16 @@ int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, return 0; } -static int erdma_init_user_cq(struct ib_udata *udata, struct erdma_ucontext *uctx, - struct erdma_cq *cq, struct erdma_ureq_create_cq *ureq) +static int erdma_init_user_cq(struct ib_udata *udata, + struct erdma_ucontext *uctx, struct erdma_cq *cq, + struct erdma_ureq_create_cq *ureq) { struct erdma_dev *dev = to_edev(cq->ibcq.device); int ret; ret = get_mtt_entries(udata, uctx, &cq->user_cq.qbuf_mtt, ureq->qbuf_va, - ureq->qbuf_len, 0, ureq->qbuf_va, - SZ_64M - SZ_4K, 1, false); + ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K, + 1, false); if (ret) return ret; @@ -1749,6 +1769,9 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, if (ret) goto 
err_free_res; + hrtimer_init(&cq->dim.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + cq->dim.timer.function = cq_timer_fn; + return 0; err_free_res: @@ -1767,7 +1790,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, return ret; } -struct net_device *erdma_get_netdev(struct ib_device *device, u8 port_num) +struct net_device *erdma_get_netdev(struct ib_device *device, port_t port_num) { struct erdma_dev *edev = to_edev(device); @@ -1781,6 +1804,17 @@ void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext) { } +void erdma_set_mtu(struct erdma_dev *dev, u32 mtu) +{ + struct erdma_cmdq_config_mtu_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_CONF_MTU); + req.mtu = mtu; + + erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); +} + void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason) { struct ib_event event; @@ -1796,3 +1830,41 @@ void erdma_destroy_ah(struct ib_ah *ibah, u32 flags) { return; } + +int erdma_query_hw_stats(struct erdma_dev *dev) +{ + struct erdma_cmdq_query_stats_resp *stats; + struct erdma_cmdq_query_req req; + dma_addr_t dma_addr; + int err; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_COMMON, + CMDQ_OPCODE_GET_STATS); + + stats = dma_pool_alloc(dev->resp_pool, GFP_KERNEL, &dma_addr); + if (!stats) + return -ENOMEM; + + req.target_addr = dma_addr; + req.target_length = ERDMA_HW_RESP_SIZE; + /* Clear the magic fileds. */ + stats->hdr.magic = 0; + + err = erdma_post_cmd_wait(&dev->cmdq, &req, sizeof(req), NULL, NULL); + if (err) + goto out; + + if (stats->hdr.magic != 0x5566) { + err = -EINVAL; + goto out; + } + + memcpy(&dev->stats.value[ERDMA_STATS_TX_REQS_CNT], &stats->tx_req_cnt, + sizeof(__u64) * (ERDMA_STATS_RX_PPS_METER_DROP_CNT - + ERDMA_STATS_TX_REQS_CNT + 1)); + +out: + dma_pool_free(dev->resp_pool, stats, dma_addr); + + return err; +} diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index ade781ca6eac1f..87a8c652a42256 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* Authors: Cheng Xu */ /* Kai Shen */ @@ -7,20 +7,11 @@ #ifndef __ERDMA_VERBS_H__ #define __ERDMA_VERBS_H__ -#include - -#include -#include -#include - -//#include "kcompat.h" #include "erdma.h" -#include "erdma_cm.h" -#include "erdma_hw.h" /* RDMA Capability. */ #define ERDMA_MAX_PD (128 * 1024) -#define ERDMA_MAX_SEND_WR 4096 +#define ERDMA_MAX_SEND_WR 8192 #define ERDMA_MAX_ORD 128 #define ERDMA_MAX_IRD 128 #define ERDMA_MAX_SGE_RD 1 @@ -44,9 +35,8 @@ struct erdma_ucontext { struct ib_ucontext ibucontext; u32 sdb_type; - u32 sdb_idx; - u32 sdb_page_idx; - u32 sdb_page_off; + u32 sdb_bitmap_idx; + u32 sdb_entid; u64 sdb; u64 rdb; u64 cdb; @@ -80,16 +70,18 @@ struct erdma_pd { #define ERDMA_MR_INLINE_MTT 0 #define ERDMA_MR_INDIRECT_MTT 1 -#define ERDMA_MR_ACC_LR BIT(0) -#define ERDMA_MR_ACC_LW BIT(1) -#define ERDMA_MR_ACC_RR BIT(2) -#define ERDMA_MR_ACC_RW BIT(3) +#define ERDMA_MR_ACC_RA BIT(0) +#define ERDMA_MR_ACC_LR BIT(1) +#define ERDMA_MR_ACC_LW BIT(2) +#define ERDMA_MR_ACC_RR BIT(3) +#define ERDMA_MR_ACC_RW BIT(4) static inline u8 to_erdma_access_flags(int access) { return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) | (access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) | - (access & IB_ACCESS_REMOTE_WRITE ? 
ERDMA_MR_ACC_RW : 0); + (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) | + (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0); } struct erdma_mem { @@ -139,7 +131,6 @@ struct erdma_kqp { u16 sq_pi; u16 sq_ci; u64 *swr_tbl; - bool sq_shutdown; void *hw_sq_db; void *sq_buf; dma_addr_t sq_buf_dma_addr; @@ -149,12 +140,14 @@ struct erdma_kqp { u16 rq_pi; u16 rq_ci; u64 *rwr_tbl; - bool rq_shutdown; void *hw_rq_db; void *rq_buf; dma_addr_t rq_buf_dma_addr; void *rq_db_info; + dma_addr_t sq_db_info_dma_addr; + dma_addr_t rq_db_info_dma_addr; + u8 sig_all; }; @@ -170,7 +163,8 @@ enum erdma_qp_state { }; enum erdma_qp_flags { - ERDMA_QP_IN_DESTROY = (1 << 0) + ERDMA_QP_IN_DESTROY = (1 << 0), + ERDMA_QP_IN_FLUSHING = (1 << 1), }; enum erdma_qp_attr_mask { @@ -194,6 +188,8 @@ struct erdma_qp_attrs { u32 max_recv_sge; u32 cookie; u32 flags; + + u32 remote_cookie; #define ERDMA_QP_ACTIVE 0 #define ERDMA_QP_PASSIVE 1 u8 qp_type; @@ -229,6 +225,9 @@ struct erdma_qp { struct erdma_cq *rcq; struct erdma_qp_attrs attrs; + unsigned long flags; + struct delayed_work reflush_dwork; + }; struct erdma_kcq_info { @@ -249,6 +248,12 @@ struct erdma_ucq_info { dma_addr_t db_info_dma_addr; }; +struct erdma_dim { + enum ib_cq_notify_flags flags; + struct hrtimer timer; + u16 timeout; +}; + struct erdma_cq { struct ib_cq ibcq; u32 cqn; @@ -260,6 +265,7 @@ struct erdma_cq { struct erdma_kcq_info kern_cq; struct erdma_ucq_info user_cq; }; + struct erdma_dim dim; }; #define QP_ID(qp) ((qp)->ibqp.qp_num) @@ -312,6 +318,8 @@ to_emmap(struct rdma_user_mmap_entry *ibmmap) return container_of(ibmmap, struct erdma_user_mmap_entry, rdma_entry); } +enum hrtimer_restart cq_timer_fn(struct hrtimer *t); + int erdma_alloc_ucontext(struct ib_ucontext *ibctx, struct ib_udata *data); void erdma_dealloc_ucontext(struct ib_ucontext *ibctx); int erdma_query_device(struct ib_device *dev, struct ib_device_attr *attr, @@ -320,6 +328,7 @@ int erdma_get_port_immutable(struct ib_device *dev, port_t port, struct ib_port_immutable *ib_port_immutable); int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *data); + int erdma_query_port(struct ib_device *dev, port_t port, struct ib_port_attr *attr); int erdma_query_gid(struct ib_device *dev, port_t port, int idx, @@ -327,7 +336,7 @@ int erdma_query_gid(struct ib_device *dev, port_t port, int idx, int erdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *data); int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); struct ib_qp *erdma_kzalloc_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *attr, - struct ib_udata *data); + struct ib_udata *data); int erdma_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, struct ib_qp_init_attr *init_attr); int erdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, @@ -355,15 +364,17 @@ int erdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset); void erdma_disassociate_ucontext(struct ib_ucontext *ibcontext); void erdma_port_event(struct erdma_dev *dev, enum ib_event_type reason); +void erdma_set_mtu(struct erdma_dev *dev, u32 mtu); -struct net_device *erdma_get_netdev(struct ib_device *device, u8 port_num); +struct net_device *erdma_get_netdev(struct ib_device *device, port_t port_num); enum rdma_link_layer erdma_get_link_layer(struct ib_device *dev, port_t port_num); int erdma_query_pkey(struct ib_device *ibdev, port_t port, u16 index, - u16 *pkey); -void erdma_drain_rq(struct ib_qp *ibqp); -void erdma_drain_sq(struct ib_qp *ibqp); 
+		      u16 *pkey);
 void erdma_destroy_ah(struct ib_ah *ibah, u32 flags);
+int erdma_modify_cq(struct ib_cq *ibcq, u16 cq_count, u16 cq_period);
+
+int erdma_query_hw_stats(struct erdma_dev *dev);
 
 #endif
diff --git a/include/uapi/rdma/erdma-abi.h b/include/uapi/rdma/erdma-abi.h
index cd409b9cfca897..45504641598358 100644
--- a/include/uapi/rdma/erdma-abi.h
+++ b/include/uapi/rdma/erdma-abi.h
@@ -11,8 +11,8 @@
 #define ERDMA_ABI_VERSION 1
 
 struct erdma_ureq_create_cq {
-	__u64 db_record_va;
-	__u64 qbuf_va;
+	__aligned_u64 db_record_va;
+	__aligned_u64 qbuf_va;
 	__u32 qbuf_len;
 	__u32 rsvd0;
 };
@@ -23,8 +23,8 @@ struct erdma_uresp_create_cq {
 };
 
 struct erdma_ureq_create_qp {
-	__u64 db_record_va;
-	__u64 qbuf_va;
+	__aligned_u64 db_record_va;
+	__aligned_u64 qbuf_va;
 	__u32 qbuf_len;
 	__u32 rsvd0;
 };
@@ -40,10 +40,13 @@ struct erdma_uresp_alloc_ctx {
 	__u32 dev_id;
 	__u32 pad;
 	__u32 sdb_type;
-	__u32 sdb_offset;
-	__u64 sdb;
-	__u64 rdb;
-	__u64 cdb;
+	__u32 sdb_entid;
+	__aligned_u64 sdb;
+	__aligned_u64 rdb;
+	__aligned_u64 cdb;
+	__u32 sdb_off;
+	__u32 rdb_off;
+	__u32 cdb_off;
 };
 
 #endif
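
For reference, a minimal userspace sketch of how a provider library might consume the new sdb/sdb_off pair exported in struct erdma_uresp_alloc_ctx. Only the uapi fields come from this patch; the helper name, the command-fd argument, the include path and the error handling are illustrative assumptions, not part of the driver or of rdma-core.

#include <stdint.h>
#include <unistd.h>
#include <sys/mman.h>
#include <rdma/erdma-abi.h>

/*
 * Hypothetical helper: map the SQ doorbell page advertised by the kernel
 * and return a pointer to the doorbell register itself.  resp->sdb is the
 * mmap offset cookie produced by rdma_user_mmap_entry_insert() on the
 * kernel side; resp->sdb_off is the byte offset of the doorbell within
 * that page.
 */
static void *example_map_sq_doorbell(int cmd_fd,
				     const struct erdma_uresp_alloc_ctx *resp)
{
	long page_size = sysconf(_SC_PAGESIZE);
	void *page;

	page = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, cmd_fd,
		    (off_t)resp->sdb);
	if (page == MAP_FAILED)
		return NULL;

	return (uint8_t *)page + resp->sdb_off;
}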