Rdma batch target #11

Open · wants to merge 3 commits into master

10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,16 @@

## v20.04: (Upcoming Release)

### rpc

Added an optional 'no_wr_batching' parameter to the 'nvmf_create_transport' RPC method.

### nvmf

Added a 'no_wr_batching' field to 'spdk_nvmf_transport_opts' that allows disabling work request (WR) batching in the RDMA transport.

Added a 'NoWRBatching' option to the [Transport] configuration section, which can be used to disable RDMA WR batching.

### vmd
A new function, `spdk_vmd_fini`, has been added. It releases all resources acquired by the VMD
library through the `spdk_vmd_init` call.
1 change: 1 addition & 0 deletions doc/jsonrpc.md
@@ -3844,6 +3844,7 @@ no_srq | Optional | boolean | Disable shared receive queue
c2h_success | Optional | boolean | Disable C2H success optimization (TCP only)
dif_insert_or_strip | Optional | boolean | Enable DIF insert for write I/O and DIF strip for read I/O DIF (TCP only)
sock_priority | Optional | number | The socket priority of the connection owned by this transport (TCP only)
no_wr_batching | Optional | boolean | Disable work request batching (RDMA only)

### Example:

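For orientation, an `nvmf_create_transport` request that sets the new flag carries it alongside the existing transport options. A minimal sketch of the payload follows; everything other than `no_wr_batching` is an arbitrary example value, not something this patch defines:

```python
# Hypothetical JSON-RPC payload for nvmf_create_transport; only the
# no_wr_batching key is introduced by this change, the rest is illustrative.
request = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "nvmf_create_transport",
    "params": {
        "trtype": "RDMA",
        "no_wr_batching": True,  # post WR chains immediately instead of batching them
    },
}
```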
3 changes: 3 additions & 0 deletions etc/spdk/nvmf.conf.in
@@ -111,6 +111,9 @@
# Set the maximum number outstanding I/O per shared receive queue. Relevant only for RDMA transport
#MaxSRQDepth 4096

# Disable work request batching. Relevant only for RDMA transport
#NoWRBatching False

[Transport]
# Set TCP transport type.
Type TCP
1 change: 1 addition & 0 deletions include/spdk/nvmf.h
@@ -83,6 +83,7 @@ struct spdk_nvmf_transport_opts {
bool no_srq;
bool c2h_success;
bool dif_insert_or_strip;
bool no_wr_batching;
uint32_t sock_priority;
};

5 changes: 5 additions & 0 deletions lib/nvmf/nvmf_rpc.c
@@ -1602,6 +1602,10 @@ static const struct spdk_json_object_decoder nvmf_rpc_create_transport_decoder[]
"tgt_name", offsetof(struct nvmf_rpc_create_transport_ctx, tgt_name),
spdk_json_decode_string, true
},
{
"no_wr_batching", offsetof(struct nvmf_rpc_create_transport_ctx, opts.no_wr_batching),
spdk_json_decode_bool, true
},
};

static void
@@ -1745,6 +1749,7 @@ dump_nvmf_transport(struct spdk_json_write_ctx *w, struct spdk_nvmf_transport *t
if (type == SPDK_NVME_TRANSPORT_RDMA) {
spdk_json_write_named_uint32(w, "max_srq_depth", opts->max_srq_depth);
spdk_json_write_named_bool(w, "no_srq", opts->no_srq);
spdk_json_write_named_bool(w, "no_wr_batching", opts->no_wr_batching);
} else if (type == SPDK_NVME_TRANSPORT_TCP) {
spdk_json_write_named_bool(w, "c2h_success", opts->c2h_success);
spdk_json_write_named_uint32(w, "sock_priority", opts->sock_priority);
30 changes: 26 additions & 4 deletions lib/nvmf/rdma.c
@@ -522,6 +522,14 @@ struct spdk_nvmf_rdma_transport {
static inline void
spdk_nvmf_rdma_start_disconnect(struct spdk_nvmf_rdma_qpair *rqpair);

static void
_poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport,
struct spdk_nvmf_rdma_poller *rpoller);

static void
_poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport,
struct spdk_nvmf_rdma_poller *rpoller);

static inline int
spdk_nvmf_rdma_check_ibv_state(enum ibv_qp_state state)
{
@@ -1105,6 +1113,7 @@ static void
nvmf_rdma_qpair_queue_recv_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *first)
{
struct ibv_recv_wr *last;
struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, struct spdk_nvmf_rdma_transport, transport);

last = first;
while (last->next != NULL) {
@@ -1121,6 +1130,10 @@ nvmf_rdma_qpair_queue_recv_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_r
rqpair->resources->recvs_to_post.last->next = first;
rqpair->resources->recvs_to_post.last = last;
}

if (rtransport->transport.opts.no_wr_batching) {
_poller_submit_recvs(rtransport, rqpair->poller);
}
}

/* Append the given send wr structure to the qpair's outstanding sends list. */
@@ -1129,6 +1142,7 @@ static void
nvmf_rdma_qpair_queue_send_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_send_wr *first)
{
struct ibv_send_wr *last;
struct spdk_nvmf_rdma_transport *rtransport = SPDK_CONTAINEROF(rqpair->qpair.transport, struct spdk_nvmf_rdma_transport, transport);

last = first;
while (last->next != NULL) {
@@ -1143,6 +1157,10 @@ nvmf_rdma_qpair_queue_send_wrs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_s
rqpair->sends_to_post.last->next = first;
rqpair->sends_to_post.last = last;
}

if (rtransport->transport.opts.no_wr_batching) {
_poller_submit_sends(rtransport, rqpair->poller);
}
}

static int
@@ -2295,6 +2313,7 @@ spdk_nvmf_rdma_request_process(struct spdk_nvmf_rdma_transport *rtransport,
#define SPDK_NVMF_RDMA_DEFAULT_BUFFER_CACHE_SIZE 32
#define SPDK_NVMF_RDMA_DEFAULT_NO_SRQ false
#define SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP false
#define SPDK_NVMF_RDMA_DEFAULT_NO_WR_BATCHING false

static void
spdk_nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts)
@@ -2310,6 +2329,7 @@ spdk_nvmf_rdma_opts_init(struct spdk_nvmf_transport_opts *opts)
opts->max_srq_depth = SPDK_NVMF_RDMA_DEFAULT_SRQ_DEPTH;
opts->no_srq = SPDK_NVMF_RDMA_DEFAULT_NO_SRQ;
opts->dif_insert_or_strip = SPDK_NVMF_RDMA_DIF_INSERT_OR_STRIP;
opts->no_wr_batching = SPDK_NVMF_RDMA_DEFAULT_NO_WR_BATCHING;
}

const struct spdk_mem_map_ops g_nvmf_rdma_map_ops = {
@@ -2370,7 +2390,8 @@ spdk_nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
" Transport opts: max_ioq_depth=%d, max_io_size=%d,\n"
" max_qpairs_per_ctrlr=%d, io_unit_size=%d,\n"
" in_capsule_data_size=%d, max_aq_depth=%d,\n"
" num_shared_buffers=%d, max_srq_depth=%d, no_srq=%d\n",
" num_shared_buffers=%d, max_srq_depth=%d, no_srq=%d,\n"
" no_wr_batching=%d\n",
opts->max_queue_depth,
opts->max_io_size,
opts->max_qpairs_per_ctrlr,
@@ -2379,7 +2400,8 @@ spdk_nvmf_rdma_create(struct spdk_nvmf_transport_opts *opts)
opts->max_aq_depth,
opts->num_shared_buffers,
opts->max_srq_depth,
opts->no_srq);
opts->no_srq,
opts->no_wr_batching);

/* I/O unit size cannot be larger than max I/O size */
if (opts->io_unit_size > opts->max_io_size) {
@@ -3725,7 +3747,7 @@ _qp_reset_failed_recvs(struct spdk_nvmf_rdma_qpair *rqpair, struct ibv_recv_wr *
spdk_nvmf_rdma_start_disconnect(rqpair);
}

static void
void
_poller_submit_recvs(struct spdk_nvmf_rdma_transport *rtransport,
struct spdk_nvmf_rdma_poller *rpoller)
{
@@ -3818,7 +3840,7 @@ _qp_reset_failed_sends(struct spdk_nvmf_rdma_transport *rtransport,

}

static void
void
_poller_submit_sends(struct spdk_nvmf_rdma_transport *rtransport,
struct spdk_nvmf_rdma_poller *rpoller)
{
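To make the behavioural change above easier to follow: work requests normally accumulate on a per-qpair/per-poller chain and are posted once per poll iteration, which amortises posting overhead; with no_wr_batching each queued chain is flushed as soon as it is appended. The following is a small self-contained model of the two paths in plain Python, an illustration of the idea rather than the rdma.c implementation:

```python
# Conceptual model of WR submission with and without batching; not SPDK code.
class Poller:
    def __init__(self, no_wr_batching=False):
        self.no_wr_batching = no_wr_batching
        self.pending_sends = []            # stand-in for the sends_to_post chain

    def queue_send(self, wr):
        """Mirrors nvmf_rdma_qpair_queue_send_wrs(): chain the WR, then
        flush immediately if batching is disabled."""
        self.pending_sends.append(wr)
        if self.no_wr_batching:
            self.flush_sends()

    def poll(self):
        """Batched path: one flush per poll iteration (_poller_submit_sends)."""
        self.flush_sends()

    def flush_sends(self):
        if self.pending_sends:
            print("posting", len(self.pending_sends), "WR(s) in one call")
            self.pending_sends.clear()


batched = Poller()                          # default: batching enabled
for wr in ("wr1", "wr2", "wr3"):
    batched.queue_send(wr)
batched.poll()                              # -> one post carrying 3 WRs

unbatched = Poller(no_wr_batching=True)     # new option enabled
for wr in ("wr1", "wr2", "wr3"):
    unbatched.queue_send(wr)                # -> three separate posts
```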
2 changes: 2 additions & 0 deletions module/event/subsystems/nvmf/conf.c
@@ -642,6 +642,8 @@ spdk_nvmf_parse_transport(struct spdk_nvmf_parse_transport_ctx *ctx)
}
bval = spdk_conf_section_get_boolval(ctx->sp, "NoSRQ", false);
opts.no_srq = bval;
bval = spdk_conf_section_get_boolval(ctx->sp, "NoWRBatching", false);
opts.no_wr_batching = bval;
}

if (trtype == SPDK_NVME_TRANSPORT_TCP) {
4 changes: 3 additions & 1 deletion scripts/rpc.py
@@ -1684,7 +1684,8 @@ def nvmf_create_transport(args):
no_srq=args.no_srq,
c2h_success=args.c2h_success,
dif_insert_or_strip=args.dif_insert_or_strip,
sock_priority=args.sock_priority)
sock_priority=args.sock_priority,
no_wr_batching=args.no_wr_batching)

p = subparsers.add_parser('nvmf_create_transport', help='Create NVMf transport')
p.add_argument('-t', '--trtype', help='Transport type (ex. RDMA)', type=str, required=True)
@@ -1702,6 +1703,7 @@ def nvmf_create_transport(args):
p.add_argument('-o', '--c2h-success', action='store_false', help='Disable C2H success optimization. Relevant only for TCP transport')
p.add_argument('-f', '--dif-insert-or-strip', action='store_true', help='Enable DIF insert/strip. Relevant only for TCP transport')
p.add_argument('-y', '--sock-priority', help='The sock priority of the tcp connection. Relevant only for TCP transport', type=int)
p.add_argument('-b', '--no-wr-batching', action='store_true', help='Disable work request batching. Relevant only for RDMA transport')
p.set_defaults(func=nvmf_create_transport)

def nvmf_get_transports(args):
7 changes: 5 additions & 2 deletions scripts/rpc/nvmf.py
@@ -106,7 +106,8 @@ def nvmf_create_transport(client,
no_srq=False,
c2h_success=True,
dif_insert_or_strip=None,
sock_priority=None):
sock_priority=None,
no_wr_batching=None):
"""NVMf Transport Create options.

Args:
Expand All @@ -123,7 +124,7 @@ def nvmf_create_transport(client,
no_srq: Boolean flag to disable SRQ even for devices that support it - RDMA specific (optional)
c2h_success: Boolean flag to disable the C2H success optimization - TCP specific (optional)
dif_insert_or_strip: Boolean flag to enable DIF insert/strip for I/O - TCP specific (optional)

no_wr_batching: Boolean flag to disable work request batching - RDMA specific (optional)
Returns:
True or False
"""
@@ -158,6 +159,8 @@ def nvmf_create_transport(client,
params['dif_insert_or_strip'] = dif_insert_or_strip
if sock_priority:
params['sock_priority'] = sock_priority
if no_wr_batching is not None:
params['no_wr_batching'] = no_wr_batching
return client.call('nvmf_create_transport', params)


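Taken together with the rpc.py change, the new keyword is reachable from Python the same way as the existing RDMA flags. A minimal usage sketch follows; the socket path is SPDK's usual default and is an assumption here, not something this patch adds:

```python
# Assumes the SPDK scripts/ directory is on PYTHONPATH and that the nvmf
# target is listening on the default RPC socket.
from rpc.client import JSONRPCClient
import rpc.nvmf

client = JSONRPCClient('/var/tmp/spdk.sock')
rpc.nvmf.nvmf_create_transport(client, trtype='RDMA', no_wr_batching=True)

# Equivalent command-line form added by this patch:
#   scripts/rpc.py nvmf_create_transport -t RDMA --no-wr-batching
```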