From f06c9e176655b6ecfa926be09f65cdb0abcfe268 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Mon, 15 Jun 2020 04:47:42 -0400 Subject: [PATCH] prov/verbs: CM req header is part of private data with AF_IB With AF_IB, the CM request header is part of the private data and it is defined in the first 36 bytes. To comply with the CM REQ private data format, user private data must be stored starting at byte 36. Worse, any data saved at the beginning of the private data buffer would be corrupted as the first byte would be overwritten with 0. References: - https://www.spinics.net/lists/linux-rdma/msg22381.html - IBTA Architecture Specification Vol 1. Annex A11: RDMA IP CM Service. Signed-off-by: Sylvain Didelot --- prov/verbs/src/fi_verbs.h | 2 ++ prov/verbs/src/verbs_cm.c | 30 +++++++++++++++++++----------- prov/verbs/src/verbs_ep.c | 2 +- prov/verbs/src/verbs_eq.c | 11 +++++++++++ 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/prov/verbs/src/fi_verbs.h b/prov/verbs/src/fi_verbs.h index e27b299ef3f..f622947fec9 100644 --- a/prov/verbs/src/fi_verbs.h +++ b/prov/verbs/src/fi_verbs.h @@ -113,6 +113,7 @@ #define VERBS_NO_COMP_FLAG ((uint64_t)-1) +#define VRB_CM_REQ_HDR_SIZE (36) #define VRB_CM_DATA_SIZE (56) #define VERBS_CM_DATA_SIZE (VRB_CM_DATA_SIZE - \ sizeof(struct vrb_cm_data_hdr)) @@ -575,6 +576,7 @@ struct vrb_ep { size_t rx_cq_size; struct rdma_conn_param conn_param; struct vrb_cm_data_hdr *cm_hdr; + void *cm_priv_data; }; diff --git a/prov/verbs/src/verbs_cm.c b/prov/verbs/src/verbs_cm.c index 174682eac64..a33827f9ada 100644 --- a/prov/verbs/src/verbs_cm.c +++ b/prov/verbs/src/verbs_cm.c @@ -125,11 +125,10 @@ vrb_msg_ep_prepare_cm_data(const void *param, size_t param_size, static inline void vrb_ep_prepare_rdma_cm_param(struct rdma_conn_param *conn_param, - struct vrb_cm_data_hdr *cm_hdr, - size_t cm_hdr_data_size) + void *priv_data, size_t priv_data_size) { - conn_param->private_data = cm_hdr; - conn_param->private_data_len = 
(uint8_t)cm_hdr_data_size; + conn_param->private_data = priv_data; + conn_param->private_data_len = (uint8_t)priv_data_size; conn_param->responder_resources = RDMA_MAX_RESP_RES; conn_param->initiator_depth = RDMA_MAX_INIT_DEPTH; conn_param->flow_control = 1; @@ -142,6 +141,9 @@ vrb_msg_ep_connect(struct fid_ep *ep_fid, const void *addr, { struct vrb_ep *ep = container_of(ep_fid, struct vrb_ep, util_ep.ep_fid); + size_t priv_data_len; + struct vrb_cm_data_hdr *cm_hdr; + off_t cm_hdr_offset = 0; int ret; if (OFI_UNLIKELY(paramlen > VERBS_CM_DATA_SIZE)) @@ -153,13 +155,19 @@ vrb_msg_ep_connect(struct fid_ep *ep_fid, const void *addr, return ret; } - ep->cm_hdr = malloc(sizeof(*(ep->cm_hdr)) + paramlen); - if (!ep->cm_hdr) + if (ep->id->route.addr.src_addr.sa_family == AF_IB) + cm_hdr_offset = VRB_CM_REQ_HDR_SIZE; + + priv_data_len = cm_hdr_offset + sizeof(*cm_hdr) + paramlen; + ep->cm_priv_data = malloc(priv_data_len); + if (!ep->cm_priv_data) return -FI_ENOMEM; - vrb_msg_ep_prepare_cm_data(param, paramlen, ep->cm_hdr); - vrb_ep_prepare_rdma_cm_param(&ep->conn_param, ep->cm_hdr, - sizeof(*(ep->cm_hdr)) + paramlen); + cm_hdr = (void*)((char *)ep->cm_priv_data + cm_hdr_offset); + + vrb_msg_ep_prepare_cm_data(param, paramlen, cm_hdr); + vrb_ep_prepare_rdma_cm_param(&ep->conn_param, ep->cm_priv_data, + priv_data_len); ep->conn_param.retry_count = 15; if (ep->srq_ep) @@ -170,8 +178,8 @@ vrb_msg_ep_connect(struct fid_ep *ep_fid, const void *addr, FI_WARN(&vrb_prov, FI_LOG_EP_CTRL, "rdma_resolve_route failed: %s (%d)\n", strerror(-ret), -ret); - free(ep->cm_hdr); - ep->cm_hdr = NULL; + free(ep->cm_priv_data); + ep->cm_priv_data = NULL; return ret; } return 0; diff --git a/prov/verbs/src/verbs_ep.c b/prov/verbs/src/verbs_ep.c index f7b47960c9b..76af19ccf21 100644 --- a/prov/verbs/src/verbs_ep.c +++ b/prov/verbs/src/verbs_ep.c @@ -272,7 +272,7 @@ static int vrb_close_free_ep(struct vrb_ep *ep) free(ep->util_ep.ep_fid.msg); ep->util_ep.ep_fid.msg = NULL; - free(ep->cm_hdr); + 
free(ep->cm_priv_data); if (ep->util_ep.rx_cq) { cq = container_of(ep->util_ep.rx_cq, struct vrb_cq, util_cq); diff --git a/prov/verbs/src/verbs_eq.c b/prov/verbs/src/verbs_eq.c index 845adb5b618..260817fae34 100644 --- a/prov/verbs/src/verbs_eq.c +++ b/prov/verbs/src/verbs_eq.c @@ -281,6 +281,15 @@ static inline int vrb_eq_copy_event_data(struct fi_eq_cm_entry *entry, return datalen; } +static void vrb_eq_skip_cm_req_hdr(const void **priv_data, + size_t *priv_data_len) +{ + if (*priv_data_len > VRB_CM_REQ_HDR_SIZE) { + *priv_data = (void*)((char *)*priv_data + VRB_CM_REQ_HDR_SIZE); + *priv_data_len -= VRB_CM_REQ_HDR_SIZE; + } +} + static void vrb_eq_skip_xrc_cm_data(const void **priv_data, size_t *priv_data_len) { @@ -850,6 +859,8 @@ vrb_eq_cm_process_event(struct vrb_eq *eq, &priv_data, &priv_datalen); if (ret == -FI_EAGAIN || *event == FI_CONNECTED) goto ack; + } else if (cma_event->id->route.addr.src_addr.sa_family == AF_IB) { + vrb_eq_skip_cm_req_hdr(&priv_data, &priv_datalen); } break; case RDMA_CM_EVENT_CONNECT_RESPONSE: