From e80a5d89c89a9f7bc5ac5da1426b9c84fc04805f Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Thu, 28 May 2020 09:33:17 -0400 Subject: [PATCH 01/13] fabtests: Add the option (-F) to select the address format to use Signed-off-by: Sylvain Didelot --- fabtests/benchmarks/msg_bw.c | 1 + fabtests/benchmarks/msg_pingpong.c | 1 + fabtests/benchmarks/rma_bw.c | 1 + fabtests/common/shared.c | 11 +++++++++++ fabtests/include/shared.h | 8 +++++--- 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/fabtests/benchmarks/msg_bw.c b/fabtests/benchmarks/msg_bw.c index f273d6a1f54..2e36d0ff1d3 100644 --- a/fabtests/benchmarks/msg_bw.c +++ b/fabtests/benchmarks/msg_bw.c @@ -107,6 +107,7 @@ int main(int argc, char **argv) hints->domain_attr->resource_mgmt = FI_RM_ENABLED; hints->domain_attr->mr_mode = opts.mr_mode; hints->domain_attr->threading = FI_THREAD_DOMAIN; + hints->addr_format = opts.address_format; ret = run(); diff --git a/fabtests/benchmarks/msg_pingpong.c b/fabtests/benchmarks/msg_pingpong.c index ef342eae898..a0386472429 100644 --- a/fabtests/benchmarks/msg_pingpong.c +++ b/fabtests/benchmarks/msg_pingpong.c @@ -107,6 +107,7 @@ int main(int argc, char **argv) hints->caps = FI_MSG; hints->domain_attr->mr_mode = opts.mr_mode; hints->domain_attr->threading = FI_THREAD_DOMAIN; + hints->addr_format = opts.address_format; ret = run(); diff --git a/fabtests/benchmarks/rma_bw.c b/fabtests/benchmarks/rma_bw.c index e4351c89bb2..a8ace33bcc1 100644 --- a/fabtests/benchmarks/rma_bw.c +++ b/fabtests/benchmarks/rma_bw.c @@ -95,6 +95,7 @@ int main(int argc, char **argv) hints->domain_attr->resource_mgmt = FI_RM_ENABLED; hints->mode = FI_CONTEXT; hints->domain_attr->threading = FI_THREAD_DOMAIN; + hints->addr_format = opts.address_format; while ((op = getopt(argc, argv, "ho:" CS_OPTS INFO_OPTS BENCHMARK_OPTS)) != -1) { switch (op) { diff --git a/fabtests/common/shared.c b/fabtests/common/shared.c index ba32da25238..51e83c77329 100644 --- a/fabtests/common/shared.c +++ 
b/fabtests/common/shared.c @@ -2764,6 +2764,7 @@ void ft_addr_usage() "over the, optional, port"); FT_PRINT_OPTS_USAGE("-C ", "number of connections to accept before " "cleaning up a server"); + FT_PRINT_OPTS_USAGE("-F ", "Address format (default:FI_FORMAT_UNSPEC)"); } void ft_usage(char *name, char *desc) @@ -2923,6 +2924,16 @@ void ft_parse_addr_opts(int op, char *optarg, struct ft_opts *opts) else opts->oob_port = default_oob_port; break; + case 'F': + if (!strncasecmp("fi_sockaddr_in6", optarg, 15)) /* must precede fi_sockaddr_in, which is a prefix of it */ + opts->address_format = FI_SOCKADDR_IN6; + else if (!strncasecmp("fi_sockaddr_in", optarg, 14)) + opts->address_format = FI_SOCKADDR_IN; + else if (!strncasecmp("fi_sockaddr_ib", optarg, 14)) + opts->address_format = FI_SOCKADDR_IB; + else if (!strncasecmp("fi_sockaddr", optarg, 11)) /* keep me last */ + opts->address_format = FI_SOCKADDR; + break; case 'C': opts->options |= FT_OPT_SERVER_PERSIST; opts->num_connections = atoi(optarg); diff --git a/fabtests/include/shared.h b/fabtests/include/shared.h index 85e27bbeb1f..ebb6d4daefb 100644 --- a/fabtests/include/shared.h +++ b/fabtests/include/shared.h @@ -165,13 +165,14 @@ struct ft_opts { char *oob_port; int argc; int num_connections; + int address_format; uint64_t mr_mode; /* Fail if the selected provider does not support FI_MSG_PREFIX.
*/ int force_prefix; enum fi_hmem_iface iface; uint64_t device; - + char **argv; }; @@ -241,7 +242,7 @@ extern int ft_parent_proc; extern int ft_socket_pair[2]; extern int sock; extern int listen_sock; -#define ADDR_OPTS "B:P:s:a:b::E::C:" +#define ADDR_OPTS "B:P:s:a:b::E::C:F:" #define FAB_OPTS "f:d:p:D:i:H" #define INFO_OPTS FAB_OPTS "e:M:" #define CS_OPTS ADDR_OPTS "I:S:mc:t:w:l" @@ -265,7 +266,8 @@ extern char default_port[8]; .mr_mode = FI_MR_LOCAL | OFI_MR_BASIC_MAP, \ .iface = FI_HMEM_SYSTEM, \ .device = 0, \ - .argc = argc, .argv = argv \ + .argc = argc, .argv = argv, \ + .address_format = FI_FORMAT_UNSPEC \ } #define FT_STR_LEN 32 From c7b0b3bde0e6b712ee8119384807fede1d352f8b Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Mon, 29 Jun 2020 04:12:14 -0400 Subject: [PATCH 02/13] fabtests: Add the option -F to the man page Signed-off-by: Sylvain Didelot --- fabtests/man/fabtests.7.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fabtests/man/fabtests.7.md b/fabtests/man/fabtests.7.md index 72592563003..f64b2353900 100644 --- a/fabtests/man/fabtests.7.md +++ b/fabtests/man/fabtests.7.md @@ -382,6 +382,9 @@ the list available for that test. *-s
* : Specifies the address of the local endpoint. +*-F <addr_format>* +: Specifies the address format. + *-b[=oob_port]* : Enables out-of-band (via sockets) address exchange and test synchronization. A port for the out-of-band connection may be specified From 534f1cb964145fbc9b6d8bff355587fc50464357 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Wed, 3 Jun 2020 10:23:14 -0400 Subject: [PATCH 03/13] common: ofi_addr_format() is now public This patch makes ofi_addr_format() public (i.e., no longer static), so the providers can use it. Signed-off-by: Sylvain Didelot --- include/ofi_net.h | 1 + src/common.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/include/ofi_net.h b/include/ofi_net.h index 7a924df2ac5..6d06c1488a5 100644 --- a/include/ofi_net.h +++ b/include/ofi_net.h @@ -323,6 +323,7 @@ size_t ofi_mask_addr(struct sockaddr *maskaddr, const struct sockaddr *srcaddr, */ const char *ofi_straddr(char *buf, size_t *len, uint32_t addr_format, const void *addr); +uint32_t ofi_addr_format(const char *str); /* Returns allocated address to caller. Caller must free.
*/ int ofi_str_toaddr(const char *str, uint32_t *addr_format, diff --git a/src/common.c b/src/common.c index cf65c8024cd..f8729eb750e 100644 --- a/src/common.c +++ b/src/common.c @@ -380,7 +380,7 @@ const char *ofi_straddr(char *buf, size_t *len, return buf; } -static uint32_t ofi_addr_format(const char *str) +uint32_t ofi_addr_format(const char *str) { char fmt[16]; int ret; From 39ec048e642d2048a8dcd26133b057325d13e6e6 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Wed, 3 Jun 2020 10:28:33 -0400 Subject: [PATCH 04/13] prov/verbs: Select the correct rdmacm port space Signed-off-by: Sylvain Didelot --- prov/verbs/src/fi_verbs.c | 2 +- prov/verbs/src/fi_verbs.h | 1 + prov/verbs/src/verbs_ep.c | 5 +++-- prov/verbs/src/verbs_info.c | 14 +++++++++++++- 4 files changed, 18 insertions(+), 4 deletions(-) diff --git a/prov/verbs/src/fi_verbs.c b/prov/verbs/src/fi_verbs.c index e4a4ee8b86f..8d2cbbcca6b 100644 --- a/prov/verbs/src/fi_verbs.c +++ b/prov/verbs/src/fi_verbs.c @@ -165,7 +165,7 @@ int vrb_get_rai_id(const char *node, const char *service, uint64_t flags, if (ret) return ret; - ret = rdma_create_id(NULL, id, NULL, RDMA_PS_TCP); + ret = rdma_create_id(NULL, id, NULL, vrb_get_port_space(hints)); if (ret) { VERBS_INFO_ERRNO(FI_LOG_FABRIC, "rdma_create_id", errno); ret = -errno; diff --git a/prov/verbs/src/fi_verbs.h b/prov/verbs/src/fi_verbs.h index 80af7fecb82..670eff38024 100644 --- a/prov/verbs/src/fi_verbs.h +++ b/prov/verbs/src/fi_verbs.h @@ -768,6 +768,7 @@ int vrb_fi_to_rai(const struct fi_info *fi, uint64_t flags, int vrb_get_matching_info(uint32_t version, const struct fi_info *hints, struct fi_info **info, const struct fi_info *verbs_info, uint8_t passive); +int vrb_get_port_space(const struct fi_info *info); void vrb_alter_info(const struct fi_info *hints, struct fi_info *info); struct verbs_ep_domain { diff --git a/prov/verbs/src/verbs_ep.c b/prov/verbs/src/verbs_ep.c index 18348cc2dbd..4f502abcb96 100644 --- a/prov/verbs/src/verbs_ep.c +++ 
b/prov/verbs/src/verbs_ep.c @@ -1046,7 +1046,7 @@ int vrb_open_ep(struct fid_domain *domain, struct fi_info *info, if (!info->handle) { /* Only RC, XRC active RDMA CM ID is created at connect */ if (!(dom->flags & VRB_USE_XRC)) { - ret = vrb_create_ep(info, RDMA_PS_TCP, + ret = vrb_create_ep(info, vrb_get_port_space(info), &ep->id); if (ret) goto err1; @@ -1252,7 +1252,8 @@ int vrb_passive_ep(struct fid_fabric *fabric, struct fi_info *info, _pep->info->dest_addrlen = 0; } - ret = rdma_create_id(NULL, &_pep->id, &_pep->pep_fid.fid, RDMA_PS_TCP); + ret = rdma_create_id(NULL, &_pep->id, &_pep->pep_fid.fid, + vrb_get_port_space(info)); if (ret) { VERBS_INFO(FI_LOG_DOMAIN, "Unable to create PEP rdma_cm_id\n"); goto err2; diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index 7cbf05ffa2c..79eae023c77 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "fi_verbs.h" @@ -302,7 +303,6 @@ int vrb_fi_to_rai(const struct fi_info *fi, uint64_t flags, rai->ai_flags |= RAI_NUMERICHOST; rai->ai_qp_type = IBV_QPT_RC; - rai->ai_port_space = RDMA_PS_TCP; if (!fi) return 0; @@ -310,18 +310,22 @@ int vrb_fi_to_rai(const struct fi_info *fi, uint64_t flags, switch(fi->addr_format) { case FI_SOCKADDR_IN: case FI_FORMAT_UNSPEC: + rai->ai_port_space = RDMA_PS_TCP; rai->ai_family = AF_INET; rai->ai_flags |= RAI_FAMILY; break; case FI_SOCKADDR_IN6: + rai->ai_port_space = RDMA_PS_TCP; rai->ai_family = AF_INET6; rai->ai_flags |= RAI_FAMILY; break; case FI_SOCKADDR_IB: + rai->ai_port_space = RDMA_PS_IB; rai->ai_family = AF_IB; rai->ai_flags |= RAI_FAMILY; break; case FI_SOCKADDR: + rai->ai_port_space = RDMA_PS_TCP; if (fi->src_addrlen) { rai->ai_family = ((struct sockaddr *)fi->src_addr)->sa_family; rai->ai_flags |= RAI_FAMILY; @@ -983,6 +987,14 @@ static int vrb_ifa_rdma_info(const struct ifaddrs *ifa, char **dev_name, return ret; } +int vrb_get_port_space(const struct fi_info *info) +{ + 
if (info != NULL && info->addr_format == FI_SOCKADDR_IB) + return RDMA_PS_IB; + else + return RDMA_PS_TCP; +} + /* Builds a list of interfaces that correspond to active verbs devices */ static int vrb_getifaddrs(struct dlist_entry *verbs_devs) { From 3481b4dc64c909ca49ebdf8ff565a1e67ef037ef Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Wed, 3 Jun 2020 10:30:05 -0400 Subject: [PATCH 05/13] ofi_net.h: Define OFI_IB_IP_{PORT,PS}_MASK Values are taken from rdma_cma.h. Next patches in the series require those definitions in the common Libfabric code. Signed-off-by: Sylvain Didelot --- include/ofi_net.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/ofi_net.h b/include/ofi_net.h index 6d06c1488a5..d204a1c3b9c 100644 --- a/include/ofi_net.h +++ b/include/ofi_net.h @@ -127,6 +127,10 @@ int ofi_discard_socket(SOCKET sock, size_t len); #define OFI_ADDRSTRLEN (INET6_ADDRSTRLEN + 50) +/* values taken from librdmacm/rdma_cma.h */ +#define OFI_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL +#define OFI_IB_IP_PORT_MASK 0x000000000000FFFFULL + union ofi_sock_ip { struct sockaddr sa; struct sockaddr_in sin; From 58139d89f5b3f8d377629e894f00d0d464fe5ea3 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Tue, 30 Jun 2020 06:05:47 -0400 Subject: [PATCH 06/13] windows/osd.h: Map strtok_r() to strtok_s() Windows doesn't define strtok_r() but strtok_s() can be used as a replacement. 
Signed-off-by: Sylvain Didelot --- include/windows/osd.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/windows/osd.h b/include/windows/osd.h index 9ee9d0d60e5..d3cabcebb65 100644 --- a/include/windows/osd.h +++ b/include/windows/osd.h @@ -261,6 +261,7 @@ do \ #define strcasecmp _stricmp #define snprintf _snprintf #define sleep(x) Sleep(x * 1000) +#define strtok_r strtok_s #define __PRI64_PREFIX "ll" From 04ce93d9c66a6b01484d5810aeab73ebf54c7a66 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Wed, 3 Jun 2020 10:32:47 -0400 Subject: [PATCH 07/13] prov/verbs: Add functions to convert fi_sockaddr_ib addresses This patch defines 2 new formats for fi_sockaddr_ib addresses: fi_sockaddr_ib://[<GID>]:<P_Key>:<port_space>:<scope_id> and: fi_sockaddr_ib://[<GID>]:<P_Key>:<port_space>:<scope_id>:<port> Change-Id: If7900b71e01adbed1510f35fbdd298800ca75758 Signed-off-by: Sylvain Didelot --- include/ofi_net.h | 10 +++++ src/common.c | 111 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 120 insertions(+), 1 deletion(-) diff --git a/include/ofi_net.h b/include/ofi_net.h index d204a1c3b9c..eb5abeacbb0 100644 --- a/include/ofi_net.h +++ b/include/ofi_net.h @@ -131,6 +131,16 @@ int ofi_discard_socket(SOCKET sock, size_t len); #define OFI_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL #define OFI_IB_IP_PORT_MASK 0x000000000000FFFFULL +struct ofi_sockaddr_ib { + unsigned short int sib_family; /* AF_IB */ + uint16_t sib_pkey; + uint32_t sib_flowinfo; + uint8_t sib_addr[16]; + uint64_t sib_sid; + uint64_t sib_sid_mask; + uint64_t sib_scope_id; +}; + union ofi_sock_ip { struct sockaddr sa; struct sockaddr_in sin; diff --git a/src/common.c b/src/common.c index f8729eb750e..b06a4af018c 100644 --- a/src/common.c +++ b/src/common.c @@ -284,6 +284,7 @@ const char *ofi_straddr(char *buf, size_t *len, const struct sockaddr *sock_addr; const struct sockaddr_in6 *sin6; const struct sockaddr_in *sin; + const struct ofi_sockaddr_ib *sib; char str[INET6_ADDRSTRLEN + 8]; size_t size; @@ -332,7 +333,19 @@ const char *ofi_straddr(char *buf, size_t *len,
str, *((uint16_t *)addr + 8), *((uint32_t *)addr + 5)); break; case FI_SOCKADDR_IB: - size = snprintf(buf, *len, "fi_sockaddr_ib://%p", addr); + sib = addr; + memset(str, 0, sizeof(str)); + if (!inet_ntop(AF_INET6, sib->sib_addr, str, INET6_ADDRSTRLEN)) + return NULL; + + size = snprintf(buf, *len, "fi_sockaddr_ib://[%s]" /* GID */ + ":0x%" PRIx16 /* P_Key */ + ":0x%" PRIx16 /* port space */ + ":0x%" PRIx8 /* Scope ID */, + str, /* GID */ + ntohs(sib->sib_pkey), /* P_Key */ + (uint16_t)(ntohll(sib->sib_sid) >> 16) & 0xfff, /* port space */ + (uint8_t)ntohll(sib->sib_scope_id) & 0xff); break; case FI_ADDR_PSMX: size = snprintf(buf, *len, "fi_addr_psmx://%" PRIx64, @@ -476,6 +489,101 @@ static int ofi_str_to_ib_ud(const char *str, void **addr, size_t *len) return -FI_EINVAL; } +static int ofi_str_to_sib(const char *str, void **addr, size_t *len) +{ + int ret; + char *tok, *endptr, *saveptr; + struct ofi_sockaddr_ib *sib; + uint16_t pkey; + uint16_t ps; + uint64_t scope_id; + uint16_t port; + char gid[64 + 1]; + char extra_str[64 + 1]; + + memset(gid, 0, sizeof(gid)); + + ret = sscanf(str, "%*[^:]://[%64[^]]]" /* GID */ + ":%64s", /* P_Key : port_space : Scope ID : port */ + gid, extra_str); + if (ret != 2) { + FI_WARN(&core_prov, FI_LOG_CORE, + "Invalid GID in address: %s\n", str); + return -FI_EINVAL; + } + + tok = strtok_r(extra_str, ":", &saveptr); + if (!tok) { + FI_WARN(&core_prov, FI_LOG_CORE, + "Invalid pkey in address: %s\n", str); + return -FI_EINVAL; + } + + pkey = strtol(tok, &endptr, 0); + if (*endptr) { + FI_WARN(&core_prov, FI_LOG_CORE, + "Invalid pkey in address: %s\n", str); + return -FI_EINVAL; + } + + tok = strtok_r(NULL, ":", &saveptr); + if (!tok) { + FI_WARN(&core_prov, FI_LOG_CORE, + "Invalid port space in address: %s\n", str); + return -FI_EINVAL; + } + + ps = strtol(tok, &endptr, 0); + if (*endptr) { + FI_WARN(&core_prov, FI_LOG_CORE, + "Invalid port space in address: %s\n", str); + return -FI_EINVAL; + } + + tok = strtok_r(NULL, ":", 
&saveptr); + if (!tok) { + FI_WARN(&core_prov, FI_LOG_CORE, + "Invalid scope id in address: %s\n", str); + return -FI_EINVAL; + } + + scope_id = strtol(tok, &endptr, 0); + if (*endptr) { + FI_WARN(&core_prov, FI_LOG_CORE, + "Invalid scope id in address: %s\n", str); + return -FI_EINVAL; + } + + /* Port is optional */ + tok = strtok_r(NULL, ":", &saveptr); + if (tok) + port = strtol(tok, &endptr, 0); + else + port = 0; + + *len = sizeof(struct ofi_sockaddr_ib); + *addr = calloc(1, *len); + if (!*addr) + return -FI_ENOMEM; + + sib = (struct ofi_sockaddr_ib *)(*addr); + + if (inet_pton(AF_INET6, gid, sib->sib_addr) > 0) { + sib->sib_family = AF_IB; + sib->sib_pkey = htons(pkey); + if (ps && port) { + sib->sib_sid = htonll(((uint64_t) ps << 16) + port); + sib->sib_sid_mask = htonll(OFI_IB_IP_PS_MASK | + OFI_IB_IP_PORT_MASK); + } + sib->sib_scope_id = htonll(scope_id); + return FI_SUCCESS; + } + + free(*addr); + return -FI_EINVAL; +} + static int ofi_str_to_efa(const char *str, void **addr, size_t *len) { char gid[INET6_ADDRSTRLEN]; @@ -691,6 +799,7 @@ int ofi_str_toaddr(const char *str, uint32_t *addr_format, case FI_ADDR_EFA: return ofi_str_to_efa(str, addr, len); case FI_SOCKADDR_IB: + return ofi_str_to_sib(str, addr, len); case FI_ADDR_GNI: case FI_ADDR_BGQ: case FI_ADDR_MLX: From ddf2b15a7d040f297910eb7449664442b3c4edee Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Wed, 3 Jun 2020 10:37:42 -0400 Subject: [PATCH 08/13] prov/verbs: Generate native IB addresses for all pairs ... ... in the system, independent of whether ipoib is enabled on that pair or not. That change allows fi_getinfo() to retrieve IB interfaces in the case ipoib is not available on the system. 
Signed-off-by: Sylvain Didelot --- prov/verbs/src/fi_verbs.h | 2 + prov/verbs/src/verbs_info.c | 110 ++++++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/prov/verbs/src/fi_verbs.h b/prov/verbs/src/fi_verbs.h index 670eff38024..65666e27589 100644 --- a/prov/verbs/src/fi_verbs.h +++ b/prov/verbs/src/fi_verbs.h @@ -756,6 +756,8 @@ int vrb_ep_destroy_xrc_qp(struct vrb_xrc_ep *ep); int vrb_xrc_close_srq(struct vrb_srq_ep *srq_ep); int vrb_sockaddr_len(struct sockaddr *addr); +void vrb_set_sid(enum rdma_port_space ps, + uint16_t port, struct sockaddr_ib *sib); int vrb_init_info(const struct fi_info **all_infos); int vrb_getinfo(uint32_t version, const char *node, const char *service, diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index 79eae023c77..a31b81ba4aa 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -995,6 +995,111 @@ int vrb_get_port_space(const struct fi_info *info) return RDMA_PS_TCP; } +void vrb_set_sid(enum rdma_port_space ps, + uint16_t port, struct sockaddr_ib *sib) +{ + sib->sib_sid = htonll(((uint64_t) ps << 16) + ntohs(port)); + sib->sib_sid_mask = htonll(OFI_IB_IP_PS_MASK | OFI_IB_IP_PORT_MASK); +} + +static struct rdma_addrinfo *vrb_alloc_ib_addrinfo(uint8_t port_num, + const union ibv_gid *gid, uint16_t pkey) +{ + struct rdma_addrinfo *rai; + struct sockaddr_ib *sib; + + rai = calloc(1, sizeof(struct rdma_addrinfo)); + if (!rai) + return NULL; + + rai->ai_flags = RAI_PASSIVE | RAI_NUMERICHOST | RAI_FAMILY; + rai->ai_family = AF_IB; + rai->ai_port_space = RDMA_PS_IB; + + sib = calloc(1, sizeof(struct sockaddr_ib)); + if (!sib) { + free(rai); + return NULL; + } + rai->ai_src_addr = (struct sockaddr *) sib; + rai->ai_src_len = sizeof(struct sockaddr_ib); + + sib->sib_family = AF_IB; + memcpy(&sib->sib_addr.sib_raw, &gid->raw, sizeof(*gid)); + sib->sib_pkey = pkey; + sib->sib_scope_id = port_num; + + vrb_set_sid(RDMA_PS_IB, 0, sib); + + return rai; +} + +static int 
vrb_get_sib(struct dlist_entry *verbs_devs) +{ + struct rdma_addrinfo *rai = NULL; + struct ibv_device **devices; + char *dev_name = NULL; + int num_devices; + struct ibv_context *context; + int ret, num_verbs_ifs = 0; + struct ibv_device_attr device_attr; + struct ibv_port_attr port_attr; + union ibv_gid gid; + uint16_t pkey; + + devices = ibv_get_device_list(&num_devices); + if (!devices) + return -errno; + + for (int dev = 0; dev < num_devices; dev++) { + /* ibv_open_device() returns NULL on failure: skip devices that cannot be opened */ + context = ibv_open_device(devices[dev]); + if (!context) + continue; + + ret = ibv_query_device(context, &device_attr); + if (ret) { + ibv_close_device(context); + continue; + } + + for (int port = 1; port <= device_attr.phys_port_cnt; port++) { + ret = ibv_query_port(context, port, &port_attr); + if (ret) + continue; + + for (int gidx = 0; gidx < port_attr.gid_tbl_len; gidx++) { + /* gid_tbl_len may contain GID entries that are NULL (fe80::), + * so we need to filter them out */ + ret = ibv_query_gid(context, port, gidx, &gid); + if (ret || !gid.global.interface_id || !gid.global.subnet_prefix) + continue; + + for (int pidx = 0; pidx < port_attr.pkey_tbl_len; pidx++) { + ret = ibv_query_pkey(context, port, pidx, &pkey); + if (ret || !pkey) + continue; + + rai = vrb_alloc_ib_addrinfo(port, &gid, pkey); + if (!rai) + continue; + + dev_name = strdup(ibv_get_device_name(context->device)); + if (!dev_name) { + /* undo what this call still owns before reporting the failure */ + rdma_freeaddrinfo(rai); + ibv_close_device(context); + ibv_free_device_list(devices); + return -FI_ENOMEM; + } + + ret = verbs_devs_add(verbs_devs, dev_name, rai); + if (ret) { + free(dev_name); + rdma_freeaddrinfo(rai); + continue; + } + + num_verbs_ifs++; + } + } + } + + /* only dev_name and rai were handed to verbs_devs_add(), so the context is no longer needed */ + ibv_close_device(context); + } + + ibv_free_device_list(devices); + return num_verbs_ifs ?
0 : -FI_ENODATA; +} + /* Builds a list of interfaces that correspond to active verbs devices */ static int vrb_getifaddrs(struct dlist_entry *verbs_devs) { @@ -1130,6 +1235,9 @@ static void vrb_sockaddr_set_port(struct sockaddr *sa, uint16_t port) case AF_INET6: ((struct sockaddr_in6 *)sa)->sin6_port = port; break; + case AF_IB: + vrb_set_sid(RDMA_PS_IB, port, (struct sockaddr_ib *)sa); + break; } } @@ -1249,6 +1357,8 @@ int vrb_init_info(const struct fi_info **all_infos) vrb_getifaddrs(&verbs_devs); + vrb_get_sib(&verbs_devs); + if (dlist_empty(&verbs_devs)) FI_WARN(&vrb_prov, FI_LOG_FABRIC, "no valid IPoIB interfaces found, FI_EP_MSG endpoint " From 49021849ece146901ab7e06f48095fc3de49cd7c Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Wed, 3 Jun 2020 10:44:54 -0400 Subject: [PATCH 09/13] prov/verbs: fi_getinfo() returns fi_sockaddr_ib interfaces. This is the final patch of the series that adds support for GID-based connection establishment. The Verbs provider can now connect directly to the network adapters using the GID. In other words, the patch allows Libfabric to be used even if no IP address is set for the InfiniBand interfaces. There are significant issues with using IP addresses for connection establishment: - It requires setting up and maintaining IP addresses for every IB interface. - In the context of multirail (multiple local interfaces that belong to the same network subnet), it requires specific IP routes to prevent one interface from replying on behalf of another. Connection establishment would fail otherwise. The GID can be found in the src_addr field returned by "fi_info -p verbs -v". Example of output: src_addr: fi_sockaddr_ib://[fe80::248a:703:1c:dc0c]:0xffff:0x13f:0x0 The patch also modifies fabtests so that anybody can start testing this new feature. A new option -F specifies the address format that is used for the source/destination addresses.
After figuring out the GID of the interface that will be used for the server, one can run the following commands with fabtests (the P_Key and port space are parsed with strtol() base 0, so hexadecimal values need the 0x prefix): Server: fi_msg_bw -s [fe80::248a:703:1c:dc0c]:0xffff:0x13f:0 -e msg \ -p verbs -F fi_sockaddr_ib Client: fi_msg_bw -e msg -p verbs \ -F fi_sockaddr_ib [fe80::248a:703:1c:dc0c]:0xffff:0x13f:0 Signed-off-by: Sylvain Didelot --- prov/verbs/src/fi_verbs.c | 93 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 1 deletion(-) diff --git a/prov/verbs/src/fi_verbs.c b/prov/verbs/src/fi_verbs.c index 8d2cbbcca6b..4bcc42af3c9 100644 --- a/prov/verbs/src/fi_verbs.c +++ b/prov/verbs/src/fi_verbs.c @@ -103,7 +103,7 @@ int vrb_sockaddr_len(struct sockaddr *addr) } static int -vrb_get_rdma_rai(const char *node, const char *service, uint64_t flags, +vrb_get_rdmacm_rai(const char *node, const char *service, uint64_t flags, const struct fi_info *hints, struct rdma_addrinfo **rai) { struct rdma_addrinfo rai_hints, *_rai; @@ -154,6 +154,97 @@ vrb_get_rdma_rai(const char *node, const char *service, uint64_t flags, return ret; } +static int vrb_get_sib_rai(const char *node, const char *service, uint64_t flags, + const struct fi_info *hints, struct rdma_addrinfo **rai) +{ + struct sockaddr_ib *sib; + size_t sib_len; + char *straddr; + uint32_t fmt; + int ret; + bool has_prefix; + const char *prefix = "fi_sockaddr_ib://"; + + *rai = calloc(1, sizeof(struct rdma_addrinfo)); + if (*rai == NULL) + return -FI_ENOMEM; + + ret = vrb_fi_to_rai(hints, flags, *rai); + if (ret) + return ret; + + if (node) { + fmt = ofi_addr_format(node); + if (fmt == FI_SOCKADDR_IB) + has_prefix = true; + else if (fmt == FI_FORMAT_UNSPEC) + has_prefix = false; + else + return -FI_EINVAL; + + if (service) { + ret = asprintf(&straddr, "%s%s:%s", has_prefix ? "" : prefix, + node, service); + } else { + ret = asprintf(&straddr, "%s%s", has_prefix ?
"" : prefix, node); + } + + if (ret == -1) + return -FI_ENOMEM; + + ret = ofi_str_toaddr(straddr, &fmt, (void **)&sib, &sib_len); + free(straddr); + + if (ret || fmt != FI_SOCKADDR_IB) { + return -FI_EINVAL; + } + + if (flags & FI_SOURCE) { + (*rai)->ai_flags |= RAI_PASSIVE; + if ((*rai)->ai_src_addr) + free((*rai)->ai_src_addr); + (*rai)->ai_src_addr = (void *)sib; + (*rai)->ai_src_len = sizeof(struct sockaddr_ib); + } else { + if ((*rai)->ai_dst_addr) + free((*rai)->ai_dst_addr); + (*rai)->ai_dst_addr = (void *)sib; + (*rai)->ai_dst_len = sizeof(struct sockaddr_ib); + } + + } else if (service) { + if ((flags & FI_SOURCE) && (*rai)->ai_src_addr) { + if ((*rai)->ai_src_len < sizeof(struct sockaddr_ib)) + return -FI_EINVAL; + + (*rai)->ai_src_len = sizeof(struct sockaddr_ib); + sib = (struct sockaddr_ib *)(*rai)->ai_src_addr; + } else { + if ((*rai)->ai_dst_len < sizeof(struct sockaddr_ib)) + return -FI_EINVAL; + + (*rai)->ai_dst_len = sizeof(struct sockaddr_ib); + sib = (struct sockaddr_ib *)(*rai)->ai_dst_addr; + } + + sib->sib_sid = htonll(((uint64_t) RDMA_PS_IB << 16) + (uint16_t)atoi(service)); + sib->sib_sid_mask = htonll(OFI_IB_IP_PS_MASK | OFI_IB_IP_PORT_MASK); + } + + return 0; +} + +int vrb_get_rdma_rai(const char *node, const char *service, uint64_t flags, + const struct fi_info *hints, struct rdma_addrinfo **rai) +{ + if (hints && hints->addr_format == FI_SOCKADDR_IB) { + if (node || hints->src_addr || hints->dest_addr) + return vrb_get_sib_rai(node, service, flags, hints, rai); + /* fallthrough */ + } + return vrb_get_rdmacm_rai(node, service, flags, hints, rai); +} + int vrb_get_rai_id(const char *node, const char *service, uint64_t flags, const struct fi_info *hints, struct rdma_addrinfo **rai, struct rdma_cm_id **id) From f494c4c10d4368abd90a51c018218b9b2db11a09 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Mon, 15 Jun 2020 04:47:42 -0400 Subject: [PATCH 10/13] prov/verbs: Provide the RDMA CM header when AF_IB is used. 
With AF_IB, user space must fill out the RDMA CM header itself and pass it to the kernel. References: - RDMA CM header format: https://github.com/linux-rdma/rdma-core/blob/master/\ librdmacm/cma.h#L105 - https://www.spinics.net/lists/linux-rdma/msg22381.html - IBTA Architecture Specification Vol 1. Annex A11: RDMA IP CM Service. Signed-off-by: Sylvain Didelot --- prov/verbs/src/fi_verbs.h | 12 ++++++++++ prov/verbs/src/verbs_cm.c | 48 ++++++++++++++++++++++++++++++--------- prov/verbs/src/verbs_ep.c | 2 +- prov/verbs/src/verbs_eq.c | 13 +++++++++++ 4 files changed, 63 insertions(+), 12 deletions(-) diff --git a/prov/verbs/src/fi_verbs.h b/prov/verbs/src/fi_verbs.h index 65666e27589..16f38b78691 100644 --- a/prov/verbs/src/fi_verbs.h +++ b/prov/verbs/src/fi_verbs.h @@ -594,6 +594,7 @@ struct vrb_ep { size_t rx_cq_size; struct rdma_conn_param conn_param; struct vrb_cm_data_hdr *cm_hdr; + void *cm_priv_data; }; @@ -700,6 +701,17 @@ struct vrb_connreq { struct vrb_xrc_conn_info xrc; }; +/* Structure below is a copy of the RDMA CM header (structure ib_connect_hdr in + * file librdmacm/cma.h) + * DO NOT MODIFY!
*/ +struct vrb_rdma_cm_hdr { + uint8_t cma_version; /* Set by the kernel */ + uint8_t ip_version; /* IP version: 7:4 */ + uint16_t port; + uint32_t src_addr[4]; + uint32_t dst_addr[4]; +}; + struct vrb_cm_data_hdr { uint8_t size; char data[]; diff --git a/prov/verbs/src/verbs_cm.c b/prov/verbs/src/verbs_cm.c index 63aea5feed2..228dfdf1879 100644 --- a/prov/verbs/src/verbs_cm.c +++ b/prov/verbs/src/verbs_cm.c @@ -123,23 +123,41 @@ vrb_msg_ep_prepare_cm_data(const void *param, size_t param_size, static inline void vrb_ep_prepare_rdma_cm_param(struct rdma_conn_param *conn_param, - struct vrb_cm_data_hdr *cm_hdr, - size_t cm_hdr_data_size) + void *priv_data, size_t priv_data_size) { - conn_param->private_data = cm_hdr; - conn_param->private_data_len = (uint8_t)cm_hdr_data_size; + conn_param->private_data = priv_data; + conn_param->private_data_len = (uint8_t)priv_data_size; conn_param->responder_resources = RDMA_MAX_RESP_RES; conn_param->initiator_depth = RDMA_MAX_INIT_DEPTH; conn_param->flow_control = 1; conn_param->rnr_retry_count = 7; } +static void +vrb_msg_ep_prepare_rdma_cm_hdr(void *priv_data, + const struct rdma_cm_id *id) +{ + struct vrb_rdma_cm_hdr *rdma_cm_hdr = priv_data; + + rdma_cm_hdr->ip_version = 0; + rdma_cm_hdr->port = 0; + + /* Record the GIDs */ + memcpy(rdma_cm_hdr->src_addr, + &((struct ofi_sockaddr_ib *)&id->route.addr.src_addr)->sib_addr, 16); + memcpy(rdma_cm_hdr->dst_addr, + &((struct ofi_sockaddr_ib *)&id->route.addr.dst_addr)->sib_addr, 16); +} + static int vrb_msg_ep_connect(struct fid_ep *ep_fid, const void *addr, const void *param, size_t paramlen) { struct vrb_ep *ep = container_of(ep_fid, struct vrb_ep, util_ep.ep_fid); + size_t priv_data_len; + struct vrb_cm_data_hdr *cm_hdr; + off_t rdma_cm_hdr_len = 0; int ret; if (OFI_UNLIKELY(paramlen > VERBS_CM_DATA_SIZE)) @@ -151,13 +169,21 @@ vrb_msg_ep_connect(struct fid_ep *ep_fid, const void *addr, return ret; } - ep->cm_hdr = malloc(sizeof(*(ep->cm_hdr)) + paramlen); - if (!ep->cm_hdr) + if 
(ep->id->route.addr.src_addr.sa_family == AF_IB) + rdma_cm_hdr_len = sizeof(struct vrb_rdma_cm_hdr); + + priv_data_len = sizeof(*cm_hdr) + paramlen + rdma_cm_hdr_len; + ep->cm_priv_data = malloc(priv_data_len); + if (!ep->cm_priv_data) return -FI_ENOMEM; - vrb_msg_ep_prepare_cm_data(param, paramlen, ep->cm_hdr); - vrb_ep_prepare_rdma_cm_param(&ep->conn_param, ep->cm_hdr, - sizeof(*(ep->cm_hdr)) + paramlen); + if (rdma_cm_hdr_len) + vrb_msg_ep_prepare_rdma_cm_hdr(ep->cm_priv_data, ep->id); + + cm_hdr = (void *)((char *)ep->cm_priv_data + rdma_cm_hdr_len); + vrb_msg_ep_prepare_cm_data(param, paramlen, cm_hdr); + vrb_ep_prepare_rdma_cm_param(&ep->conn_param, ep->cm_priv_data, + priv_data_len); ep->conn_param.retry_count = 15; if (ep->srq_ep) @@ -168,8 +194,8 @@ vrb_msg_ep_connect(struct fid_ep *ep_fid, const void *addr, FI_WARN(&vrb_prov, FI_LOG_EP_CTRL, "rdma_resolve_route failed: %s (%d)\n", strerror(-ret), -ret); - free(ep->cm_hdr); - ep->cm_hdr = NULL; + free(ep->cm_priv_data); + ep->cm_priv_data = NULL; return ret; } return 0; diff --git a/prov/verbs/src/verbs_ep.c b/prov/verbs/src/verbs_ep.c index 4f502abcb96..a5993c36617 100644 --- a/prov/verbs/src/verbs_ep.c +++ b/prov/verbs/src/verbs_ep.c @@ -355,7 +355,7 @@ static int vrb_close_free_ep(struct vrb_ep *ep) free(ep->util_ep.ep_fid.msg); ep->util_ep.ep_fid.msg = NULL; - free(ep->cm_hdr); + free(ep->cm_priv_data); if (ep->util_ep.rx_cq) { cq = container_of(ep->util_ep.rx_cq, struct vrb_cq, util_cq); diff --git a/prov/verbs/src/verbs_eq.c b/prov/verbs/src/verbs_eq.c index c97b6fa2a67..cd664ed467d 100644 --- a/prov/verbs/src/verbs_eq.c +++ b/prov/verbs/src/verbs_eq.c @@ -284,6 +284,17 @@ static inline int vrb_eq_copy_event_data(struct fi_eq_cm_entry *entry, return datalen; } +static void vrb_eq_skip_rdma_cm_hdr(const void **priv_data, + size_t *priv_data_len) +{ + size_t rdma_cm_hdr_len = sizeof(struct vrb_rdma_cm_hdr); + + if (*priv_data_len > rdma_cm_hdr_len) { + *priv_data = (void*)((char *)*priv_data + 
rdma_cm_hdr_len); + *priv_data_len -= rdma_cm_hdr_len; + } +} + static void vrb_eq_skip_xrc_cm_data(const void **priv_data, size_t *priv_data_len) { @@ -896,6 +907,8 @@ vrb_eq_cm_process_event(struct vrb_eq *eq, } if (*event == FI_CONNECTED) goto ack; + } else if (cma_event->id->route.addr.src_addr.sa_family == AF_IB) { + vrb_eq_skip_rdma_cm_hdr(&priv_data, &priv_datalen); } break; case RDMA_CM_EVENT_CONNECT_RESPONSE: From b3c6bb26fdcf8115f3c2abb6c5ee58496ee258ab Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Fri, 3 Jul 2020 05:31:10 -0400 Subject: [PATCH 11/13] common: Add support of AF_IB to addr manipulation functions Signed-off-by: Sylvain Didelot --- include/ofi_net.h | 55 ++++++++++++++++++++++++++++++++++++++++------- src/common.c | 4 ++-- 2 files changed, 49 insertions(+), 10 deletions(-) diff --git a/include/ofi_net.h b/include/ofi_net.h index eb5abeacbb0..dff0903ff8f 100644 --- a/include/ofi_net.h +++ b/include/ofi_net.h @@ -141,11 +141,19 @@ struct ofi_sockaddr_ib { uint64_t sib_scope_id; }; +enum ofi_rdma_port_space { + OFI_RDMA_PS_IPOIB = 0x0002, + OFI_RDMA_PS_IB = 0x013F, + OFI_RDMA_PS_TCP = 0x0106, + OFI_RDMA_PS_UDP = 0x0111, +}; + union ofi_sock_ip { - struct sockaddr sa; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; - uint8_t align[32]; + struct sockaddr sa; + struct sockaddr_in sin; + struct sockaddr_in6 sin6; + struct ofi_sockaddr_ib sib; + uint8_t align[48]; }; struct ofi_addr_list_entry { @@ -174,6 +182,7 @@ void ofi_free_list_of_addr(struct slist *addr_list); #define ofi_sin6_addr(addr) (((struct sockaddr_in6 *)(addr))->sin6_addr) #define ofi_sin6_port(addr) (((struct sockaddr_in6 *)(addr))->sin6_port) +#define ofi_sib_addr(addr) (((struct ofi_sockaddr_ib *)(addr))->sib_addr) static inline size_t ofi_sizeofaddr(const struct sockaddr *addr) { @@ -182,6 +191,8 @@ static inline size_t ofi_sizeofaddr(const struct sockaddr *addr) return sizeof(struct sockaddr_in); case AF_INET6: return sizeof(struct sockaddr_in6); + case AF_IB: + 
return sizeof(struct ofi_sockaddr_ib); default: FI_WARN(&core_prov, FI_LOG_CORE, "Unknown address format\n"); return 0; @@ -195,6 +206,8 @@ static inline size_t ofi_sizeofip(const struct sockaddr *addr) return sizeof(struct in_addr); case AF_INET6: return sizeof(struct in6_addr); + case AF_IB: + return sizeof(ofi_sib_addr(addr)); default: FI_WARN(&core_prov, FI_LOG_CORE, "Unknown address format\n"); return 0; @@ -217,7 +230,7 @@ static inline int ofi_translate_addr_format(int family) uint16_t ofi_get_sa_family(const struct fi_info *info); -static inline int ofi_ipv4_is_any_addr(struct sockaddr *sa) +static inline int ofi_sin_is_any_addr(struct sockaddr *sa) { struct in_addr ia_any = { .s_addr = INADDR_ANY, @@ -230,7 +243,7 @@ static inline int ofi_ipv4_is_any_addr(struct sockaddr *sa) } -static inline int ofi_ipv6_is_any_addr(struct sockaddr *sa) +static inline int ofi_sin6_is_any_addr(struct sockaddr *sa) { struct in6_addr ia6_any = IN6ADDR_ANY_INIT; @@ -240,6 +253,16 @@ static inline int ofi_ipv6_is_any_addr(struct sockaddr *sa) return !memcmp(&ofi_sin6_addr(sa), &ia6_any, sizeof(ia6_any)); } +static inline int ofi_sib_is_any_addr(struct sockaddr *sa) +{ + struct in6_addr ia6_any = IN6ADDR_ANY_INIT; + + if (!sa) + return 0; + + return !memcmp(&ofi_sib_addr(sa), &ia6_any, sizeof(ia6_any)); +} + static inline int ofi_is_any_addr(struct sockaddr *sa) { if (!sa) @@ -247,9 +270,11 @@ static inline int ofi_is_any_addr(struct sockaddr *sa) switch(sa->sa_family) { case AF_INET: - return ofi_ipv4_is_any_addr(sa); + return ofi_sin_is_any_addr(sa); case AF_INET6: - return ofi_ipv6_is_any_addr(sa); + return ofi_sin6_is_any_addr(sa); + case AF_IB: + return ofi_sib_is_any_addr(sa); default: FI_WARN(&core_prov, FI_LOG_CORE, "Unknown address format!\n"); return 0; @@ -266,6 +291,8 @@ static inline uint16_t ofi_addr_get_port(const struct sockaddr *addr) return ntohs(ofi_sin_port((const struct sockaddr_in *) addr)); case AF_INET6: return ntohs(ofi_sin6_port((const struct 
sockaddr_in6 *) addr)); + case AF_IB: + return (uint16_t)ntohll(((const struct ofi_sockaddr_ib *)addr)->sib_sid); default: FI_WARN(&core_prov, FI_LOG_FABRIC, "Unknown address format\n"); assert(0); @@ -275,6 +302,8 @@ static inline uint16_t ofi_addr_get_port(const struct sockaddr *addr) static inline void ofi_addr_set_port(struct sockaddr *addr, uint16_t port) { + struct ofi_sockaddr_ib *sib; + switch (ofi_sa_family(addr)) { case AF_INET: ofi_sin_port(addr) = htons(port); @@ -282,6 +311,11 @@ static inline void ofi_addr_set_port(struct sockaddr *addr, uint16_t port) case AF_INET6: ofi_sin6_port(addr) = htons(port); break; + case AF_IB: + sib = (struct ofi_sockaddr_ib *)addr; + sib->sib_sid = htonll(((uint64_t)OFI_RDMA_PS_IB << 16) + ntohs(port)); + sib->sib_sid_mask = htonll(OFI_IB_IP_PS_MASK | OFI_IB_IP_PORT_MASK); + break; default: FI_WARN(&core_prov, FI_LOG_FABRIC, "Unknown address format\n"); assert(0); @@ -295,6 +329,8 @@ static inline void * ofi_get_ipaddr(const struct sockaddr *addr) return &ofi_sin_addr((const struct sockaddr_in *) addr); case AF_INET6: return &ofi_sin6_addr((const struct sockaddr_in6 *) addr); + case AF_IB: + return &ofi_sib_addr((const struct ofi_sockaddr_ib *) addr); default: return NULL; } @@ -313,6 +349,9 @@ static inline int ofi_equals_ipaddr(const struct sockaddr *addr1, case AF_INET6: return !memcmp(&ofi_sin6_addr(addr1), &ofi_sin6_addr(addr2), sizeof(ofi_sin6_addr(addr1))); + case AF_IB: + return !memcmp(&ofi_sib_addr(addr1), &ofi_sib_addr(addr2), + sizeof(ofi_sib_addr(addr1))); default: return 0; } diff --git a/src/common.c b/src/common.c index b06a4af018c..b351c5845a2 100644 --- a/src/common.c +++ b/src/common.c @@ -858,10 +858,10 @@ static int ofi_is_any_addr_port(struct sockaddr *addr) { switch (ofi_sa_family(addr)) { case AF_INET: - return (ofi_ipv4_is_any_addr(addr) && + return (ofi_sin_is_any_addr(addr) && ofi_sin_port(addr)); case AF_INET6: - return (ofi_ipv6_is_any_addr(addr) && + return (ofi_sin6_is_any_addr(addr) && 
ofi_sin6_port(addr)); default: FI_WARN(&core_prov, FI_LOG_CORE, From 330d80ee8c3830405c29ca5fa9ec2d5d4930cb25 Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Fri, 3 Jul 2020 05:34:01 -0400 Subject: [PATCH 12/13] prov/verbs: Use ofi common functions for addr manipulation Remove verbs specific functions to manipulate sockaddr addresses and use the ofi functions provided by common code. Signed-off-by: Sylvain Didelot --- prov/verbs/src/fi_verbs.h | 3 --- prov/verbs/src/verbs_cm.c | 2 +- prov/verbs/src/verbs_eq.c | 4 ++-- prov/verbs/src/verbs_info.c | 28 +++------------------------- 4 files changed, 6 insertions(+), 31 deletions(-) diff --git a/prov/verbs/src/fi_verbs.h b/prov/verbs/src/fi_verbs.h index 16f38b78691..6bc137aef81 100644 --- a/prov/verbs/src/fi_verbs.h +++ b/prov/verbs/src/fi_verbs.h @@ -767,9 +767,6 @@ void vrb_ep_tgt_conn_done(struct vrb_xrc_ep *qp); int vrb_ep_destroy_xrc_qp(struct vrb_xrc_ep *ep); int vrb_xrc_close_srq(struct vrb_srq_ep *srq_ep); -int vrb_sockaddr_len(struct sockaddr *addr); -void vrb_set_sid(enum rdma_port_space ps, - uint16_t port, struct sockaddr_ib *sib); int vrb_init_info(const struct fi_info **all_infos); int vrb_getinfo(uint32_t version, const char *node, const char *service, diff --git a/prov/verbs/src/verbs_cm.c b/prov/verbs/src/verbs_cm.c index 228dfdf1879..4ee4184d2c3 100644 --- a/prov/verbs/src/verbs_cm.c +++ b/prov/verbs/src/verbs_cm.c @@ -37,7 +37,7 @@ static int vrb_copy_addr(void *dst_addr, size_t *dst_addrlen, void *src_addr) { - size_t src_addrlen = vrb_sockaddr_len(src_addr); + size_t src_addrlen = ofi_sizeofaddr(src_addr); if (*dst_addrlen == 0) { *dst_addrlen = src_addrlen; diff --git a/prov/verbs/src/verbs_eq.c b/prov/verbs/src/verbs_eq.c index cd664ed467d..002c8a9b053 100644 --- a/prov/verbs/src/verbs_eq.c +++ b/prov/verbs/src/verbs_eq.c @@ -226,13 +226,13 @@ vrb_eq_cm_getinfo(struct rdma_cm_event *event, struct fi_info *pep_info, free((*info)->src_addr); - (*info)->src_addrlen = 
vrb_sockaddr_len(rdma_get_local_addr(event->id)); + (*info)->src_addrlen = ofi_sizeofaddr(rdma_get_local_addr(event->id)); (*info)->src_addr = malloc((*info)->src_addrlen); if (!((*info)->src_addr)) goto err2; memcpy((*info)->src_addr, rdma_get_local_addr(event->id), (*info)->src_addrlen); - (*info)->dest_addrlen = vrb_sockaddr_len(rdma_get_peer_addr(event->id)); + (*info)->dest_addrlen = ofi_sizeofaddr(rdma_get_peer_addr(event->id)); (*info)->dest_addr = malloc((*info)->dest_addrlen); if (!((*info)->dest_addr)) goto err2; diff --git a/prov/verbs/src/verbs_info.c b/prov/verbs/src/verbs_info.c index a31b81ba4aa..e9eae49a6ee 100644 --- a/prov/verbs/src/verbs_info.c +++ b/prov/verbs/src/verbs_info.c @@ -995,13 +995,6 @@ int vrb_get_port_space(const struct fi_info *info) return RDMA_PS_TCP; } -void vrb_set_sid(enum rdma_port_space ps, - uint16_t port, struct sockaddr_ib *sib) -{ - sib->sib_sid = htonll(((uint64_t) ps << 16) + ntohs(port)); - sib->sib_sid_mask = htonll(OFI_IB_IP_PS_MASK | OFI_IB_IP_PORT_MASK); -} - static struct rdma_addrinfo *vrb_alloc_ib_addrinfo(uint8_t port_num, const union ibv_gid *gid, uint16_t pkey) { @@ -1029,7 +1022,7 @@ static struct rdma_addrinfo *vrb_alloc_ib_addrinfo(uint8_t port_num, sib->sib_pkey = pkey; sib->sib_scope_id = port_num; - vrb_set_sid(RDMA_PS_IB, 0, sib); + ofi_addr_set_port((struct sockaddr *)sib, 0); return rai; } @@ -1226,21 +1219,6 @@ static int vrb_get_srcaddr_devs(struct fi_info **info) return 0; } -static void vrb_sockaddr_set_port(struct sockaddr *sa, uint16_t port) -{ - switch(sa->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sa)->sin_port = port; - break; - case AF_INET6: - ((struct sockaddr_in6 *)sa)->sin6_port = port; - break; - case AF_IB: - vrb_set_sid(RDMA_PS_IB, port, (struct sockaddr_ib *)sa); - break; - } -} - /* the `rai` parameter is used for the MSG EP type */ /* the `fmt`, `[src | dest]_addr` parameters are used for the DGRAM EP type */ /* if the `fmt` parameter isn't used, pass FI_FORMAT_UNSPEC 
*/ @@ -1301,7 +1279,7 @@ static int vrb_fill_addr(struct rdma_addrinfo *rai, struct fi_info **info, * corresponds to a valid dest addr) */ local_addr = rdma_get_local_addr(id); - rai->ai_src_len = vrb_sockaddr_len(local_addr); + rai->ai_src_len = ofi_sizeofaddr(local_addr); rai->ai_src_addr = malloc(rai->ai_src_len); if (!rai->ai_src_addr) return -FI_ENOMEM; @@ -1310,7 +1288,7 @@ static int vrb_fill_addr(struct rdma_addrinfo *rai, struct fi_info **info, /* User didn't specify a port. Zero out the random port * assigned by rdmamcm so that this rai/fi_info can be * used multiple times to create rdma endpoints.*/ - vrb_sockaddr_set_port(rai->ai_src_addr, 0); + ofi_addr_set_port(rai->ai_src_addr, 0); rai_to_fi: return vrb_set_info_addrs(*info, rai, FI_FORMAT_UNSPEC, From 523a71fe3fa6afc4fab7b601419fe1c7ad647caf Mon Sep 17 00:00:00 2001 From: Sylvain Didelot Date: Thu, 23 Jul 2020 14:30:45 -0400 Subject: [PATCH 13/13] prov/verbs: set ip_version and port correctly in RDMA CM header The patch also simplifies an 'if' statement.
Signed-off-by: Sylvain Didelot --- prov/verbs/src/fi_verbs.c | 8 ++++---- prov/verbs/src/verbs_cm.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/prov/verbs/src/fi_verbs.c b/prov/verbs/src/fi_verbs.c index 4bcc42af3c9..b1c95f8a6db 100644 --- a/prov/verbs/src/fi_verbs.c +++ b/prov/verbs/src/fi_verbs.c @@ -237,11 +237,11 @@ static int vrb_get_sib_rai(const char *node, const char *service, uint64_t flags int vrb_get_rdma_rai(const char *node, const char *service, uint64_t flags, const struct fi_info *hints, struct rdma_addrinfo **rai) { - if (hints && hints->addr_format == FI_SOCKADDR_IB) { - if (node || hints->src_addr || hints->dest_addr) - return vrb_get_sib_rai(node, service, flags, hints, rai); - /* fallthrough */ + if (hints && hints->addr_format == FI_SOCKADDR_IB && + (node || hints->src_addr || hints->dest_addr)) { + return vrb_get_sib_rai(node, service, flags, hints, rai); } + return vrb_get_rdmacm_rai(node, service, flags, hints, rai); } diff --git a/prov/verbs/src/verbs_cm.c b/prov/verbs/src/verbs_cm.c index 4ee4184d2c3..8ae1599f11f 100644 --- a/prov/verbs/src/verbs_cm.c +++ b/prov/verbs/src/verbs_cm.c @@ -139,8 +139,8 @@ vrb_msg_ep_prepare_rdma_cm_hdr(void *priv_data, { struct vrb_rdma_cm_hdr *rdma_cm_hdr = priv_data; - rdma_cm_hdr->ip_version = 0; - rdma_cm_hdr->port = 0; + rdma_cm_hdr->ip_version = 6 << 4; /* IPv6 */ + rdma_cm_hdr->port = htons(ofi_addr_get_port(&id->route.addr.src_addr)); /* Record the GIDs */ memcpy(rdma_cm_hdr->src_addr,