From 90e0a46093d233f8f3c272413e0bb987fc0fa887 Mon Sep 17 00:00:00 2001 From: Rishikesh K Rajak Date: Thu, 21 Oct 2021 17:49:37 +0200 Subject: [PATCH] ATL fix for verbs provider and some minor fixes --- src/atl_counters.tbl | 2 ++ src/fam_atl.cpp | 61 +++++++++++++++++++------------------- test/atl_multi_get_put.cpp | 15 ++++++++-- test/atl_parallel_test.cpp | 15 ++++++++-- test/fam_SG_ATL.cpp | 46 +++++++++++++++++++++++----- test/fam_atl_get_put.cpp | 41 ++++++++++++++++++++----- 6 files changed, 129 insertions(+), 51 deletions(-) diff --git a/src/atl_counters.tbl b/src/atl_counters.tbl index d955054..87a8f46 100644 --- a/src/atl_counters.tbl +++ b/src/atl_counters.tbl @@ -1,2 +1,4 @@ ATL_COUNTER(fam_get_atomic) ATL_COUNTER(fam_put_atomic) +ATL_COUNTER(fam_scatter_atomic) +ATL_COUNTER(fam_gather_atomic) diff --git a/src/fam_atl.cpp b/src/fam_atl.cpp index 3f86530..bb67661 100644 --- a/src/fam_atl.cpp +++ b/src/fam_atl.cpp @@ -69,7 +69,7 @@ namespace openfam { #define RETURN_WITH_FAM_EXCEPTION \ } \ catch (Fam_Exception & e) { \ - throw e; \ + throw e; \ } class ATLib::ATLimpl_ { @@ -501,8 +501,8 @@ int atl_finalize() { if (serverAddrName) free(serverAddrName); - if (defaultCtx != NULL) - delete defaultCtx; + //if (defaultCtx != NULL) + //delete defaultCtx; return 0; } @@ -599,10 +599,11 @@ int fam_get_atomic(void *local, Fam_Descriptor *descriptor, uint64_t nodeId = descriptor->get_memserver_id(); Fam_Context *ATLCtx = get_defaultCtx(nodeId); + //uint64_t ATLBaseAddr = (uint64_t)descriptor->get_base_address(); fi_context *ctx = fabric_post_response_buff(&retStatus,(*fiAddrs)[nodeId], ATLCtx,sizeof(retStatus)); ret = famCIS->get_atomic(globalDescriptor.regionId & REGIONID_MASK, globalDescriptor.offset, offset, nbytes, - key, get_selfAddr(nodeId), get_selfAddrLen(nodeId), + key, (uint64_t)local, get_selfAddr(nodeId), get_selfAddrLen(nodeId), nodeId, uid, gid); if (ret == 0) { @@ -664,7 +665,7 @@ int fam_put_atomic(void *local, Fam_Descriptor *descriptor, ret = famCIS->put_atomic(globalDescriptor.regionId & REGIONID_MASK, globalDescriptor.offset, offset, nbytes, - key, get_selfAddr(nodeId),get_selfAddrLen(nodeId), + key, (uint64_t) local, get_selfAddr(nodeId),get_selfAddrLen(nodeId), (const char *)local, nodeId, uid, gid); if ((ret == 0) && (nbytes > MAX_DATA_IN_MSG)) { @@ -691,16 +692,16 @@ int fam_scatter_atomic(void *local, Fam_Descriptor *descriptor, int32_t retStatus = -1; fi_context *ctx = NULL; Fam_Global_Descriptor globalDescriptor; - // FAM_CNTR_INC_API(fam_put_atomic); - // FAM_PROFILE_START_ALLOCATOR(fam_put_atomic); + ATL_CNTR_INC_API(fam_scatter_atomic); + ATL_PROFILE_START_ALLOCATOR(fam_scatter_atomic); if ((local == NULL) || (descriptor == NULL) || (nElements == 0)) { message << "Invalid Options"; THROW_ATL_ERR_MSG(ATL_Exception, message.str().c_str()); } ret = validate_item(descriptor); - // FAM_PROFILE_END_ALLOCATOR(fam_put_atomic); - // FAM_PROFILE_START_OPS(fam_put_atomic); + ATL_PROFILE_END_ALLOCATOR(fam_scatter_atomic); + ATL_PROFILE_START_OPS(fam_scatter_atomic); if (ret == 0) { // Read data from FAM region with this key globalDescriptor = descriptor->get_global_descriptor(); @@ -722,7 +723,7 @@ int fam_scatter_atomic(void *local, Fam_Descriptor *descriptor, ret = famCIS->scatter_strided_atomic( globalDescriptor.regionId & REGIONID_MASK, globalDescriptor.offset, - nElements, firstElement, stride, elementSize, key, get_selfAddr(nodeId), + nElements, firstElement, stride, elementSize, key, (uint64_t) local, get_selfAddr(nodeId), get_selfAddrLen(nodeId), nodeId, uid, gid); if (ret == 0) { @@ -730,8 +731,8 @@ int fam_scatter_atomic(void *local, Fam_Descriptor *descriptor, ret = retStatus; fabric_deregister_mr(mr); } - // FAM_PROFILE_END_OPS(fam_put_atomic); - } // validate_item + ATL_PROFILE_END_OPS(fam_scatter_atomic); + } return ret; } @@ -746,16 +747,16 @@ int fam_gather_atomic(void *local, Fam_Descriptor *descriptor, int32_t retStatus = -1; fi_context *ctx = NULL; Fam_Global_Descriptor globalDescriptor; - // FAM_CNTR_INC_API(fam_put_atomic); - // FAM_PROFILE_START_ALLOCATOR(fam_put_atomic); + ATL_CNTR_INC_API(fam_gather_atomic); + ATL_PROFILE_START_ALLOCATOR(fam_gather_atomic); if ((local == NULL) || (descriptor == NULL) || (nElements == 0)) { message << "Invalid Options"; THROW_ATL_ERR_MSG(ATL_Exception, message.str().c_str()); } ret = validate_item(descriptor); - // FAM_PROFILE_END_ALLOCATOR(fam_put_atomic); - // FAM_PROFILE_START_OPS(fam_put_atomic); + ATL_PROFILE_END_ALLOCATOR(fam_gather_atomic); + ATL_PROFILE_START_OPS(fam_gather_atomic); if (ret == 0) { // Read data from FAM region with this key globalDescriptor = descriptor->get_global_descriptor(); @@ -777,7 +778,7 @@ int fam_gather_atomic(void *local, Fam_Descriptor *descriptor, ret = famCIS->gather_strided_atomic( globalDescriptor.regionId & REGIONID_MASK, globalDescriptor.offset, - nElements, firstElement, stride, elementSize, key, get_selfAddr(nodeId), + nElements, firstElement, stride, elementSize, key, (uint64_t) local, get_selfAddr(nodeId), get_selfAddrLen(nodeId), nodeId, uid, gid); if (ret == 0) { @@ -785,6 +786,7 @@ int fam_gather_atomic(void *local, Fam_Descriptor *descriptor, ret = retStatus; fabric_deregister_mr(mr); } + ATL_PROFILE_END_OPS(fam_gather_atomic); // FAM_PROFILE_END_OPS(fam_put_atomic); } // validate_item return ret; @@ -801,16 +803,16 @@ int fam_scatter_atomic(void *local, Fam_Descriptor *descriptor, int32_t retStatus = -1; fi_context *ctx = NULL; Fam_Global_Descriptor globalDescriptor; - // FAM_CNTR_INC_API(fam_put_atomic); - // FAM_PROFILE_START_ALLOCATOR(fam_put_atomic); + ATL_CNTR_INC_API(fam_scatter_atomic); + ATL_PROFILE_START_ALLOCATOR(fam_scatter_atomic); if ((local == NULL) || (descriptor == NULL) || (nElements == 0)) { message << "Invalid Options"; THROW_ATL_ERR_MSG(ATL_Exception, message.str().c_str()); } ret = validate_item(descriptor); - // FAM_PROFILE_END_ALLOCATOR(fam_put_atomic); - // FAM_PROFILE_START_OPS(fam_put_atomic); + ATL_PROFILE_END_ALLOCATOR(fam_scatter_atomic); + ATL_PROFILE_START_OPS(fam_scatter_atomic); if (ret == 0) { // Read data from FAM region with this key globalDescriptor = descriptor->get_global_descriptor(); @@ -842,7 +844,7 @@ int fam_scatter_atomic(void *local, Fam_Descriptor *descriptor, ret = famCIS->scatter_indexed_atomic( globalDescriptor.regionId & REGIONID_MASK, globalDescriptor.offset, - nElements, string(indexStr.str()).c_str(), elementSize, key, + nElements, string(indexStr.str()).c_str(), elementSize, key, (uint64_t) local, get_selfAddr(nodeId), get_selfAddrLen(nodeId), nodeId, uid, gid); if (ret == 0) { @@ -850,7 +852,7 @@ int fam_scatter_atomic(void *local, Fam_Descriptor *descriptor, ret = retStatus; fabric_deregister_mr(mr); } - // FAM_PROFILE_END_OPS(fam_put_atomic); + ATL_PROFILE_END_OPS(fam_scatter_atomic); } // validate_item return ret; } @@ -865,16 +867,16 @@ int fam_gather_atomic(void *local, Fam_Descriptor *descriptor, int32_t retStatus = -1; fi_context *ctx = NULL; Fam_Global_Descriptor globalDescriptor; - // FAM_CNTR_INC_API(fam_put_atomic); - // FAM_PROFILE_START_ALLOCATOR(fam_put_atomic); + ATL_CNTR_INC_API(fam_gather_atomic); + ATL_PROFILE_START_ALLOCATOR(fam_gather_atomic); if ((local == NULL) || (descriptor == NULL) || (nElements == 0)) { message << "Invalid Options"; THROW_ATL_ERR_MSG(ATL_Exception, message.str().c_str()); } ret = validate_item(descriptor); - // FAM_PROFILE_END_ALLOCATOR(fam_put_atomic); - // FAM_PROFILE_START_OPS(fam_put_atomic); + ATL_PROFILE_END_ALLOCATOR(fam_gather_atomic); + ATL_PROFILE_START_OPS(fam_gather_atomic); if (ret == 0) { // Read data from FAM region with this key globalDescriptor = descriptor->get_global_descriptor(); @@ -906,7 +908,7 @@ int fam_gather_atomic(void *local, Fam_Descriptor *descriptor, ret = famCIS->gather_indexed_atomic( globalDescriptor.regionId & REGIONID_MASK, globalDescriptor.offset, - nElements, string(indexStr.str()).c_str(), elementSize, key, + nElements, string(indexStr.str()).c_str(), elementSize, key, (uint64_t) local, get_selfAddr(nodeId), get_selfAddrLen(nodeId), nodeId, uid, gid); if (ret == 0) { @@ -914,7 +916,7 @@ int fam_gather_atomic(void *local, Fam_Descriptor *descriptor, ret = retStatus; fabric_deregister_mr(mr); } - // FAM_PROFILE_END_OPS(fam_put_atomic); + ATL_PROFILE_END_OPS(fam_gather_atomic); } // validate_item return ret; } @@ -1017,4 +1019,3 @@ ATLib::~ATLib() { delete pATLimpl_; } } //namespace - diff --git a/test/atl_multi_get_put.cpp b/test/atl_multi_get_put.cpp index e3ee5fc..dfd0e2f 100644 --- a/test/atl_multi_get_put.cpp +++ b/test/atl_multi_get_put.cpp @@ -101,7 +101,7 @@ int main() { dataRegion = my_fam->fam_lookup_region(DATA_REGION); } catch (Fam_Exception &e) { cout << "data Region not found" << endl; - dataRegion = my_fam->fam_create_region(DATA_REGION, 1048576, 0777, RAID1); + dataRegion = my_fam->fam_create_region(DATA_REGION, 1048576, 0777, NULL); } char msg1[200] = {0}; char msg2[200] = {0}; @@ -115,12 +115,21 @@ int main() { for (i = 0; i < 200; i++) msg1[i] = 'X'; auto start = std::chrono::high_resolution_clock::now(); - myatlib->fam_put_atomic((void *)msg1, item1, 0, 200); + try { + myatlib->fam_put_atomic((void *)msg1, item1, 0, 200); + } catch(Fam_Exception &e) { + cout << "fam_put_atomic failed" << e.fam_error_msg() << endl; + } + auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; cout << "put atomic elapsed time: " << elapsed_seconds.count() << endl; start = std::chrono::high_resolution_clock::now(); - myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); // strlen(msg1)); + try { + myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); // strlen(msg1)); + } catch(Fam_Exception &e) { + cout << "fam_get_atomic failed" << e.fam_error_msg() << endl; + } end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "get atomic elapsed time: " << elapsed_seconds.count() << endl; diff --git a/test/atl_parallel_test.cpp b/test/atl_parallel_test.cpp index 5f1832f..02d0858 100644 --- a/test/atl_parallel_test.cpp +++ b/test/atl_parallel_test.cpp @@ -95,7 +95,7 @@ int main(int argc, char *argv[]) { dataRegion = my_fam->fam_lookup_region(DATA_REGION); } catch (Fam_Exception &e) { cout << "data Region not found" << endl; - dataRegion = my_fam->fam_create_region(DATA_REGION, 1048576, 0777, RAID1); + dataRegion = my_fam->fam_create_region(DATA_REGION, 1048576, 0777, NULL); } char msg1[200] = {0}; char msg2[200] = {0}; @@ -120,12 +120,21 @@ int main(int argc, char *argv[]) { auto start = std::chrono::high_resolution_clock::now(); for (i = 0; i < NUM_ITERATIONS; i++) { compflag = false; - myatlib->fam_put_atomic((void *)msg1, item1, 0, 200); + try { + myatlib->fam_put_atomic((void *)msg1, item1, 0, 200); + } catch(Fam_Exception &e) { + cout << "fam_put_atomic failed" << e.fam_error_msg() << endl; + } auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; cout << "put atomic elapsed time: " << elapsed_seconds.count() << endl; start = std::chrono::high_resolution_clock::now(); - myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); + try { + myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); + } catch(Fam_Exception &e) { + cout << "fam_get_atomic failed" << e.fam_error_msg() << endl; + } + end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "get atomic elapsed time: " << elapsed_seconds.count() << endl; diff --git a/test/fam_SG_ATL.cpp b/test/fam_SG_ATL.cpp index 02f9171..55fe1aa 100644 --- a/test/fam_SG_ATL.cpp +++ b/test/fam_SG_ATL.cpp @@ -94,7 +94,7 @@ int main() { dataRegion = my_fam->fam_lookup_region(DATA_REGION); } catch (Fam_Exception &e) { cout << "data Region not found" << endl; - dataRegion = my_fam->fam_create_region(DATA_REGION, 1048576, 0777, RAID1); + dataRegion = my_fam->fam_create_region(DATA_REGION, 1048576, 0777, NULL); } char msg1[200] = {0}; char msg2[200] = {0}; @@ -108,17 +108,26 @@ int main() { for (i = 0; i < 200; i++) msg1[i] = 'X'; auto start = std::chrono::high_resolution_clock::now(); - myatlib->fam_put_atomic((void *)msg1, item1, 0, 200); // strlen(msg1)); + try { + myatlib->fam_put_atomic((void *)msg1, item1, 0, 200); // strlen(msg1)); + } catch(Fam_Exception &e) { + cout << "fam_put_atomic failed" << e.fam_error_msg() << endl; + } + auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; cout << "put atomic elapsed time: " << elapsed_seconds.count() << endl; start = std::chrono::high_resolution_clock::now(); - myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); // strlen(msg1)); + try { + myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); // strlen(msg1)); + } catch(Fam_Exception &e) { + cout << "fam_get_atomic failed" << e.fam_error_msg() << endl; + } end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "get atomic elapsed time: " << elapsed_seconds.count() << endl; cout << msg2 << endl; - if (strcmp(msg1, msg2) != 0) + if (strncmp(msg1, msg2, 200) != 0) cout << "Test1: Comparison of full string failed" << endl; else cout << "Test1: Comparison of full string successful" << endl; @@ -128,17 +137,31 @@ int main() { // sleep(30); cout << "Scatter atomic - strided" << endl; start = std::chrono::high_resolution_clock::now(); - myatlib->fam_scatter_atomic(msg1, item1, 5, 1, 2, 2); + try { + myatlib->fam_scatter_atomic(msg1, item1, 5, 1, 2, 2); + } catch (Fam_Exception &e) { + cout << "Scatter atomic not found" << e.fam_error_msg() << endl; + } + end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "Scatter strided atomic elapsed time: " << elapsed_seconds.count() << endl; - myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); + try { + myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); + } catch(Fam_Exception &e) { + cout << "fam_get_atomic failed" << e.fam_error_msg() << endl; + } + cout << msg2 << endl; cout << "Gather atomic - strided" << endl; memset(msg2, 0, 200); start = std::chrono::high_resolution_clock::now(); - myatlib->fam_gather_atomic(msg2, item1, 5, 1, 2, 2); + try { + myatlib->fam_gather_atomic(msg2, item1, 5, 1, 2, 2); + } catch (Fam_Exception &e) { + cout << " Gather Failed" << e.fam_error_msg() << endl; + } end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "Gather strided atomic elapsed time: " << elapsed_seconds.count() @@ -153,7 +176,11 @@ int main() { msg1[i] = 'Z'; uint64_t indexes[] = {10, 17, 13, 15, 30}; start = std::chrono::high_resolution_clock::now(); - myatlib->fam_scatter_atomic(msg1, item1, 5, indexes, 2); + try { + myatlib->fam_scatter_atomic(msg1, item1, 5, indexes, 2); + } catch (Fam_Exception &e) { + cout << " Gather Failed" << e.fam_error_msg() << endl; + } end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "Scatter indexed atomic elapsed time: " << elapsed_seconds.count() @@ -164,6 +191,9 @@ int main() { memset(msg2, 0, 200); start = std::chrono::high_resolution_clock::now(); myatlib->fam_gather_atomic(msg2, item1, 5, indexes, 2); + } catch (Fam_Exception &e) { + cout << " Gather Failed" << e.fam_error_msg() << endl; + } end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "Gather indexed atomic elapsed time: " << elapsed_seconds.count() diff --git a/test/fam_atl_get_put.cpp b/test/fam_atl_get_put.cpp index 0b5ac10..29af841 100644 --- a/test/fam_atl_get_put.cpp +++ b/test/fam_atl_get_put.cpp @@ -83,10 +83,13 @@ int main() { int i; int *my_rank; bool compflag = true; + memset((void *)&fam_opts, 0, sizeof(Fam_Options)); init_fam_options(&fam_opts); + // cout << "PID ready: gdb attach " << getpid() <fam_initialize("default", &fam_opts); @@ -100,7 +103,7 @@ int main() { dataRegion = my_fam->fam_lookup_region(DATA_REGION); } catch (Fam_Exception &e) { cout << "data Region not found" << endl; - dataRegion = my_fam->fam_create_region(DATA_REGION, 1048576, 0777, RAID1); + dataRegion = my_fam->fam_create_region(DATA_REGION, 1048576, 0777, NULL); } char msg1[200] = {0}; char msg2[200] = {0}; @@ -114,12 +117,20 @@ int main() { for (i = 0; i < 200; i++) msg1[i] = 'X'; auto start = std::chrono::high_resolution_clock::now(); - myatlib->fam_put_atomic((void *)msg1, item1, 0, 200); + try { + myatlib->fam_put_atomic((void *)msg1, item1, 0, 200); + } catch(Fam_Exception &e) { + cout << "fam_put_atomic failed" << e.fam_error_msg() << endl; + } auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration elapsed_seconds = end - start; cout << "put atomic elapsed time: " << elapsed_seconds.count() << endl; start = std::chrono::high_resolution_clock::now(); - myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); // strlen(msg1)); + try { + myatlib->fam_get_atomic((void *)msg2, item1, 0, 200); // strlen(msg1)); + } catch(Fam_Exception &e) { + cout << "fam_get_atomic failed" << e.fam_error_msg() << endl; + } end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "get atomic elapsed time: " << elapsed_seconds.count() << endl; @@ -137,12 +148,20 @@ int main() { msg1[i] = 'Y'; memset(msg2, 0, 200); start = std::chrono::high_resolution_clock::now(); - myatlib->fam_put_atomic((void *)msg1, item1, 20, 150); + try { + myatlib->fam_put_atomic((void *)msg1, item1, 20, 150); + } catch(Fam_Exception &e) { + cout << "fam_put_atomic failed" << e.fam_error_msg() << endl; + } end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "put atomic elapsed time: " << elapsed_seconds.count() << endl; - myatlib->fam_get_atomic((void *)msg2, item1, 20, 150); + try { + myatlib->fam_get_atomic((void *)msg2, item1, 20, 150); + } catch(Fam_Exception &e) { + cout << "fam_get_atomic failed" << e.fam_error_msg() << endl; + } cout << msg2 << endl; if (strncmp(msg2, strchr(msg1, 'Y'), 150) == 0) cout << "Test 2: Comaparion of partial string successful" << endl; @@ -177,12 +196,20 @@ int main() { for (i = 0; i < 200; i++) msg1[i] = 'Z'; start = std::chrono::high_resolution_clock::now(); - myatlib->fam_put_atomic((void *)msg1, item1, 50, 100); + try { + myatlib->fam_put_atomic((void *)msg1, item1, 50, 100); + } catch(Fam_Exception &e) { + cout << "fam_put_atomic failed" << e.fam_error_msg() << endl; + } end = std::chrono::high_resolution_clock::now(); elapsed_seconds = end - start; cout << "put atomic elapsed time: " << elapsed_seconds.count() << endl; - myatlib->fam_get_atomic((void *)msg2, item1, 50, 100); + try { + myatlib->fam_get_atomic((void *)msg2, item1, 50, 100); + } catch(Fam_Exception &e) { + cout << "fam_get_atomic failed" << e.fam_error_msg() << endl; + } cout << msg2 << endl; if (strncmp(msg2, strchr(msg1, 'Z'), 100) == 0) cout << "Comaparion of partial string successful" << endl;