-
Notifications
You must be signed in to change notification settings - Fork 878
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Topic/sentinel proc name conversion #1345
Topic/sentinel proc name conversion #1345
Conversation
@@ -97,6 +97,9 @@ ORTE_DECLSPEC char *orte_pretty_print_timing(int64_t secs, int64_t usecs); | |||
#define ORTE_CONSTRUCT_LOCAL_JOBID(local, job) \ | |||
( ((local) & 0xffff0000) | ((job) & 0x0000ffff) ) | |||
|
|||
#define ORTE_CONSTRUCT_JOBID(family, local) \ | |||
ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_CONSTRUCT_JOB_FAMILY(family), local) | |||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this really work if family and local are just 16-bit values? My macros assumed they were 32-bit.
9057cdd
to
4c5029c
Compare
@hjelmn the issue only occurs on big endian platforms (and i am only half surprised cutoff was not tested on this arch)
the "right" place is endian dependant, that means we would have struct {
#if WORDS_BIGENDIAN
opal_jobid_t jobid;
opal_vpid_t vpid;
#else
opal_vpid_t vpid;
opal_jobid_t jobid;
#endif
} opal_process_name_t; and that would require to change the way @rhc54 could you please double check this is PR is good to go ? here is a patch to be applied on top of this PR to automatically disable cutoff on non 64 bits arch commit 0bee2a570c4ada11cfca9f35268c0a20a5835e74
Author: Gilles Gouaillardet <gilles@rist.or.jp>
Date: Mon Feb 8 10:48:26 2016 +0900
sentinel: disable mpi_add_procs_cutoff on non 64 bits arch
- add have_mpi_add_procs_cutoff MCA read-only parameter
- make mpi_add_procs_cutoff readonly and MAX_UINT32 on non 64 bits arch
- add an extra check to ensure sizeof(opal_process_name_t) is 8
diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h
index eac16cf..d8fe2f9 100644
--- a/ompi/proc/proc.h
+++ b/ompi/proc/proc.h
@@ -361,6 +361,7 @@ OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_na
OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name);
+#if OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P
/**
* Check if an ompi_proc_t is a sentinel
*/
@@ -407,6 +408,23 @@ static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel
name.vpid = vpid;
return name;
}
+#else
+static inline bool ompi_proc_is_sentinel (ompi_proc_t *proc)
+{
+ return false;
+}
+
+/* these functions will never be called */
+static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name)
+{
+ return (uintptr_t)-1;
+}
+
+static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel)
+{
+ return opal_name_invalid;
+}
+#endif
END_C_DECLS
diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c
index 7b32eee..2bf206a 100644
--- a/ompi/runtime/ompi_mpi_params.c
+++ b/ompi/runtime/ompi_mpi_params.c
@@ -17,6 +17,8 @@
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
+ * Copyright (c) 2016 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -64,6 +66,7 @@ int ompi_mpi_event_tick_rate = -1;
char *ompi_mpi_show_mca_params_string = NULL;
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
bool ompi_mpi_preconnect_mpi = false;
+bool ompi_have_add_procs_cutoff = false;
uint32_t ompi_add_procs_cutoff = 1024;
bool ompi_mpi_dynamics_enabled = true;
@@ -263,13 +266,27 @@ int ompi_mpi_register_params(void)
ompi_rte_abort(1, NULL);
}
- ompi_add_procs_cutoff = 1024;
+ /* Procs cutoff support */
+ ompi_have_add_procs_cutoff = (OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P);
+ (void) mca_base_var_register("ompi", "mpi", NULL, "have_add_procs_cutoff",
+ "Whether this Open MPI installation supports add_procs_cutoff",
+ MCA_BASE_VAR_TYPE_BOOL, NULL, 0,
+ MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
+ OPAL_INFO_LVL_9,
+ MCA_BASE_VAR_SCOPE_CONSTANT,
+ &ompi_have_add_procs_cutoff);
+
+ if (!ompi_have_add_procs_cutoff) {
+ ompi_add_procs_cutoff = UINT32_MAX;
+ }
(void) mca_base_var_register ("ompi", "mpi", NULL, "add_procs_cutoff",
"Maximum world size for pre-allocating resources for all "
"remote processes. Increasing this limit may improve "
"communication performance at the cost of memory usage "
- "(default: 1024)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
- 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
+ "(default: 1024)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
+ ompi_have_add_procs_cutoff?0:MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
+ OPAL_INFO_LVL_3,
+ ompi_have_add_procs_cutoff?MCA_BASE_VAR_SCOPE_LOCAL:MCA_BASE_VAR_SCOPE_CONSTANT,
&ompi_add_procs_cutoff);
ompi_mpi_dynamics_enabled = true;
diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h
index 8c1bad9..c261223 100644
--- a/opal/dss/dss_types.h
+++ b/opal/dss/dss_types.h
@@ -14,7 +14,7 @@
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
- * Copyright (c) 2014 Research Organization for Information Science
+ * Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
@@ -47,6 +47,7 @@ typedef struct {
opal_jobid_t jobid;
opal_vpid_t vpid;
} opal_process_name_t;
+#define OPAL_SIZEOF_PROCESS_NAME_T 8
BEGIN_C_DECLS
diff --git a/test/dss/dss_size.c b/test/dss/dss_size.c
index c128ec7..54f0919 100644
--- a/test/dss/dss_size.c
+++ b/test/dss/dss_size.c
@@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
+ * Copyright (c) 2016 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -42,6 +44,7 @@ static bool test9(void); /* verify composite (multiple types and element
static bool test11(void); /* verify size_t */
static bool test12(void); /* verify pid_t */
static bool test13(void); /* verify pid_t */
+static bool test14(void); /* verify opal_process_name_t */
FILE *test_out;
@@ -150,6 +153,14 @@ int main (int argc, char* argv[])
fprintf(test_out, "opal_dss test13 failed\n");
}
+ fprintf(test_out, "executing test14\n");
+ if (test14()) {
+ fprintf(test_out, "Test14 succeeded\n");
+ }
+ else {
+ fprintf(test_out, "opal_dss test14 failed\n");
+ }
+
fclose(test_out);
opal_finalize();
@@ -624,3 +635,14 @@ static bool test13(void)
return (true);
}
+static bool test14(void)
+{
+ if (sizeof(opal_process_name_t) != OPAL_SIZEOF_PROCESS_NAME) {
+ fprintf(test_out, "opal_process_name_t does not have expected size (%d != %d)\n",
+ sizeof(opal_process_name_t), OPAL_SIZEOF_PROCESS_NAME);
+ return(false);
+ }
+
+ return (true);
+}
+ and here is an other one that does support non 64 bits arch, at the expense of a commit 1bcc02c961018e6f0ae82281a7a2caf62de11d78
Author: Gilles Gouaillardet <gilles@rist.or.jp>
Date: Mon Feb 8 10:48:26 2016 +0900
sentinel: fix non 64 bits arch
on a non 64 bits arch, a sentinel is an opal_process_name_t * with its LSB forced to one
diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c
index b2200bd..aa30d61 100644
--- a/ompi/communicator/comm_init.c
+++ b/ompi/communicator/comm_init.c
@@ -107,6 +107,8 @@ int ompi_comm_init(void)
group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *));
group->grp_proc_count = size;
+ ompi_proc_sentinel_init(size);
+
for (size_t i = 0 ; i < size ; ++i) {
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
/* look for existing ompi_proc_t that matches this name */
@@ -328,6 +330,7 @@ int ompi_comm_finalize(void)
}
}
+ ompi_proc_sentinel_finalize();
OBJ_DESTRUCT (&ompi_mpi_communicators);
OBJ_DESTRUCT (&ompi_comm_f_to_c_table);
diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h
index eac16cf..3d3230c 100644
--- a/ompi/proc/proc.h
+++ b/ompi/proc/proc.h
@@ -361,6 +361,11 @@ OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_na
OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name);
+#if OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P
+
+#define ompi_proc_sentinel_init(s)
+#define ompi_proc_sentinel_finalize()
+
/**
* Check if an ompi_proc_t is a sentinel
*/
@@ -407,6 +412,37 @@ static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel
name.vpid = vpid;
return name;
}
+#else
+
+static opal_process_name_t * proc_names;
+
+static inline void ompi_proc_sentinel_init(int size) {
+ proc_names = (opal_process_name_t *)calloc(sizeof(opal_process_name_t), size);
+}
+
+static inline bool ompi_proc_is_sentinel (ompi_proc_t *proc)
+{
+ return (uintptr_t)proc & 0x1;
+}
+
+static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name)
+{
+ proc_names[name.vpid] = name;
+ return (uintptr_t)(proc_names+name.vpid) | 0x1;
+}
+
+static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel)
+{
+ opal_process_name_t * name;
+ name = (opal_process_name_t *)((uintptr_t)sentinel & (~0x1));
+ return *name;
+}
+
+static inline void ompi_proc_sentinel_finalize(void)
+{
+ free(proc_names);
+}
+#endif
END_C_DECLS
diff --git a/ompi/runtime/ompi_mpi_params.c b/ompi/runtime/ompi_mpi_params.c
index 7b32eee..34abb5f 100644
--- a/ompi/runtime/ompi_mpi_params.c
+++ b/ompi/runtime/ompi_mpi_params.c
@@ -17,6 +17,8 @@
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2015 Mellanox Technologies, Inc.
* All rights reserved.
+ * Copyright (c) 2016 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -64,6 +66,7 @@ int ompi_mpi_event_tick_rate = -1;
char *ompi_mpi_show_mca_params_string = NULL;
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
bool ompi_mpi_preconnect_mpi = false;
+bool ompi_have_add_procs_cutoff = false;
uint32_t ompi_add_procs_cutoff = 1024;
bool ompi_mpi_dynamics_enabled = true;
@@ -263,13 +266,14 @@ int ompi_mpi_register_params(void)
ompi_rte_abort(1, NULL);
}
- ompi_add_procs_cutoff = 1024;
(void) mca_base_var_register ("ompi", "mpi", NULL, "add_procs_cutoff",
"Maximum world size for pre-allocating resources for all "
"remote processes. Increasing this limit may improve "
"communication performance at the cost of memory usage "
- "(default: 1024)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
- 0, 0, OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL,
+ "(default: 1024)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
+ 0,
+ OPAL_INFO_LVL_3,
+ MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_add_procs_cutoff);
ompi_mpi_dynamics_enabled = true;
diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h
index 8c1bad9..c261223 100644
--- a/opal/dss/dss_types.h
+++ b/opal/dss/dss_types.h
@@ -14,7 +14,7 @@
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
- * Copyright (c) 2014 Research Organization for Information Science
+ * Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
@@ -47,6 +47,7 @@ typedef struct {
opal_jobid_t jobid;
opal_vpid_t vpid;
} opal_process_name_t;
+#define OPAL_SIZEOF_PROCESS_NAME_T 8
BEGIN_C_DECLS
diff --git a/test/dss/dss_size.c b/test/dss/dss_size.c
index c128ec7..54f0919 100644
--- a/test/dss/dss_size.c
+++ b/test/dss/dss_size.c
@@ -9,6 +9,8 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
+ * Copyright (c) 2016 Research Organization for Information Science
+ * and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -42,6 +44,7 @@ static bool test9(void); /* verify composite (multiple types and element
static bool test11(void); /* verify size_t */
static bool test12(void); /* verify pid_t */
static bool test13(void); /* verify pid_t */
+static bool test14(void); /* verify opal_process_name_t */
FILE *test_out;
@@ -150,6 +153,14 @@ int main (int argc, char* argv[])
fprintf(test_out, "opal_dss test13 failed\n");
}
+ fprintf(test_out, "executing test14\n");
+ if (test14()) {
+ fprintf(test_out, "Test14 succeeded\n");
+ }
+ else {
+ fprintf(test_out, "opal_dss test14 failed\n");
+ }
+
fclose(test_out);
opal_finalize();
@@ -624,3 +635,14 @@ static bool test13(void)
return (true);
}
+static bool test14(void)
+{
+ if (sizeof(opal_process_name_t) != OPAL_SIZEOF_PROCESS_NAME) {
+ fprintf(test_out, "opal_process_name_t does not have expected size (%d != %d)\n",
+ sizeof(opal_process_name_t), OPAL_SIZEOF_PROCESS_NAME);
+ return(false);
+ }
+
+ return (true);
+}
+ |
Just to clarify: I have no particular solution I'm advocating, but I do have couple of requirements on the solution itself
HTH |
I just thought of something we can do for 2.x. Since we always create the procs for dynamic procs we could 1) add the jobid to the ompi_group_t structure, and 2) make the sentinel (rank << 1) | 1. That would solve the issue. |
@hjelmn what if we have processes from multiple jobid on the same group? |
This is not possible yet |
@ggouaillardet assuming that your comment was an answer to my question, let me clarify that this is not an option. We should be able to have ompi_group_t with processes from multiple jobid. |
@bosilca yes, that was an answer. |
@bosilca let me rephrase my initial reply. |
So the proposal here is that when we create a group we replace all the "not yet resolved" processes from MPI_COMM_WORLD by their corresponding rank in MPI_COMM_WORLD (which will allow to find them in the global proc array) shifted to the left by one and with the LSB set. This is a nice solution, that should be minimally intrusive on the group operations. But then why do you need to have an jobid associated with the ompi_group_t ? |
@bosilca Good point. Was just throwing an idea out there. Don't really need the job id since it can be gotten from OMPI_PROC_MY_NAME. |
I can throw together a patch that does this. Will do that tomorrow. |
MSB is now automatically cleared when right shifting Thanks George for pointing this
from /* Setup MPI_COMM_WORLD */
OBJ_CONSTRUCT(&ompi_mpi_comm_world, ompi_communicator_t);
assert(ompi_mpi_comm_world.comm.c_f_to_c_index == 0);
group = OBJ_NEW(ompi_group_t);
size = ompi_process_info.num_procs;
group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *));
group->grp_proc_count = size;
for (size_t i = 0 ; i < size ; ++i) {
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
/* look for existing ompi_proc_t that matches this name */
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name);
if (NULL == group->grp_proc_pointers[i]) {
/* set sentinel value */
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
} else {
OBJ_RETAIN (group->grp_proc_pointers[i]);
}
}
OMPI_GROUP_SET_INTRINSIC (group);
OMPI_GROUP_SET_DENSE (group);
ompi_set_group_rank(group, ompi_proc_local());
ompi_mpi_comm_world.comm.c_contextid = 0;
ompi_mpi_comm_world.comm.c_id_start_index = 4;
ompi_mpi_comm_world.comm.c_id_available = 4;
ompi_mpi_comm_world.comm.c_my_rank = group->grp_my_rank;
ompi_mpi_comm_world.comm.c_local_group = group;
ompi_mpi_comm_world.comm.c_remote_group = group;
now, if we here is a patch that does that (and i have to fully test it) commit f26c7fb2258db5ca51c650bdbd9cb86e3bb090da
Author: Gilles Gouaillardet <gilles@rist.or.jp>
Date: Mon Feb 8 10:48:26 2016 +0900
sentinel: fix 32 bits arch
since a sentinel is only made from the current job, only store the
first 31 bits of the vpid into the sentinel.
diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h
index eac16cf..ecb4346 100644
--- a/ompi/proc/proc.h
+++ b/ompi/proc/proc.h
@@ -369,6 +369,7 @@ static inline bool ompi_proc_is_sentinel (ompi_proc_t *proc)
return (intptr_t) proc & 0x1;
}
+#if OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P
/*
* we assume an ompi_proc_t is at least aligned on two bytes,
* so if the LSB of a pointer to an ompi_proc_t is 1, we have to handle
@@ -407,6 +408,27 @@ static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel
name.vpid = vpid;
return name;
}
+#elif 4 == SIZEOF_VOID_P
+/*
+ * currently, a sentinel is only made from the current jobid aka OMPI_PROC_MY_NAME->jobid
+ * so we only store the first 31 bits of the vpid
+ */
+static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name)
+{
+ assert(OMPI_PROC_MY_NAME->jobid == name.jobid);
+ return (uintptr_t)((name.vpid <<1) | 0x1);
+}
+
+static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel)
+{
+ opal_process_name_t name;
+ name.jobid = OMPI_PROC_MY_NAME->jobid;
+ name.vpid = sentinel >> 1;
+ return name;
+}
+#else
+#error unsupported pointer size
+#endif
END_C_DECLS
diff --git a/opal/dss/dss_types.h b/opal/dss/dss_types.h
index 8c1bad9..c261223 100644
--- a/opal/dss/dss_types.h
+++ b/opal/dss/dss_types.h
@@ -14,7 +14,7 @@
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
- * Copyright (c) 2014 Research Organization for Information Science
+ * Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
@@ -47,6 +47,7 @@ typedef struct {
opal_jobid_t jobid;
opal_vpid_t vpid;
} opal_process_name_t;
+#define OPAL_SIZEOF_PROCESS_NAME_T 8
BEGIN_C_DECLS
|
Nice. @ggouaillardet beat me to it. |
converting an opal_process_name_t means the loss of one bit, it was decided to restrict the local job id to 15 bits, so the useful information of an opal_process_name_t can fit in 63 bits.
since a sentinel is only made from the current job, only store the first 31 bits of the vpid into the sentinel.
4c5029c
to
96310f4
Compare
i updated the PR
|
@bosilca yes, that was a reply It is always possible to have a group with processes from different jobids. My point is only pointers to processes in mpi_comm_world have the potential of being replaced with sentinels, hence the optimization @hjelmn suggested Ideally, sentinels could be for processes of any job, but we are not there yet. group = OBJ_NEW(ompi_group_t);
size = ompi_process_info.num_procs;
group->grp_proc_pointers = (ompi_proc_t **) calloc (size, sizeof (ompi_proc_t *));
group->grp_proc_count = size;
for (size_t i = 0 ; i < size ; ++i) {
opal_process_name_t name = {.vpid = i, .jobid = OMPI_PROC_MY_NAME->jobid};
/* look for existing ompi_proc_t that matches this name */
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_lookup (name);
if (NULL == group->grp_proc_pointers[i]) {
/* set sentinel value */
group->grp_proc_pointers[i] = (ompi_proc_t *) ompi_proc_name_to_sentinel (name);
} else {
OBJ_RETAIN (group->grp_proc_pointers[i]);
}
} |
:bot:retest: |
@hjelmn go for it! 👍 |
…conversion Topic/sentinel proc name conversion
Fixes #1350