Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve MPI-I/O file hints when fapl is closed #3755

Merged
merged 12 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions src/H5Fint.c
Copy link
Collaborator

@jhendersonHDF jhendersonHDF Oct 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the library calls H5_mpi_info_dup when getting MPI info from a FAPL, I believe this means the library will need to close the MPI_Info struct with H5_mpi_info_free when closing an H5F_shared_t structure now.

Original file line number Diff line number Diff line change
Expand Up @@ -402,18 +402,15 @@ H5F_get_access_plist(H5F_t *f, bool app_ref)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set collective metadata read flag");
if (H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
MPI_Comm mpi_comm;
MPI_Info mpi_info;

/* Retrieve and set MPI communicator */
if (MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(f)))
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI communicator");
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &mpi_comm) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI communicator");

/* Retrieve and set MPI info object */
if (H5P_get(old_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, H5I_INVALID_HID, "can't get MPI info object");
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &mpi_info) < 0)
/* Retrieve MPI info object */
if (H5P_set(new_plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTSET, H5I_INVALID_HID, "can't set MPI info object");
}
#endif /* H5_HAVE_PARALLEL */
Expand Down Expand Up @@ -1209,6 +1206,8 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata read flag");
if (H5P_get(plist, H5F_ACS_COLL_MD_WRITE_FLAG_NAME, &(f->shared->coll_md_write)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get collective metadata write flag");
if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &(f->shared->mpi_info)) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't set MPI info object");
#endif /* H5_HAVE_PARALLEL */
if (H5P_get(plist, H5F_ACS_META_CACHE_INIT_IMAGE_CONFIG_NAME, &(f->shared->mdc_initCacheImageCfg)) <
0)
Expand Down
1 change: 1 addition & 0 deletions src/H5Fpkg.h
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ struct H5F_shared_t {
#ifdef H5_HAVE_PARALLEL
H5P_coll_md_read_flag_t coll_md_read; /* Do all metadata reads collectively */
bool coll_md_write; /* Do all metadata writes collectively */
MPI_Info mpi_info; /* MPI info */
#endif /* H5_HAVE_PARALLEL */
};

Expand Down
57 changes: 57 additions & 0 deletions testpar/t_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1060,3 +1060,60 @@ test_invalid_libver_bounds_file_close_assert(void)
ret = H5Pclose(fcpl_id);
VRFY((SUCCEED == ret), "H5Pclose");
}

/*
* Verify that MPI I/O hints are preserved after closing the file access property list
* as described in issue #3025
* This is a test program from the user.
*/
void
test_fapl_preserve_hints(void)
{
hid_t fid = H5I_INVALID_HID; /* HDF5 file ID */
hid_t fapl_id = H5I_INVALID_HID; /* File access plist */
const char *filename;
MPI_Info info = MPI_INFO_NULL;
MPI_Info info_used = MPI_INFO_NULL;
herr_t ret; /* Generic return value */
int mpi_ret; /* MPI return value */

filename = (const char *)GetTestParameters();

/* set up MPI parameters */
mpi_ret = MPI_Info_create(&info);
VRFY((mpi_ret >= 0), "MPI_Info_create succeeded");

mpi_ret = MPI_Info_set(info, "hdf_info_fapl", "true");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you be able to test MPI_Info_get() after retrieving the info to make sure it still has the same value?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Other than that, looks good to me. I'm fine with merging it to get it into 1.14 and adding this line to the test after if needed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed on checking to make sure that the hint is still the same after retrieving the prop list

VRFY((mpi_ret == MPI_SUCCESS), "MPI_Info_set succeeded");

fapl_id = H5Pcreate(H5P_FILE_ACCESS);
VRFY((fapl_id != H5I_INVALID_HID), "H5Pcreate");

ret = H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, info);
VRFY((ret >= 0), "H5Pset_fapl_mpio");

fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
VRFY((fid != H5I_INVALID_HID), "H5Fcreate succeeded");

ret = H5Pclose(fapl_id);
VRFY((ret >= 0), "H5Pclose succeeded");

fapl_id = H5Fget_access_plist(fid);
VRFY((fapl_id != H5I_INVALID_HID), "H5Fget_access_plist succeeded");

ret = H5Pget_fapl_mpio(fapl_id, NULL, &info_used);
VRFY((ret >= 0), "H5Pget_fapl_mpio succeeded");

VRFY((info_used != MPI_INFO_NULL), "H5Pget_fapl_mpio");

ret = H5Pclose(fapl_id);
VRFY((ret >= 0), "H5Pclose succeeded");

ret = H5Fclose(fid);
VRFY((ret >= 0), "H5Fclose succeeded");

/* Free the MPI info object */
mpi_ret = MPI_Info_free(&info);
VRFY((mpi_ret >= 0), "MPI_Info_free succeeded");

} /* end test_fapl_preserve_hints() */
3 changes: 3 additions & 0 deletions testpar/testphdf5.c
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,9 @@ main(int argc, char **argv)
AddTest("invlibverassert", test_invalid_libver_bounds_file_close_assert, NULL,
"Invalid libver bounds assertion failure", PARATESTFILE);

AddTest("fapl_preserve", test_fapl_preserve_hints, NULL, "preserve MPI I/O hints after fapl closed",
PARATESTFILE);

AddTest("idsetw", dataset_writeInd, NULL, "dataset independent write", PARATESTFILE);
AddTest("idsetr", dataset_readInd, NULL, "dataset independent read", PARATESTFILE);

Expand Down
1 change: 1 addition & 0 deletions testpar/testphdf5.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ void zero_dim_dset(void);
void test_file_properties(void);
void test_delete(void);
void test_invalid_libver_bounds_file_close_assert(void);
void test_fapl_preserve_hints(void);
void multiple_dset_write(void);
void multiple_group_write(void);
void multiple_group_read(void);
Expand Down