Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revise file close assertion failure fix #3418

Merged
merged 1 commit into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/H5C.c
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ H5C_prep_for_file_close(H5F_t *f)

#ifdef H5_HAVE_PARALLEL
if ((H5F_INTENT(f) & H5F_ACC_RDWR) && !image_generated && cache_ptr->aux_ptr != NULL &&
f->shared->sblock && f->shared->fs_persist) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This just goes back to the previous behavior and tries to close as much of the cache as possible, even if the superblock isn't available.

f->shared->fs_persist) {
/* If persistent free space managers are enabled, flushing the
* metadata cache may result in the deletion, insertion, and/or
* dirtying of entries.
Expand Down
5 changes: 5 additions & 0 deletions src/H5Cimage.c
Original file line number Diff line number Diff line change
Expand Up @@ -654,6 +654,11 @@ H5C__load_cache_image(H5F_t *f)
} /* end if */

done:
if (ret_value < 0) {
if (H5_addr_defined(cache_ptr->image_addr))
cache_ptr->image_buffer = H5MM_xfree(cache_ptr->image_buffer);
}
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Memory leak exposed by these file create failure changes. Free cache image pointer if this function fails.


FUNC_LEAVE_NOAPI(ret_value)
} /* H5C__load_cache_image() */

Expand Down
10 changes: 5 additions & 5 deletions src/H5Fint.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ static herr_t H5F__build_name(const char *prefix, const char *file_name, char **
static char *H5F__getenv_prefix_name(char **env_prefix /*in,out*/);
static H5F_t *H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5FD_t *lf);
static herr_t H5F__check_if_using_file_locks(H5P_genplist_t *fapl, hbool_t *use_file_locking);
static herr_t H5F__dest(H5F_t *f, hbool_t flush);
static herr_t H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure);
static herr_t H5F__build_actual_name(const H5F_t *f, const H5P_genplist_t *fapl, const char *name,
char ** /*out*/ actual_name);
static herr_t H5F__flush_phase1(H5F_t *f);
Expand Down Expand Up @@ -1357,7 +1357,7 @@ H5F__new(H5F_shared_t *shared, unsigned flags, hid_t fcpl_id, hid_t fapl_id, H5F
*-------------------------------------------------------------------------
*/
static herr_t
H5F__dest(H5F_t *f, hbool_t flush)
H5F__dest(H5F_t *f, hbool_t flush, hbool_t free_on_failure)
{
herr_t ret_value = SUCCEED; /* Return value */

Expand Down Expand Up @@ -1624,7 +1624,7 @@ H5F__dest(H5F_t *f, hbool_t flush)
HDONE_ERROR(H5E_FILE, H5E_CANTINIT, FAIL, "problems closing file");
f->shared = NULL;

if (ret_value >= 0)
if ((ret_value >= 0) || free_on_failure)
f = H5FL_FREE(H5F_t, f);

FUNC_LEAVE_NOAPI(ret_value)
Expand Down Expand Up @@ -2121,7 +2121,7 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id)

done:
if ((NULL == ret_value) && file)
if (H5F__dest(file, FALSE) < 0)
if (H5F__dest(file, FALSE, TRUE) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, NULL, "problems closing file");

FUNC_LEAVE_NOAPI(ret_value)
Expand Down Expand Up @@ -2556,7 +2556,7 @@ H5F_try_close(H5F_t *f, hbool_t *was_closed /*out*/)
* shared H5F_shared_t struct. If the reference count for the H5F_shared_t
* struct reaches zero then destroy it also.
*/
if (H5F__dest(f, TRUE) < 0)
if (H5F__dest(f, TRUE, FALSE) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "problems closing file");

/* Since we closed the file, this should be set to TRUE */
Expand Down
20 changes: 17 additions & 3 deletions src/H5Fsuper.c
Original file line number Diff line number Diff line change
Expand Up @@ -1076,8 +1076,9 @@ H5F__super_init(H5F_t *f)
FALSE; /* Whether the driver info block has been inserted into the metadata cache */
H5P_genplist_t *plist; /* File creation property list */
H5AC_ring_t orig_ring = H5AC_RING_INV;
hsize_t userblock_size; /* Size of userblock, in bytes */
hsize_t superblock_size; /* Size of superblock, in bytes */
hsize_t userblock_size; /* Size of userblock, in bytes */
hsize_t superblock_size = 0; /* Size of superblock, in bytes */
haddr_t superblock_addr = HADDR_UNDEF;
size_t driver_size; /* Size of driver info block (bytes) */
unsigned super_vers = HDF5_SUPERBLOCK_VERSION_DEF; /* Superblock version for file */
H5O_loc_t ext_loc; /* Superblock extension object location */
Expand Down Expand Up @@ -1277,7 +1278,7 @@ H5F__super_init(H5F_t *f)
f->shared->sblock = sblock;

/* Allocate space for the superblock */
if (HADDR_UNDEF == H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size))
if (HADDR_UNDEF == (superblock_addr = H5MF_alloc(f, H5FD_MEM_SUPER, superblock_size)))
HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "file allocation failed for superblock");

/* set the drvinfo filed to NULL -- will overwrite this later if needed */
Expand Down Expand Up @@ -1469,6 +1470,19 @@ H5F__super_init(H5F_t *f)

/* Check if the superblock has been allocated yet */
if (sblock) {
if (non_default_fs_settings && H5_addr_defined(superblock_addr)) {
/*
* For non-default free-space settings, the allocation of
* space in the file for the superblock may have have allocated
* memory for the free-space manager and inserted it into the
* metadata cache. Clean that up before returning or we may fail
* to close the file later due to the metadata cache's metadata
* free space manager ring (H5AC_RING_MDFSM) not being clean.
*/
if (H5MF_try_close(f) < 0)
HDONE_ERROR(H5E_FILE, H5E_CANTFREE, FAIL, "can't close file free space manager");
}

/* Check if we've cached it already */
if (sblock_in_cache) {
/* Unpin superblock in cache */
Expand Down
68 changes: 35 additions & 33 deletions src/H5MF.c
Original file line number Diff line number Diff line change
Expand Up @@ -2581,16 +2581,14 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled)
hbool_t fsm_opened[H5F_MEM_PAGE_NTYPES]; /* State of FSM */
hbool_t fsm_visited[H5F_MEM_PAGE_NTYPES]; /* State of FSM */

/* Sanity check */
assert(f->shared->sblock);

/* should only be called if file is opened R/W */
assert(H5F_INTENT(f) & H5F_ACC_RDWR);

/* shouldn't be called unless we have a superblock supporting the
* superblock extension.
*/
assert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2);
if (f->shared->sblock)
assert(f->shared->sblock->super_vers >= HDF5_SUPERBLOCK_VERSION_2);
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this and the below changes, try to close down as much as possible in the raw data FSM, even if the superblock isn't available. A bit dangerous if something else here uses the superblock, but I followed things far down and didn't really see anything else.


/* Initialize fsm_opened and fsm_visited */
memset(fsm_opened, 0, sizeof(fsm_opened));
Expand Down Expand Up @@ -2738,40 +2736,44 @@ H5MF_settle_raw_data_fsm(H5F_t *f, hbool_t *fsm_settled)
* file space manager info message is guaranteed to exist.
* Leave it in for now, but consider removing it.
*/
if (H5_addr_defined(f->shared->sblock->ext_addr))
if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL,
"error in removing message from superblock extension");
if (f->shared->sblock) {
if (H5_addr_defined(f->shared->sblock->ext_addr))
if (H5F__super_ext_remove_msg(f, H5O_FSINFO_ID) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTRELEASE, FAIL,
"error in removing message from superblock extension");
}

/* As the final element in 1), shrink the EOA for the file */
if (H5MF__close_shrink_eoa(f) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_CANTSHRINK, FAIL, "can't shrink eoa");

/* 2) Ensure that space is allocated for the free space manager superblock
* extension message. Must do this now, before reallocating file space
* for free space managers, as it is possible that this allocation may
* grab the last section in a FSM -- making it unnecessary to
* re-allocate file space for it.
*
* Do this by writing a free space manager superblock extension message.
*
* Since no free space manager has file space allocated for it, this
* message must be invalid since we can't save addresses of FSMs when
* those addresses are unknown. This is OK -- we will write the correct
* values to the message at free space manager shutdown.
*/
for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++)
fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF;
fsinfo.strategy = f->shared->fs_strategy;
fsinfo.persist = f->shared->fs_persist;
fsinfo.threshold = f->shared->fs_threshold;
fsinfo.page_size = f->shared->fs_page_size;
fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres;
fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF;

if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL,
"error in writing fsinfo message to superblock extension");
if (f->shared->sblock) {
/* 2) Ensure that space is allocated for the free space manager superblock
* extension message. Must do this now, before reallocating file space
* for free space managers, as it is possible that this allocation may
* grab the last section in a FSM -- making it unnecessary to
* re-allocate file space for it.
*
* Do this by writing a free space manager superblock extension message.
*
* Since no free space manager has file space allocated for it, this
* message must be invalid since we can't save addresses of FSMs when
* those addresses are unknown. This is OK -- we will write the correct
* values to the message at free space manager shutdown.
*/
for (fsm_type = H5F_MEM_PAGE_SUPER; fsm_type < H5F_MEM_PAGE_NTYPES; fsm_type++)
fsinfo.fs_addr[fsm_type - 1] = HADDR_UNDEF;
fsinfo.strategy = f->shared->fs_strategy;
fsinfo.persist = f->shared->fs_persist;
fsinfo.threshold = f->shared->fs_threshold;
fsinfo.page_size = f->shared->fs_page_size;
fsinfo.pgend_meta_thres = f->shared->pgend_meta_thres;
fsinfo.eoa_pre_fsm_fsalloc = HADDR_UNDEF;

if (H5F__super_ext_write_msg(f, H5O_FSINFO_ID, &fsinfo, TRUE, H5O_MSG_FLAG_MARK_IF_UNKNOWN) < 0)
HGOTO_ERROR(H5E_RESOURCE, H5E_WRITEERROR, FAIL,
"error in writing fsinfo message to superblock extension");
}

/* 3) Scan all free space managers not involved in allocating
* space for free space managers. For each such free space
Expand Down