Commit
HDF5: Handle unknown datatypes in datasets (#1469)
* HDF5: Throw ReadError at dataset open
* Try parent types when a dataset datatype is unknown
* Update Sample Download Scripts
* Add test for old HDF5-plugin written openPMD dataset from PIConGPU
* Remove debugging comments
franzpoeschel authored Aug 8, 2023
1 parent 8b35d09 commit 3bc460d
Showing 4 changed files with 257 additions and 73 deletions.
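
The key change is the second commit bullet: when a dataset's datatype matches no known native type, the reader now walks up to the type's parent via H5Tget_super() instead of failing immediately. The motivating case is the legacy PIConGPU HDF5 plugin, which wrote booleans as an HDF5 enum over a char base type. The standalone sketch below is not part of this commit and uses only the plain HDF5 C API; the enum layout and member names are an assumed example, chosen to show why the fallback works: the enum itself equals no native type, but its parent type does.

#include <hdf5.h>
#include <iostream>

int main()
{
    // Build an enum whose base (parent) type is a signed char, roughly the
    // shape of a bool emulated via an enum. Member names are made up.
    hid_t enum_type = H5Tenum_create(H5T_NATIVE_SCHAR);
    signed char value = 0;
    H5Tenum_insert(enum_type, "FALSE", &value);
    value = 1;
    H5Tenum_insert(enum_type, "TRUE", &value);

    // The enum matches no native type directly ...
    std::cout << "enum equals SCHAR:   "
              << (H5Tequal(enum_type, H5T_NATIVE_SCHAR) > 0) << '\n';

    // ... but its parent type does, which is what the new retry loop in
    // HDF5IOHandlerImpl::openDataset() exploits via H5Tget_super().
    hid_t parent_type = H5Tget_super(enum_type);
    std::cout << "parent equals SCHAR: "
              << (H5Tequal(parent_type, H5T_NATIVE_SCHAR) > 0) << '\n';

    H5Tclose(parent_type);
    H5Tclose(enum_type);
    return 0;
}

The loop added in the diff below applies exactly this idea, capped at ten iterations so that a misbehaving type hierarchy cannot make it spin forever.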
5 changes: 5 additions & 0 deletions share/openPMD/download_samples.ps1
@@ -15,18 +15,23 @@ New-item -ItemType directory -Name samples\git-sample\3d-bp4\
Invoke-WebRequest https://github.com/openPMD/openPMD-example-datasets/raw/f3b73e43511db96217a153dc3ab3cb2e8f81f7db/example-3d.tar.gz -OutFile example-3d.tar.gz
Invoke-WebRequest https://github.com/openPMD/openPMD-example-datasets/raw/f3b73e43511db96217a153dc3ab3cb2e8f81f7db/example-thetaMode.tar.gz -OutFile example-thetaMode.tar.gz
Invoke-WebRequest https://github.com/openPMD/openPMD-example-datasets/raw/f3b73e43511db96217a153dc3ab3cb2e8f81f7db/example-3d-bp4.tar.gz -OutFile example-3d-bp4.tar.gz
Invoke-WebRequest https://github.com/openPMD/openPMD-example-datasets/raw/566b356030df38f56049484941baacafef331163/legacy_datasets.tar.gz -OutFile legacy_datasets.tar.gz
7z.exe x -r example-3d.tar.gz
7z.exe x -r example-3d.tar
7z.exe x -r example-thetaMode.tar.gz
7z.exe x -r example-thetaMode.tar
7z.exe x -r example-3d-bp4.tar.gz
7z.exe x -r example-3d-bp4.tar
7z.exe x -r legacy_datasets.tar.gz
7z.exe x -r legacy_datasets.tar
Move-Item -Path example-3d\hdf5\* samples\git-sample\
Move-Item -Path example-thetaMode\hdf5\* samples\git-sample\thetaMode\
Move-Item -Path example-3d-bp4\* samples\git-sample\3d-bp4\
Move-Item -Path legacy_datasets\* samples\git-sample\legacy\
Remove-Item -Recurse -Force example-3d*
Remove-Item -Recurse -Force example-thetaMode*
Remove-Item -Recurse -Force example-3d-bp4*
Remove-Item -Recurse -Force legacy_datasets*

# Ref.: https://github.com/yt-project/yt/pull/1645
New-item -ItemType directory -Name samples\issue-sample\
5 changes: 4 additions & 1 deletion share/openPMD/download_samples.sh
@@ -15,14 +15,17 @@ mkdir -p samples/git-sample/3d-bp4
curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/f3b73e43511db96217a153dc3ab3cb2e8f81f7db/example-3d.tar.gz
curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/f3b73e43511db96217a153dc3ab3cb2e8f81f7db/example-thetaMode.tar.gz
curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/f3b73e43511db96217a153dc3ab3cb2e8f81f7db/example-3d-bp4.tar.gz
curl -sOL https://github.com/openPMD/openPMD-example-datasets/raw/566b356030df38f56049484941baacafef331163/legacy_datasets.tar.gz
tar -xzf example-3d.tar.gz
tar -xzf example-thetaMode.tar.gz
tar -xzf example-3d-bp4.tar.gz
tar -xzf legacy_datasets.tar.gz
mv example-3d/hdf5/* samples/git-sample/
mv example-thetaMode/hdf5/* samples/git-sample/thetaMode/
mv example-3d-bp4/* samples/git-sample/3d-bp4
mv legacy_datasets/* samples/git-sample/legacy
chmod 777 samples/
rm -rf example-3d.* example-3d example-thetaMode.* example-thetaMode example-3d-bp4 example-3d-bp4.*
rm -rf example-3d.* example-3d example-thetaMode.* example-thetaMode example-3d-bp4 example-3d-bp4.* legacy_datasets legacy_datasets.*

# Ref.: https://github.com/yt-project/yt/pull/1645
mkdir -p samples/issue-sample/
241 changes: 169 additions & 72 deletions src/IO/HDF5/HDF5IOHandler.cpp
@@ -991,10 +991,15 @@ void HDF5IOHandlerImpl::openDataset(

node_id = H5Gopen(
file.id, concrete_h5_file_position(writable->parent).c_str(), gapl);
VERIFY(
node_id >= 0,
"[HDF5] Internal error: Failed to open HDF5 group during dataset "
"opening");
if (node_id < 0)
{
throw error::ReadError(
error::AffectedObject::Dataset,
error::Reason::NotFound,
"HDF5",
"Internal error: Failed to open HDF5 group during dataset "
"opening");
}

/* Sanitize name */
std::string name = parameters.name;
@@ -1004,10 +1009,15 @@ void HDF5IOHandlerImpl::openDataset(
name += '/';

dataset_id = H5Dopen(node_id, name.c_str(), H5P_DEFAULT);
VERIFY(
dataset_id >= 0,
"[HDF5] Internal error: Failed to open HDF5 dataset during dataset "
"opening");
if (dataset_id < 0)
{
throw error::ReadError(
error::AffectedObject::Dataset,
error::Reason::NotFound,
"HDF5",
"Internal error: Failed to open HDF5 dataset during dataset "
"opening");
}

hid_t dataset_type, dataset_space;
dataset_type = H5Dget_type(dataset_id);
@@ -1020,52 +1030,114 @@
if (dataset_class == H5S_SIMPLE || dataset_class == H5S_SCALAR ||
dataset_class == H5S_NULL)
{
if (H5Tequal(dataset_type, H5T_NATIVE_UCHAR))
d = DT::UCHAR;
else if (H5Tequal(dataset_type, H5T_NATIVE_SCHAR))
d = DT::SCHAR;
// NOTE: in HDF5, CHAR is actually either UCHAR or SCHAR.
else if (H5Tequal(dataset_type, H5T_NATIVE_CHAR))
d = DT::CHAR;
else if (H5Tequal(dataset_type, H5T_NATIVE_SHORT))
d = DT::SHORT;
else if (H5Tequal(dataset_type, H5T_NATIVE_INT))
d = DT::INT;
else if (H5Tequal(dataset_type, H5T_NATIVE_LONG))
d = DT::LONG;
else if (H5Tequal(dataset_type, H5T_NATIVE_LLONG))
d = DT::LONGLONG;
else if (H5Tequal(dataset_type, H5T_NATIVE_FLOAT))
d = DT::FLOAT;
else if (H5Tequal(dataset_type, H5T_NATIVE_DOUBLE))
d = DT::DOUBLE;
else if (
H5Tequal(dataset_type, H5T_NATIVE_LDOUBLE) ||
H5Tequal(dataset_type, m_H5T_LONG_DOUBLE_80_LE))
d = DT::LONG_DOUBLE;
else if (H5Tequal(dataset_type, m_H5T_CFLOAT))
d = DT::CFLOAT;
else if (H5Tequal(dataset_type, m_H5T_CDOUBLE))
d = DT::CDOUBLE;
else if (
H5Tequal(dataset_type, m_H5T_CLONG_DOUBLE) ||
H5Tequal(dataset_type, m_H5T_CLONG_DOUBLE_80_LE))
d = DT::CLONG_DOUBLE;
else if (H5Tequal(dataset_type, H5T_NATIVE_USHORT))
d = DT::USHORT;
else if (H5Tequal(dataset_type, H5T_NATIVE_UINT))
d = DT::UINT;
else if (H5Tequal(dataset_type, H5T_NATIVE_ULONG))
d = DT::ULONG;
else if (H5Tequal(dataset_type, H5T_NATIVE_ULLONG))
d = DT::ULONGLONG;
else if (H5Tget_class(dataset_type) == H5T_STRING)
d = DT::STRING;
else
throw std::runtime_error("[HDF5] Unknown dataset type");
constexpr size_t max_retries = 10;
/*
* It happens that an HDF5 file has a type that is not equal to any of
* the native types, but can still be read as its parent type.
* For example an enum (which some applications use to emulate bools)
* can still be read as its parent type, a char.
* Upon not matching any native type, don't give up yet, but check the
* parent type.
* Normally, this procedure should stop at the point where
* H5Tget_super() returns H5I_INVALID_HID, but this is putting a bit
* too much trust in an external library to be the loop's exit
* condition. So, we restrict the loop to a maximum of 10 iterations
* before manually canceling it.
*/
size_t remaining_tries = max_retries;
bool repeat = false;
do
{
repeat = false;
if (H5Tequal(dataset_type, H5T_NATIVE_UCHAR))
d = DT::UCHAR;
else if (H5Tequal(dataset_type, H5T_NATIVE_SCHAR))
d = DT::SCHAR;
// NOTE: in HDF5, CHAR is actually either UCHAR or SCHAR.
else if (H5Tequal(dataset_type, H5T_NATIVE_CHAR))
d = DT::CHAR;
else if (H5Tequal(dataset_type, H5T_NATIVE_SHORT))
d = DT::SHORT;
else if (H5Tequal(dataset_type, H5T_NATIVE_INT))
d = DT::INT;
else if (H5Tequal(dataset_type, H5T_NATIVE_LONG))
d = DT::LONG;
else if (H5Tequal(dataset_type, H5T_NATIVE_LLONG))
d = DT::LONGLONG;
else if (H5Tequal(dataset_type, H5T_NATIVE_FLOAT))
d = DT::FLOAT;
else if (H5Tequal(dataset_type, H5T_NATIVE_DOUBLE))
d = DT::DOUBLE;
else if (
H5Tequal(dataset_type, H5T_NATIVE_LDOUBLE) ||
H5Tequal(dataset_type, m_H5T_LONG_DOUBLE_80_LE))
d = DT::LONG_DOUBLE;
else if (H5Tequal(dataset_type, m_H5T_CFLOAT))
d = DT::CFLOAT;
else if (H5Tequal(dataset_type, m_H5T_CDOUBLE))
d = DT::CDOUBLE;
else if (
H5Tequal(dataset_type, m_H5T_CLONG_DOUBLE) ||
H5Tequal(dataset_type, m_H5T_CLONG_DOUBLE_80_LE))
d = DT::CLONG_DOUBLE;
else if (H5Tequal(dataset_type, H5T_NATIVE_USHORT))
d = DT::USHORT;
else if (H5Tequal(dataset_type, H5T_NATIVE_UINT))
d = DT::UINT;
else if (H5Tequal(dataset_type, H5T_NATIVE_ULONG))
d = DT::ULONG;
else if (H5Tequal(dataset_type, H5T_NATIVE_ULLONG))
d = DT::ULONGLONG;
else if (H5Tget_class(dataset_type) == H5T_STRING)
d = DT::STRING;
else
{
auto throw_error = []() {
throw error::ReadError(
error::AffectedObject::Dataset,
error::Reason::UnexpectedContent,
"HDF5",
"Unknown dataset type");
};
if (remaining_tries == 0)
{
throw_error();
}
hid_t next_type = H5Tget_super(dataset_type);
if (next_type == H5I_INVALID_HID)
{
throw_error();
}
else if (H5Tequal(dataset_type, next_type))
{
H5Tclose(next_type);
throw_error();
}
else
{
if (H5Tclose(dataset_type) != 0)
{
throw error::ReadError(
error::AffectedObject::Group,
error::Reason::Other,
"HDF5",
"Internal error: Failed to close HDF5 dataset type "
"during "
"dataset opening");
}
dataset_type = next_type;
--remaining_tries;
repeat = true;
}
}
} while (repeat);
}
else
throw std::runtime_error("[HDF5] Unsupported dataset class");
throw error::ReadError(
error::AffectedObject::Dataset,
error::Reason::UnexpectedContent,
"HDF5",
"Unknown dataset class");

auto dtype = parameters.dtype;
*dtype = d;
@@ -1083,30 +1155,55 @@ void HDF5IOHandlerImpl::openDataset(

herr_t status;
status = H5Sclose(dataset_space);
VERIFY(
status == 0,
"[HDF5] Internal error: Failed to close HDF5 dataset space during "
"dataset opening");
if (status != 0)
{
throw error::ReadError(
error::AffectedObject::Group,
error::Reason::Other,
"HDF5",
"Internal error: Failed to close HDF5 dataset space during "
"dataset opening");
}
status = H5Tclose(dataset_type);
VERIFY(
status == 0,
"[HDF5] Internal error: Failed to close HDF5 dataset type during "
"dataset opening");
if (status != 0)
{
throw error::ReadError(
error::AffectedObject::Group,
error::Reason::Other,
"HDF5",
"Internal error: Failed to close HDF5 dataset type during "
"dataset opening");
}
status = H5Dclose(dataset_id);
VERIFY(
status == 0,
"[HDF5] Internal error: Failed to close HDF5 dataset during dataset "
"opening");
if (status != 0)
{
throw error::ReadError(
error::AffectedObject::Group,
error::Reason::Other,
"HDF5",
"Internal error: Failed to close HDF5 dataset during dataset "
"opening");
}
status = H5Gclose(node_id);
VERIFY(
status == 0,
"[HDF5] Internal error: Failed to close HDF5 group during dataset "
"opening");
if (status != 0)
{
throw error::ReadError(
error::AffectedObject::Group,
error::Reason::Other,
"HDF5",
"Internal error: Failed to close HDF5 group during dataset "
"opening");
}
status = H5Pclose(gapl);
VERIFY(
status == 0,
"[HDF5] Internal error: Failed to close HDF5 property during dataset "
"opening");
if (status != 0)
{
throw error::ReadError(
error::AffectedObject::Group,
error::Reason::Other,
"HDF5",
"Internal error: Failed to close HDF5 property during dataset "
"opening");
}

writable->written = true;
writable->abstractFilePosition = std::make_shared<HDF5FilePosition>(name);
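
The other user-visible change is that the VERIFY() macros in openDataset() are replaced by typed error::ReadError exceptions, so a dataset that cannot be opened or whose type cannot be resolved surfaces as a catchable, structured error. Below is a hedged sketch of how reading code might react, using the public openPMD-api Series interface; the file path and record names are placeholders for the legacy samples unpacked by the updated download scripts, and depending on the parsing mode the exception may already propagate from the Series constructor rather than from the later accesses.

#include <openPMD/openPMD.hpp>
#include <iostream>

int main()
{
    try
    {
        // Placeholder path: substitute one of the legacy PIConGPU samples
        // unpacked into samples/git-sample/legacy/ by the download scripts.
        openPMD::Series series(
            "samples/git-sample/legacy/simData_%T.h5",
            openPMD::Access::READ_ONLY);
        for (auto &[step, iteration] : series.iterations)
        {
            // Accessing a record component goes through
            // HDF5IOHandlerImpl::openDataset() internally; "E"/"x" are
            // illustrative record names.
            auto extent = iteration.meshes["E"]["x"].getExtent();
            std::cout << "step " << step << ": E/x has " << extent.size()
                      << " dimensions\n";
        }
    }
    catch (openPMD::error::ReadError const &err)
    {
        // Previously a plain std::runtime_error from VERIFY(); now a
        // structured ReadError carrying AffectedObject and Reason.
        std::cerr << "could not open dataset: " << err.what() << '\n';
        return 1;
    }
    return 0;
}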
