Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resizable Datasets #829

Merged
merged 3 commits into from
Mar 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions include/openPMD/Dataset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ class Dataset
public:
Dataset(Datatype, Extent, std::string options = "{}");

/**
 * @brief Constructor that leaves the datatype undefined.
 *
 * Helpful for resizing an existing dataset: its datatype is already
 * known, so it need not be specified a second time.
 */
Dataset( Extent );

Dataset& extend(Extent newExtent);
Dataset& setChunkSize(Extent const&);
Dataset& setCompression(std::string const&, uint8_t const);
Expand Down
4 changes: 1 addition & 3 deletions include/openPMD/IO/IOTask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,7 @@ template<>
struct OPENPMDAPI_EXPORT Parameter< Operation::EXTEND_DATASET > : public AbstractParameter
{
Parameter() = default;
Parameter(Parameter const & p) : AbstractParameter(),
name(p.name), extent(p.extent) {}
Parameter(Parameter const & p) : AbstractParameter(), extent(p.extent) {}

std::unique_ptr< AbstractParameter >
clone() const override
Expand All @@ -291,7 +290,6 @@ struct OPENPMDAPI_EXPORT Parameter< Operation::EXTEND_DATASET > : public Abstrac
new Parameter< Operation::EXTEND_DATASET >(*this));
}

std::string name = "";
Extent extent = {};
};

Expand Down
25 changes: 24 additions & 1 deletion include/openPMD/RecordComponent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,26 @@ class RecordComponent : public BaseRecordComponent

RecordComponent& setUnitSI(double);

RecordComponent& resetDataset(Dataset);
/**
* @brief Declare the dataset's type and extent.
*
* Calling this again after flushing will require resizing the dataset.
* Support for this depends on the backend.
* Unsupported are:
* * Changing the datatype.
* * Shrinking any dimension's extent.
* * Changing the number of dimensions.
*
* Backend support for resizing datasets:
* * JSON: Supported
* * ADIOS1: Unsupported
* * ADIOS2: Supported as of ADIOS2 2.7.0
* * HDF5: (Currently) unsupported.
 * Will probably be supported as soon as chunking is supported in HDF5.
*
* @return RecordComponent&
*/
RecordComponent & resetDataset( Dataset );

uint8_t getDimensionality() const;
Extent getExtent() const;
Expand Down Expand Up @@ -196,6 +215,10 @@ class RecordComponent : public BaseRecordComponent
std::shared_ptr< std::queue< IOTask > > m_chunks;
std::shared_ptr< Attribute > m_constantValue;
std::shared_ptr< bool > m_isEmpty = std::make_shared< bool >( false );
// User has extended the dataset, but the EXTEND task must yet be flushed
// to the backend
std::shared_ptr< bool > m_hasBeenExtended =
std::make_shared< bool >( false );

private:
void flush(std::string const&);
Expand Down
8 changes: 6 additions & 2 deletions src/Dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ Dataset::Dataset(Datatype d, Extent e, std::string options_in)
options{std::move(options_in)}
{ }

Dataset&
Dataset::extend(Extent newExtents)
// Delegating constructor: leaves the datatype as Datatype::UNDEFINED so that
// an existing dataset can be resized without restating its type.
Dataset::Dataset( Extent e ) : Dataset( Datatype::UNDEFINED, std::move( e ) )
{
}

Dataset &
Dataset::extend( Extent newExtents )
{
if( newExtents.size() != rank )
throw std::runtime_error("Dimensionality of extended Dataset must match the original dimensionality");
Expand Down
55 changes: 49 additions & 6 deletions src/IO/ADIOS/ADIOS2IOHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,15 +403,58 @@ void ADIOS2IOHandlerImpl::createDataset(
}
}

void ADIOS2IOHandlerImpl::extendDataset(
Writable *, const Parameter< Operation::EXTEND_DATASET > & )
namespace detail
{
struct DatasetExtender
{
template< typename T, typename... Args >
void
operator()(
adios2::IO & IO,
std::string const & variable,
Extent const & newShape )
{
auto var = IO.InquireVariable< T >( variable );
if( !var )
{
throw std::runtime_error(
"[ADIOS2] Unable to retrieve variable for resizing: '" +
variable + "'." );
}
adios2::Dims dims;
dims.reserve( newShape.size() );
for( auto ext : newShape )
{
dims.push_back( ext );
}
var.SetShape( dims );
}

std::string errorMsg = "ADIOS2: extendDataset()";
};
} // namespace detail

void
ADIOS2IOHandlerImpl::extendDataset(
Writable * writable,
const Parameter< Operation::EXTEND_DATASET > & parameters )
{
throw std::runtime_error(
"[ADIOS2] Dataset extension not implemented in ADIOS backend" );
VERIFY_ALWAYS(
m_handler->m_backendAccess != Access::READ_ONLY,
"[ADIOS2] Cannot extend datasets in read-only mode." );
setAndGetFilePosition( writable );
auto file = refreshFileFromParent( writable );
std::string name = nameOfVariable( writable );
auto & filedata = getFileData( file );
static detail::DatasetExtender de;
Datatype dt = detail::fromADIOS2Type( filedata.m_IO.VariableType( name ) );
switchAdios2VariableType( dt, de, filedata.m_IO, name, parameters.extent );
}

void ADIOS2IOHandlerImpl::openFile(
Writable * writable, const Parameter< Operation::OPEN_FILE > & parameters )
void
ADIOS2IOHandlerImpl::openFile(
Writable * writable,
const Parameter< Operation::OPEN_FILE > & parameters )
{
if ( !auxiliary::directory_exists( m_handler->directory ) )
{
Expand Down
45 changes: 27 additions & 18 deletions src/IO/HDF5/HDF5IOHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,25 +346,36 @@ HDF5IOHandlerImpl::extendDataset(Writable* writable,
if( !writable->written )
throw std::runtime_error("[HDF5] Extending an unwritten Dataset is not possible.");

auto file = getFile(writable->parent).get();
hid_t node_id, dataset_id;
node_id = H5Gopen(file.id,
concrete_h5_file_position(writable->parent).c_str(),
H5P_DEFAULT);
VERIFY(node_id >= 0, "[HDF5] Internal error: Failed to open HDF5 group during dataset extension");

/* Sanitize name */
std::string name = parameters.name;
if( auxiliary::starts_with(name, '/') )
name = auxiliary::replace_first(name, "/", "");
if( !auxiliary::ends_with(name, '/') )
name += '/';

dataset_id = H5Dopen(node_id,
name.c_str(),
auto res = getFile( writable );
if( !res )
res = getFile( writable->parent );
hid_t dataset_id = H5Dopen(res.get().id,
concrete_h5_file_position(writable).c_str(),
H5P_DEFAULT);
VERIFY(dataset_id >= 0, "[HDF5] Internal error: Failed to open HDF5 dataset during dataset extension");

// Datasets may only be extended if they have chunked layout, so let's see
// whether this one does
{
hid_t dataset_space = H5Dget_space( dataset_id );
int ndims = H5Sget_simple_extent_ndims( dataset_space );
VERIFY(
ndims >= 0,
"[HDF5]: Internal error: Failed to retrieve dimensionality of "
"dataset "
"during dataset read." );
hid_t propertyList = H5Dget_create_plist( dataset_id );
std::vector< hsize_t > chunkExtent( ndims, 0 );
int chunkDimensionality =
H5Pget_chunk( propertyList, ndims, chunkExtent.data() );
if( chunkDimensionality < 0 )
{
throw std::runtime_error(
"[HDF5] Cannot extend datasets unless written with chunked "
"layout (currently unsupported)." );
}
}

std::vector< hsize_t > size;
for( auto const& val : parameters.extent )
size.push_back(static_cast< hsize_t >(val));
Expand All @@ -375,8 +386,6 @@ HDF5IOHandlerImpl::extendDataset(Writable* writable,

status = H5Dclose(dataset_id);
VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 dataset during dataset extension");
status = H5Gclose(node_id);
VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 group during dataset extension");
}

void
Expand Down
53 changes: 42 additions & 11 deletions src/IO/JSON/JSONIOHandlerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,18 +248,42 @@ namespace openPMD
}
}

namespace
{
    /** Recursively overwrite entries of `into` with the entries of `from`,
     *  descending into arrays and skipping entries that `from` leaves null
     *  (so data already present in `into` survives where `from` has no
     *  value). Non-array nodes are copied over wholesale. */
    void
    mergeInto( nlohmann::json & into, nlohmann::json & from );
    void
    mergeInto( nlohmann::json & into, nlohmann::json & from )
    {
        if( from.is_array() )
        {
            auto const numEntries = from.size();
            for( size_t index = 0; index < numEntries; ++index )
            {
                auto & entry = from[ index ];
                if( !entry.is_null() )
                {
                    mergeInto( into[ index ], entry );
                }
            }
        }
        else
        {
            into = from; // leaf (or object): plain copy
        }
    }
} // namespace

void JSONIOHandlerImpl::extendDataset(
void
JSONIOHandlerImpl::extendDataset(
Writable * writable,
Parameter< Operation::EXTEND_DATASET > const & parameters
)
Parameter< Operation::EXTEND_DATASET > const & parameters )
{
VERIFY_ALWAYS(m_handler->m_backendAccess != Access::READ_ONLY,
VERIFY_ALWAYS(
m_handler->m_backendAccess != Access::READ_ONLY,
"[JSON] Cannot extend a dataset in read-only mode." )
refreshFileFromParent( writable );
setAndGetFilePosition( writable );
auto name = removeSlashes( parameters.name );
auto & j = obtainJsonContents( writable )[name];
refreshFileFromParent( writable );
auto & j = obtainJsonContents( writable );

try
{
Expand All @@ -280,25 +304,32 @@ namespace openPMD
}
} catch( json::basic_json::type_error & )
{
throw std::runtime_error( "[JSON] The specified location contains no valid dataset" );
throw std::runtime_error(
"[JSON] The specified location contains no valid dataset" );
}
switch( stringToDatatype( j[ "datatype" ].get< std::string >() ) )
{
case Datatype::CFLOAT:
case Datatype::CDOUBLE:
case Datatype::CLONG_DOUBLE:
{
// @todo test complex resizing
auto complexExtent = parameters.extent;
complexExtent.push_back( 2 );
j["data"] = initializeNDArray( complexExtent );
nlohmann::json newData = initializeNDArray( complexExtent );
nlohmann::json & oldData = j[ "data" ];
mergeInto( newData, oldData );
j[ "data" ] = newData;
break;
}
default:
j["data"] = initializeNDArray( parameters.extent );
nlohmann::json newData = initializeNDArray( parameters.extent );
nlohmann::json & oldData = j[ "data" ];
mergeInto( newData, oldData );
j[ "data" ] = newData;
break;
}
writable->written = true;

}

namespace
Expand Down
Loading