Skip to content

Commit

Permalink
Resizable Datasets (#829)
Browse files Browse the repository at this point in the history
* Frontend support for resizing datasets

* Backend support for resizing datasets

* Test resizable datasets
  • Loading branch information
franzpoeschel authored Mar 23, 2021
1 parent 2f1798f commit fa714a9
Show file tree
Hide file tree
Showing 10 changed files with 426 additions and 57 deletions.
8 changes: 8 additions & 0 deletions include/openPMD/Dataset.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ class Dataset
public:
Dataset(Datatype, Extent, std::string options = "{}");

/**
* @brief Constructor that sets the datatype to undefined.
*
* Helpful for resizing datasets, since datatypes need not be given twice.
*
*/
Dataset( Extent );

Dataset& extend(Extent newExtent);
Dataset& setChunkSize(Extent const&);
Dataset& setCompression(std::string const&, uint8_t const);
Expand Down
4 changes: 1 addition & 3 deletions include/openPMD/IO/IOTask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,7 @@ template<>
struct OPENPMDAPI_EXPORT Parameter< Operation::EXTEND_DATASET > : public AbstractParameter
{
Parameter() = default;
Parameter(Parameter const & p) : AbstractParameter(),
name(p.name), extent(p.extent) {}
Parameter(Parameter const & p) : AbstractParameter(), extent(p.extent) {}

std::unique_ptr< AbstractParameter >
clone() const override
Expand All @@ -291,7 +290,6 @@ struct OPENPMDAPI_EXPORT Parameter< Operation::EXTEND_DATASET > : public Abstrac
new Parameter< Operation::EXTEND_DATASET >(*this));
}

std::string name = "";
Extent extent = {};
};

Expand Down
25 changes: 24 additions & 1 deletion include/openPMD/RecordComponent.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,26 @@ class RecordComponent : public BaseRecordComponent

RecordComponent& setUnitSI(double);

RecordComponent& resetDataset(Dataset);
/**
* @brief Declare the dataset's type and extent.
*
* Calling this again after flushing will require resizing the dataset.
* Support for this depends on the backend.
* Unsupported are:
* * Changing the datatype.
* * Shrinking any dimension's extent.
* * Changing the number of dimensions.
*
* Backend support for resizing datasets:
* * JSON: Supported
* * ADIOS1: Unsupported
* * ADIOS2: Supported as of ADIOS2 2.7.0
* * HDF5: (Currently) unsupported.
 *     Will probably be supported once chunking is supported in the HDF5 backend.
*
* @return RecordComponent&
*/
RecordComponent & resetDataset( Dataset );

uint8_t getDimensionality() const;
Extent getExtent() const;
Expand Down Expand Up @@ -196,6 +215,10 @@ class RecordComponent : public BaseRecordComponent
std::shared_ptr< std::queue< IOTask > > m_chunks;
std::shared_ptr< Attribute > m_constantValue;
std::shared_ptr< bool > m_isEmpty = std::make_shared< bool >( false );
// User has extended the dataset, but the EXTEND task must yet be flushed
// to the backend
std::shared_ptr< bool > m_hasBeenExtended =
std::make_shared< bool >( false );

private:
void flush(std::string const&);
Expand Down
8 changes: 6 additions & 2 deletions src/Dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,12 @@ Dataset::Dataset(Datatype d, Extent e, std::string options_in)
options{std::move(options_in)}
{ }

Dataset&
Dataset::extend(Extent newExtents)
// Extent-only constructor: delegates to the main constructor with
// Datatype::UNDEFINED. Useful for resize operations, where the datatype is
// already known and need not be restated.
Dataset::Dataset( Extent e ) : Dataset( Datatype::UNDEFINED, std::move( e ) )
{
}

Dataset &
Dataset::extend( Extent newExtents )
{
if( newExtents.size() != rank )
throw std::runtime_error("Dimensionality of extended Dataset must match the original dimensionality");
Expand Down
55 changes: 49 additions & 6 deletions src/IO/ADIOS/ADIOS2IOHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,15 +403,58 @@ void ADIOS2IOHandlerImpl::createDataset(
}
}

void ADIOS2IOHandlerImpl::extendDataset(
Writable *, const Parameter< Operation::EXTEND_DATASET > & )
namespace detail
{
struct DatasetExtender
{
template< typename T, typename... Args >
void
operator()(
adios2::IO & IO,
std::string const & variable,
Extent const & newShape )
{
auto var = IO.InquireVariable< T >( variable );
if( !var )
{
throw std::runtime_error(
"[ADIOS2] Unable to retrieve variable for resizing: '" +
variable + "'." );
}
adios2::Dims dims;
dims.reserve( newShape.size() );
for( auto ext : newShape )
{
dims.push_back( ext );
}
var.SetShape( dims );
}

std::string errorMsg = "ADIOS2: extendDataset()";
};
} // namespace detail

void
ADIOS2IOHandlerImpl::extendDataset(
Writable * writable,
const Parameter< Operation::EXTEND_DATASET > & parameters )
{
throw std::runtime_error(
"[ADIOS2] Dataset extension not implemented in ADIOS backend" );
VERIFY_ALWAYS(
m_handler->m_backendAccess != Access::READ_ONLY,
"[ADIOS2] Cannot extend datasets in read-only mode." );
setAndGetFilePosition( writable );
auto file = refreshFileFromParent( writable );
std::string name = nameOfVariable( writable );
auto & filedata = getFileData( file );
static detail::DatasetExtender de;
Datatype dt = detail::fromADIOS2Type( filedata.m_IO.VariableType( name ) );
switchAdios2VariableType( dt, de, filedata.m_IO, name, parameters.extent );
}

void ADIOS2IOHandlerImpl::openFile(
Writable * writable, const Parameter< Operation::OPEN_FILE > & parameters )
void
ADIOS2IOHandlerImpl::openFile(
Writable * writable,
const Parameter< Operation::OPEN_FILE > & parameters )
{
if ( !auxiliary::directory_exists( m_handler->directory ) )
{
Expand Down
45 changes: 27 additions & 18 deletions src/IO/HDF5/HDF5IOHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,25 +346,36 @@ HDF5IOHandlerImpl::extendDataset(Writable* writable,
if( !writable->written )
throw std::runtime_error("[HDF5] Extending an unwritten Dataset is not possible.");

auto file = getFile(writable->parent).get();
hid_t node_id, dataset_id;
node_id = H5Gopen(file.id,
concrete_h5_file_position(writable->parent).c_str(),
H5P_DEFAULT);
VERIFY(node_id >= 0, "[HDF5] Internal error: Failed to open HDF5 group during dataset extension");

/* Sanitize name */
std::string name = parameters.name;
if( auxiliary::starts_with(name, '/') )
name = auxiliary::replace_first(name, "/", "");
if( !auxiliary::ends_with(name, '/') )
name += '/';

dataset_id = H5Dopen(node_id,
name.c_str(),
auto res = getFile( writable );
if( !res )
res = getFile( writable->parent );
hid_t dataset_id = H5Dopen(res.get().id,
concrete_h5_file_position(writable).c_str(),
H5P_DEFAULT);
VERIFY(dataset_id >= 0, "[HDF5] Internal error: Failed to open HDF5 dataset during dataset extension");

// Datasets may only be extended if they have chunked layout, so let's see
// whether this one does
{
hid_t dataset_space = H5Dget_space( dataset_id );
int ndims = H5Sget_simple_extent_ndims( dataset_space );
VERIFY(
ndims >= 0,
"[HDF5]: Internal error: Failed to retrieve dimensionality of "
"dataset "
"during dataset read." );
hid_t propertyList = H5Dget_create_plist( dataset_id );
std::vector< hsize_t > chunkExtent( ndims, 0 );
int chunkDimensionality =
H5Pget_chunk( propertyList, ndims, chunkExtent.data() );
if( chunkDimensionality < 0 )
{
throw std::runtime_error(
"[HDF5] Cannot extend datasets unless written with chunked "
"layout (currently unsupported)." );
}
}

std::vector< hsize_t > size;
for( auto const& val : parameters.extent )
size.push_back(static_cast< hsize_t >(val));
Expand All @@ -375,8 +386,6 @@ HDF5IOHandlerImpl::extendDataset(Writable* writable,

status = H5Dclose(dataset_id);
VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 dataset during dataset extension");
status = H5Gclose(node_id);
VERIFY(status == 0, "[HDF5] Internal error: Failed to close HDF5 group during dataset extension");
}

void
Expand Down
53 changes: 42 additions & 11 deletions src/IO/JSON/JSONIOHandlerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,18 +248,42 @@ namespace openPMD
}
}

namespace
{
    /**
     * @brief Recursively merge previously written dataset contents into a
     *        freshly initialized (resized) n-dimensional JSON array.
     *
     * Non-null entries of @p from overwrite the corresponding entries of
     * @p into; null entries (positions never written in the old dataset)
     * leave the newly initialized values of @p into untouched.
     *
     * @param into Destination array, already sized to the new extent.
     * @param from Source array holding the previous dataset contents.
     *             Read-only: taken by const reference.
     */
    void
    mergeInto( nlohmann::json & into, nlohmann::json const & from )
    {
        if( !from.is_array() )
        {
            // Leaf value: plain copy.
            into = from;
        }
        else
        {
            size_t size = from.size();
            for( size_t i = 0; i < size; ++i )
            {
                // Null marks a cell that was never written in the old data;
                // keep the freshly initialized value in that case.
                if( !from[ i ].is_null() )
                {
                    mergeInto( into[ i ], from[ i ] );
                }
            }
        }
    }
} // namespace

void JSONIOHandlerImpl::extendDataset(
void
JSONIOHandlerImpl::extendDataset(
Writable * writable,
Parameter< Operation::EXTEND_DATASET > const & parameters
)
Parameter< Operation::EXTEND_DATASET > const & parameters )
{
VERIFY_ALWAYS(m_handler->m_backendAccess != Access::READ_ONLY,
VERIFY_ALWAYS(
m_handler->m_backendAccess != Access::READ_ONLY,
"[JSON] Cannot extend a dataset in read-only mode." )
refreshFileFromParent( writable );
setAndGetFilePosition( writable );
auto name = removeSlashes( parameters.name );
auto & j = obtainJsonContents( writable )[name];
refreshFileFromParent( writable );
auto & j = obtainJsonContents( writable );

try
{
Expand All @@ -280,25 +304,32 @@ namespace openPMD
}
} catch( json::basic_json::type_error & )
{
throw std::runtime_error( "[JSON] The specified location contains no valid dataset" );
throw std::runtime_error(
"[JSON] The specified location contains no valid dataset" );
}
switch( stringToDatatype( j[ "datatype" ].get< std::string >() ) )
{
case Datatype::CFLOAT:
case Datatype::CDOUBLE:
case Datatype::CLONG_DOUBLE:
{
// @todo test complex resizing
auto complexExtent = parameters.extent;
complexExtent.push_back( 2 );
j["data"] = initializeNDArray( complexExtent );
nlohmann::json newData = initializeNDArray( complexExtent );
nlohmann::json & oldData = j[ "data" ];
mergeInto( newData, oldData );
j[ "data" ] = newData;
break;
}
default:
j["data"] = initializeNDArray( parameters.extent );
nlohmann::json newData = initializeNDArray( parameters.extent );
nlohmann::json & oldData = j[ "data" ];
mergeInto( newData, oldData );
j[ "data" ] = newData;
break;
}
writable->written = true;

}

namespace
Expand Down
Loading

0 comments on commit fa714a9

Please sign in to comment.