Skip to content

Commit

Permalink
Clean up Dataspaces (#1104)
Browse files Browse the repository at this point in the history
* Clean up Dataspaces

- define HDF5.UNLIMITED constant for unlimited values
- improve printing of Dataspace objects
- define Dataspace constructors, deprecate methods for dataspace functions

* add to docs

* fix errors

* deprecate some create_dataset methods

* rearrange, fix tests

* more deprecations

* add newline

* create_attribute fixes

* format

* simplify create_dataset dispatch, clean up docs

* make UNLIMITED an Int

* make create_attribute consistent with create_dataset

* rearrange hyperslab tests, support colons

* update docs, fix deprecations in tests

* fix tests

* improve deprecation warnings

* split up docstrings, add more links

* reorder Windows tests to avoid weird bug
  • Loading branch information
simonbyrne authored Dec 23, 2023
1 parent ddc33ec commit 4568054
Show file tree
Hide file tree
Showing 24 changed files with 619 additions and 448 deletions.
12 changes: 6 additions & 6 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -294,11 +294,11 @@ useful to incrementally save to very large datasets you don't want to keep in
memory. For example,

```julia
dset = create_dataset(g, "B", datatype(Float64), dataspace(1000,100,10), chunk=(100,100,1))
dset = create_dataset(g, "B", Float64, (1000,100,10), chunk=(100,100,1))
dset[:,1,1] = rand(1000)
```

creates a Float64 dataset in the file or group `g`, with dimensions 1000x100x10, and then
creates a `Float64` dataset in the file or group `g`, with dimensions 1000x100x10, and then
writes to just the first 1000 element slice.
If you know the typical size of the subset regions you'll be reading/writing, it can be beneficial to set the chunk dimensions appropriately.

Expand Down Expand Up @@ -330,7 +330,7 @@ to.
The following fails:

```julia
vec_dset = create_dataset(g, "v", datatype(Float64), dataspace(10_000,1))
vec_dset = create_dataset(g, "v", Float64, (10_000,1))
HDF5.ismmappable(vec_dset) # == true
vec = HDF5.readmmap(vec_dset) # throws ErrorException("Error mmapping array")
```
Expand All @@ -348,7 +348,7 @@ Alternatively, the policy can be set so that the space is allocated immediately
creation of the data set with the `alloc_time` keyword:

```julia
mtx_dset = create_dataset(g, "M", datatype(Float64), dataspace(100, 1000),
mtx_dset = create_dataset(g, "M", Float64, (100, 1000),
alloc_time = HDF5.H5D_ALLOC_TIME_EARLY)
mtx = HDF5.readmmap(mtx_dset) # succeeds immediately
```
Expand Down Expand Up @@ -577,14 +577,14 @@ write_attribute(parent, name, data)
You can use extendible dimensions,

```julia
d = create_dataset(parent, name, dtype, (dims, max_dims), chunk=(chunk_dims))
d = create_dataset(parent, name, dtype, dims; max_dims=max_dims, chunk=(chunk_dims))
HDF5.set_extent_dims(d, new_dims)
```

where dims is a tuple of integers. For example

```julia
b = create_dataset(fid, "b", Int, ((1000,),(-1,)), chunk=(100,)) #-1 is equivalent to typemax(hsize_t)
b = create_dataset(fid, "b", Int, (1000,); max_dims=(HDF5.UNLIMITED,), chunk=(100,)) # HDF5.UNLIMITED is equivalent to typemax(hsize_t)
HDF5.set_extent_dims(b, (10000,))
b[1:10000] = collect(1:10000)
```
Expand Down
2 changes: 2 additions & 0 deletions docs/src/interface/dataspaces.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ CurrentModule = HDF5
```@docs
Dataspace
dataspace
UNLIMITED
isnull
get_extent_dims
set_extent_dims
Expand All @@ -18,4 +19,5 @@ set_extent_dims
BlockRange
select_hyperslab!
get_regular_hyperslab
is_selection_valid
```
2 changes: 1 addition & 1 deletion docs/src/mpi.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ A = fill(myrank, M) # local data
dims = (M, Nproc) # dimensions of global data

# Create dataset
dset = create_dataset(ff, "/data", datatype(eltype(A)), dataspace(dims))
dset = create_dataset(ff, "/data", eltype(A), dims)

# Write local data
dset[:, myrank + 1] = A
Expand Down
1 change: 1 addition & 0 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ export @read,
create_property,
group_info,
object_info,
Dataspace,
dataspace,
datatype,
Filters,
Expand Down
35 changes: 0 additions & 35 deletions src/api_midlevel.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# This file defines midlevel api wrappers. We include name normalization for methods that are
# applicable to different hdf5 api-layers. We still try to adhere close proximity to the underlying
# method name in the hdf5-library.

"""
HDF5.set_extent_dims(dset::HDF5.Dataset, new_dims::Dims)
Expand All @@ -13,40 +12,6 @@ function set_extent_dims(dset::Dataset, size::Dims)
API.h5d_set_extent(dset, API.hsize_t[reverse(size)...])
end

"""
HDF5.set_extent_dims(dspace::HDF5.Dataspace, new_dims::Dims, max_dims::Union{Dims,Nothing} = nothing)
Change the dimensions of a dataspace `dspace` to `new_dims`, optionally with the maximum possible
dimensions `max_dims` different from the active size `new_dims`. If not given, `max_dims` is set equal
to `new_dims`.
"""
function set_extent_dims(
dspace::Dataspace, size::Dims, max_dims::Union{Dims,Nothing}=nothing
)
checkvalid(dspace)
rank = length(size)
current_size = API.hsize_t[reverse(size)...]
maximum_size = isnothing(max_dims) ? C_NULL : [reverse(max_dims .% API.hsize_t)...]
API.h5s_set_extent_simple(dspace, rank, current_size, maximum_size)
return nothing
end

"""
HDF5.get_extent_dims(obj::Union{HDF5.Dataspace, HDF5.Dataset, HDF5.Attribute}) -> dims, maxdims
Get the array dimensions from a dataspace, dataset, or attribute and return a tuple of `dims` and `maxdims`.
"""
function get_extent_dims(obj::Union{Dataspace,Dataset,Attribute})
dspace = obj isa Dataspace ? checkvalid(obj) : dataspace(obj)
h5_dims, h5_maxdims = API.h5s_get_simple_extent_dims(dspace)
# reverse dimensions since hdf5 uses C-style order
N = length(h5_dims)
dims = ntuple(i -> @inbounds(Int(h5_dims[N - i + 1])), N)
maxdims = ntuple(i -> @inbounds(h5_maxdims[N - i + 1]) % Int, N) # allows max_dims to be specified as -1 without triggering an overflow
obj isa Dataspace || close(dspace)
return dims, maxdims
end

"""
HDF5.get_chunk_offset(dataset_id, index)
Expand Down
54 changes: 41 additions & 13 deletions src/attributes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,49 @@ open_attribute(
) = Attribute(API.h5a_open(checkvalid(parent), name, aapl), file(parent))

"""
create_attribute(parent::Union{File,Object}, name::AbstractString, dtype::Datatype, space::Dataspace)
create_attribute(parent::Union{File,Object}, name::AbstractString, data)
create_attribute(
parent::Union{File,Object},
name::AbstractString,
dtype::Union{Datatype, Type},
dspace::Union{Dataspace, Dims, Nothing}
)
Create a new [`Attribute`](@ref) object named `name` on the object `parent`,
either by specifying the `Datatype` and `Dataspace` of the attribute, or by
providing the data. Note that no data will be written: use
[`write_attribute`](@ref) to write the data.
with the corresponding [`Datatype`](@ref) and [`Dataspace`](@ref).
"""
# Core method: create the attribute through the HDF5 C API (h5a_create) after
# validating the parent handle, then wrap the returned id in an `Attribute`
# associated with the parent's file.
function create_attribute(
parent::Union{File,Object}, name::AbstractString, dtype::Datatype, dspace::Dataspace
)
attrid = API.h5a_create(
checkvalid(parent), name, dtype, dspace, _attr_properties(name), API.H5P_DEFAULT
)
return Attribute(attrid, file(parent))
end
# Convenience method: accept raw dimensions (or `nothing`) for the dataspace,
# convert via the `Dataspace` constructor, and forward to the core method.
create_attribute(
parent::Union{File,Object},
name::AbstractString,
dtype::Datatype,
dspace::Union{Dims,Nothing}
) = create_attribute(parent, name, dtype, Dataspace(dspace))
# Convenience method: accept a Julia `Type`, convert it to an HDF5 `Datatype`
# with `datatype`, and forward; `dspace` is passed through unchanged.
create_attribute(
parent::Union{File,Object},
name::AbstractString,
dtype::Type,
dspace::Union{Dataspace,Dims,Nothing}
) = create_attribute(parent, name, datatype(dtype), dspace)

"""
create_attribute(
parent::Union{File,Object},
name::AbstractString,
data
) -> Attribute, Datatype
Create a new [`Attribute`](@ref) object named `name` on the object `parent` for
the object `data`, returning both the `Attribute` and the [`Datatype`](@ref).
Note that no data will be written: use [`write_attribute`](@ref) to write the
data.
"""
function create_attribute(parent::Union{File,Object}, name::AbstractString, data; pv...)
dtype = datatype(data)
Expand All @@ -88,14 +124,6 @@ function create_attribute(parent::Union{File,Object}, name::AbstractString, data
end
return obj, dtype
end
function create_attribute(
parent::Union{File,Object}, name::AbstractString, dtype::Datatype, dspace::Dataspace
)
attrid = API.h5a_create(
checkvalid(parent), name, dtype, dspace, _attr_properties(name), API.H5P_DEFAULT
)
return Attribute(attrid, file(parent))
end

# generic method
function write_attribute(attr::Attribute, memtype::Datatype, x::T) where {T}
Expand Down
90 changes: 33 additions & 57 deletions src/datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,37 @@ end

# Setting dset creation properties with name/value pairs
"""
create_dataset(parent, path, datatype, dataspace; properties...)
create_dataset(
parent::Union{File, Group},
path::Union{AbstractString, Nothing},
datatype::Union{Datatype, Type},
dataspace::Union{Dataspace, Dims, Nothing};
properties...)
# Arguments
* `parent` - `File` or `Group`
* `path` - `String` describing the path of the dataset within the HDF5 file or
`nothing` to create an anonymous dataset
* `datatype` - `Datatype` or `Type` of the dataset
* `dataspace` - `Dataspace` or `Dims` of the dataset
* `parent`: parent file `File` or `Group`.
* `path`: `String` describing the path of the dataset within the HDF5 file, or
`nothing` to create an anonymous dataset
* `datatype` - [`Datatype`](@ref) or `Type` of the dataset
* `dataspace` - [`Dataspace`](@ref) or `Dims` of the dataset. If `nothing`, then
it will create a null (empty) dataset.
* `properties` - keyword name-value pairs set properties of the dataset
# Keywords
There are many keyword properties that can be set. Below are a few select keywords.
There are many keyword properties that can be set. Below are a few select
keywords.
* `max_dims` - `Dims` describing the maximum size of the dataset. Required for
resizable datasets. Unlimited dimensions are denoted by [`HDF5.UNLIMITED`](@ref).
* `chunk` - `Dims` describing the size of a chunk. Needed to apply filters.
* `filters` - `AbstractVector{<: Filters.Filter}` describing the order of the filters to apply to the data. See [`Filters`](@ref)
* `external` - `Tuple{AbstractString, Integer, Integer}` `(filepath, offset, filesize)` External dataset file location, data offset, and file size. See [`API.h5p_set_external`](@ref).
Additionally, the initial create, transfer, and access properties can be provided as a keyword:
* `filters` - `AbstractVector{<: Filters.Filter}` describing the order of the
filters to apply to the data. See [`Filters`](@ref)
* `external` - `Tuple{AbstractString, Integer, Integer}` `(filepath, offset,
  filesize)` External dataset file location, data offset, and file size. See
  [`API.h5p_set_external`](@ref).
Additionally, the initial create, transfer, and access properties can be
provided as a keyword:
* `dcpl` - [`DatasetCreateProperties`](@ref)
* `dxpl` - [`DatasetTransferProperties`](@ref)
* `dapl` - [`DatasetAccessProperties`](@ref)
Expand All @@ -69,64 +82,27 @@ function create_dataset(
pv = setproperties!(dcpl, dxpl, dapl; pv...)
isempty(pv) || error("invalid keyword options")
if isnothing(path)
ds = API.h5d_create_anon(parent, dtype, dspace, dcpl, dapl)
ds = API.h5d_create_anon(checkvalid(parent), dtype, dspace, dcpl, dapl)
else
ds = API.h5d_create(parent, path, dtype, dspace, _link_properties(path), dcpl, dapl)
ds = API.h5d_create(
checkvalid(parent), path, dtype, dspace, _link_properties(path), dcpl, dapl
)
end
Dataset(ds, file(parent), dxpl)
end
create_dataset(
parent::Union{File,Group},
path::Union{AbstractString,Nothing},
dtype::Datatype,
dspace_dims::Dims;
pv...
) = create_dataset(checkvalid(parent), path, dtype, dataspace(dspace_dims); pv...)
create_dataset(
parent::Union{File,Group},
path::Union{AbstractString,Nothing},
dtype::Datatype,
dspace_dims::Tuple{Dims,Dims};
pv...
) = create_dataset(
checkvalid(parent),
path,
dtype,
dataspace(dspace_dims[1]; max_dims=dspace_dims[2]);
pv...
)
create_dataset(
parent::Union{File,Group},
path::Union{AbstractString,Nothing},
dtype::Type,
dspace_dims::Tuple{Dims,Dims};
pv...
) = create_dataset(
checkvalid(parent),
path,
datatype(dtype),
dataspace(dspace_dims[1]; max_dims=dspace_dims[2]);
pv...
)
create_dataset(
parent::Union{File,Group},
path::Union{AbstractString,Nothing},
dtype::Type,
dspace_dims::Dims;
dspace_dims::Union{Dims,Nothing};
max_dims=nothing,
pv...
) = create_dataset(checkvalid(parent), path, datatype(dtype), dataspace(dspace_dims); pv...)
) = create_dataset(parent, path, dtype, Dataspace(dspace_dims; max_dims); pv...)
create_dataset(
parent::Union{File,Group},
path::Union{AbstractString,Nothing},
dtype::Type,
dspace_dims::Int...;
pv...
) = create_dataset(checkvalid(parent), path, datatype(dtype), dataspace(dspace_dims); pv...)
# Convenience method: accept a Julia `Type`, validate the parent, convert the
# type with `datatype`, and forward to the `Datatype`-based method; `dspace`
# and any property keywords (`pv...`) are passed through unchanged.
create_dataset(
parent::Union{File,Group},
path::Union{AbstractString,Nothing},
dtype::Type,
dspace::Union{Dataspace,Dims,Nothing};
pv...
) = create_dataset(checkvalid(parent), path, datatype(dtype), dspace; pv...)

Expand Down Expand Up @@ -459,7 +435,7 @@ function create_external_dataset(
sz::Dims,
offset::Integer=0
)
create_external_dataset(parent, name, filepath, datatype(t), dataspace(sz), offset)
create_external_dataset(parent, name, filepath, datatype(t), Dataspace(sz), offset)
end
function create_external_dataset(
parent::Union{File,Group},
Expand Down
Loading

0 comments on commit 4568054

Please sign in to comment.