working on graph loading of nwb files
sneakers-the-rat committed Aug 31, 2024
1 parent b555ccb commit 49585e4
Showing 8 changed files with 199 additions and 47 deletions.
4 changes: 2 additions & 2 deletions nwb_linkml/pdm.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions nwb_linkml/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "numpydantic>=1.3.3",
     "black>=24.4.2",
     "pandas>=2.2.2",
+    "networkx>=3.3",
 ]
 
 [project.urls]
83 changes: 82 additions & 1 deletion nwb_linkml/src/nwb_linkml/io/hdf5.py
@@ -22,6 +22,7 @@
 
 import json
 import os
+import re
 import shutil
 import subprocess
 import sys
@@ -31,11 +31,12 @@
 from typing import TYPE_CHECKING, Dict, List, Optional, Union, overload
 
 import h5py
+import networkx as nx
 import numpy as np
 from pydantic import BaseModel
 from tqdm import tqdm
 
-from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf
+from nwb_linkml.maps.hdf5 import ReadPhases, ReadQueue, flatten_hdf, get_references

if TYPE_CHECKING:
    from nwb_linkml.providers.schema import SchemaProvider

@@ -47,6 +49,85 @@
from typing_extensions import Never


def hdf_dependency_graph(h5f: Path | str | h5py.File) -> nx.DiGraph:
    """
    Directed dependency graph of dataset and group nodes in an NWBFile such that
    each node ``n_i`` is connected to node ``n_j`` if

    * ``n_j`` is ``n_i``'s child
    * ``n_i`` contains a reference to ``n_j``

    References are resolved in

    * Attributes
    * Dataset columns
    * Compound dtypes

    Args:
        h5f (:class:`pathlib.Path` | :class:`h5py.File`): NWB file to graph

    Returns:
        :class:`networkx.DiGraph`
    """
    # detect nodes to skip
    skip_pattern = re.compile("^/specifications.*")

    if isinstance(h5f, (Path, str)):
        h5f = h5py.File(h5f, "r")

    g = nx.DiGraph()

    def _visit_item(name: str, node: h5py.Dataset | h5py.Group) -> None:
        # match on the absolute path: visititems passes names relative to the root
        if skip_pattern.match(node.name):
            return
        # find references in attributes
        refs = get_references(node)
        if isinstance(node, h5py.Group):
            refs.extend([child.name for child in node.values()])
        refs = set(refs)

        # add edges
        edges = [(node.name, ref) for ref in refs]
        g.add_edges_from(edges)

        # ensure the node is added even if it has no edges
        if len(edges) == 0:
            g.add_node(node.name)

        # store attrs in node
        g.nodes[node.name].update(node.attrs)

    # apply to the root group, which visititems skips
    _visit_item(h5f.name, h5f)

    h5f.visititems(_visit_item)
    return g


def filter_dependency_graph(g: nx.DiGraph) -> nx.DiGraph:
    """
    Remove nodes from a dependency graph if they

    * have no neurodata type AND have no outbound edges, OR
    * are a VectorData (which are handled by the dynamictable mixins)
    """
    remove_nodes = []
    node: str
    for node in g.nodes.keys():
        ndtype = g.nodes[node].get("neurodata_type", None)
        if ndtype == "VectorData":
            remove_nodes.append(node)
        elif not ndtype and g.out_degree(node) == 0:
            remove_nodes.append(node)

    g.remove_nodes_from(remove_nodes)
    return g


class HDF5IO:
    """
    Read (and eventually write) from an NWB HDF5 file.
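A minimal sketch of how hdf_dependency_graph and filter_dependency_graph might be consumed downstream; the file name and the topological-sort step are illustrative assumptions, not part of this commit:

from pathlib import Path

import networkx as nx

from nwb_linkml.io.hdf5 import filter_dependency_graph, hdf_dependency_graph

# build and prune the dependency graph ("my_data.nwb" is a placeholder path)
graph = filter_dependency_graph(hdf_dependency_graph(Path("my_data.nwb")))

# edges point parent -> child and referrer -> referent, so when the graph is
# acyclic, a reversed topological sort yields referents before referrers,
# one plausible order in which to instantiate models
load_order = list(reversed(list(nx.topological_sort(graph))))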
2 changes: 1 addition & 1 deletion nwb_linkml/src/nwb_linkml/maps/hdf5.py
@@ -859,7 +859,7 @@ def get_references(obj: h5py.Dataset | h5py.Group) -> List[str]:
         # scalar
         if isinstance(obj[()], h5py.h5r.Reference):
             refs.append(obj[()])
-        elif isinstance(obj[0], h5py.h5r.Reference):
+        elif len(obj) > 0 and isinstance(obj[0], h5py.h5r.Reference):
             # single-column
             refs.extend(obj[:].tolist())
         elif len(obj.dtype) > 1:
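The added len() guard matters because indexing into an empty dataset raises before the isinstance check can run. A minimal reproduction, with hypothetical file and dataset names:

import h5py

with h5py.File("scratch.h5", "w") as f:
    empty = f.create_dataset("empty_refs", shape=(0,), dtype=h5py.ref_dtype)
    # a zero-length dataset has no element 0, so empty[0] raises;
    # the len(empty) > 0 check short-circuits before the indexing happens
    if len(empty) > 0 and isinstance(empty[0], h5py.h5r.Reference):
        print(empty[:].tolist())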
67 changes: 25 additions & 42 deletions nwb_linkml/tests/data/test_nwb.yaml
@@ -1,46 +1,28 @@
 # manually transcribed target version of nwb-linkml dataset
 # matching the one created by fixtures.py:nwb_file
 ---
-id: my_dataset
-
-prefixes:
-  nwbfile:
-    - path: "test_nwb.nwb"
-    - hash: "blake2b:blahblahblahblah"
-
-imports:
-  core:
-    as: nwb
-    version: "2.7.0"
-    from:
-      - pypi:
-          package: nwb-models
-  hdmf-common:
-    as: hdmf
-    version: "1.8.0"
-    from:
-      - pypi:
-          package: nwb-models
----
+meta:
+  id: my_dataset
+
+  prefixes:
+    nwbfile:
+      - path: "test_nwb.nwb"
+      - hash: "blake2b:blahblahblahblah"
+
+  imports:
+    core:
+      as: nwb
+      version: "2.7.0"
+      from:
+        - pypi:
+            package: nwb-models
+    hdmf-common:
+      as: hdmf
+      version: "1.8.0"
+      from:
+        - pypi:
+            package: nwb-models
 
-extracellular_ephys: &ecephys
-  electrodes:
-    group:
-      - @shank{{i}}
-      - @shank{{i}}
-      - @shank{{i}}
-      # could have expression here like { range(3) } => i
-      # - ... { range(3) } => i
-      # or blank ... implies use expression from outer scope
-      - ...
-  shank{{i}}:
-    device: @general.devices.array
-    ...: { range(3) } => i
-
-# expands to
 extracellular_ephys:
   electrodes:
     group:
       - @shank0
@@ -54,7 +36,7 @@ extracellular_ephys:
     device: @general.devices.array
     # etc.
 
-data: !{{ nwb.NWBFile }} <== :nwbfile
+data: !nwb.NWBFile
   file_create_date: [ 2024-01-01 ]
   identifier: "1111-1111-1111-1111"
   session_description: All that you touch, you change.
@@ -63,11 +45,12 @@ data: !{{ nwb.NWBFile }} <== :nwbfile
   general:
     devices:
       - Heka ITC-1600:
       - Microscope:
+          description: My two-photon microscope
+          manufacturer: The best microscope manufacturer
       - array:
          description: old reliable
          manufacturer: diy
-    extracellular_ephys: *ecephys
+    extracellular_ephys: nwbfile:/general/extracellular_ephys
 
   experiment_description: All that you change, changes you.
   experimenter: [ "Lauren Oya Olamina" ]
   institution: Earthseed Research Institute
76 changes: 76 additions & 0 deletions nwb_linkml/tests/data/test_nwb_condensed_sketch.yaml
@@ -0,0 +1,76 @@
# Sketch of a condensed expression syntax for creation with nwb-linkml
# just a sketch! keeping here for continued work but currently unused.
---
id: my_dataset

prefixes:
  nwbfile:
    - path: "test_nwb.nwb"
    - hash: "blake2b:blahblahblahblah"

imports:
  core:
    as: nwb
    version: "2.7.0"
    from:
      - pypi:
          package: nwb-models
  hdmf-common:
    as: hdmf
    version: "1.8.0"
    from:
      - pypi:
          package: nwb-models
---

extracellular_ephys: &ecephys
  electrodes:
    group:
      - @shank{{i}}
      - @shank{{i}}
      - @shank{{i}}
      # could have expression here like { range(3) } => i
      # - ... { range(3) } => i
      # or blank ... implies use expression from outer scope
      - ...
  shank{{i}}:
    device: @general.devices.array
    ...: { range(3) } => i

# expands to
extracellular_ephys:
  electrodes:
    group:
      - @shank0
      - @shank0
      - @shank0
      - @shank1
      - # etc.
  shank0:
    device: @general.devices.array
  shank1:
    device: @general.devices.array
  # etc.

data: !{{ nwb.NWBFile }} <== :nwbfile
  file_create_date: [ 2024-01-01 ]
  identifier: "1111-1111-1111-1111"
  session_description: All that you touch, you change.
  session_start_time: 2024-01-01T01:01:01
  general:
    devices:
      - Heka ITC-1600:
      - Microscope:
      - array:
          description: old reliable
          manufacturer: diy
    extracellular_ephys: *ecephys

  experiment_description: All that you change, changes you.
  experimenter: [ "Lauren Oya Olamina" ]
  institution: Earthseed Research Institute
  keywords:
    - behavior
    - belief
  related_publications: doi:10.1016/j.neuron.2016.12.011

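A toy Python sketch of what the key-template rule ...: { range(3) } => i in the sketch above is meant to do; nothing here is implemented anywhere, and all names are illustrative:

def expand(template: dict, var: str, values: range) -> dict:
    """Expand keys like 'shank{{i}}' over the given values."""
    expanded = {}
    for i in values:
        for key, val in template.items():
            expanded[key.replace("{{" + var + "}}", str(i))] = val
    return expanded

# {'shank0': {...}, 'shank1': {...}, 'shank2': {...}}
print(expand({"shank{{i}}": {"device": "@general.devices.array"}}, "i", range(3)))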
2 changes: 2 additions & 0 deletions nwb_linkml/tests/fixtures.py
@@ -349,6 +349,8 @@ def nwb_file(tmp_output_dir) -> Path:
     generator = np.random.default_rng()
 
     nwb_path = tmp_output_dir / "test_nwb.nwb"
+    if nwb_path.exists():
+        return nwb_path
 
     nwbfile = NWBFile(
         session_description="All that you touch, you change.",  # required
11 changes: 10 additions & 1 deletion nwb_linkml/tests/test_io/test_io_hdf5.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pytest
 
-from nwb_linkml.io.hdf5 import HDF5IO, truncate_file
+from nwb_linkml.io.hdf5 import HDF5IO, truncate_file, hdf_dependency_graph, filter_dependency_graph
 

@pytest.mark.skip()
@@ -98,3 +98,12 @@ def test_flatten_hdf():
     assert not any(["specifications" in v.path for v in flat.values()])
     pdb.set_trace()
     raise NotImplementedError("Just a stub for local testing for now, finish me!")


def test_dependency_graph(nwb_file):
    """
    dependency graph is correctly constructed from an HDF5 file
    """
    graph = hdf_dependency_graph(nwb_file)
    graph = filter_dependency_graph(graph)
    # stub: for now this only checks that graph construction doesn't error
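Assertions the stub above might grow into, based only on what the graph construction stores; illustrative, not part of the commit:

def _check_graph(graph):
    # every reference target should itself be a node in the graph
    assert all(target in graph.nodes for _, target in graph.edges)
    # filtering should have dropped all VectorData nodes
    assert all(
        attrs.get("neurodata_type") != "VectorData"
        for _, attrs in graph.nodes(data=True)
    )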
