Test docs (#65)
* fix some doc code
* add tests for markdown docs
cbyrohl authored Aug 4, 2023
1 parent 226bc02 commit e404be8
Showing 8 changed files with 119 additions and 18 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]


+### Added
+- test docs
+
+### Fixed
+- various bugs related to dask operations using pint quantities.

## [0.2.2] - 2023-07-11

32 changes: 25 additions & 7 deletions docs/derived_fields.md
Expand Up @@ -9,9 +9,9 @@ There are two ways to create new derived fields. For quick analysis, we can simp

``` py
from scida import load
ds = load("somedataset") # (1)!
ds = load("TNG50-4_snapshot") # (1)!
gas = ds.data['gas']
-kineticenergy = 0.5*gas['Masses']*gas['Velocities']**2
+kineticenergy = 0.5*gas['Masses']*(gas['Velocities']**2).sum(axis=1)
```

1. In this example, we assume a dataset, such as the 'TNG50-4\_snapshot' test dataset, that has its fields (*Masses*, *Velocities*) nested by particle type (*gas*)
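
For orientation, a minimal sketch of evaluating such a lazily defined field, assuming the `TNG50-4_snapshot` dataset loaded above:

``` py
# evaluation is lazy; .compute() materializes the result via dask
print(kineticenergy[:10].compute())
```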
@@ -33,15 +33,16 @@ For this purpose, **field recipes** are available. An example of such recipe is


``` py
+import dask.array as da
import numpy as np

from scida import load
ds = load("somedataset")
ds = load("TNG50-4_snapshot")

-@snap.register_field("stars") # (1)!
+@ds.register_field("stars") # (1)!
def VelMag(arrs, **kwargs):
-    import dask.array as da
    vel = arrs['Velocities']
-    return np.sqrt( vel[:,0]**2 + vel[:,1]**2 + vel[:,2]**2 )
+    return da.sqrt(vel[:,0]**2 + vel[:,1]**2 + vel[:,2]**2)
```

1. Here, *stars* is the name of the **field container** the field should be added to. The field will now be available as ds\['stars'\]\['VelMag'\]
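
A short usage sketch, assuming the snapshot and the registration from above:

``` py
vmag = ds.data["stars"]["VelMag"]  # the recipe resolves on first access
print(vmag[:10].compute())         # materialize a few values
```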
@@ -69,7 +70,24 @@ def Volume(arrs, **kwargs):
    return arrs["Masses"]/arrs["Density"]

@fielddefs.register_field("all") # (3)!
+def GroupDistance3D(arrs, snap=None):
+    """Returns distance to hosting group center. Returns rubbish if not actually associated with a group."""
+    import dask.array as da
+    boxsize = snap.header["BoxSize"]
+    pos_part = arrs["Coordinates"]
+    groupid = arrs["GroupID"]
+    if hasattr(groupid, "magnitude"):
+        groupid = groupid.magnitude
+        boxsize *= snap.ureg("code_length")
+    pos_cat = snap.data["Group"]["GroupPos"][groupid]
+    dist3 = pos_part-pos_cat
+    dist3 = da.where(dist3>boxsize/2.0, boxsize-dist3, dist3)
+    dist3 = da.where(dist3<=-boxsize/2.0, boxsize+dist3, dist3) # PBC
+    return dist3
+
+@fielddefs.register_field("all")
def GroupDistance(arrs, snap=None):
+    import dask.array as da
    dist3 = arrs["GroupDistance3D"]
    dist = da.sqrt((dist3**2).sum(axis=1))
    dist = da.where(arrs["GroupID"]==-1, np.nan, dist) # set unbound gas to nan
@@ -83,7 +101,7 @@ def GroupDistance(arrs, snap=None):
Finally, we just need to import the *fielddefs* object (if we have defined it in another file) and merge it with a dataset that we loaded:

``` py
ds = load("snapshot")
ds = load("TNG50-4_snapshot")
ds.data.merge(fielddefs)
```
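
As a sketch of what the merge enables — assuming the "all" registration above makes the recipes available on each particle container:

``` py
gas = ds.data["PartType0"]
dist = gas["GroupDistance"]   # recipe from fielddefs
print(dist[:10].compute())    # evaluates GroupDistance3D on the fly as well
```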

8 changes: 4 additions & 4 deletions docs/faq.md
@@ -10,17 +10,17 @@ Please note that all fields within a container are expected to have the same shape

``` py
from scida import load
-import da.array as da
-ds = load('simname')
-array = da.zeros_like(ds.data["PartType0"]["Density"][:,0])
+import dask.array as da
+ds = load('TNG50-4_snapshot')
+array = da.zeros_like(ds.data["PartType0"]["Density"])
ds.data['PartType0']["zerofield"] = array
```

As we operate with dask, make sure to cast your array accordingly. For example, if your field is a numpy array or an HDF5 memmap, you can use `da.from_array` to cast it to a dask array.
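
A minimal sketch of that cast (the field name `zerofield2` is just a placeholder):

``` py
import dask.array as da
import numpy as np

arr = np.zeros(ds.data["PartType0"]["Density"].shape)   # plain numpy array
ds.data["PartType0"]["zerofield2"] = da.from_array(arr) # cast to dask first
```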
Alternatively, if you have another dataset loaded, you can assign fields from one to another:

``` py
-ds2 = load('simname2')
+ds2 = load('TNG50-4_snapshot')
ds.data['PartType0']["NewDensity"] = ds2.data['PartType0']["Density"]
```

2 changes: 1 addition & 1 deletion docs/halocatalogs.md
@@ -7,7 +7,7 @@ Currently, we support the usual FOF/Subfind combination and format. Their presence …

``` py
from scida import load
ds = load("snapshot") # (1)!
ds = load("TNG50-4_snapshot") # (1)!
```

1. In this example, we assume a dataset, such as the 'TNG50-4\_snapshot' test dataset, that has its fields (*Masses*, *Velocities*) nested by particle type (*gas*)
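
Once detected, catalog fields can be accessed like any other container; a brief sketch, assuming group fields live in a `Group` container as in the `GroupPos` example further up:

``` py
grouppos = ds.data["Group"]["GroupPos"]
print(grouppos[:5].compute())
```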
7 changes: 3 additions & 4 deletions docs/units.md
@@ -2,7 +2,7 @@

## Loading data with units

-Loading data sets with 
+Loading data sets with

``` py
from scida import load
@@ -16,6 +16,7 @@ Units are introduced via the [pint](https://pint.readthedocs.io/en/stable/) package


``` pycon
+>>> gas = ds.data["PartType0"]
>>> gas["Coordinates"]
dask.array<mul, shape=(18540104, 3), dtype=float64, chunksize=(5592405, 3), chunktype=numpy.ndarray> <Unit('code_length')>
```
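
Since fields are pint quantities wrapping dask arrays, the usual pint attributes apply; a small sketch:

``` pycon
>>> coords = gas["Coordinates"]
>>> coords.units       # the attached pint unit
<Unit('code_length')>
>>> coords.magnitude   # the bare dask array, stripped of units
dask.array<mul, shape=(18540104, 3), dtype=float64, chunksize=(5592405, 3), chunktype=numpy.ndarray>
```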
@@ -34,7 +35,7 @@ We can change units for evaluation as desired:

``` pycon
>>> coords = gas["Coordinates"]
>>> coords.to("cm")
>>> coords.to("cm")
>>> # here the default system is cgs, thus we get the same result from
>>> coords.to_base_units()
dask.array<mul, shape=(18540104, 3), dtype=float64, chunksize=(5592405, 3), chunktype=numpy.ndarray> <Unit('centimeter')>
@@ -76,5 +77,3 @@ dask.array<mul, shape=(18540104,), dtype=float64, chunksize=(18540104,), chunktype=numpy.ndarray>
>>> coords.to("halfmeter")[0].compute()
array([6.64064027e+23, 2.23858253e+24, 1.94176712e+24]) <Unit('halfmeter')>
```
-
-
2 changes: 1 addition & 1 deletion src/scida/customs/arepo/dataset.py
@@ -237,7 +237,7 @@ def discover_catalog(self):
                self.catalog = candidate
                break

-    def register_field(self, parttype: str, name: str = None, construct: bool = True):
+    def register_field(self, parttype: str, name: str = None, construct: bool = False):
        """
        Register a field.
        Parameters
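For context, a sketch of the decorator usage this default affects, mirroring the `register_field` examples from the docs above; with `construct=False`, the recipe is registered without being constructed right away:

``` py
@ds.register_field("gas", name="Volume")  # construct now defaults to False
def Volume(arrs, **kwargs):
    return arrs["Masses"] / arrs["Density"]
```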
2 changes: 1 addition & 1 deletion src/scida/customs/gadgetstyle/dataset.py
@@ -16,7 +16,7 @@ class GadgetStyleSnapshot(Dataset):
    def __init__(self, path, chunksize="auto", virtualcache=True, **kwargs) -> None:
        """We define gadget-style snapshots as nbody/hydrodynamical simulation snapshots that follow
        the common /PartType0, /PartType1 grouping scheme."""
-        self.boxsize = np.full(3, np.nan)
+        self.boxsize = np.nan
        super().__init__(path, chunksize=chunksize, virtualcache=virtualcache, **kwargs)

    defaultattributes = ["config", "header", "parameters"]
79 changes: 79 additions & 0 deletions tests/test_docs.py
@@ -0,0 +1,79 @@
import pathlib

import pytest

# unfortunately cannot use existing solution, so have to write our own
# (https://github.com/nschloe/pytest-codeblocks/issues/77)

ignore_files = ["largedatasets.md", "visualization.md"]


class DocFile:
    def __init__(self, path: pathlib.Path):
        self.path = path
        self.lines = []
        self.codeblocks = []
        self.extract_codeblocks()

    def extract_codeblocks(self):
        with open(self.path, "r") as f:
            lines = f.readlines()
        # check line for codeblock start/end
        lidx = [i for i, k in enumerate(lines) if k.startswith("```")]
        print(lidx)
        # assert that there are an even number of codeblock start/end
        assert len(lidx) % 2 == 0
        cblines = []
        for i in range(0, len(lidx), 2):
            start = lidx[i]  # includes codeblock start line
            end = lidx[i + 1]
            # we check for the type. if it's 'pycon', we need to remove the '>>>' prompt
            blocktype = lines[start].strip().replace("```", "").strip()
            blocktype = blocktype.split()[0].strip()
            if blocktype == "py" or blocktype == "python":
                cblines.append(lines[start + 1 : end])
            elif blocktype == "pycon":
                cblines.append(
                    [k[4:] for k in lines[start + 1 : end] if k.startswith(">>>")]
                )
            elif blocktype == "bash":
                # not python; ignore
                pass
            else:
                raise ValueError("Unknown codeblock type: %s" % blocktype)
        self.codeblocks = ["".join(cbl) for cbl in cblines]

    def evaluate(self):
        # evaluate all at once (for now)
        code = "\n".join(self.codeblocks)
        print("Evaluating code:")
        print(code)
        exec(code)


def get_docfiles():
    fixturename = "mddocfile"
    docfiles = []
    paths = []
    ids = []

    docpath = pathlib.Path(__file__).parent.parent / "docs"
    for p in docpath.glob("*.md"):
        name = p.name
        if name in ignore_files:
            continue
        print("Evaluating %s" % p)
        # read lines
        docfile = DocFile(p)
        docfiles.append(docfile)
        paths.append(p)
        ids.append(name)

    params = docfiles
    return pytest.mark.parametrize(fixturename, params, ids=ids)


@pytest.mark.xfail
@get_docfiles()
def test_docs(mddocfile):
    mddocfile.evaluate()
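
A quick sketch of exercising the helper directly (outside pytest), assuming it is run from the repository root:

``` py
import pathlib

df = DocFile(pathlib.Path("docs") / "faq.md")
print(len(df.codeblocks))  # number of executable blocks found
df.evaluate()              # runs all blocks in one shared namespace
```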
