Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adaptahop automatic detection properties #160

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
dafe71f
Add pytest_cache to gitignore
cphyc Feb 11, 2021
71bdd08
Add support for adaptahop on its own
cphyc Oct 21, 2021
5654aeb
Return all properties but halo particle identities
cphyc Oct 21, 2021
ca3fb21
Make use of auxiliary_file_patterns
cphyc Oct 21, 2021
a7c1c56
Taking out the ramses handlers into a new file
Nov 25, 2021
b1771ce
Correcting typo in class fefinition
Nov 25, 2021
068e2cd
Minimal working example importing M200 and r200
Nov 26, 2021
25c5cc6
Re-incoporating and testing logic to import every single one property…
Nov 26, 2021
ceac560
Mechanics to exclude and augment adaptahop properties
Nov 26, 2021
83aabf6
First attempts at storing links between parent and subhalo pairs
Nov 26, 2021
c9fcb47
Modify loop to ensure correct counting of all subhalos. This loop is …
Nov 26, 2021
10435e7
Update tangos/input_handlers/ramsesHOP.py
Nov 29, 2021
ebbb28b
remove child computation by default which is very long, and take out …
Nov 29, 2021
77a5d86
Adding try catch to deal with contamination fraction calculation
Nov 29, 2021
1e72d63
Give clear log message when contamination is not present
Nov 29, 2021
82f1cdc
Using physical units when storing into the database
Nov 29, 2021
22845ba
Use newly reformatted AdaptaHOP arrays through pynbody
Nov 29, 2021
a05cd2d
remove old static mapping, correct bug in asking for pos array and ad…
Nov 29, 2021
f8fbe45
Remap the names of properties following latest changes
Nov 30, 2021
c08f6d4
Add logging message for each halo and reincorporate child links by de…
Nov 30, 2021
b81f925
Tranforming wanring into log info
Nov 30, 2021
cfffaa4
Enumerate halos quickly
cphyc Dec 1, 2021
864c8cd
Load the parent->child mapping only once
cphyc Dec 1, 2021
3043c3a
Remove weird log message
cphyc Dec 7, 2021
8191a5e
Move method to property
cphyc Dec 7, 2021
f3b63fb
Move logic to children class
cphyc Dec 7, 2021
06f337e
Harden logic
cphyc Dec 7, 2021
547013e
Attempt to factorize halo matching
cphyc Dec 7, 2021
3c6b1f5
Support linking
cphyc Dec 8, 2021
7a4b462
Remove useless import
cphyc Dec 8, 2021
b04f646
Factorize common code
cphyc Dec 8, 2021
6e4c1e1
Swap order of mixins for inheritance to work properly
cphyc Dec 8, 2021
ea6636e
Physical conversion can be done once and for all at halo catalogue level
cphyc Dec 8, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ build/*
test_tutorial_build/tutorial_*
dist/*
tests/test_dbs/
.DS_store
.DS_store
.pytest_cache/
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
'zope.sqlalchemy',
'hupper',
'six',
'scipy >= 0.14.0'
'scipy >= 0.14.0',
'more_itertools >= 8.0.0',
]

tests_require = [
Expand Down
2 changes: 1 addition & 1 deletion tangos/input_handlers/finding.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def enumerate_timestep_extensions(self):
if self._is_able_to_load(e):
yield e[len(base) + 1:]
else:
logger.info("Could not load %s",e)
logger.info("Could not load %s with class %s", e, self)

def _is_able_to_load(self, fname):
"""Determine whether a named file can be loaded
Expand Down
54 changes: 21 additions & 33 deletions tangos/input_handlers/pynbody.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,21 @@
import os.path
import time
import weakref
import re
import numpy as np
from itertools import chain
from more_itertools import always_iterable
from ..util import proxy_object

pynbody = None # deferred import; occurs when a PynbodyInputHandler is constructed

from . import halo_stat_files, finding
from . import finding
from . import HandlerBase
from .. import config
from ..log import logger
from six.moves import range



_loaded_halocats = {}

class DummyTimeStep(object):
Expand Down Expand Up @@ -194,11 +196,10 @@ def _construct_halo_cat(self, ts_extension, object_typetag):
return h # pynbody.halo.AmigaGrpCatalogue(f)




def match_objects(self, ts1, ts2, halo_min, halo_max,
dm_only=False, threshold=0.005, object_typetag='halo',
output_handler_for_ts2=None):
output_handler_for_ts2=None,
fuzzy_match_kwa={}):
if dm_only:
only_family='dm'
else:
Expand All @@ -218,8 +219,19 @@ def match_objects(self, ts1, ts2, halo_min, halo_max,
if halo_max is None:
halo_max = max(len(h2), len(h1))

return f1.bridge(f2).fuzzy_match_catalog(halo_min, halo_max, threshold=threshold,
only_family=only_family, groups_1=h1, groups_2=h2)
return self.create_bridge(f1, f2).fuzzy_match_catalog(
halo_min,
halo_max,
threshold=threshold,
only_family=only_family,
groups_1=h1,
groups_2=h2,
**fuzzy_match_kwa,
)

@classmethod
def create_bridge(f1, f2):
return f1.bridge(f2)

def enumerate_objects(self, ts_extension, object_typetag="halo", min_halo_particles=config.min_halo_particles):
if self._can_enumerate_objects_from_statfile(ts_extension, object_typetag):
Expand Down Expand Up @@ -249,7 +261,7 @@ def enumerate_objects(self, ts_extension, object_typetag="halo", min_halo_partic
for i in range(istart, len(h)+istart):
try:
hi = h[i]
if len(hi.dm)+len(hi.star)+len(hi.gas) > min_halo_particles:
if len(hi.dm) + len(hi.star) + len(hi.gas) >= min_halo_particles:
yield i, i, len(hi.dm), len(hi.star), len(hi.gas)
except (ValueError, KeyError) as e:
pass
Expand Down Expand Up @@ -311,30 +323,6 @@ def _estimate_mass_resolution_quicker(self, f):
logger.warn(" -- it will certainly be wrong for e.g. zoom simulations")
return estimated_part_mass

class RamsesHOPInputHandler(PynbodyInputHandler):
patterns = ["output_0????"]

def match_objects(self, ts1, ts2, halo_min, halo_max,
dm_only=False, threshold=0.005, object_typetag='halo',
output_handler_for_ts2=None):

f1 = self.load_timestep(ts1).dm
h1 = self._construct_halo_cat(ts1, object_typetag)

if output_handler_for_ts2 is None:
f2 = self.load_timestep(ts2).dm
h2 = self._construct_halo_cat(ts2, object_typetag)
else:
f2 = output_handler_for_ts2.load_timestep(ts2).dm
h2 = output_handler_for_ts2._construct_halo_cat(ts2, object_typetag)

bridge = pynbody.bridge.OrderBridge(f1,f2, monotonic=False)

return bridge.fuzzy_match_catalog(halo_min, halo_max, threshold=threshold,
only_family=pynbody.family.dm, groups_1=h1, groups_2=h2)




class GadgetSubfindInputHandler(PynbodyInputHandler):
patterns = ["snapshot_???"]
Expand Down Expand Up @@ -594,4 +582,4 @@ class ChangaUseIDLInputHandler(ChangaInputHandler):
halo_stat_file_class_name = "AmigaIDLStatFile"
auxiliary_file_patterns = ["*.amiga.grp"]

from . import caterpillar, eagle
from . import caterpillar, eagle, ramsesHOP
217 changes: 217 additions & 0 deletions tangos/input_handlers/ramsesHOP.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
from more_itertools import always_iterable
from ..util import proxy_object
from itertools import chain
from .pynbody import PynbodyInputHandler
import numpy as np
from ..log import logger
from .. import config

class RamsesCatalogueMixin:
def create_bridge(self, f1, f2):
import pynbody
# Ensure that f1.dm and f2.dm are not garbage-collected
self._f1dm = f1.dm
self._f2dm = f2.dm

return pynbody.bridge.OrderBridge(self._f1dm, self._f2dm, monotonic=False)

def match_objects(self, ts1, ts2, halo_min, halo_max, dm_only=True, threshold=0.005,
object_typetag="halo", output_handler_for_ts2=None):
import pynbody
if not dm_only:
logger.warn(
"`match_objects` was called with dm_only=%s, but %s only supports DM-only"
" catalogues at the moment. Falling back to DM-only.", dm_only, self.__class__.__name__
)
dm_only = True

return super().match_objects(
ts1,
ts2,
halo_min,
halo_max,
dm_only=dm_only,
threshold=threshold,
object_typetag=object_typetag,
output_handler_for_ts2=output_handler_for_ts2,
fuzzy_match_kwa={"use_family": pynbody.family.dm}
)

class RamsesHOPInputHandler(RamsesCatalogueMixin, PynbodyInputHandler):
""" Handling Ramses outputs with HOP halo finding (Eisenstein and Hut 1998)"""
patterns = ["output_0????"]
auxiliary_file_patterns = ["grp*.tag"]

def _is_able_to_load(self, ts_extension):
import pynbody
filepath = self._extension_to_filename(ts_extension)
try:
f = pynbody.load(filepath)
if self.quicker:
logger.warn("Pynbody was able to load %r, but because 'quicker' flag is set we won't check whether it can also load the halo files", filepath)
else:
h = pynbody.halo.hop.HOPCatalogue(f)
return True
except (IOError, RuntimeError):
return False



class RamsesAdaptaHOPInputHandler(RamsesCatalogueMixin, PynbodyInputHandler):
""" Handling Ramses outputs with AdaptaHOP halo and subhalo finding """

patterns = ["output_0????"]
auxiliary_file_patterns = ["tree_bricks???"]

_excluded_adaptahop_precalculated_properties = (
"members", "timestep", "level", "host_id", "first_subhalo_id", "next_subhalo_id",
"pos_x", "pos_y", "pos_z", "pos", "vel_x", "vel_y", "vel_z",
"angular_momentum_x", "angular_momentum_y", "angular_momentum_z",
"contaminated", "m_contam", "mtot_contam", "n_contam", "ntot_contam",
)
_included_adaptahop_additional_properties = (
"parent", "child", "shrink_center", "bulk_velocity", "contamination_fraction"
)

def _is_able_to_load(self, ts_extension):
import pynbody
filepath = self._extension_to_filename(ts_extension)
try:
f = pynbody.load(filepath)
if self.quicker:
logger.warn("Pynbody was able to load %r, but because 'quicker' flag is set we won't check whether it can also load the halo files", filepath)
else:
h = f.halos()
return isinstance(h, pynbody.halo.adaptahop.BaseAdaptaHOPCatalogue)
return True
except (IOError, RuntimeError):
return False


@staticmethod
def _compute_contamination_fraction(adaptahop_halo):
# Deal with the potential case that contamination fraction has not been computed through AdaptaHOP
try:
contamination_fraction = float(adaptahop_halo.properties['ntot_contam'] / adaptahop_halo.properties['ntot'])
except KeyError:
logger.warn("Ignoring import of contamination fraction which has not been stored on disk by AdaptaHOP")
contamination_fraction = None
return contamination_fraction

def available_object_property_names_for_timestep(self, ts_extension, object_typetag):
h = self._construct_halo_cat(ts_extension, object_typetag)

halo_attributes = list(h._halo_attributes)
if h._read_contamination:
halo_attributes.extend(h._halo_attributes_contam)

attrs = chain.from_iterable(tuple(always_iterable(attr)) for (attr, _len, _dtype) in halo_attributes)

# Import all precalculated properties except conflicting ones with Tangos syntax
property_list = [attr for attr in attrs if attr not in self._excluded_adaptahop_precalculated_properties]
# Add additional properties that can be derived from adaptahop
property_list += self._included_adaptahop_additional_properties
return property_list

@staticmethod
def _resolve_units(value):
import pynbody
if (pynbody.units.is_unit(value)):
return float(value)
else:
return value

def _get_map_child_subhalos(self, ts_extension):
h = self._construct_halo_cat(ts_extension, 'halo')
halo_children = {}
for halo_i in range(1, len(h)+1): # AdaptaHOP catalogues start at 1
halo_props = h[halo_i].properties

if halo_props['host_id'] != halo_i: # If halo isn't its own host, it is a subhalo
parent = halo_props['host_id']
if parent not in halo_children:
halo_children[parent] = []
halo_children[parent].append(halo_i)
return halo_children

def iterate_object_properties_for_timestep(self, ts_extension, object_typetag, property_names):
h = self._construct_halo_cat(ts_extension, object_typetag)
# For AdaptaHOP handler, we do not need to load the entire snaphshot to enumerate
# properties in the halo catalogue. If pynbody supports this, ask it to do so.
h._index_parent = False
h.physical_units() # make sure all units are physical before storing to database

if "child" in property_names:
# Construct the mapping between parent and subhalos
map_child_parent = self._get_map_child_subhalos(ts_extension)

for halo_i in range(1, len(h)+1): # AdaptaHOP catalogues start at 1

# Tangos expects data to have a finder offset, and a finder id following the stat file logic
# I think these are irrelevant for AdaptaHOP catalogues which are derived directly from pynbody
# Putting the finder ID twice seems to produce consistent results
all_data = [halo_i, halo_i]

adaptahop_halo = h[halo_i]
precalculated_properties = adaptahop_halo.properties

# Loop over all properties we wish to import
for k in property_names:
if k in self._included_adaptahop_additional_properties:
if k == "parent":
data = proxy_object.IncompleteProxyObjectFromFinderId(precalculated_properties['host_id'], 'halo')
elif k == "child":
# Determine whether halo has childs and create halo objects to it
try:
list_of_child = map_child_parent[halo_i]
data = [proxy_object.IncompleteProxyObjectFromFinderId(data_i, 'halo') for data_i in list_of_child]
except KeyError:
data = None
elif k == "shrink_center":
# Avoid naming confusions with already defined PynbodyProperties
data = (adaptahop_halo.properties['pos']).view(np.ndarray)
elif k == "bulk_velocity":
data = (adaptahop_halo.properties['vel']).view(np.ndarray)
elif k == "contamination_fraction":
data = self._compute_contamination_fraction(adaptahop_halo)
else:
raise NotImplementedError(
"Cannot handle property %s for halo catalogue %r" % (
k, self
)
)
elif k in precalculated_properties:
data = precalculated_properties[k]
# Strip the unit as Tangos expects it to be a raw number
data = self._resolve_units(data)
else:
data = None

all_data.append(data)
logger.info("Done with Halo %i" % halo_i)
yield all_data

def enumerate_objects(self, ts_extension, object_typetag="halo", min_halo_particles=config.min_halo_particles):
if self._can_enumerate_objects_from_statfile(ts_extension, object_typetag):
for X in self._enumerate_objects_from_statfile(ts_extension, object_typetag):
yield X
else:
logger.warn("No halo statistics file found for timestep %r", ts_extension)

try:
h = self._construct_halo_cat(ts_extension, object_typetag)
h._index_parent = False
except:
logger.warn("Unable to read %ss using pynbody; assuming step has none", object_typetag)
return

logger.warn(" => enumerating %ss directly using pynbody", object_typetag)
istart = 1

for i in range(istart, len(h)+istart):
try:
hi = h[i]
if hi.properties["npart"] > min_halo_particles:
yield i, i, hi.properties["npart"], 0, 0
except (ValueError, KeyError) as e:
pass