Skip to content

Commit

Permalink
Merge pull request #241 from fnattino/fix/generate_subcatalogs
Browse files Browse the repository at this point in the history
Fix unexpected behaviour of `generate_subcatalogs`
  • Loading branch information
lossyrob authored Dec 9, 2020
2 parents c3685d4 + 3ab4095 commit 0630c4f
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 5 deletions.
24 changes: 19 additions & 5 deletions pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ def fn():

return self

def generate_subcatalogs(self, template, defaults=None, **kwargs):
def generate_subcatalogs(self, template, defaults=None, parent_ids=None, **kwargs):
"""Walks through the catalog and generates subcatalogs
for items based on the template string. See :class:`~pystac.layout.LayoutTemplate`
for details on the construction of template strings. This template string
Expand All @@ -533,29 +533,43 @@ def generate_subcatalogs(self, template, defaults=None, **kwargs):
defaults (dict): Default values for the template variables
that will be used if the property cannot be found on
the item.
parent_ids (List[str]): Optional list of the parent catalogs'
identifiers. If the bottom-most subcatalags already match the
template, no subcatalog is added.
Returns:
[catalog]: List of new catalogs created
"""
result = []
parent_ids = parent_ids or list()
parent_ids.append(self.id)
for child in self.get_children():
result.extend(child.generate_subcatalogs(template, defaults=defaults))
result.extend(
child.generate_subcatalogs(template,
defaults=defaults,
parent_ids=parent_ids.copy()))

layout_template = LayoutTemplate(template, defaults=defaults)
subcat_id_to_cat = {}

items = list(self.get_items())
for item in items:
item_parts = layout_template.get_template_values(item)
id_iter = reversed(parent_ids)
if all(['{}'.format(id) == next(id_iter, None)
for id in reversed(item_parts.values())]):
# Skip items for which the sub-catalog structure already
# matches the template. The list of parent IDs can include more
# elements on the root side, so compare the reversed sequences.
continue
curr_parent = self
for k, v in item_parts.items():
subcat_id = '{}'.format(v)
subcat = subcat_id_to_cat.get(subcat_id)
subcat = curr_parent.get_child(subcat_id)
if subcat is None:
subcat_desc = 'Catalog of items from {} with {} of {}'.format(
curr_parent.id, k, v)
subcat = pystac.Catalog(id=subcat_id, description=subcat_desc)
curr_parent.add_child(subcat)
subcat_id_to_cat[subcat_id] = subcat
result.append(subcat)
curr_parent = subcat
self.remove_item(item.id)
Expand Down
85 changes: 85 additions & 0 deletions tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,91 @@ def test_generate_subcatalogs_does_not_change_item_count(self):
expected = item_counts[child.id]
self.assertEqual(actual, expected, msg=" for child '{}'".format(child.id))

def test_generate_subcatalogs_can_be_applied_multiple_times(self):
catalog = TestCases.test_case_8()

_ = catalog.generate_subcatalogs('${year}/${month}')
catalog.normalize_hrefs('/tmp')
expected_hrefs = {item.id: item.get_self_href() for item in catalog.get_all_items()}

result = catalog.generate_subcatalogs('${year}/${month}')
self.assertEqual(len(result), 0)
catalog.normalize_hrefs('/tmp')
for item in catalog.get_all_items():
self.assertEqual(item.get_self_href(),
expected_hrefs[item.id],
msg=" for item '{}'".format(item.id))

def test_generate_subcatalogs_works_after_adding_more_items(self):
catalog = Catalog(id='test', description='Test')
properties = dict(property1='A', property2=1)
catalog.add_item(
Item(id='item1',
geometry=RANDOM_GEOM,
bbox=RANDOM_BBOX,
datetime=datetime.utcnow(),
properties=properties))
catalog.generate_subcatalogs('${property1}/${property2}')
catalog.add_item(
Item(id='item2',
geometry=RANDOM_GEOM,
bbox=RANDOM_BBOX,
datetime=datetime.utcnow(),
properties=properties))
catalog.generate_subcatalogs('${property1}/${property2}')

catalog.normalize_hrefs('/tmp')
item1_parent = catalog.get_item('item1', recursive=True).get_parent()
item2_parent = catalog.get_item('item2', recursive=True).get_parent()
self.assertEqual(item1_parent.get_self_href(), item2_parent.get_self_href())

def test_generate_subcatalogs_works_for_branched_subcatalogs(self):
catalog = Catalog(id='test', description='Test')
item_properties = [
dict(property1='A', property2=1, property3='i'), # add 3 subcats
dict(property1='A', property2=1, property3='j'), # add 1 more
dict(property1='A', property2=2, property3='i'), # add 2 more
dict(property1='B', property2=1, property3='i'), # add 3 more
]
for ni, properties in enumerate(item_properties):
catalog.add_item(
Item(id='item{}'.format(ni),
geometry=RANDOM_GEOM,
bbox=RANDOM_BBOX,
datetime=datetime.utcnow(),
properties=properties))
result = catalog.generate_subcatalogs('${property1}/${property2}/${property3}')
self.assertEqual(len(result), 9)

actual_subcats = set([cat.id for cat in result])
expected_subcats = {'A', 'B', '1', '2', 'i', 'j'}
self.assertSetEqual(actual_subcats, expected_subcats)

def test_generate_subcatalogs_works_for_subcatalogs_with_same_ids(self):
catalog = Catalog(id='test', description='Test')
item_properties = [
dict(property1=1, property2=1), # add 2 subcats
dict(property1=1, property2=2), # add 1 more
dict(property1=2, property2=1), # add 2 more
dict(property1=2, property2=2), # add 1 more
]
for ni, properties in enumerate(item_properties):
catalog.add_item(
Item(id='item{}'.format(ni),
geometry=RANDOM_GEOM,
bbox=RANDOM_BBOX,
datetime=datetime.utcnow(),
properties=properties))
result = catalog.generate_subcatalogs('${property1}/${property2}')
self.assertEqual(len(result), 6)

catalog.normalize_hrefs('/')
for item in catalog.get_all_items():
parent_href = item.get_parent().get_self_href()
path_to_parent, _ = os.path.split(parent_href)
subcats = [el for el in path_to_parent.split('/') if el]
self.assertEqual(len(subcats), 2, msg=" for item '{}'".format(item.id))

def test_map_items(self):
def item_mapper(item):
item.properties['ITEM_MAPPER'] = 'YEP'
Expand Down

0 comments on commit 0630c4f

Please sign in to comment.