From d1b561b37a4fa9726980d503637eb8bd76c3165d Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Thu, 19 Nov 2020 09:33:00 +0100 Subject: [PATCH 1/6] initialize subcatalog dictionary in generate_subcatalogs --- pystac/catalog.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pystac/catalog.py b/pystac/catalog.py index fcbc02d4d..3df529479 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -542,7 +542,13 @@ def generate_subcatalogs(self, template, defaults=None, **kwargs): result.extend(child.generate_subcatalogs(template, defaults=defaults)) layout_template = LayoutTemplate(template, defaults=defaults) + subcat_id_to_cat = {} + curr_parent = self + while curr_parent is not None: + subcat_id_to_cat[curr_parent.id] = curr_parent + curr_parent = curr_parent.get_parent() + items = list(self.get_items()) for item in items: item_parts = layout_template.get_template_values(item) From 2015bf548d1083ca8c41b02b38aa0755b2fd7c9e Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Mon, 23 Nov 2020 21:59:56 +0100 Subject: [PATCH 2/6] fix generate_subcatalogs for edge-cases --- pystac/catalog.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/pystac/catalog.py b/pystac/catalog.py index 3df529479..39475db91 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -520,7 +520,7 @@ def fn(): return self - def generate_subcatalogs(self, template, defaults=None, **kwargs): + def generate_subcatalogs(self, template, defaults=None, parent_ids=None, **kwargs): """Walks through the catalog and generates subcatalogs for items based on the template string. See :class:`~pystac.layout.LayoutTemplate` for details on the construction of template strings. This template string @@ -533,35 +533,37 @@ def generate_subcatalogs(self, template, defaults=None, **kwargs): defaults (dict): Default values for the template variables that will be used if the property cannot be found on the item. 
+ parent_ids (List[str]): Optional list of the parent catalogs' + identifiers. If the bottom-most subcatalogs already match the + template, no subcatalog is added. Returns: [catalog]: List of new catalogs created """ result = [] + parent_ids = parent_ids or list() + parent_ids.append(self.id) for child in self.get_children(): - result.extend(child.generate_subcatalogs(template, defaults=defaults)) + result.extend(child.generate_subcatalogs(template, defaults=defaults, + parent_ids=parent_ids.copy())) layout_template = LayoutTemplate(template, defaults=defaults) - subcat_id_to_cat = {} - curr_parent = self - while curr_parent is not None: - subcat_id_to_cat[curr_parent.id] = curr_parent - curr_parent = curr_parent.get_parent() - items = list(self.get_items()) for item in items: item_parts = layout_template.get_template_values(item) + id_iter = reversed(parent_ids) + if all([id == next(id_iter, None) for id in reversed(item_parts.values())]): + continue curr_parent = self for k, v in item_parts.items(): subcat_id = '{}'.format(v) - subcat = subcat_id_to_cat.get(subcat_id) + subcat = curr_parent.get_child(subcat_id) if subcat is None: subcat_desc = 'Catalog of items from {} with {} of {}'.format( curr_parent.id, k, v) subcat = pystac.Catalog(id=subcat_id, description=subcat_desc) curr_parent.add_child(subcat) - subcat_id_to_cat[subcat_id] = subcat result.append(subcat) curr_parent = subcat self.remove_item(item.id) From 56515174ee617186a6ee4c783577d38c6f02b595 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Mon, 23 Nov 2020 22:07:54 +0100 Subject: [PATCH 3/6] fix formatting --- pystac/catalog.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pystac/catalog.py b/pystac/catalog.py index 39475db91..ad6e60ed9 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -544,8 +544,10 @@ def generate_subcatalogs(self, template, defaults=None, parent_ids=None, **kwarg parent_ids = parent_ids or list() parent_ids.append(self.id) for child in 
self.get_children(): - result.extend(child.generate_subcatalogs(template, defaults=defaults, - parent_ids=parent_ids.copy())) + result.extend( + child.generate_subcatalogs(template, + defaults=defaults, + parent_ids=parent_ids.copy())) layout_template = LayoutTemplate(template, defaults=defaults) From 10e8f796f6f30a73a137181d8e0914c17dad49ee Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Tue, 24 Nov 2020 20:21:18 +0100 Subject: [PATCH 4/6] convert template parts to string --- pystac/catalog.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pystac/catalog.py b/pystac/catalog.py index ad6e60ed9..b35c7bf82 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -555,7 +555,8 @@ def generate_subcatalogs(self, template, defaults=None, parent_ids=None, **kwarg for item in items: item_parts = layout_template.get_template_values(item) id_iter = reversed(parent_ids) - if all([id == next(id_iter, None) for id in reversed(item_parts.values())]): + if all(['{}'.format(id) == next(id_iter, None) + for id in reversed(item_parts.values())]): continue curr_parent = self for k, v in item_parts.items(): From 985cc55093083e870b065885145c3aedf7cacdd0 Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Tue, 24 Nov 2020 20:27:20 +0100 Subject: [PATCH 5/6] add unit tests --- tests/test_catalog.py | 85 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index e5e69bebb..f39954a1c 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -329,6 +329,91 @@ def test_generate_subcatalogs_does_not_change_item_count(self): expected = item_counts[child.id] self.assertEqual(actual, expected, msg=" for child '{}'".format(child.id)) + def test_generate_subcatalogs_can_be_applied_multiple_times(self): + catalog = TestCases.test_case_8() + + _ = catalog.generate_subcatalogs('${year}/${month}') + catalog.normalize_hrefs('/tmp') + expected_hrefs = {item.id: item.get_self_href() 
for item in catalog.get_all_items()} + + result = catalog.generate_subcatalogs('${year}/${month}') + self.assertEqual(len(result), 0) + catalog.normalize_hrefs('/tmp') + for item in catalog.get_all_items(): + self.assertEqual(item.get_self_href(), + expected_hrefs[item.id], + msg=" for item '{}'".format(item.id)) + + def test_generate_subcatalogs_works_after_adding_more_items(self): + catalog = Catalog(id='test', description='Test') + properties = dict(property1='A', property2=1) + catalog.add_item( + Item(id='item1', + geometry=RANDOM_GEOM, + bbox=RANDOM_BBOX, + datetime=datetime.utcnow(), + properties=properties)) + catalog.generate_subcatalogs('${property1}/${property2}') + catalog.add_item( + Item(id='item2', + geometry=RANDOM_GEOM, + bbox=RANDOM_BBOX, + datetime=datetime.utcnow(), + properties=properties)) + catalog.generate_subcatalogs('${property1}/${property2}') + + catalog.normalize_hrefs('/tmp') + item1_parent = catalog.get_item('item1', recursive=True).get_parent() + item2_parent = catalog.get_item('item2', recursive=True).get_parent() + self.assertEqual(item1_parent.get_self_href(), item2_parent.get_self_href()) + + def test_generate_subcatalogs_works_for_branched_subcatalogs(self): + catalog = Catalog(id='test', description='Test') + item_properties = [ + dict(property1='A', property2=1, property3='i'), # add 3 subcats + dict(property1='A', property2=1, property3='j'), # add 1 more + dict(property1='A', property2=2, property3='i'), # add 2 more + dict(property1='B', property2=1, property3='i'), # add 3 more + ] + for ni, properties in enumerate(item_properties): + catalog.add_item( + Item(id='item{}'.format(ni), + geometry=RANDOM_GEOM, + bbox=RANDOM_BBOX, + datetime=datetime.utcnow(), + properties=properties)) + result = catalog.generate_subcatalogs('${property1}/${property2}/${property3}') + self.assertEqual(len(result), 9) + + actual_subcats = set([cat.id for cat in result]) + expected_subcats = {'A', 'B', '1', '2', 'i', 'j'} + 
self.assertSetEqual(actual_subcats, expected_subcats) + + def test_generate_subcatalogs_works_for_subcatalogs_with_same_ids(self): + catalog = Catalog(id='test', description='Test') + item_properties = [ + dict(property1=1, property2=1), # add 2 subcats + dict(property1=1, property2=2), # add 1 more + dict(property1=2, property2=1), # add 2 more + dict(property1=2, property2=2), # add 1 more + ] + for ni, properties in enumerate(item_properties): + catalog.add_item( + Item(id='item{}'.format(ni), + geometry=RANDOM_GEOM, + bbox=RANDOM_BBOX, + datetime=datetime.utcnow(), + properties=properties)) + result = catalog.generate_subcatalogs('${property1}/${property2}') + self.assertEqual(len(result), 6) + + catalog.normalize_hrefs('/') + for item in catalog.get_all_items(): + parent_href = item.get_parent().get_self_href() + path_to_parent, _ = os.path.split(parent_href) + subcats = [el for el in path_to_parent.split('/') if el] + self.assertEqual(len(subcats), 2, msg=" for item '{}'".format(item.id)) + def test_map_items(self): def item_mapper(item): item.properties['ITEM_MAPPER'] = 'YEP' From 3ab40959328398054f96a85cfd46ea4e19b9d8fc Mon Sep 17 00:00:00 2001 From: Francesco Nattino Date: Fri, 4 Dec 2020 12:46:06 +0100 Subject: [PATCH 6/6] add comments to clarify the reverse --- pystac/catalog.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pystac/catalog.py b/pystac/catalog.py index b35c7bf82..8b71f64da 100644 --- a/pystac/catalog.py +++ b/pystac/catalog.py @@ -557,6 +557,9 @@ def generate_subcatalogs(self, template, defaults=None, parent_ids=None, **kwarg id_iter = reversed(parent_ids) if all(['{}'.format(id) == next(id_iter, None) for id in reversed(item_parts.values())]): + # Skip items for which the sub-catalog structure already + # matches the template. The list of parent IDs can include more + # elements on the root side, so compare the reversed sequences. continue curr_parent = self for k, v in item_parts.items():