Skip to content

Commit

Permalink
Better control on subfolders in generate_subcatalogs (#595)
Browse files Browse the repository at this point in the history
* LayoutTemplate.get_template_values -> .substitute

* test that template elements can be merged in a subdir

* fix test to use "/" as separator in template

* fix docs on the use of "/" as template path separator

* add changes to CHANGELOG.md
  • Loading branch information
fnattino authored Aug 23, 2021
1 parent 98edeaa commit 0ca2972
Show file tree
Hide file tree
Showing 5 changed files with 40 additions and 14 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

### Fixed

- `generate_subcatalogs` can include multiple template values in a single subfolder layer
([#595](https://github.com/stac-utils/pystac/pull/595))
- Avoid implicit re-exports ([#591](https://github.com/stac-utils/pystac/pull/591))

### Deprecated
Expand Down
6 changes: 4 additions & 2 deletions docs/concepts.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ To use them you can pass in a strategy to the normalize_hrefs call.
Using templates
'''''''''''''''

You can utilze template strings to determine the file paths of HREFs set on Catalogs,
You can utilize template strings to determine the file paths of HREFs set on Catalogs,
Collection or Items. These templates use python format strings, which can name
the property or attribute of the item you want to use for replacing the template
variable. For example:
Expand All @@ -84,7 +84,9 @@ variable. For example:
The above code will save items in subfolders based on the collection ID, year and month
of its datetime (or start_datetime if a date range is defined and no datetime is
defined).
defined). Note that the forward slash (``/``) should be used as path separator in the
template string regardless of the system path separator (thus both in POSIX-compliant
and Windows environments).

You can use dot notation to specify attributes of objects or keys in dictionaries for
template variables. PySTAC will look at the object, its ``properties`` and its
Expand Down
14 changes: 5 additions & 9 deletions pystac/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,26 +666,22 @@ def generate_subcatalogs(
for link in item_links:
link.resolve_stac_object(root=self.get_root())
item = cast(pystac.Item, link.target)
item_parts = layout_template.get_template_values(item)
subcat_ids = layout_template.substitute(item).split("/")
id_iter = reversed(parent_ids)
if all(
[
"{}".format(id) == next(id_iter, None)
for id in reversed(list(item_parts.values()))
]
["{}".format(id) == next(id_iter, None) for id in reversed(subcat_ids)]
):
# Skip items for which the sub-catalog structure already
# matches the template. The list of parent IDs can include more
# elements on the root side, so compare the reversed sequences.
keep_item_links.append(link)
continue
curr_parent = self
for k, v in item_parts.items():
subcat_id = "{}".format(v)
for subcat_id in subcat_ids:
subcat = curr_parent.get_child(subcat_id)
if subcat is None:
subcat_desc = "Catalog of items from {} with {} of {}".format(
curr_parent.id, k, v
subcat_desc = "Catalog of items from {} with id {}".format(
curr_parent.id, subcat_id
)
subcat = pystac.Catalog(id=subcat_id, description=subcat_desc)
curr_parent.add_child(subcat)
Expand Down
4 changes: 4 additions & 0 deletions pystac/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ class LayoutTemplate:
| ``collection`` | The collection ID of an Item's collection. |
+--------------------+--------------------------------------------------------+
The forward slash (``/``) should be used as path separator in the template
string regardless of the system path separator (thus both in POSIX-compliant
and Windows environments).
Examples::
# Uses the year, month and day of the item
Expand Down
28 changes: 25 additions & 3 deletions tests/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,30 @@ def test_generate_subcatalogs_does_not_change_item_count(self) -> None:
actual, expected, msg=" for child '{}'".format(child.id)
)

def test_generate_subcatalogs_merge_template_elements(self) -> None:
    # Several template values that are not separated by "/" must be merged
    # into the ID of a single subcatalog layer.
    catalog = Catalog(id="test", description="Test")

    # Build one property mapping per (property1, property2) combination.
    item_properties = []
    for first in ("A", "B"):
        for second in (1, 2):
            item_properties.append(dict(property1=first, property2=second))

    for index, props in enumerate(item_properties):
        item = Item(
            id="item{}".format(index),
            geometry=ARBITRARY_GEOM,
            bbox=ARBITRARY_BBOX,
            datetime=datetime.utcnow(),
            properties=props,
        )
        catalog.add_item(item)

    result = catalog.generate_subcatalogs("${property1}_${property2}")

    # Each combination of values is expected to yield exactly one
    # subcatalog whose ID is "<property1>_<property2>".
    actual_subcats = {subcat.id for subcat in result}
    expected_subcats = {
        "{}_{}".format(props["property1"], props["property2"])
        for props in item_properties
    }
    self.assertEqual(len(result), len(expected_subcats))
    self.assertSetEqual(actual_subcats, expected_subcats)

def test_generate_subcatalogs_can_be_applied_multiple_times(self) -> None:
catalog = TestCases.test_case_8()

Expand Down Expand Up @@ -511,9 +535,7 @@ def test_generate_subcatalogs_works_for_subcatalogs_with_same_ids(self) -> None:
)
)

result = catalog.generate_subcatalogs(
join_path_or_url(JoinType.PATH, "${property1}", "${property2}")
)
result = catalog.generate_subcatalogs("${property1}/${property2}")
self.assertEqual(len(result), 6)

catalog.normalize_hrefs("/")
Expand Down

0 comments on commit 0ca2972

Please sign in to comment.