Skip to content

Commit

Permalink
Filter links to only include links to the TARGET_PATH
Browse files Browse the repository at this point in the history
  • Loading branch information
jochenklar committed Feb 28, 2024
1 parent d026a72 commit f454d00
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 7 deletions.
12 changes: 8 additions & 4 deletions isimip_publisher/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ def write_public_jsons():

def write_link_jsons():
public_links = files.list_links(settings.PUBLIC_PATH, settings.PATH)
datasets = patterns.match_datasets(settings.PATTERN, settings.PUBLIC_PATH, public_links,
filtered_links = files.filter_links(settings.PUBLIC_PATH, settings.TARGET_PATH, settings.PATH, public_links)
datasets = patterns.match_datasets(settings.PATTERN, settings.PUBLIC_PATH, filtered_links,
include=settings.INCLUDE, exclude=settings.EXCLUDE)
validation.validate_datasets(settings.SCHEMA, settings.PATH, datasets)

Expand Down Expand Up @@ -172,7 +173,8 @@ def insert_datasets():
def link_links():
remote_links = files.list_links(settings.REMOTE_PATH, settings.PATH,
remote_dest=settings.REMOTE_DEST, suffix=settings.PATTERN['suffix'])
datasets = patterns.match_datasets(settings.PATTERN, settings.REMOTE_PATH, remote_links,
filtered_links = files.filter_links(settings.PUBLIC_PATH, settings.TARGET_PATH, settings.PATH, remote_links)
datasets = patterns.match_datasets(settings.PATTERN, settings.REMOTE_PATH, filtered_links,
include=settings.INCLUDE, exclude=settings.EXCLUDE)
validation.validate_datasets(settings.SCHEMA, settings.PATH, datasets)

Expand All @@ -184,7 +186,8 @@ def link_links():
def link_files():
remote_files = files.list_files(settings.REMOTE_PATH, settings.PATH,
remote_dest=settings.REMOTE_DEST, suffix=settings.PATTERN['suffix'])
datasets = patterns.match_datasets(settings.PATTERN, settings.REMOTE_PATH, remote_files,
filtered_links = files.filter_links(settings.PUBLIC_PATH, settings.TARGET_PATH, settings.PATH, remote_files)
datasets = patterns.match_datasets(settings.PATTERN, settings.REMOTE_PATH, filtered_links,
include=settings.INCLUDE, exclude=settings.EXCLUDE)
validation.validate_datasets(settings.SCHEMA, settings.PATH, datasets)

Expand All @@ -196,7 +199,8 @@ def link_files():
def link_datasets():
# collect and validate the links
public_links = files.list_links(settings.PUBLIC_PATH, settings.PATH)
datasets = patterns.match_datasets(settings.PATTERN, settings.PUBLIC_PATH, public_links,
filtered_links = files.filter_links(settings.PUBLIC_PATH, settings.TARGET_PATH, settings.PATH, public_links)
datasets = patterns.match_datasets(settings.PATTERN, settings.PUBLIC_PATH, filtered_links,
include=settings.INCLUDE, exclude=settings.EXCLUDE)
validation.validate_datasets(settings.SCHEMA, settings.PATH, datasets)

Expand Down
4 changes: 2 additions & 2 deletions isimip_publisher/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def get_parser(add_path=False, add_subparsers=False):
# add a subparser for each subcommand
for func in [list_remote, list_remote_links, list_local, list_public, list_public_links,
match_remote, match_remote_links, match_local, match_public, match_public_links,
fetch_files, write_local_jsons, write_public_jsons, write_link_jsons,
fetch_files, write_local_jsons, write_public_jsons,
insert_datasets, update_datasets, publish_datasets, archive_datasets,
check, clean, update_search, update_tree, run]:
subparser = subparsers.add_parser(func.__name__)
Expand All @@ -117,7 +117,7 @@ def get_parser(add_path=False, add_subparsers=False):
subparser.set_defaults(func=func)
subparser.add_argument('doi', help='DOI to process')

for func in [link_links, link_files, link_datasets, link]:
for func in [link_links, link_files, link_datasets, link, write_link_jsons]:
subparser = subparsers.add_parser(func.__name__)
subparser.set_defaults(func=func)
subparser.add_argument('target_path', help='path of the files to process')
Expand Down
3 changes: 2 additions & 1 deletion isimip_publisher/tests/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,8 @@ def test_write_public_jsons(setup, public_files, script_runner):


def test_write_link_jsons(setup, public_links, script_runner):
response = script_runner.run(['isimip-publisher', 'write_link_jsons', 'round/product/sector2/model'])
response = script_runner.run(['isimip-publisher', 'write_link_jsons',
'round/product/sector/model', 'round/product/sector2/model'])
assert response.success, response.stderr
assert not response.stdout
assert response.stderr.strip().startswith('write_link_jsons')
Expand Down
9 changes: 9 additions & 0 deletions isimip_publisher/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,15 @@ def list_links(base_path, path, remote_dest=None, suffix=None):
return list_files(base_path, path, remote_dest=remote_dest, suffix=suffix, find_type='l')


def filter_links(public_path, target_path, path, links):
    """Return the subset of *links* whose target under TARGET_PATH really exists.

    For each link path, the corresponding target is resolved as
    ``public_path / target_path / <link relative to path>``.  A link is kept
    only when that target exists AND is not itself a symlink, i.e. it points
    at a real (published) file or directory.

    Args:
        public_path: base public directory (a ``pathlib.Path``).
        target_path: sub-path below ``public_path`` where targets live.
        path: prefix that every entry of *links* is relative to.
        links: iterable of link paths (``str`` or ``Path``).

    Returns:
        list: the entries of *links* (unchanged, original order) whose
        resolved target passes the check.
    """
    # Pair each link with its resolved target once, then filter in a single
    # comprehension.  Note: Path.exists() follows symlinks, so broken links
    # are excluded here as well; is_symlink() additionally rejects targets
    # that are themselves links (we only want "real" published entries).
    pairs = (
        (link_path, public_path / target_path / Path(link_path).relative_to(path))
        for link_path in links
    )
    return [link_path for link_path, target in pairs
            if target.exists() and not target.is_symlink()]


def copy_files(remote_dest, remote_path, local_path, path, datasets):
# check if path is a file
if Path(path).suffix:
Expand Down

0 comments on commit f454d00

Please sign in to comment.