diff --git a/CHANGELOG.md b/CHANGELOG.md index 426225685a..a03ba8af70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ ### Template +### Download + +- Improved container image resolution and prioritization of http downloads over Docker URIs ([#2364](https://github.com/nf-core/tools/pull/2364)). + ### Linting ### Modules diff --git a/nf_core/download.py b/nf_core/download.py index f049b41ab5..cf95d27a14 100644 --- a/nf_core/download.py +++ b/nf_core/download.py @@ -693,7 +693,7 @@ def find_container_images(self, workflow_directory): # for DSL2 syntax in process scope of configs config_regex = re.compile( - r"[\s{}=$]*(?P(?(?:.(?!(?(?(?:.(?!(?[\'\"]) The quote character is captured into the quote group \1. The pattern (?:.(?!\1))*.? is used to match any character (.) not followed by the closing quote character (?!\1). This capture happens greedy *, but we add a .? to ensure that we don't match the whole file until the last occurrence of the closing quote character, but rather stop at the first occurrence. \1 inserts the matched quote character into the regex, either " or '. - It may be followed by whitespace or closing bracket [\s}]* + It may be followed by whitespace or closing bracket [\\s}]* re.DOTALL is used to account for the string to be spread out across multiple lines. """ container_regex = re.compile( - r"container\s+[\s{}=$]*(?P[\'\"])(?P(?:.(?!\1))*.?)\1[\s}]*", re.DOTALL + r"container\s+[\\s{}=$]*(?P[\'\"])(?P(?:.(?!\1))*.?)\1[\\s}]*", re.DOTALL ) local_module_findings = re.findall(container_regex, search_space) @@ -748,8 +750,8 @@ def find_container_images(self, workflow_directory): # Like above run on shallow copy, because length may change at runtime. module_findings = self.rectify_raw_container_matches(module_findings[:]) - # Remove duplicates and sort - self.containers = sorted(list(set(previous_findings + config_findings + module_findings))) + # Again clean list, in case config declares Docker URI but module or previous finding already had the http:// download + self.containers = self.prioritize_direct_download(previous_findings + config_findings + module_findings) def rectify_raw_container_matches(self, raw_findings): """Helper function to rectify the raw extracted container matches into fully qualified container names. @@ -781,6 +783,16 @@ def rectify_raw_container_matches(self, raw_findings): """ cleaned_matches = [] + # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/3809435/713980 + url_regex = ( + r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)" + ) + # Thanks Stack Overflow for the regex: https://stackoverflow.com/a/39672069/713980 + docker_regex = r"^(?:(?=[^:\/]{1,253})(?!-)[a-zA-Z0-9-]{1,63}(?(?(?(?:.(?!(?