From 2e84e7ca30fdcd7633ab8f45ffb5e4a74e06f8c4 Mon Sep 17 00:00:00 2001 From: Annick Renevey <47788523+rannick@users.noreply.github.com> Date: Wed, 2 Oct 2024 15:34:15 +0200 Subject: [PATCH] Make db optional (#77) ### Added Make databases optional with --no-cosmic --no-fusiongdb2 --no-mitelman options --- CHANGELOG.md | 6 ++++++ docs/download.md | 8 ++++++++ fusion_report/app.py | 32 ++++++++++++++++++++++---------- fusion_report/args_builder.py | 18 ++++++++++++++++-- fusion_report/arguments.json | 34 ++++++++++++++++++++++++++++++++++ fusion_report/download.py | 29 ++++++++++++++++------------- requirements.txt | 1 - setup.cfg | 2 +- 8 files changed, 103 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f4111ee..94678f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.0.0] + +### Added + +- Options --no-cosmic/--no-fusiongdb2/--no-mitelman to download and run without those specified databases + ## [2.1.8] ### Removed diff --git a/docs/download.md b/docs/download.md index 3bf2cb4..544729c 100644 --- a/docs/download.md +++ b/docs/download.md @@ -25,6 +25,14 @@ fusion_report download /path/to/db ``` +You can exclude a specific database with --no-cosmic/--no-mitelman/--no-fusiongdb2. 
Example for no cosmic: + +```bash +fusion_report download + --no-cosmic + /path/to/db +``` + ## Manual download diff --git a/fusion_report/app.py b/fusion_report/app.py index 8162be6..4be7368 100644 --- a/fusion_report/app.py +++ b/fusion_report/app.py @@ -8,7 +8,7 @@ from collections import defaultdict from typing import Any, Dict, List -import rapidjson +import json from tqdm import tqdm @@ -81,7 +81,7 @@ def run(self): def preprocess(self, params: Namespace) -> None: """Parse, enrich and score fusion.""" self.parse_fusion_outputs(vars(params)) - self.enrich(params.db_path) + self.enrich(params) self.score(vars(params)) def generate_report(self, params: Namespace) -> None: @@ -118,13 +118,25 @@ def parse_fusion_outputs(self, params: Dict[str, Any]) -> None: # value: fusion tool output self.manager.parse(param, value, params["allow_multiple_gene_symbols"]) - def enrich(self, path: str) -> None: + def enrich(self, params) -> None: """Enrich fusion with all relevant information from local databases.""" - local_fusions: Dict[str, List[str]] = { - CosmicDB(path).name: CosmicDB(path).get_all_fusions(), - MitelmanDB(path).name: MitelmanDB(path).get_all_fusions(), - FusionGDB2(path).name: FusionGDB2(path).get_all_fusions(), - } + local_fusions: Dict[str, List[str]] = {} + + if not params.no_cosmic: + local_fusions.update( + {CosmicDB(params.db_path).name: CosmicDB(params.db_path).get_all_fusions()} + ) + + if not params.no_fusiongdb2: + local_fusions.update( + {MitelmanDB(params.db_path).name: MitelmanDB(params.db_path).get_all_fusions()} + ) + + if not params.no_mitelman: + local_fusions.update( + {FusionGDB2(params.db_path).name: FusionGDB2(params.db_path).get_all_fusions()} + ) + for fusion in self.manager.fusions: for db_name, db_list in local_fusions.items(): if fusion.name in db_list: @@ -138,7 +150,7 @@ def export_results(self, path: str, extension: str) -> None: if extension == "json": with open(dest, "w", encoding="utf-8") as output: results = 
[fusion.json_serialize() for fusion in self.manager.fusions] - output.write(rapidjson.dumps(results)) + output.write(json.dumps(results)) elif extension == "csv": with open(dest, "w", encoding="utf-8") as output: csv_writer = csv.writer( @@ -248,4 +260,4 @@ def generate_multiqc( dest = f"{os.path.join(path, 'fusion_genes_mqc.json')}" with open(dest, "w", encoding="utf-8") as output: - output.write(rapidjson.dumps(configuration)) + output.write(json.dumps(configuration)) diff --git a/fusion_report/args_builder.py b/fusion_report/args_builder.py index 2704bf9..8e9c30b 100644 --- a/fusion_report/args_builder.py +++ b/fusion_report/args_builder.py @@ -4,7 +4,7 @@ from argparse import ArgumentParser, Namespace, _SubParsersAction from typing import Any, Dict -import rapidjson +import json from fusion_report.settings import Settings @@ -21,7 +21,7 @@ class ArgsBuilder: def __init__(self): configuration = os.path.join(Settings.ROOT_DIR, "arguments.json") with open(configuration, "r") as config_file: - self.arguments: Dict[str, Any] = rapidjson.loads(config_file.read()) + self.arguments: Dict[str, Any] = json.load(config_file) self.arguments["weight"] = float(100 / len(self.supported_tools)) self.parser = ArgumentParser( description="""Tool for generating friendly UI custom report.""" @@ -100,6 +100,13 @@ def run_args(self, args, weight) -> None: type=type(optional.get("default")), ) + for database in args["databases"]: + run_parser.add_argument( + database["key"], + help=database["help"], + action=database.get("action", "store"), + ) + def download_args(self, args: Dict[str, Any]) -> None: """Build download command-line arguments.""" download_parser = self.command_parser.add_parser( @@ -108,6 +115,13 @@ def download_args(self, args: Dict[str, Any]) -> None: for mandatory in args["mandatory"]: download_parser.add_argument(mandatory["key"], help=mandatory["help"], type=str) + for optional in args["optionals"]: + download_parser.add_argument( + optional["key"], + 
help=optional["help"], + action=optional.get("action", "store"), + ) + self._cosmic(args, download_parser) def sync_args(self, args: Dict[str, Any]) -> None: diff --git a/fusion_report/arguments.json b/fusion_report/arguments.json index 71c5f7a..3a25384 100644 --- a/fusion_report/arguments.json +++ b/fusion_report/arguments.json @@ -49,6 +49,23 @@ "help": "Jaffa output file" } ], + "databases": [ + { + "key": "--no-cosmic", + "help": "Do not download cosmic fusion database", + "action": "store_true" + }, + { + "key": "--no-fusiongdb2", + "help": "Do not download fusiongdb2 fusion database", + "action": "store_true" + }, + { + "key": "--no-mitelman", + "help": "Do not download mitelman fusion database", + "action": "store_true" + } + ], "optionals": [ { "key": ["--allow-multiple-gene-symbols"], @@ -79,6 +96,23 @@ "help": "Output directory" } ], + "optionals": [ + { + "key": "--no-cosmic", + "help": "Do not download cosmic fusion database", + "action": "store_true" + }, + { + "key": "--no-fusiongdb2", + "help": "Do not download fusiongdb2 fusion database", + "action": "store_true" + }, + { + "key": "--no-mitelman", + "help": "Do not download mitelman fusion database", + "action": "store_true" + } + ], "cosmic": [ { "key": "--cosmic_usr", diff --git a/fusion_report/download.py b/fusion_report/download.py index 7f5a7a1..fec94a9 100644 --- a/fusion_report/download.py +++ b/fusion_report/download.py @@ -19,7 +19,6 @@ class Download: """ def __init__(self, params: Namespace): - self.validate(params) self.download_all(params) def validate(self, params: Namespace) -> None: @@ -29,28 +28,32 @@ def validate(self, params: Namespace) -> None: else: self.cosmic_token = Net.get_cosmic_token(params) + def download_all(self, params: Namespace) -> None: # making sure output directory exists if not os.path.exists(params.output): os.makedirs(params.output, 0o755) - def download_all(self, params: Namespace) -> None: """Download all databases.""" return_err: List[str] = [] 
os.chdir(params.output) - # MITELMAN - Net.get_mitelman(self, return_err) + if not params.no_mitelman: + # MITELMAN + Net.get_mitelman(self, return_err) - # FusionGDB2 - Net.get_fusiongdb2(self, return_err) + if not params.no_fusiongdb2: + # FusionGDB2 + Net.get_fusiongdb2(self, return_err) - # COSMIC - if params.qiagen: - Logger(__name__).info("Downloading resources from QIAGEN...") - Net.get_cosmic_from_qiagen(self.cosmic_token, return_err, params.output) - else: - Logger(__name__).info("Downloading resources from SANGER...") - Net.get_cosmic_from_sanger(self.cosmic_token, return_err) + if not params.no_cosmic: + # COSMIC + self.validate(params) + if params.qiagen: + Logger(__name__).info("Downloading resources from QIAGEN...") + Net.get_cosmic_from_qiagen(self.cosmic_token, return_err, params.output) + else: + Logger(__name__).info("Downloading resources from SANGER...") + Net.get_cosmic_from_sanger(self.cosmic_token, return_err) if len(return_err) > 0: raise DownloadException(return_err) diff --git a/requirements.txt b/requirements.txt index 6827900..e39ca61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,6 @@ pyyaml>=4.2b1 Jinja2>=2.10 MarkupSafe>=2.1.1 pandas -python-rapidjson requests openpyxl xlrd >= 2.0.0 diff --git a/setup.cfg b/setup.cfg index b04e8dd..0f181b7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,4 +6,4 @@ lines_after_imports=2 lines_between_types=1 multi_line_output=7 use_parentheses=true -known_third_party=colorlog,jinja2,rapidjson,setuptools,tqdm,yaml +known_third_party=colorlog,jinja2,setuptools,tqdm,yaml