Skip to content

Commit

Permalink
Make db optional (#77)
Browse files Browse the repository at this point in the history
### Added

Make databases optional with --no-cosmic --no-fusiongdb2 --no-mitelman options
  • Loading branch information
rannick authored Oct 2, 2024
1 parent 61d06dc commit 2e84e7c
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 27 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.0.0]

### Added

- Options `--no-cosmic`/`--no-fusiongdb2`/`--no-mitelman` to download and run without the specified databases

## [2.1.8]

### Removed
Expand Down
8 changes: 8 additions & 0 deletions docs/download.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ fusion_report download
/path/to/db
```

You can exclude a specific database with `--no-cosmic`, `--no-mitelman` or `--no-fusiongdb2`. Example without COSMIC:

```bash
fusion_report download
--no-cosmic
/path/to/db
```


## Manual download

Expand Down
32 changes: 22 additions & 10 deletions fusion_report/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from collections import defaultdict
from typing import Any, Dict, List

import rapidjson
import json

from tqdm import tqdm

Expand Down Expand Up @@ -81,7 +81,7 @@ def run(self):
def preprocess(self, params: Namespace) -> None:
"""Parse, enrich and score fusion."""
self.parse_fusion_outputs(vars(params))
self.enrich(params.db_path)
self.enrich(params)
self.score(vars(params))

def generate_report(self, params: Namespace) -> None:
Expand Down Expand Up @@ -118,13 +118,25 @@ def parse_fusion_outputs(self, params: Dict[str, Any]) -> None:
# value: fusion tool output
self.manager.parse(param, value, params["allow_multiple_gene_symbols"])

def enrich(self, path: str) -> None:
def enrich(self, params) -> None:
"""Enrich fusion with all relevant information from local databases."""
local_fusions: Dict[str, List[str]] = {
CosmicDB(path).name: CosmicDB(path).get_all_fusions(),
MitelmanDB(path).name: MitelmanDB(path).get_all_fusions(),
FusionGDB2(path).name: FusionGDB2(path).get_all_fusions(),
}
local_fusions: Dict[str, List[str]] = {}

# Load only the databases the user has not disabled. Each --no-<db> flag
# must gate its own database: previously --no-fusiongdb2 guarded MitelmanDB
# and --no-mitelman guarded FusionGDB2, so excluding one database skipped
# the other and still tried to open the excluded one.
if not params.no_cosmic:
    cosmic_db = CosmicDB(params.db_path)
    local_fusions[cosmic_db.name] = cosmic_db.get_all_fusions()

if not params.no_mitelman:
    mitelman_db = MitelmanDB(params.db_path)
    local_fusions[mitelman_db.name] = mitelman_db.get_all_fusions()

if not params.no_fusiongdb2:
    fusiongdb2_db = FusionGDB2(params.db_path)
    local_fusions[fusiongdb2_db.name] = fusiongdb2_db.get_all_fusions()

for fusion in self.manager.fusions:
for db_name, db_list in local_fusions.items():
if fusion.name in db_list:
Expand All @@ -138,7 +150,7 @@ def export_results(self, path: str, extension: str) -> None:
if extension == "json":
with open(dest, "w", encoding="utf-8") as output:
results = [fusion.json_serialize() for fusion in self.manager.fusions]
output.write(rapidjson.dumps(results))
output.write(json.dumps(results))
elif extension == "csv":
with open(dest, "w", encoding="utf-8") as output:
csv_writer = csv.writer(
Expand Down Expand Up @@ -248,4 +260,4 @@ def generate_multiqc(

dest = f"{os.path.join(path, 'fusion_genes_mqc.json')}"
with open(dest, "w", encoding="utf-8") as output:
output.write(rapidjson.dumps(configuration))
output.write(json.dumps(configuration))
18 changes: 16 additions & 2 deletions fusion_report/args_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from argparse import ArgumentParser, Namespace, _SubParsersAction
from typing import Any, Dict

import rapidjson
import json

from fusion_report.settings import Settings

Expand All @@ -21,7 +21,7 @@ class ArgsBuilder:
def __init__(self):
configuration = os.path.join(Settings.ROOT_DIR, "arguments.json")
with open(configuration, "r") as config_file:
self.arguments: Dict[str, Any] = rapidjson.loads(config_file.read())
self.arguments: Dict[str, Any] = json.load(config_file)
self.arguments["weight"] = float(100 / len(self.supported_tools))
self.parser = ArgumentParser(
description="""Tool for generating friendly UI custom report."""
Expand Down Expand Up @@ -100,6 +100,13 @@ def run_args(self, args, weight) -> None:
type=type(optional.get("default")),
)

for database in args["databases"]:
run_parser.add_argument(
database["key"],
help=database["help"],
action=database.get("action", "store"),
)

def download_args(self, args: Dict[str, Any]) -> None:
"""Build download command-line arguments."""
download_parser = self.command_parser.add_parser(
Expand All @@ -108,6 +115,13 @@ def download_args(self, args: Dict[str, Any]) -> None:
for mandatory in args["mandatory"]:
download_parser.add_argument(mandatory["key"], help=mandatory["help"], type=str)

for optional in args["optionals"]:
download_parser.add_argument(
optional["key"],
help=optional["help"],
action=optional.get("action", "store"),
)

self._cosmic(args, download_parser)

def sync_args(self, args: Dict[str, Any]) -> None:
Expand Down
34 changes: 34 additions & 0 deletions fusion_report/arguments.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,23 @@
"help": "Jaffa output file"
}
],
"databases": [
{
"key": "--no-cosmic",
"help": "Do not use cosmic fusion database",
"action": "store_true"
},
{
"key": "--no-fusiongdb2",
"help": "Do not use fusiongdb2 fusion database",
"action": "store_true"
},
{
"key": "--no-mitelman",
"help": "Do not use mitelman fusion database",
"action": "store_true"
}
],
"optionals": [
{
"key": ["--allow-multiple-gene-symbols"],
Expand Down Expand Up @@ -79,6 +96,23 @@
"help": "Output directory"
}
],
"optionals": [
{
"key": "--no-cosmic",
"help": "Do not download cosmic fusion database",
"action": "store_true"
},
{
"key": "--no-fusiongdb2",
"help": "Do not download fusiongdb2 fusion database",
"action": "store_true"
},
{
"key": "--no-mitelman",
"help": "Do not download mitelman fusion database",
"action": "store_true"
}
],
"cosmic": [
{
"key": "--cosmic_usr",
Expand Down
29 changes: 16 additions & 13 deletions fusion_report/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ class Download:
"""

def __init__(self, params: Namespace):
self.validate(params)
self.download_all(params)

def validate(self, params: Namespace) -> None:
Expand All @@ -29,28 +28,32 @@ def validate(self, params: Namespace) -> None:
else:
self.cosmic_token = Net.get_cosmic_token(params)

def download_all(self, params: Namespace) -> None:
# making sure output directory exists
if not os.path.exists(params.output):
os.makedirs(params.output, 0o755)

def download_all(self, params: Namespace) -> None:
"""Download all databases."""
return_err: List[str] = []
os.chdir(params.output)

# MITELMAN
Net.get_mitelman(self, return_err)
if not params.no_mitelman:
# MITELMAN
Net.get_mitelman(self, return_err)

# FusionGDB2
Net.get_fusiongdb2(self, return_err)
if not params.no_fusiongdb2:
# FusionGDB2
Net.get_fusiongdb2(self, return_err)

# COSMIC
if params.qiagen:
Logger(__name__).info("Downloading resources from QIAGEN...")
Net.get_cosmic_from_qiagen(self.cosmic_token, return_err, params.output)
else:
Logger(__name__).info("Downloading resources from SANGER...")
Net.get_cosmic_from_sanger(self.cosmic_token, return_err)
if not params.no_cosmic:
# COSMIC
self.validate(params)
if params.qiagen:
Logger(__name__).info("Downloading resources from QIAGEN...")
Net.get_cosmic_from_qiagen(self.cosmic_token, return_err, params.output)
else:
Logger(__name__).info("Downloading resources from SANGER...")
Net.get_cosmic_from_sanger(self.cosmic_token, return_err)

if len(return_err) > 0:
raise DownloadException(return_err)
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ pyyaml>=4.2b1
Jinja2>=2.10
MarkupSafe>=2.1.1
pandas
python-rapidjson
requests
openpyxl
xlrd >= 2.0.0
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ lines_after_imports=2
lines_between_types=1
multi_line_output=7
use_parentheses=true
known_third_party=colorlog,jinja2,rapidjson,setuptools,tqdm,yaml
known_third_party=colorlog,jinja2,setuptools,tqdm,yaml

0 comments on commit 2e84e7c

Please sign in to comment.