From 1bceebfc50610a4746c8f8cbd3cca21327608358 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joan=20H=C3=A9risson?= Date: Tue, 21 Jun 2022 10:48:25 +0200 Subject: [PATCH] feat: add --data-path arg --- selenzy_wrapper/Args.py | 16 ++++++++++++++++ selenzy_wrapper/__main__.py | 1 + selenzy_wrapper/selenzy_wrapper.py | 28 ++++++++++++---------------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/selenzy_wrapper/Args.py b/selenzy_wrapper/Args.py index 7379d75..6dcc29e 100644 --- a/selenzy_wrapper/Args.py +++ b/selenzy_wrapper/Args.py @@ -1,7 +1,11 @@ from argparse import ArgumentParser +from os import ( + path as os_path, +) from typing import ( Callable, ) + from selenzy_wrapper._version import __version__ from brs_utils import add_logger_args @@ -9,6 +13,11 @@ DEFAULT_NB_IDS = -1 DEFAULT_HOST = '83333' DEFAULT_MAX_NB_GENES = 5 +__PACKAGE_FOLDER = os_path.dirname( + os_path.realpath(__file__) +) +DEFAULT_DATA_FOLDER = __PACKAGE_FOLDER + def build_args_parser( prog: str, @@ -109,4 +118,11 @@ def add_arguments(parser: ArgumentParser) -> ArgumentParser: default=None, help='''Comma separated taxon IDs of output enzyme sequences''' ) + parser.add_argument( + '--data-path', + type=str, + default=DEFAULT_DATA_FOLDER, + help='''Path of the data directory. If does not exist, + a 'data' folder will be created (default in package directory)''' + ) return parser diff --git a/selenzy_wrapper/__main__.py b/selenzy_wrapper/__main__.py index 9a2c74f..82c301d 100644 --- a/selenzy_wrapper/__main__.py +++ b/selenzy_wrapper/__main__.py @@ -63,6 +63,7 @@ def entry_point(): taxonIDs=taxonIDs, nb_targets=args.nb_targets, nb_ids=args.nb_ids, + datadir=args.data_path, logger=logger ) diff --git a/selenzy_wrapper/selenzy_wrapper.py b/selenzy_wrapper/selenzy_wrapper.py index a410076..b5a668c 100644 --- a/selenzy_wrapper/selenzy_wrapper.py +++ b/selenzy_wrapper/selenzy_wrapper.py @@ -20,10 +20,9 @@ from .Args import ( DEFAULT_NB_TARGETS, DEFAULT_NB_IDS, - DEFAULT_HOST -) -__PACKAGE_FOLDER = os_path.dirname( - os_path.realpath(__file__) + DEFAULT_HOST, + DEFAULT_DATA_FOLDER, + __PACKAGE_FOLDER ) __SELENZY_FOLDER = 'selenzy' sys_path.insert( @@ -36,29 +35,26 @@ from .selenzy.Selenzy import readData from .selenzy.newtax import newtax - __DATA_URL = 'https://gitlab.com/breakthewall/rrcache-data/-/raw/master/selenzy/data.tar.gz' -__DATA_FOLDER = os_path.join( - __PACKAGE_FOLDER, - 'data' -) - def selenzy_pathway( pathway: rpPathway = None, taxonIDs: str = DEFAULT_HOST, nb_targets: int = DEFAULT_NB_TARGETS, nb_ids: int = DEFAULT_NB_IDS, + datadir: str = DEFAULT_DATA_FOLDER, logger: Logger = getLogger(__name__) ) -> Dict: - if not os_path.exists(__DATA_FOLDER): - logger.info(f'Downloading databases into {__DATA_FOLDER}...') + + datadir_data = os_path.join(datadir, 'data') + if not os_path.exists(datadir_data): + logger.info(f'Downloading databases into \'{os_path.abspath(datadir)}\'...') download_and_extract_tar_gz( __DATA_URL, - __PACKAGE_FOLDER + datadir ) - logger.info('Reading databases...') - pc = readData(__DATA_FOLDER) + logger.info(f'Reading databases from \'{os_path.abspath(datadir)}\'...') + pc = readData(datadir_data) result_ids = {} for rxn_id, rxn in pathway.get_reactions().items(): @@ -68,7 +64,7 @@ def selenzy_pathway( smarts=True, rxn=rxn.get_smiles(), taxonIDs=taxonIDs, - datadir=__DATA_FOLDER, + datadir=datadir_data, outdir=tmpOutputFolder, nb_targets=nb_targets, pc=pc,