diff --git a/stemcnv_check/__init__.py b/stemcnv_check/__init__.py index ea1e3aa..b12d81e 100644 --- a/stemcnv_check/__init__.py +++ b/stemcnv_check/__init__.py @@ -3,3 +3,4 @@ from stemcnv_check.version import __version__ STEM_CNV_CHECK = 'stemcnv_check' +VEP_version = 112 \ No newline at end of file diff --git a/stemcnv_check/app/make_staticdata.py b/stemcnv_check/app/make_staticdata.py index ec0affc..6bc0f29 100644 --- a/stemcnv_check/app/make_staticdata.py +++ b/stemcnv_check/app/make_staticdata.py @@ -7,7 +7,7 @@ from snakemake.api import SnakemakeApi from snakemake.settings.types import ResourceSettings, ConfigSettings, DeploymentSettings, DAGSettings, OutputSettings, DeploymentMethod from .check_input import check_config -from .. import STEM_CNV_CHECK +from .. import STEM_CNV_CHECK, VEP_version from ..helpers import config_extract, make_apptainer_args, read_sample_table, get_cache_dir, get_vep_cache_path, load_config from loguru import logger as logging @@ -74,7 +74,7 @@ def create_missing_staticdata(args): genome_fasta = config['global_settings'][f'{genome_build}_genome_fasta'] if genome_fasta == '__use-vep__': genome_fasta = os.path.join(vep_cache_path, 'fasta', - 'homo_sapiens', f'112_{vep_genome}', + 'homo_sapiens', f'{VEP_version}_{vep_genome}', 'Homo_sapiens.GRCh38.dna.toplevel.fa.gz' if vep_genome == 'GRCh38' else 'Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz' ) @@ -162,7 +162,7 @@ def create_missing_staticdata(args): # 'configfile': args.config, # 'target': 'SNP-probe-data', 'use_vep_cache': vep_cache_path, - 'global_settings': {f'{genome_build}_genome_fasta': genome_fasta} + # 'global_settings': {f'{genome_build}_genome_fasta': genome_fasta} } ), deployment_settings=DeploymentSettings( diff --git a/stemcnv_check/app/run_workflow.py b/stemcnv_check/app/run_workflow.py index e6bd325..69a83a7 100644 --- a/stemcnv_check/app/run_workflow.py +++ b/stemcnv_check/app/run_workflow.py @@ -36,14 +36,6 @@ def run_stemcnv_check_workflow(args): # Define / 
overwrite place-holder values for VEP downloaded data vep_cache_path = get_vep_cache_path(config['settings']['VEP_annotation']['VEP_cache_path'], cache_path) - hg19_fasta = config['global_settings']['hg19_genome_fasta'].replace( - '__use-vep__', - os.path.join(vep_cache_path, 'fasta', 'homo_sapiens', '112_GRCh37', 'Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz') - ) - hg38_fasta = config['global_settings']['hg38_genome_fasta'].replace( - '__use-vep__', - os.path.join(vep_cache_path, 'fasta', 'homo_sapiens', '112_GRCh38', 'Homo_sapiens.GRCh38.dna.toplevel.fa.gz') - ) basedir = args.directory if args.directory else os.getcwd() argv += [ @@ -54,7 +46,6 @@ def run_stemcnv_check_workflow(args): f'configfile={args.config}', f'target={args.target}', f'use_vep_cache={vep_cache_path}', - f"global_settings='{{hg19_genome_fasta: {hg19_fasta}, hg38_genome_fasta: {hg38_fasta}}}'" ] #FIXME: use a clearer local vs cluster submission if args.cluster_profile: diff --git a/stemcnv_check/control_files/allowedvalues_config.yaml b/stemcnv_check/control_files/allowedvalues_config.yaml index e694e8b..79e2fd3 100644 --- a/stemcnv_check/control_files/allowedvalues_config.yaml +++ b/stemcnv_check/control_files/allowedvalues_config.yaml @@ -7,22 +7,27 @@ # - filterset, filtersetdefault, sections, sectionsall & insamplehseet are special functions # - everything else is treated as regex (needs to match the *full* value) static_data: - bpm_manifest_file: str - csv_manifest_file: str - egt_cluster_file: str - genome_gtf_file: str - penncnv_pfb_file: str - penncnv_GCmodel_file: str - array_density_file: str - array_gaps_file: str - genomeInfo_file: str + bpm_manifest_file: str # Path|file + csv_manifest_file: str # Path|file + egt_cluster_file: str # Path|file + genome_gtf_file: str # Path|file + penncnv_pfb_file: str # Path|file + penncnv_GCmodel_file: str # Path|file + array_density_file: str # Path|file + array_gaps_file: str # Path|file + genomeInfo_file: str # Path|file genome_version: 
str_(hg38|GRCh38|hg19|GRCh37) array_name: str -raw_data_folder: str -data_path: str -log_path: str +raw_data_folder: str # Path|dir +data_path: str # Path|dir|no-exist-ok +log_path: str # Path|dir|no-exist-ok + +global_settings: + cache_dir: str # Path|dir|no-exist-ok + hg19_genome_fasta: str # Path|__use-vep__ + hg38_genome_fasta: str # Path|__use-vep__ settings: CNV.calling.tools: list__str__(PennCNV|CBS) @@ -32,8 +37,6 @@ settings: GenCallScore: float_le1_ge0 Position.duplicates: str_(keep|remove|highest-GenCall|highest-GenTrain) - chromosomes: list__str__(chr)?[0-9XY]+ - default-filter-set: filtersetnodefault PennCNV: diff --git a/stemcnv_check/envs/vep-annotation.yaml b/stemcnv_check/envs/vep-annotation.yaml index 52c6f7d..f3cbedc 100644 --- a/stemcnv_check/envs/vep-annotation.yaml +++ b/stemcnv_check/envs/vep-annotation.yaml @@ -5,4 +5,5 @@ channels: - nodefaults dependencies: - bcftools + # This needs to match VEP_version in the base __init__.py - ensembl-vep = 112 \ No newline at end of file diff --git a/stemcnv_check/rules/StemCNV-check.smk b/stemcnv_check/rules/StemCNV-check.smk index 8de1eb0..aa5698f 100644 --- a/stemcnv_check/rules/StemCNV-check.smk +++ b/stemcnv_check/rules/StemCNV-check.smk @@ -7,10 +7,7 @@ from loguru import logger as logging import tempfile import ruamel.yaml as ruamel_yaml from stemcnv_check import STEM_CNV_CHECK -from stemcnv_check.helpers import ( - read_sample_table, - collect_SNP_cluster_ids, -) +from stemcnv_check.helpers import read_sample_table from stemcnv_check.exceptions import SampleConstraintError, ConfigValueError SNAKEDIR = str(importlib.resources.files(STEM_CNV_CHECK)) @@ -62,7 +59,6 @@ wildcard_constraints: # Never submit these to cluster localrules: - relink_gencall, all, @@ -170,9 +166,9 @@ rule run_CBS: mem_mb=get_tool_resource("CBS", "memory"), partition=get_tool_resource("CBS", "partition"), params: - # SDundo = config['settings']['CBS']['SDundo'], - # filter=get_tool_filter_settings('CBS'), + # Ensure rerun 
on changes to settings or sample meta data settings=config["settings"]["CBS"], + sex_info=lambda wildcards: get_ref_id(wildcards,True), log: err=os.path.join(LOGPATH, "CBS", "{sample_id}", "error.log"), out=os.path.join(LOGPATH, "CBS", "{sample_id}", "out.log"), diff --git a/stemcnv_check/rules/common.smk b/stemcnv_check/rules/common.smk index 153df9f..3b0ad9c 100644 --- a/stemcnv_check/rules/common.smk +++ b/stemcnv_check/rules/common.smk @@ -1,7 +1,7 @@ import importlib.resources import os from pathlib import Path -from stemcnv_check import STEM_CNV_CHECK +from stemcnv_check import STEM_CNV_CHECK, VEP_version from stemcnv_check.helpers import config_extract from stemcnv_check.exceptions import SampleConstraintError @@ -94,10 +94,20 @@ def get_genome_fasta(wildcards): # #FIXME: future # chip = get_sample_info(wildcards.sample_id)['array_name'] # genome = config['array_definitions'][chip]['genome_version'] + if config["genome_version"] in ("hg38", "GRCh38"): - return config["global_settings"]["hg38_genome_fasta"] + out = config["global_settings"]["hg38_genome_fasta"] else: - return config["global_settings"]["hg19_genome_fasta"] + out = config["global_settings"]["hg19_genome_fasta"] + + if out == '__use-vep__': + vep_cache_path = config['use_vep_cache'] + if config["genome_version"] in ("hg38", "GRCh38"): + return os.path.join(vep_cache_path, 'fasta', 'homo_sapiens', f'{VEP_version}_GRCh38', 'Homo_sapiens.GRCh38.dna.toplevel.fa.gz') + else: + return os.path.join(vep_cache_path, 'fasta', 'homo_sapiens', f'{VEP_version}_GRCh37', 'Homo_sapiens.GRCh37.75.dna.primary_assembly.fa.gz') + else: + return out def cnv_vcf_input_function(tool): diff --git a/stemcnv_check/rules/illumina_raw_processing.smk b/stemcnv_check/rules/illumina_raw_processing.smk index 0ee5c66..3d295fe 100644 --- a/stemcnv_check/rules/illumina_raw_processing.smk +++ b/stemcnv_check/rules/illumina_raw_processing.smk @@ -1,5 +1,8 @@ import os +localrules: + relink_gencall, + rule run_gencall: input: 
bpm=config["static_data"]["bpm_manifest_file"], diff --git a/stemcnv_check/rules/penncnv.smk b/stemcnv_check/rules/penncnv.smk index fa1f45e..48b894b 100644 --- a/stemcnv_check/rules/penncnv.smk +++ b/stemcnv_check/rules/penncnv.smk @@ -1,5 +1,9 @@ import os +# Never submit these to cluster +localrules: + prep_PennCNV_sexfile, + rule prep_PennCNV_sexfile: input: @@ -30,6 +34,10 @@ rule prep_PennCNV_input: tsv=temp(os.path.join(DATAPATH, "{sample_id}", "{sample_id}.penncnv.input.tsv")), log: os.path.join(LOGPATH, "PennCNV", "{sample_id}", "input.log"), + resources: + runtime=get_tool_resource("PennCNV", "runtime"), + mem_mb=get_tool_resource("PennCNV", "memory"), + partition=get_tool_resource("PennCNV", "partition"), conda: "../envs/vembrane.yaml" params: @@ -168,6 +176,10 @@ rule combined_PennCNV_output: # stats=os.path.join(DATAPATH,"{sample_id}","{sample_id}.CNV_calls.penncnv.stats.tsv") log: err=os.path.join(LOGPATH, "PennCNV", "{sample_id}", "combine.error.log"), + resources: + runtime=get_tool_resource("PennCNV", "runtime"), + mem_mb=get_tool_resource("PennCNV", "memory"), + partition=get_tool_resource("PennCNV", "partition"), conda: "../envs/general-R.yaml" script: diff --git a/stemcnv_check/rules/report_generation.smk b/stemcnv_check/rules/report_generation.smk index 0b76469..44f3d5b 100644 --- a/stemcnv_check/rules/report_generation.smk +++ b/stemcnv_check/rules/report_generation.smk @@ -1,5 +1,5 @@ import os -from stemcnv_check.helpers import config_extract +from stemcnv_check.helpers import config_extract, collect_SNP_cluster_ids def get_report_sample_input(wildcards): sample_id, ref_id, sex, ref_sex = get_ref_id(wildcards, True) @@ -61,6 +61,10 @@ def get_report_sample_input(wildcards): rule check_latex_installation: output: os.path.join(LOGPATH, "report", "_latex_installation_check"), + resources: + runtime=get_tool_resource("default", "runtime"), + mem_mb=get_tool_resource("default", "memory"), + partition=get_tool_resource("default", "partition"), 
conda: "../envs/general-R.yaml" shell: diff --git a/stemcnv_check/rules/staticdata_creation.smk b/stemcnv_check/rules/staticdata_creation.smk index 77ce7b4..f221861 100644 --- a/stemcnv_check/rules/staticdata_creation.smk +++ b/stemcnv_check/rules/staticdata_creation.smk @@ -3,7 +3,7 @@ import importlib.resources import os from pathlib import Path import tempfile -from stemcnv_check import STEM_CNV_CHECK +from stemcnv_check import STEM_CNV_CHECK, VEP_version DOWNLOAD_DIR = config["TMPDIR"] if "TMPDIR" in config else tempfile.mkdtemp() GENOME = config["genome"] @@ -269,7 +269,7 @@ def get_vep_fasta_path(): ) return os.path.join(config["vep_fasta_path"], "homo_sapiens", - "112_{genome}", + f"{VEP_version}_{{genome}}", filename ) @@ -288,7 +288,7 @@ rule download_vep_cache: output: done=os.path.join(config["vep_cache_path"], ".{genome}.done"), folder=directory( - os.path.join(config["vep_cache_path"], "homo_sapiens", "112_{genome}") + os.path.join(config["vep_cache_path"], "homo_sapiens", f"{VEP_version}_{{genome}}") ), conda: "../envs/vep-annotation.yaml" diff --git a/tests/test_app_check_input.py b/tests/test_app_check_input.py index e528fef..b5c28c6 100644 --- a/tests/test_app_check_input.py +++ b/tests/test_app_check_input.py @@ -172,8 +172,9 @@ def update_config(testconfig): # Check for Error on entry outside specifications: del testconfig['unknown_entry'] + testconfig['settings'] = dict() # - wrong type - testconfig['settings'] = {'chromosomes': '1-22'} + testconfig['settings']['VEP_annotation'] = {'enabled': 'True'} # - wrong number type (float vs int) testconfig['settings']['array_attribute_summary'] = {'density.windows': 1.5} # - value not matching regex @@ -184,8 +185,8 @@ def update_config(testconfig): logrecords = caplog.records[-3:] assert [rec.levelname for rec in logrecords] == ['ERROR'] * 3 assert [rec.message for rec in logrecords] == [ - "The config entry '1-22' for 'settings:chromosomes' is invalid. 
" + - "Value(s) need to be in a list, and matching this regex: (chr)?[0-9XY]+.", + "The config entry 'True' for 'settings:VEP_annotation:enabled' is invalid. " + + "Value(s) need to be booleans (True/False).", "The config entry '1.5' for 'settings:array_attribute_summary:density.windows' is invalid. " + "Value(s) need to be integers (whole numbers).", "The config entry '('PennCNV', 'GATK')' for 'settings:CNV.calling.tools' is invalid. " +