Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update balsamic tags & upload a CGH file to Scout #1477

Merged
merged 25 commits into from
May 19, 2022
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
0087b81
remove outdated command line
ivadym May 3, 2022
b471d4d
update delivery tags & add umi specific tags
ivadym May 3, 2022
17be8a1
upload of a vcf2cytosure file to scout
ivadym May 4, 2022
090e981
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 4, 2022
beaa5dc
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 5, 2022
3994890
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 5, 2022
aca3495
genome_version cli option
ivadym May 5, 2022
ba22d21
pon cnn flag
ivadym May 5, 2022
f834586
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 5, 2022
db52114
update start command
ivadym May 6, 2022
a40a031
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 9, 2022
479b78b
remove comments
ivadym May 9, 2022
c66914a
update balsamic umi tags
ivadym May 9, 2022
ab8cd58
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 9, 2022
a053d72
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 10, 2022
8fdc83f
remove -g command line option
ivadym May 10, 2022
f23a1c1
genome version constants
ivadym May 10, 2022
f374510
Update QOS balsamic comment
ivadym May 10, 2022
e0b78cc
use dry_run constant
ivadym May 10, 2022
d63a73d
type hint & remove os.path
ivadym May 10, 2022
28b629c
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 12, 2022
4dd49bb
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 17, 2022
4403bea
Generalise AnalysisAPI
ivadym May 18, 2022
ad926ec
change name enum
ivadym May 18, 2022
c6143f1
Merge branch 'master' of github.com:Clinical-Genomics/cg into update/…
ivadym May 18, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 39 additions & 11 deletions cg/cli/workflow/balsamic/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
from cg.apps.housekeeper.hk import HousekeeperAPI
from cg.cli.workflow.balsamic.options import (
OPTION_PANEL_BED,
OPTION_DRY,
OPTION_QOS,
OPTION_ANALYSIS_TYPE,
OPTION_RUN_ANALYSIS,
OPTION_GENOME_VERSION,
OPTION_PON_CNN,
)
from cg.cli.workflow.commands import link, resolve_compression, ARGUMENT_CASE_ID
from cg.constants import EXIT_FAIL, EXIT_SUCCESS
from cg.constants.constants import DRY_RUN
from cg.exc import CgError, DecompressionNeededError
from cg.meta.workflow.analysis import AnalysisAPI
from cg.meta.workflow.balsamic import BalsamicAnalysisAPI
Expand Down Expand Up @@ -42,17 +44,32 @@ def balsamic(context: click.Context):

@balsamic.command("config-case")
@ARGUMENT_CASE_ID
@OPTION_GENOME_VERSION
@OPTION_PANEL_BED
@OPTION_DRY
@OPTION_PON_CNN
@DRY_RUN
@click.pass_obj
def config_case(context: CGConfig, panel_bed: str, case_id: str, dry_run: bool):
def config_case(
context: CGConfig,
case_id: str,
genome_version: str,
panel_bed: str,
pon_cnn: click.Path,
dry_run: bool,
):
"""Create config file for BALSAMIC analysis for a given CASE_ID"""

analysis_api: AnalysisAPI = context.meta_apis["analysis_api"]
try:
LOG.info(f"Creating config file for {case_id}.")
analysis_api.verify_case_id_in_statusdb(case_id=case_id)
analysis_api.config_case(case_id=case_id, panel_bed=panel_bed, dry_run=dry_run)
analysis_api.config_case(
case_id=case_id,
genome_version=genome_version,
panel_bed=panel_bed,
pon_cnn=pon_cnn,
dry_run=dry_run,
)
except CgError as e:
LOG.error(f"Could not create config: {e.message}")
raise click.Abort()
Expand All @@ -63,7 +80,7 @@ def config_case(context: CGConfig, panel_bed: str, case_id: str, dry_run: bool):

@balsamic.command("run")
@ARGUMENT_CASE_ID
@OPTION_DRY
@DRY_RUN
@OPTION_QOS
@OPTION_ANALYSIS_TYPE
@OPTION_RUN_ANALYSIS
Expand Down Expand Up @@ -103,7 +120,7 @@ def run(

@balsamic.command("report-deliver")
@ARGUMENT_CASE_ID
@OPTION_DRY
@DRY_RUN
@OPTION_ANALYSIS_TYPE
@click.pass_obj
def report_deliver(context: CGConfig, case_id: str, analysis_type: str, dry_run: bool):
Expand Down Expand Up @@ -156,17 +173,21 @@ def store_housekeeper(context: CGConfig, case_id: str):

@balsamic.command("start")
@ARGUMENT_CASE_ID
@OPTION_GENOME_VERSION
@OPTION_ANALYSIS_TYPE
@OPTION_QOS
@OPTION_DRY
@DRY_RUN
@OPTION_PANEL_BED
@OPTION_PON_CNN
@OPTION_RUN_ANALYSIS
@click.pass_context
def start(
context: click.Context,
case_id: str,
genome_version: str,
analysis_type: str,
panel_bed: str,
pon_cnn: str,
slurm_quality_of_service: str,
run_analysis: bool,
dry_run: bool,
Expand All @@ -176,7 +197,14 @@ def start(
try:
context.invoke(resolve_compression, case_id=case_id, dry_run=dry_run)
context.invoke(link, case_id=case_id, dry_run=dry_run)
context.invoke(config_case, case_id=case_id, panel_bed=panel_bed, dry_run=dry_run)
context.invoke(
config_case,
case_id=case_id,
genome_version=genome_version,
panel_bed=panel_bed,
pon_cnn=pon_cnn,
dry_run=dry_run,
)
context.invoke(
run,
case_id=case_id,
Expand All @@ -190,7 +218,7 @@ def start(


@balsamic.command("start-available")
@OPTION_DRY
@DRY_RUN
@click.pass_context
def start_available(context: click.Context, dry_run: bool = False):
"""Start full workflow for all cases ready for analysis"""
Expand All @@ -213,7 +241,7 @@ def start_available(context: click.Context, dry_run: bool = False):

@balsamic.command("store")
@ARGUMENT_CASE_ID
@OPTION_DRY
@DRY_RUN
@OPTION_ANALYSIS_TYPE
@click.pass_context
def store(context: click.Context, case_id: str, analysis_type: str, dry_run: bool):
Expand All @@ -224,7 +252,7 @@ def store(context: click.Context, case_id: str, analysis_type: str, dry_run: boo


@balsamic.command("store-available")
@OPTION_DRY
@DRY_RUN
@click.pass_context
def store_available(context: click.Context, dry_run: bool) -> None:
"""Store bundles for all finished analyses in Housekeeper"""
Expand Down
18 changes: 14 additions & 4 deletions cg/cli/workflow/balsamic/options.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
import click

from cg.constants.constants import GenomeVersions
from cg.constants.priority import SlurmQos

OPTION_DRY = click.option(
"-d", "--dry-run", help="Print command to console without executing", is_flag=True
# CLI option selecting the reference genome build. The accepted values are
# restricted to the GenomeVersions members listed in the Choice below, and
# hg19 is used when the option is omitted (the default is shown in --help).
OPTION_GENOME_VERSION = click.option(
"--genome-version",
show_default=True,
default=GenomeVersions.hg19,
type=click.Choice([GenomeVersions.hg19, GenomeVersions.hg38, GenomeVersions.canfam3]),
help="Type and build version of the reference genome. Set this option to override the default.",
)
OPTION_PANEL_BED = click.option(
"--panel-bed",
Expand All @@ -28,6 +33,11 @@
"-qos",
"--slurm-quality-of-service",
type=click.Choice([SlurmQos.LOW, SlurmQos.NORMAL, SlurmQos.HIGH, SlurmQos.EXPRESS]),
help="Job priority in SLURM. Will be set automatically according to priority in ClinicalDB, \
this option can be used to override server setting",
help="Job priority in SLURM. Setting this option will override the StatusDB case priority.",
)
# Optional CLI option pointing at a panel-of-normal (.cnn) reference file.
# click.Path(exists=True) makes click reject paths that do not exist before
# the command body runs.
OPTION_PON_CNN = click.option(
"--pon-cnn",
type=click.Path(exists=True),
required=False,
help="Panel of normal reference (.cnn) for cnvkit",
)
6 changes: 6 additions & 0 deletions cg/constants/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@
STATUS_OPTIONS = ("affected", "unaffected", "unknown")


class GenomeVersions(StrEnum):
ivadym marked this conversation as resolved.
Show resolved Hide resolved
hg19: str = "hg19"
hg38: str = "hg38"
canfam3: str = "canfam3"


class DataDelivery(StrEnum):
ANALYSIS_BAM_FILES: str = "analysis-bam"
ANALYSIS_FILES: str = "analysis"
Expand Down
66 changes: 30 additions & 36 deletions cg/constants/delivery.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,49 +14,44 @@
"sarscov2",
]

BALSAMIC_ANALYSIS_ONLY_CASE_TAGS = [
BALSAMIC_ANALYSIS_CASE_TAGS = [
{"multiqc-html"},
{"metrics"},
{"visualization"},
{"vcf-sv-clinical"},
{"vcf-sv-clinical-index"},
{"vcf-sv-research"},
{"vcf-sv-research-index"},
{"vcf-snv-clinical"},
{"vcf-snv-clinical-index"},
{"vardict", "deliver"},
{"vcf", "sention", "haplotype-caller", "filtered"},
{"vcf-index", "sention", "haplotype-caller", "filtered"},
{"vcf-sv-clinical", "manta", "filtered"},
{"vcf-sv-clinical-index", "manta", "filtered"},
{"vcf-sv-research", "filtered"},
{"vcf-sv-research-index", "filtered"},
{"ascatngs", "visualization"},
{"cnvkit", "vcf-sv-research", "filtered"},
{"cnvkit", "vcf-sv-research-index", "filtered"},
{"cnvkit", "visualization"},
{"cnvkit", "visualization", "diagram"},
{"cnvkit", "regions"},
{"multiqc-html"},
{"vcf-snv-research"},
{"vcf-snv-research-index"},
{"germline", "vcf"},
{"germline", "vcf-index"},
{"vcf2cytosure"},
]

BALSAMIC_ANALYSIS_CASE_TAGS = copy.deepcopy(BALSAMIC_ANALYSIS_ONLY_CASE_TAGS)
BALSAMIC_ANALYSIS_CASE_TAGS.extend(
[
{"cram", "normal"},
{"cram-index"},
{"cram", "tumor"},
{"cram-index", "tumor"},
]
)

BALSAMIC_ANALYSIS_SAMPLE_TAGS = [
{"cram", "normal"},
{"cram"},
{"cram-index"},
{"cram", "tumor"},
{"cram-index", "tumor"},
]

BALSAMIC_QC_CASE_TAGS = [
{"multiqc-html"},
BALSAMIC_UMI_ANALYSIS_CASE_TAGS = [
{"vcf-umi-clinical"},
{"vcf-umi-clinical-index"},
{"vcf-umi-research"},
{"vcf-umi-research-index"},
]
BALSAMIC_QC_SAMPLE_TAGS = [
{"fastq"},

BALSAMIC_UMI_ANALYSIS_CASE_TAGS.extend(BALSAMIC_ANALYSIS_CASE_TAGS)

BALSAMIC_UMI_ANALYSIS_SAMPLE_TAGS = [
{"umi-cram"},
{"umi-cram-index"},
]

BALSAMIC_UMI_ANALYSIS_SAMPLE_TAGS.extend(BALSAMIC_ANALYSIS_SAMPLE_TAGS)


MIP_DNA_ANALYSIS_CASE_TAGS = [
{"vcf-clinical-sv-bin"},
Expand Down Expand Up @@ -135,10 +130,9 @@
"case_tags": BALSAMIC_ANALYSIS_CASE_TAGS,
"sample_tags": BALSAMIC_ANALYSIS_SAMPLE_TAGS,
},
"balsamic-analysis": {"case_tags": BALSAMIC_ANALYSIS_ONLY_CASE_TAGS, "sample_tags": []},
"balsamic-qc": {
"case_tags": BALSAMIC_QC_CASE_TAGS,
"sample_tags": BALSAMIC_QC_SAMPLE_TAGS,
"balsamic-umi": {
"case_tags": BALSAMIC_UMI_ANALYSIS_CASE_TAGS,
"sample_tags": BALSAMIC_UMI_ANALYSIS_SAMPLE_TAGS,
},
"mip-dna": {
"case_tags": MIP_DNA_ANALYSIS_CASE_TAGS,
Expand Down
3 changes: 2 additions & 1 deletion cg/constants/scout_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
)

BALSAMIC_CASE_TAGS = dict(
snv_vcf={"vcf-snv-clinical"},
sv_vcf={"vcf-sv-clinical"},
snv_vcf={"vcf-snv-clinical"},
cnv_report={"ascatngs", "visualization"},
multiqc_report={"multiqc-html"},
delivery_report={"delivery-report"},
Expand All @@ -34,4 +34,5 @@
BALSAMIC_SAMPLE_TAGS = dict(
bam_file={"bam"},
alignment_file={"cram"},
vcf2cytosure={"vcf2cytosure"},
)
6 changes: 5 additions & 1 deletion cg/meta/upload/scout/balsamic_config_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,12 @@ def include_case_files(self):
self.include_multiqc_report()
self.include_delivery_report()

def include_sample_files(self, config_sample: ScoutBalsamicIndividual):
def include_sample_files(self, config_sample: ScoutBalsamicIndividual) -> None:
LOG.info("Including BALSAMIC specific sample level files")
if config_sample.alignment_path and "tumor" in config_sample.alignment_path:
config_sample.vcf2cytosure = self.fetch_sample_file(
ivadym marked this conversation as resolved.
Show resolved Hide resolved
hk_tags=self.sample_tags.vcf2cytosure, sample_id=self.load_config.family
)

def include_delivery_report(self) -> None:
LOG.info("Include coverage qc report to case")
Expand Down
44 changes: 38 additions & 6 deletions cg/meta/workflow/balsamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,24 @@ def get_verified_bed(self, sample_data: dict, panel_bed: Path) -> Optional[str]:
)
return Path(self.bed_path, target_bed).as_posix()

@staticmethod
def get_verified_pon(panel_bed: Path, pon_cnn: str) -> Optional[str]:
"""Returns the validated PoN path

Raises BalsamicStartError:
When there is a missmatch between the PoN and the panel bed file names
"""
if pon_cnn:
pon_cnn = Path(str(pon_cnn))
if panel_bed.stem not in pon_cnn.stem:
raise BalsamicStartError(
f"The specified PoN reference file {pon_cnn} does not match the panel bed {panel_bed}"
)

return pon_cnn.as_posix()

return pon_cnn

@staticmethod
def get_verified_tumor_path(sample_data: dict) -> str:
"""Takes a dict with samples and attributes, and returns the path
Expand Down Expand Up @@ -341,9 +359,7 @@ def get_normal_sample_name(self, case_id: str) -> Optional[str]:
return sample_obj.internal_id

def get_verified_config_case_arguments(
self,
case_id: str,
panel_bed: str,
self, case_id: str, genome_version: str, panel_bed: str, pon_cnn: str
) -> dict:
"""Takes a dictionary with per-sample parameters,
validates them, and transforms into command line arguments
Expand All @@ -370,9 +386,11 @@ def get_verified_config_case_arguments(

return {
"case_id": case_id,
"genome_version": genome_version,
"normal": self.get_verified_normal_path(sample_data=sample_data),
"tumor": self.get_verified_tumor_path(sample_data=sample_data),
"panel_bed": self.get_verified_bed(sample_data=sample_data, panel_bed=panel_bed),
"pon_cnn": self.get_verified_pon(pon_cnn=pon_cnn, panel_bed=panel_bed),
"tumor_sample_name": self.get_tumor_sample_name(case_id=case_id),
"normal_sample_name": self.get_normal_sample_name(case_id=case_id),
}
Expand Down Expand Up @@ -508,18 +526,32 @@ def __build_command_str(options: dict) -> List[str]:
formatted_options.append(str(val))
return formatted_options

def config_case(self, case_id: str, panel_bed: str, dry_run: bool = False) -> None:
def config_case(
self,
case_id: str,
genome_version: str,
panel_bed: str,
pon_cnn: str,
dry_run: bool = False,
) -> None:
"""Create config file for BALSAMIC analysis"""
arguments = self.get_verified_config_case_arguments(case_id=case_id, panel_bed=panel_bed)
arguments = self.get_verified_config_case_arguments(
case_id=case_id,
genome_version=genome_version,
panel_bed=panel_bed,
pon_cnn=pon_cnn,
)
command = ["config", "case"]
options = self.__build_command_str(
{
"--analysis-dir": self.root_dir,
"--balsamic-cache": self.balsamic_cache,
"--case-id": arguments.get("case_id"),
"--genome-version": arguments.get("genome_version"),
"--normal": arguments.get("normal"),
"--tumor": arguments.get("tumor"),
"--panel-bed": arguments.get("panel_bed"),
"--pon-cnn": arguments.get("pon_cnn"),
"--umi-trim-length": arguments.get("umi_trim_length"),
"--tumor-sample-name": arguments.get("tumor_sample_name"),
"--normal-sample-name": arguments.get("normal_sample_name"),
Expand Down Expand Up @@ -565,7 +597,7 @@ def report_deliver(
"--analysis-type": analysis_type or self.get_analysis_type(case_id),
}
)
parameters = command + options + ["--no-qc-metrics"]
parameters = command + options
self.process.run_command(parameters=parameters, dry_run=dry_run)

def get_analysis_type(self, case_id: str) -> Optional[str]:
Expand Down
Loading