Skip to content

Commit

Permalink
Merge branch 'master' into write_config_taxprofiler
Browse files Browse the repository at this point in the history
  • Loading branch information
sofstam authored Feb 3, 2025
2 parents 02aafa5 + cfe857d commit ed33eb2
Show file tree
Hide file tree
Showing 34 changed files with 972 additions and 322 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 67.0.9
current_version = 67.0.19
commit = True
tag = True
tag_name = v{new_version}
Expand Down
2 changes: 1 addition & 1 deletion cg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__title__ = "cg"
__version__ = "67.0.9"
__version__ = "67.0.19"
27 changes: 24 additions & 3 deletions cg/cli/workflow/nallo/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@

import rich_click as click

from cg.cli.utils import CLICK_CONTEXT_SETTINGS

from cg.cli.workflow.nf_analysis import config_case, run, start

from cg.cli.utils import CLICK_CONTEXT_SETTINGS, echo_lines
from cg.cli.workflow.commands import ARGUMENT_CASE_ID
from cg.constants.cli_options import DRY_RUN
from cg.cli.workflow.nf_analysis import config_case, metrics_deliver, report_deliver, run, start
from cg.constants.constants import MetaApis
from cg.meta.workflow.analysis import AnalysisAPI
from cg.meta.workflow.nallo import NalloAnalysisAPI
from cg.models.cg_config import CGConfig

LOG = logging.getLogger(__name__)

Expand All @@ -24,5 +26,24 @@ def nallo(context: click.Context) -> None:


nallo.add_command(config_case)
nallo.add_command(report_deliver)
nallo.add_command(run)
nallo.add_command(start)
nallo.add_command(metrics_deliver)


@nallo.command("panel")
@DRY_RUN
@ARGUMENT_CASE_ID
@click.pass_obj
def panel(context: CGConfig, case_id: str, dry_run: bool) -> None:
    """Write aggregated gene panel file exported from Scout."""
    # The docstring above doubles as the CLI help text, so it is kept verbatim.
    nallo_api: NalloAnalysisAPI = context.meta_apis["analysis_api"]
    # Check the case before fetching the panel.
    nallo_api.status_db.verify_case_exists(case_internal_id=case_id)

    panel_lines: list[str] = nallo_api.get_gene_panel(case_id=case_id)
    if not dry_run:
        nallo_api.write_panel_as_tsv(case_id=case_id, content=panel_lines)
        return
    # Dry run: only echo the panel lines, nothing is written.
    echo_lines(lines=panel_lines)
4 changes: 4 additions & 0 deletions cg/constants/nf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ class NfTowerStatus(StrEnum):
UNKNOWN: str = "UNKNOWN"


# QC conditions for Nallo metrics, keyed by metric name. Each entry pairs a
# comparison operator name ("norm") with the "threshold" it is compared against.
# NOTE(review): "gt" presumably means the metric must be greater than the
# threshold - confirm against the code that evaluates these conditions.
NALLO_METRIC_CONDITIONS: dict[str, dict[str, Any]] = {
    "median_coverage": {"norm": "gt", "threshold": 25},
}

RAREDISEASE_PREDICTED_SEX_METRIC = "predicted_sex_sex_check"

RAREDISEASE_METRIC_CONDITIONS: dict[str, dict[str, Any]] = {
Expand Down
1 change: 1 addition & 0 deletions cg/constants/scout.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ class GenomeBuild(StrEnum):
class ScoutExportFileName(StrEnum):
MANAGED_VARIANTS: str = f"managed_variants{FileExtensions.VCF}"
PANELS: str = f"gene_panels{FileExtensions.BED}"
PANELS_TSV: str = f"gene_panels{FileExtensions.TSV}"


class UploadTrack(StrEnum):
Expand Down
99 changes: 22 additions & 77 deletions cg/meta/upload/scout/uploadscoutapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pathlib import Path

from housekeeper.store.models import File, Version
from pydantic.dataclasses import dataclass

from cg.apps.housekeeper.hk import HousekeeperAPI
from cg.apps.lims import LimsAPI
Expand All @@ -26,22 +25,13 @@
from cg.meta.workflow.analysis import AnalysisAPI
from cg.meta.workflow.utils.genome_build_helpers import genome_to_scout_format, get_genome_build
from cg.models.scout.scout_load_config import ScoutLoadConfig
from cg.store.api.data_classes import RNADNACollection
from cg.store.models import Analysis, Case, Customer, Sample
from cg.store.store import Store

LOG = logging.getLogger(__name__)


@dataclass
class RNADNACollection:
"""Contains the id for an RNA sample, the name of its connected DNA sample,
and a list of connected, uploaded DNA cases."""

rna_sample_internal_id: str
dna_sample_name: str
dna_case_ids: list[str]


class UploadScoutAPI:
"""Class that handles everything that has to do with uploading to Scout."""

Expand Down Expand Up @@ -182,15 +172,6 @@ def get_rna_omics_outrider(self, case_id: str) -> File | None:
tags: set[str] = {AnalysisTag.OUTRIDER, case_id, AnalysisTag.CLINICAL}
return self.housekeeper.get_file_from_latest_version(bundle_name=case_id, tags=tags)

def get_unique_dna_cases_related_to_rna_case(self, case_id: str) -> set[str]:
    """Return the de-duplicated DNA case ids related to an RNA case.

    The RNA case is looked up in StatusDB, one RNA/DNA collection is created per
    linked sample, and the DNA case ids of all collections are merged into one set.
    """
    rna_case: Case = self.status_db.get_case_by_internal_id(case_id)
    return {
        dna_case_id
        for collection in self.create_rna_dna_collections(rna_case)
        for dna_case_id in collection.dna_case_ids
    }

def get_rna_alignment_cram(self, case_id: str, sample_id: str) -> File | None:
"""Return an RNA alignment CRAM file for a case in Housekeeper."""
tags: set[str] = {AlignmentFileTag.CRAM, sample_id}
Expand All @@ -206,9 +187,11 @@ def get_rna_alignment_cram(self, case_id: str, sample_id: str) -> File | None:
def upload_rna_alignment_file(self, case_id: str, dry_run: bool) -> None:
"""Upload RNA alignment file to Scout."""
rna_case: Case = self.status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_alignment_cram: File | None = self.get_rna_alignment_cram(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -346,9 +329,11 @@ def upload_rna_coverage_bigwig_to_scout(self, case_id: str, dry_run: bool) -> No

status_db: Store = self.status_db
rna_case = status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_coverage_bigwig: File | None = self.get_rna_coverage_bigwig(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -382,7 +367,7 @@ def upload_omics_sample_id_to_scout(
self, dry_run: bool, rna_dna_collections: list[RNADNACollection]
) -> None:
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
for dna_case_id in rna_dna_collection.dna_case_ids:
LOG.info(
Expand All @@ -406,7 +391,7 @@ def upload_rna_fraser_outrider_to_scout(
"""Upload omics fraser and outrider file for a case to Scout."""
status_db: Store = self.status_db
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_fraser: File | None = self.get_rna_omics_fraser(case_id=case_id)
rna_outrider: File | None = self.get_rna_omics_outrider(case_id=case_id)
Expand Down Expand Up @@ -442,7 +427,7 @@ def upload_rna_fraser_outrider_to_scout(
def upload_rna_genome_build_to_scout(
self,
dry_run: bool,
rna_case: str,
rna_case: Case,
rna_dna_collections: list[RNADNACollection],
) -> None:
"""Upload RNA genome built for a RNA/DNA case to Scout."""
Expand Down Expand Up @@ -502,9 +487,11 @@ def upload_splice_junctions_bed_to_scout(self, dry_run: bool, case_id: str) -> N
status_db: Store = self.status_db
rna_case: Case = status_db.get_case_by_internal_id(case_id)

rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
splice_junctions_bed: File | None = self.get_splice_junctions_bed(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -615,7 +602,9 @@ def upload_rna_omics_to_scout(self, dry_run: bool, case_id: str) -> None:
"""Upload RNA omics files to Scout."""
status_db: Store = self.status_db
rna_case = status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
self.upload_omics_sample_id_to_scout(
dry_run=dry_run, rna_dna_collections=rna_dna_collections
)
Expand Down Expand Up @@ -675,45 +664,6 @@ def get_config_builder(self, analysis, hk_version) -> ScoutConfigBuilder:

return config_builders[analysis.workflow]

def create_rna_dna_collections(self, rna_case: Case) -> list[RNADNACollection]:
    """Return one RNA/DNA collection per sample linked to the given RNA case."""
    rna_samples = (link.sample for link in rna_case.links)
    return list(map(self.create_rna_dna_collection, rna_samples))

def create_rna_dna_collection(self, rna_sample: Sample) -> RNADNACollection:
    """Build the RNA/DNA collection for a single RNA sample.

    The collection holds the RNA sample's internal id, the name of the single DNA
    sample sharing its subject_id, and the ids of the DNA cases connected to that
    DNA sample.

    Raises:
        CgDataError: if the RNA sample has no subject_id, or if the number of
            matching DNA samples is not exactly one.
    """
    subject_id = rna_sample.subject_id
    if not subject_id:
        raise CgDataError(
            f"Failed to link RNA sample {rna_sample.internal_id} to DNA samples - subject_id field is empty."
        )

    collaborators: set[Customer] = rna_sample.customer.collaborators
    collaborator_ids = [collaborator.id for collaborator in collaborators]
    # Candidates share subject_id and tumour status with the RNA sample and
    # belong to one of the collaborating customers.
    candidate_samples: list[Sample] = (
        self.status_db.get_samples_by_customer_ids_and_subject_id_and_is_tumour(
            customer_ids=collaborator_ids,
            subject_id=subject_id,
            is_tumour=rna_sample.is_tumour,
        )
    )
    dna_samples: list[Sample] = self._get_application_prep_category(candidate_samples)

    match_count = len(dna_samples)
    if match_count != 1:
        raise CgDataError(
            f"Failed to upload files for RNA case: unexpected number of DNA sample matches for subject_id: "
            f"{subject_id}. Number of matches: {match_count} "
        )

    dna_sample: Sample = dna_samples[0]
    dna_case_ids: list[str] = self._dna_cases_related_to_dna_sample(
        dna_sample=dna_sample, collaborators=collaborators
    )
    return RNADNACollection(
        rna_sample_internal_id=rna_sample.internal_id,
        dna_sample_name=dna_sample.name,
        dna_case_ids=dna_case_ids,
    )

def _dna_cases_related_to_dna_sample(
self, dna_sample: Sample, collaborators: set[Customer]
) -> list[str]:
Expand Down Expand Up @@ -768,11 +718,6 @@ def _get_application_prep_category(

def get_related_uploaded_dna_cases(self, rna_case_id: str) -> set[str]:
"""Returns all uploaded DNA cases related to the specified RNA case."""
unique_dna_case_ids: set[str] = self.get_unique_dna_cases_related_to_rna_case(rna_case_id)
uploaded_dna_cases: set[str] = set()
for dna_case_id in unique_dna_case_ids:
if self.status_db.get_case_by_internal_id(dna_case_id).is_uploaded:
uploaded_dna_cases.add(dna_case_id)
else:
LOG.warning(f"Related DNA case {dna_case_id} has not been completed.")
return uploaded_dna_cases
rna_case: Case = self.status_db.get_case_by_internal_id(rna_case_id)
dna_cases: list[Case] = self.status_db.get_uploaded_related_dna_cases(rna_case)
return {dna_case.internal_id for dna_case in dna_cases}
53 changes: 51 additions & 2 deletions cg/meta/workflow/nallo.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
"""Module for Nallo Analysis API."""

import logging
from pathlib import Path

from cg.constants import Workflow
from cg.constants.constants import GenomeVersion, FileFormat
from cg.constants.nf_analysis import NALLO_METRIC_CONDITIONS
from cg.constants.scout import ScoutExportFileName
from cg.constants.subject import PlinkPhenotypeStatus, PlinkSex
from cg.io.controller import WriteFile
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig
from cg.models.nallo.nallo import NalloSampleSheetHeaders, NalloSampleSheetEntry, NalloParameters
from cg.models.nallo.nallo import NalloParameters, NalloSampleSheetEntry, NalloSampleSheetHeaders
from cg.resources import NALLO_BUNDLE_FILENAMES_PATH
from cg.store.models import CaseSample
from pathlib import Path

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -90,4 +96,47 @@ def get_built_workflow_parameters(self, case_id: str) -> NalloParameters:
return NalloParameters(
input=self.get_sample_sheet_path(case_id=case_id),
outdir=outdir,
filter_variants_hgnc_ids=f"{outdir}/{ScoutExportFileName.PANELS_TSV}",
)

@property
def is_gene_panel_required(self) -> bool:
    """Return True: this workflow needs a gene panel built from StatusDB information and exported from Scout."""
    gene_panel_required: bool = True
    return gene_panel_required

def create_gene_panel(self, case_id: str, dry_run: bool) -> None:
    """Create the aggregated gene panel exported from Scout and write it as a TSV file.

    Args:
        case_id: internal id of the case the panel is created for.
        dry_run: when True, the panel lines are only logged at debug level and
            no file is written.
    """
    LOG.info("Creating gene panel file")
    bed_lines: list[str] = self.get_gene_panel(case_id=case_id, dry_run=dry_run)
    if dry_run:
        # Use a separate name: rebinding ``bed_lines`` as ``str`` contradicted its
        # ``list[str]`` annotation. The logged text is unchanged.
        panel_preview: str = "\n".join(bed_lines)
        LOG.debug(panel_preview)
        return
    self.write_panel_as_tsv(case_id=case_id, content=bed_lines)

def write_panel_as_tsv(self, case_id: str, content: list[str]) -> None:
    """Write the gene panel lines into the case directory under the workflow root."""
    case_dir: Path = Path(self.root) / case_id
    self._write_panel_as_tsv(out_dir=case_dir, content=content)

@staticmethod
def _write_panel_as_tsv(out_dir: Path, content: list[str]) -> None:
    """Write the gene panel to the case directory, omitting the commented ("##") BED lines."""
    # Drop the "##" lines before touching the file system.
    panel_text: str = "\n".join([line for line in content if not line.startswith("##")])
    out_dir.mkdir(parents=True, exist_ok=True)
    panel_path: Path = Path(out_dir, ScoutExportFileName.PANELS_TSV)
    WriteFile.write_file_from_content(
        content=panel_text,
        file_format=FileFormat.TXT,
        file_path=panel_path,
    )

def get_genome_build(self, case_id: str) -> GenomeVersion:
    """Return the reference genome for a Nallo case.

    The build is currently fixed to hg38; ``case_id`` is not used.
    """
    genome_build: GenomeVersion = GenomeVersion.HG38
    return genome_build

@staticmethod
def get_bundle_filenames_path() -> Path:
    """Return the path to the Nallo bundle filenames file."""
    bundle_filenames_path: Path = NALLO_BUNDLE_FILENAMES_PATH
    return bundle_filenames_path

def get_workflow_metrics(self, metric_id: str) -> dict:
    """Return the Nallo metric conditions.

    The same module-level conditions are returned for every ``metric_id``; the
    argument is not used.
    """
    metric_conditions: dict = NALLO_METRIC_CONDITIONS
    return metric_conditions
11 changes: 7 additions & 4 deletions cg/models/analysis.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic import BaseModel, ConfigDict
from pydantic import BaseModel, ConfigDict, Field

from cg.models.raredisease.raredisease import RarediseaseQCMetrics
from cg.models.rnafusion.rnafusion import RnafusionQCMetrics
Expand All @@ -15,6 +15,9 @@ class AnalysisModel(BaseModel):
class NextflowAnalysis(AnalysisModel):
"""Nextflow's analysis results model."""

sample_metrics: dict[
str, RarediseaseQCMetrics | RnafusionQCMetrics | TaxprofilerQCMetrics | TomteQCMetrics
]
sample_metrics: (
dict[str, RarediseaseQCMetrics]
| dict[str, RnafusionQCMetrics]
| dict[str, TaxprofilerQCMetrics]
| dict[str, TomteQCMetrics]
) = Field(union_mode="left_to_right")
8 changes: 8 additions & 0 deletions cg/models/nallo/nallo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
from cg.models.nf_analysis import WorkflowParameters


class NalloQCMetrics(BaseModel):
"""Nallo QC metrics"""

median_coverage: float | None


class NalloSampleSheetEntry(BaseModel):
"""Nallo sample model is used when building the sample sheet."""

Expand Down Expand Up @@ -61,3 +67,5 @@ def list(cls) -> list[str]:

class NalloParameters(WorkflowParameters):
"""Model for Nallo parameters."""

filter_variants_hgnc_ids: str
5 changes: 3 additions & 2 deletions cg/models/taxprofiler/taxprofiler.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from pathlib import Path

from pydantic import BaseModel, Field
from pydantic import Field

from cg.constants.sequencing import SequencingPlatform
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
from cg.models.qc_metrics import QCMetrics


class TaxprofilerQCMetrics(BaseModel):
class TaxprofilerQCMetrics(QCMetrics):
"""Taxprofiler QC metrics."""

after_filtering_gc_content: float
Expand Down
Loading

0 comments on commit ed33eb2

Please sign in to comment.