Skip to content

Commit

Permalink
Merge pull request #319 from NCI-CGR/issue-210-revisit-disc-exp-reps
Browse files (browse the repository at this point in the history)
fix: Handle removal of discordant expected duplicates (issue #210)
  • Loading branch information
jaamarks authored Sep 6, 2024
2 parents 5cf67de + 3eff185 commit 0987eb1
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 10 deletions.
2 changes: 1 addition & 1 deletion src/cgr_gwas_qc/workflow/conda/illuminaio.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ channels:
- bioconda
- conda-forge
dependencies:
- bioconductor-illuminaio=0.20.0
- bioconductor-illuminaio=0.44.0
12 changes: 5 additions & 7 deletions src/cgr_gwas_qc/workflow/scripts/sample_qc_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,18 +194,14 @@ def main(
add_qc_columns(
sample_qc, remove_contam, remove_rep_discordant,
)
sample_qc["is_unexpected_replicate"] = (
sample_qc["is_unexpected_replicate"].replace("", False).fillna(False)
)
sample_qc["is_discordant_replicate"] = (
sample_qc["is_discordant_replicate"].replace("", False).fillna(False)
)

sample_qc = sample_qc.rename(
columns={
"is_unexpected_replicate": "Unexpected Replicate",
"is_discordant_replicate": "Expected Replicate Discordance",
}
)

save(sample_qc, outfile)


Expand Down Expand Up @@ -396,6 +392,8 @@ def _read_concordance(filename: Path, Sample_IDs: pd.Index) -> pd.DataFrame:
.max() # Flag a sample as True if it is True for any comparison.
.astype("boolean")
.reindex(Sample_IDs)
.replace("", False)
.fillna(False)
)


Expand Down Expand Up @@ -527,7 +525,7 @@ def _add_analytic_exclusion(
exclusion_criteria["is_contaminated"] = "Contamination"

if remove_rep_discordant:
exclusion_criteria["Expected Replicate Discordance"] = "Replicate Discordance"
exclusion_criteria["is_discordant_replicate"] = "Replicate Discordance"

sample_qc["analytic_exclusion"] = sample_qc.reindex(exclusion_criteria.keys(), axis=1).any(
axis=1
Expand Down
4 changes: 3 additions & 1 deletion src/cgr_gwas_qc/workflow/sub_workflows/entry_points.smk
Original file line number Diff line number Diff line change
Expand Up @@ -88,13 +88,15 @@ if cfg.config.user_files.gtc_pattern:
cfg.conda("bcftools-gtc2vcf-plugin")
shell:
"bcftools +gtc2vcf --gtcs {input.gtcs} --bpm {input.bpm} --fasta-ref {input.reference_fasta} --output {output.vcf} --use-gtc-sample-names"

rule filter_missing_allele_snps:
input:
vcf=rules.gtc_to_vcf.output.vcf,
output:
vcf=temp("sample_level/samples_filtered.vcf"),
shell:
"grep -vP '\t\.\t\.\t\.' {input.vcf} > {output.vcf}"
"grep -vP '\t\.\t\.\t\.' {input.vcf} > {output.vcf}"

rule vcf_to_bed:
input:
vcf=rules.filter_missing_allele_snps.output.vcf,
Expand Down
2 changes: 1 addition & 1 deletion tests/workflow/scripts/test_sample_qc_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def fake_sample_qc() -> pd.DataFrame:
"is_cr1_filtered",
"is_cr2_filtered",
"is_contaminated",
"Expected Replicate Discordance",
"is_discordant_replicate",
]
data = [
("SP00001", "SB00001", False, False, 0.99, False, False, False, False),
Expand Down

0 comments on commit 0987eb1

Please sign in to comment.