Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: UMCUGenetics/CustomModules
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v2.0.2
Choose a base ref
...
head repository: UMCUGenetics/CustomModules
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: v2.1.0
Choose a head ref
Loading
2 changes: 1 addition & 1 deletion CheckFingerprintVCF/CheckFingerprintVCF.nf
Original file line number Diff line number Diff line change
@@ -15,6 +15,6 @@ process CheckFingerprintVCF {

script:
"""
python ${baseDir}/CustomModules/CheckFingerprintVCF/check_fingerprint_vcf.py -o logbook.txt ${vcf_files}
python ${projectDir}/CustomModules/CheckFingerprintVCF/check_fingerprint_vcf.py -o logbook.txt ${vcf_files}
"""
}
2 changes: 1 addition & 1 deletion CheckQC/CheckQC.nf
Original file line number Diff line number Diff line change
@@ -13,6 +13,6 @@ process CheckQC {

script:
"""
python ${baseDir}/CustomModules/CheckQC/check_qc.py ${params.qc_settings_path} '.' ${identifier} ${input_files}
python ${projectDir}/CustomModules/CheckQC/check_qc.py ${params.qc_settings_path} '.' ${identifier} ${input_files}
"""
}
14 changes: 7 additions & 7 deletions ClarityEpp/SampleIndications.nf → ClarityEpp/SampleUDFDx.nf
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
process SampleIndications {
// Custom process to run clarity_epp export sample_indications
tag {"ClarityEpp SampleIndications ${sample_id}"}
process SampleUDFDx {
// Custom process to run clarity_epp export sample_udf_dx
tag {"ClarityEpp SampleUDFDx ${sample_id}"}
label 'ClarityEpp'
label 'ClarityEpp_SampleIndications'
label 'ClarityEpp_SampleUDFDx'
shell = ['/bin/bash', '-eo', 'pipefail']
cache = false //Disable cache to force a clarity export restarting the workflow.

@@ -15,7 +15,7 @@ process SampleIndications {
script:
"""
source ${params.clarity_epp_path}/venv/bin/activate
python ${params.clarity_epp_path}/clarity_epp.py export sample_indications \
-a ${sample_id} | cut -f 2 | grep -v 'Indication' | tr -d '\n'
python ${params.clarity_epp_path}/clarity_epp.py export sample_udf_dx \
-a ${sample_id} -u '$params.udf' -c '$params.column_name' | cut -f 2 | grep -v $params.column_name | tr -d '\n'
"""
}
}
27 changes: 27 additions & 0 deletions GenderCheck/CompareGender.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
process CompareGender {
// Custom process to check gender of sample with known status
tag {"CompareGender ${sample_id}"}
label 'CompareGender'
label 'CompareGender_Pysam'
container = 'ghcr.io/umcugenetics/custommodules_gendercheck:1.0.0'
shell = ['/bin/bash', '-eo', 'pipefail']

input:
tuple(val(sample_id), val(analysis_id), path(bam_file), path(bai_file), val(true_gender))

output:
tuple(path("*gendercheck.txt"), emit: gendercheck_qc)

script:
"""
python ${projectDir}/CustomModules/GenderCheck/calculate_gender.py \
${sample_id} \
${analysis_id} \
${bam_file} \
./ \
${true_gender} \
$params.gendercheck_ratio_y \
$params.gendercheck_mapping_qual \
$params.gendercheck_locus_y
"""
}
11 changes: 11 additions & 0 deletions GenderCheck/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
################## BASE IMAGE ######################
FROM --platform=linux/amd64 python:3.11

################## METADATA ######################
LABEL base_image="python:3.11"
LABEL version="1.0.0"
LABEL extra.binaries="pysam,pytest"

################## INSTALLATIONS ######################
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
64 changes: 64 additions & 0 deletions GenderCheck/calculate_gender.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#! /usr/bin/env python3

import argparse
import pysam


def is_valid_read(read, mapping_qual):
"""Check if a read is properly mapped."""
if (read.mapping_quality >= mapping_qual and read.reference_end and read.reference_start):
return True
return False


def get_gender_from_bam(bam, mapping_qual, locus_y, ratio_y):
with pysam.AlignmentFile(bam, "rb") as bam_file:
y_reads = float(
sum([is_valid_read(read, mapping_qual) for read in bam_file.fetch(region=locus_y)])
)
total_reads = float(bam_file.mapped)
y_ratio_perc = (y_reads / total_reads) * 100
if y_ratio_perc <= ratio_y:
return "female"
else:
return "male"


def compare_gender(sample_id, analysis_id, test_gender, true_gender):
if test_gender == true_gender or true_gender == "unknown": # if gender if unknown/onbekend in database, pass
qc = "PASS"
else: # not_detected in database considered failed
qc = "FAIL"
return f"{sample_id}\t{analysis_id}\t{test_gender}\t{true_gender}\t{qc}\n"


def write_qc_file(sample_id, analysis_id, comparison, outputfolder):
with open(f"{outputfolder}/{sample_id}_{analysis_id}_gendercheck.txt", 'w') as write_file:
write_file.write("sample_id\tanalysis_id\ttest_gender\ttrue_gender\tstatus\n")
write_file.write(comparison)


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('sample_id', help='sample_id')
parser.add_argument('analysis_id', help='analysis_id')
parser.add_argument('bam', help='path to bam file')
parser.add_argument('outputfolder', help='path to output folder')
parser.add_argument('true_gender', help='gender regarded as the truth')
parser.add_argument(
"ratio_y",
type=float,
help="maximunum chromosome Y ratio for females [float]"
)
parser.add_argument('mapping_qual', type=int, help='minimum mapping quality of reads to be considered [int]')
parser.add_argument('locus_y', help='Coordinates for includes region on chromosome Y (chr:start-stop)')
args = parser.parse_args()

translation = {"Man": "male", "Vrouw": "female", "Onbekend": "unknown", "unknown": "not_detected"}
true_gender = args.true_gender
if true_gender in translation:
true_gender = translation[true_gender]

test_gender = get_gender_from_bam(args.bam, args.mapping_qual, args.locus_y, args.ratio_y)
comparison = compare_gender(args.sample_id, args.analysis_id, test_gender, true_gender)
write_qc_file(args.sample_id, args.analysis_id, comparison, args.outputfolder)
5 changes: 5 additions & 0 deletions GenderCheck/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
iniconfig==2.0.0
packaging==23.2
pluggy==1.4.0
pysam==0.22.0
pytest==8.0.2
Binary file added GenderCheck/test_bam.bam
Binary file not shown.
Binary file added GenderCheck/test_bam.bam.bai
Binary file not shown.
55 changes: 55 additions & 0 deletions GenderCheck/test_calculate_gender.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import calculate_gender

import pytest


class TestIsValidRead():

class MyObject:
def __init__(self, qual, start, end):
self.mapping_quality = qual
self.reference_start = start
self.reference_end = end

@pytest.mark.parametrize("read,mapping_qual,expected", [
(MyObject(19, True, True), 20, False), # mapping quality is below the threshold
(MyObject(20, True, True), 20, True), # mapping quality is equal to the threshold
(MyObject(20, True, True), 19, True), # mapping quality is higher than the threshold
(MyObject(20, False, True), 20, False), # reference_end is false
(MyObject(20, True, False), 20, False), # reference_start is false
])
def test_is_valid_read(self, read, mapping_qual, expected):
assert expected == calculate_gender.is_valid_read(read, mapping_qual)


class TestGetGenderFromBam():
@pytest.mark.parametrize("bam,mapping_qual,locus_y,ratio_y,expected", [
("./test_bam.bam", 20, "Y:2649520-59034050", 0.02, "male"), # output male below
("./test_bam.bam", 20, "Y:2649520-59034050", 0.22, "female"), # output female
])
def test_get_gender_from_bam(self, bam, mapping_qual, locus_y, ratio_y, expected):
assert expected == calculate_gender.get_gender_from_bam(bam, mapping_qual, locus_y, ratio_y)


class TestCompareGender():
@pytest.mark.parametrize("sample_id,analysis_id,test_gender,true_gender,expected", [
# test_gender and true_gender identical, should be PASS
("test_sample", "test_analyse", "male", "male", "test_sample\ttest_analyse\tmale\tmale\tPASS\n"),
# test_gender and true_gender not identical , should be FAIL
("test_sample", "test_analyse", "male", "female", "test_sample\ttest_analyse\tmale\tfemale\tFAIL\n"),
# true_gender unknown, should be PASS
("test_sample", "test_analyse", "male", "unknown", "test_sample\ttest_analyse\tmale\tunknown\tPASS\n"),
# true_gender not_detected, should be FAIL
("test_sample", "test_analyse", "male", "not_detected", "test_sample\ttest_analyse\tmale\tnot_detected\tFAIL\n"),
])
def test_compare_gender(self, sample_id, analysis_id, test_gender, true_gender, expected):
assert expected == calculate_gender.compare_gender(sample_id, analysis_id, test_gender, true_gender)


def test_write_qc_file(tmp_path):
path = tmp_path / "qc_folder"
path.mkdir()
qc_file = path / "test_sample_test_analyse_gendercheck.txt"
calculate_gender.write_qc_file("test_sample", "test_analyse", "test_sample\ttest_analyse\tmale\tmale\tPASS\n", path)
message = "sample_id\tanalysis_id\ttest_gender\ttrue_gender\tstatus\ntest_sample\ttest_analyse\tmale\tmale\tPASS\n"
assert message in qc_file.read_text()
2 changes: 1 addition & 1 deletion Kinship/Kinship.nf
Original file line number Diff line number Diff line change
@@ -17,6 +17,6 @@ process Kinship {
plink --file out --make-bed --noweb
king -b plink.bed --kinship
cp king.kin0 ${analysis_id}.kinship
python ${baseDir}/CustomModules/Kinship/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out
python ${projectDir}/CustomModules/Kinship/check_kinship.py ${analysis_id}.kinship ${ped_file} > ${analysis_id}.kinship_check.out
"""
}
17 changes: 17 additions & 0 deletions MosaicHunter/1.0.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
FROM --platform=linux/amd64 ubuntu:latest

RUN apt-get update && \
apt-get -y install openjdk-8-jdk-headless && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

RUN apt-get update
RUN apt-get install -y git
RUN apt-get install -y git-lfs
RUN git lfs install
RUN apt-get install libcurl4

RUN apt-get install -y rsync
RUN rsync -aP hgdownload.soe.ucsc.edu::genome/admin/exe/linux.x86_64/blat/ ./../usr/bin

RUN git clone https://github.com/zzhang526/MosaicHunter
124 changes: 124 additions & 0 deletions MosaicHunter/1.0.0/MosaicHunter.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env nextflow

process MosaicHunterGetGender {
tag {"MosaicHunterGetGender ${sample_id}"}
label 'MosaicHunterGetGender'
container = 'ghcr.io/umcugenetics/custommodules_gendercheck:1.0.0'
shell = ['/bin/bash', '-eo', 'pipefail']

/*
Define inputs.
- Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file
*/
input:
tuple(val(sample_id), path(bam_files), path(bai_files))

/*
Define outputs.
- A tuple containing respectively the number for the alpha and beta found in the sample.
*/
output:
tuple(val(sample_id), path("gender_data_${sample_id}.tsv"))

// The command to execute MosaicHunter Get Gender
script:
"""
python ${workflow.projectDir}/CustomModules/MosaicHunter/1.0.0/get_gender_from_bam_chrx.py \
${sample_id} \
${bam_files} \
./ \
$params.mh_gender_ratio_x_threshold_male \
$params.mh_gender_ratio_x_threshold_female \
$params.mh_gender_mapping_qual \
$params.mh_gender_locus_x
"""
}

process MosaicHunterQualityCorrection {
// Step 1: Process input files
tag {"MosaicHunterQualityCorrection ${sample_id}"}
label 'MosaicHunterQualityCorrection'
container = 'docker://umcugenbioinf/mosaic_hunter:1.0.0'
shell = ['/bin/bash', '-euo', 'pipefail']

/*
Define inputs.
- Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file
- Path to the reference file
- Path to the MosaicHunter common site filter bed file
- Path to the MosaicHunter config file for the Quality Correction step
*/
input:
tuple(val(sample_id), path(bam_files), path(bai_files), path(gender_data))
path(mh_reference_file)
path(mh_common_site_filter_bed_file)
path(mh_config_file_one)

/*
Define outputs.
- A tuple containing respectively the number for the alpha and beta found in the sample.
*/
output:
tuple(val(sample_id), env(MHALPHA), env(MHBETA))

// The command to execute MosaicHunter
shell:
'''
SEX_STRING=$(awk 'NR>1 {print $2}' gender_data_!{sample_id}.tsv)
java -Xmx!{task.memory.toGiga()-4}G -jar /MosaicHunter/build/mosaichunter.jar \
-C !{mh_config_file_one} \
-P input_file=!{bam_files} \
-P mosaic_filter.sex=$SEX_STRING \
-P reference_file=!{mh_reference_file} \
-P common_site_filter.bed_file=!{mh_common_site_filter_bed_file} \
-P output_dir=./
export MHALPHA="\$(grep -Po "(?<=alpha:\\s)\\w+" ./stdout*)"
export MHBETA="\$(grep -Po "(?<=beta:\\s)\\w+" ./stdout*)"
'''
}

process MosaicHunterMosaicVariantCalculation {
// Caclulate the Mosaic Variants
tag {"MosaicHunterMosaicVariantCalculation ${sample_id}"}
label 'MosaicHunterMosaicVariantCalculation'
container = 'docker://umcugenbioinf/mosaic_hunter:1.0.0'
shell = ['/bin/bash', '-euo', 'pipefail']

publishDir "QC/MosaicHunter", saveAs: { filename -> "${sample_id}_$filename" }, mode: 'copy'

/*
Define inputs.
- Tuple consisting of a sample_id, a path to the .bam file, a path to the .bai file
- Path to the reference file
- Path to the MosaicHunter common site filter bed file
- Path to the MosaicHunter config file for the Mosaic Variant Calculation step
- The output of the MosaicHunterQualityCorrection step. This makes the environment variables available in this process
- A tuple containing respectively the number for the alpha and beta found in the
sample, which are stored in an environment variable.
*/
input:
tuple(val(sample_id), path(bam_files), path(bai_files), path(gender_data), env(MHALPHA), env(MHBETA))
path(mh_reference_file)
path(mh_common_site_filter_bed_file)
path(mh_config_file_two)

output:
path('final.passed.tsv')

// The command to execute step two of MosaicHunter
// First get the SEX_STRING from the sample
shell:
'''
SEX_STRING=$(awk 'NR>1 {print $2}' gender_data_!{sample_id}.tsv)
java -Xmx!{task.memory.toGiga()-8}G -jar /MosaicHunter/build/mosaichunter.jar \
-C !{mh_config_file_two} \
-P mosaic_filter.alpha_param=$MHALPHA -P mosaic_filter.beta_param=$MHBETA \
-P input_file=!{bam_files} \
-P mosaic_filter.sex=$SEX_STRING \
-P reference_file=!{mh_reference_file} \
-P common_site_filter.bed_file=!{mh_common_site_filter_bed_file} \
-P output_dir=./
'''
}
Loading