-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcre.roh.naive.sh
executable file
·83 lines (79 loc) · 2.22 KB
/
cre.roh.naive.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/bin/bash
# runs of HOM MAF 5% variant in proband (CH0620) affected sib (CH0621) and in both
# filters DP=50, QUAL>=500
# gt_types=0=HOM_REF or 3 = HOM_ALT, 1 = HET, 2 - no call
# usage:
# cre.roh.naive.sh sample gemini.db [maf=0.05]
# output:
# - sample.roh_variants.tsv - list of variants, print with tee for debugging
# - stdout: list of ROH
sample=$1
echo "chrom,pos,ref,alt,impact,qual,dp,gene,maf,gts."$sample",gt_types."$sample",gt_alt_depths."$sample",stretch_length_variants,stretch_length_bp,stretch_id,stretch_genes" | tee $sample.roh_variants.tsv
maf=0.05
if [ -n "$3" ]
then
maf=$3
fi
gemini query -q "select chrom,start+1 as pos, ref, alt,impact,qual,dp, gene, gnomad_af_popmax as maf, gts."$sample",gt_types."$sample",gt_alt_depths."$sample" from variants where
type='snp' and dp>=50 and qual>=500 and gnomad_af_popmax<="$maf --gt-filter "gt_types."$sample" != 2" $2 | sed s/"\t"/","/g | sort -t "," -k1,1n -k2,2n \
| tee -a $sample.roh_variants.tsv | awk -F "," '
BEGIN{
prev=1;
prev_gene="";
prev_chrom="";
stretch_length_variants=0;
stretch_length_bp=0;
stretch_id="";
stretch_genes="";
}
{
if($11 == 3){
genotype=0
}else{
genotype=$11
};
if(genotype==1 || $1 != prev_chrom){
stretch_length_variants=0;
stretch_length_bp=0;
stretch_id=0;
stretch_genes="";
prev_gene="";
}else{
if(prev==0){
stretch_length_variants=stretch_length_variants+1;
stretch_length_bp=$2-stretch_id+1;
if ($8 != prev_gene && $8 != ""){
stretch_genes=stretch_genes","$8;
}
prev_gene=$8;
}else{
stretch_length_variants=1;
stretch_id=$2;
stretch_length_bp=$2-stretch_id+1;
stretch_genes=$8;
prev_gene=$8;
};
}
prev=genotype;
prev_chrom=$1;
print $0","stretch_length_variants","stretch_length_bp","stretch_id",\""stretch_genes"\"";
}' | grep -v "0$" | awk -F ',' '{ if ($13>5) print $0;}' \
| sort -t "," -k1,1 -k15,15n \
| awk -F "," '
BEGIN{
prev_chr="";
prev_stretch_id="";
prev_stretch="";
}
{
if($1 != prev_chr){
print prev_stretch;
}else{
if (prev_stretch_id != $15){
print prev_stretch;
}
}
prev_stretch=$0;
prev_chr=$1;
prev_stretch_id=$15;
}' | grep -v "^$"