forked from misko/minichr
-
Notifications
You must be signed in to change notification settings - Fork 2
/
commands
47 lines (24 loc) · 8.94 KB
/
commands
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Reformat clusters_mq10_s10_v2.gz into the tab-separated "EDGE" table.
# Each input line is expected to look like  D<TAB>C1:P1<TAB>C2:P2<TAB>N
# (D a single digit, the rest plain numbers); sed splits the two colon pairs
# into separate columns so awk sees: D C1 P1 C2 P2 N.
# Output columns: chrC1, P1, P2, D, N, 0, 0, 0.0, 0, chrC2, EDGE.
# Chromosome numbers 23 and 24 are renamed X and Y on BOTH ends; renaming only
# the first end would let the second end come out as chr23/chr24.
zcat clusters_mq10_s10_v2.gz \
  | sed 's/\([0-9]\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)/\1\t\2\t\3\t\4\t\5\t\6/g' \
  | awk '{
      if ($2 == 23) { $2 = "X" }
      if ($2 == 24) { $2 = "Y" }
      if ($4 == 23) { $4 = "X" }
      if ($4 == 24) { $4 = "Y" }
      print "chr"$2"\t"$3"\t"$5"\t"$1"\t"$6"\t0\t0\t0.0\t0\tchr"$4"\tEDGE"
    }' > clusters_mq10_s10_v2_formatted
# Filter (stdin -> stdout). Lines of the form
#   D<TAB>C1:P1<TAB>C2:P2<TAB>N<TAB>rest
# are rewritten by sed to  C1 P1 C2 P2 D N  and then printed by awk as
#   chrC1  P1  strand1  chrC2  P2  strand2
# Chromosome numbers 23/24 become X/Y. D selects the strand pair:
# 1 -> - -, 2 -> + -, 3 -> - +, anything else -> + +.
sed 's/\([0-9]\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)\t\(.*\)/\2\t\3\t\4\t\5\t\1\t\6/g' \
  | awk '{
      if ($1 == 23) { $1 = "X" } else if ($1 == 24) { $1 = "Y" }
      if ($3 == 23) { $3 = "X" } else if ($3 == 24) { $3 = "Y" }
      first = "+"
      second = "+"
      if ($5 == 1) {
        first = "-"
        second = "-"
      } else if ($5 == 2) {
        second = "-"
      } else if ($5 == 3) {
        first = "-"
      }
      printf "chr%s\t%s\t%s\tchr%s\t%s\t%s\n", $1, $2, first, $3, $4, second
    }'
# Filter (stdin -> stdout), same conversion as the six-column variant but with
# the trailing number N kept as a seventh column:
#   D<TAB>C1:P1<TAB>C2:P2<TAB>N<TAB>rest
#     ->  chrC1  P1  strand1  chrC2  P2  strand2  N
# Chromosome numbers 23/24 become X/Y. D selects the strand pair:
# 1 -> - -, 2 -> + -, 3 -> - +, anything else -> + +.
sed 's/\([0-9]\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)[:]\([0-9]*\)\t\([0-9]*\)\t\(.*\)/\2\t\3\t\4\t\5\t\1\t\6/g' \
  | awk '{
      if ($1 == 23) { $1 = "X" } else if ($1 == 24) { $1 = "Y" }
      if ($3 == 23) { $3 = "X" } else if ($3 == 24) { $3 = "Y" }
      strand_a = "+"
      strand_b = "+"
      if ($5 == 1) {
        strand_a = "-"
        strand_b = "-"
      } else if ($5 == 2) {
        strand_b = "-"
      } else if ($5 == 3) {
        strand_a = "-"
      }
      printf "chr%s\t%s\t%s\tchr%s\t%s\t%s\t%s\n", $1, $2, strand_a, $3, $4, strand_b, $6
    }'
# Run overlap.py with the argument 1000 on the two gzipped inputs and keep its
# stdout in /tmp/out, which the centromere-filtering command reads.
# NOTE(review): the argument order (n.gz before t.gz) presumably means
# normal-then-tumor - confirm against overlap.py.
python2.6 overlap.py 1000 /tmp/n.gz /tmp/t.gz \
  > /tmp/out
# Drop calls from /tmp/out whose either end overlaps centromeres_merged.
# Input rows with more than 4 fields are read as
#   chrA posA strandA chrB posB strandB extra
# 1. widen end A to a 1 bp interval [posA, posA+1) and keep rows that do NOT
#    overlap the centromere file (intersectBed -v);
# 2. swap the ends so end B leads, widen it the same way and filter again;
# 3. swap back to the original 7-column order.
awk -v OFS='\t' 'NF > 4 { print $1, $2, $2 + 1, $3, $4, $5, $6, $7 }' /tmp/out \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
  | awk -v OFS='\t' '{ print $5, $6, $6 + 1, $7, $1, $2, $4, $8 }' \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
  | awk -v OFS='\t' '{ print $5, $6, $7, $1, $2, $4, $8 }' > filtered_greedy.txt
# Build one 1000 bp window per distinct breakpoint in filtered_greedy.txt.
# Every input line containing "chr" contributes two (chrom, pos, strand)
# triples (columns 1-3 and 4-6). After de-duplication a "-" strand yields
# [pos, pos+1000]; any other strand yields [pos-1000, pos].
grep chr filtered_greedy.txt \
  | awk -v OFS='\t' '{ print $1, $2, $3; print $4, $5, $6 }' \
  | sort \
  | uniq \
  | awk -v OFS='\t' '
      $3 == "-" { print $1, $2, $2 + 1000; next }
                { print $1, $2 - 1000, $2 }
    ' > dipg29t_removed1000n_nocentro_intervals.txt
# For every interval line (chrom start end) pull the alignments of that region
# out of ../merged.sorted.bam, then sort and de-duplicate the combined SAM
# records into uniq.sam. Each interval is echoed to stderr as progress output.
# The expansions are deliberately left unquoted: word splitting collapses the
# tab-separated line to single spaces before sed turns it into chrom:start-end.
while read line; do
  echo $line 1>&2
  region=$(echo $line | sed 's/\(chr\S*\)\s*\([0-9]*\)\s*\([0-9]*\)/\1:\2-\3/g')
  ~/samtools view ../merged.sorted.bam $region
done < dipg29t_removed1000n_nocentro_intervals.txt \
  | sort -S 5g \
  | uniq > uniq.sam
# Convert the bigger1000 call list into six-column link rows:
# sort numerically on column 7, rename every "chr" to "hs", and emit a
# +/-100 bp window around each of the two positions (columns 2 and 5).
# NOTE(review): the "hs" prefix and the output name suggest Circos input -
# confirm.
sort -k 7,7n dipg29t/n3t10_nocentro_bigger1000 \
  | sed 's/chr/hs/g' \
  | awk -v OFS='\t' '{ print $1, $2 - 100, $2 + 100, $4, $5 - 100, $5 + 100 }' \
  > dipg29t/n3t10_nocentro_bigger1000_circos
# Convert the refined/arc-bounded call list into link rows carrying a
# thickness attribute: sort numerically on column 7, rename "chr" to "hs",
# emit a +/-100 bp window around each position (columns 2 and 5) and append
# thickness=t, where t is half of column 7 but never less than 2.
#
# The guard that used to wrap the print tested $t (field number t) rather than
# t itself, so rows vanished whenever field t held a strand symbol or did not
# exist. After the clamp t is always >= 2, so every row is printed.
#
# NOTE(review): this writes to the same output file as the
# n3t10_nocentro_bigger1000 conversion and will overwrite it - confirm that
# is intended.
sort -k 7,7n dipg29t/n3t10_nocentro_refined_arc_bounded \
  | sed 's/chr/hs/g' \
  | awk '{
      t = $7 / 2
      if (t < 2) { t = 2 }
      print $1 "\t" ($2 - 100) "\t" ($2 + 100) "\t" $4 "\t" ($5 - 100) "\t" ($5 + 100) "\tthickness=" t
    }' > dipg29t/n3t10_nocentro_bigger1000_circos
# Preview: print the first two refined/arc-bounded calls in the tab-separated
# EDGE layout (col1, col2, col5, type, col7, 0, 0, 0, 0, col4, EDGE).
# The type code comes from the two strand columns (3 and 6):
#   + +  -> 0     + other -> 2     other -  -> 1     other other -> 3
head -n 2 dipg29t/n3t10_nocentro_refined_arc_bounded \
  | awk -v OFS='\t' '{
      if ($3 == "+") { t = ($6 == "+") ? 0 : 2 }
      else           { t = ($6 == "-") ? 1 : 3 }
      print $1, $2, $5, t, $7, 0, 0, 0.0, 0, $4, "EDGE"
    }'
# Print the refined call list in the tab-separated EDGE layout on stdout.
# sed first splits "chrA:posA<strand> chrB:posB<strand>" into six tab-separated
# columns; awk then emits col1, col2, col5, type, col7, 0, 0, 0, 0, col4, EDGE.
# Type code from the strand columns (3 and 6):
#   + +  -> 0     + other -> 2     other +  -> 3     other other -> 1
sed 's/\(chr[^:]*\):\([0-9]*\)\([+-]\)\s\(chr[^:]*\):\([0-9]*\)\([+-]\)/\1\t\2\t\3\t\4\t\5\t\6/g' /dupa-filer/misko/dipg29t/n3t10_nocentro_refined \
  | awk -v OFS='\t' '{
      if ($3 == "+") { type = ($6 == "+") ? 0 : 2 }
      else           { type = ($6 == "+") ? 3 : 1 }
      print $1, $2, $5, type, $7, 0, 0, 0.0, 0, $4, "EDGE"
    }'
# Export every data_x*.svg in the current directory to type1_<n>.png with a
# white background, numbering the outputs 0, 1, 2, ... in glob order.
# After the loop, $a holds the number of files exported.
a=0
for x in data_x*.svg; do
  # When nothing matches, the shell leaves the pattern as a literal string;
  # skip it so Inkscape is not invoked on a non-existent file and the counter
  # stays correct.
  [ -e "$x" ] || continue
  /Applications/Inkscape.app/Contents/Resources/bin/inkscape --export-png "type1_$a.png" --export-background=white "$x"
  a=$((a + 1))
done
# Take the output of make_clusters and get the normal-filtered (1000bp), centromere-subtracted, 1000bp+ calls
# Produce the filtered call set for DIPG01T:
# 1. overlap.py is run with 1000 on the DIPG01N clusters and this sample's
#    clusters (presumably removing calls also seen in the normal - confirm
#    against overlap.py);
# 2. rows with more than 4 fields are kept and read as
#    chrA posA strandA chrB posB strandB extra;
# 3. each end in turn is widened to a 1 bp interval and rows overlapping
#    centromeres_merged are discarded (intersectBed -v), after which the
#    original column order is restored;
# 4. rows whose two ends share chromosome and strand and lie less than
#    2000 bp apart are dropped.
python2.6 ../../overlap.py 1000 ../../DIPG01N/clusters/clusters.txt_f.gz clusters_mq10_cov10.txt_f.gz \
  | awk -v OFS='\t' 'NF > 4 { print $1, $2, $2 + 1, $3, $4, $5, $6, $7 }' \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
  | awk -v OFS='\t' '{ print $5, $6, $6 + 1, $7, $1, $2, $4, $8 }' \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
  | awk -v OFS='\t' '{ print $5, $6, $7, $1, $2, $4, $8 }' \
  | awk '{
      d = $2 - $5
      if (d < 0) { d = -d }
      if ($3 != $6 || $1 != $4 || d >= 2000) { print $0 }
    }' > dipg01t_nsubtract_centrosubtract_1000bp
# Collect the reads around every breakpoint of the filtered call set into
# uniq.bam and sort it to uniq_sorted.
# - The BAM header is saved to sam_headers so it can be put back in front of
#   the extracted records.
# - Each call yields two (chrom, pos, strand) triples; the strand of the first
#   end is flipped, the second is kept as is.
# - A "-" strand gives the window [pos-1200, pos+200], anything else
#   [pos-200, pos+1200].
# - Each window is echoed to stderr, converted to chrom:start-end and queried
#   with samtools view; the records are sorted and de-duplicated.
~/samtools view -H ../merged.sorted.bam > sam_headers
grep chr dipg01t_nsubtract_centrosubtract_1000bp \
  | awk -v OFS='\t' '{
      print $1, $2, (($3 == "+") ? "-" : "+")
      print $4, $5, (($6 == "+") ? "+" : "-")
    }' \
  | sort \
  | uniq \
  | awk '{
      if ($3 == "-") { lo = $2 - 1200; hi = $2 + 200 }
      else           { lo = $2 - 200;  hi = $2 + 1200 }
      print $1 "\t" lo "\t" hi
    }' \
  | while read line; do
      echo $line 1>&2
      region=$(echo $line | sed 's/\(chr\S*\)\s*\([0-9]*\)\s*\([0-9]*\)/\1:\2-\3/g')
      ~/samtools view ../merged.sorted.bam $region
    done \
  | sort -S 5g \
  | uniq \
  | cat sam_headers - \
  | ~/samtools view -Sb -o uniq.bam -
~/samtools sort uniq.bam uniq_sorted
# Stream the extracted reads into cluster_em together with the filtered call
# set and the hg19 reference; its stdout becomes the *_emd file.
# NOTE(review): 327 and 57 sit where the batch one-liner passes the mean and
# std read from mean_and_std.txt, so they are presumably this sample's
# mean/std - confirm.
~/samtools view uniq_sorted.bam \
  | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/minichr/clustering/cluster_em \
      327 57 \
      dipg01t_nsubtract_centrosubtract_1000bp \
      /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/indexes/nt_hg19/hg19.fa \
  > dipg01t_nsubtract_centrosubtract_1000bp_emd
# Merge the filtered call set with the cluster_em output, keeping one row per
# call with the larger of the two last-column values.
# Rows are keyed on their first six columns. After sorting, two adjacent rows
# with the same key form a pair and the row with the greater last field wins;
# a row without a partner is passed through with its own last field.
# The END rule flushes the final pending row: without it, an unpaired record
# at the very end of the sorted stream is never written.
# The key is built with commas and converted to tabs by the trailing sed.
cat dipg01t_nsubtract_centrosubtract_1000bp dipg01t_nsubtract_centrosubtract_1000bp_emd \
  | sort \
  | awk '
      {
        c = $1","$2","$3","$4","$5","$6
        if (c == a) {
          s_new = $NF
          if (s_new > s_old) { print c, s_new } else { print a, s_old }
          a = ""
        } else {
          if (a != "") { print a, s_old }
          a = c
          s_old = $NF
        }
      }
      END {
        if (a != "") { print a, s_old }
      }
    ' \
  | sed 's/,/\t/g' > dipg01t_nsubtract_centrosubtract_1000bp_joined
# Write the joined call set in the tab-separated EDGE layout (*_joined_f).
# sed splits any "chrA:posA<strand> chrB:posB<strand>" text into six
# tab-separated columns; awk then emits
#   col1, col2, col5, type, col7, 0, 0, 0, 0, col4, EDGE
# Type code from the strand columns (3 and 6):
#   + +  -> 0     + other -> 2     other +  -> 3     other other -> 1
sed 's/\(chr[^:]*\):\([0-9]*\)\([+-]\)\s\(chr[^:]*\):\([0-9]*\)\([+-]\)/\1\t\2\t\3\t\4\t\5\t\6/g' dipg01t_nsubtract_centrosubtract_1000bp_joined \
  | awk -v OFS='\t' '{
      if ($3 == "+") { type = ($6 == "+") ? 0 : 2 }
      else           { type = ($6 == "+") ? 3 : 1 }
      print $1, $2, $5, type, $7, 0, 0, 0.0, 0, $4, "EDGE"
    }' > dipg01t_nsubtract_centrosubtract_1000bp_joined_f
# Run the minichr hmm binary on the EDGE-formatted calls plus the q10 coverage
# files of this sample and of DIPG01N; stdout is kept in hmm_out.
../../scripts/minichr/hmm \
  dipg01t_nsubtract_centrosubtract_1000bp_joined_f \
  ../merged.sorted.bam_q10.cov \
  ../../DIPG01N/merged.sorted.bam_q10.cov \
  > hmm_out
# List every distinct breakpoint (chrom, pos) of the joined call set: each
# line containing "chr" contributes columns 1-2 and columns 4-5, and the
# result is sorted and de-duplicated.
grep chr dipg01t_nsubtract_centrosubtract_1000bp_joined \
  | awk -v OFS='\t' '{ print $1, $2; print $4, $5 }' \
  | sort \
  | uniq > dipg01t_nsubtract_centrosubtract_1000bp_joined_intervals
# Stream the extracted reads into arc_coverage together with the breakpoint
# list; stdout is kept in the *_arc_coveage file (file name spelled as the
# rest of the workflow expects it).
# NOTE(review): 327 and 57 are the same two numbers passed to cluster_em,
# presumably this sample's mean/std - confirm.
~/samtools view uniq_sorted.bam \
  | ../../scripts/minichr/arc_coverage/arc_coverage \
      327 57 \
      dipg01t_nsubtract_centrosubtract_1000bp_joined_intervals \
  > dipg01t_nsubtract_centrosubtract_1000bp_joined_arc_coveage
#epic one-liner
# Batch version of the per-sample workflow: for every directory name listed in
# the file "tumor" (one DIPGnn... name per line) it
#   1. builds the filtered call set (overlap.py against the matching normal,
#      centromere removal on both ends, removal of same-chromosome/same-strand
#      pairs closer than 2000 bp);
#   2. extracts the reads around every breakpoint into clusters/uniq.bam and
#      sorts it to clusters/uniq_sorted;
#   3. runs cluster_em with the mean/std read from mean_and_std.txt;
#   4. joins the call set with the cluster_em output, keeping the larger last
#      field per call.
# Two defects of the one-line form are fixed here: the final "cat" is no longer
# separated from its file arguments by a line break (a bare cat would swallow
# the rest of the tumor list from the loop's stdin), and the join awk flushes
# its last pending record in an END rule.
cat tumor | while read -r line; do
  echo "$line"
  # Two-digit sample number taken from the DIPGnn prefix of the directory name.
  s=$(echo "$line" | sed 's/DIPG\([0-9][0-9]\).*/\1/g')
  calls="$line/dipg${s}t_nsubtract_centrosubtract_1000bp"

  # Step 1: filtered call set. The DIPG${s}N* / DIPG${s}T* globs are left
  # unquoted on purpose so the shell expands them.
  python2.6 ./overlap.py 1000 DIPG${s}N*/clusters/clusters.txt_f.gz DIPG${s}T*/clusters/clusters_mq10_cov10.txt_f.gz \
    | awk '{if (NF>4) {print $0}}' \
    | awk '{print $1"\t"$2"\t"$2+1"\t"$3"\t"$4"\t"$5"\t"$6"\t"$7}' \
    | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
    | awk '{print $5"\t"$6"\t"$6+1"\t"$7"\t"$1"\t"$2"\t"$4"\t"$8}' \
    | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/bedtools-2.17.0/bin/intersectBed -a - -b /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/centromeres_merged -v \
    | awk '{print $5"\t"$6"\t"$7"\t"$1"\t"$2"\t"$4"\t"$8}' \
    | awk '{d=$2-$5; if (d<0) {d=-d}; if ($3==$6 && $1==$4 && d<2000) { } else {print $0}}' \
    > "$calls"

  # Step 2: reads around each breakpoint. The first end has its strand
  # flipped; "-" gives [pos-1200, pos+200], anything else [pos-200, pos+1200].
  ~/samtools view -H "$line/merged.sorted.bam" > "$line/sam_headers"
  grep chr "$calls" \
    | awk '{OFS="\t"; if ($3=="+") {print $1,$2,"-"} else {print $1,$2,"+"}; if ($6=="+") {print $4,$5,"+"} else {print $4,$5,"-"};}' \
    | sort \
    | uniq \
    | awk '{if ($3=="-") {print $1"\t"($2-1200)"\t"($2+200)} else {print $1"\t"($2-200)"\t"($2+1200)}}' \
    | while read linex; do
        # Unquoted on purpose: word splitting turns the tabs into single
        # spaces before sed builds chrom:start-end.
        ~/samtools view "$line/merged.sorted.bam" $(echo $linex | sed 's/\(chr\S*\)\s*\([0-9]*\)\s*\([0-9]*\)/\1:\2-\3/g')
      done \
    | sort -S 5g \
    | uniq \
    | cat "$line/sam_headers" - \
    | ~/samtools view -Sb -o "$line/clusters/uniq.bam" -
  ~/samtools sort "$line/clusters/uniq.bam" "$line/clusters/uniq_sorted"

  # Step 3: cluster_em with the per-sample mean and std (columns 1 and 2 of
  # mean_and_std.txt).
  mean=$(awk '{print $1}' "$line/mean_and_std.txt")
  std=$(awk '{print $2}' "$line/mean_and_std.txt")
  ~/samtools view "$line/clusters/uniq_sorted.bam" \
    | /hpf/largeprojects/ccm/amorrison/DIPG_WGS/scripts/minichr/clustering/cluster_em "$mean" "$std" "$calls" /hpf/largeprojects/ccm/amorrison/DIPG_WGS/junk/indexes/nt_hg19/hg19.fa \
    > "${calls}_emd"

  # Step 4: join on the first six columns, keeping the larger last field of
  # each adjacent pair; unpaired rows pass through, including the last one.
  cat "$calls" "${calls}_emd" \
    | sort \
    | awk '
        {
          c = $1","$2","$3","$4","$5","$6
          if (c == a) {
            s_new = $NF
            if (s_new > s_old) { print c, s_new } else { print a, s_old }
            a = ""
          } else {
            if (a != "") { print a, s_old }
            a = c
            s_old = $NF
          }
        }
        END {
          if (a != "") { print a, s_old }
        }
      ' \
    | sed 's/,/\t/g' > "${calls}_joined"
done
# Dry run: for every directory name in the file "tumor", print the name and
# then the qsub command line that would submit run_hmm.sh for it (64g h_vmem)
# with the joined call set, the tumor and normal q10 coverage files and the
# hmm_out path as arguments. Nothing is submitted; the command is only echoed.
# The glob patterns are unquoted, so they are expanded when they match.
cat tumor | while read line; do
  echo $line
  s=$(echo $line | sed 's/DIPG\([0-9][0-9]\).*/\1/g')
  root=/hpf/largeprojects/ccm/amorrison/DIPG_WGS
  echo qsub -l h_vmem=64g $root/run_hmm.sh $root/DIPG${s}T*/dipg${s}t_*joined $root/DIPG${s}T*/merged.sorted.bam_q10.cov $root/DIPG${s}N*/merged.sorted.bam_q10.cov $root/$line/hmm_out
done