-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsvtools.pl
89 lines (80 loc) · 2.7 KB
/
svtools.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
###############################################################################
# Author: Alex Di Genova
# Laboratory: GCS/IARC
# Copyright (c)
# year: 2020
###############################################################################
use Data::Dumper;
use Getopt::Std;
use FindBin;
use lib "$FindBin::Bin";
use SV;
use SVannot;
use strict;
# usage()
#
# Print the command-line synopsis followed by a short error notice on
# STDOUT, then terminate the program with exit status 1.
# Takes no arguments and never returns to the caller.
sub usage {
    my @synopsis = (
        "$0 usage : -a <vcf_tumor> -b <pon_vcf>",
        "-c <GNOMAD.vcf> -d <PCAWG.vcf>",
        "-e <CNV-READS> -s <Somatic.vcf>",
        "-x <list_bed_files.txt> -p <prefix>",
        "Error in use",
    );

    # One newline-terminated line per entry, in order.
    print map { "$_\n" } @synopsis;
    exit 1;
}
###############################################################################
# Main script.
#
# Loads the tumor SV calls (-a), applies basic filters, annotates them against
# a series of reference SV sets (somatic calls, custom PON, GNOMAD, PCAWG) and
# against a list of BED files (-x), and finally writes the feature matrix used
# to train the RF tool under the given prefix (-p).
#
# NOTE(review): -e <CNV-READS> is accepted by getopts but is not used anywhere
# in this script.
###############################################################################
my %opts = ();
getopts( "a:b:c:d:e:s:x:p:", \%opts );

# -a and -p have always been mandatory. -x is validated here as well: without
# it the script could never finish (the BED list open failed at the very end,
# after all the annotation work), so fail early instead.
for my $required (qw(a p x)) {
    usage() if !defined $opts{$required};
}

# Target file (tumor SV calls).
my $target = SV->new( $opts{a} );

# Matching parameters shared by every annotation step.
my $ftype  = 0;       # a true value means that variants are filtered by type
my $fdelta = 1000;    # average distance for breakpoint overlap

# Object used to annotate SVs.
my $sva = SVannot->new();

# Normalise the target calls. Per the original author's notes this removes
# SVs on non-chr sequences, adds type, length and method, adds SR and PE
# support to the predictions and stores everything in an internal array.
# The argument 1 requests loading of the genotype information.
$target->norm_svs(1);

# Remove SVs shorter than 50 bp, matching alternative chromosomes or with
# read support lower than 5.
# NOTE(review): the third argument (500000000) is presumably an upper bound
# on SV length -- confirm against SV::basic_filters.
$target->basic_filters( 5, 50, 500000000 );

# Reference SV sets, processed in this exact order. Each entry pairs the
# command-line option holding the VCF path with the SVannot method that
# matches the target calls against that set.
# NOTE(review): -s, -b, -c and -d are not validated here (as in the original);
# whether SV->new tolerates an undefined path depends on SV.pm.
my @reference_sets = (
    [ s => 'annot_Somatic_sv' ],      # somatic variants of the sample
    [ b => 'annot_customPON_sv' ],    # custom panel of normals (PON)
    [ c => 'annot_gnomad_sv' ],       # GNOMAD
    [ d => 'annot_pcawg_sv' ],        # PCAWG
);

for my $set (@reference_sets) {
    my ( $opt, $annotate ) = @{$set};

    my $reference = SV->new( $opts{$opt} );
    $reference->norm_svs(0);          # do not load genotype information
    $sva->$annotate( $reference, $target, $ftype, $fdelta );

    # $reference goes out of scope at the end of each iteration, which
    # releases the loaded set before the next one is read.
}

#$sva->annot_breapoint_coverage();
# Annotated COSMIC SVs are of low quality and were replaced by PCAWG.
#$sv->annotate_cosmic();

# Annotate with every BED file named in the -x list. Each line holds a type
# label and a BED path separated by a tab.
# Three-argument open with a lexical handle: the path is never interpreted
# as a mode or a piped command.
open( my $bed_list, '<', $opts{x} )
  or die "cannot open list of BED files '$opts{x}': $!\n";
while ( my $line = <$bed_list> ) {
    chomp $line;
    next if $line =~ /^\s*$/;    # ignore blank lines (e.g. a trailing newline)
    my ( $type, $fbed ) = split( /\t/, $line );
    $sva->annot_with_bed_file( $fbed, $target, $type, 100 );
}
close($bed_list);

# Print the matrix used to train the RF tool.
$target->print_matrix( $opts{p} );