-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstep2.pl
executable file
·92 lines (81 loc) · 1.94 KB
/
step2.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/usr/bin/perl -w
use strict;
use lib '.';
use promoteromeLib;
use YAML::Any;
$| = 1;
# This file was written for clustering of data from Fantom 5.
# This script fits the power-laws to the cumulatives produced in step1.pl
######################## config
my $conf_file = shift @ARGV or die "specify a config file!\n";
my $theOnlyFile = shift @ARGV; # second parameter if not empty is just one file to run on: for parallelizing
my $config = YAML::Any::LoadFile($conf_file);
my $dir = $config->{dir};
my $tmpDir = $config->{tmpDir};
my $cumDir = $config->{cumDir};
my $mappedDir = $config->{mappedDir};
my $normDir = $config->{normDir};
my $fitDir = $config->{fitDir};
my $fitRegionF = $config->{fitRegionF};
##################### CODE
mkdir $tmpDir.$fitDir;
my $fitRegion = readRegions($fitRegionF);
if(defined $theOnlyFile)
{
my($desc, $sample) = split(/\./, $theOnlyFile);
process($sample);
}
else
{
opendir(my $D, $tmpDir.$cumDir) or die;
while(my $f = readdir $D)
{
if(-f $tmpDir.$cumDir.$f) #&& $f =~ /\.revcum$/
{
if(! -e $tmpDir.$fitDir.$f )
{
process($f);
}
}
}
close $D;
}
sub process
{
my ($f) = @_;
my ($sampleID) = split(/\./, $f);
print STDERR $sampleID, "\n";
my($startX, $startY);
if(exists $fitRegion->{$sampleID})
{
($startX, $startY) = @{$fitRegion->{$sampleID}};
}
else
{
$startX = 5;
$startY = 1000;
}
my $d = readCumulative($tmpDir.$cumDir.$f);
my ($a, $b, $beta, $lambda, $txt) = regressPowerLaw(\$d, $startX, $startY, "1.15");
print $txt, "\n";
gnuplotRevcumLine($tmpDir.$cumDir.$f, $tmpDir.$cumDir.$f.'.revcum.png', $a, $b);
open(my $outH, ">", $tmpDir.$fitDir.$f) or die;
print $outH join("\t", $f, $a, $b, $beta, $lambda), "\n";
close $outH;
}
sub readRegions
{
my($f) = @_;
open (my $fH, $f) or die;
my %ret;
while(<$fH>)
{
chomp;
next if /^\s*#/;
my($lib, $startX, $endX, $startY) = split(/\t/);
my @tmp = ($startX, $startY);
$ret{$lib} = \@tmp;
}
close $fH;
return \%ret;
}