-
Notifications
You must be signed in to change notification settings - Fork 0
/
infer_grn_arni_dream4_multifactorial.m
executable file
·135 lines (101 loc) · 3.6 KB
/
infer_grn_arni_dream4_multifactorial.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
function infer_grn_arni_dream4_multifactorial(datapath, dataname, n_step, nbootstraps)
dataset = [datapath,'/',dataname];
data2=importdata(dataset,'\t');
gexp_data = data2.data;
[nsamples, ngenes] = size(gexp_data);
% Set regulator indices
nregs = ngenes;
reg_indices = 1:nregs;
% Set default parameters
if nargin < 3
n_step = min(25, nregs-1);
nbootstraps = 500;
elseif nargin < 4
nbootstraps = 500;
end
%default samp_frac
samp_frac = 1;
% Number of LARS step should be at most one less than the number of
% transcription factors.
if n_step > nregs - 1
error('Number of ARNI step should be less than number of regulators minus one!');
end
%% Normalize gene expression data to zero mean and unit variance
gexp_data = bsxfun(@minus, gexp_data, mean(gexp_data));
std_data = std(gexp_data); std_data(std_data == 0) = eps;
gexp_data = bsxfun(@rdivide, gexp_data, std_data);
clear std_data;
% Initialize adjacency matrix for all bootstrap runs
idx_mat_collection = zeros(n_step, ngenes, nbootstraps);
fprintf('# Genes:%d, #tx-factor:%d, #samples: %d, #lars step:%d, #bootstraps: %d\n', ...
ngenes,nregs,nsamples,n_step,nbootstraps);
fprintf('\n');
% Check and use if parallel computing is available
toolboxName = 'Parallel Computing Toolbox';
v = ver;
if any(strcmp(toolboxName, {v.Name}))
parallel = true;
end
if parallel
% if matlabpool('size'), matlabpool close; end
% if matlabpool('size') == 0
% matlabpool('open',4) ;
% end
% if matlabpool('size'), matlabpool close; end
% Reserve Matlab pool
delete(gcp('nocreate'));
parpool = gcp('nocreate'); % If no pool, do not create new one.
fprintf('Running %d MATLABPOOL workers.\n', parpool.NumWorkers);
end
result_dir = [datapath, '/', dataname, '_result'];
if ~isdir(result_dir)
mkdir(result_dir);
end
last_saved_file = '';
basis_fns = '_poly';
%% Run over all bootstrap (bs) runsgexp_data
for bs=1:nbootstraps
% Start timer
fprintf('Bootstrap run: %d\n',bs);
% Bootstrap related settings. Current: samp_frac samples are selected
% uniformly at random with replacement.
sampk = floor(nsamples*samp_frac);
gexp_data_bs = datasample(gexp_data, sampk,'Replace',true);
idx_mat_bs = zeros(n_step, ngenes);
% For each of targets, run parallel jobs
%for t = 1:ngenes
parfor t = 1:ngenes
% Target data
tic
target = gexp_data_bs(:, t);
reg_idx = [1:(t-1), (t+1):length(reg_indices)];
% Get expression of the potential regulators
reg_data = gexp_data_bs(:, reg_idx);
[list,vec] = reconstruct_dream4_multifactor(reg_data',target',n_step);
nz_indices = list(1:n_step)';
idx_mat_bs(:,t) = reg_idx(:,nz_indices)';
fprintf('\tFinished target :%d\n', t);
toc
end
% Store the indices of selected tx-factors for each target for this
% bootstrap run
idx_mat_collection(:,:,bs) = idx_mat_bs;
if rem(bs,1) == 0 || bs == nbootstraps
[~,x] = system('hostname');
hn = textscan(x,'%s','delimiter','.');
dt = datestr(now, 'yyyymmddHHMMSS');
cfile = [result_dir,'/','idxcol_', ...
hn{1}{1},'_frac',num2str(samp_frac),'_bs',num2str(bs),basis_fns,'_',dt,'.mat'];
save(cfile,'idx_mat_collection');
% Delete last run's output
if last_saved_file
delete(last_saved_file);
end
last_saved_file = cfile;
end
end
if parallel
% After a hard long day, time for workers to rest now
delete(parpool);
end
end