test_running_time_large.py
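"""
Benchmark the running time of the influence-estimation algorithm in
ic_bfs_eval.py against the naive sequential BFS estimator in
seq_estimation.py, over seed sets of increasing size, using the cycle
counts reported by 'perf stat'.  For every seed-set fraction the script
records the mean and standard error of the sequential/approximate
cycle-count ratio in a results file under experiments/results/.
"""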
from ic_bfs_eval import *
from ic_bfs_eval import EstimateInfluence
from generate_seeds import LoadNodesFromFile, generateSeedFiles
from link_server import LinkServerCP
from bfs_seq import sequential_estimation
from plot_running_times import plotRatiosFromFile
import subprocess
import cPickle as cp
from time import time
import argparse
import numpy as np
from scipy.stats import sem
from math import log
import gc
import os
parser = argparse.ArgumentParser()
parser.add_argument('-csv', type = str,
                    default = 'input/datasets/Wiki-Vote_stripped.txt',
                    help = "Name of source csv file (if we generate a new Link Server)")
parser.add_argument('-delim', type = int, default = 0,
                    help = '-delim : which delimiter (for edge-tuples) 0-tab, 1-space, 2-comma. Default: 0')
parser.add_argument('-min', type = float, default = 0.1,
                    help = '-min : minimum fraction of nodes in a seed set. Default: 0.1')
parser.add_argument('-max', type = float, default = 1.0,
                    help = '-max : maximum fraction of nodes in a seed set. Default: 1.0')
parser.add_argument('-interval', type = float, default = 0.1,
                    help = '-interval : size of the step between seed-set fractions. Default: 0.1')
parser.add_argument('-samples', type = int, default = 10,
                    help = '-samples : number of samples of a given seed set size. Default: 10')
parser.add_argument('-cores', type = int, default = 30,
                    help = '-cores : number of cores to use. Default: 30')
parser.add_argument('-undirected', type = int, default = 0,
                    help = '-undirected : 1 if the graph is undirected, 0 otherwise. Default: 0')
parser.add_argument('-output', type = str, default = 'out')
parser.add_argument('-dataset', type = str, default = 'input/datasets/wiki')
parser.add_argument('-title', type = str, default = '',
                    help = '-title : title of the generated plot. Default: blank')
parser.add_argument('-seq_samples_scale', type = float, default = 1.0)
parser.add_argument('-prob_method', type = int, default = 3)
def getNumCycles(fname):
    """Parse the csv output of 'perf stat -x,' and return the measured cycle count."""
    with open(fname, 'r') as f:
        for line in f:
            words = line.split(',')
            if 'cycles\n' in words:
                return int(words[0])
    return None
def removeFile(fname):
    """Delete fname if it exists; ignore the error if it does not."""
    try:
        os.remove(fname)
    except OSError:
        print "failed to delete the file: ", fname
if __name__ == "__main__":
    gc.enable()
    parameters = parser.parse_args()
    min_frac, max_frac, interval, nSamples, edges_csv, delim_option, output, dataset, seq_samples_scale = \
        parameters.min, parameters.max, parameters.interval, parameters.samples, parameters.csv, \
        parameters.delim, parameters.output, parameters.dataset, parameters.seq_samples_scale
    delim_dict = {0: '\t', 1: ' ', 2: ','}
    delimiter = delim_dict[delim_option]
    results_dir = 'experiments/results/'
    bUndirected = True if parameters.undirected == 1 else False
    bfs_method = 'seq'
    start_time = time()
print "creating link-server object"
if delimeter == "\t":
print "delimeter is tab"
L=LinkServerCP(dataset, edges_csv, create_new=True, prob_method=parameters.prob_method, prob=[0.1,0.01], delim=delimeter, undirected = bUndirected)
n = L.getNumNodes()
#n = len(V)
print "Number of nodes: ", n
nBFS_samples = 1000
running_times_file = results_dir + output + '-running_times-ratios_k_min-%.3f-k_max-%.3f-samples-%d-bfs_samples-%d-large'%(min_frac,max_frac,nSamples,nBFS_samples)
running_times_file_raw = running_times_file + "-raw"
removeFile(running_times_file)
f = open(running_times_file, 'w')
f_raw = open(running_times_file_raw,'w')
nBFS_samples = 1000
nBFS_samples_theoretic = n * log(n,2)
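    # For each seed-set size k (from min*n up to max*n in steps of interval*n):
    # draw nSamples random seed sets; for each one measure, via 'perf stat', the
    # cycle count of the approximation algorithm (ic_bfs_eval.py) and of the
    # sequential estimator (seq_estimation.py, scaled from nBFS_samples up to
    # n*log2(n) samples), then record the mean and standard error of their ratio.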
    for nSeeds in xrange(int(min_frac * n), int(max_frac * n), int(interval * n)):
        print "k = ", nSeeds
        seeds_fname = output + "-seeds-" + str(nSeeds) + '.cp'
        runtimes_approx, runtimes_seq = [], []
        for i in xrange(nSamples):
            # draw a fresh random seed set of size nSeeds
            generateSeedFiles(nSeeds, nSeeds + 1, int(interval * n), range(n), 1, output + "-seeds-")
            perf_csv_fname = dataset + 'runtimes_large.csv'
            # run the approximation algorithm under perf and read back its cycle count
            subprocess.Popen("perf stat -x, -o %s python ic_bfs_eval.py -dataset %s -res_fname %s -seeds %s -output_mode 3 -cores %d" %
                             (perf_csv_fname, dataset, output + "-approx-" + str(nSeeds), seeds_fname, parameters.cores),
                             shell=True, stdout=subprocess.PIPE).stdout.read()
            num_cycles_approx = getNumCycles(perf_csv_fname)
            runtimes_approx.append(num_cycles_approx)
            print "Done approximating, now running naive sequential algorithm"
            removeFile(perf_csv_fname)
            # run the sequential estimator on nBFS_samples BFS samples under perf ...
            subprocess.Popen("perf stat -x, -o %s python seq_estimation.py -dataset %s -seeds_file %s -results_file %s -output_mode 3 -nSamples %d -cores %d" %
                             (perf_csv_fname, dataset, seeds_fname, output + "-seq-" + str(nSeeds), nBFS_samples, parameters.cores),
                             shell=True, stdout=subprocess.PIPE).stdout.read()
            runtime_seq_samples = getNumCycles(perf_csv_fname)
            # ... and scale its cycle count up to the theoretical n*log2(n) samples
            theoretic_num_cycles = 1.0 * runtime_seq_samples / nBFS_samples * nBFS_samples_theoretic
            runtimes_seq.append(theoretic_num_cycles)
            f_raw.write('%.3f\t%.3f\t%.3f\n' % (1. * nSeeds / n, num_cycles_approx, theoretic_num_cycles))
            removeFile(perf_csv_fname)  # clean up the per-run perf output
        print "runtimes_approx: ", runtimes_approx
        print "runtimes_seq: ", runtimes_seq
        ratios = 1. * np.array(runtimes_seq) / np.array(runtimes_approx)
        print "ratios: ", ratios
        f.write("%.3f\t%.3f\t%.3f\n" % (1.0 * nSeeds / n, np.mean(ratios), sem(ratios)))
    f.close()
    f_raw.close()
    print "Total runtime: ", (time() - start_time) / 60.0