#!/usr/bin/env python3
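"""Run a batch of fio jobs built from templated job files.

Each fio job file is read as a string.Template.  The JSON parameter file
supplies the substitution values; every combination of parameter values is
run as a separate fio job, optionally repeated over several replicates.
fio's terse output is written out with the substituted parameter values
appended as extra semicolon-separated columns.

Typical invocation (assuming a parameter file and one or more fio job
templates):

    fiobatch.py parameters.json job1.fio job2.fio -o results.csv
"""
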
import sys
import json
import logging
import argparse
import random
from subprocess import Popen, PIPE, TimeoutExpired
from time import sleep
from string import Template
from itertools import product

# Module-level logger: read_params() may log before setup_logger() has applied
# the command-line logging configuration; setup_logger() returns this same logger.
logger = logging.getLogger(__name__)


class FioJob(object):
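    """A single fio run built from a substituted job-file template.

    The job is executed with fio's terse output format, and the parameter
    values used for the substitution are appended to each terse output line.
    """
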
    fio_cmd = 'fio'
    fio_headers = 'terse_version_3;fio_version;jobname;groupid;error;read_kb;read_bandwidth_kb;read_iops;read_runtime_ms;read_slat_min_us;read_slat_max_us;read_slat_mean_us;read_slat_dev_us;read_clat_min_us;read_clat_max_us;read_clat_mean_us;read_clat_dev_us;read_clat_pct01;read_clat_pct02;read_clat_pct03;read_clat_pct04;read_clat_pct05;read_clat_pct06;read_clat_pct07;read_clat_pct08;read_clat_pct09;read_clat_pct10;read_clat_pct11;read_clat_pct12;read_clat_pct13;read_clat_pct14;read_clat_pct15;read_clat_pct16;read_clat_pct17;read_clat_pct18;read_clat_pct19;read_clat_pct20;read_tlat_min_us;read_lat_max_us;read_lat_mean_us;read_lat_dev_us;read_bw_min_kb;read_bw_max_kb;read_bw_agg_pct;read_bw_mean_kb;read_bw_dev_kb;write_kb;write_bandwidth_kb;write_iops;write_runtime_ms;write_slat_min_us;write_slat_max_us;write_slat_mean_us;write_slat_dev_us;write_clat_min_us;write_clat_max_us;write_clat_mean_us;write_clat_dev_us;write_clat_pct01;write_clat_pct02;write_clat_pct03;write_clat_pct04;write_clat_pct05;write_clat_pct06;write_clat_pct07;write_clat_pct08;write_clat_pct09;write_clat_pct10;write_clat_pct11;write_clat_pct12;write_clat_pct13;write_clat_pct14;write_clat_pct15;write_clat_pct16;write_clat_pct17;write_clat_pct18;write_clat_pct19;write_clat_pct20;write_tlat_min_us;write_lat_max_us;write_lat_mean_us;write_lat_dev_us;write_bw_min_kb;write_bw_max_kb;write_bw_agg_pct;write_bw_mean_kb;write_bw_dev_kb;cpu_user;cpu_sys;cpu_csw;cpu_mjf;cpu_minf;iodepth_1;iodepth_2;iodepth_4;iodepth_8;iodepth_16;iodepth_32;iodepth_64;lat_2us;lat_4us;lat_10us;lat_20us;lat_50us;lat_100us;lat_250us;lat_500us;lat_750us;lat_1000us;lat_2ms;lat_4ms;lat_10ms;lat_20ms;lat_50ms;lat_100ms;lat_250ms;lat_500ms;lat_750ms;lat_1000ms;lat_2000ms;lat_over_2000ms;disk_name;disk_read_iops;disk_write_iops;disk_read_merges;disk_write_merges;disk_read_ticks;write_ticks;disk_queue_time;disk_util'

    # Provide a sequential unique ID for each instance, unlikely to be
    # repeated between executions
    __last_batchid = random.randrange(2**16) * 100

    @classmethod
    def next_batchid(cls):
        cls.__last_batchid += 1
        return cls.__last_batchid

    def __init__(self, fio_script, **kwargs):
        self.batchid = self.next_batchid()
        self.fio = fio_script
        self.cmd = [self.fio_cmd, '--output-format=terse', '-']
        self.success = None
        # Attach any extra keyword arguments (e.g. mapping, order) as attributes
        for k, v in kwargs.items():
            self.__dict__[k] = v

    def __str__(self):
        if 'mapping' in self.__dict__:
            return ', '.join('%s=%s' % (str(k), str(v)) for k, v in self.mapping.items())
        # Fall back to the batch ID so __str__ always returns a string
        return 'batchid=%d' % self.batchid

    def run(self):
        logger.debug("fio input:\n" + self.fio)
        logger.info("start|batchid=%d|%s" % (self.batchid, str(self)))
        self.proc = Popen(self.cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True)
        try:
            self.result = self.proc.communicate(input=self.fio, timeout=cmdline.timeout)
            logger.info("stop|batchid=%d|%s" % (self.batchid, str(self)))
        except TimeoutExpired:
            logger.error("Timeout waiting for " + str(self))
            # Make sure the fio process does not keep running into the next job
            self.proc.kill()
            self.proc.communicate()
            self.success = False
            return
        if self.proc.returncode != 0:
            text = "Job return code is %d" % self.proc.returncode
            if self.result[0].strip() != '':
                text += '\n' + self.result[0]
            if self.result[1].strip() != '':
                text += '\n' + self.result[1]
            logger.error(text)
            self.success = False
            return
        # Append the substituted parameter values as extra terse columns
        text = self.result[0].rstrip('\n')
        to_append = ';' + ';'.join(str(self.mapping[k]) for k in self.order)
        text = '\n'.join([line + to_append for line in text.splitlines()]) + '\n'
        cmdline.output.write(text)
        cmdline.output.flush()
        self.success = True
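

# The JSON parameter file must contain a 'fio' dict mapping template
# placeholders to a value or a list of values.  'replicates' (an int) or
# 'replicate' (a value or list) controls how many times the whole batch runs.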
def read_params(filename):
    with open(filename, 'rt') as infile:
        params = json.load(infile)
    # JSON object must be a dict
    if type(params) is not dict:
        logger.fatal("Dictionary expected for JSON parameter file, but found " + str(type(params)))
        sys.exit(1)
    fio_cols = set(FioJob.fio_headers.split(';'))
    if 'replicates' in params:
        if 'replicate' in params:
            logger.warning("Both 'replicates' and 'replicate' specified. Using 'replicates' value")
        if type(params['replicates']) is not int:
            logger.fatal("'replicates' must be an integer")
            sys.exit(1)
        params['replicate'] = list(range(1, params['replicates'] + 1))
        del params['replicates']
    elif 'replicate' in params:
        if type(params['replicate']) is not list:
            params['replicate'] = [params['replicate']]
    else:
        # Default to a single replicate if neither key is present
        params['replicate'] = [1]
    if 'fio' in params and type(params['fio']) is dict:
        # Replace single values with singleton lists
        for k, v in params['fio'].items():
            if type(v) is not list:
                params['fio'][k] = [v]
            if k in fio_cols:
                logger.warning("Parameter %s conflicts with fio column. Consider renaming" % str(k))
    else:
        logger.fatal("Missing fio dict in parameter file")
        sys.exit(1)
    return params


def parse_cmdline():
    parser = argparse.ArgumentParser()
    parser.add_argument('parameters', metavar='JSON')
    parser.add_argument('fiofiles', metavar='FIO', nargs='+')
    parser.add_argument('--output', '-o', type=argparse.FileType('wt'), default=sys.stdout)
    parser.add_argument('--log', '-l')
    parser.add_argument('--verbose', '-v', action='count', default=0)
    parser.add_argument('--timeout', type=float)
    parser.add_argument('--norandom', action='store_true', help="Do not shuffle jobs")
    parser.add_argument('--cooldown', metavar='SECONDS', type=float, default=2, help="cool-down time between jobs")
    parser.add_argument('--no_drop_caches', action='store_true', help="Do not attempt to drop caches before each workload")
    parsed = parser.parse_args()
    parsed.parameters = read_params(parsed.parameters)
    parsed.fio_params = parsed.parameters['fio']
    # Read each fio job file as a template
    templates = []
    for fiofile in parsed.fiofiles:
        with open(fiofile, 'rt') as f:
            templates.append(f.read())
    parsed.templates = templates
    return parsed


def setup_logger(cmdline):
    logger = logging.getLogger(__name__)
    log_props = {
        'level': logging.DEBUG if cmdline.verbose > 0 else logging.INFO,
        'style': '{',
        'format': '|'.join(['{asctime}', '{levelname}', '{message}']),
    }
    if cmdline.log:
        logging.basicConfig(filename=cmdline.log, filemode='w', **log_props)
    else:
        logging.basicConfig(stream=sys.stderr, **log_props)
    return logger
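

# Dropping the page cache, dentries and inodes (echo 3 > /proc/sys/vm/drop_caches)
# normally requires root; failures are logged at debug level and otherwise ignored.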
def drop_caches():
    if cmdline.no_drop_caches:
        return
    path = '/proc/sys/vm/drop_caches'
    try:
        logger.debug('echo 3 > ' + path)
        with open(path, 'w') as f:
            f.write('3')
    except FileNotFoundError:
        logger.debug('File not found ' + path)
    except PermissionError:
        logger.debug('Permission denied ' + path)
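

# For every replicate, build one job per fio template and per combination of
# parameter values (shuffled unless --norandom), then run the jobs one at a
# time with an optional cache drop and cool-down before each run.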
def main():
    param_keys = list(cmdline.fio_params.keys())
    cmdline.output.write(FioJob.fio_headers + ';' + ';'.join(param_keys) + ';replicate\n')
    cmdline.output.flush()
    all_jobs = {}
    for replicate in cmdline.parameters['replicate']:
        logger.info("Starting replicate %s" % str(replicate))
        replicate_jobs = []
        for template in cmdline.templates:
            param_values = product(*[cmdline.fio_params[k] for k in param_keys])
            if not cmdline.norandom:
                param_values = list(param_values)
                random.shuffle(param_values)
            t = Template(template)
            for values in param_values:
                mapping = {k: values[i] for i, k in enumerate(param_keys)}
                mapping['replicate'] = replicate
                job = FioJob(fio_script=t.substitute(mapping), mapping=mapping, order=param_keys + ['replicate'])
                replicate_jobs.append(job)
        all_jobs[replicate] = replicate_jobs
        logger.info("Number of jobs for replicate %s: %d" % (str(replicate), len(replicate_jobs)))
        for job_num, job in enumerate(replicate_jobs):
            drop_caches()
            logger.info("Starting job %d of %d (replicate %s)" %
                        (job_num + 1, len(replicate_jobs), replicate))
            if cmdline.cooldown > 0:
                logger.info("cooldown %g seconds" % cmdline.cooldown)
                sleep(cmdline.cooldown)
            job.run()
        logger.info("Finished replicate %s" % str(replicate))
    successes, fails = 0, 0
    for replicate_jobs in all_jobs.values():
        successes += sum(job.success is True for job in replicate_jobs)
        fails += sum(job.success is False for job in replicate_jobs)
    logger.info("Jobs: %d completed, %d failed" % (successes, fails))


if __name__ == '__main__':
    cmdline = parse_cmdline()
    logger = setup_logger(cmdline)
    logger.info('Started')
    main()
    logger.info('All done')

#EOF