From 1c98d97aa9a9bd269dba760fbfa0acfbd3d78b78 Mon Sep 17 00:00:00 2001
From: Krzysztof Filipek
Date: Wed, 3 Mar 2021 14:41:46 +0100
Subject: [PATCH] [utils] add benchmark for YCSB

This tool allows defining multiple suites, running them one by one and
parsing the output into an easy-to-use form as CSV files.
---
 utils/parser.py       |  64 ++++++++++++
 utils/run_suite.py    | 218 ++++++++++++++++++++++++++++++++++++++++++
 utils/run_workload.sh |  80 +++++++++++++++
 3 files changed, 362 insertions(+)
 create mode 100755 utils/parser.py
 create mode 100755 utils/run_suite.py
 create mode 100755 utils/run_workload.sh

diff --git a/utils/parser.py b/utils/parser.py
new file mode 100755
--- /dev/null
+++ b/utils/parser.py
@@ -0,0 +1,64 @@
+#!/usr/bin/python2
+"""Convert YCSB run logs found under results/ into per-suite CSV files.
+
+Every directory below 'results' that has no subdirectories is scanned for
+logs named 'run_<threads>.<ext>' (run_workload.sh writes 'run_<threads>.log').
+For each such directory a 'results.csv' is written: one row per metric and
+one ';'-separated column per thread count.
+"""
+import os
+from os.path import join, getsize
+
+# Log sections whose named metrics are exported to the CSV.
+SECTIONS = ('[READ]', '[INSERT]', '[UPDATE]', '[OVERALL]')
+
+
+def parse_log(path):
+    """Return the exported records of one run log as tuples of fields."""
+    records = []
+    with open(path) as log_file:
+        log_file.readline()  # the first line of the log is always skipped
+        for line in log_file:
+            record = tuple(line.replace(',', '').split(' '))
+            # Records shorter than three fields have no value to export.
+            if record[0] not in SECTIONS or len(record) < 3:
+                continue
+            try:
+                int(record[1])
+            except ValueError:
+                # The second field is a metric name, not a number: keep it.
+                records.append(record)
+    return records
+
+
+for root, dirs, filenames in os.walk('results'):
+    if dirs:
+        continue  # only directories without subdirectories are processed
+    parsed_results = []
+    for filename in filenames:
+        if filename.split('_')[0] != 'run':
+            continue
+        try:
+            thread_count = int(filename.split('_')[1].split('.')[0])
+        except (IndexError, ValueError):
+            continue  # not named run_<threads>.<ext>
+        parsed_results.append([thread_count, parse_log(join(root, filename))])
+
+    if not parsed_results:
+        continue
+    parsed_results.sort(key=lambda result: result[0])
+
+    print('------CSV------')
+    threads = 'Threads;#;'
+    csv = [record[0] + ';' + record[1] + ';' for record in parsed_results[0][1]]
+    for thread_count, records in parsed_results:
+        threads += str(thread_count) + ';'
+        # zip() stops at the shorter side, so a log with more records than
+        # the first one cannot index past the rows created above.
+        for i, record in zip(range(len(csv)), records):
+            csv[i] += record[2].replace('\n', '').replace('.', ',') + ';'
+    csv.insert(0, threads)
+    with open(join(root, 'results.csv'), 'w') as csv_file:
+        for row in csv:
+            csv_file.write(row + '\n')
+            print(row)
diff --git a/utils/run_suite.py b/utils/run_suite.py
new file mode 100755
--- /dev/null
+++ b/utils/run_suite.py
@@ -0,0 +1,218 @@
+#!/usr/bin/python2
+"""Generate testplan.sh from the suites described in test_suite.txt.
+
+Inputs, read from the current directory:
+  path_configuration.txt - must contain a 'YCSB_PATH=<directory>' line
+  test_suite.txt         - suite definitions, see the example below
+
+Outputs:
+  results/configurations.json          - JSON dump of every parsed suite
+  results/<suite>/test_description.txt - readable summary of the suite
+  testplan.sh - one run_workload.sh call per suite and thread count
+"""
+import json
+import os
+import subprocess
+
+# Example suite definition:
+# SUITE write_workload
+# THREADS 1 2 4 8 16 32 48 64 96
+# JOURNALING enabled/disabled
+# RECORDS 1000
+# OPERATIONS 100
+# READ_PROPORTION 0.0
+# UPDATE_PROPORTION 0.0
+# INSERT_PROPORTION 1.0
+# YCSB_NUMA 1
+# DROP_BEFORE
+# ENDSUITE
+
+# GET PATHS FROM CONFIG FILE
+PATH_TO_YCSB = ''
+
+with open("path_configuration.txt", "r") as path_configuration:
+    for line in path_configuration:
+        if line.startswith('YCSB_PATH='):
+            # Split only once so that a path containing '=' stays intact.
+            PATH_TO_YCSB = line.split("=", 1)[1].strip()
+            if not PATH_TO_YCSB:
+                raise NameError('No path in YCSB_PATH!')
+
+if not os.path.isdir(PATH_TO_YCSB):
+    raise NameError('Wrong path to YCSB!')
+
+
+class Test:
+    """Settings of one suite; __init__ sets the defaults of every option."""
+
+    def __init__(self):
+        self.pmemkv_engine = "cmap"
+        self.pmemkv_dbsize = 0
+        self.pmemkv_dbpath = "/dev/shm/"
+        self.workload_type = "workloada"
+        self.testName = ""
+        self.threads = []
+        self.records = 0
+        self.operations = 0
+        # -1.0 and -1 mean "not set"; run_workload.sh skips such options.
+        self.read_proportion = -1.0
+        self.update_proportion = -1.0
+        self.insert_proportion = -1.0
+        self.ycsb_numa = -1
+        self.is_load = -1
+
+    def toJSON(self):
+        """Return all settings as an indented JSON object string."""
+        return json.dumps(self, default=lambda o: o.__dict__,
+                          sort_keys=True, indent=4)
+
+
+def getArgs(tokens):
+    """Return every token of a split line except the leading keyword."""
+    return list(tokens[1:])
+
+
+# Keywords that store their first argument in the named Test attribute.
+VALUE_KEYWORDS = {
+    "RECORDS": "records",
+    "OPERATIONS": "operations",
+    "READ_PROPORTION": "read_proportion",
+    "UPDATE_PROPORTION": "update_proportion",
+    "INSERT_PROPORTION": "insert_proportion",
+    "YCSB_NUMA": "ycsb_numa",
+    "PMEMKV_ENGINE": "pmemkv_engine",
+    "PMEMKV_DBSIZE": "pmemkv_dbsize",
+    "PMEMKV_DBPATH": "pmemkv_dbpath",
+    "WORKLOAD_TYPE": "workload_type",
+}
+
+# Keywords that are accepted but do not change the generated test plan.
+IGNORED_KEYWORDS = set(["JOURNALING", "DROP_BEFORE", "CREATE_AFTER_DROP",
+                        "ENDSUITE"])
+
+# Add keyword if you need to extend implementation
+KEYWORDS = set(["SUITE", "THREADS", "LOAD"])
+KEYWORDS.update(VALUE_KEYWORDS)
+KEYWORDS.update(IGNORED_KEYWORDS)
+
+
+def parse_suites(lines):
+    """Build the list of Test objects described by the suite file lines.
+
+    Raises NameError for an unknown keyword, for more than one keyword in
+    a line, for a missing argument and for options given before SUITE.
+    """
+    suites = []
+    for line in lines:
+        splittedLine = line.split()
+        # Blank lines and comments carry no settings.
+        if not splittedLine or splittedLine[0].startswith('#'):
+            continue
+        keyword = splittedLine[0]
+        # get args if exists
+        args = getArgs(splittedLine)
+        if keyword not in KEYWORDS:
+            print(splittedLine)
+            raise NameError('Unrecognized keyword')
+        if KEYWORDS.intersection(args):
+            print(splittedLine)
+            raise NameError('Too many keywords in single line!')
+
+        if keyword in IGNORED_KEYWORDS:
+            continue
+        if keyword == "SUITE":
+            suites.append(Test())
+        if not suites:
+            raise NameError(keyword + ' used before SUITE!')
+        if keyword not in ("LOAD", "THREADS") and not args:
+            raise NameError('No argument for ' + keyword + '!')
+
+        current = suites[-1]
+        if keyword == "SUITE":
+            current.testName = args[0]
+        elif keyword == "THREADS":
+            current.threads = args
+        elif keyword == "LOAD":
+            current.is_load = 1
+        else:
+            setattr(current, VALUE_KEYWORDS[keyword], args[0])
+    return suites
+
+
+def describe(conf, index, with_load=True):
+    """Return the aligned 'label value' summary lines of one suite.
+
+    with_load=False leaves out the 'Is load' line.
+    """
+    fields = [
+        ('Test#: ', index),
+        ("Name: ", conf.testName),
+        ("Threads: ", conf.threads),
+        ("Records: ", conf.records),
+        ("Operation: ", conf.operations),
+        ("Read proportion: ", conf.read_proportion),
+        ("Update proportion: ", conf.update_proportion),
+        ("Insert proportion: ", conf.insert_proportion),
+        ("Is load: ", conf.is_load),
+        ("NUMA for YCSB: ", conf.ycsb_numa),
+        ("Workload type: ", conf.workload_type),
+        ("Pmemkv engine: ", conf.pmemkv_engine),
+        ("Pmemkv size: ", conf.pmemkv_dbsize),
+        ("Pmemkv path: ", conf.pmemkv_dbpath),
+    ]
+    return ['{:>20} {:<12}'.format(label, str(value))
+            for label, value in fields
+            if with_load or label != "Is load: "]
+
+
+# open meta file
+with open("test_suite.txt", "r") as configfile:
+    configurations = parse_suites(configfile)
+
+print('Script read those tests:')
+for i, conf in enumerate(configurations, 1):
+    for text in describe(conf, i):
+        print(text)
+    print("")
+
+# PUT CONFIGURATION TO FILE IN PROPER PATH
+results_directory = "results/"
+if not os.path.exists(results_directory):
+    os.makedirs(results_directory)
+with open(os.path.join(results_directory, 'configurations.json'), 'w') as jsonconfig:
+    for i, conf in enumerate(configurations, 1):
+        jsonconfig.write(conf.toJSON() + '\n')
+        suite_directory = os.path.join(results_directory, conf.testName)
+        if not os.path.exists(suite_directory):
+            os.makedirs(suite_directory)
+        description_path = os.path.join(suite_directory, 'test_description.txt')
+        # Opened for appending: earlier descriptions in the file are kept.
+        with open(description_path, 'a') as test_description:
+            for text in describe(conf, i, with_load=False):
+                test_description.write(text + '\n')
+            test_description.write('\n')
+
+# run specified configurations
+generated_commands = []
+for test in configurations:
+    phase = 'load' if test.is_load == 1 else 'run'
+    if test.ycsb_numa == -1:
+        print('NUMA node is not set for test: ' + test.testName + '.')
+    # The order below is the positional interface of run_workload.sh, which
+    # expects the read, insert and update proportions as arguments 7, 8, 9.
+    positional = [PATH_TO_YCSB, test.records, test.operations,
+                  test.read_proportion, test.insert_proportion,
+                  test.update_proportion, test.ycsb_numa, test.workload_type,
+                  test.pmemkv_engine, test.pmemkv_dbsize, test.pmemkv_dbpath]
+    # str() because options missing from the suite keep numeric defaults.
+    command_suffix = ' '.join(str(value) for value in positional)
+    for thread_no in test.threads:
+        generated_commands.append(' '.join(
+            ['./run_workload.sh', test.testName, phase, thread_no, command_suffix]))
+
+# Generate script
+with open('testplan.sh', 'w') as testplan:
+    testplan.write('#!/bin/bash\n')
+    for command in generated_commands:
+        testplan.write(command + '\n')
+print(generated_commands)
diff --git a/utils/run_workload.sh b/utils/run_workload.sh
new file mode 100755
--- /dev/null
+++ b/utils/run_workload.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# Run a single YCSB workload against pmemkv from the command line
+#
+# e.g. ./run_workload.sh run_cmap run 12 PATH_TO_YCSB 1000000 1000000
+#                        {1}      {2} {3} {4}         {5}     {6}
+#      -1.0 -1.0 -1.0 1    workloadb csmap 80000000 DBPATH
+#      {7}  {8}  {9}  {10} {11}      {12}  {13}     {14}
+# 1 - suite name
+# 2 - ycsb phase: load/run
+# 3 - thread count
+# 4 - path to YCSB
+# 5 - record count
+# 6 - operation count
+# 7 - read proportion
+# 8 - insert proportion
+# 9 - update proportion
+# 10 - NUMA node for YCSB
+# 11 - workload scenario (workload[a-f])
+####### Engine related args
+# 12 - pmemkv: engine name
+# 13 - pmemkv: pool size
+# 14 - pmemkv: path to pool
+#
+# A proportion of -1.0 or a NUMA node of -1 means "not set": the matching
+# option (or the numactl prefix) is left out of the YCSB command.
+# YCSB output goes to results/<suite name>/<phase>_<thread count>.log and
+# every executed command is appended to results/<suite name>/cmds_executed.log
+
+echo "$@"
+echo "Passed $# arguments to script"
+
+if [ "$#" -ne "14" ];
+then
+    echo "Illegal number of parameters, should be 14. Check script documentation." >&2
+    exit 1
+fi
+
+YCSB_PATH="$4"
+echo "$YCSB_PATH"
+OLD_PATH=$(pwd)
+RESULTS_DIR="$OLD_PATH/results/$1"
+LOG_FILE="$RESULTS_DIR/${2}_${3}.log"
+
+mkdir -p "$RESULTS_DIR" # Create results directory: results/{test_suite_name}/
+
+# Prepare optional arguments for YCSB
+NUMA_CMD=()
+RATIO_ARGS=()
+if [ "$7" != "-1.0" ];
+then
+    RATIO_ARGS+=(-p "readproportion=$7")
+fi
+if [ "$8" != "-1.0" ];
+then
+    RATIO_ARGS+=(-p "insertproportion=$8")
+fi
+if [ "$9" != "-1.0" ];
+then
+    RATIO_ARGS+=(-p "updateproportion=$9")
+fi
+if [ "${10}" != "-1" ];
+then
+    NUMA_CMD=(numactl -N "${10}")
+fi
+
+# Arrays keep every argument a single word, even if it contains spaces.
+YCSB_CMD=("${NUMA_CMD[@]}" bin/ycsb.sh "$2" pmemkv -P "workloads/${11}"
+    -threads "$3"
+    -p hdrhistogram.percentiles=95,99,99.9,99.99
+    -p "recordcount=$5" -p "operationcount=$6"
+    "${RATIO_ARGS[@]}"
+    -p "pmemkv.engine=${12}" -p "pmemkv.dbsize=${13}" -p "pmemkv.dbpath=${14}")
+
+# TODO(kfilipek): Implement splitting threads into processes
+cd "$YCSB_PATH" || { echo "Cannot enter YCSB directory: $YCSB_PATH" >&2; exit 1; }
+echo "PMEM_IS_PMEM_FORCE=1 ${YCSB_CMD[*]} > $LOG_FILE" >> "$RESULTS_DIR/cmds_executed.log"
+PMEM_IS_PMEM_FORCE=1 "${YCSB_CMD[@]}" > "$LOG_FILE"
+STATUS=$?
+cd "$OLD_PATH"
+exit $STATUS