From 1c98d97aa9a9bd269dba760fbfa0acfbd3d78b78 Mon Sep 17 00:00:00 2001
From: Krzysztof Filipek <krzysztof.filipek@intel.com>
Date: Wed, 3 Mar 2021 14:41:46 +0100
Subject: [PATCH] [utils] add benchmark for YCSB

This tool allows defining multiple suites, running them one by one,
and parsing the output into an easy-to-use form (CSV files).
---
 utils/parser.py       |  39 ++++++++
 utils/run_suite.py    | 202 ++++++++++++++++++++++++++++++++++++++++++
 utils/run_workload.sh |  69 +++++++++++++++
 3 files changed, 310 insertions(+)
 create mode 100755 utils/parser.py
 create mode 100755 utils/run_suite.py
 create mode 100755 utils/run_workload.sh

diff --git a/utils/parser.py b/utils/parser.py
new file mode 100755
index 00000000..90d0de06
--- /dev/null
+++ b/utils/parser.py
@@ -0,0 +1,39 @@
+import os
+from os.path import join, getsize
+
+# Build one results.csv in every leaf directory found under results/.
+for root, dirs, filenames in os.walk('results'):
+    if dirs:
+        continue
+    parsed_results = []
+    for filename in filenames:
+        # Only files named run_<number>.<ext> are parsed.
+        if filename.split('_')[0] != 'run':
+            continue
+        trimmed_lines = []
+        with open(join(root, filename)) as file_object:
+            file_object.readline()  # the first line is never parsed
+            for line in file_object:
+                record = tuple(line.replace(',', '').split(' '))
+                if record[0] not in ('[READ]', '[INSERT]', '[UPDATE]', '[OVERALL]'):
+                    continue
+                try:
+                    int(record[1])  # rows with an integer second field are dropped
+                except ValueError:
+                    trimmed_lines.append(record)
+        parsed_results.append([int(filename.split('_')[1].split('.')[0]), trimmed_lines])
+    if not parsed_results:
+        continue
+    parsed_results.sort(key=lambda x: x[0])
+    print('------CSV------')
+    csv = [row[0] + ';' + row[1] + ';' for row in parsed_results[0][1]]
+    threads = 'Threads;#;'
+    for test_result in parsed_results:
+        threads += str(test_result[0]) + ';'
+        for i, line in enumerate(test_result[1]):
+            csv[i] += line[2].replace('\n', '').replace('.', ',') + ';'
+    csv.insert(0, threads)
+    with open(join(root, 'results.csv'), 'w') as csv_file:
+        for x in csv:
+            csv_file.write(x + '\n')
+            print(x)
\ No newline at end of file
diff --git a/utils/run_suite.py b/utils/run_suite.py
new file mode 100755
index 00000000..a4a525e6
--- /dev/null
+++ b/utils/run_suite.py
@@ -0,0 +1,202 @@
+#!/usr/bin/python2
+import json
+import os
+import subprocess
+
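+# Example suite definition for test_suite.txt (without the leading "# "):
+# SUITE write_workload
+# THREADS 1 2 4 8 16 32 48 64 96
+# JOURNALING enabled/disabled   (reserved: currently rejected as unrecognized)
+# RECORDS 1000
+# OPERATIONS 100
+# READ_PROPORTION 0.0
+# UPDATE_PROPORTION 0.0
+# INSERT_PROPORTION 1.0
+# YCSB_NUMA 1
+# DROP_BEFORE                   (reserved: currently rejected as unrecognized)
+# ENDSUITE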
+
+# Read the YCSB installation path from path_configuration.txt (YCSB_PATH=<dir>).
+PATH_TO_YCSB = ''
+
+with open("path_configuration.txt", "r") as path_configuration:
+    for line in path_configuration:
+        if line.startswith('YCSB_PATH='):
+            # Split on the first '=' only so paths containing '=' stay intact.
+            PATH_TO_YCSB = line.split("=", 1)[1].strip()
+            if not PATH_TO_YCSB:
+                raise NameError('No path in YCSB_PATH!')
+
+# Fail early: every generated command embeds this directory.
+if not os.path.isdir(PATH_TO_YCSB):
+    raise NameError('Wrong path to YCSB!')
+
+class Test:
+    """Settings of one benchmark suite (one SUITE section of test_suite.txt)."""
+    def __init__(self):
+        self.pmemkv_engine = "cmap"
+        self.pmemkv_dbsize = 0
+        self.pmemkv_dbpath = "/dev/shm/"
+        self.workload_type = "workloada"
+        self.testName = ""
+        self.threads = []
+        self.records = 0
+        self.operations = 0
+        # -1.0 / -1 are placeholders for options the suite file did not set.
+        self.read_proportion = -1.0
+        self.update_proportion = -1.0
+        self.insert_proportion = -1.0
+        self.ycsb_numa = -1
+        # Set to 1 by the LOAD keyword; any other value selects the run phase.
+        self.is_load = -1
+    def toJSON(self):
+        """Return all attributes as key-sorted JSON indented by 4 spaces."""
+        return json.dumps(self, default=lambda o: o.__dict__, 
+                          sort_keys=True, indent=4)
+
+def getArgs(str):
+    """Return every token after the leading keyword of a split config line."""
+    # NOTE: the parameter shadows the builtin `str`; the name is kept so
+    # existing callers are unaffected.
+    return list(str[1:])
+
+KEYWORDS = set(["THREADS", "JOURNALING", "RECORDS", "OPERATIONS",
+                "READ_PROPORTION", "LOAD", "UPDATE_PROPORTION",
+                "INSERT_PROPORTION", "YCSB_NUMA", "SUITE", "ENDSUITE",
+                "DROP_BEFORE", "CREATE_AFTER_DROP", "PMEMKV_ENGINE",
+                "PMEMKV_DBSIZE", "PMEMKV_DBPATH", "WORKLOAD_TYPE"]) #Add keyword if you need to extend implementation
+
+# open meta file: every SUITE line starts a new Test, later lines fill it in
+# Keywords that store their first argument in the named Test attribute.
+SINGLE_VALUE_FIELDS = {
+    "SUITE": "testName", "RECORDS": "records", "OPERATIONS": "operations",
+    "READ_PROPORTION": "read_proportion",
+    "UPDATE_PROPORTION": "update_proportion",
+    "INSERT_PROPORTION": "insert_proportion", "YCSB_NUMA": "ycsb_numa",
+    "PMEMKV_ENGINE": "pmemkv_engine", "PMEMKV_DBSIZE": "pmemkv_dbsize",
+    "PMEMKV_DBPATH": "pmemkv_dbpath", "WORKLOAD_TYPE": "workload_type",
+}
+
+with open("test_suite.txt", "r") as configfile:
+    configurations = []
+    for line in configfile:
+        splittedLine = line.split()
+        # Skip blank (also whitespace-only) lines and comments.
+        if not splittedLine or splittedLine[0].startswith('#'):
+            continue
+        # Exactly one known keyword is allowed per line.
+        keyword_count = len(set.intersection(KEYWORDS, splittedLine))
+        if keyword_count != 1:
+            print(splittedLine)
+            if keyword_count == 0:
+                raise NameError('No keyword in line!')
+            raise NameError('Too many keywords in single line!')
+
+        keyword = splittedLine[0]
+        args = getArgs(splittedLine)  # tokens following the keyword
+        if keyword == "ENDSUITE":
+            continue
+        # SUITE opens a new test; any other keyword needs an open one.
+        if keyword == "SUITE":
+            configurations.append(Test())
+        elif not configurations:
+            raise NameError('Keyword ' + keyword + ' used before SUITE!')
+        current = configurations[-1]
+        if keyword == "THREADS":
+            # The whole list is kept: one command is generated per entry.
+            current.threads = args
+        elif keyword == "LOAD":
+            current.is_load = 1
+        elif keyword in SINGLE_VALUE_FIELDS:
+            if not args:
+                raise NameError('Missing argument for ' + keyword + '!')
+            setattr(current, SINGLE_VALUE_FIELDS[keyword], args[0])
+        else:
+            raise NameError('Unrecognized keyword')
+
+print('Script read those tests:')
+ROW = '{:>20} {:<12}'
+for i, conf in enumerate(configurations, 1):
+    summary = [
+        ('Test#: ', str(i)), ("Name: ", conf.testName),
+        ("Threads: ", str(conf.threads)), ("Records: ", conf.records),
+        ("Operation: ", conf.operations),
+        ("Read proportion: ", str(conf.read_proportion)),
+        ("Update proportion: ", str(conf.update_proportion)),
+        ("Insert proportion: ", str(conf.insert_proportion)),
+        ("Is load: ", str(conf.is_load)), ("NUMA for YCSB: ", conf.ycsb_numa),
+        ("Workload type: ", conf.workload_type),
+        ("Pmemkv engine: ", conf.pmemkv_engine),
+        ("Pmemkv size: ", conf.pmemkv_dbsize),
+        ("Pmemkv path: ", conf.pmemkv_dbpath),
+    ]
+    for label, value in summary:
+        print(ROW.format(label, value))
+    print("")
+
+# PUT CONFIGURATION TO FILE IN PROPER PATH
+results_directory = "results/"
+if not os.path.exists(results_directory):
+    os.makedirs(results_directory)
+DESCRIPTION_ROW = '{:>20} {:<12}'
+with open(results_directory + '/configurations.json', 'w') as jsonconfig:
+    for i, conf in enumerate(configurations, 1):
+        jsonconfig.write(conf.toJSON() + '\n')
+        suite_directory = results_directory + conf.testName + '/'
+        if not os.path.exists(suite_directory):
+            os.makedirs(suite_directory)
+        description = [
+            ('Test#: ', str(i)), ("Name: ", conf.testName),
+            ("Threads: ", str(conf.threads)), ("Records: ", conf.records),
+            ("Operation: ", conf.operations),
+            ("Read proportion: ", str(conf.read_proportion)),
+            ("Update proportion: ", str(conf.update_proportion)),
+            ("Insert proportion: ", str(conf.insert_proportion)),
+            ("NUMA for YCSB: ", conf.ycsb_numa), ("Workload type: ", conf.workload_type),
+            ("Pmemkv engine: ", conf.pmemkv_engine), ("Pmemkv size: ", conf.pmemkv_dbsize),
+            ("Pmemkv path: ", conf.pmemkv_dbpath),
+        ]
+        with open(suite_directory + 'test_description.txt', 'a') as test_description:
+            for label, value in description:
+                test_description.write(DESCRIPTION_ROW.format(label, value) + '\n')
+            test_description.write('\n')
+
+# run specified configurations
+generated_commands = []
+for test in configurations:
+    # Suites marked with LOAD use the ycsb load phase, all others run.
+    phase = 'load' if test.is_load == 1 else 'run'
+    command_prefix = './run_workload.sh ' + test.testName + ' ' + phase + ' '
+
+    if test.ycsb_numa == -1:
+        print('NUMA node is not set for test: ' + test.testName + '.')
+
+    # Arguments 4..14 of run_workload.sh, in positional order.
+    # (thread count, argument 3, is appended per command below)
+    suffix_fields = [
+        PATH_TO_YCSB,            # path to YCSB main directory
+        test.records,            # operation numbers
+        test.operations,
+        test.read_proportion,    # workload ratios
+        test.update_proportion,
+        test.insert_proportion,
+        test.ycsb_numa,          # NUMA node
+        test.workload_type,      # workload type
+        test.pmemkv_engine,      # engine specific fields
+        test.pmemkv_dbsize,
+        test.pmemkv_dbpath,
+    ]
+    # str() is required: options missing from the suite file keep numeric
+    # defaults (0, -1, -1.0), and str + number raises TypeError.
+    command_suffix = ' '.join(str(field) for field in suffix_fields) + ' '
+
+    # One command per requested thread count.
+    for thread_no in test.threads:
+        generated_commands.append(command_prefix + thread_no + ' ' + command_suffix)
+
+# Generate script
+with open('testplan.sh','w') as testplan:
+    testplan.write('#!/bin/bash\n')
+    for x in generated_commands:
+        testplan.write(x + '\n')
+print(generated_commands)
diff --git a/utils/run_workload.sh b/utils/run_workload.sh
new file mode 100755
index 00000000..8ab8f3bc
--- /dev/null
+++ b/utils/run_workload.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# Run workload from command line
+#
+# e.g. ./run_workload.sh run_cmap run 12 PATH_TO_YCSB 1000000 1000000
+#             {0}          {1}    {2} {3}   {4}         {5}     {6}
+#                        -1.0 -1.0 -1.0   1  workloadb csmap 80000000 DBPATH
+#                         {7}  {8}  {9} {10}    {11}   {12}    {13}    {14}
+# 1 - suite name
+# 2 - ycsb phase: load/run
+# 3 - thread count (only used in the name of the log file)
+# 4 - path to YCSB
+# 5 - record count
+# 6 - operation count
+# 7 - read proportion
+# 8 - update proportion
+# 9 - insert proportion
+# 10 - NUMA node for YCSB
+# 11 - workload scenario (workload[a-f])
+####### Engine related args
+# 12 - pmemkv: engine name
+# 13 - pmemkv: pool size
+# 14 - pmemkv: path to pool
+#
+# A proportion of -1.0 or a NUMA node of -1 means "not set": the matching
+# option is then left out of the ycsb command line.
+
+YCSB_PATH=$4
+echo "$YCSB_PATH"
+OLD_PATH=$(pwd)
+echo "$@"
+echo "Passed $# arguments to script"
+
+if [ "$#" -ne 14 ];
+then
+	echo "Illegal number of parameters, should be 14. Check script documentation." >&2
+	exit 1
+fi
+RESULTS_DIR="$OLD_PATH/results/$1" # results/{test_suite_name}/
+mkdir -p "$RESULTS_DIR"
+LOG_FILE="$RESULTS_DIR/${2}_${3}.log"
+# Prepare optional arguments for YCSB
+NUMA_ARG=""
+READ_RATIO=""
+UPDATE_RATIO=""
+INSERT_RATIO=""
+if [ "$7" != "-1.0" ];
+then
+	READ_RATIO="-p readproportion=$7"
+fi
+if [ "$8" != "-1.0" ];
+then
+	UPDATE_RATIO="-p updateproportion=$8"
+fi
+if [ "$9" != "-1.0" ];
+then
+	INSERT_RATIO="-p insertproportion=$9"
+fi
+if [ "${10}" != "-1" ];
+then
+	NUMA_ARG="numactl -N ${10}"
+fi
+
+# TODO(kfilipek): Implement splitting threads into processes
+# The optional arguments are expanded unquoted on purpose (word splitting).
+YCSB_ARGS="$2 pmemkv -P workloads/${11} -p hdrhistogram.percentiles=95,99,99.9,99.99 -p recordcount=$5 -p operationcount=$6 $READ_RATIO $UPDATE_RATIO $INSERT_RATIO -p pmemkv.engine=${12} -p pmemkv.dbsize=${13} -p pmemkv.dbpath=${14}"
+cd "$YCSB_PATH" || exit 1
+echo "PMEM_IS_PMEM_FORCE=1 $NUMA_ARG bin/ycsb.sh $YCSB_ARGS > $LOG_FILE" >> "$RESULTS_DIR/cmds_executed.log"
+PMEM_IS_PMEM_FORCE=1 $NUMA_ARG bin/ycsb.sh $YCSB_ARGS > "$LOG_FILE"
+cd "$OLD_PATH"