diff --git a/.gitmodules b/.gitmodules index dd7b2323..b735d478 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "Analyzer/ScalerRun/lib/yamlcpp_0_7_0"] path = Analyzer/ScalerRun/lib/yamlcpp_0_7_0 url = https://github.com/jbeder/yaml-cpp.git +[submodule "benchmarktookit"] + path = benchmarktookit + url = ssh://git@code.xttech.top:6081/masslab/benchmarktookit.git diff --git a/.idea/Scaler.iml b/.idea/Scaler.iml index 40ba48d7..a45220d6 100644 --- a/.idea/Scaler.iml +++ b/.idea/Scaler.iml @@ -5,4 +5,11 @@ + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml index bf0af86f..a864adec 100644 --- a/.idea/vcs.xml +++ b/.idea/vcs.xml @@ -3,6 +3,7 @@ + diff --git a/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py b/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py index 77c3f5c4..06383b89 100644 --- a/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py +++ b/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py @@ -673,7 +673,7 @@ def main(): # they can refuse it and the aggregator will use a default file path set to finalFold.folded in the repo root = tk.Tk() root.withdraw() - fileName = filedialog.askopenfilename() + fileName = '' # If an input file was selected, then we will ask them for an output file. They can opt to stop the program # By directly closing the file dialog twice in a row. 
@@ -689,7 +689,7 @@ def main(): if fileName == '': # If no file name then just default to opening a file in the repo # print(True) - fileName = "C:/Users/John/PycharmProjects/Scaler/libAnalyzer/tests/PerfTests/finalFold.folded" + fileName = "/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Perf-Artifects/out.stacks-folded" # outFileName = "perfMemcachedData_V2.json" outFileName = "perfMemcachedData_V2_nokernelinlined.json" # outFileName = "perfMemcachedData_V2_noinlined.json" @@ -709,12 +709,12 @@ def main(): # We will handle the data differently depending on if the user wants to use the timing data # If "y" is entered, then we will use the time stamp info and sample data, # if not then we will use sample data by default - timestampInput = input("Use Timestamps? y/n Default is n: ") + timestampInput = 'n' if timestampInput == "y": # print(timestampInput) useTimestamps = True - inlinedInput = input("Attribute Inlined functions to last known library? 
y/n Default is n: ") + inlinedInput = 'n' if inlinedInput == "y": # print(attributeInline) attributeInline = True diff --git a/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml b/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml index b7978a8d..2aa44330 100644 --- a/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml +++ b/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml @@ -37,8 +37,9 @@ + - + + + + + + + + + @@ -85,6 +113,8 @@ + + @@ -92,20 +122,6 @@ - - - - file://$PROJECT_DIR$/main.py - 107 - - - file://$PROJECT_DIR$/main.py - 142 - - - @@ -113,6 +129,7 @@ - + + \ No newline at end of file diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py b/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py deleted file mode 100644 index 788350d8..00000000 --- a/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py +++ /dev/null @@ -1,195 +0,0 @@ -import os -import traceback -from matplotlib import cm -import matplotlib.pyplot as plt -import pandas as pd -import struct -import numpy as np -from multiprocessing import Pool -from multiprocessing import Pool, cpu_count -import time - -from datastructure.TimingStruct import ArrayDescriptor -from util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming -from util.Parser.TimeOutputPrarser import readSymbolFiles, aggregatePerThreadArray, readTimingStruct -import numpy as np - - -def calcInvokedApis(scalerDataFolder, recInfo): - invokedAPIs = [] - totalAPIs = [] - for threadId in recInfo.threadIdList: - curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) - curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0] - invokedAPIs.append(len(curThreadInvokedRecArray)) - totalAPIs.append(len(curThreadRecArray)) - return invokedAPIs, totalAPIs - - -def printCoverageReport(scalerDataFolder, recInfo, invokedAPIs, totalAPIs): - ''' - Coverage Report - ''' - print(' \t ', end='') - for threadId in recInfo.threadIdList: - print(threadId, end='\t') - 
print() - - print('Invoked %', end='\t') - for perc in np.array(invokedAPIs) / np.array(totalAPIs): - print('%2.2f%%' % (perc * 100), end='\t') - print() - - print('Invoked APIs', end='\t') - for invokedAPI in invokedAPIs: - print(invokedAPI, end='\t') - - print() - print('Total APIs', end='\t') - for totalAPI in totalAPIs: - print(totalAPI, end='\t') - - -def drawCountingHist(scalerDataFolder, recInfo): - ''' - Counting histogram Report - ''' - histogramRoot = os.path.join(scalerDataFolder, 'InvocationCountHist') - if not os.path.exists(histogramRoot): - os.mkdir(histogramRoot) - - print() - - # create 3 data sets with 1,000 samples - mu, sigma = 200, 25 - x = mu + sigma * np.random.randn(1000, 3) - - totalCountArr = None - for threadId in recInfo.threadIdList: - curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) - # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0] - - times = np.array([rec.count for rec in curThreadRecArray]) - if totalCountArr is None: - totalCountArr = times - else: - totalCountArr = np.vstack([totalCountArr, times]) - - plt.figure() - plt.hist(times, range=(1, times.max()), edgecolor="black", bins=50) - plt.xlabel('Invocation counts') - plt.ylabel('API number') - plt.title('Histogram of invocation counts for thread %s' % (str(threadId))) - - plt.savefig(os.path.join(histogramRoot, threadId + '.png')) - plt.close() - # print(threadId, np.max(times)) - - totalCountArr = totalCountArr.transpose() - # totalCountArr = totalCountArr[np.where(totalCountArr > 0)] - plt.figure() - print(totalCountArr.max()) - plt.hist(totalCountArr, range=(1, totalCountArr.max()), bins=50, stacked=True) - plt.xlabel('Invocation counts') - plt.ylabel('API number') - plt.title('Histogram of invocation counts for all threads staked') - plt.savefig(os.path.join(histogramRoot, 'total.png')) - plt.close() - - -def printInvocNumberPerThread(scalerDataFolder): - if scalerDataFolder is None: - print() - return - - recInfo = 
readSymbolFiles(scalerDataFolder) - invokedAPIs, totalAPIs = calcInvokedApis(scalerDataFolder, recInfo) - - totalInvocationCnts = 0 - - for threadId in recInfo.threadIdList: - curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) - # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0] - - times = np.array([rec.count for rec in curThreadRecArray]) - totalInvocationCnts += np.sum(times) - - min = np.min(invokedAPIs) - q1 = np.quantile(invokedAPIs, 0.25) - q2 = np.quantile(invokedAPIs, 0.5) - q3 = np.quantile(invokedAPIs, 0.75) - q4 = np.quantile(invokedAPIs, 1) - iqr = q3 - q1 - qLower = q1 - 1.5 * iqr - qUpper = q3 + 1.5 * iqr - assert (np.unique(totalAPIs).shape[0] == 1) - assert (q4 == np.max(invokedAPIs)) - print(scalerDataFolder.split('/')[-2], min, q1, q2, q3, q4, iqr, qLower, qUpper, len(invokedAPIs), totalAPIs[0], - totalInvocationCnts, sep='\t') - - -def printInvocCntPerAPI(scalerDataFolder): - if scalerDataFolder is None: - print() - return - - recInfo = readSymbolFiles(scalerDataFolder) - - totalCountArr = None - for threadId in recInfo.threadIdList: - curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) - # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0] - - times = np.array([rec.count for rec in curThreadRecArray]) - if totalCountArr is None: - totalCountArr = times - else: - totalCountArr = np.vstack([totalCountArr, times]) - totalCountArr = np.array(totalCountArr).sum(axis=0) - - totalCountArr = totalCountArr[np.where(totalCountArr > 0)] - min = np.min(totalCountArr) - q1 = np.quantile(totalCountArr, 0.25) - q2 = np.quantile(totalCountArr, 0.5) - q3 = np.quantile(totalCountArr, 0.75) - q4 = np.quantile(totalCountArr, 1) - iqr = q3 - q1 - qLower = q1 - 1.5 * iqr - qUpper = q3 + 1.5 * iqr - - assert (q4 == np.max(totalCountArr)) - print(scalerDataFolder.split('/')[-2], min, q1, q2, q3, q4, iqr, qLower, qUpper, np.sum(totalCountArr), sep='\t') - - -# 
steven@masslabserv1:~/Downloads/2022-11-23_10-21-06$ find . -name "scalerdata*" -scalerDataFolders = [ - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/scalerdata_19148850692747664', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/scalerdata_19148905483325260', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.canneal_0/scalerdata_19149009421840348', - None, - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.facesim_0/scalerdata_19149183735878138', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.ferret_0/scalerdata_19149441937366104', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/scalerdata_19149498481345624', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/scalerdata_19149660473046832', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/scalerdata_19149730167129240', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/scalerdata_19150235160442436', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/scalerdata_19150507898053624', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.vips_0/scalerdata_19150561039693292', - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.x264_0/scalerdata_19150582352742288', - 
'/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.db.memcached.memcached_1_6_17_0/scalerdata_19150608805586386', - None, - '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.db.redis.redis_7_0_4_0/scalerdata_19150762055485288', - None, - None, - None, - None, - None, -] - -print('Thread inovked API # imbalance Analysis') -for scalerDataFolder in scalerDataFolders: - printInvocNumberPerThread(scalerDataFolder) - -print('API inovked CNT Analysis') -for scalerDataFolder in scalerDataFolders: - printInvocCntPerAPI(scalerDataFolder) diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py b/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py index 3153795c..d203d2fd 100644 --- a/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py +++ b/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py @@ -9,6 +9,10 @@ from multiprocessing import Pool, cpu_count import time +from Analyzer.PyVisualizer.src.V3.datastructure.TimingStruct import ArrayDescriptor +from Analyzer.PyVisualizer.src.V3.util.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming +from Analyzer.PyVisualizer.src.V3.util.TimeOutputPrarser import readSymbolFiles + def removeOutliers(x): a = np.array(x) @@ -29,43 +33,39 @@ def shouldSkip(timingArr): return c1 == 0 and c2 == 0 -def saveSinglePlot(ROOT_PATH, symbolNameList, threadIdSet, symId): +def saveSinglePlot(ROOT_PATH, symbolNameList, threadId, tgtSymId): # hasPoints = False + # ROOT_PATH: str, symbolNameList: list, threadId: str, tgtSymIds: list): + detailedTimingDict = parseSingleSymDetailedTiming(ROOT_PATH, threadId, [tgtSymId]) + + for symId, detailedTimingArr in detailedTimingDict.items(): + fig, (ax1, ax2) = plt.subplots(2) + + # if detailedTimingArr.shape[0] < 1001: + # continue + # + # skipThis = shouldSkip(detailedTimingArr) + # if skipThis: + # continue + + ax1.scatter(np.arange(detailedTimingArr.shape[0]), detailedTimingArr, s=10) + # Calculate the first 500 mean + mean = 
np.average(detailedTimingArr[0:500]) + meanUpperbound = mean * (1 + 0.01) + meanLowerbound = mean * (1 - 0.01) + + ax2.scatter(np.arange(min(1000,detailedTimingArr.shape[0])), detailedTimingArr[0:min(1000,detailedTimingArr.shape[0])], s=10) + ax2.hlines(meanUpperbound, 0, detailedTimingArr.shape[0], colors='red') + ax2.hlines(meanLowerbound, 0, detailedTimingArr.shape[0], colors='red') + hasPoints = True + + print(os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId))) + # if hasPoints: + fig.savefig( + os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId))) + print( + os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId))) - for i, threadId in enumerate(threadIdSet): - with open(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)), - 'rb') as f: - fig, (ax1, ax2) = plt.subplots(2) - byteArr1 = f.read() - elemSize = np.fromfile(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)), - np.int64, count=1, offset=0)[0] - if elemSize == 0: - continue - symbolTiming = np.fromfile( - os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)), - np.int64, offset=8) - skipThis = shouldSkip(symbolTiming) - if skipThis: - continue - if symbolTiming.shape[0] < 1001: - continue - ax1.scatter(np.arange(elemSize), symbolTiming, s=10) - # Calculate the first 500 mean - mean = np.average(symbolTiming[0:500]) - meanUpperbound = mean * (1 + 0.01) - meanLowerbound = mean * (1 - 0.01) - - # ax2.text(i*50, i * 20, str(np.var(symbolTiming[0:500]))) - ax2.scatter(np.arange(1000), symbolTiming[0:1000], s=10) - ax2.hlines(meanUpperbound, 0, elemSize, colors='red') - ax2.hlines(meanLowerbound, 0, elemSize, colors='red') - # hasPoints = True - - # if hasPoints: - fig.savefig( - os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId))) 
- print( - os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId))) return 0 @@ -78,51 +78,35 @@ def error_callback(e): def doIt(ROOT_PATH, pool, rltList): print('========================', ROOT_PATH) allFiles = os.listdir(ROOT_PATH) - threadIdSet = set({}) - maxSymId = 0 + symbolNum = 0 + + recInfo = readSymbolFiles(ROOT_PATH) + threadSymInfo = dict({}) # Threadid : symbol size for fileName in allFiles: if fileName.startswith('threadDetailedTiming') and fileName.endswith('.bin'): - _, threadId, symbolId = fileName.replace('.bin', '').split('_') - symbolId = int(symbolId) - if symbolId > maxSymId: - maxSymId = symbolId - threadIdSet.add(threadId) - - df = pd.read_csv(os.path.join(ROOT_PATH, 'fileName.txt')) - fileNameList = df['pathName'].to_list() - - df = pd.read_csv(os.path.join(ROOT_PATH, 'symbolInfo.txt')) - symbolNameList = df['funcName'].to_list() - symbolFileIdList = df['fileId'].to_list() - symIdInFile = df['symIdInFile'].to_list() - - print('Deploying tasks to pool') - # for symId in range(maxSymId): - # saveSinglePlot(ROOT_PATH, symbolNameList, threadIdSet, symId) - for symId in range(maxSymId): - res = pool.apply_async(saveSinglePlot, args=[ROOT_PATH, symbolNameList, threadIdSet, symId], - error_callback=error_callback) - rltList.append(res) - - -pool = Pool(60) + _, threadId = fileName.replace('.bin', '').split('_') + with open(os.path.join(ROOT_PATH, fileName), 'rb') as f: + symDetailedTimingDesc = ArrayDescriptor() + f.readinto(symDetailedTimingDesc) + assert (symDetailedTimingDesc.arrayElemSize == 0) + assert (symDetailedTimingDesc._magicNum == 167) + symbolNum = symDetailedTimingDesc.arraySize + threadSymInfo[threadId] = symbolNum + + for symId in range(symbolNum): + res = pool.apply_async(saveSinglePlot, args=[ROOT_PATH, recInfo.symbolNameList, threadId, symId], + error_callback=error_callback) + rltList.append(res) + + return rltList + + +pool = Pool(1) rltList = [] -for i in [ - 
'scalerdata_6364935512299934', - 'scalerdata_6364979105953714', - 'scalerdata_6365014036860570', - 'scalerdata_6365088124846144', - 'scalerdata_6365123879328866', - 'scalerdata_6365618607468352', - 'scalerdata_6365739459778370', - 'scalerdata_6365776935349298', - 'scalerdata_6365841128804326', - 'scalerdata_6366139523773026', - 'scalerdata_6366165053302622' -]: - ROOT_PATH = '/media/umass/datasystem/steven/Downloads/CurStrategy1/' + i +for ROOT_PATH in ['/tmp/scalerdata_14676207526291652']: doIt(ROOT_PATH, pool, rltList) + pool.close() while len(rltList) > 0: time.sleep(2) diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py b/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py new file mode 100644 index 00000000..e38a8cbd --- /dev/null +++ b/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py @@ -0,0 +1,239 @@ +import os +import traceback +from collections import defaultdict + +from matplotlib import cm +import matplotlib.pyplot as plt +import pandas as pd +import struct +import numpy as np +from multiprocessing import Pool +from multiprocessing import Pool, cpu_count +import time + +from Analyzer.PyVisualizer.src.V3.util.Quantile import calcQuantile +from datastructure.TimingStruct import ArrayDescriptor +from util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming +from util.Parser.TimeOutputPrarser import readSymbolFiles, aggregatePerThreadArray, readTimingStruct +import numpy as np + + +def calcInvokedApiNum(scalerDataFolder, recInfo): + invokedAPIs = [] + totalAPIs = [] + for threadId in recInfo.threadIdList: + curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) + curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0] + invokedAPIs.append(len(curThreadInvokedRecArray)) + totalAPIs.append(len(curThreadRecArray)) + return invokedAPIs, totalAPIs + + +def calcInvokedApiCNT(scalerDataFolder, recInfo): + invokedAPICnts = [] + for threadId in recInfo.threadIdList: + curThreadRecArray = 
readTimingStruct(scalerDataFolder, threadId) + invokedAPICnts.append(np.sum([rec.count for rec in curThreadRecArray])) + return invokedAPICnts + + +def printInvocNumberByEachThread(scalerDataFolder): + if scalerDataFolder is None: + print() + return + + recInfo = readSymbolFiles(scalerDataFolder) + invokedAPIs, totalAPIs = calcInvokedApiNum(scalerDataFolder, recInfo) + + totalInvocationCnts = 0 + + for threadId in recInfo.threadIdList: + curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) + # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0] + + times = np.array([rec.count for rec in curThreadRecArray]) + totalInvocationCnts += np.sum(times) + + minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(invokedAPIs) + + print(scalerDataFolder.split('/')[-3], minimum, q1, q2, q3, q4, iqr, qLower, qUpper, len(invokedAPIs), totalAPIs[0], + totalInvocationCnts, sep='\t') + + +def printInvocCntByEachThread(scalerDataFolder): + if scalerDataFolder is None: + print() + return + + recInfo = readSymbolFiles(scalerDataFolder) + invokedAPIs = calcInvokedApiCNT(scalerDataFolder, recInfo) + + totalInvocationCnts = 0 + + for threadId in recInfo.threadIdList: + curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) + # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0] + + times = np.array([rec.count for rec in curThreadRecArray]) + totalInvocationCnts += np.sum(times) + + minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(invokedAPIs) + + print(scalerDataFolder.split('/')[-3], minimum, q1, q2, q3, q4, iqr, qLower, qUpper, totalInvocationCnts, sep='\t') + + +def printInvocCnt(scalerDataFolder): + if scalerDataFolder is None: + print() + return + + recInfo = readSymbolFiles(scalerDataFolder) + + totalCountArr = None + for threadId in recInfo.threadIdList: + curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) + # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if 
rec.count > 0] + + times = np.array([rec.count for rec in curThreadRecArray]) + if totalCountArr is None: + totalCountArr = times + else: + totalCountArr = np.vstack([totalCountArr, times]) + + if len(totalCountArr.shape)==2: + # if totalCountArr.shape + totalCountArr = np.array(totalCountArr).sum(axis=0) + + + totalCountArr1 = totalCountArr[np.where(totalCountArr > 0)] + + minima, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(totalCountArr1) + print(scalerDataFolder.split('/')[-3], minima, q1, q2, q3, q4, iqr, qLower, qUpper, np.sum(totalCountArr1), sep='\t') + + +class APIInfo: + def __init__(self): + self.name = None + self.cntSum = [] + self.timeAvg = [] + self.timeVa = [] + self.timeAvgDenoise = None + self.timeVarDenoise = None + + +def printPerAPIInfoAndCnts(scalerDataFolder): + if scalerDataFolder is None: + print() + return + print(scalerDataFolder.split('/')[-3]) + recInfo = readSymbolFiles(scalerDataFolder) + + totalCountArr = None + totalVarianceArry = None + for threadId in recInfo.threadIdList: + curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) + + counts = np.array([rec.count for rec in curThreadRecArray]) + if totalCountArr is None: + totalCountArr = counts + else: + totalCountArr = np.vstack([totalCountArr, counts]) + if len(totalCountArr.shape)==2: + # if totalCountArr.shape + totalCountArr = np.array(totalCountArr).sum(axis=0) + + sortedNameCntTuple = [ + (i, recInfo.symbolNameList[i], recInfo.symbolFileIdList[i], recInfo.fileNameList[recInfo.symbolFileIdList[i]], + recInfo.realFileIdList[i], recInfo.fileNameList[recInfo.realFileIdList[i]], totalCountArr[i]) for i in + range(totalCountArr.shape[0]) if + totalCountArr[i] > 0] + + for symId, symName, invokerFIleId, invokerFileName, realFileId, realFileName, count in sorted(sortedNameCntTuple, + reverse=True, + key=lambda x: x[-1]): + print(symId, symName, invokerFIleId, invokerFileName.split('/')[-1], realFileId, realFileName.split('/')[-1], + count, sep='\t') + + +def 
printPerLibInfoAndCnts(scalerDataFolder): + if scalerDataFolder is None: + print() + return + print(scalerDataFolder.split('/')[-3]) + recInfo = readSymbolFiles(scalerDataFolder) + + totalCountArr = None + totalVarianceArry = None + for threadId in recInfo.threadIdList: + curThreadRecArray = readTimingStruct(scalerDataFolder, threadId) + + counts = np.array([rec.count for rec in curThreadRecArray]) + if totalCountArr is None: + totalCountArr = counts + else: + totalCountArr = np.vstack([totalCountArr, counts]) + if len(totalCountArr.shape)==2: + # if totalCountArr.shape + totalCountArr = np.array(totalCountArr).sum(axis=0) + + sortedNameCntTuple = [ + (i, recInfo.symbolNameList[i], recInfo.symbolFileIdList[i], recInfo.fileNameList[recInfo.symbolFileIdList[i]], + recInfo.realFileIdList[i], recInfo.fileNameList[recInfo.realFileIdList[i]], totalCountArr[i]) for i in + range(totalCountArr.shape[0]) if + totalCountArr[i] > 0] + + libFileDict = defaultdict(int) + + for symId, symName, invokerFIleId, invokerFileName, realFileId, realFileName, count in sorted(sortedNameCntTuple, + reverse=True, + key=lambda x: x[-1]): + libFileDict[realFileName] += count + + countList = list(libFileDict.items()) + countList = sorted(countList, key=lambda x: x[0]) + for name, count in countList: + print(name, count, sep='\t') + + +# steven@masslabserv1:~/Downloads/DistributionAnalysis$ find . 
-name "scalerdata*" +scalerDataFolders = [ + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380', + 
'/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.redis.redis_7_0_4_0/Scaler-DETAIL-Artifects/scalerdata_1100850174945384', + 
'/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.sqlite.sqlite_3_39_4_0/Scaler-DETAIL-Artifects/scalerdata_1101150204449494' +] + +print('Thread inovked API #') +for scalerDataFolder in scalerDataFolders: + printInvocNumberByEachThread(scalerDataFolder) + +print('Thread inovked API CNT') +for scalerDataFolder in scalerDataFolders: + printInvocCntByEachThread(scalerDataFolder) + +print('API invocation CNT Analysis') +for scalerDataFolder in scalerDataFolders: + printInvocCnt(scalerDataFolder) + +print('Per-API infos') +for scalerDataFolder in scalerDataFolders: + printPerAPIInfoAndCnts(scalerDataFolder) + +print('Per-Lib infos') +for scalerDataFolder in scalerDataFolders: + printPerLibInfoAndCnts(scalerDataFolder) diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py b/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py new file mode 100644 index 00000000..4e4a6661 --- /dev/null +++ b/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py @@ -0,0 +1,180 @@ +import math +import os +import traceback +from collections import defaultdict + +import matplotlib.pyplot as plt +import pandas as pd +import struct +import numpy as np +from multiprocessing import Pool +from multiprocessing import Pool, cpu_count +import time +import math + +from Analyzer.PyVisualizer.src.V3.datastructure.TimingStruct import ArrayDescriptor +from Analyzer.PyVisualizer.src.V3.util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming +from Analyzer.PyVisualizer.src.V3.util.Parser.TimeOutputPrarser import readSymbolFiles, readTimingStruct +from Analyzer.PyVisualizer.src.V3.util.Quantile import calcQuantile + + +class APIMetric: + def __init__(self): + self.estimated = False + self.estimatedSum = 0 + self.realSum = 0 + self.invocationCnt = 0 + + def getErrorRate(self): + assert (self.realSum > 0) + return abs(self.realSum - self.estimatedSum) / self.realSum + + +def 
methodBoundAndNaiveClipping(symId, recInfo, timingArr): + def shouldSkip(timingArr): + if timingArr.shape[0] <= 500: + return False + mean = np.average(timingArr[0:500]) + meanUpperbound = mean * (1 + 0.50) + meanLowerbound = mean * (1 - 0.50) + + c1 = np.where(timingArr[500:1000] > meanUpperbound)[0].shape[0] + c2 = np.where(timingArr[500:1000] < meanLowerbound)[0].shape[0] + return c1 == 0 and c2 == 0 + + def removeOutliersByPercentage(x, lowerPerc, upperPerc): + a = np.array(x) + upper_quartile = np.percentile(a, upperPerc) + lower_quartile = np.percentile(a, lowerPerc) + return x[np.logical_and((lower_quartile <= x), (x <= upper_quartile))] + + skipped = shouldSkip(timingArr) + estimatedSum = np.sum(timingArr) + + if skipped: + outlierRemovedTimArray = removeOutliersByPercentage(timingArr[500:1000], 5, 95) + estimatedSum = np.average(outlierRemovedTimArray) * timingArr.shape[0] + + return skipped, estimatedSum + + +def methodPreEstimation(symId, recInfo, timingArr): + shouldSkip = False + + if timingArr.shape[0] <= 500: + shouldSkip = False + else: + shouldSkip = True + + def removeOutliers(timingArr): + rlt = [] + prevVal = timingArr[0] + threshold = 100 + for i in range(1, timingArr.shape[0]): + if abs(timingArr[i] - prevVal) < threshold: + rlt.append(timingArr[i]) + else: + rlt.append(prevVal) + prevVal = timingArr[i] + return np.array(rlt) + + realSum = np.sum(timingArr) + + if not shouldSkip: + return shouldSkip, realSum + + # Test Prediction + outlierRemovedFirst500 = removeOutliers(timingArr[0:500]) + estimatedSum = np.mean(outlierRemovedFirst500) * timingArr.shape[0] + + if abs(estimatedSum-realSum)/realSum < 0.02: + shouldSkip=True + return shouldSkip, estimatedSum + else: + shouldSkip=False + return shouldSkip, realSum + +def methodEstimation(symId, recInfo, timingArr): + pass + + +def analyzeOutlierRemovalTechnique(ROOT_PATH, methodFunction): + if ROOT_PATH is None: + print() + return + allFiles = os.listdir(ROOT_PATH) + symbolNum = 0 + + 
allInvocationRelationCnt = 0 + skippedApiCnt = 0 + allNonZeroApiCnt = 0 + + recInfo = readSymbolFiles(ROOT_PATH) + threadSymInfo = dict({}) # Threadid : symbol size + + # print('=====> ', ROOT_PATH) + apiMetricsPerApp = [] + totalAPICount = 0 + + totalInvocationCnt = 0 + for fileName in allFiles: + if fileName.startswith('threadDetailedTiming') and fileName.endswith('.bin'): + # Read symbol number in threads + _, threadId = fileName.replace('.bin', '').split('_') + + detailedTimingArr = parseSingleSymDetailedTiming(ROOT_PATH, threadId, None) + recArrForThisThread = readTimingStruct(ROOT_PATH, threadId) + totalAPICount = len(detailedTimingArr) + apiMetricsPerThread = [] + + for symId, timingArr in detailedTimingArr.items(): + curMetric = APIMetric() + curMetric.estimated, curMetric.estimatedSum = methodFunction(symId, recInfo, timingArr) + curMetric.realSum = np.sum(timingArr) + curMetric.invocationCnt = recArrForThisThread[symId].count + totalInvocationCnt += recArrForThisThread[symId].count + if curMetric.realSum > 0 and curMetric.getErrorRate() > 0: + apiMetricsPerThread.append(curMetric) + apiMetricsPerApp.append(curMetric) + # if len(apiMetricsPerThread) > 0: + # # Print table for estimated value per API + # minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile( + # [apiMetric.getErrorRate() for apiMetric in apiMetricsPerThread]) + # estimatedNum = np.sum([1 for apiMetric in apiMetricsPerThread if apiMetric.estimated]) + # totalAPICount = len(detailedTimingArr) + # print(fileName, minimum, q1, q2, q3, q4, iqr, qLower, qUpper, estimatedNum, totalAPICount, sep='\t') + + if len(apiMetricsPerApp) > 0: + minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile( + [apiMetric.getErrorRate() for apiMetric in apiMetricsPerApp]) + estimatedNum = np.sum([1 for apiMetric in apiMetricsPerApp if apiMetric.estimated]) + skippedApiCnt = np.sum([apiMetric.invocationCnt for apiMetric in apiMetricsPerApp if apiMetric.estimated]) + print(ROOT_PATH.split('/')[-3], minimum, 
q1, q2, q3, q4, iqr, qLower, qUpper, estimatedNum, totalAPICount, + skippedApiCnt, skippedApiCnt / totalInvocationCnt, totalInvocationCnt, + sep='\t') + else: + print(ROOT_PATH.split('/')[-2]) + + +for ROOT_PATH in [ + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984', + 
'/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962', + 
def findAppStart(lines):
    """
    Locate every benchmark section inside the captured output of parsecmgmt.

    A section starts at a line beginning with '[PARSEC] [========== Running ' and its
    program output ends at the next '[PARSEC] [---------- End of output ----------]'
    line. The k-th end marker is paired with the k-th start marker.

    :param lines: Output of parsecmgmt as a list of text lines
    :return benchmarkName: Benchmark names in the order they were run
    :return outputTuple: One [startLineNo, endLineNo] pair per benchmark. endLineNo
                         stays 0 when no matching end marker exists
    """
    startMarker = '[PARSEC] [========== Running '
    endMarker = '[PARSEC] [---------- End of output ----------]'

    benchmarkName = []
    outputTuple = []
    endLineNoList = []
    for lineNo, line in enumerate(lines):
        if line.startswith(startMarker):
            benchmarkName.append(line.replace('[PARSEC] [========== Running benchmark', '')
                                 .replace(' [1] ==========]', '').strip())
            outputTuple.append([lineNo, 0])
        elif line.startswith(endMarker):
            endLineNoList.append(lineNo)

    # zip() stops at the shorter list, so surplus end markers (e.g. a truncated or
    # concatenated log) are ignored instead of raising IndexError.
    for curTuple, endLineNo in zip(outputTuple, endLineNoList):
        curTuple[1] = endLineNo
    return benchmarkName, outputTuple
def printMatrix(matrixName, preloadList, benchmarkNameList, matrix):
    """
    Print a tab separated table with one row per preload configuration and one
    column per benchmark.

    :param matrixName: Title printed on its own line before the table
    :param preloadList: Sequence of (preloadName, preloadCmd); only the name is printed
    :param benchmarkNameList: Column headers
    :param matrix: Values to print. Either 2-D (preload x benchmark) or, when only one
                   preload configuration was measured, a 1-D vector of per-benchmark values
    """
    # With a single preload configuration the aggregated result is a 1-D vector.
    # Promote it to one row, otherwise matrix.shape[1] below would not exist.
    matrix = np.atleast_2d(matrix)

    print(matrixName)
    print(' \t', end='')
    for benchmarkName in benchmarkNameList:
        print(benchmarkName, end='\t')
    print()

    for rowId, rowValues in enumerate(matrix):
        print(preloadList[rowId][0], end='\t')
        for value in rowValues:
            print(value, end='\t')
        print()
    print()
np.average(timeArray, axis=0) + else: + avgRealTimes = np.vstack([avgRealTimes, np.average(timeArray, axis=0)]) + + if stdRealTimes is None: + stdRealTimes = np.std(timeArray, axis=0) + else: + stdRealTimes = np.vstack([stdRealTimes, np.std(timeArray, axis=0)]) + + if avgMem is None: + avgMem = np.average(memArray, axis=0) + else: + avgMem = np.vstack([avgMem, np.average(memArray, axis=0)]) + + if stdMem is None: + stdMem = np.std(memArray, axis=0) + else: + stdMem = np.vstack([stdMem, np.std(memArray, axis=0)]) + return avgRealTimes, stdRealTimes, avgMem, stdMem, outputBenchNameList + + +''' +Find max output id +''' + +TIME_COMMAND = "/usr/bin/time -f 'real:%e, user:%U, sys:%S, memKB:%M' " +RUNTIMES = 1 +CONFIG_NAMES = ['blackscholes', 'bodytrack', 'facesim', 'ferret', 'fluidanimate', 'freqmine', 'raytrace', 'swaptions', + 'vips', 'x264'] +# PRELOAD_LIST = [('Default', TIME_COMMAND), ('Perf', TIME_COMMAND + 'perf record -g -o perf.data')] +PRELOAD_LIST = [('Scaler', 'export LD_PRELOAD=/media/umass/datasystem/steven/Scaler/cmake-build-release/libHook-c/libScalerHook-HookAutoAsm-C.so')] + +outputFolder = tempfile.mkdtemp() +runBenchmark(RUNTIMES, CONFIG_NAMES, PRELOAD_LIST, 'simsmall', 64, outputFolder) + +avgRealTimes, stdRealTimes, avgMem, stdMem, outputBenchNameList = packOutput(PRELOAD_LIST, outputFolder) +printMatrix('Table of average runtime', PRELOAD_LIST, outputBenchNameList, avgRealTimes) +printMatrix('Table of std runtime', PRELOAD_LIST, outputBenchNameList, stdRealTimes) +printMatrix('Table of average memory', PRELOAD_LIST, outputBenchNameList, avgMem) +printMatrix('Table of std memory', PRELOAD_LIST, outputBenchNameList, stdMem) diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py b/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py new file mode 100644 index 00000000..d25df8a0 --- /dev/null +++ b/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py @@ -0,0 +1,108 @@ +import os +import traceback +from collections import defaultdict + +import 
class StackElem:
    """One resolved call stack frame: the library it belongs to and the function name."""

    def __init__(self, libName, funcName):
        self.libName = libName
        self.funcName = funcName

    def __str__(self):
        return self.libName + " " + self.funcName


def analyzePerfScript(rootPath):
    """
    Estimate per-library invocation counts from the text output of `perf script`.

    The file is read as a sequence of samples: one header line, one line per stack
    frame (innermost first) and a blank line that closes the sample. Two consecutive
    samples are compared from the outermost frame inwards; every frame of the older
    sample from the first mismatch on is treated as an invocation that has ended and
    adds one to the counter of its library.

    Frames whose symbol is '[unknown]' or whose library is '([kernel.kallsyms])' are
    ignored. Samples whose header starts with 'perf' are ignored entirely.

    :param rootPath: Path of the perf script text file
    :return: defaultdict(int) mapping library name to the estimated invocation count
    """
    callCountDict = defaultdict(int)

    def countEndedFrames(prevStack, nextStack):
        # Everything in prevStack from the first differing frame on has returned
        for i, prevElem in enumerate(prevStack):
            if i >= len(nextStack) or prevElem.funcName != nextStack[i].funcName:
                for endedElem in prevStack[i:]:
                    callCountDict[endedElem.libName] += 1
                break

    lastStack = []
    curStack = []
    skipThisStack = False
    expectHeader = True  # The very first line of the file is a sample header
    with open(rootPath, 'r') as f:
        # Iterating the file ends at EOF even when the trailing blank line is missing;
        # readline() keeps returning '' there, which used to be parsed as a frame forever.
        for line in f:
            if expectHeader:
                headerFields = line.split()
                # A blank header has no fields; treat it as a normal (non perf) sample
                skipThisStack = bool(headerFields) and headerFields[0] == 'perf'
                expectHeader = False
                continue

            if line == '\n':
                # Sample finished
                countEndedFrames(lastStack, curStack)
                lastStack = curStack
                curStack = []
                expectHeader = True
                continue

            if skipThisStack:
                continue

            line = line.strip()
            if not line:
                continue
            addrEndI = line.find(' ')
            libStartI = line.rfind(' ') + 1
            libName = line[libStartI:]
            funcNameAddr = line[addrEndI:libStartI].strip()
            if funcNameAddr != '[unknown]' and libName != '([kernel.kallsyms])':
                plusInd = funcNameAddr.rfind('+')
                # Symbols printed without "+offset" must be kept whole; slicing with
                # -1 would silently drop their last character.
                funcName = funcNameAddr[0:plusInd] if plusInd != -1 else funcNameAddr
                # perf lists the innermost frame first, the stack is stored outermost first
                curStack.insert(0, StackElem(libName[1:-1], funcName))

    if curStack:
        # File ended without the closing blank line of the last sample
        countEndedFrames(lastStack, curStack)
        lastStack = curStack

    # Invocations still on the stack in the final sample end with the trace
    for elem in lastStack:
        callCountDict[elem.libName] += 1
    return callCountDict
'/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Perf-Artifects/script.txt', + # 
'/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.vips_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.x264_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.memcached.memcached_1_6_17_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.mysql.mysql_8_0_31_0/Perf-Artifects/script.txt', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.redis.redis_7_0_4_0/Perf-Artifects/script.txt', + '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.db.sqlite.sqlite_3_39_4_0/Perf-Artifects/script.txt', + '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.webserver.httpd.httpd_2_4_54_0/Perf-Artifects/script.txt', + '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.webserver.nginx.nginx_1_23_2_0/Perf-Artifects/script.txt', + # None, + # None, + # None, + # None, +]: + callDict = analyzePerfScript(ROOT_PATH) + print(ROOT_PATH) + itemList = list(callDict.items()) + itemList = sorted(itemList, key=lambda x: x[0]) + for libName, counts in itemList: + print(libName, counts, sep='\t') diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py b/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py new file mode 100644 index 00000000..edf258d8 --- /dev/null +++ b/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py @@ -0,0 +1,103 @@ +import os +import traceback +from collections import defaultdict + +from matplotlib import cm +import matplotlib.pyplot as plt +import pandas as pd +import struct +import numpy as np +from multiprocessing import Pool +from multiprocessing 
def parsePerLibSelfTime(scalerDataFolder):
    """
    Sum the recorded clock cycles of all threads and group them by the library file
    that really contains each symbol.

    :param scalerDataFolder: Scaler output data folder. None means the folder was not found
    :return: defaultdict(int) mapping library file name to accumulated clock cycles.
             Empty when the folder is None or no thread timing was recorded, so callers
             can always iterate over .items()
    """
    libFileDict = defaultdict(int)
    if scalerDataFolder is None:
        # Keep emitting the blank line that marks a missing folder in the report
        print()
        return libFileDict
    recInfo = readSymbolFiles(scalerDataFolder)

    perThreadCycles = []
    for threadId in recInfo.threadIdList:
        curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
        perThreadCycles.append(np.array([rec.totalClockCycles for rec in curThreadRecArray]))

    if len(perThreadCycles) == 0:
        # No threadTiming file was found, nothing to aggregate
        return libFileDict

    # One row per thread, one column per record. vstack also yields a 2-D array for a
    # single thread.
    totalTimingArr = np.vstack(perThreadCycles)

    cyclesPerSymbol = []
    # The last record of every thread is skipped on purpose.
    # NOTE(review): presumably it holds the thread's own execution time rather than a
    # symbol - confirm against the timing file format.
    for i in range(totalTimingArr.shape[1] - 1):
        if np.sum(totalTimingArr[:, i] > 0):
            # NOTE(review): realFileIdList[i] is used as a direct index into fileNameList;
            # verify it can never be equal to len(fileNameList).
            realFileName = recInfo.fileNameList[recInfo.realFileIdList[i]]
            cyclesPerSymbol.append((realFileName, np.sum(totalTimingArr[:, i])))

    # Largest contributors first, so the dict lists libraries in the same order as before
    for realFileName, cycles in sorted(cyclesPerSymbol, reverse=True, key=lambda x: x[-1]):
        libFileDict[realFileName] += cycles

    return libFileDict
def saveSinglePlot(ROOT_PATH, symbolNameList, threadId, tgtSymId, graphType='hist'):
    """
    Plot the detailed timing samples of one symbol in one thread and save the figure to
    <ROOT_PATH>/DetailedTime/<graphType>/<symbolName>/.

    Symbols rejected by shouldPlt() are skipped without creating a figure.

    :param ROOT_PATH: Scaler output data folder, also the root of the generated images
    :param symbolNameList: Symbol names indexed by symbol id
    :param threadId: Id of the thread whose timing file is parsed
    :param tgtSymId: Id of the symbol to plot
    :param graphType: 'hist' for a 50 bin histogram, 'scatter' for duration by sample index
    :return: Always 0
    :raises ValueError: If graphType is neither 'hist' nor 'scatter'
    """
    # Validate explicitly; an assert would disappear under "python -O"
    if graphType not in ('hist', 'scatter'):
        raise ValueError('Unsupported graphType: %s' % graphType)

    detailedTimingDict = parseSingleSymDetailedTiming(ROOT_PATH, threadId, [tgtSymId])

    for symId, detailedTimingArr in detailedTimingDict.items():
        # Decide before allocating the figure. Creating it first and then skipping
        # leaked one open figure per skipped symbol inside the worker process.
        if not shouldPlt(detailedTimingArr):
            continue

        # Two panels are kept so the image layout does not change; only the left one is drawn
        fig, (ax1, _) = plt.subplots(nrows=1, ncols=2)
        try:
            if graphType == 'hist':
                ax1.hist(detailedTimingArr, range=(1, detailedTimingArr.max()), edgecolor="black", bins=50)
            else:
                ax1.scatter(np.arange(detailedTimingArr.shape[0]), detailedTimingArr, s=10)

            outputDir = os.path.join(ROOT_PATH, 'DetailedTime', graphType, symbolNameList[symId])
            os.makedirs(outputDir, exist_ok=True)
            fig.savefig(
                os.path.join(outputDir,
                             'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
        finally:
            # Release the figure even if plotting or saving failed
            plt.close(fig)
    return 0
symbolNum = symDetailedTimingDesc.arraySize + threadSymInfo[threadId] = symbolNum + + for symId in range(symbolNum): + res = pool.apply_async(saveSinglePlot, + args=[ROOT_PATH, recInfo.symbolNameList, threadId, symId, 'scatter'], + error_callback=error_callback) + rltList.append(res) + + return rltList + + +pool = Pool(64) +rltList = [] +scalerDataFolders = [ + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448', + 
'/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668', + # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482', + 
'/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.redis.redis_7_0_4_0/Scaler-DETAIL-Artifects/scalerdata_1100850174945384', + '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.sqlite.sqlite_3_39_4_0/Scaler-DETAIL-Artifects/scalerdata_1101150204449494' +] + +for ROOT_PATH in scalerDataFolders: + if ROOT_PATH is not None: + recInfo = readSymbolFiles(ROOT_PATH) + doIt(ROOT_PATH, pool, rltList) + +pool.close() +while len(rltList) > 0: + time.sleep(2) + rltList = [rlt for rlt in rltList if not rlt.ready()] + print("%d jobs left" % len(rltList)) +pool.join() diff --git a/Analyzer/PyVisualizer/src/V3/main.py b/Analyzer/PyVisualizer/src/V3/XFAVisualization.py similarity index 56% rename from Analyzer/PyVisualizer/src/V3/main.py rename to Analyzer/PyVisualizer/src/V3/XFAVisualization.py index 39ac125a..536019d3 100644 --- a/Analyzer/PyVisualizer/src/V3/main.py +++ b/Analyzer/PyVisualizer/src/V3/XFAVisualization.py @@ -3,23 +3,36 @@ import pandas as pd import struct import re + +from util.Analyzer.XFA import generateXFAStruct from datastructure.TimingStruct import FileRecord, RecTuple -from preProcessing import aggregatePerThreadArray, generateTimingStruct, calcPercentage, readSymbolFiles +from util.Parser.TimeOutputPrarser import aggregatePerThreadArray, readSymbolFiles # scalerDataFolder = '/media/umass/datasystem/steven/benchmark/parsec/tests/dedup/scalerdata_30414326191467414' -scalerDataFolder = '/media/umass/datasystem/steven/intel/Perf_Scaler-Parsec-Callgraph-Sig2022Fall/x264/scalerdata_12852017355851478_FGDS' +scalerDataFolder = 
class Metric:
    """
    Collects repeated measurements of one quantity and reports their mean and
    (population) standard deviation.
    """

    def __init__(self):
        # Raw measurements in insertion order
        self.metricList = []

    def append(self, *args, **kwargs):
        """Forward the arguments unchanged to the underlying list's append()."""
        recorder = self.metricList.append
        recorder(*args, **kwargs)

    def std(self):
        """Return the standard deviation of all collected measurements."""
        samples = np.array(self.metricList)
        return np.std(samples)

    def mean(self):
        """Return the arithmetic mean of all collected measurements."""
        samples = np.array(self.metricList)
        return np.mean(samples)
class RecTuple(Structure):
    """
    ctypes mirror of one timing record written by the hook library.

    Field order and widths define the binary layout, so they must stay consistent with
    RecTuple.h of libHook-c; do not reorder or resize fields here alone.
    """
    _fields_ = [
        # Clock cycles accumulated for this record
        ('totalClockCycles', c_uint64),
        # Number of invocations recorded
        ('count', c_int64),
        # Fields prefixed with "_" are presumably internal bookkeeping of the hook
        # library - TODO confirm their meaning against RecTuple.h
        ('_prevCount', c_int64),
        ('_gap', c_uint32),
        ('_meanClockTick', c_float),
        ('_durThreshold', c_uint32),
        ('_flags', c_uint32)]
def binSearch(A, tgt):
    """
    Return lower bound of the segment array, i.e. the index of the last element that is
    strictly smaller than the target.

    :param A: Array sorted in ascending order
    :param tgt: Target
    :return: Lower bound of segments in the segment array. -1 if no element is smaller
             than the target (this includes an empty array)
    """
    # bisect_left only ever evaluates "element < tgt", so it always terminates. The
    # previous hand written loop took no branch for incomparable targets such as NaN
    # and never finished.
    from bisect import bisect_left

    return bisect_left(A, tgt) - 1
defaultdict - - -def readSymbolFiles(scalerDataFolder): - rlt = RecordingInfo() - - df = pd.read_csv(os.path.join(scalerDataFolder, 'fileName.txt')) - rlt.fileNameList = df['pathName'].to_list() - rlt.pthreadFileId = parsePthreadId(rlt.fileNameList) - - for fileName in os.listdir(scalerDataFolder): - if fileName.startswith('threadTiming_'): - rlt.threadIdList.append(fileName[len('threadTiming_'): -4]) - - with open(os.path.join(scalerDataFolder, 'realFileId.bin'), 'rb') as f: - byteArr1 = f.read() - arraySize = struct.unpack_from('Q', byteArr1, 0) # The first element is the array size - rlt.realFileIdList = list(struct.unpack_from('<%dQ' % (arraySize), byteArr1, - 8)) # The last id marks the creator thread - df = pd.read_csv(os.path.join(scalerDataFolder, 'symbolInfo.txt')) - rlt.symbolNameList = df['funcName'].to_list() - rlt.symbolFileIdList = df['fileId'].to_list() - rlt.symIdInFileList = df['symIdInFile'].to_list() - return rlt - - -def readTimingStruct(threadFileFullPath): - recDataArr = [] - recTupleSize = 8 + 8 + 4 + 4 + 4 + 4 - with open(threadFileFullPath, 'rb') as f: - byteArr = f.read() - mainFileId, recArrSize = struct.unpack_from('qq', byteArr, 0) # 16 bytes - f.seek(16) - - for i in range(recArrSize): - curRecFormat = RecTuple() - f.readinto(curRecFormat) - recDataArr.append(curRecFormat) - # assert (len(symbolNameList) == recArrSize - 1) - assert (len(recDataArr) == recArrSize) - return recDataArr - - -def aggregatePerThreadArray(scalerDataFolder, recInfo: RecordingInfo): - """ - - Aggregate per-thread timing data into one using simple addition and return as the first return value - - The last element in each RecTuple records how much time the thread takes to execute so we should not aggregate them - together. 
Instead, we collect them into one list and return as the second parameter - - :param scalerDataFolder: Scaler output data folder - :param threadIdList: A list of thread ids - :return aggregatedTimeArray: Aggregated counting and timing information - :return startingInfoArray: Information about thread creator. This value is used in time aggregation steps - """ - api = 0 - fgdsApi = 0 - - aggregatedTimeArray = [] - aggregatedStartingTime = defaultdict( - lambda: 0) # Map fileId and starting time. Thread may created by modules other than the main application - for threadId in recInfo.threadIdList: - curThreadRecArray = readTimingStruct(os.path.join(scalerDataFolder, 'threadTiming_%s.bin' % threadId)) - aggregatedStartingTime[curThreadRecArray[-1]._flags] += curThreadRecArray[-1].totalClockCycles - # print(curThreadRecArray[-1].totalClockCycles) - - for i, curRec in enumerate(curThreadRecArray[:-1]): - if curRec._flags & (1 << 0): - fgdsApi += 1 - api += 1 - # if curRec.count>0: - # print('totalCount',totalCount,curRec.count) - if len(curThreadRecArray) != len(aggregatedTimeArray) + 1: - # First time - aggregatedTimeArray = curThreadRecArray[:-1].copy() - else: - for i, curRec in enumerate(curThreadRecArray[:-1]): - aggregatedTimeArray[i].count += curRec.count - # if recInfo.symbolNameList[i] == 'pthread_join': - # print('Skip pthread_join') - # continue - - if aggregatedTimeArray[i]._flags & (1 << 0): - # Use mean and count to estimate total clock cycles - aggregatedTimeArray[i].totalClockCycles += int(curRec.count * curRec._meanClockTick) - else: - aggregatedTimeArray[i].totalClockCycles += curRec.totalClockCycles - print('fgdsapi/api=', round(fgdsApi / api*100,2), 'fgdsCount/TotalCount=', round(fgdsCount / totalCount*100,2), sep='\t') - return aggregatedTimeArray, aggregatedStartingTime - - -pthreadFileRegex = re.compile(r'libpthread-.*\.so$') - - -def parsePthreadId(fileNameList): - for i, fileName in enumerate(fileNameList): - if 
len(pthreadFileRegex.findall(fileName)) != 0: - return i - raise Exception('Cannot find pthread library in fileList') +from datastructure.TimingStruct import RecordingInfo, FileRecord -def generateTimingStruct(aggregatedTimeEntries, aggregatedStartingTime, recInfo: RecordingInfo): +def generateXFAStruct(aggregatedTimeEntries, aggregatedStartingTime, recInfo: RecordingInfo): timingRecord = [] # Map file name to FileRecord struct mainFileId = None @@ -195,7 +92,7 @@ def calcPercentage(timingRecord, programRuntime, totalApiCallCount): else: curExtFileRecord.counts.globalPercent = 0.0 - if curFileRecord.childrenClockCycles.value > 0: + if curFileRecord.selfClockCycles.value + curFileRecord.childrenClockCycles.value > 0: curExtFileRecord.totalClockCycles.parentPercent = curExtFileRecord.totalClockCycles.value / ( curFileRecord.selfClockCycles.value + curFileRecord.childrenClockCycles.value) else: diff --git a/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py b/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py new file mode 100644 index 00000000..a87ef2af --- /dev/null +++ b/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py @@ -0,0 +1,55 @@ +import os +import traceback + +import matplotlib.pyplot as plt +import pandas as pd +import struct +import numpy as np +from multiprocessing import Pool +from multiprocessing import Pool, cpu_count +import time + +from datastructure.TimingStruct import ArrayDescriptor + + +def parseSingleSymDetailedTiming(ROOT_PATH: str, threadId: str, tgtSymIds: list): + # hasPoints = False + if tgtSymIds: + tgtSymIds = sorted(tgtSymIds) + + rlt = {} # SymId, timingArray + with open(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s.bin' % (threadId)), 'rb') as f: + # Get the amount of symbols in this file + arrayDesc = ArrayDescriptor() + f.readinto(arrayDesc) + assert (arrayDesc.arrayElemSize == 0) + assert (arrayDesc._magicNum == 167) + + if tgtSymIds is None: + tgtSymIds = 
range(arrayDesc.arraySize) + assert (tgtSymIds[-1] < arrayDesc.arraySize) + + detailedTimingForCurSym = None + for curSymId in range(arrayDesc.arraySize): + symDetailedTimingDesc = ArrayDescriptor() + f.readinto(symDetailedTimingDesc) + + assert (symDetailedTimingDesc.arrayElemSize == 8) + assert (symDetailedTimingDesc._magicNum == 167) + if curSymId < tgtSymIds[0]: + # Only read specified symbol + f.seek(symDetailedTimingDesc.arraySize * symDetailedTimingDesc.arrayElemSize, os.SEEK_CUR) + continue + elif curSymId == tgtSymIds[0]: + detailedTimingForCurSym = np.array(struct.unpack_from('<%dQ' % (symDetailedTimingDesc.arraySize), + f.read(symDetailedTimingDesc.arraySize * + symDetailedTimingDesc.arrayElemSize))) + rlt[curSymId] = detailedTimingForCurSym + tgtSymIds = tgtSymIds[1:] + if len(tgtSymIds) == 0: + break + else: + # There are duplicate or negative values inside tgtSymIds + assert (False) + # assert (len(tgtSymIds) == 0) + return rlt diff --git a/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py b/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py new file mode 100644 index 00000000..eb8d1bb6 --- /dev/null +++ b/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py @@ -0,0 +1,120 @@ +import math +import os +import re +import struct +from datastructure.TimingStruct import FileRecord, RecTuple, RecordingInfo, RecordingInfo, ArrayDescriptor, \ + ThreadCreatorInfo +import pandas as pd +from collections import defaultdict + + +def readSymbolFiles(scalerDataFolder): + if scalerDataFolder is None: + print() + return + rlt = RecordingInfo() + + df = pd.read_csv(os.path.join(scalerDataFolder, 'fileName.txt')) + rlt.fileNameList = df['pathName'].to_list() + rlt.pthreadFileId = parsePthreadId(rlt.fileNameList) + + for fileName in os.listdir(scalerDataFolder): + if fileName.startswith('threadTiming_'): + rlt.threadIdList.append(fileName[len('threadTiming_'): -4]) + + with open(os.path.join(scalerDataFolder, 'realFileId.bin'), 'rb') as f: 
+ arrDesc = ArrayDescriptor() + f.readinto(arrDesc) + assert (arrDesc._magicNum == 167) + assert (arrDesc.arrayElemSize == 8) + rlt.realFileIdList = list( + struct.unpack_from('<%dQ' % (arrDesc.arraySize), f.read(arrDesc.arrayElemSize * arrDesc.arraySize))) + assert (f.read() == b'') # Make sure this is the end + df = pd.read_csv(os.path.join(scalerDataFolder, 'symbolInfo.txt')) + rlt.symbolNameList = df['funcName'].to_list() + rlt.symbolFileIdList = df['fileId'].to_list() + rlt.symIdInFileList = df['symIdInFile'].to_list() + return rlt + + +def readTimingStruct(ROOT_PATH,threadId): + recDataArr = [] + + with open(os.path.join(ROOT_PATH, 'threadTiming_%s.bin' % threadId), 'rb') as f: + threadCreatorInfo = ThreadCreatorInfo() + f.readinto(threadCreatorInfo) + assert (threadCreatorInfo._magicNum == 167) + + arrayDescriptor = ArrayDescriptor() + f.readinto(arrayDescriptor) + assert (arrayDescriptor._magicNum == 167) + + for i in range(arrayDescriptor.arraySize): + curRecFormat = RecTuple() + f.readinto(curRecFormat) + recDataArr.append(curRecFormat) + + # assert (len(symbolNameList) == recArrSize - 1) + return recDataArr + + +def aggregatePerThreadArray(scalerDataFolder, recInfo: RecordingInfo): + """ + - Aggregate per-thread timing data into one using simple addition and return as the first return value + - The last element in each RecTuple records how much time the thread takes to execute so we should not aggregate them + together. Instead, we collect them into one list and return as the second parameter + + :param scalerDataFolder: Scaler output data folder + :param threadIdList: A list of thread ids + :return aggregatedTimeArray: Aggregated counting and timing information + :return startingInfoArray: Information about thread creator. This value is used in time aggregation steps + """ + api = 0 + fgdsApi = 0 + fgdsCount = 0 + totalCount = 0 + + aggregatedTimeArray = [] + aggregatedStartingTime = defaultdict( + lambda: 0) # Map fileId and starting time. 
Thread may created by modules other than the main application + for threadId in recInfo.threadIdList: + curThreadRecArray = readTimingStruct(scalerDataFolder,threadId) + aggregatedStartingTime[curThreadRecArray[-1]._flags] += curThreadRecArray[-1].totalClockCycles + # print(curThreadRecArray[-1].totalClockCycles) + + for i, curRec in enumerate(curThreadRecArray[:-1]): + if curRec._flags & (1 << 0): + fgdsApi += 1 + fgdsCount += curRec.count + api += 1 + totalCount += curRec.count + # if curRec.count>0: + # print('totalCount',totalCount,curRec.count) + if len(curThreadRecArray) != len(aggregatedTimeArray) + 1: + # First time + aggregatedTimeArray = curThreadRecArray[:-1].copy() + else: + for i, curRec in enumerate(curThreadRecArray[:-1]): + aggregatedTimeArray[i].count += curRec.count + # if recInfo.symbolNameList[i] == 'pthread_join': + # print('Skip pthread_join') + # continue + + if aggregatedTimeArray[i]._flags & (1 << 0): + # Use mean and count to estimate total clock cycles + aggregatedTimeArray[i].totalClockCycles += int(curRec.count * curRec._meanClockTick) + else: + aggregatedTimeArray[i].totalClockCycles += curRec.totalClockCycles + print('fgdsapi/api=', round(fgdsApi / api * 100, 2), 'fgdsCount/TotalCount=', + round(fgdsCount / totalCount * 100, 2), sep='\t') + return aggregatedTimeArray, aggregatedStartingTime + + +pthreadFileRegex = re.compile(r'libpthread-.*\.so$') + + +def parsePthreadId(fileNameList): + for i, fileName in enumerate(fileNameList): + if len(pthreadFileRegex.findall(fileName)) != 0: + return i + raise Exception('Cannot find pthread library in fileList') diff --git a/Analyzer/PyVisualizer/src/V3/util/Quantile.py b/Analyzer/PyVisualizer/src/V3/util/Quantile.py new file mode 100644 index 00000000..0c0d11a6 --- /dev/null +++ b/Analyzer/PyVisualizer/src/V3/util/Quantile.py @@ -0,0 +1,13 @@ +import numpy as np + + +def calcQuantile(array): + min = np.min(array) + q1 = np.quantile(array, 0.25) + q2 = np.quantile(array, 0.5) + q3 = 
np.quantile(array, 0.75) + q4 = np.quantile(array, 1) + iqr = q3 - q1 + qLower = q1 - 1.5 * iqr + qUpper = q3 + 1.5 * iqr + return min, q1, q2, q3, q4, iqr, qLower, qUpper diff --git a/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl b/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl index f6427d07..336e6c38 100644 --- a/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl +++ b/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl @@ -62,7 +62,6 @@ # # 02-Mar-2012 Brendan Gregg Created this. # 02-Jul-2014 " " Added process name to stacks. -# 08-Jun-2021 John Diep edited script to force shared object names to be reported always use strict; use Getopt::Long; @@ -89,46 +88,83 @@ sub remember_stack { my $show_inline = 0; my $show_context = 0; + +my $srcline_in_input = 0; # if there are extra lines with source location (perf script -F+srcline) GetOptions('inline' => \$show_inline, - 'context' => \$show_context, - 'pid' => \$include_pid, - 'kernel' => \$annotate_kernel, - 'jit' => \$annotate_jit, - 'all' => \$annotate_all, - 'tid' => \$include_tid, - 'addrs' => \$include_addrs, - 'event-filter=s' => \$event_filter) +'context' => \$show_context, +'srcline' => \$srcline_in_input, +'pid' => \$include_pid, +'kernel' => \$annotate_kernel, +'jit' => \$annotate_jit, +'all' => \$annotate_all, +'tid' => \$include_tid, +'addrs' => \$include_addrs, +'event-filter=s' => \$event_filter) or die < outfile\n - --pid # include PID with process names [1] - --tid # include TID and PID with process names [1] - --inline # un-inline using addr2line - --all # all annotations (--kernel --jit) - --kernel # annotate kernel functions with a _[k] - --jit # annotate jit functions with a _[j] - --context # adds source context to --inline - --addrs # include raw addresses where symbols can't be found - --event-filter=EVENT # event name filter\n +--pid # include PID with process names [1] +--tid # include TID and PID with process names [1] +--inline # un-inline using 
addr2line +--all # all annotations (--kernel --jit) +--kernel # annotate kernel functions with a _[k] +--jit # annotate jit functions with a _[j] +--context # adds source context to --inline +--srcline # parses output of 'perf script -F+srcline' and adds source context +--addrs # include raw addresses where symbols can't be found +--event-filter=EVENT # event name filter\n [1] perf script must emit both PID and TIDs for these to work; eg, Linux < 4.1: - perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace - for Linux >= 4.1: - perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace - If you save this output add --header on Linux >= 3.14 to include perf info. +perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace +for Linux >= 4.1: +perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace +If you save this output add --header on Linux >= 3.14 to include perf info. USAGE_END if ($annotate_all) { - $annotate_kernel = $annotate_jit = 1; +$annotate_kernel = $annotate_jit = 1; +} + +my %inlineCache; + +my %nmCache; + +sub inlineCacheAdd { + my ($pc, $mod, $result) = @_; + if (defined($inlineCache{$pc})) { + $inlineCache{$pc}{$mod} = $result; + } else { + $inlineCache{$pc} = {$mod => $result}; + } } # for the --inline option sub inline { - my ($pc, $mod) = @_; +my ($pc, $rawfunc, $mod) = @_; - # capture addr2line output - my $a2l_output = `addr2line -a $pc -e $mod -i -f -s -C`; +return $inlineCache{$pc}{$mod} if defined($inlineCache{$pc}{$mod}); - # remove first line - $a2l_output =~ s/^(.*\n){1}//; +# capture addr2line output +my $a2l_output = `addr2line -a $pc -e $mod -i -f -s -C`; + +# remove first line +$a2l_output =~ s/^(.*\n){1}//; + +if ($a2l_output =~ /\?\?\n\?\?:0/) { +# if addr2line fails and rawfunc is func+offset, then fall back to it +if ($rawfunc =~ /^(.+)\+0x([0-9a-f]+)$/) { + my $func = $1; + my $addr = hex $2; + + $nmCache{$mod}=`nm $mod` unless defined $nmCache{$mod}; + + if ($nmCache{$mod} =~ /^([0-9a-f]+) . 
\Q$func\E$/m) { + my $base = hex $1; + my $newPc = sprintf "0x%x", $base+$addr; + my $result = inline($newPc, '', $mod); + inlineCacheAdd($pc, $mod, $result); + return $result; + } + } + } my @fullfunc; my $one_item = ""; @@ -150,13 +186,18 @@ sub inline { } } - return join(";", @fullfunc); + my $result = join ";" , @fullfunc; + + inlineCacheAdd($pc, $mod, $result); + + return $result; } my @stack; my $pname; my $m_pid; my $m_tid; +my $m_period; # # Main loop @@ -192,7 +233,7 @@ sub inline { unshift @stack, ""; } } - remember_stack(join(";", @stack), 1) if @stack; + remember_stack(join(";", @stack), $m_period) if @stack; undef @stack; undef $pname; next; @@ -203,21 +244,22 @@ sub inline { # if (/^(\S.+?)\s+(\d+)\/*(\d+)*\s+/) { # default "perf script" output has TID but not PID - # eg, "java 25607 4794564.109216: cycles:" - # eg, "java 12688 [002] 6544038.708352: cpu-clock:" - # eg, "V8 WorkerThread 25607 4794564.109216: cycles:" - # eg, "java 24636/25607 [000] 4794564.109216: cycles:" - # eg, "java 12688/12764 6544038.708352: cpu-clock:" - # eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: cycles:" + # eg, "java 25607 4794564.109216: 1 cycles:" + # eg, "java 12688 [002] 6544038.708352: 235 cpu-clock:" + # eg, "V8 WorkerThread 25607 4794564.109216: 104345 cycles:" + # eg, "java 24636/25607 [000] 4794564.109216: 1 cycles:" + # eg, "java 12688/12764 6544038.708352: 10309278 cpu-clock:" + # eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: 100 cycles:" # other combinations possible - my ($comm, $pid, $tid) = ($1, $2, $3); + my ($comm, $pid, $tid, $period) = ($1, $2, $3, ""); if (not $tid) { $tid = $pid; $pid = "?"; } - if (/(\S+):\s*$/) { - my $event = $1; + if (/:\s*(\d+)*\s+(\S+):\s*$/) { + $period = $1; + my $event = $2; if ($event_filter eq "") { # By default only show events of the first encountered @@ -237,7 +279,10 @@ sub inline { } } - ($m_pid, $m_tid) = ($pid, $tid); + if (not $period) { + $period = 1 + } + ($m_pid, $m_tid, $m_period) = ($pid, $tid, 
$period); if ($include_tid) { $pname = "$comm-$m_pid/$m_tid"; @@ -257,18 +302,25 @@ sub inline { my ($pc, $rawfunc, $mod) = ($1, $2, $3); + if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) { + my $inlineRes = inline($pc, $rawfunc, $mod); + # - empty result this happens e.g., when $mod does not exist or is a path to a compressed kernel module + # if this happens, the user will see error message from addr2line written to stderr + # - if addr2line results in "??" , then it's much more sane to fall back than produce a '??' in graph + if($inlineRes ne "" and $inlineRes ne "??" and $inlineRes ne "??:??:0" ) { + unshift @stack, $inlineRes; + next; + } + } + # Linux 4.8 included symbol offsets in perf script output by default, eg: # 7fffb84c9afc cpu_startup_entry+0x800047c022ec ([kernel.kallsyms]) # strip these off: $rawfunc =~ s/\+0x[\da-f]+$//; - if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) { - unshift @stack, inline($pc, $mod); - next; - } - next if $rawfunc =~ /^\(/; # skip process names + my $is_unknown=0; my @inline; for (split /\->/, $rawfunc) { my $func = $_; @@ -279,6 +331,7 @@ sub inline { $func =~ s/.*\///; } else { $func = "unknown"; + $is_unknown=1; } if ($include_addrs) { @@ -320,7 +373,7 @@ sub inline { # # detect inlined from the @inline array # detect kernel from the module name; eg, frames to parse include: - # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms]) + # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms]) # 8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux) # 7d8 ipv4_conntrack_local+0x7f8f80b8 ([nf_conntrack_ipv4]) # detect jit from the module name; eg: @@ -332,15 +385,42 @@ sub inline { } elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) { $func .= "_[j]"; # jitted } - else { - if ($mod ne "[unknown]") { - $mod =~ s/.*\///; - $mod = "\[$mod\]"; - } - if ($func ne $mod) { - $func .= " " . 
$mod; - } + + # + # Source lines + # + # + # Sample outputs: + # | a.out 35081 252436.005167: 667783 cycles: + # | 408ebb some_method_name+0x8b (/full/path/to/a.out) + # | uniform_int_dist.h:300 + # | 4069f5 main+0x935 (/full/path/to/a.out) + # | file.cpp:137 + # | 7f6d2148eb25 __libc_start_main+0xd5 (/lib64/libc-2.33.so) + # | libc-2.33.so[27b25] + # + # | a.out 35081 252435.738165: 306459 cycles: + # | 7f6d213c2750 [unknown] (/usr/lib64/libkmod.so.2.3.6) + # | libkmod.so.2.3.6[6750] + # + # | a.out 35081 252435.738373: 315813 cycles: + # | 7f6d215ca51b __strlen_avx2+0x4b (/lib64/libc-2.33.so) + # | libc-2.33.so[16351b] + # | 7ffc71ee9580 [unknown] ([unknown]) + # | + # + # | a.out 35081 252435.718940: 247984 cycles: + # | ffffffff814f9302 up_write+0x32 ([kernel.kallsyms]) + # | [kernel.kallsyms][ffffffff814f9302] + if($srcline_in_input and not $is_unknown){ + $_ = <>; + chomp; + s/\[.*?\]//g; + s/^\s*//g; + s/\s*$//g; + $func.=':'.$_ unless $_ eq ""; } + push @inline, $func; } diff --git a/benchmarktookit b/benchmarktookit new file mode 160000 index 00000000..c1ff8fb5 --- /dev/null +++ b/benchmarktookit @@ -0,0 +1 @@ +Subproject commit c1ff8fb5dcc7e62628a9c5d4fd8e8e858ae8ab00 diff --git a/libHook-c/src/ExtFuncCallHook.cpp b/libHook-c/src/ExtFuncCallHook.cpp index abd46bbb..c545bb27 100644 --- a/libHook-c/src/ExtFuncCallHook.cpp +++ b/libHook-c/src/ExtFuncCallHook.cpp @@ -169,7 +169,8 @@ namespace scaler { Elf64_Word type; Elf64_Word bind; parser.getExtSymbolInfo(i, funcName, bind, type); - if (!shouldHookThisSymbol(funcName, bind, type, allExtSymbol.getSize())) { + ssize_t initialGap = 0; + if (!shouldHookThisSymbol(funcName, bind, type, allExtSymbol.getSize(), initialGap)) { continue; } //Get function id from plt entry @@ -198,7 +199,7 @@ namespace scaler { newSym->pltEntryAddr = pltEntry; newSym->pltSecEntryAddr = pltSecEntry; newSym->pltStubId = pltStubId; - + newSym->initialGap = initialGap; fprintf(symInfoFile, "%s,%ld,%ld\n", funcName, newSym->fileId, 
newSym->symIdInFile); DBG_LOGS( @@ -212,8 +213,12 @@ namespace scaler { } - bool - ExtFuncCallHook::shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId) { + const int SAMPLING_GAP = 0b0; + + bool ExtFuncCallHook::shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId, + ssize_t &initialGap) { + + initialGap = 0; if (bind != STB_GLOBAL || type != STT_FUNC) { return false; } @@ -230,13 +235,33 @@ namespace scaler { } if (funcNameLen == 3) { - if (strncmp(funcName, "oom", 3) == 0) { + if (strncmp(funcName, "cos", 3) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "exp", 3) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "log", 3) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "sin", 3) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "oom", 3) == 0) { return false; } else if (strncmp(funcName, "err", 3) == 0) { return false; } } else if (funcNameLen == 4) { - if (strncmp(funcName, "jump", 4) == 0) { + if (strncmp(funcName, "cosf", 4) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "expf", 4) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "logf", 4) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "powf", 4) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "sinf", 4) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "sqrtf", 4) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "jump", 4) == 0) { return false; } else if (strncmp(funcName, "exit", 4) == 0) { return false; @@ -248,7 +273,11 @@ namespace scaler { return false; } } else if (funcNameLen == 5) { - if (strncmp(funcName, "_exit", 5) == 0) { + if (strncmp(funcName, "atan2", 5) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "sqrtf", 5) == 0) { + initialGap = SAMPLING_GAP; + } else if (strncmp(funcName, "_exit", 5) == 0) { 
return false; } else if (strncmp(funcName, "abort", 5) == 0) { return false; @@ -629,7 +658,7 @@ namespace scaler { uint8_t *tlsOffset = nullptr; __asm__ __volatile__ ( - "movq 0x2F4CC0(%%rip),%0\n\t" + "movq 0x2F5B60(%%rip),%0\n\t" :"=r" (tlsOffset) : : diff --git a/libHook-c/src/HookContext.cpp b/libHook-c/src/HookContext.cpp index 41d768fb..0e309e79 100644 --- a/libHook-c/src/HookContext.cpp +++ b/libHook-c/src/HookContext.cpp @@ -2,11 +2,13 @@ #include #include #include +#include extern "C" { static thread_local DataSaver saverElem; -HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) { +HookContext * +constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize, scaler::Array &allExtSymbol) { uint8_t *contextHeap = static_cast(mmap(NULL, sizeof(HookContext) + sizeof(scaler::Array) + @@ -21,7 +23,15 @@ HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) { rlt->recArr = new(contextHeap + sizeof(HookContext)) scaler::Array(hookedSymbolSize); rlt->threadDataSavingLock = reinterpret_cast(contextHeap + sizeof(HookContext) + sizeof(scaler::Array)); - +#ifdef INSTR_TIMING + detailedTimingVectors = new TIMING_TYPE *[hookedSymbolSize]; + detailedTimingVectorSize = new TIMING_TYPE[hookedSymbolSize]; + memset(detailedTimingVectorSize, 0, sizeof(TIMING_TYPE) * hookedSymbolSize); + for (ssize_t i = 0; i < hookedSymbolSize; ++i) { + detailedTimingVectors[i] = new TIMING_TYPE[TIMING_REC_COUNT]; + memset(detailedTimingVectors[i], 0, sizeof(TIMING_TYPE) * TIMING_REC_COUNT); + } +#endif pthread_mutexattr_t Attr; pthread_mutexattr_init(&Attr); @@ -31,11 +41,9 @@ HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) { //Initialize gap to one for (int i = 0; i < rlt->recArr->getSize(); ++i) { //number mod 2^n is equivalent to stripping off all but the n lowest-order - rlt->recArr->internalArr[i].gap = 0; //0b11 if %4, because 4=2^2 Initially time everything + rlt->recArr->internalArr[i].gap = 
allExtSymbol[i].initialGap; //0b11 if %4, because 4=2^2 Initially time everything rlt->recArr->internalArr[i].count = 0; } - - // memArrayHeap(1), timingArr(hookedSymbolSize), // indexPosi(0) @@ -86,9 +94,9 @@ void __attribute__((used, noinline, optimize(3))) printRecOffset() { auto m __attribute__((used)) = (uint8_t *) &curContext->recArr->internalArr[0].gap; printf("\nTLS offset: Check assembly\n" - "RecArr Offset: 0x%lx\n" - "Counting Entry Offset: 0x%lx\n" - "Gap Entry Offset: 0x%lx\n", j - i, l - k, m - k); + "RecArr Offset: 0x%lx\n" + "Counting Entry Offset: 0x%lx\n" + "Gap Entry Offset: 0x%lx\n", j - i, l - k, m - k); } @@ -106,19 +114,17 @@ bool initTLS() { //Put a dummy variable to avoid null checking //Initialize saving data structure - curContext = constructContext( - scaler::ExtFuncCallHook::instance->elfImgInfoMap.getSize(), - scaler::ExtFuncCallHook::instance->allExtSymbol.getSize() + 1); + curContext = constructContext(scaler::ExtFuncCallHook::instance->elfImgInfoMap.getSize(), + scaler::ExtFuncCallHook::instance->allExtSymbol.getSize() + 1, + scaler::ExtFuncCallHook::instance->allExtSymbol); //#ifdef PRINT_DBG_LOG // printRecOffset(); //#endif - if (!curContext) { fatalError("Failed to allocate memory for Context"); + if (!curContext) { + fatalError("Failed to allocate memory for Context"); return false; } - - //RuntimeInfo newInfo; - return true; } @@ -126,10 +132,168 @@ __thread HookContext *curContext __attribute((tls_model("initial-exec"))); __thread uint8_t bypassCHooks __attribute((tls_model("initial-exec"))) = SCALER_FALSE; //Anything that is not SCALER_FALSE should be treated as SCALER_FALSE +#ifdef INSTR_TIMING +const int TIMING_REC_COUNT = 20000; +typedef int64_t TIMING_TYPE; +__thread TIMING_TYPE **detailedTimingVectors; +__thread TIMING_TYPE *detailedTimingVectorSize; +#endif + DataSaver::~DataSaver() { saveData(curContext); } +#ifdef INSTR_TIMING +inline void saveThreadDetailedTiming(std::stringstream &ss, HookContext *curContextPtr) { 
+ ss.str(""); + ss << scaler::ExtFuncCallHook::instance->folderName << "/threadDetailedTiming_" << curContextPtr->threadId + << ".bin"; + + //Calculate file total size + + ssize_t recordedInvocationCnt = 0; + + for (ssize_t i = 0; i < scaler::ExtFuncCallHook::instance->allExtSymbol.getSize(); ++i) { + recordedInvocationCnt += detailedTimingVectorSize[i]; + } + + int fd; + size_t realFileIdSizeInBytes = sizeof(ArrayDescriptor) + + sizeof(ArrayDescriptor) * scaler::ExtFuncCallHook::instance->allExtSymbol.getSize() + + recordedInvocationCnt * sizeof(TIMING_TYPE); + + uint8_t *fileContentInMem = nullptr; + if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) { + fatalErrorS("Cannot open %s because:%s", ss.str().c_str(), strerror(errno)) + } + uint8_t *_fileContentInMem = fileContentInMem; + + /*Write whole symbol info*/ + ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem); + arrayDescriptor->arrayElemSize = 0; + arrayDescriptor->arraySize = scaler::ExtFuncCallHook::instance->allExtSymbol.getSize(); + arrayDescriptor->magicNum = 167; + fileContentInMem += sizeof(ArrayDescriptor); + + + for (ssize_t i = 0; i < scaler::ExtFuncCallHook::instance->allExtSymbol.getSize(); ++i) { + /** + * Write array descriptor first + */ + ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem); + arrayDescriptor->arrayElemSize = sizeof(TIMING_TYPE); + arrayDescriptor->arraySize = detailedTimingVectorSize[i]; + arrayDescriptor->magicNum = 167; + fileContentInMem += sizeof(ArrayDescriptor); + + /** + * Then write detailed timing array + */ + memcpy(fileContentInMem, detailedTimingVectors[i], arrayDescriptor->arraySize * arrayDescriptor->arrayElemSize); + fileContentInMem += arrayDescriptor->arraySize * arrayDescriptor->arrayElemSize; + } + if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) { + fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno)); + } +} +#endif + + +inline void 
savePerThreadTimingData(std::stringstream &ss, HookContext *curContextPtr) { + ss.str(""); + ss << scaler::ExtFuncCallHook::instance->folderName << "/threadTiming_" << curContextPtr->threadId << ".bin"; + //INFO_LOGS("Saving timing data to %s", ss.str().c_str()); + + int fd; + size_t realFileIdSizeInBytes = + sizeof(ThreadCreatorInfo) + sizeof(ArrayDescriptor) + curContextPtr->recArr->getSize() * sizeof(RecTuple); + uint8_t *fileContentInMem = nullptr; + if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) { + fatalErrorS("Cannot fopen %s because:%s", ss.str().c_str(), strerror(errno)); + } + uint8_t *_fileContentInMem = fileContentInMem; + /** + * Record who created the thread + */ + ThreadCreatorInfo *threadCreatorInfo = reinterpret_cast(fileContentInMem); + threadCreatorInfo->threadExecutionCycles = curContextPtr->endTImestamp - curContextPtr->startTImestamp; + threadCreatorInfo->threadCreatorFileId = curContextPtr->threadCreatorFileId; + threadCreatorInfo->magicNum = 167; + fileContentInMem += sizeof(ThreadCreatorInfo); + + /** + * Record size information about the recorded array + */ + ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem); + arrayDescriptor->arrayElemSize = sizeof(RecTuple); + arrayDescriptor->arraySize = curContextPtr->recArr->getSize(); + arrayDescriptor->magicNum = 167; + fileContentInMem += sizeof(ArrayDescriptor); + + + /** + * Write recording tuple onto the disk + */ + memcpy(fileContentInMem, curContextPtr->recArr->data(), + curContextPtr->recArr->getTypeSizeInBytes() * curContextPtr->recArr->getSize()); + + if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) { + fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno)); + } + + INFO_LOGS("Saving data to %s, %lu", scaler::ExtFuncCallHook::instance->folderName.c_str(), pthread_self()); +} + +inline void saveRealFileId(std::stringstream &ss, HookContext *curContextPtr) { + ss.str(""); + ss << 
scaler::ExtFuncCallHook::instance->folderName << "/realFileId.bin"; + //The real id of each function is resolved in after hook, so I can only save it in datasaver + + int fd; + ssize_t realFileIdSizeInBytes = sizeof(ArrayDescriptor) + + (curContextPtr->_this->allExtSymbol.getSize()) * sizeof(uint64_t); + uint8_t *fileContentInMem = nullptr; + if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) { + fatalErrorS( + "Cannot open %s because:%s", ss.str().c_str(), strerror(errno)) + } + uint8_t *_fileContentInMem = fileContentInMem; + + /** + * Write array descriptor first + */ + ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem); + arrayDescriptor->arrayElemSize = sizeof(uint64_t); + arrayDescriptor->arraySize = curContextPtr->_this->allExtSymbol.getSize(); + arrayDescriptor->magicNum = 167; + fileContentInMem += sizeof(ArrayDescriptor); + + uint64_t *realFileIdMem = reinterpret_cast(fileContentInMem); + for (int i = 0; i < curContextPtr->_this->allExtSymbol.getSize(); ++i) { + realFileIdMem[i] = curContextPtr->_this->pmParser.findExecNameByAddr( + *(curContextPtr->_this->allExtSymbol[i].gotEntryAddr)); + } + + if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) { + fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno)); + } +} + +inline void saveDataForAllOtherThread(std::stringstream &ss, HookContext *curContextPtr) { + INFO_LOG("Save data of all existing threads"); + for (int i = 0; i < threadContextMap.getSize(); ++i) { + HookContext *threadContext = threadContextMap[i]; + if (!threadContext->dataSaved) { + pthread_mutex_lock(threadContext->threadDataSavingLock); + INFO_LOGS("Thread data not saved, save it %d/%zd", i, threadContextMap.getSize()); + saveData(threadContext); + pthread_mutex_unlock(threadContext->threadDataSavingLock); + } else { + INFO_LOGS("Thread data already saved, skip %d/%zd", i, threadContextMap.getSize()); + } + } +} + void saveData(HookContext 
*curContextPtr, bool finalize) { bypassCHooks = SCALER_TRUE; if (!curContextPtr) { @@ -146,83 +310,28 @@ void saveData(HookContext *curContextPtr, bool finalize) { curContextPtr->dataSaved = true; //Resolve real address - if (!curContextPtr->endTImestamp) { //Not finished succesfully curContextPtr->endTImestamp = getunixtimestampms(); } - if (!curContext) { fatalError("curContext is not initialized, won't save anything"); + if (!curContext) { + fatalError("curContext is not initialized, won't save anything"); return; } std::stringstream ss; - ss << scaler::ExtFuncCallHook::instance->folderName << "/threadTiming_" << curContextPtr->threadId << ".bin"; - //INFO_LOGS("Saving timing data to %s", ss.str().c_str()); - FILE *threadDataSaver = fopen(ss.str().c_str(), "wb"); - if (!threadDataSaver) { fatalErrorS("Cannot fopen %s because:%s", ss.str().c_str(), - strerror(errno)); - } - //Main application at the end - curContextPtr->recArr->internalArr[curContextPtr->recArr->getSize() - 1].totalClockCycles = - curContextPtr->endTImestamp - curContextPtr->startTImestamp; +#ifdef INSTR_TIMING + saveThreadDetailedTiming(ss, curContextPtr); +#endif - - if (fwrite(&curContextPtr->curFileId, sizeof(HookContext::curFileId), 1, threadDataSaver) != 1) { fatalErrorS( - "Cannot curFileId of %s because:%s", ss.str().c_str(), - strerror(errno)); - } - - int64_t timeEntrySize = curContextPtr->recArr->getSize(); - if (fwrite(&timeEntrySize, sizeof(int64_t), 1, threadDataSaver) != 1) { fatalErrorS( - "Cannot write timeEntrySize of %s because:%s", ss.str().c_str(), - strerror(errno)); - } - if (fwrite(curContextPtr->recArr->data(), curContextPtr->recArr->getTypeSizeInBytes(), - curContextPtr->recArr->getSize(), threadDataSaver) != - curContextPtr->recArr->getSize()) { fatalErrorS("Cannot write timingArr of %s because:%s", ss.str().c_str(), - strerror(errno)); - } - - - INFO_LOGS("Saving data to %s, %lu", scaler::ExtFuncCallHook::instance->folderName.c_str(), pthread_self()); + 
savePerThreadTimingData(ss, curContextPtr); if (curContextPtr->isMainThread || finalize) { -// printf("Main thread id is: %lu", curContextPtr->threadId); - ss.str(""); - ss << scaler::ExtFuncCallHook::instance->folderName << "/realFileId.bin"; - //The real id of each function is resolved in after hook, so I can only save it in datasaver - - int fd; - - size_t realFileIdSizeInBytes = (curContextPtr->_this->allExtSymbol.getSize() + 1) * sizeof(ssize_t); - size_t *realFileIdMem = nullptr; - if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, realFileIdMem)) { fatalErrorS( - "Cannot open %s because:%s", ss.str().c_str(), strerror(errno)) - } - realFileIdMem[0] = curContextPtr->_this->allExtSymbol.getSize(); - for (int i = 0; i < curContextPtr->_this->allExtSymbol.getSize(); ++i) { - realFileIdMem[i + 1] = curContextPtr->_this->pmParser.findExecNameByAddr( - *(curContextPtr->_this->allExtSymbol[i].gotEntryAddr)); - } - if (!scaler::fClose(fd, realFileIdSizeInBytes, realFileIdMem)) { fatalError("Cannot close file"); - } - - INFO_LOG("Save data of all existing threads"); - for (int i = 0; i < threadContextMap.getSize(); ++i) { - HookContext *threadContext = threadContextMap[i]; - if (!threadContext->dataSaved) { - pthread_mutex_lock(threadContext->threadDataSavingLock); - INFO_LOGS("Thread data not saved, save it %d/%zd", i, threadContextMap.getSize()); - saveData(threadContext); - pthread_mutex_unlock(threadContext->threadDataSavingLock); - } else { - INFO_LOGS("Thread data already saved, skip %d/%zd", i, threadContextMap.getSize()); - } - } + saveRealFileId(ss, curContextPtr); + saveDataForAllOtherThread(ss, curContextPtr); } - fclose(threadDataSaver); pthread_mutex_unlock(curContextPtr->threadDataSavingLock); } diff --git a/libHook-c/src/HookHandlers.cpp b/libHook-c/src/HookHandlers.cpp index a7006083..431b796c 100644 --- a/libHook-c/src/HookHandlers.cpp +++ b/libHook-c/src/HookHandlers.cpp @@ -385,15 +385,9 @@ void *afterHookHandler() { // int64_t 
prevClockTick = curContextPtr->hookTuple[curContextPtr->indexPosi].clockTicks; uint64_t preClockCycle = curContextPtr->hookTuple[curContextPtr->indexPosi].clockCycles; -// int64_t curClockTick = 0; - //(((int64_t) hi << 32) | lo) ; + int64_t &c = curContextPtr->recArr->internalArr[symbolId].count; -// if (c < (1 << 10)) { -// struct tms curTime; -// clock_t rlt = times(&curTime); -// curClockTick = curTime.tms_utime + curTime.tms_stime - prevClockTick; -// printf("Clock Ticks in posthook=%ld\n", curTime.tms_utime + curTime.tms_stime); -// } + --curContextPtr->indexPosi; assert(curContextPtr->indexPosi >= 1); @@ -406,47 +400,19 @@ void *afterHookHandler() { int32_t &clockCycleThreshold = curContextPtr->recArr->internalArr[symbolId].durThreshold; int64_t clockCyclesDuration = (int64_t) (postHookClockCycles - preClockCycle); - if (c < (1 << 10)) { - - if (c > (1 << 9)) { - //Calculation phase - int64_t clockTickDiff = clockCyclesDuration - meanClockCycle; - - if (-clockCycleThreshold <= clockTickDiff && clockTickDiff <= clockCycleThreshold) { -// printf("Skipped\n"); - //Skip this - setbit(curContextPtr->recArr->internalArr[symbolId].flags, 0); - } -// printf("Threshold=%d clockDiff=%ld shouldSkip?=%s\n", clockTickThreshold, clockTickDiff, -// -clockTickThreshold <= clockTickDiff && clockTickDiff < = clockTickThreshold ? "True" : "False"); - - } else if (c < (1 << 9)) { - //Counting only, no modifying gap. Here the gap should be zero. 
Meaning every invocation counts - //https://blog.csdn.net/u014485485/article/details/77679669 - meanClockCycle += (clockCyclesDuration - meanClockCycle) / (float) c; //c<100, safe conversion -// printf("meanClockTick += (%ld - %f) / (float) %ld\n", clockCyclesDuration, meanClockCycle, c); - } else if (c == (1 << 9)) { - //Mean calculation has finished, calculate a threshold based on that - clockCycleThreshold = meanClockCycle * 0.1; -// printf("MeanClockTick=%f MeanClockTick*0.1=%f\n", meanClockCycle, meanClockCycle * 0.1); - } - } else if (c == (1 << 10)) { - if (chkbit(curContextPtr->recArr->internalArr[symbolId].flags, 0)) { - //Skip this symbol - //printf("Skipped\n"); - curContextPtr->recArr->internalArr[symbolId].gap = 0b11111111111111111111; - } - } - //RDTSCTiming if not skipped - if (!chkbit(curContextPtr->recArr->internalArr[symbolId].flags, 0)) { - curContextPtr->recArr->internalArr[symbolId].totalClockCycles += clockCyclesDuration; - } - //c = 1 << 10; +#ifdef INSTR_TIMING + TIMING_TYPE &curSize = detailedTimingVectorSize[symbolId]; + if (curSize < TIMING_REC_COUNT) { + ++curSize; + detailedTimingVectors[symbolId][curSize] = clockCyclesDuration; + } +#endif + //RDTSCTiming if not skipped + curContextPtr->recArr->internalArr[symbolId].totalClockCycles += clockCyclesDuration * (c - curContextPtr->recArr->internalArr[symbolId].prevCount + 1); -// INFO_LOGS("[Post Hook] Thread ID:%lu Func(%ld) CalleeFileId(%ld) Timestamp: %lu\n", -// pthread_self(), symbolId, curElfSymInfo.libFileId, getunixtimestampms()); + curContextPtr->recArr->internalArr[symbolId].prevCount = c; bypassCHooks = SCALER_FALSE; return callerAddr; diff --git a/libHook-c/src/ProcInfoParser.cpp b/libHook-c/src/ProcInfoParser.cpp index 1d8f8ec7..2ab15a58 100644 --- a/libHook-c/src/ProcInfoParser.cpp +++ b/libHook-c/src/ProcInfoParser.cpp @@ -182,12 +182,9 @@ namespace scaler { //We could use binary search to lookup addr in this array. 
//Binary search impl segAddrFileMap - ssize_t lo = 0; - ssize_t hi = pmEntryArray.getSize(); - ssize_t md; - bool found = false; - while (lo != hi) { - md = (lo + hi) / 2; + ssize_t lo = 0, md = 0, hi = pmEntryArray.getSize() - 1; + while (lo < hi) { + md = lo + (hi - lo) / 2; if (pmEntryArray[md].addrStart < addr) { //printf("hi(%d) = md(%d) - 1=(%d)\n", hi, md, md - 1); lo = md + 1; @@ -195,35 +192,11 @@ namespace scaler { //printf("lo(%d) = md(%d) + 1=(%d)\n", lo, md, md + 1); hi = md; } else { - //printf("lo = md =%d\n", md); - lo = md; - found = true; - break; + //Find left bound, although this should be impossible in this case + hi = md; } } - if (!found && lo == 0) { - lo = -1; - } - - - //It is possible that the address falls within the range of last entry. We need to check this scenario - - if (lo == -1) { fatalErrorS( - "Cannot find addr %p in pmMap. The address is lower than the lowest address if /proc/{pid}/maps.", - addr); - exit(-1); - } else if (lo == pmEntryArray.getSize()) { - //Address is within range - lo = pmEntryArray.getSize() - 1; - } - - //Check if it's end address is indeed in this entry. 
If not, it is because the caller is not in procinfomapper - // (Maybe skipped, in this case return an id that is larger than the largest function addr) - if (addr > pmEntryArray[lo].addrEnd) { - return fileNameArr.size(); - } - - return pmEntryArray[lo].fileId; + return pmEntryArray[lo - 1].fileId; } diff --git a/libHook-c/src/include/type/ExtSymInfo.h b/libHook-c/src/include/type/ExtSymInfo.h index ff3b001a..5966685a 100644 --- a/libHook-c/src/include/type/ExtSymInfo.h +++ b/libHook-c/src/include/type/ExtSymInfo.h @@ -21,14 +21,7 @@ namespace scaler { uint8_t *pltSecEntryAddr = nullptr; //(8 bytes) uint64_t pltStubId = 0; //(8 bytes) FileID libFileId = -1; //(8 bytes) Deprecated, move to a dedicated array - char padding0; - char padding1; - char padding2; - char padding3; - char padding4; - char padding5; - char padding6; - char padding7; + ssize_t initialGap = 0;//8 Bytes. Initial gap value }; } #endif \ No newline at end of file diff --git a/libHook-c/src/include/type/RecTuple.h b/libHook-c/src/include/type/RecTuple.h new file mode 100644 index 00000000..e0c26c15 --- /dev/null +++ b/libHook-c/src/include/type/RecTuple.h @@ -0,0 +1,44 @@ +#ifndef SCALER_RECTUPLE_H +#define SCALER_RECTUPLE_H + +/** + * This struct is the format that we record time and save to disk. + */ +struct RecTuple { + uint64_t totalClockCycles; //8 + int64_t count; //8 + int64_t prevCount; //8 Used to perform sampling + int32_t gap; //4 + float meanClockTick; //4 + int32_t durThreshold; //4 + uint32_t flags; //4 +}; + + +/** + * This struct stores the total size and element size of an array. + * On disk, this struct is followed by array elements + */ +struct ArrayDescriptor { + uint64_t arrayElemSize; + uint64_t arraySize; + uint8_t magicNum = 167; //1 Used to ensure the collected data format is recognized in python scripts. +}; + +/** + * This struct is the format that we record detailed timing and save to disk. 
+ */ +typedef int64_t TIMING_TYPE; + +struct DetailedTimingDescriptor { + TIMING_TYPE timingSize; +}; + +struct ThreadCreatorInfo { + uint64_t threadCreatorFileId; + uint64_t threadExecutionCycles; + uint8_t magicNum = 167; //1 Used to ensure the collected data format is recognized in python scripts. +}; + + +#endif //SCALER_RECTUPLE_H diff --git a/libHook-c/src/include/util/hook/ExtFuncCallHook.h b/libHook-c/src/include/util/hook/ExtFuncCallHook.h index 0affb25a..5a0fe6f9 100644 --- a/libHook-c/src/include/util/hook/ExtFuncCallHook.h +++ b/libHook-c/src/include/util/hook/ExtFuncCallHook.h @@ -64,7 +64,7 @@ namespace scaler { protected: - inline bool shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId); + inline bool shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId, ssize_t& initialGap); inline bool diff --git a/libHook-c/src/include/util/hook/HookContext.h b/libHook-c/src/include/util/hook/HookContext.h index 3a9144a7..26e4ca66 100644 --- a/libHook-c/src/include/util/hook/HookContext.h +++ b/libHook-c/src/include/util/hook/HookContext.h @@ -1,10 +1,11 @@ -#ifndef THREAD_LOCAL_H -#define THREAD_LOCAL_H +#ifndef HOOK_CONTEXT_H +#define HOOK_CONTEXT_H #include #include #include #include +#include #include "ExtFuncCallHook.h" extern "C" { @@ -20,22 +21,13 @@ struct HookTuple { uint32_t clockTicks; //8 }; -struct RecTuple { - uint64_t totalClockCycles; //8 - int64_t count; //8 - int32_t gap; //4 - float meanClockTick; //4 - int32_t durThreshold; //4 - uint32_t flags; //4 -}; - struct HookContext { //todo: Initialize using maximum stack size int64_t indexPosi;//8bytes scaler::Array *recArr; //8bytes //Records which function calls which function for how long, the index is scalerid (Only contains hooked function) //todo: Replace timingMatrix to a class - int64_t curFileId = 1; //Which library created the current thread? 
The default one is main thread + int64_t threadCreatorFileId = 1; //Which library created the current thread? The default one is main thread scaler::ExtFuncCallHook *_this = nullptr; //8bytes //Records which symbol is called for how many times, the index is scalerid (Only contains hooked function) uint64_t startTImestamp; @@ -49,10 +41,12 @@ struct HookContext { uint8_t isMainThread = false; uint8_t initialized = 0; }; + const uint8_t SCALER_TRUE = 145; const uint8_t SCALER_FALSE = 167; extern uint32_t *countingArr; + class DataSaver { public: char initializeMe = 0; @@ -77,5 +71,13 @@ extern pthread_mutex_t threadDataSavingLock; bool initTLS(); +//#define INSTR_TIMING +#ifdef INSTR_TIMING +extern const int TIMING_REC_COUNT; +extern __thread TIMING_TYPE **detailedTimingVectors; +extern __thread TIMING_TYPE *detailedTimingVectorSize; +#endif + + } #endif \ No newline at end of file diff --git a/libHook-c/src/include/util/tool/Logging.h b/libHook-c/src/include/util/tool/Logging.h index e5871921..eca0dfb8 100644 --- a/libHook-c/src/include/util/tool/Logging.h +++ b/libHook-c/src/include/util/tool/Logging.h @@ -5,7 +5,7 @@ #define PRINT_INFO_LOG true #define PRINT_DBG_LOG false -#define PRINT_ERR_LOG false +#define PRINT_ERR_LOG true #if PRINT_DBG_LOG // Print a single log string diff --git a/libHook-c/src/libcProxy.cpp b/libHook-c/src/libcProxy.cpp index 7bf3525a..3badec11 100644 --- a/libHook-c/src/libcProxy.cpp +++ b/libHook-c/src/libcProxy.cpp @@ -15,7 +15,7 @@ main_fn_t real_main; -bool installed=false; +bool installed = false; extern "C" { scaler::Vector threadContextMap; @@ -51,7 +51,7 @@ int doubletake_main(int argc, char **argv, char **envp) { HookContext *curContextPtr = curContext; - curContextPtr->curFileId = 0; + curContextPtr->threadCreatorFileId = 0; curContextPtr->endTImestamp = 0; curContextPtr->startTImestamp = getunixtimestampms(); curContextPtr->isMainThread = true; @@ -86,7 +86,7 @@ int doubletake_libc_start_main(main_fn_t main_fn, int argc, char 
**argv, void (* void exit(int __status) { auto realExit = (exit_origt) dlsym(RTLD_NEXT, "exit"); - if(!installed){ + if (!installed) { realExit(__status); return; } diff --git a/libHook-c/src/pthreadProxy.cpp b/libHook-c/src/pthreadProxy.cpp index 104932ed..aeed7996 100644 --- a/libHook-c/src/pthreadProxy.cpp +++ b/libHook-c/src/pthreadProxy.cpp @@ -41,7 +41,7 @@ void *dummy_thread_function(void *data) { HookContext *curContextPtr = curContext; assert(curContextPtr != NULL); - curContextPtr->curFileId = curContextPtr->_this->pmParser.findExecNameByAddr( + curContextPtr->threadCreatorFileId = curContextPtr->_this->pmParser.findExecNameByAddr( (void *) actualFuncPtr); /** diff --git a/libHook-c/tests/CMakeLists.txt b/libHook-c/tests/CMakeLists.txt index 3203faa0..52a72f86 100644 --- a/libHook-c/tests/CMakeLists.txt +++ b/libHook-c/tests/CMakeLists.txt @@ -79,9 +79,9 @@ target_compile_options(ScalerHook-demoapps-FuncCall PRIVATE ${TEST_FLAGS} -Werro add_executable(ScalerHook-demoapps-TimingAccuracy src/demoapps/TestTimingAccuracy.cpp) -target_link_libraries(ScalerHook-demoapps-TimingAccuracy PUBLIC Testlib-FuncCall Testlib-CallFuncCall KuBoPltHook dl) +target_link_libraries(ScalerHook-demoapps-TimingAccuracy PUBLIC Testlib-FuncCall Testlib-CallFuncCall KuBoPltHook dl) target_compile_options(ScalerHook-demoapps-TimingAccuracy PRIVATE ${TEST_FLAGS} -Werror) -target_link_options(ScalerHook-demoapps-TimingAccuracy PRIVATE "-z" "lazy") +target_link_options(ScalerHook-demoapps-TimingAccuracy PRIVATE "-z" "lazy") add_executable(ScalerHook-demoapps-HookEverything src/demoapps/TestHookEverything.cpp) target_include_directories(ScalerHook-demoapps-HookEverything PUBLIC libtest/header) @@ -207,7 +207,7 @@ add_executable(ScalerHook-parsecapps-swaptions src/parsecapps/swaptions/MaxFunction.cpp src/parsecapps/swaptions/nr_routines.cpp src/parsecapps/swaptions/RanUnif.cpp) -target_link_libraries(ScalerHook-parsecapps-swaptions pthread ScalerHook-HookAutoAsm-C) 
+target_link_libraries(ScalerHook-parsecapps-swaptions pthread ScalerHook-HookAutoAsm-C) target_compile_options(ScalerHook-parsecapps-swaptions PRIVATE ${TEST_FLAGS} "-DENABLE_THREADS" "-fstrict-aliasing" "-fkeep-inline-functions") target_include_directories(ScalerHook-parsecapps-swaptions PRIVATE src/parsecapps/swaptions/include) target_compile_definitions(ScalerHook-parsecapps-swaptions PRIVATE ENABLE_THREADS) @@ -279,6 +279,9 @@ target_link_libraries(ScalerHook-proof-SaveDataUponExit pthread) add_executable(ScalerHook-proof-threadlocalasmarray src/proofconcept/TestAccessThreadLocalArrayInAsm.cpp) target_link_libraries(ScalerHook-proof-threadlocalasmarray pthread) +add_executable(GetUserSysRealTime src/proofconcept/getUserTime.cpp) +target_link_libraries(GetUserSysRealTime) + #add_executable(ScalerHook-demoapps src/proofconcept/testprog.cpp) #target_link_libraries(DemoProg libTest PltHookLib dl) diff --git a/libHook-c/tests/src/proofconcept/binarySegmentSearch.py b/libHook-c/tests/src/proofconcept/binarySegmentSearch.py new file mode 100644 index 00000000..f289d75d --- /dev/null +++ b/libHook-c/tests/src/proofconcept/binarySegmentSearch.py @@ -0,0 +1,32 @@ +# The following algorithms are used in ProcinfoParser to find the left bound in an array representing the starting element of the array +A = [1, 2, 3, 4, 4, 5] + + +def binSearch(A, tgt): + """ + Return lower bound of the segment array + :param A: Array + :param tgt: Target + :return: Lower bound of segments in the segment array + """ + lo = 0 + hi = len(A) + md = 0 + while lo < hi: + md = lo + (hi - lo) // 2 + if A[md] < tgt: + lo = md + 1 + elif A[md] > tgt: + hi = md + elif A[md] == tgt: + hi = md + + return lo-1 + + +testList = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5] +expectedVal = [-1, 0, 1, 2, 4, 5] +for i in range(len(testList)): + lo = binSearch(A, testList[i]) + print(testList[i], lo) + assert (lo == expectedVal[i])