diff --git a/.gitmodules b/.gitmodules
index dd7b2323..b735d478 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
[submodule "Analyzer/ScalerRun/lib/yamlcpp_0_7_0"]
path = Analyzer/ScalerRun/lib/yamlcpp_0_7_0
url = https://github.com/jbeder/yaml-cpp.git
+[submodule "benchmarktookit"]
+ path = benchmarktookit
+ url = ssh://git@code.xttech.top:6081/masslab/benchmarktookit.git
diff --git a/.idea/Scaler.iml b/.idea/Scaler.iml
index 40ba48d7..a45220d6 100644
--- a/.idea/Scaler.iml
+++ b/.idea/Scaler.iml
@@ -5,4 +5,11 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index bf0af86f..a864adec 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -3,6 +3,7 @@
+
diff --git a/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py b/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py
index 77c3f5c4..06383b89 100644
--- a/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py
+++ b/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py
@@ -673,7 +673,7 @@ def main():
# they can refuse it and the aggregator will use a default file path set to finalFold.folded in the repo
root = tk.Tk()
root.withdraw()
- fileName = filedialog.askopenfilename()
+ fileName = ''
# If an input file was selected, then we will ask them for an output file. They can opt to stop the program
# By directly closing the file dialog twice in a row.
@@ -689,7 +689,7 @@ def main():
if fileName == '':
# If no file name then just default to opening a file in the repo
# print(True)
- fileName = "C:/Users/John/PycharmProjects/Scaler/libAnalyzer/tests/PerfTests/finalFold.folded"
+ fileName = "/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Perf-Artifects/out.stacks-folded"
# outFileName = "perfMemcachedData_V2.json"
outFileName = "perfMemcachedData_V2_nokernelinlined.json"
# outFileName = "perfMemcachedData_V2_noinlined.json"
@@ -709,12 +709,12 @@ def main():
# We will handle the data differently depending on if the user wants to use the timing data
# If "y" is entered, then we will use the time stamp info and sample data,
# if not then we will use sample data by default
- timestampInput = input("Use Timestamps? y/n Default is n: ")
+ timestampInput = 'n'
if timestampInput == "y":
# print(timestampInput)
useTimestamps = True
- inlinedInput = input("Attribute Inlined functions to last known library? y/n Default is n: ")
+ inlinedInput = 'n'
if inlinedInput == "y":
# print(attributeInline)
attributeInline = True
diff --git a/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml b/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml
index b7978a8d..2aa44330 100644
--- a/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml
+++ b/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml
@@ -37,8 +37,9 @@
+
-
+
@@ -61,6 +62,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -85,6 +113,8 @@
+
+
@@ -92,20 +122,6 @@
-
-
-
- file://$PROJECT_DIR$/main.py
- 107
-
-
-
- file://$PROJECT_DIR$/main.py
- 142
-
-
-
-
@@ -113,6 +129,7 @@
-
+
+
\ No newline at end of file
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py b/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py
deleted file mode 100644
index 788350d8..00000000
--- a/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py
+++ /dev/null
@@ -1,195 +0,0 @@
-import os
-import traceback
-from matplotlib import cm
-import matplotlib.pyplot as plt
-import pandas as pd
-import struct
-import numpy as np
-from multiprocessing import Pool
-from multiprocessing import Pool, cpu_count
-import time
-
-from datastructure.TimingStruct import ArrayDescriptor
-from util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
-from util.Parser.TimeOutputPrarser import readSymbolFiles, aggregatePerThreadArray, readTimingStruct
-import numpy as np
-
-
-def calcInvokedApis(scalerDataFolder, recInfo):
- invokedAPIs = []
- totalAPIs = []
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
- curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
- invokedAPIs.append(len(curThreadInvokedRecArray))
- totalAPIs.append(len(curThreadRecArray))
- return invokedAPIs, totalAPIs
-
-
-def printCoverageReport(scalerDataFolder, recInfo, invokedAPIs, totalAPIs):
- '''
- Coverage Report
- '''
- print(' \t ', end='')
- for threadId in recInfo.threadIdList:
- print(threadId, end='\t')
- print()
-
- print('Invoked %', end='\t')
- for perc in np.array(invokedAPIs) / np.array(totalAPIs):
- print('%2.2f%%' % (perc * 100), end='\t')
- print()
-
- print('Invoked APIs', end='\t')
- for invokedAPI in invokedAPIs:
- print(invokedAPI, end='\t')
-
- print()
- print('Total APIs', end='\t')
- for totalAPI in totalAPIs:
- print(totalAPI, end='\t')
-
-
-def drawCountingHist(scalerDataFolder, recInfo):
- '''
- Counting histogram Report
- '''
- histogramRoot = os.path.join(scalerDataFolder, 'InvocationCountHist')
- if not os.path.exists(histogramRoot):
- os.mkdir(histogramRoot)
-
- print()
-
- # create 3 data sets with 1,000 samples
- mu, sigma = 200, 25
- x = mu + sigma * np.random.randn(1000, 3)
-
- totalCountArr = None
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
- # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
-
- times = np.array([rec.count for rec in curThreadRecArray])
- if totalCountArr is None:
- totalCountArr = times
- else:
- totalCountArr = np.vstack([totalCountArr, times])
-
- plt.figure()
- plt.hist(times, range=(1, times.max()), edgecolor="black", bins=50)
- plt.xlabel('Invocation counts')
- plt.ylabel('API number')
- plt.title('Histogram of invocation counts for thread %s' % (str(threadId)))
-
- plt.savefig(os.path.join(histogramRoot, threadId + '.png'))
- plt.close()
- # print(threadId, np.max(times))
-
- totalCountArr = totalCountArr.transpose()
- # totalCountArr = totalCountArr[np.where(totalCountArr > 0)]
- plt.figure()
- print(totalCountArr.max())
- plt.hist(totalCountArr, range=(1, totalCountArr.max()), bins=50, stacked=True)
- plt.xlabel('Invocation counts')
- plt.ylabel('API number')
- plt.title('Histogram of invocation counts for all threads staked')
- plt.savefig(os.path.join(histogramRoot, 'total.png'))
- plt.close()
-
-
-def printInvocNumberPerThread(scalerDataFolder):
- if scalerDataFolder is None:
- print()
- return
-
- recInfo = readSymbolFiles(scalerDataFolder)
- invokedAPIs, totalAPIs = calcInvokedApis(scalerDataFolder, recInfo)
-
- totalInvocationCnts = 0
-
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
- # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
-
- times = np.array([rec.count for rec in curThreadRecArray])
- totalInvocationCnts += np.sum(times)
-
- min = np.min(invokedAPIs)
- q1 = np.quantile(invokedAPIs, 0.25)
- q2 = np.quantile(invokedAPIs, 0.5)
- q3 = np.quantile(invokedAPIs, 0.75)
- q4 = np.quantile(invokedAPIs, 1)
- iqr = q3 - q1
- qLower = q1 - 1.5 * iqr
- qUpper = q3 + 1.5 * iqr
- assert (np.unique(totalAPIs).shape[0] == 1)
- assert (q4 == np.max(invokedAPIs))
- print(scalerDataFolder.split('/')[-2], min, q1, q2, q3, q4, iqr, qLower, qUpper, len(invokedAPIs), totalAPIs[0],
- totalInvocationCnts, sep='\t')
-
-
-def printInvocCntPerAPI(scalerDataFolder):
- if scalerDataFolder is None:
- print()
- return
-
- recInfo = readSymbolFiles(scalerDataFolder)
-
- totalCountArr = None
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
- # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
-
- times = np.array([rec.count for rec in curThreadRecArray])
- if totalCountArr is None:
- totalCountArr = times
- else:
- totalCountArr = np.vstack([totalCountArr, times])
- totalCountArr = np.array(totalCountArr).sum(axis=0)
-
- totalCountArr = totalCountArr[np.where(totalCountArr > 0)]
- min = np.min(totalCountArr)
- q1 = np.quantile(totalCountArr, 0.25)
- q2 = np.quantile(totalCountArr, 0.5)
- q3 = np.quantile(totalCountArr, 0.75)
- q4 = np.quantile(totalCountArr, 1)
- iqr = q3 - q1
- qLower = q1 - 1.5 * iqr
- qUpper = q3 + 1.5 * iqr
-
- assert (q4 == np.max(totalCountArr))
- print(scalerDataFolder.split('/')[-2], min, q1, q2, q3, q4, iqr, qLower, qUpper, np.sum(totalCountArr), sep='\t')
-
-
-# steven@masslabserv1:~/Downloads/2022-11-23_10-21-06$ find . -name "scalerdata*"
-scalerDataFolders = [
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/scalerdata_19148850692747664',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/scalerdata_19148905483325260',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.canneal_0/scalerdata_19149009421840348',
- None,
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.facesim_0/scalerdata_19149183735878138',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.ferret_0/scalerdata_19149441937366104',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/scalerdata_19149498481345624',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/scalerdata_19149660473046832',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/scalerdata_19149730167129240',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/scalerdata_19150235160442436',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/scalerdata_19150507898053624',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.vips_0/scalerdata_19150561039693292',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.x264_0/scalerdata_19150582352742288',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.db.memcached.memcached_1_6_17_0/scalerdata_19150608805586386',
- None,
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.db.redis.redis_7_0_4_0/scalerdata_19150762055485288',
- None,
- None,
- None,
- None,
- None,
-]
-
-print('Thread inovked API # imbalance Analysis')
-for scalerDataFolder in scalerDataFolders:
- printInvocNumberPerThread(scalerDataFolder)
-
-print('API inovked CNT Analysis')
-for scalerDataFolder in scalerDataFolders:
- printInvocCntPerAPI(scalerDataFolder)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py b/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py
index 3153795c..d203d2fd 100644
--- a/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py
@@ -9,6 +9,10 @@
from multiprocessing import Pool, cpu_count
import time
+from Analyzer.PyVisualizer.src.V3.datastructure.TimingStruct import ArrayDescriptor
+from Analyzer.PyVisualizer.src.V3.util.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from Analyzer.PyVisualizer.src.V3.util.TimeOutputPrarser import readSymbolFiles
+
def removeOutliers(x):
a = np.array(x)
@@ -29,43 +33,39 @@ def shouldSkip(timingArr):
return c1 == 0 and c2 == 0
-def saveSinglePlot(ROOT_PATH, symbolNameList, threadIdSet, symId):
+def saveSinglePlot(ROOT_PATH, symbolNameList, threadId, tgtSymId):
# hasPoints = False
+ # ROOT_PATH: str, symbolNameList: list, threadId: str, tgtSymIds: list):
+ detailedTimingDict = parseSingleSymDetailedTiming(ROOT_PATH, threadId, [tgtSymId])
+
+ for symId, detailedTimingArr in detailedTimingDict.items():
+ fig, (ax1, ax2) = plt.subplots(2)
+
+ # if detailedTimingArr.shape[0] < 1001:
+ # continue
+ #
+ # skipThis = shouldSkip(detailedTimingArr)
+ # if skipThis:
+ # continue
+
+ ax1.scatter(np.arange(detailedTimingArr.shape[0]), detailedTimingArr, s=10)
+ # Calculate the first 500 mean
+ mean = np.average(detailedTimingArr[0:500])
+ meanUpperbound = mean * (1 + 0.01)
+ meanLowerbound = mean * (1 - 0.01)
+
+ ax2.scatter(np.arange(min(1000,detailedTimingArr.shape[0])), detailedTimingArr[0:min(1000,detailedTimingArr.shape[0])], s=10)
+ ax2.hlines(meanUpperbound, 0, detailedTimingArr.shape[0], colors='red')
+ ax2.hlines(meanLowerbound, 0, detailedTimingArr.shape[0], colors='red')
+ hasPoints = True
+
+ print(os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
+ # if hasPoints:
+ fig.savefig(
+ os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
+ print(
+ os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
- for i, threadId in enumerate(threadIdSet):
- with open(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)),
- 'rb') as f:
- fig, (ax1, ax2) = plt.subplots(2)
- byteArr1 = f.read()
- elemSize = np.fromfile(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)),
- np.int64, count=1, offset=0)[0]
- if elemSize == 0:
- continue
- symbolTiming = np.fromfile(
- os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)),
- np.int64, offset=8)
- skipThis = shouldSkip(symbolTiming)
- if skipThis:
- continue
- if symbolTiming.shape[0] < 1001:
- continue
- ax1.scatter(np.arange(elemSize), symbolTiming, s=10)
- # Calculate the first 500 mean
- mean = np.average(symbolTiming[0:500])
- meanUpperbound = mean * (1 + 0.01)
- meanLowerbound = mean * (1 - 0.01)
-
- # ax2.text(i*50, i * 20, str(np.var(symbolTiming[0:500])))
- ax2.scatter(np.arange(1000), symbolTiming[0:1000], s=10)
- ax2.hlines(meanUpperbound, 0, elemSize, colors='red')
- ax2.hlines(meanLowerbound, 0, elemSize, colors='red')
- # hasPoints = True
-
- # if hasPoints:
- fig.savefig(
- os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
- print(
- os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
return 0
@@ -78,51 +78,35 @@ def error_callback(e):
def doIt(ROOT_PATH, pool, rltList):
print('========================', ROOT_PATH)
allFiles = os.listdir(ROOT_PATH)
- threadIdSet = set({})
- maxSymId = 0
+ symbolNum = 0
+
+ recInfo = readSymbolFiles(ROOT_PATH)
+ threadSymInfo = dict({}) # Threadid : symbol size
for fileName in allFiles:
if fileName.startswith('threadDetailedTiming') and fileName.endswith('.bin'):
- _, threadId, symbolId = fileName.replace('.bin', '').split('_')
- symbolId = int(symbolId)
- if symbolId > maxSymId:
- maxSymId = symbolId
- threadIdSet.add(threadId)
-
- df = pd.read_csv(os.path.join(ROOT_PATH, 'fileName.txt'))
- fileNameList = df['pathName'].to_list()
-
- df = pd.read_csv(os.path.join(ROOT_PATH, 'symbolInfo.txt'))
- symbolNameList = df['funcName'].to_list()
- symbolFileIdList = df['fileId'].to_list()
- symIdInFile = df['symIdInFile'].to_list()
-
- print('Deploying tasks to pool')
- # for symId in range(maxSymId):
- # saveSinglePlot(ROOT_PATH, symbolNameList, threadIdSet, symId)
- for symId in range(maxSymId):
- res = pool.apply_async(saveSinglePlot, args=[ROOT_PATH, symbolNameList, threadIdSet, symId],
- error_callback=error_callback)
- rltList.append(res)
-
-
-pool = Pool(60)
+ _, threadId = fileName.replace('.bin', '').split('_')
+ with open(os.path.join(ROOT_PATH, fileName), 'rb') as f:
+ symDetailedTimingDesc = ArrayDescriptor()
+ f.readinto(symDetailedTimingDesc)
+ assert (symDetailedTimingDesc.arrayElemSize == 0)
+ assert (symDetailedTimingDesc._magicNum == 167)
+ symbolNum = symDetailedTimingDesc.arraySize
+ threadSymInfo[threadId] = symbolNum
+
+ for symId in range(symbolNum):
+ res = pool.apply_async(saveSinglePlot, args=[ROOT_PATH, recInfo.symbolNameList, threadId, symId],
+ error_callback=error_callback)
+ rltList.append(res)
+
+ return rltList
+
+
+pool = Pool(1)
rltList = []
-for i in [
- 'scalerdata_6364935512299934',
- 'scalerdata_6364979105953714',
- 'scalerdata_6365014036860570',
- 'scalerdata_6365088124846144',
- 'scalerdata_6365123879328866',
- 'scalerdata_6365618607468352',
- 'scalerdata_6365739459778370',
- 'scalerdata_6365776935349298',
- 'scalerdata_6365841128804326',
- 'scalerdata_6366139523773026',
- 'scalerdata_6366165053302622'
-]:
- ROOT_PATH = '/media/umass/datasystem/steven/Downloads/CurStrategy1/' + i
+for ROOT_PATH in ['/tmp/scalerdata_14676207526291652']:
doIt(ROOT_PATH, pool, rltList)
+
pool.close()
while len(rltList) > 0:
time.sleep(2)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py b/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py
new file mode 100644
index 00000000..e38a8cbd
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py
@@ -0,0 +1,239 @@
+import os
+import traceback
+from collections import defaultdict
+
+from matplotlib import cm
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+from Analyzer.PyVisualizer.src.V3.util.Quantile import calcQuantile
+from datastructure.TimingStruct import ArrayDescriptor
+from util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from util.Parser.TimeOutputPrarser import readSymbolFiles, aggregatePerThreadArray, readTimingStruct
+import numpy as np
+
+
+def calcInvokedApiNum(scalerDataFolder, recInfo):
+ invokedAPIs = []
+ totalAPIs = []
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
+ invokedAPIs.append(len(curThreadInvokedRecArray))
+ totalAPIs.append(len(curThreadRecArray))
+ return invokedAPIs, totalAPIs
+
+
+def calcInvokedApiCNT(scalerDataFolder, recInfo):
+ invokedAPICnts = []
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ invokedAPICnts.append(np.sum([rec.count for rec in curThreadRecArray]))
+ return invokedAPICnts
+
+
+def printInvocNumberByEachThread(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+
+ recInfo = readSymbolFiles(scalerDataFolder)
+ invokedAPIs, totalAPIs = calcInvokedApiNum(scalerDataFolder, recInfo)
+
+ totalInvocationCnts = 0
+
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
+
+ times = np.array([rec.count for rec in curThreadRecArray])
+ totalInvocationCnts += np.sum(times)
+
+ minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(invokedAPIs)
+
+ print(scalerDataFolder.split('/')[-3], minimum, q1, q2, q3, q4, iqr, qLower, qUpper, len(invokedAPIs), totalAPIs[0],
+ totalInvocationCnts, sep='\t')
+
+
+def printInvocCntByEachThread(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+
+ recInfo = readSymbolFiles(scalerDataFolder)
+ invokedAPIs = calcInvokedApiCNT(scalerDataFolder, recInfo)
+
+ totalInvocationCnts = 0
+
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
+
+ times = np.array([rec.count for rec in curThreadRecArray])
+ totalInvocationCnts += np.sum(times)
+
+ minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(invokedAPIs)
+
+ print(scalerDataFolder.split('/')[-3], minimum, q1, q2, q3, q4, iqr, qLower, qUpper, totalInvocationCnts, sep='\t')
+
+
+def printInvocCnt(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+
+ recInfo = readSymbolFiles(scalerDataFolder)
+
+ totalCountArr = None
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
+
+ times = np.array([rec.count for rec in curThreadRecArray])
+ if totalCountArr is None:
+ totalCountArr = times
+ else:
+ totalCountArr = np.vstack([totalCountArr, times])
+
+ if len(totalCountArr.shape)==2:
+ # if totalCountArr.shape
+ totalCountArr = np.array(totalCountArr).sum(axis=0)
+
+
+ totalCountArr1 = totalCountArr[np.where(totalCountArr > 0)]
+
+ minima, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(totalCountArr1)
+ print(scalerDataFolder.split('/')[-3], minima, q1, q2, q3, q4, iqr, qLower, qUpper, np.sum(totalCountArr1), sep='\t')
+
+
+class APIInfo:
+ def __init__(self):
+ self.name = None
+ self.cntSum = []
+ self.timeAvg = []
+ self.timeVa = []
+ self.timeAvgDenoise = None
+ self.timeVarDenoise = None
+
+
+def printPerAPIInfoAndCnts(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+ print(scalerDataFolder.split('/')[-3])
+ recInfo = readSymbolFiles(scalerDataFolder)
+
+ totalCountArr = None
+ totalVarianceArry = None
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+
+ counts = np.array([rec.count for rec in curThreadRecArray])
+ if totalCountArr is None:
+ totalCountArr = counts
+ else:
+ totalCountArr = np.vstack([totalCountArr, counts])
+ if len(totalCountArr.shape)==2:
+ # if totalCountArr.shape
+ totalCountArr = np.array(totalCountArr).sum(axis=0)
+
+ sortedNameCntTuple = [
+ (i, recInfo.symbolNameList[i], recInfo.symbolFileIdList[i], recInfo.fileNameList[recInfo.symbolFileIdList[i]],
+ recInfo.realFileIdList[i], recInfo.fileNameList[recInfo.realFileIdList[i]], totalCountArr[i]) for i in
+ range(totalCountArr.shape[0]) if
+ totalCountArr[i] > 0]
+
+ for symId, symName, invokerFIleId, invokerFileName, realFileId, realFileName, count in sorted(sortedNameCntTuple,
+ reverse=True,
+ key=lambda x: x[-1]):
+ print(symId, symName, invokerFIleId, invokerFileName.split('/')[-1], realFileId, realFileName.split('/')[-1],
+ count, sep='\t')
+
+
+def printPerLibInfoAndCnts(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+ print(scalerDataFolder.split('/')[-3])
+ recInfo = readSymbolFiles(scalerDataFolder)
+
+ totalCountArr = None
+ totalVarianceArry = None
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+
+ counts = np.array([rec.count for rec in curThreadRecArray])
+ if totalCountArr is None:
+ totalCountArr = counts
+ else:
+ totalCountArr = np.vstack([totalCountArr, counts])
+ if len(totalCountArr.shape)==2:
+ # if totalCountArr.shape
+ totalCountArr = np.array(totalCountArr).sum(axis=0)
+
+ sortedNameCntTuple = [
+ (i, recInfo.symbolNameList[i], recInfo.symbolFileIdList[i], recInfo.fileNameList[recInfo.symbolFileIdList[i]],
+ recInfo.realFileIdList[i], recInfo.fileNameList[recInfo.realFileIdList[i]], totalCountArr[i]) for i in
+ range(totalCountArr.shape[0]) if
+ totalCountArr[i] > 0]
+
+ libFileDict = defaultdict(int)
+
+ for symId, symName, invokerFIleId, invokerFileName, realFileId, realFileName, count in sorted(sortedNameCntTuple,
+ reverse=True,
+ key=lambda x: x[-1]):
+ libFileDict[realFileName] += count
+
+ countList = list(libFileDict.items())
+ countList = sorted(countList, key=lambda x: x[0])
+ for name, count in countList:
+ print(name, count, sep='\t')
+
+
+# steven@masslabserv1:~/Downloads/DistributionAnalysis$ find . -name "scalerdata*"
+scalerDataFolders = [
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.redis.redis_7_0_4_0/Scaler-DETAIL-Artifects/scalerdata_1100850174945384',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.sqlite.sqlite_3_39_4_0/Scaler-DETAIL-Artifects/scalerdata_1101150204449494'
+]
+
+print('Thread inovked API #')
+for scalerDataFolder in scalerDataFolders:
+ printInvocNumberByEachThread(scalerDataFolder)
+
+print('Thread inovked API CNT')
+for scalerDataFolder in scalerDataFolders:
+ printInvocCntByEachThread(scalerDataFolder)
+
+print('API invocation CNT Analysis')
+for scalerDataFolder in scalerDataFolders:
+ printInvocCnt(scalerDataFolder)
+
+print('Per-API infos')
+for scalerDataFolder in scalerDataFolders:
+ printPerAPIInfoAndCnts(scalerDataFolder)
+
+print('Per-Lib infos')
+for scalerDataFolder in scalerDataFolders:
+ printPerLibInfoAndCnts(scalerDataFolder)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py b/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py
new file mode 100644
index 00000000..4e4a6661
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py
@@ -0,0 +1,180 @@
+import math
+import os
+import traceback
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+import math
+
+from Analyzer.PyVisualizer.src.V3.datastructure.TimingStruct import ArrayDescriptor
+from Analyzer.PyVisualizer.src.V3.util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from Analyzer.PyVisualizer.src.V3.util.Parser.TimeOutputPrarser import readSymbolFiles, readTimingStruct
+from Analyzer.PyVisualizer.src.V3.util.Quantile import calcQuantile
+
+
+class APIMetric:
+ def __init__(self):
+ self.estimated = False
+ self.estimatedSum = 0
+ self.realSum = 0
+ self.invocationCnt = 0
+
+ def getErrorRate(self):
+ assert (self.realSum > 0)
+ return abs(self.realSum - self.estimatedSum) / self.realSum
+
+
+def methodBoundAndNaiveClipping(symId, recInfo, timingArr):
+ def shouldSkip(timingArr):
+ if timingArr.shape[0] <= 500:
+ return False
+ mean = np.average(timingArr[0:500])
+ meanUpperbound = mean * (1 + 0.50)
+ meanLowerbound = mean * (1 - 0.50)
+
+ c1 = np.where(timingArr[500:1000] > meanUpperbound)[0].shape[0]
+ c2 = np.where(timingArr[500:1000] < meanLowerbound)[0].shape[0]
+ return c1 == 0 and c2 == 0
+
+ def removeOutliersByPercentage(x, lowerPerc, upperPerc):
+ a = np.array(x)
+ upper_quartile = np.percentile(a, upperPerc)
+ lower_quartile = np.percentile(a, lowerPerc)
+ return x[np.logical_and((lower_quartile <= x), (x <= upper_quartile))]
+
+ skipped = shouldSkip(timingArr)
+ estimatedSum = np.sum(timingArr)
+
+ if skipped:
+ outlierRemovedTimArray = removeOutliersByPercentage(timingArr[500:1000], 5, 95)
+ estimatedSum = np.average(outlierRemovedTimArray) * timingArr.shape[0]
+
+ return skipped, estimatedSum
+
+
+def methodPreEstimation(symId, recInfo, timingArr):
+ shouldSkip = False
+
+ if timingArr.shape[0] <= 500:
+ shouldSkip = False
+ else:
+ shouldSkip = True
+
+ def removeOutliers(timingArr):
+ rlt = []
+ prevVal = timingArr[0]
+ threshold = 100
+ for i in range(1, timingArr.shape[0]):
+ if abs(timingArr[i] - prevVal) < threshold:
+ rlt.append(timingArr[i])
+ else:
+ rlt.append(prevVal)
+ prevVal = timingArr[i]
+ return np.array(rlt)
+
+ realSum = np.sum(timingArr)
+
+ if not shouldSkip:
+ return shouldSkip, realSum
+
+ # Test Prediction
+ outlierRemovedFirst500 = removeOutliers(timingArr[0:500])
+ estimatedSum = np.mean(outlierRemovedFirst500) * timingArr.shape[0]
+
+ if abs(estimatedSum-realSum)/realSum < 0.02:
+ shouldSkip=True
+ return shouldSkip, estimatedSum
+ else:
+ shouldSkip=False
+ return shouldSkip, realSum
+
+def methodEstimation(symId, recInfo, timingArr):
+ pass
+
+
def analyzeOutlierRemovalTechnique(ROOT_PATH, methodFunction):
    """Evaluate one estimation strategy over every thread-timing file under ROOT_PATH.

    methodFunction has signature (symId, recInfo, timingArr) -> (estimated,
    estimatedSum). Per-API error rates are aggregated across all threads and a
    single tab-separated summary row is printed per application.
    """
    if ROOT_PATH is None:
        print()  # prints a blank line for missing inputs
        return
    allFiles = os.listdir(ROOT_PATH)
    symbolNum = 0

    allInvocationRelationCnt = 0
    skippedApiCnt = 0
    allNonZeroApiCnt = 0

    recInfo = readSymbolFiles(ROOT_PATH)
    threadSymInfo = dict({})  # Threadid : symbol size

    # print('=====> ', ROOT_PATH)
    apiMetricsPerApp = []
    # NOTE(review): totalAPICount is reassigned for every thread below, so the
    # printed value reflects only the last thread processed -- confirm intent.
    totalAPICount = 0

    totalInvocationCnt = 0
    for fileName in allFiles:
        if fileName.startswith('threadDetailedTiming') and fileName.endswith('.bin'):
            # Read symbol number in threads
            _, threadId = fileName.replace('.bin', '').split('_')

            detailedTimingArr = parseSingleSymDetailedTiming(ROOT_PATH, threadId, None)
            recArrForThisThread = readTimingStruct(ROOT_PATH, threadId)
            totalAPICount = len(detailedTimingArr)
            apiMetricsPerThread = []

            for symId, timingArr in detailedTimingArr.items():
                curMetric = APIMetric()
                curMetric.estimated, curMetric.estimatedSum = methodFunction(symId, recInfo, timingArr)
                curMetric.realSum = np.sum(timingArr)
                curMetric.invocationCnt = recArrForThisThread[symId].count
                totalInvocationCnt += recArrForThisThread[symId].count
                # Only APIs with real time and a non-zero error rate contribute.
                if curMetric.realSum > 0 and curMetric.getErrorRate() > 0:
                    apiMetricsPerThread.append(curMetric)
                    apiMetricsPerApp.append(curMetric)
            # if len(apiMetricsPerThread) > 0:
            #     # Print table for estimated value per API
            #     minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(
            #         [apiMetric.getErrorRate() for apiMetric in apiMetricsPerThread])
            #     estimatedNum = np.sum([1 for apiMetric in apiMetricsPerThread if apiMetric.estimated])
            #     totalAPICount = len(detailedTimingArr)
            #     print(fileName, minimum, q1, q2, q3, q4, iqr, qLower, qUpper, estimatedNum, totalAPICount, sep='\t')

    if len(apiMetricsPerApp) > 0:
        # Quantiles of per-API error rates plus skip statistics, one row per app.
        minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(
            [apiMetric.getErrorRate() for apiMetric in apiMetricsPerApp])
        estimatedNum = np.sum([1 for apiMetric in apiMetricsPerApp if apiMetric.estimated])
        skippedApiCnt = np.sum([apiMetric.invocationCnt for apiMetric in apiMetricsPerApp if apiMetric.estimated])
        # NOTE(review): uses path component [-3] here but [-2] in the else
        # branch -- confirm which one identifies the application.
        print(ROOT_PATH.split('/')[-3], minimum, q1, q2, q3, q4, iqr, qLower, qUpper, estimatedNum, totalAPICount,
              skippedApiCnt, skippedApiCnt / totalInvocationCnt, totalInvocationCnt,
              sep='\t')
    else:
        print(ROOT_PATH.split('/')[-2])
+
+
# Evaluate the pre-estimation strategy on every collected scalerdata folder
# (one entry per benchmark application); each call prints one summary row.
for ROOT_PATH in [
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.redis.redis_7_0_4_0/Scaler-DETAIL-Artifects/scalerdata_1100850174945384',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.sqlite.sqlite_3_39_4_0/Scaler-DETAIL-Artifects/scalerdata_1101150204449494'
]:
    analyzeOutlierRemovalTechnique(ROOT_PATH, methodPreEstimation)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeParsecOutput.py b/Analyzer/PyVisualizer/src/V3/AnalyzeParsecOutput.py
new file mode 100644
index 00000000..c8c15acd
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeParsecOutput.py
@@ -0,0 +1,152 @@
+import os
+import numpy as np
+import tempfile
+import subprocess
+
+'''
+This script parses the output of parsecmgmt
+'''
+
+
def findAppStart(lines):
    """Locate every benchmark run inside parsecmgmt output.

    Returns (benchmarkName, outputTuple): benchmarkName[i] is the i-th run's
    benchmark name; outputTuple[i] is [startLine, endLine] bounding its output.
    """
    benchmarkName = []
    outputTuple = []

    # Pass 1: start markers carry the benchmark name.
    for lineNo, text in enumerate(lines):
        if text.startswith('[PARSEC] [========== Running '):
            cleaned = text.replace('[PARSEC] [========== Running benchmark', '')
            cleaned = cleaned.replace(' [1] ==========]', '')
            benchmarkName.append(cleaned.strip())
            outputTuple.append([lineNo, 0])

    # Pass 2: end markers are matched to starts in order of appearance.
    endIdx = 0
    for lineNo, text in enumerate(lines):
        if text.startswith('[PARSEC] [---------- End of output ----------]'):
            outputTuple[endIdx][1] = lineNo
            endIdx += 1
    return benchmarkName, outputTuple
+
+
def extractRealTimeAndMemory(lines, outputTuple):
    """Pull /usr/bin/time statistics for each benchmark run.

    The stats line is expected one line above each end marker, formatted as
    'real:<sec>, user:<sec>, sys:<sec>, memKB:<kb>'. Returns
    (realSeconds, memKB) as numpy arrays, one entry per run.
    """
    realTimeList = []
    memList = []
    for _, endI in outputTuple:
        fields = lines[endI - 1].split(',')
        # First field is wall-clock seconds, last is peak memory in KB.
        realTimeList.append(float(fields[0].split(':')[1].strip()))
        memList.append(float(fields[-1].split(':')[1].strip()))
    return np.array(realTimeList), np.array(memList)
+
+
def parseOutputFile(PARSEC_OUTPUT_FOLDER):
    """Parse every run's captured output in PARSEC_OUTPUT_FOLDER.

    Each file is named '<runId>.txt'. Returns (benchmarkName, memArray,
    timeArray), where the arrays are (numRuns, numBenchmarks) matrices and
    benchmarkName comes from the last file parsed (assumed identical across
    runs -- TODO confirm).

    Bug fix: the original assigned the first file's rows and then vstacked the
    same rows again, so the first run was counted twice in both matrices,
    skewing downstream averages and standard deviations.
    """
    benchmarkName = None
    timeRows = []
    memRows = []

    for textFileName in os.listdir(PARSEC_OUTPUT_FOLDER):
        # Make sure the file is named '<runId>.txt'
        assert (str.isdigit(textFileName[:-4]))
        with open(os.path.join(PARSEC_OUTPUT_FOLDER, textFileName)) as f:
            lines = f.readlines()
        benchmarkName, outputTuple = findAppStart(lines)
        realTimeList, memList = extractRealTimeAndMemory(lines, outputTuple)
        timeRows.append(realTimeList)
        memRows.append(memList)

    # vstack keeps a single run as a (1, numBenchmarks) matrix.
    timeArray = np.vstack(timeRows) if timeRows else None
    memArray = np.vstack(memRows) if memRows else None
    return benchmarkName, memArray, timeArray
+
+
def printMatrix(matrixName, preloadList, benchmarkNameList, matrix):
    """Print `matrix` as a tab-separated table.

    Benchmark names form the column header; each row is labelled with the
    corresponding preload-configuration name from preloadList.
    """
    print(matrixName)
    # Header row: blank corner cell, then one benchmark name per column.
    print(' \t', end='')
    for header in benchmarkNameList:
        print(header, end='\t')
    print()

    # Body: row label from preloadList, then the row's values.
    for rowIdx in range(matrix.shape[0]):
        print(preloadList[rowIdx][0], end='\t')
        for colIdx in range(matrix.shape[1]):
            print(matrix[rowIdx][colIdx], end='\t')
        print()
    print()
+
+
def runBenchmark(runTimes, configNames, preloadList, inputType, threadNum, outputFolder):
    """Run parsecmgmt `runTimes` times for every preload configuration.

    Each run's combined stdout/stderr is captured to
    <outputFolder>/<preloadName>/<runId>.txt.

    :param runTimes: repetitions per configuration
    :param configNames: parsec package names (passed via -p)
    :param preloadList: (name, shell-prefix) tuples; a non-empty prefix is
                        forwarded to parsecmgmt via -s
    :param inputType: parsec input size for -i, e.g. 'simsmall'
    :param threadNum: thread count for -n
    :param outputFolder: directory receiving one subdirectory per configuration
    """
    for (preloadName, preloadCmd) in preloadList:
        curOutputDir = os.path.join(outputFolder, preloadName)
        os.mkdir(curOutputDir)
        for curRunId in range(runTimes):
            curOutputFile = os.path.join(curOutputDir, '%d.txt' % curRunId)

            cmd = 'parsecmgmt -a run'
            cmd += ''.join([''.join([' -p ', config]) for config in configNames])
            if preloadCmd.strip() != '':
                cmd += ''.join([' -s "', preloadCmd, '"'])
            cmd += ''.join([' -n ', str(threadNum)])
            cmd += ''.join([' -i ', inputType])
            cmd += ''.join([' > ', curOutputFile])
            # Bug fix: the original appended a second independent redirection
            # ('2> <same file>'), which opens the file twice with truncation so
            # the two streams clobber each other. '2>&1' duplicates stdout's
            # descriptor instead, interleaving both streams correctly.
            cmd += ' 2>&1'

            print('Running:', cmd)
            if os.system(cmd) != 0:
                print(cmd, 'failed')
+
+
def packOutput(preloadList, outputFolder):
    """Aggregate per-configuration run results into summary matrices.

    For every (preloadName, _) in preloadList, parses the run outputs under
    <outputFolder>/<preloadName> and stacks per-configuration averages and
    standard deviations row by row.

    Returns (avgRealTimes, stdRealTimes, avgMem, stdMem, benchmarkNames).
    """
    def _stack(acc, row):
        # First row starts the matrix; later rows are appended beneath it.
        return row if acc is None else np.vstack([acc, row])

    stdRealTimes = None
    avgRealTimes = None
    avgMem = None
    stdMem = None
    outputBenchNameList = None
    for (preloadName, preloadCmd) in preloadList:
        curOutputDir = os.path.join(outputFolder, preloadName)
        '''
        Calculate mean and average of the results
        '''
        outputBenchNameList, memArray, timeArray = parseOutputFile(curOutputDir)
        avgRealTimes = _stack(avgRealTimes, np.average(timeArray, axis=0))
        stdRealTimes = _stack(stdRealTimes, np.std(timeArray, axis=0))
        avgMem = _stack(avgMem, np.average(memArray, axis=0))
        stdMem = _stack(stdMem, np.std(memArray, axis=0))
    return avgRealTimes, stdRealTimes, avgMem, stdMem, outputBenchNameList
+
+
'''
Find max output id
'''

# Wraps each benchmark with /usr/bin/time so wall clock and peak memory (KB)
# appear on a single parseable line (see extractRealTimeAndMemory).
TIME_COMMAND = "/usr/bin/time -f 'real:%e, user:%U, sys:%S, memKB:%M' "
RUNTIMES = 1
# Parsec packages to benchmark.
CONFIG_NAMES = ['blackscholes', 'bodytrack', 'facesim', 'ferret', 'fluidanimate', 'freqmine', 'raytrace', 'swaptions',
                'vips', 'x264']
# PRELOAD_LIST = [('Default', TIME_COMMAND), ('Perf', TIME_COMMAND + 'perf record -g -o perf.data')]
# (name, shell-prefix) pairs; this prefix injects the Scaler hook via LD_PRELOAD.
PRELOAD_LIST = [('Scaler', 'export LD_PRELOAD=/media/umass/datasystem/steven/Scaler/cmake-build-release/libHook-c/libScalerHook-HookAutoAsm-C.so')]

# Run everything into a throw-away directory, then summarize and print.
outputFolder = tempfile.mkdtemp()
runBenchmark(RUNTIMES, CONFIG_NAMES, PRELOAD_LIST, 'simsmall', 64, outputFolder)

avgRealTimes, stdRealTimes, avgMem, stdMem, outputBenchNameList = packOutput(PRELOAD_LIST, outputFolder)
printMatrix('Table of average runtime', PRELOAD_LIST, outputBenchNameList, avgRealTimes)
printMatrix('Table of std runtime', PRELOAD_LIST, outputBenchNameList, stdRealTimes)
printMatrix('Table of average memory', PRELOAD_LIST, outputBenchNameList, avgMem)
printMatrix('Table of std memory', PRELOAD_LIST, outputBenchNameList, stdMem)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py b/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py
new file mode 100644
index 00000000..d25df8a0
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py
@@ -0,0 +1,108 @@
+import os
+import traceback
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+
class StackElem:
    """One frame of a perf call stack: the library and function it refers to."""

    def __init__(self, libName, funcName):
        self.libName = libName
        self.funcName = funcName

    def __str__(self):
        return f"{self.libName} {self.funcName}"
+
+
def analyzePerfScript(rootPath):
    """Count, per library, how many call-stack frames retire between samples.

    Parses `perf script` textual output: samples are separated by blank lines,
    each starting with a header line followed by one frame per line. Each
    sample's stack is compared with the previous one; every frame of the
    previous stack past the first divergence increments its library's counter.
    Samples taken inside perf itself are skipped.

    :param rootPath: path to the perf script output text file
    :return: defaultdict mapping library name -> retirement count
    """
    callCountDict = defaultdict(int)

    lastStack = []
    curStack = []
    with open(rootPath, 'r') as f:

        firstLine = True
        # NOTE(review): 'Stak' is a typo for 'Stack'; kept to avoid code changes.
        skipThisStak = False
        while True:

            if not firstLine:
                line = f.readline()
            else:
                # Pretend the file starts with a blank separator so the first
                # sample header is consumed by the branch below.
                line = '\n'
            if line == '\n':
                # Blank line: boundary between two samples.
                skipThisStak = False
                firstLine = False
                # print('New call stack')
                tmp = f.readline()

                if not tmp:
                    break

                # First token of the header is the sampled command name.
                if tmp.split()[0].strip() == 'perf':
                    skipThisStak = True
                # Calculate count based on call stack
                for i in range(len(lastStack)):
                    if i >= len(curStack) or lastStack[i].funcName != curStack[i].funcName:
                        # Everything from the first divergence upward retired.
                        for j in range(i, len(lastStack)):
                            callCountDict[lastStack[j].libName] += 1
                        break

                lastStack = curStack
                curStack = []
                if not tmp:
                    break
                continue
            else:
                if not skipThisStak:
                    # Frame line format: '<addr> <funcName>+<offset> (<libPath>)'
                    line = line.strip().strip('\t').strip('\n')
                    addrEndI = line.find(' ')
                    addr = line[0:addrEndI]
                    libStartI = line.rfind(' ') + 1
                    libName = line[libStartI:]
                    funcNameAddr = line[addrEndI:libStartI].strip()
                    # Kernel frames and unresolved symbols are ignored.
                    if funcNameAddr != '[unknown]' and libName != '([kernel.kallsyms])':
                        # print(libName)
                        plutInd = funcNameAddr.rfind('+')
                        funcName = funcNameAddr[0:plutInd]
                        addr = funcNameAddr[plutInd + 1:]
                        # Frames arrive leaf-first; insert(0, ...) stores root-first.
                        curStack.insert(0, StackElem(libName[1:-1], funcName))
    return callCountDict
+
+
# Print per-library frame-retirement counts for each application's perf script
# output. Entries are commented in/out depending on which run is analyzed.
for ROOT_PATH in [
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.vips_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.x264_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.memcached.memcached_1_6_17_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.mysql.mysql_8_0_31_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.redis.redis_7_0_4_0/Perf-Artifects/script.txt',
    '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.db.sqlite.sqlite_3_39_4_0/Perf-Artifects/script.txt',
    '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.webserver.httpd.httpd_2_4_54_0/Perf-Artifects/script.txt',
    '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.webserver.nginx.nginx_1_23_2_0/Perf-Artifects/script.txt',
    # None,
    # None,
    # None,
    # None,
]:
    callDict = analyzePerfScript(ROOT_PATH)
    print(ROOT_PATH)
    # Print counts sorted alphabetically by library name.
    itemList = list(callDict.items())
    itemList = sorted(itemList, key=lambda x: x[0])
    for libName, counts in itemList:
        print(libName, counts, sep='\t')
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py b/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py
new file mode 100644
index 00000000..edf258d8
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py
@@ -0,0 +1,103 @@
+import os
+import traceback
+from collections import defaultdict
+
+from matplotlib import cm
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+from Analyzer.PyVisualizer.src.V3.datastructure.Metric import Metric
+from Analyzer.PyVisualizer.src.V3.util.Quantile import calcQuantile
+from datastructure.TimingStruct import ArrayDescriptor
+from util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from util.Parser.TimeOutputPrarser import readSymbolFiles, aggregatePerThreadArray, readTimingStruct
+import numpy as np
+
+
def parsePerLibSelfTime(scalerDataFolder):
    """Sum recorded clock cycles per real library file for one scaler run.

    Reads every thread's timing array under scalerDataFolder and returns a
    defaultdict mapping real file name -> total clock cycles, or None when the
    folder is missing.

    Fixes: removed a duplicated `libFileDict = defaultdict(int)` statement and
    the unused `totalVarianceArry` local from the original.
    """
    if scalerDataFolder is None:
        print()
        return
    recInfo = readSymbolFiles(scalerDataFolder)

    # Stack per-thread cycle counts into a (numThreads, numSymbols) matrix.
    totalTimingArr = None
    for threadId in recInfo.threadIdList:
        curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)

        cycles = np.array([rec.totalClockCycles for rec in curThreadRecArray])
        if totalTimingArr is None:
            totalTimingArr = cycles
        else:
            totalTimingArr = np.vstack([totalTimingArr, cycles])

    # A single thread yields a 1-D array; normalize to one row.
    if len(totalTimingArr.shape) == 1:
        totalTimingArr = np.reshape(totalTimingArr, (1, -1))

    # Collect every symbol with any recorded time.
    # NOTE(review): the last column is deliberately skipped (shape[1] - 1);
    # presumably a sentinel/self slot -- confirm against the recorder layout.
    sortedNameCntTuple = []
    for i in range(totalTimingArr.shape[1] - 1):
        if np.sum(totalTimingArr[:, i] > 0):
            sortedNameCntTuple.append((i, recInfo.symbolNameList[i], recInfo.symbolFileIdList[i],
                                       recInfo.fileNameList[recInfo.symbolFileIdList[i]],
                                       recInfo.realFileIdList[i], recInfo.fileNameList[recInfo.realFileIdList[i]],
                                       np.sum(totalTimingArr[:, i])))

    # Aggregate per real library file (iteration order does not affect sums).
    libFileDict = defaultdict(int)
    for symId, symName, invokerFIleId, invokerFileName, realFileId, realFileName, time in sorted(sortedNameCntTuple,
                                                                                                 reverse=True,
                                                                                                 key=lambda x: x[-1]):
        libFileDict[realFileName] += time

    # timeList = list(libFileDict.items())
    # timeList = sorted(timeList, key=lambda x: x[0])
    #
    # for name, time in timeList:
    #     print(name, time, sep='\t')
    return libFileDict
+
+
def findScalerDataFolder(rootPath):
    """Walk rootPath top-down and return the first directory whose name starts
    with 'scalerdata', or None when no such directory exists."""
    for parent, childDirs, _ in os.walk(rootPath):
        for childDir in childDirs:
            if childDir.startswith('scalerdata'):
                return os.path.join(parent, childDir)
    return None
+
+
# Root folder containing one subfolder per (application, run) pair.
scalerDataFolders = '/media/umass/datasystem/steven/Downloads/accuracyTest/2022-12-08_06-26-18-Sampling-0B111'
print('Per-Lib infos')

# appName -> list of scalerdata folders (one per run of that application).
pathDict = defaultdict(list)

for folderName in os.listdir(scalerDataFolders):
    # Folder names end with '<appName>_<runId>'; split the name from the run id.
    cache= folderName.split('.')[-1]
    appName=cache[0:-2]
    runTime=cache[-1:]
    scalerDataFolder = findScalerDataFolder(os.path.join(scalerDataFolders, folderName))
    pathDict[appName].append(scalerDataFolder)

# appName -> {libName -> Metric accumulating per-run total cycles}
rlt = {}
for appName, pathList in pathDict.items():
    rlt[appName] = defaultdict(Metric)
    for path in pathList:
        libFileDict = parsePerLibSelfTime(path)
        for libName, time in libFileDict.items():
            rlt[appName][libName].append(time)

# Sort Results
rlt = list(rlt.items())
rlt = sorted(rlt, key=lambda x: x[0])
for appName, libraryList in rlt:
    libraryList = list(libraryList.items())
    libraryList = sorted(libraryList, key=lambda x: x[0])
    print(appName)
    # Per library: mean, std, and coefficient of variation across runs.
    for libraryName, metric in libraryList:
        print(libraryName, metric.mean(), metric.std(), metric.std() / metric.mean(), sep='\t')
diff --git a/Analyzer/PyVisualizer/src/V3/PlotInvocation.py b/Analyzer/PyVisualizer/src/V3/PlotInvocation.py
new file mode 100644
index 00000000..cac813ce
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/PlotInvocation.py
@@ -0,0 +1,120 @@
+import os
+import shutil
+import traceback
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+from Analyzer.PyVisualizer.src.V3.datastructure.TimingStruct import ArrayDescriptor
+from Analyzer.PyVisualizer.src.V3.util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from Analyzer.PyVisualizer.src.V3.util.Parser.TimeOutputPrarser import readSymbolFiles
+
+
def shouldPlt(timingArr):
    """Only plot symbols with more than 500 recorded invocations."""
    return len(timingArr) > 500
+
+
def saveSinglePlot(ROOT_PATH, symbolNameList, threadId, tgtSymId, graphType='hist'):
    """Render one symbol's detailed timing for one thread to a PNG file.

    Output path: ROOT_PATH/DetailedTime/<graphType>/<symbolName>/
    threadDetailedTiming_<symId>_<symbolName>_<threadId>.png. Symbols with 500
    or fewer samples are skipped (see shouldPlt).

    :param ROOT_PATH: scalerdata folder containing the timing binaries
    :param symbolNameList: symbol id -> human-readable symbol name
    :param threadId: thread whose timings are plotted
    :param tgtSymId: the single symbol id to plot
    :param graphType: 'hist' or 'scatter'
    :return: 0 (makes Pool.apply_async results readiness-checkable)
    """
    # hasPoints = False
    # ROOT_PATH: str, symbolNameList: list, threadId: str, tgtSymIds: list):
    detailedTimingDict = parseSingleSymDetailedTiming(ROOT_PATH, threadId, [tgtSymId])

    for symId, detailedTimingArr in detailedTimingDict.items():
        # Two axes are allocated but only ax1 is drawn on.
        fig, axes = plt.subplots(nrows=1,ncols=2)
        ax1, ax2 = axes

        if not shouldPlt(detailedTimingArr):
            continue

        if graphType == 'hist':
            ax1.hist(detailedTimingArr, range=(1, detailedTimingArr.max()), edgecolor="black", bins=50)
            # print(os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
            # if hasPoints:
        elif graphType == 'scatter':
            ax1.scatter(np.arange(detailedTimingArr.shape[0]), detailedTimingArr, s=10)
        else:
            assert (False)

        if not os.path.exists(os.path.join(ROOT_PATH, 'DetailedTime', graphType, symbolNameList[symId])):
            os.makedirs(os.path.join(ROOT_PATH, 'DetailedTime', graphType, symbolNameList[symId]), exist_ok=True)
        fig.savefig(
            os.path.join(ROOT_PATH, 'DetailedTime', graphType, symbolNameList[symId],
                         'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
        # print(os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
        plt.close(fig)
    return 0
+
+
def error_callback(e):
    # Invoked by Pool.apply_async when a worker raises; dumps the exception's
    # attribute list and its chained cause for debugging.
    print('error')
    print(dir(e), "\n")
    print("-->{}<--".format(e.__cause__))
+
+
def doIt(ROOT_PATH, pool, rltList):
    """Queue one scatter-plot job per (thread, symbol) found under ROOT_PATH.

    Scans ROOT_PATH for per-thread detailed-timing binaries, reads each file's
    ArrayDescriptor header to learn its symbol count, and submits saveSinglePlot
    jobs to `pool`. Each AsyncResult is appended to rltList, which is returned.
    """
    print('========================', ROOT_PATH)
    allFiles = os.listdir(ROOT_PATH)
    symbolNum = 0

    recInfo = readSymbolFiles(ROOT_PATH)
    threadSymInfo = dict({})  # Threadid : symbol size
    for fileName in allFiles:
        if fileName.startswith('threadDetailedTiming') and fileName.endswith('.bin'):
            _, threadId = fileName.replace('.bin', '').split('_')
            with open(os.path.join(ROOT_PATH, fileName), 'rb') as f:
                # The file begins with an ArrayDescriptor header.
                symDetailedTimingDesc = ArrayDescriptor()
                f.readinto(symDetailedTimingDesc)
                # elemSize 0 plus magic 167 mark a valid detailed-timing file.
                assert (symDetailedTimingDesc.arrayElemSize == 0)
                assert (symDetailedTimingDesc._magicNum == 167)
                symbolNum = symDetailedTimingDesc.arraySize
                threadSymInfo[threadId] = symbolNum

                for symId in range(symbolNum):
                    res = pool.apply_async(saveSinglePlot,
                                           args=[ROOT_PATH, recInfo.symbolNameList, threadId, symId, 'scatter'],
                                           error_callback=error_callback)
                    rltList.append(res)

    return rltList
+
+
# Fan plotting jobs out over 64 worker processes.
pool = Pool(64)
rltList = []
scalerDataFolders = [
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.redis.redis_7_0_4_0/Scaler-DETAIL-Artifects/scalerdata_1100850174945384',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.sqlite.sqlite_3_39_4_0/Scaler-DETAIL-Artifects/scalerdata_1101150204449494'
]

for ROOT_PATH in scalerDataFolders:
    if ROOT_PATH is not None:
        recInfo = readSymbolFiles(ROOT_PATH)
        doIt(ROOT_PATH, pool, rltList)

# Drain the pool, reporting remaining job count every two seconds.
pool.close()
while len(rltList) > 0:
    time.sleep(2)
    rltList = [rlt for rlt in rltList if not rlt.ready()]
    print("%d jobs left" % len(rltList))
pool.join()
diff --git a/Analyzer/PyVisualizer/src/V3/main.py b/Analyzer/PyVisualizer/src/V3/XFAVisualization.py
similarity index 56%
rename from Analyzer/PyVisualizer/src/V3/main.py
rename to Analyzer/PyVisualizer/src/V3/XFAVisualization.py
index 39ac125a..536019d3 100644
--- a/Analyzer/PyVisualizer/src/V3/main.py
+++ b/Analyzer/PyVisualizer/src/V3/XFAVisualization.py
@@ -3,23 +3,36 @@
import pandas as pd
import struct
import re
+
+from util.Analyzer.XFA import generateXFAStruct
from datastructure.TimingStruct import FileRecord, RecTuple
-from preProcessing import aggregatePerThreadArray, generateTimingStruct, calcPercentage, readSymbolFiles
+from util.Parser.TimeOutputPrarser import aggregatePerThreadArray, readSymbolFiles
# scalerDataFolder = '/media/umass/datasystem/steven/benchmark/parsec/tests/dedup/scalerdata_30414326191467414'
-scalerDataFolder = '/media/umass/datasystem/steven/intel/Perf_Scaler-Parsec-Callgraph-Sig2022Fall/x264/scalerdata_12852017355851478_FGDS'
+scalerDataFolder = '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-12-07_20-11-36-EffImp/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DBG-Artifects/scalerdata_1120018768482198'
recInfo = readSymbolFiles(scalerDataFolder)
realFileId = None
aggregatedTimeArray, aggregatedStartingTime = aggregatePerThreadArray(scalerDataFolder, recInfo)
+
+for i, v in enumerate(aggregatedTimeArray):
+ if v.count > 0:
+ curRealFileId=recInfo.realFileIdList[i]
+ if curRealFileId==len(recInfo.fileNameList):
+ curRealFileId=len(recInfo.fileNameList)-1
+ print(recInfo.symbolNameList[i], recInfo.fileNameList[curRealFileId], v.count, sep='\t')
+
# Generate graph
-timingRecord = generateTimingStruct(list(aggregatedTimeArray), aggregatedStartingTime, recInfo)
+timingRecord = generateXFAStruct(list(aggregatedTimeArray), aggregatedStartingTime, recInfo)
print(timingRecord)
+for time in timingRecord:
+ print(time.fileName,time.selfClockCycles.value,sep='\t')
+
# totalSelfTime = 0
# for fileRec in timingRecord:
# if fileRec.selfClockCycles.value<0:
diff --git a/Analyzer/PyVisualizer/src/V3/datastructure/Metric.py b/Analyzer/PyVisualizer/src/V3/datastructure/Metric.py
new file mode 100644
index 00000000..29bba7a7
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/datastructure/Metric.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+
class Metric:
    """Accumulates scalar observations and reports summary statistics."""

    def __init__(self):
        self.metricList = []

    def append(self, *args, **kwargs):
        """Record one observation (delegates to list.append)."""
        self.metricList.append(*args, **kwargs)

    def std(self):
        """Population standard deviation of all recorded observations."""
        return np.std(np.asarray(self.metricList))

    def mean(self):
        """Arithmetic mean of all recorded observations."""
        return np.mean(np.asarray(self.metricList))
+
diff --git a/Analyzer/PyVisualizer/src/V3/datastructure/TimingStruct.py b/Analyzer/PyVisualizer/src/V3/datastructure/TimingStruct.py
index 16b91306..e909c723 100644
--- a/Analyzer/PyVisualizer/src/V3/datastructure/TimingStruct.py
+++ b/Analyzer/PyVisualizer/src/V3/datastructure/TimingStruct.py
@@ -1,6 +1,10 @@
from collections import defaultdict
from ctypes import *
+'''
+Display related parameter:
+'''
+
class VPG:
def __init__(self):
@@ -63,16 +67,6 @@ def __str__(self):
return str(self.fileName)
-class RecTuple(Structure):
- _fields_ = [
- ('totalClockCycles', c_uint64),
- ('count', c_int64),
- ('_gap', c_uint32),
- ('_meanClockTick', c_float),
- ('_durThreshold', c_uint32),
- ('_flags', c_uint32)]
-
-
class RecordingInfo:
'''
Used to share common information needed in processing between different functions
@@ -86,3 +80,39 @@ def __init__(self):
self.symbolNameList = []
self.symbolFileIdList = []
self.symIdInFileList = []
+
+
+'''
+C data structures
+
+These structs should be consistent with https://github.com/UTSASRG/Scaler/blob/feature-tuneParm/libHook-c/src/include/type/RecTuple.h
+'''
+
class RecTuple(Structure):
    # Per-symbol timing record; the layout must match RecTuple.h in the Scaler
    # repository (see the module comment). Fields prefixed '_' are internal to
    # the recorder.
    _fields_ = [
        ('totalClockCycles', c_uint64),
        ('count', c_int64),
        ('_prevCount', c_int64),
        ('_gap', c_uint32),
        ('_meanClockTick', c_float),
        ('_durThreshold', c_uint32),
        ('_flags', c_uint32)]
+
+
class ArrayDescriptor(Structure):
    # Header preceding a serialized array: element size, element count, and a
    # magic byte used by readers as a sanity check.
    _fields_ = [
        ('arrayElemSize', c_uint64),
        ('arraySize', c_uint64),
        ('_magicNum', c_uint8)]
+
+
class DetailedTimingDescriptor(Structure):
    # Number of detailed-timing entries that follow in the stream.
    _fields_ = [
        ('timingSize', c_int64)]
+
+
class ThreadCreatorInfo(Structure):
    # Which file created a thread plus the thread's total execution cycles,
    # terminated by a magic byte for validation.
    _fields_ = [
        ('threadCreatorFileId', c_int64),
        ('threadExecutionCycles', c_int64),
        ('_magicNum', c_uint8)]
diff --git a/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/TimingStruct.cpython-36.pyc b/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/TimingStruct.cpython-36.pyc
deleted file mode 100644
index d4d54b81..00000000
Binary files a/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/TimingStruct.cpython-36.pyc and /dev/null differ
diff --git a/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/__init__.cpython-36.pyc b/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 05c6d129..00000000
Binary files a/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/Analyzer/PyVisualizer/src/V3/pythonmp.py b/Analyzer/PyVisualizer/src/V3/pythonmp.py
deleted file mode 100644
index c10fc4f9..00000000
--- a/Analyzer/PyVisualizer/src/V3/pythonmp.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import multiprocessing
-
-
-def do():
- print("Foobar", flush=True)
- raise Exception()
-def asdf():
-
- with multiprocessing.Pool(1) as pool:
- for i in range(5):
- result = pool.apply_async(do)
-
- result.get()
-
- pool.close()
- pool.join()
-
-asdf()
\ No newline at end of file
diff --git a/Analyzer/PyVisualizer/src/V3/testBinSearch.py b/Analyzer/PyVisualizer/src/V3/testBinSearch.py
new file mode 100644
index 00000000..b4d9f80b
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/testBinSearch.py
@@ -0,0 +1,30 @@
+A = [1, 2, 3, 4, 4, 5]
+
+
+def binSearch(A, tgt):
+ """
+    Return the index of the last element in A that is strictly smaller than tgt, or -1 if no such element exists.
+    :param A: Sorted array of segment boundaries
+    :param tgt: Target value to locate
+    :return: Index of the segment containing tgt, or -1 if tgt precedes all segments
+ """
+ lo = 0
+ hi = len(A)
+ md = 0
+ while lo < hi:
+ md = lo + (hi - lo) // 2
+ if A[md] < tgt:
+ lo = md + 1
+ elif A[md] > tgt:
+ hi = md
+ elif A[md] == tgt:
+ hi = md
+
+ return lo-1
+
+
+testList = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]
+expectedVal = [-1, 0, 1, 2, 4, 5]
+for i in range(len(testList)):
+ lo = binSearch(A, testList[i])
+ print(testList[i], lo)
diff --git a/Analyzer/PyVisualizer/src/V3/preProcessing.py b/Analyzer/PyVisualizer/src/V3/util/Analyzer/XFA.py
similarity index 57%
rename from Analyzer/PyVisualizer/src/V3/preProcessing.py
rename to Analyzer/PyVisualizer/src/V3/util/Analyzer/XFA.py
index 5ad6144d..fd550886 100644
--- a/Analyzer/PyVisualizer/src/V3/preProcessing.py
+++ b/Analyzer/PyVisualizer/src/V3/util/Analyzer/XFA.py
@@ -1,110 +1,7 @@
-import math
-import os
-import re
-import struct
-from datastructure.TimingStruct import FileRecord, RecTuple, RecordingInfo, RecordingInfo
-import pandas as pd
-from collections import defaultdict
-
-
-def readSymbolFiles(scalerDataFolder):
- rlt = RecordingInfo()
-
- df = pd.read_csv(os.path.join(scalerDataFolder, 'fileName.txt'))
- rlt.fileNameList = df['pathName'].to_list()
- rlt.pthreadFileId = parsePthreadId(rlt.fileNameList)
-
- for fileName in os.listdir(scalerDataFolder):
- if fileName.startswith('threadTiming_'):
- rlt.threadIdList.append(fileName[len('threadTiming_'): -4])
-
- with open(os.path.join(scalerDataFolder, 'realFileId.bin'), 'rb') as f:
- byteArr1 = f.read()
- arraySize = struct.unpack_from('Q', byteArr1, 0) # The first element is the array size
- rlt.realFileIdList = list(struct.unpack_from('<%dQ' % (arraySize), byteArr1,
- 8)) # The last id marks the creator thread
- df = pd.read_csv(os.path.join(scalerDataFolder, 'symbolInfo.txt'))
- rlt.symbolNameList = df['funcName'].to_list()
- rlt.symbolFileIdList = df['fileId'].to_list()
- rlt.symIdInFileList = df['symIdInFile'].to_list()
- return rlt
-
-
-def readTimingStruct(threadFileFullPath):
- recDataArr = []
- recTupleSize = 8 + 8 + 4 + 4 + 4 + 4
- with open(threadFileFullPath, 'rb') as f:
- byteArr = f.read()
- mainFileId, recArrSize = struct.unpack_from('qq', byteArr, 0) # 16 bytes
- f.seek(16)
-
- for i in range(recArrSize):
- curRecFormat = RecTuple()
- f.readinto(curRecFormat)
- recDataArr.append(curRecFormat)
- # assert (len(symbolNameList) == recArrSize - 1)
- assert (len(recDataArr) == recArrSize)
- return recDataArr
-
-
-def aggregatePerThreadArray(scalerDataFolder, recInfo: RecordingInfo):
- """
- - Aggregate per-thread timing data into one using simple addition and return as the first return value
- - The last element in each RecTuple records how much time the thread takes to execute so we should not aggregate them
- together. Instead, we collect them into one list and return as the second parameter
-
- :param scalerDataFolder: Scaler output data folder
- :param threadIdList: A list of thread ids
- :return aggregatedTimeArray: Aggregated counting and timing information
- :return startingInfoArray: Information about thread creator. This value is used in time aggregation steps
- """
- api = 0
- fgdsApi = 0
-
- aggregatedTimeArray = []
- aggregatedStartingTime = defaultdict(
- lambda: 0) # Map fileId and starting time. Thread may created by modules other than the main application
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(os.path.join(scalerDataFolder, 'threadTiming_%s.bin' % threadId))
- aggregatedStartingTime[curThreadRecArray[-1]._flags] += curThreadRecArray[-1].totalClockCycles
- # print(curThreadRecArray[-1].totalClockCycles)
-
- for i, curRec in enumerate(curThreadRecArray[:-1]):
- if curRec._flags & (1 << 0):
- fgdsApi += 1
- api += 1
- # if curRec.count>0:
- # print('totalCount',totalCount,curRec.count)
- if len(curThreadRecArray) != len(aggregatedTimeArray) + 1:
- # First time
- aggregatedTimeArray = curThreadRecArray[:-1].copy()
- else:
- for i, curRec in enumerate(curThreadRecArray[:-1]):
- aggregatedTimeArray[i].count += curRec.count
- # if recInfo.symbolNameList[i] == 'pthread_join':
- # print('Skip pthread_join')
- # continue
-
- if aggregatedTimeArray[i]._flags & (1 << 0):
- # Use mean and count to estimate total clock cycles
- aggregatedTimeArray[i].totalClockCycles += int(curRec.count * curRec._meanClockTick)
- else:
- aggregatedTimeArray[i].totalClockCycles += curRec.totalClockCycles
- print('fgdsapi/api=', round(fgdsApi / api*100,2), 'fgdsCount/TotalCount=', round(fgdsCount / totalCount*100,2), sep='\t')
- return aggregatedTimeArray, aggregatedStartingTime
-
-
-pthreadFileRegex = re.compile(r'libpthread-.*\.so$')
-
-
-def parsePthreadId(fileNameList):
- for i, fileName in enumerate(fileNameList):
- if len(pthreadFileRegex.findall(fileName)) != 0:
- return i
- raise Exception('Cannot find pthread library in fileList')
+from datastructure.TimingStruct import RecordingInfo, FileRecord
-def generateTimingStruct(aggregatedTimeEntries, aggregatedStartingTime, recInfo: RecordingInfo):
+def generateXFAStruct(aggregatedTimeEntries, aggregatedStartingTime, recInfo: RecordingInfo):
timingRecord = [] # Map file name to FileRecord struct
mainFileId = None
@@ -195,7 +92,7 @@ def calcPercentage(timingRecord, programRuntime, totalApiCallCount):
else:
curExtFileRecord.counts.globalPercent = 0.0
- if curFileRecord.childrenClockCycles.value > 0:
+ if curFileRecord.selfClockCycles.value + curFileRecord.childrenClockCycles.value > 0:
curExtFileRecord.totalClockCycles.parentPercent = curExtFileRecord.totalClockCycles.value / (
curFileRecord.selfClockCycles.value + curFileRecord.childrenClockCycles.value)
else:
diff --git a/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py b/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py
new file mode 100644
index 00000000..a87ef2af
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py
@@ -0,0 +1,55 @@
+import os
+import traceback
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+from datastructure.TimingStruct import ArrayDescriptor
+
+
+def parseSingleSymDetailedTiming(ROOT_PATH: str, threadId: str, tgtSymIds: list):
+ # hasPoints = False
+ if tgtSymIds:
+ tgtSymIds = sorted(tgtSymIds)
+
+ rlt = {} # SymId, timingArray
+ with open(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s.bin' % (threadId)), 'rb') as f:
+ # Get the amount of symbols in this file
+ arrayDesc = ArrayDescriptor()
+ f.readinto(arrayDesc)
+ assert (arrayDesc.arrayElemSize == 0)
+ assert (arrayDesc._magicNum == 167)
+
+ if tgtSymIds is None:
+ tgtSymIds = range(arrayDesc.arraySize)
+ assert (tgtSymIds[-1] < arrayDesc.arraySize)
+
+ detailedTimingForCurSym = None
+ for curSymId in range(arrayDesc.arraySize):
+ symDetailedTimingDesc = ArrayDescriptor()
+ f.readinto(symDetailedTimingDesc)
+
+ assert (symDetailedTimingDesc.arrayElemSize == 8)
+ assert (symDetailedTimingDesc._magicNum == 167)
+ if curSymId < tgtSymIds[0]:
+ # Only read specified symbol
+ f.seek(symDetailedTimingDesc.arraySize * symDetailedTimingDesc.arrayElemSize, os.SEEK_CUR)
+ continue
+ elif curSymId == tgtSymIds[0]:
+ detailedTimingForCurSym = np.array(struct.unpack_from('<%dQ' % (symDetailedTimingDesc.arraySize),
+ f.read(symDetailedTimingDesc.arraySize *
+ symDetailedTimingDesc.arrayElemSize)))
+ rlt[curSymId] = detailedTimingForCurSym
+ tgtSymIds = tgtSymIds[1:]
+ if len(tgtSymIds) == 0:
+ break
+ else:
+ # There are duplicate or negative values inside tgtSymIds
+ assert (False)
+ # assert (len(tgtSymIds) == 0)
+ return rlt
diff --git a/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py b/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py
new file mode 100644
index 00000000..eb8d1bb6
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py
@@ -0,0 +1,120 @@
+import math
+import os
+import re
+import struct
+from datastructure.TimingStruct import FileRecord, RecTuple, RecordingInfo, RecordingInfo, ArrayDescriptor, \
+ ThreadCreatorInfo
+import pandas as pd
+from collections import defaultdict
+
+
+def readSymbolFiles(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+ rlt = RecordingInfo()
+
+ df = pd.read_csv(os.path.join(scalerDataFolder, 'fileName.txt'))
+ rlt.fileNameList = df['pathName'].to_list()
+ rlt.pthreadFileId = parsePthreadId(rlt.fileNameList)
+
+ for fileName in os.listdir(scalerDataFolder):
+ if fileName.startswith('threadTiming_'):
+ rlt.threadIdList.append(fileName[len('threadTiming_'): -4])
+
+ with open(os.path.join(scalerDataFolder, 'realFileId.bin'), 'rb') as f:
+ arrDesc = ArrayDescriptor()
+ f.readinto(arrDesc)
+ assert (arrDesc._magicNum == 167)
+ assert (arrDesc.arrayElemSize == 8)
+ rlt.realFileIdList = list(
+ struct.unpack_from('<%dQ' % (arrDesc.arraySize), f.read(arrDesc.arrayElemSize * arrDesc.arraySize)))
+ assert (f.read() == b'') # Make sure this is the end
+ df = pd.read_csv(os.path.join(scalerDataFolder, 'symbolInfo.txt'))
+ rlt.symbolNameList = df['funcName'].to_list()
+ rlt.symbolFileIdList = df['fileId'].to_list()
+ rlt.symIdInFileList = df['symIdInFile'].to_list()
+ return rlt
+
+
+def readTimingStruct(ROOT_PATH,threadId):
+ recDataArr = []
+
+ with open(os.path.join(ROOT_PATH, 'threadTiming_%s.bin' % threadId), 'rb') as f:
+ threadCreatorInfo = ThreadCreatorInfo()
+ f.readinto(threadCreatorInfo)
+ assert (threadCreatorInfo._magicNum == 167)
+
+ arrayDescriptor = ArrayDescriptor()
+ f.readinto(arrayDescriptor)
+ assert (arrayDescriptor._magicNum == 167)
+
+ for i in range(arrayDescriptor.arraySize):
+ curRecFormat = RecTuple()
+ f.readinto(curRecFormat)
+ recDataArr.append(curRecFormat)
+
+ # assert (len(symbolNameList) == recArrSize - 1)
+ return recDataArr
+
+
+def aggregatePerThreadArray(scalerDataFolder, recInfo: RecordingInfo):
+ """
+ - Aggregate per-thread timing data into one using simple addition and return as the first return value
+ - The last element in each RecTuple records how much time the thread takes to execute so we should not aggregate them
+ together. Instead, we collect them into one list and return as the second parameter
+
+ :param scalerDataFolder: Scaler output data folder
+ :param threadIdList: A list of thread ids
+ :return aggregatedTimeArray: Aggregated counting and timing information
+ :return startingInfoArray: Information about thread creator. This value is used in time aggregation steps
+ """
+ api = 0
+ fgdsApi = 0
+ fgdsCount = 0
+ totalCount = 0
+
+ aggregatedTimeArray = []
+ aggregatedStartingTime = defaultdict(
+        lambda: 0)  # Maps creator fileId to accumulated starting time. Threads may be created by modules other than the main application
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder,threadId)
+ aggregatedStartingTime[curThreadRecArray[-1]._flags] += curThreadRecArray[-1].totalClockCycles
+ # print(curThreadRecArray[-1].totalClockCycles)
+
+ for i, curRec in enumerate(curThreadRecArray[:-1]):
+ if curRec._flags & (1 << 0):
+ fgdsApi += 1
+ fgdsCount += curRec.count
+ api += 1
+ totalCount += curRec.count
+ # if curRec.count>0:
+ # print('totalCount',totalCount,curRec.count)
+ if len(curThreadRecArray) != len(aggregatedTimeArray) + 1:
+ # First time
+ aggregatedTimeArray = curThreadRecArray[:-1].copy()
+ else:
+ for i, curRec in enumerate(curThreadRecArray[:-1]):
+ aggregatedTimeArray[i].count += curRec.count
+ # if recInfo.symbolNameList[i] == 'pthread_join':
+ # print('Skip pthread_join')
+ # continue
+
+ if aggregatedTimeArray[i]._flags & (1 << 0):
+ # Use mean and count to estimate total clock cycles
+ aggregatedTimeArray[i].totalClockCycles += int(curRec.count * curRec._meanClockTick)
+ else:
+ aggregatedTimeArray[i].totalClockCycles += curRec.totalClockCycles
+ print('fgdsapi/api=', round(fgdsApi / api * 100, 2), 'fgdsCount/TotalCount=',
+ round(fgdsCount / totalCount * 100, 2), sep='\t')
+ return aggregatedTimeArray, aggregatedStartingTime
+
+
+pthreadFileRegex = re.compile(r'libpthread-.*\.so$')
+
+
+def parsePthreadId(fileNameList):
+ for i, fileName in enumerate(fileNameList):
+ if len(pthreadFileRegex.findall(fileName)) != 0:
+ return i
+ raise Exception('Cannot find pthread library in fileList')
diff --git a/Analyzer/PyVisualizer/src/V3/util/Quantile.py b/Analyzer/PyVisualizer/src/V3/util/Quantile.py
new file mode 100644
index 00000000..0c0d11a6
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/util/Quantile.py
@@ -0,0 +1,13 @@
+import numpy as np
+
+
+def calcQuantile(array):
+ min = np.min(array)
+ q1 = np.quantile(array, 0.25)
+ q2 = np.quantile(array, 0.5)
+ q3 = np.quantile(array, 0.75)
+ q4 = np.quantile(array, 1)
+ iqr = q3 - q1
+ qLower = q1 - 1.5 * iqr
+ qUpper = q3 + 1.5 * iqr
+ return min, q1, q2, q3, q4, iqr, qLower, qUpper
diff --git a/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl b/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl
index f6427d07..336e6c38 100644
--- a/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl
+++ b/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl
@@ -62,7 +62,6 @@
#
# 02-Mar-2012 Brendan Gregg Created this.
# 02-Jul-2014 " " Added process name to stacks.
-# 08-Jun-2021 John Diep edited script to force shared object names to be reported always
use strict;
use Getopt::Long;
@@ -89,46 +88,83 @@ sub remember_stack {
my $show_inline = 0;
my $show_context = 0;
+
+my $srcline_in_input = 0; # if there are extra lines with source location (perf script -F+srcline)
GetOptions('inline' => \$show_inline,
- 'context' => \$show_context,
- 'pid' => \$include_pid,
- 'kernel' => \$annotate_kernel,
- 'jit' => \$annotate_jit,
- 'all' => \$annotate_all,
- 'tid' => \$include_tid,
- 'addrs' => \$include_addrs,
- 'event-filter=s' => \$event_filter)
+'context' => \$show_context,
+'srcline' => \$srcline_in_input,
+'pid' => \$include_pid,
+'kernel' => \$annotate_kernel,
+'jit' => \$annotate_jit,
+'all' => \$annotate_all,
+'tid' => \$include_tid,
+'addrs' => \$include_addrs,
+'event-filter=s' => \$event_filter)
or die < outfile\n
- --pid # include PID with process names [1]
- --tid # include TID and PID with process names [1]
- --inline # un-inline using addr2line
- --all # all annotations (--kernel --jit)
- --kernel # annotate kernel functions with a _[k]
- --jit # annotate jit functions with a _[j]
- --context # adds source context to --inline
- --addrs # include raw addresses where symbols can't be found
- --event-filter=EVENT # event name filter\n
+--pid # include PID with process names [1]
+--tid # include TID and PID with process names [1]
+--inline # un-inline using addr2line
+--all # all annotations (--kernel --jit)
+--kernel # annotate kernel functions with a _[k]
+--jit # annotate jit functions with a _[j]
+--context # adds source context to --inline
+--srcline # parses output of 'perf script -F+srcline' and adds source context
+--addrs # include raw addresses where symbols can't be found
+--event-filter=EVENT # event name filter\n
[1] perf script must emit both PID and TIDs for these to work; eg, Linux < 4.1:
- perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace
- for Linux >= 4.1:
- perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace
- If you save this output add --header on Linux >= 3.14 to include perf info.
+perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace
+for Linux >= 4.1:
+perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace
+If you save this output add --header on Linux >= 3.14 to include perf info.
USAGE_END
if ($annotate_all) {
- $annotate_kernel = $annotate_jit = 1;
+$annotate_kernel = $annotate_jit = 1;
+}
+
+my %inlineCache;
+
+my %nmCache;
+
+sub inlineCacheAdd {
+ my ($pc, $mod, $result) = @_;
+ if (defined($inlineCache{$pc})) {
+ $inlineCache{$pc}{$mod} = $result;
+ } else {
+ $inlineCache{$pc} = {$mod => $result};
+ }
}
# for the --inline option
sub inline {
- my ($pc, $mod) = @_;
+my ($pc, $rawfunc, $mod) = @_;
- # capture addr2line output
- my $a2l_output = `addr2line -a $pc -e $mod -i -f -s -C`;
+return $inlineCache{$pc}{$mod} if defined($inlineCache{$pc}{$mod});
- # remove first line
- $a2l_output =~ s/^(.*\n){1}//;
+# capture addr2line output
+my $a2l_output = `addr2line -a $pc -e $mod -i -f -s -C`;
+
+# remove first line
+$a2l_output =~ s/^(.*\n){1}//;
+
+if ($a2l_output =~ /\?\?\n\?\?:0/) {
+# if addr2line fails and rawfunc is func+offset, then fall back to it
+if ($rawfunc =~ /^(.+)\+0x([0-9a-f]+)$/) {
+ my $func = $1;
+ my $addr = hex $2;
+
+ $nmCache{$mod}=`nm $mod` unless defined $nmCache{$mod};
+
+ if ($nmCache{$mod} =~ /^([0-9a-f]+) . \Q$func\E$/m) {
+ my $base = hex $1;
+ my $newPc = sprintf "0x%x", $base+$addr;
+ my $result = inline($newPc, '', $mod);
+ inlineCacheAdd($pc, $mod, $result);
+ return $result;
+ }
+ }
+ }
my @fullfunc;
my $one_item = "";
@@ -150,13 +186,18 @@ sub inline {
}
}
- return join(";", @fullfunc);
+ my $result = join ";" , @fullfunc;
+
+ inlineCacheAdd($pc, $mod, $result);
+
+ return $result;
}
my @stack;
my $pname;
my $m_pid;
my $m_tid;
+my $m_period;
#
# Main loop
@@ -192,7 +233,7 @@ sub inline {
unshift @stack, "";
}
}
- remember_stack(join(";", @stack), 1) if @stack;
+ remember_stack(join(";", @stack), $m_period) if @stack;
undef @stack;
undef $pname;
next;
@@ -203,21 +244,22 @@ sub inline {
#
if (/^(\S.+?)\s+(\d+)\/*(\d+)*\s+/) {
# default "perf script" output has TID but not PID
- # eg, "java 25607 4794564.109216: cycles:"
- # eg, "java 12688 [002] 6544038.708352: cpu-clock:"
- # eg, "V8 WorkerThread 25607 4794564.109216: cycles:"
- # eg, "java 24636/25607 [000] 4794564.109216: cycles:"
- # eg, "java 12688/12764 6544038.708352: cpu-clock:"
- # eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: cycles:"
+ # eg, "java 25607 4794564.109216: 1 cycles:"
+ # eg, "java 12688 [002] 6544038.708352: 235 cpu-clock:"
+ # eg, "V8 WorkerThread 25607 4794564.109216: 104345 cycles:"
+ # eg, "java 24636/25607 [000] 4794564.109216: 1 cycles:"
+ # eg, "java 12688/12764 6544038.708352: 10309278 cpu-clock:"
+ # eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: 100 cycles:"
# other combinations possible
- my ($comm, $pid, $tid) = ($1, $2, $3);
+ my ($comm, $pid, $tid, $period) = ($1, $2, $3, "");
if (not $tid) {
$tid = $pid;
$pid = "?";
}
- if (/(\S+):\s*$/) {
- my $event = $1;
+ if (/:\s*(\d+)*\s+(\S+):\s*$/) {
+ $period = $1;
+ my $event = $2;
if ($event_filter eq "") {
# By default only show events of the first encountered
@@ -237,7 +279,10 @@ sub inline {
}
}
- ($m_pid, $m_tid) = ($pid, $tid);
+ if (not $period) {
+ $period = 1
+ }
+ ($m_pid, $m_tid, $m_period) = ($pid, $tid, $period);
if ($include_tid) {
$pname = "$comm-$m_pid/$m_tid";
@@ -257,18 +302,25 @@ sub inline {
my ($pc, $rawfunc, $mod) = ($1, $2, $3);
+ if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) {
+ my $inlineRes = inline($pc, $rawfunc, $mod);
+	# - an empty result happens e.g. when $mod does not exist or is a path to a compressed kernel module;
+	#   if this happens, the user will see the error message from addr2line written to stderr
+	# - if addr2line yields "??", it is much saner to fall back than to produce a '??' frame in the graph
+ if($inlineRes ne "" and $inlineRes ne "??" and $inlineRes ne "??:??:0" ) {
+ unshift @stack, $inlineRes;
+ next;
+ }
+ }
+
# Linux 4.8 included symbol offsets in perf script output by default, eg:
# 7fffb84c9afc cpu_startup_entry+0x800047c022ec ([kernel.kallsyms])
# strip these off:
$rawfunc =~ s/\+0x[\da-f]+$//;
- if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) {
- unshift @stack, inline($pc, $mod);
- next;
- }
-
next if $rawfunc =~ /^\(/; # skip process names
+ my $is_unknown=0;
my @inline;
for (split /\->/, $rawfunc) {
my $func = $_;
@@ -279,6 +331,7 @@ sub inline {
$func =~ s/.*\///;
} else {
$func = "unknown";
+ $is_unknown=1;
}
if ($include_addrs) {
@@ -320,7 +373,7 @@ sub inline {
#
# detect inlined from the @inline array
# detect kernel from the module name; eg, frames to parse include:
- # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
+ # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
# 8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux)
# 7d8 ipv4_conntrack_local+0x7f8f80b8 ([nf_conntrack_ipv4])
# detect jit from the module name; eg:
@@ -332,15 +385,42 @@ sub inline {
} elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) {
$func .= "_[j]"; # jitted
}
- else {
- if ($mod ne "[unknown]") {
- $mod =~ s/.*\///;
- $mod = "\[$mod\]";
- }
- if ($func ne $mod) {
- $func .= " " . $mod;
- }
+
+ #
+ # Source lines
+ #
+ #
+ # Sample outputs:
+ # | a.out 35081 252436.005167: 667783 cycles:
+ # | 408ebb some_method_name+0x8b (/full/path/to/a.out)
+ # | uniform_int_dist.h:300
+ # | 4069f5 main+0x935 (/full/path/to/a.out)
+ # | file.cpp:137
+ # | 7f6d2148eb25 __libc_start_main+0xd5 (/lib64/libc-2.33.so)
+ # | libc-2.33.so[27b25]
+ #
+ # | a.out 35081 252435.738165: 306459 cycles:
+ # | 7f6d213c2750 [unknown] (/usr/lib64/libkmod.so.2.3.6)
+ # | libkmod.so.2.3.6[6750]
+ #
+ # | a.out 35081 252435.738373: 315813 cycles:
+ # | 7f6d215ca51b __strlen_avx2+0x4b (/lib64/libc-2.33.so)
+ # | libc-2.33.so[16351b]
+ # | 7ffc71ee9580 [unknown] ([unknown])
+ # |
+ #
+ # | a.out 35081 252435.718940: 247984 cycles:
+ # | ffffffff814f9302 up_write+0x32 ([kernel.kallsyms])
+ # | [kernel.kallsyms][ffffffff814f9302]
+ if($srcline_in_input and not $is_unknown){
+ $_ = <>;
+ chomp;
+ s/\[.*?\]//g;
+ s/^\s*//g;
+ s/\s*$//g;
+ $func.=':'.$_ unless $_ eq "";
}
+
push @inline, $func;
}
diff --git a/benchmarktookit b/benchmarktookit
new file mode 160000
index 00000000..c1ff8fb5
--- /dev/null
+++ b/benchmarktookit
@@ -0,0 +1 @@
+Subproject commit c1ff8fb5dcc7e62628a9c5d4fd8e8e858ae8ab00
diff --git a/libHook-c/src/ExtFuncCallHook.cpp b/libHook-c/src/ExtFuncCallHook.cpp
index abd46bbb..c545bb27 100644
--- a/libHook-c/src/ExtFuncCallHook.cpp
+++ b/libHook-c/src/ExtFuncCallHook.cpp
@@ -169,7 +169,8 @@ namespace scaler {
Elf64_Word type;
Elf64_Word bind;
parser.getExtSymbolInfo(i, funcName, bind, type);
- if (!shouldHookThisSymbol(funcName, bind, type, allExtSymbol.getSize())) {
+ ssize_t initialGap = 0;
+ if (!shouldHookThisSymbol(funcName, bind, type, allExtSymbol.getSize(), initialGap)) {
continue;
}
//Get function id from plt entry
@@ -198,7 +199,7 @@ namespace scaler {
newSym->pltEntryAddr = pltEntry;
newSym->pltSecEntryAddr = pltSecEntry;
newSym->pltStubId = pltStubId;
-
+ newSym->initialGap = initialGap;
fprintf(symInfoFile, "%s,%ld,%ld\n", funcName, newSym->fileId, newSym->symIdInFile);
DBG_LOGS(
@@ -212,8 +213,12 @@ namespace scaler {
}
- bool
- ExtFuncCallHook::shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId) {
+ const int SAMPLING_GAP = 0b0;
+
+ bool ExtFuncCallHook::shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId,
+ ssize_t &initialGap) {
+
+ initialGap = 0;
if (bind != STB_GLOBAL || type != STT_FUNC) {
return false;
}
@@ -230,13 +235,33 @@ namespace scaler {
}
if (funcNameLen == 3) {
- if (strncmp(funcName, "oom", 3) == 0) {
+ if (strncmp(funcName, "cos", 3) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "exp", 3) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "log", 3) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "sin", 3) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "oom", 3) == 0) {
return false;
} else if (strncmp(funcName, "err", 3) == 0) {
return false;
}
} else if (funcNameLen == 4) {
- if (strncmp(funcName, "jump", 4) == 0) {
+ if (strncmp(funcName, "cosf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "expf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "logf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "powf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "sinf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "sqrtf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "jump", 4) == 0) {
return false;
} else if (strncmp(funcName, "exit", 4) == 0) {
return false;
@@ -248,7 +273,11 @@ namespace scaler {
return false;
}
} else if (funcNameLen == 5) {
- if (strncmp(funcName, "_exit", 5) == 0) {
+ if (strncmp(funcName, "atan2", 5) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "sqrtf", 5) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "_exit", 5) == 0) {
return false;
} else if (strncmp(funcName, "abort", 5) == 0) {
return false;
@@ -629,7 +658,7 @@ namespace scaler {
uint8_t *tlsOffset = nullptr;
__asm__ __volatile__ (
- "movq 0x2F4CC0(%%rip),%0\n\t"
+ "movq 0x2F5B60(%%rip),%0\n\t"
:"=r" (tlsOffset)
:
:
diff --git a/libHook-c/src/HookContext.cpp b/libHook-c/src/HookContext.cpp
index 41d768fb..0e309e79 100644
--- a/libHook-c/src/HookContext.cpp
+++ b/libHook-c/src/HookContext.cpp
@@ -2,11 +2,13 @@
#include
#include
#include
+#include
extern "C" {
static thread_local DataSaver saverElem;
-HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) {
+HookContext *
+constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize, scaler::Array &allExtSymbol) {
uint8_t *contextHeap = static_cast(mmap(NULL, sizeof(HookContext) +
sizeof(scaler::Array) +
@@ -21,7 +23,15 @@ HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) {
rlt->recArr = new(contextHeap + sizeof(HookContext)) scaler::Array(hookedSymbolSize);
rlt->threadDataSavingLock = reinterpret_cast(contextHeap + sizeof(HookContext) +
sizeof(scaler::Array));
-
+#ifdef INSTR_TIMING
+ detailedTimingVectors = new TIMING_TYPE *[hookedSymbolSize];
+ detailedTimingVectorSize = new TIMING_TYPE[hookedSymbolSize];
+ memset(detailedTimingVectorSize, 0, sizeof(TIMING_TYPE) * hookedSymbolSize);
+ for (ssize_t i = 0; i < hookedSymbolSize; ++i) {
+ detailedTimingVectors[i] = new TIMING_TYPE[TIMING_REC_COUNT];
+ memset(detailedTimingVectors[i], 0, sizeof(TIMING_TYPE) * TIMING_REC_COUNT);
+ }
+#endif
pthread_mutexattr_t Attr;
pthread_mutexattr_init(&Attr);
@@ -31,11 +41,9 @@ HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) {
//Initialize gap to one
for (int i = 0; i < rlt->recArr->getSize(); ++i) {
//number mod 2^n is equivalent to stripping off all but the n lowest-order
- rlt->recArr->internalArr[i].gap = 0; //0b11 if %4, because 4=2^2 Initially time everything
+ rlt->recArr->internalArr[i].gap = allExtSymbol[i].initialGap; //0b11 if %4, because 4=2^2 Initially time everything
rlt->recArr->internalArr[i].count = 0;
}
-
-
// memArrayHeap(1), timingArr(hookedSymbolSize),
// indexPosi(0)
@@ -86,9 +94,9 @@ void __attribute__((used, noinline, optimize(3))) printRecOffset() {
auto m __attribute__((used)) = (uint8_t *) &curContext->recArr->internalArr[0].gap;
printf("\nTLS offset: Check assembly\n"
- "RecArr Offset: 0x%lx\n"
- "Counting Entry Offset: 0x%lx\n"
- "Gap Entry Offset: 0x%lx\n", j - i, l - k, m - k);
+ "RecArr Offset: 0x%lx\n"
+ "Counting Entry Offset: 0x%lx\n"
+ "Gap Entry Offset: 0x%lx\n", j - i, l - k, m - k);
}
@@ -106,19 +114,17 @@ bool initTLS() {
//Put a dummy variable to avoid null checking
//Initialize saving data structure
- curContext = constructContext(
- scaler::ExtFuncCallHook::instance->elfImgInfoMap.getSize(),
- scaler::ExtFuncCallHook::instance->allExtSymbol.getSize() + 1);
+ curContext = constructContext(scaler::ExtFuncCallHook::instance->elfImgInfoMap.getSize(),
+ scaler::ExtFuncCallHook::instance->allExtSymbol.getSize() + 1,
+ scaler::ExtFuncCallHook::instance->allExtSymbol);
//#ifdef PRINT_DBG_LOG
// printRecOffset();
//#endif
- if (!curContext) { fatalError("Failed to allocate memory for Context");
+ if (!curContext) {
+ fatalError("Failed to allocate memory for Context");
return false;
}
-
-
//RuntimeInfo newInfo;
-
return true;
}
@@ -126,10 +132,168 @@ __thread HookContext *curContext __attribute((tls_model("initial-exec")));
__thread uint8_t bypassCHooks __attribute((tls_model("initial-exec"))) = SCALER_FALSE; //Anything that is not SCALER_FALSE should be treated as SCALER_FALSE
+#ifdef INSTR_TIMING
+const int TIMING_REC_COUNT = 20000;
+typedef int64_t TIMING_TYPE;
+__thread TIMING_TYPE **detailedTimingVectors;
+__thread TIMING_TYPE *detailedTimingVectorSize;
+#endif
+
DataSaver::~DataSaver() {
saveData(curContext);
}
+#ifdef INSTR_TIMING
+inline void saveThreadDetailedTiming(std::stringstream &ss, HookContext *curContextPtr) {
+ ss.str("");
+ ss << scaler::ExtFuncCallHook::instance->folderName << "/threadDetailedTiming_" << curContextPtr->threadId
+ << ".bin";
+
+ //Calculate file total size
+
+ ssize_t recordedInvocationCnt = 0;
+
+ for (ssize_t i = 0; i < scaler::ExtFuncCallHook::instance->allExtSymbol.getSize(); ++i) {
+ recordedInvocationCnt += detailedTimingVectorSize[i];
+ }
+
+ int fd;
+ size_t realFileIdSizeInBytes = sizeof(ArrayDescriptor) +
+ sizeof(ArrayDescriptor) * scaler::ExtFuncCallHook::instance->allExtSymbol.getSize()
+ + recordedInvocationCnt * sizeof(TIMING_TYPE);
+
+ uint8_t *fileContentInMem = nullptr;
+ if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) {
+ fatalErrorS("Cannot open %s because:%s", ss.str().c_str(), strerror(errno))
+ }
+ uint8_t *_fileContentInMem = fileContentInMem;
+
+ /*Write whole symbol info*/
+ ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem);
+ arrayDescriptor->arrayElemSize = 0;
+ arrayDescriptor->arraySize = scaler::ExtFuncCallHook::instance->allExtSymbol.getSize();
+ arrayDescriptor->magicNum = 167;
+ fileContentInMem += sizeof(ArrayDescriptor);
+
+
+ for (ssize_t i = 0; i < scaler::ExtFuncCallHook::instance->allExtSymbol.getSize(); ++i) {
+ /**
+ * Write array descriptor first
+ */
+ ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem);
+ arrayDescriptor->arrayElemSize = sizeof(TIMING_TYPE);
+ arrayDescriptor->arraySize = detailedTimingVectorSize[i];
+ arrayDescriptor->magicNum = 167;
+ fileContentInMem += sizeof(ArrayDescriptor);
+
+ /**
+ * Then write detailed timing array
+ */
+ memcpy(fileContentInMem, detailedTimingVectors[i], arrayDescriptor->arraySize * arrayDescriptor->arrayElemSize);
+ fileContentInMem += arrayDescriptor->arraySize * arrayDescriptor->arrayElemSize;
+ }
+ if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) {
+ fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno));
+ }
+}
+#endif
+
+
+inline void savePerThreadTimingData(std::stringstream &ss, HookContext *curContextPtr) {
+ ss.str("");
+ ss << scaler::ExtFuncCallHook::instance->folderName << "/threadTiming_" << curContextPtr->threadId << ".bin";
+ //INFO_LOGS("Saving timing data to %s", ss.str().c_str());
+
+ int fd;
+ size_t realFileIdSizeInBytes =
+ sizeof(ThreadCreatorInfo) + sizeof(ArrayDescriptor) + curContextPtr->recArr->getSize() * sizeof(RecTuple);
+ uint8_t *fileContentInMem = nullptr;
+ if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) {
+ fatalErrorS("Cannot fopen %s because:%s", ss.str().c_str(), strerror(errno));
+ }
+ uint8_t *_fileContentInMem = fileContentInMem;
+ /**
+ * Record who created the thread
+ */
+ ThreadCreatorInfo *threadCreatorInfo = reinterpret_cast(fileContentInMem);
+ threadCreatorInfo->threadExecutionCycles = curContextPtr->endTImestamp - curContextPtr->startTImestamp;
+ threadCreatorInfo->threadCreatorFileId = curContextPtr->threadCreatorFileId;
+ threadCreatorInfo->magicNum = 167;
+ fileContentInMem += sizeof(ThreadCreatorInfo);
+
+ /**
+ * Record size information about the recorded array
+ */
+ ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem);
+ arrayDescriptor->arrayElemSize = sizeof(RecTuple);
+ arrayDescriptor->arraySize = curContextPtr->recArr->getSize();
+ arrayDescriptor->magicNum = 167;
+ fileContentInMem += sizeof(ArrayDescriptor);
+
+
+ /**
+ * Write recording tuple onto the disk
+ */
+ memcpy(fileContentInMem, curContextPtr->recArr->data(),
+ curContextPtr->recArr->getTypeSizeInBytes() * curContextPtr->recArr->getSize());
+
+ if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) {
+ fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno));
+ }
+
+ INFO_LOGS("Saving data to %s, %lu", scaler::ExtFuncCallHook::instance->folderName.c_str(), pthread_self());
+}
+
+inline void saveRealFileId(std::stringstream &ss, HookContext *curContextPtr) {
+ ss.str("");
+ ss << scaler::ExtFuncCallHook::instance->folderName << "/realFileId.bin";
+    //The real file id of each function is resolved in the after-hook, so it can only be saved here in DataSaver
+
+ int fd;
+ ssize_t realFileIdSizeInBytes = sizeof(ArrayDescriptor) +
+ (curContextPtr->_this->allExtSymbol.getSize()) * sizeof(uint64_t);
+ uint8_t *fileContentInMem = nullptr;
+ if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) {
+ fatalErrorS(
+ "Cannot open %s because:%s", ss.str().c_str(), strerror(errno))
+ }
+ uint8_t *_fileContentInMem = fileContentInMem;
+
+ /**
+ * Write array descriptor first
+ */
+ ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem);
+ arrayDescriptor->arrayElemSize = sizeof(uint64_t);
+ arrayDescriptor->arraySize = curContextPtr->_this->allExtSymbol.getSize();
+ arrayDescriptor->magicNum = 167;
+ fileContentInMem += sizeof(ArrayDescriptor);
+
+ uint64_t *realFileIdMem = reinterpret_cast(fileContentInMem);
+ for (int i = 0; i < curContextPtr->_this->allExtSymbol.getSize(); ++i) {
+ realFileIdMem[i] = curContextPtr->_this->pmParser.findExecNameByAddr(
+ *(curContextPtr->_this->allExtSymbol[i].gotEntryAddr));
+ }
+
+ if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) {
+ fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno));
+ }
+}
+
+inline void saveDataForAllOtherThread(std::stringstream &ss, HookContext *curContextPtr) {
+ INFO_LOG("Save data of all existing threads");
+ for (int i = 0; i < threadContextMap.getSize(); ++i) {
+ HookContext *threadContext = threadContextMap[i];
+ if (!threadContext->dataSaved) {
+ pthread_mutex_lock(threadContext->threadDataSavingLock);
+ INFO_LOGS("Thread data not saved, save it %d/%zd", i, threadContextMap.getSize());
+ saveData(threadContext);
+ pthread_mutex_unlock(threadContext->threadDataSavingLock);
+ } else {
+ INFO_LOGS("Thread data already saved, skip %d/%zd", i, threadContextMap.getSize());
+ }
+ }
+}
+
void saveData(HookContext *curContextPtr, bool finalize) {
bypassCHooks = SCALER_TRUE;
if (!curContextPtr) {
@@ -146,83 +310,28 @@ void saveData(HookContext *curContextPtr, bool finalize) {
curContextPtr->dataSaved = true;
//Resolve real address
-
if (!curContextPtr->endTImestamp) {
//Did not finish successfully
curContextPtr->endTImestamp = getunixtimestampms();
}
- if (!curContext) { fatalError("curContext is not initialized, won't save anything");
+ if (!curContext) {
+ fatalError("curContext is not initialized, won't save anything");
return;
}
std::stringstream ss;
- ss << scaler::ExtFuncCallHook::instance->folderName << "/threadTiming_" << curContextPtr->threadId << ".bin";
- //INFO_LOGS("Saving timing data to %s", ss.str().c_str());
- FILE *threadDataSaver = fopen(ss.str().c_str(), "wb");
- if (!threadDataSaver) { fatalErrorS("Cannot fopen %s because:%s", ss.str().c_str(),
- strerror(errno));
- }
- //Main application at the end
- curContextPtr->recArr->internalArr[curContextPtr->recArr->getSize() - 1].totalClockCycles =
- curContextPtr->endTImestamp - curContextPtr->startTImestamp;
+#ifdef INSTR_TIMING
+ saveThreadDetailedTiming(ss, curContextPtr);
+#endif
-
- if (fwrite(&curContextPtr->curFileId, sizeof(HookContext::curFileId), 1, threadDataSaver) != 1) { fatalErrorS(
- "Cannot curFileId of %s because:%s", ss.str().c_str(),
- strerror(errno));
- }
-
- int64_t timeEntrySize = curContextPtr->recArr->getSize();
- if (fwrite(&timeEntrySize, sizeof(int64_t), 1, threadDataSaver) != 1) { fatalErrorS(
- "Cannot write timeEntrySize of %s because:%s", ss.str().c_str(),
- strerror(errno));
- }
- if (fwrite(curContextPtr->recArr->data(), curContextPtr->recArr->getTypeSizeInBytes(),
- curContextPtr->recArr->getSize(), threadDataSaver) !=
- curContextPtr->recArr->getSize()) { fatalErrorS("Cannot write timingArr of %s because:%s", ss.str().c_str(),
- strerror(errno));
- }
-
-
- INFO_LOGS("Saving data to %s, %lu", scaler::ExtFuncCallHook::instance->folderName.c_str(), pthread_self());
+ savePerThreadTimingData(ss, curContextPtr);
if (curContextPtr->isMainThread || finalize) {
-// printf("Main thread id is: %lu", curContextPtr->threadId);
- ss.str("");
- ss << scaler::ExtFuncCallHook::instance->folderName << "/realFileId.bin";
- //The real id of each function is resolved in after hook, so I can only save it in datasaver
-
- int fd;
-
- size_t realFileIdSizeInBytes = (curContextPtr->_this->allExtSymbol.getSize() + 1) * sizeof(ssize_t);
- size_t *realFileIdMem = nullptr;
- if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, realFileIdMem)) { fatalErrorS(
- "Cannot open %s because:%s", ss.str().c_str(), strerror(errno))
- }
- realFileIdMem[0] = curContextPtr->_this->allExtSymbol.getSize();
- for (int i = 0; i < curContextPtr->_this->allExtSymbol.getSize(); ++i) {
- realFileIdMem[i + 1] = curContextPtr->_this->pmParser.findExecNameByAddr(
- *(curContextPtr->_this->allExtSymbol[i].gotEntryAddr));
- }
- if (!scaler::fClose(fd, realFileIdSizeInBytes, realFileIdMem)) { fatalError("Cannot close file");
- }
-
- INFO_LOG("Save data of all existing threads");
- for (int i = 0; i < threadContextMap.getSize(); ++i) {
- HookContext *threadContext = threadContextMap[i];
- if (!threadContext->dataSaved) {
- pthread_mutex_lock(threadContext->threadDataSavingLock);
- INFO_LOGS("Thread data not saved, save it %d/%zd", i, threadContextMap.getSize());
- saveData(threadContext);
- pthread_mutex_unlock(threadContext->threadDataSavingLock);
- } else {
- INFO_LOGS("Thread data already saved, skip %d/%zd", i, threadContextMap.getSize());
- }
- }
+ saveRealFileId(ss, curContextPtr);
+ saveDataForAllOtherThread(ss, curContextPtr);
}
- fclose(threadDataSaver);
pthread_mutex_unlock(curContextPtr->threadDataSavingLock);
}
diff --git a/libHook-c/src/HookHandlers.cpp b/libHook-c/src/HookHandlers.cpp
index a7006083..431b796c 100644
--- a/libHook-c/src/HookHandlers.cpp
+++ b/libHook-c/src/HookHandlers.cpp
@@ -385,15 +385,9 @@ void *afterHookHandler() {
// int64_t prevClockTick = curContextPtr->hookTuple[curContextPtr->indexPosi].clockTicks;
uint64_t preClockCycle = curContextPtr->hookTuple[curContextPtr->indexPosi].clockCycles;
-// int64_t curClockTick = 0;
- //(((int64_t) hi << 32) | lo) ;
+
int64_t &c = curContextPtr->recArr->internalArr[symbolId].count;
-// if (c < (1 << 10)) {
-// struct tms curTime;
-// clock_t rlt = times(&curTime);
-// curClockTick = curTime.tms_utime + curTime.tms_stime - prevClockTick;
-// printf("Clock Ticks in posthook=%ld\n", curTime.tms_utime + curTime.tms_stime);
-// }
+
--curContextPtr->indexPosi;
assert(curContextPtr->indexPosi >= 1);
@@ -406,47 +400,19 @@ void *afterHookHandler() {
int32_t &clockCycleThreshold = curContextPtr->recArr->internalArr[symbolId].durThreshold;
int64_t clockCyclesDuration = (int64_t) (postHookClockCycles - preClockCycle);
- if (c < (1 << 10)) {
-
- if (c > (1 << 9)) {
- //Calculation phase
- int64_t clockTickDiff = clockCyclesDuration - meanClockCycle;
-
- if (-clockCycleThreshold <= clockTickDiff && clockTickDiff <= clockCycleThreshold) {
-// printf("Skipped\n");
- //Skip this
- setbit(curContextPtr->recArr->internalArr[symbolId].flags, 0);
- }
-// printf("Threshold=%d clockDiff=%ld shouldSkip?=%s\n", clockTickThreshold, clockTickDiff,
-// -clockTickThreshold <= clockTickDiff && clockTickDiff < = clockTickThreshold ? "True" : "False");
-
- } else if (c < (1 << 9)) {
- //Counting only, no modifying gap. Here the gap should be zero. Meaning every invocation counts
- //https://blog.csdn.net/u014485485/article/details/77679669
- meanClockCycle += (clockCyclesDuration - meanClockCycle) / (float) c; //c<100, safe conversion
-// printf("meanClockTick += (%ld - %f) / (float) %ld\n", clockCyclesDuration, meanClockCycle, c);
- } else if (c == (1 << 9)) {
- //Mean calculation has finished, calculate a threshold based on that
- clockCycleThreshold = meanClockCycle * 0.1;
-// printf("MeanClockTick=%f MeanClockTick*0.1=%f\n", meanClockCycle, meanClockCycle * 0.1);
- }
- } else if (c == (1 << 10)) {
- if (chkbit(curContextPtr->recArr->internalArr[symbolId].flags, 0)) {
- //Skip this symbol
- //printf("Skipped\n");
- curContextPtr->recArr->internalArr[symbolId].gap = 0b11111111111111111111;
- }
- }
- //RDTSCTiming if not skipped
- if (!chkbit(curContextPtr->recArr->internalArr[symbolId].flags, 0)) {
- curContextPtr->recArr->internalArr[symbolId].totalClockCycles += clockCyclesDuration;
- }
- //c = 1 << 10;
+#ifdef INSTR_TIMING
+ TIMING_TYPE &curSize = detailedTimingVectorSize[symbolId];
+ if (curSize < TIMING_REC_COUNT) {
+ ++curSize;
+ detailedTimingVectors[symbolId][curSize] = clockCyclesDuration;
+ }
+#endif
+ //RDTSCTiming if not skipped
+ curContextPtr->recArr->internalArr[symbolId].totalClockCycles += clockCyclesDuration * (c - curContextPtr->recArr->internalArr[symbolId].prevCount + 1);
-// INFO_LOGS("[Post Hook] Thread ID:%lu Func(%ld) CalleeFileId(%ld) Timestamp: %lu\n",
-// pthread_self(), symbolId, curElfSymInfo.libFileId, getunixtimestampms());
+ curContextPtr->recArr->internalArr[symbolId].prevCount = c;
bypassCHooks = SCALER_FALSE;
return callerAddr;
diff --git a/libHook-c/src/ProcInfoParser.cpp b/libHook-c/src/ProcInfoParser.cpp
index 1d8f8ec7..2ab15a58 100644
--- a/libHook-c/src/ProcInfoParser.cpp
+++ b/libHook-c/src/ProcInfoParser.cpp
@@ -182,12 +182,9 @@ namespace scaler {
//We could use binary search to lookup addr in this array.
//Binary search impl segAddrFileMap
- ssize_t lo = 0;
- ssize_t hi = pmEntryArray.getSize();
- ssize_t md;
- bool found = false;
- while (lo != hi) {
- md = (lo + hi) / 2;
+ ssize_t lo = 0, md = 0, hi = pmEntryArray.getSize() - 1;
+ while (lo < hi) {
+ md = lo + (hi - lo) / 2;
if (pmEntryArray[md].addrStart < addr) {
//printf("hi(%d) = md(%d) - 1=(%d)\n", hi, md, md - 1);
lo = md + 1;
@@ -195,35 +192,11 @@ namespace scaler {
//printf("lo(%d) = md(%d) + 1=(%d)\n", lo, md, md + 1);
hi = md;
} else {
- //printf("lo = md =%d\n", md);
- lo = md;
- found = true;
- break;
+ //Equal case: shrink hi to md to locate the left bound (equality should be impossible for segment start addresses)
+ hi = md;
}
}
- if (!found && lo == 0) {
- lo = -1;
- }
-
-
- //It is possible that the address falls within the range of last entry. We need to check this scenario
-
- if (lo == -1) { fatalErrorS(
- "Cannot find addr %p in pmMap. The address is lower than the lowest address if /proc/{pid}/maps.",
- addr);
- exit(-1);
- } else if (lo == pmEntryArray.getSize()) {
- //Address is within range
- lo = pmEntryArray.getSize() - 1;
- }
-
- //Check if it's end address is indeed in this entry. If not, it is because the caller is not in procinfomapper
- // (Maybe skipped, in this case return an id that is larger than the largest function addr)
- if (addr > pmEntryArray[lo].addrEnd) {
- return fileNameArr.size();
- }
-
- return pmEntryArray[lo].fileId;
+ return pmEntryArray[lo - 1].fileId;
}
diff --git a/libHook-c/src/include/type/ExtSymInfo.h b/libHook-c/src/include/type/ExtSymInfo.h
index ff3b001a..5966685a 100644
--- a/libHook-c/src/include/type/ExtSymInfo.h
+++ b/libHook-c/src/include/type/ExtSymInfo.h
@@ -21,14 +21,7 @@ namespace scaler {
uint8_t *pltSecEntryAddr = nullptr; //(8 bytes)
uint64_t pltStubId = 0; //(8 bytes)
FileID libFileId = -1; //(8 bytes) Deprecated, move to a dedicated array
- char padding0;
- char padding1;
- char padding2;
- char padding3;
- char padding4;
- char padding5;
- char padding6;
- char padding7;
+ ssize_t initialGap = 0;//8 Bytes. Initial gap value
};
}
#endif
\ No newline at end of file
diff --git a/libHook-c/src/include/type/RecTuple.h b/libHook-c/src/include/type/RecTuple.h
new file mode 100644
index 00000000..e0c26c15
--- /dev/null
+++ b/libHook-c/src/include/type/RecTuple.h
@@ -0,0 +1,44 @@
+#ifndef SCALER_RECTUPLE_H
+#define SCALER_RECTUPLE_H
+
+/**
+ * This struct is the format that we record time and save to disk.
+ */
+struct RecTuple {
+ uint64_t totalClockCycles; //8
+ int64_t count; //8
+ int64_t prevCount; //8 Used to perform sampling
+ int32_t gap; //4
+ float meanClockTick; //4
+ int32_t durThreshold; //4
+ uint32_t flags; //4
+};
+
+
+/**
+ * This struct stores the total size and element size of an array.
+ * On disk, this struct is followed by array elements
+ */
+struct ArrayDescriptor {
+ uint64_t arrayElemSize;
+ uint64_t arraySize;
+ uint8_t magicNum = 167; //1 Used to ensure the collected data format is recognized in python scripts.
+};
+
+/**
+ * This struct is the format that we record detailed timing and save to disk.
+ */
+typedef int64_t TIMING_TYPE;
+
+struct DetailedTimingDescriptor {
+ TIMING_TYPE timingSize;
+};
+
+struct ThreadCreatorInfo {
+ uint64_t threadCreatorFileId;
+ uint64_t threadExecutionCycles;
+ uint8_t magicNum = 167; //1 Used to ensure the collected data format is recognized in python scripts.
+};
+
+
+#endif //SCALER_RECTUPLE_H
diff --git a/libHook-c/src/include/util/hook/ExtFuncCallHook.h b/libHook-c/src/include/util/hook/ExtFuncCallHook.h
index 0affb25a..5a0fe6f9 100644
--- a/libHook-c/src/include/util/hook/ExtFuncCallHook.h
+++ b/libHook-c/src/include/util/hook/ExtFuncCallHook.h
@@ -64,7 +64,7 @@ namespace scaler {
protected:
- inline bool shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId);
+ inline bool shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId, ssize_t& initialGap);
inline bool
diff --git a/libHook-c/src/include/util/hook/HookContext.h b/libHook-c/src/include/util/hook/HookContext.h
index 3a9144a7..26e4ca66 100644
--- a/libHook-c/src/include/util/hook/HookContext.h
+++ b/libHook-c/src/include/util/hook/HookContext.h
@@ -1,10 +1,11 @@
-#ifndef THREAD_LOCAL_H
-#define THREAD_LOCAL_H
+#ifndef HOOK_CONTEXT_H
+#define HOOK_CONTEXT_H
#include
#include
#include
#include
+#include
#include "ExtFuncCallHook.h"
extern "C" {
@@ -20,22 +21,13 @@ struct HookTuple {
uint32_t clockTicks; //8
};
-struct RecTuple {
- uint64_t totalClockCycles; //8
- int64_t count; //8
- int32_t gap; //4
- float meanClockTick; //4
- int32_t durThreshold; //4
- uint32_t flags; //4
-};
-
struct HookContext {
//todo: Initialize using maximum stack size
int64_t indexPosi;//8bytes
scaler::Array *recArr; //8bytes
//Records which function calls which function for how long, the index is scalerid (Only contains hooked function)
//todo: Replace timingMatrix to a class
- int64_t curFileId = 1; //Which library created the current thread? The default one is main thread
+ int64_t threadCreatorFileId = 1; //Which library created the current thread? The default one is main thread
scaler::ExtFuncCallHook *_this = nullptr; //8bytes
//Records which symbol is called for how many times, the index is scalerid (Only contains hooked function)
uint64_t startTImestamp;
@@ -49,10 +41,12 @@ struct HookContext {
uint8_t isMainThread = false;
uint8_t initialized = 0;
};
+
const uint8_t SCALER_TRUE = 145;
const uint8_t SCALER_FALSE = 167;
extern uint32_t *countingArr;
+
class DataSaver {
public:
char initializeMe = 0;
@@ -77,5 +71,13 @@ extern pthread_mutex_t threadDataSavingLock;
bool initTLS();
+//#define INSTR_TIMING
+#ifdef INSTR_TIMING
+extern const int TIMING_REC_COUNT;
+extern __thread TIMING_TYPE **detailedTimingVectors;
+extern __thread TIMING_TYPE *detailedTimingVectorSize;
+#endif
+
+
}
#endif
\ No newline at end of file
diff --git a/libHook-c/src/include/util/tool/Logging.h b/libHook-c/src/include/util/tool/Logging.h
index e5871921..eca0dfb8 100644
--- a/libHook-c/src/include/util/tool/Logging.h
+++ b/libHook-c/src/include/util/tool/Logging.h
@@ -5,7 +5,7 @@
#define PRINT_INFO_LOG true
#define PRINT_DBG_LOG false
-#define PRINT_ERR_LOG false
+#define PRINT_ERR_LOG true
#if PRINT_DBG_LOG
// Print a single log string
diff --git a/libHook-c/src/libcProxy.cpp b/libHook-c/src/libcProxy.cpp
index 7bf3525a..3badec11 100644
--- a/libHook-c/src/libcProxy.cpp
+++ b/libHook-c/src/libcProxy.cpp
@@ -15,7 +15,7 @@
main_fn_t real_main;
-bool installed=false;
+bool installed = false;
extern "C" {
scaler::Vector threadContextMap;
@@ -51,7 +51,7 @@ int doubletake_main(int argc, char **argv, char **envp) {
HookContext *curContextPtr = curContext;
- curContextPtr->curFileId = 0;
+ curContextPtr->threadCreatorFileId = 0;
curContextPtr->endTImestamp = 0;
curContextPtr->startTImestamp = getunixtimestampms();
curContextPtr->isMainThread = true;
@@ -86,7 +86,7 @@ int doubletake_libc_start_main(main_fn_t main_fn, int argc, char **argv, void (*
void exit(int __status) {
auto realExit = (exit_origt) dlsym(RTLD_NEXT, "exit");
- if(!installed){
+ if (!installed) {
realExit(__status);
return;
}
diff --git a/libHook-c/src/pthreadProxy.cpp b/libHook-c/src/pthreadProxy.cpp
index 104932ed..aeed7996 100644
--- a/libHook-c/src/pthreadProxy.cpp
+++ b/libHook-c/src/pthreadProxy.cpp
@@ -41,7 +41,7 @@ void *dummy_thread_function(void *data) {
HookContext *curContextPtr = curContext;
assert(curContextPtr != NULL);
- curContextPtr->curFileId = curContextPtr->_this->pmParser.findExecNameByAddr(
+ curContextPtr->threadCreatorFileId = curContextPtr->_this->pmParser.findExecNameByAddr(
(void *) actualFuncPtr);
/**
diff --git a/libHook-c/tests/CMakeLists.txt b/libHook-c/tests/CMakeLists.txt
index 3203faa0..52a72f86 100644
--- a/libHook-c/tests/CMakeLists.txt
+++ b/libHook-c/tests/CMakeLists.txt
@@ -79,9 +79,9 @@ target_compile_options(ScalerHook-demoapps-FuncCall PRIVATE ${TEST_FLAGS} -Werro
add_executable(ScalerHook-demoapps-TimingAccuracy src/demoapps/TestTimingAccuracy.cpp)
-target_link_libraries(ScalerHook-demoapps-TimingAccuracy PUBLIC Testlib-FuncCall Testlib-CallFuncCall KuBoPltHook dl)
+target_link_libraries(ScalerHook-demoapps-TimingAccuracy PUBLIC Testlib-FuncCall Testlib-CallFuncCall KuBoPltHook dl)
target_compile_options(ScalerHook-demoapps-TimingAccuracy PRIVATE ${TEST_FLAGS} -Werror)
-target_link_options(ScalerHook-demoapps-TimingAccuracy PRIVATE "-z" "lazy")
+target_link_options(ScalerHook-demoapps-TimingAccuracy PRIVATE "-z" "lazy")
add_executable(ScalerHook-demoapps-HookEverything src/demoapps/TestHookEverything.cpp)
target_include_directories(ScalerHook-demoapps-HookEverything PUBLIC libtest/header)
@@ -207,7 +207,7 @@ add_executable(ScalerHook-parsecapps-swaptions
src/parsecapps/swaptions/MaxFunction.cpp
src/parsecapps/swaptions/nr_routines.cpp
src/parsecapps/swaptions/RanUnif.cpp)
-target_link_libraries(ScalerHook-parsecapps-swaptions pthread ScalerHook-HookAutoAsm-C)
+target_link_libraries(ScalerHook-parsecapps-swaptions pthread ScalerHook-HookAutoAsm-C)
target_compile_options(ScalerHook-parsecapps-swaptions PRIVATE ${TEST_FLAGS} "-DENABLE_THREADS" "-fstrict-aliasing" "-fkeep-inline-functions")
target_include_directories(ScalerHook-parsecapps-swaptions PRIVATE src/parsecapps/swaptions/include)
target_compile_definitions(ScalerHook-parsecapps-swaptions PRIVATE ENABLE_THREADS)
@@ -279,6 +279,9 @@ target_link_libraries(ScalerHook-proof-SaveDataUponExit pthread)
add_executable(ScalerHook-proof-threadlocalasmarray src/proofconcept/TestAccessThreadLocalArrayInAsm.cpp)
target_link_libraries(ScalerHook-proof-threadlocalasmarray pthread)
+add_executable(GetUserSysRealTime src/proofconcept/getUserTime.cpp)
+target_link_libraries(GetUserSysRealTime)
+
#add_executable(ScalerHook-demoapps src/proofconcept/testprog.cpp)
#target_link_libraries(DemoProg libTest PltHookLib dl)
diff --git a/libHook-c/tests/src/proofconcept/binarySegmentSearch.py b/libHook-c/tests/src/proofconcept/binarySegmentSearch.py
new file mode 100644
index 00000000..f289d75d
--- /dev/null
+++ b/libHook-c/tests/src/proofconcept/binarySegmentSearch.py
@@ -0,0 +1,32 @@
+# The following algorithm is used in ProcInfoParser to find, for a target value, the index of the segment whose starting element is the greatest one not exceeding the target (its left bound).
+A = [1, 2, 3, 4, 4, 5]
+
+
+def binSearch(A, tgt):
+ """
+ Return the index of the segment of A that contains tgt
+ :param A: Sorted array of segment starting values
+ :param tgt: Target value to locate
+ :return: Index of the containing segment, or -1 if tgt is below every segment start
+ """
+ lo = 0
+ hi = len(A)
+ md = 0
+ while lo < hi:
+ md = lo + (hi - lo) // 2
+ if A[md] < tgt:
+ lo = md + 1
+ elif A[md] > tgt:
+ hi = md
+ elif A[md] == tgt:
+ hi = md
+
+ return lo-1
+
+
+testList = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]
+expectedVal = [-1, 0, 1, 2, 4, 5]
+for i in range(len(testList)):
+ lo = binSearch(A, testList[i])
+ print(testList[i], lo)
+ assert (lo == expectedVal[i])