diff --git a/.gitmodules b/.gitmodules
index dd7b2323..b735d478 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -7,3 +7,6 @@
[submodule "Analyzer/ScalerRun/lib/yamlcpp_0_7_0"]
path = Analyzer/ScalerRun/lib/yamlcpp_0_7_0
url = https://github.com/jbeder/yaml-cpp.git
+[submodule "benchmarktookit"]
+ path = benchmarktookit
+ url = ssh://git@code.xttech.top:6081/masslab/benchmarktookit.git
diff --git a/.idea/Scaler.iml b/.idea/Scaler.iml
index 40ba48d7..a45220d6 100644
--- a/.idea/Scaler.iml
+++ b/.idea/Scaler.iml
@@ -5,4 +5,11 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
index bf0af86f..a864adec 100644
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -3,6 +3,7 @@
+
diff --git a/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py b/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py
index 77c3f5c4..06383b89 100644
--- a/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py
+++ b/Analyzer/PyVisualizer/src/V2/DataAggregator_V2.py
@@ -673,7 +673,7 @@ def main():
# they can refuse it and the aggregator will use a default file path set to finalFold.folded in the repo
root = tk.Tk()
root.withdraw()
- fileName = filedialog.askopenfilename()
+ fileName = ''
# If an input file was selected, then we will ask them for an output file. They can opt to stop the program
# By directly closing the file dialog twice in a row.
@@ -689,7 +689,7 @@ def main():
if fileName == '':
# If no file name then just default to opening a file in the repo
# print(True)
- fileName = "C:/Users/John/PycharmProjects/Scaler/libAnalyzer/tests/PerfTests/finalFold.folded"
+ fileName = "/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Perf-Artifects/out.stacks-folded"
# outFileName = "perfMemcachedData_V2.json"
outFileName = "perfMemcachedData_V2_nokernelinlined.json"
# outFileName = "perfMemcachedData_V2_noinlined.json"
@@ -709,12 +709,12 @@ def main():
# We will handle the data differently depending on if the user wants to use the timing data
# If "y" is entered, then we will use the time stamp info and sample data,
# if not then we will use sample data by default
- timestampInput = input("Use Timestamps? y/n Default is n: ")
+ timestampInput = 'n'
if timestampInput == "y":
# print(timestampInput)
useTimestamps = True
- inlinedInput = input("Attribute Inlined functions to last known library? y/n Default is n: ")
+ inlinedInput = 'n'
if inlinedInput == "y":
# print(attributeInline)
attributeInline = True
diff --git a/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml b/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml
index b7978a8d..2aa44330 100644
--- a/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml
+++ b/Analyzer/PyVisualizer/src/V3/.idea/workspace.xml
@@ -37,8 +37,9 @@
+
-
+
@@ -61,6 +62,33 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -85,6 +113,8 @@
+
+
@@ -92,20 +122,6 @@
-
-
-
- file://$PROJECT_DIR$/main.py
- 107
-
-
-
- file://$PROJECT_DIR$/main.py
- 142
-
-
-
-
@@ -113,6 +129,7 @@
-
+
+
\ No newline at end of file
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py b/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py
deleted file mode 100644
index 788350d8..00000000
--- a/Analyzer/PyVisualizer/src/V3/AnalyzeAPIInvocationDistribution.py
+++ /dev/null
@@ -1,195 +0,0 @@
-import os
-import traceback
-from matplotlib import cm
-import matplotlib.pyplot as plt
-import pandas as pd
-import struct
-import numpy as np
-from multiprocessing import Pool
-from multiprocessing import Pool, cpu_count
-import time
-
-from datastructure.TimingStruct import ArrayDescriptor
-from util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
-from util.Parser.TimeOutputPrarser import readSymbolFiles, aggregatePerThreadArray, readTimingStruct
-import numpy as np
-
-
-def calcInvokedApis(scalerDataFolder, recInfo):
- invokedAPIs = []
- totalAPIs = []
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
- curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
- invokedAPIs.append(len(curThreadInvokedRecArray))
- totalAPIs.append(len(curThreadRecArray))
- return invokedAPIs, totalAPIs
-
-
-def printCoverageReport(scalerDataFolder, recInfo, invokedAPIs, totalAPIs):
- '''
- Coverage Report
- '''
- print(' \t ', end='')
- for threadId in recInfo.threadIdList:
- print(threadId, end='\t')
- print()
-
- print('Invoked %', end='\t')
- for perc in np.array(invokedAPIs) / np.array(totalAPIs):
- print('%2.2f%%' % (perc * 100), end='\t')
- print()
-
- print('Invoked APIs', end='\t')
- for invokedAPI in invokedAPIs:
- print(invokedAPI, end='\t')
-
- print()
- print('Total APIs', end='\t')
- for totalAPI in totalAPIs:
- print(totalAPI, end='\t')
-
-
-def drawCountingHist(scalerDataFolder, recInfo):
- '''
- Counting histogram Report
- '''
- histogramRoot = os.path.join(scalerDataFolder, 'InvocationCountHist')
- if not os.path.exists(histogramRoot):
- os.mkdir(histogramRoot)
-
- print()
-
- # create 3 data sets with 1,000 samples
- mu, sigma = 200, 25
- x = mu + sigma * np.random.randn(1000, 3)
-
- totalCountArr = None
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
- # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
-
- times = np.array([rec.count for rec in curThreadRecArray])
- if totalCountArr is None:
- totalCountArr = times
- else:
- totalCountArr = np.vstack([totalCountArr, times])
-
- plt.figure()
- plt.hist(times, range=(1, times.max()), edgecolor="black", bins=50)
- plt.xlabel('Invocation counts')
- plt.ylabel('API number')
- plt.title('Histogram of invocation counts for thread %s' % (str(threadId)))
-
- plt.savefig(os.path.join(histogramRoot, threadId + '.png'))
- plt.close()
- # print(threadId, np.max(times))
-
- totalCountArr = totalCountArr.transpose()
- # totalCountArr = totalCountArr[np.where(totalCountArr > 0)]
- plt.figure()
- print(totalCountArr.max())
- plt.hist(totalCountArr, range=(1, totalCountArr.max()), bins=50, stacked=True)
- plt.xlabel('Invocation counts')
- plt.ylabel('API number')
- plt.title('Histogram of invocation counts for all threads staked')
- plt.savefig(os.path.join(histogramRoot, 'total.png'))
- plt.close()
-
-
-def printInvocNumberPerThread(scalerDataFolder):
- if scalerDataFolder is None:
- print()
- return
-
- recInfo = readSymbolFiles(scalerDataFolder)
- invokedAPIs, totalAPIs = calcInvokedApis(scalerDataFolder, recInfo)
-
- totalInvocationCnts = 0
-
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
- # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
-
- times = np.array([rec.count for rec in curThreadRecArray])
- totalInvocationCnts += np.sum(times)
-
- min = np.min(invokedAPIs)
- q1 = np.quantile(invokedAPIs, 0.25)
- q2 = np.quantile(invokedAPIs, 0.5)
- q3 = np.quantile(invokedAPIs, 0.75)
- q4 = np.quantile(invokedAPIs, 1)
- iqr = q3 - q1
- qLower = q1 - 1.5 * iqr
- qUpper = q3 + 1.5 * iqr
- assert (np.unique(totalAPIs).shape[0] == 1)
- assert (q4 == np.max(invokedAPIs))
- print(scalerDataFolder.split('/')[-2], min, q1, q2, q3, q4, iqr, qLower, qUpper, len(invokedAPIs), totalAPIs[0],
- totalInvocationCnts, sep='\t')
-
-
-def printInvocCntPerAPI(scalerDataFolder):
- if scalerDataFolder is None:
- print()
- return
-
- recInfo = readSymbolFiles(scalerDataFolder)
-
- totalCountArr = None
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
- # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
-
- times = np.array([rec.count for rec in curThreadRecArray])
- if totalCountArr is None:
- totalCountArr = times
- else:
- totalCountArr = np.vstack([totalCountArr, times])
- totalCountArr = np.array(totalCountArr).sum(axis=0)
-
- totalCountArr = totalCountArr[np.where(totalCountArr > 0)]
- min = np.min(totalCountArr)
- q1 = np.quantile(totalCountArr, 0.25)
- q2 = np.quantile(totalCountArr, 0.5)
- q3 = np.quantile(totalCountArr, 0.75)
- q4 = np.quantile(totalCountArr, 1)
- iqr = q3 - q1
- qLower = q1 - 1.5 * iqr
- qUpper = q3 + 1.5 * iqr
-
- assert (q4 == np.max(totalCountArr))
- print(scalerDataFolder.split('/')[-2], min, q1, q2, q3, q4, iqr, qLower, qUpper, np.sum(totalCountArr), sep='\t')
-
-
-# steven@masslabserv1:~/Downloads/2022-11-23_10-21-06$ find . -name "scalerdata*"
-scalerDataFolders = [
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/scalerdata_19148850692747664',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/scalerdata_19148905483325260',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.canneal_0/scalerdata_19149009421840348',
- None,
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.facesim_0/scalerdata_19149183735878138',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.ferret_0/scalerdata_19149441937366104',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/scalerdata_19149498481345624',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/scalerdata_19149660473046832',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/scalerdata_19149730167129240',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/scalerdata_19150235160442436',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/scalerdata_19150507898053624',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.vips_0/scalerdata_19150561039693292',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.benchmarksuite.parsec.parsec3_0.x264_0/scalerdata_19150582352742288',
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.db.memcached.memcached_1_6_17_0/scalerdata_19150608805586386',
- None,
- '/media/umass/datasystem/steven/Downloads/2022-11-23_10-21-06/Application.db.redis.redis_7_0_4_0/scalerdata_19150762055485288',
- None,
- None,
- None,
- None,
- None,
-]
-
-print('Thread inovked API # imbalance Analysis')
-for scalerDataFolder in scalerDataFolders:
- printInvocNumberPerThread(scalerDataFolder)
-
-print('API inovked CNT Analysis')
-for scalerDataFolder in scalerDataFolders:
- printInvocCntPerAPI(scalerDataFolder)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py b/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py
index 3153795c..d203d2fd 100644
--- a/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeFuncVariation.py
@@ -9,6 +9,10 @@
from multiprocessing import Pool, cpu_count
import time
+from Analyzer.PyVisualizer.src.V3.datastructure.TimingStruct import ArrayDescriptor
+from Analyzer.PyVisualizer.src.V3.util.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from Analyzer.PyVisualizer.src.V3.util.TimeOutputPrarser import readSymbolFiles
+
def removeOutliers(x):
a = np.array(x)
@@ -29,43 +33,39 @@ def shouldSkip(timingArr):
return c1 == 0 and c2 == 0
-def saveSinglePlot(ROOT_PATH, symbolNameList, threadIdSet, symId):
+def saveSinglePlot(ROOT_PATH, symbolNameList, threadId, tgtSymId):
# hasPoints = False
+ # ROOT_PATH: str, symbolNameList: list, threadId: str, tgtSymIds: list):
+ detailedTimingDict = parseSingleSymDetailedTiming(ROOT_PATH, threadId, [tgtSymId])
+
+ for symId, detailedTimingArr in detailedTimingDict.items():
+ fig, (ax1, ax2) = plt.subplots(2)
+
+ # if detailedTimingArr.shape[0] < 1001:
+ # continue
+ #
+ # skipThis = shouldSkip(detailedTimingArr)
+ # if skipThis:
+ # continue
+
+ ax1.scatter(np.arange(detailedTimingArr.shape[0]), detailedTimingArr, s=10)
+ # Calculate the first 500 mean
+ mean = np.average(detailedTimingArr[0:500])
+ meanUpperbound = mean * (1 + 0.01)
+ meanLowerbound = mean * (1 - 0.01)
+
+ ax2.scatter(np.arange(min(1000,detailedTimingArr.shape[0])), detailedTimingArr[0:min(1000,detailedTimingArr.shape[0])], s=10)
+ ax2.hlines(meanUpperbound, 0, detailedTimingArr.shape[0], colors='red')
+ ax2.hlines(meanLowerbound, 0, detailedTimingArr.shape[0], colors='red')
+ hasPoints = True
+
+ print(os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
+ # if hasPoints:
+ fig.savefig(
+ os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
+ print(
+ os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
- for i, threadId in enumerate(threadIdSet):
- with open(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)),
- 'rb') as f:
- fig, (ax1, ax2) = plt.subplots(2)
- byteArr1 = f.read()
- elemSize = np.fromfile(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)),
- np.int64, count=1, offset=0)[0]
- if elemSize == 0:
- continue
- symbolTiming = np.fromfile(
- os.path.join(ROOT_PATH, 'threadDetailedTiming_%s_%d.bin' % (threadId, symId)),
- np.int64, offset=8)
- skipThis = shouldSkip(symbolTiming)
- if skipThis:
- continue
- if symbolTiming.shape[0] < 1001:
- continue
- ax1.scatter(np.arange(elemSize), symbolTiming, s=10)
- # Calculate the first 500 mean
- mean = np.average(symbolTiming[0:500])
- meanUpperbound = mean * (1 + 0.01)
- meanLowerbound = mean * (1 - 0.01)
-
- # ax2.text(i*50, i * 20, str(np.var(symbolTiming[0:500])))
- ax2.scatter(np.arange(1000), symbolTiming[0:1000], s=10)
- ax2.hlines(meanUpperbound, 0, elemSize, colors='red')
- ax2.hlines(meanLowerbound, 0, elemSize, colors='red')
- # hasPoints = True
-
- # if hasPoints:
- fig.savefig(
- os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
- print(
- os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
return 0
@@ -78,51 +78,35 @@ def error_callback(e):
def doIt(ROOT_PATH, pool, rltList):
print('========================', ROOT_PATH)
allFiles = os.listdir(ROOT_PATH)
- threadIdSet = set({})
- maxSymId = 0
+ symbolNum = 0
+
+ recInfo = readSymbolFiles(ROOT_PATH)
+ threadSymInfo = dict({}) # Threadid : symbol size
for fileName in allFiles:
if fileName.startswith('threadDetailedTiming') and fileName.endswith('.bin'):
- _, threadId, symbolId = fileName.replace('.bin', '').split('_')
- symbolId = int(symbolId)
- if symbolId > maxSymId:
- maxSymId = symbolId
- threadIdSet.add(threadId)
-
- df = pd.read_csv(os.path.join(ROOT_PATH, 'fileName.txt'))
- fileNameList = df['pathName'].to_list()
-
- df = pd.read_csv(os.path.join(ROOT_PATH, 'symbolInfo.txt'))
- symbolNameList = df['funcName'].to_list()
- symbolFileIdList = df['fileId'].to_list()
- symIdInFile = df['symIdInFile'].to_list()
-
- print('Deploying tasks to pool')
- # for symId in range(maxSymId):
- # saveSinglePlot(ROOT_PATH, symbolNameList, threadIdSet, symId)
- for symId in range(maxSymId):
- res = pool.apply_async(saveSinglePlot, args=[ROOT_PATH, symbolNameList, threadIdSet, symId],
- error_callback=error_callback)
- rltList.append(res)
-
-
-pool = Pool(60)
+ _, threadId = fileName.replace('.bin', '').split('_')
+ with open(os.path.join(ROOT_PATH, fileName), 'rb') as f:
+ symDetailedTimingDesc = ArrayDescriptor()
+ f.readinto(symDetailedTimingDesc)
+ assert (symDetailedTimingDesc.arrayElemSize == 0)
+ assert (symDetailedTimingDesc._magicNum == 167)
+ symbolNum = symDetailedTimingDesc.arraySize
+ threadSymInfo[threadId] = symbolNum
+
+ for symId in range(symbolNum):
+ res = pool.apply_async(saveSinglePlot, args=[ROOT_PATH, recInfo.symbolNameList, threadId, symId],
+ error_callback=error_callback)
+ rltList.append(res)
+
+ return rltList
+
+
+pool = Pool(1)
rltList = []
-for i in [
- 'scalerdata_6364935512299934',
- 'scalerdata_6364979105953714',
- 'scalerdata_6365014036860570',
- 'scalerdata_6365088124846144',
- 'scalerdata_6365123879328866',
- 'scalerdata_6365618607468352',
- 'scalerdata_6365739459778370',
- 'scalerdata_6365776935349298',
- 'scalerdata_6365841128804326',
- 'scalerdata_6366139523773026',
- 'scalerdata_6366165053302622'
-]:
- ROOT_PATH = '/media/umass/datasystem/steven/Downloads/CurStrategy1/' + i
+for ROOT_PATH in ['/tmp/scalerdata_14676207526291652']:
doIt(ROOT_PATH, pool, rltList)
+
pool.close()
while len(rltList) > 0:
time.sleep(2)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py b/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py
new file mode 100644
index 00000000..e38a8cbd
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeInvocation.py
@@ -0,0 +1,239 @@
+import os
+import traceback
+from collections import defaultdict
+
+from matplotlib import cm
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+from Analyzer.PyVisualizer.src.V3.util.Quantile import calcQuantile
+from datastructure.TimingStruct import ArrayDescriptor
+from util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from util.Parser.TimeOutputPrarser import readSymbolFiles, aggregatePerThreadArray, readTimingStruct
+import numpy as np
+
+
+def calcInvokedApiNum(scalerDataFolder, recInfo):
+ invokedAPIs = []
+ totalAPIs = []
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
+ invokedAPIs.append(len(curThreadInvokedRecArray))
+ totalAPIs.append(len(curThreadRecArray))
+ return invokedAPIs, totalAPIs
+
+
+def calcInvokedApiCNT(scalerDataFolder, recInfo):
+ invokedAPICnts = []
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ invokedAPICnts.append(np.sum([rec.count for rec in curThreadRecArray]))
+ return invokedAPICnts
+
+
+def printInvocNumberByEachThread(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+
+ recInfo = readSymbolFiles(scalerDataFolder)
+ invokedAPIs, totalAPIs = calcInvokedApiNum(scalerDataFolder, recInfo)
+
+ totalInvocationCnts = 0
+
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
+
+ times = np.array([rec.count for rec in curThreadRecArray])
+ totalInvocationCnts += np.sum(times)
+
+ minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(invokedAPIs)
+
+ print(scalerDataFolder.split('/')[-3], minimum, q1, q2, q3, q4, iqr, qLower, qUpper, len(invokedAPIs), totalAPIs[0],
+ totalInvocationCnts, sep='\t')
+
+
+def printInvocCntByEachThread(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+
+ recInfo = readSymbolFiles(scalerDataFolder)
+ invokedAPIs = calcInvokedApiCNT(scalerDataFolder, recInfo)
+
+ totalInvocationCnts = 0
+
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
+
+ times = np.array([rec.count for rec in curThreadRecArray])
+ totalInvocationCnts += np.sum(times)
+
+ minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(invokedAPIs)
+
+ print(scalerDataFolder.split('/')[-3], minimum, q1, q2, q3, q4, iqr, qLower, qUpper, totalInvocationCnts, sep='\t')
+
+
+def printInvocCnt(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+
+ recInfo = readSymbolFiles(scalerDataFolder)
+
+ totalCountArr = None
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+ # curThreadInvokedRecArray = [rec for rec in curThreadRecArray if rec.count > 0]
+
+ times = np.array([rec.count for rec in curThreadRecArray])
+ if totalCountArr is None:
+ totalCountArr = times
+ else:
+ totalCountArr = np.vstack([totalCountArr, times])
+
+ if len(totalCountArr.shape)==2:
+ # if totalCountArr.shape
+ totalCountArr = np.array(totalCountArr).sum(axis=0)
+
+
+ totalCountArr1 = totalCountArr[np.where(totalCountArr > 0)]
+
+ minima, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(totalCountArr1)
+ print(scalerDataFolder.split('/')[-3], minima, q1, q2, q3, q4, iqr, qLower, qUpper, np.sum(totalCountArr1), sep='\t')
+
+
+class APIInfo:
+ def __init__(self):
+ self.name = None
+ self.cntSum = []
+ self.timeAvg = []
+ self.timeVa = []
+ self.timeAvgDenoise = None
+ self.timeVarDenoise = None
+
+
+def printPerAPIInfoAndCnts(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+ print(scalerDataFolder.split('/')[-3])
+ recInfo = readSymbolFiles(scalerDataFolder)
+
+ totalCountArr = None
+ totalVarianceArry = None
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+
+ counts = np.array([rec.count for rec in curThreadRecArray])
+ if totalCountArr is None:
+ totalCountArr = counts
+ else:
+ totalCountArr = np.vstack([totalCountArr, counts])
+ if len(totalCountArr.shape)==2:
+ # if totalCountArr.shape
+ totalCountArr = np.array(totalCountArr).sum(axis=0)
+
+ sortedNameCntTuple = [
+ (i, recInfo.symbolNameList[i], recInfo.symbolFileIdList[i], recInfo.fileNameList[recInfo.symbolFileIdList[i]],
+ recInfo.realFileIdList[i], recInfo.fileNameList[recInfo.realFileIdList[i]], totalCountArr[i]) for i in
+ range(totalCountArr.shape[0]) if
+ totalCountArr[i] > 0]
+
+ for symId, symName, invokerFIleId, invokerFileName, realFileId, realFileName, count in sorted(sortedNameCntTuple,
+ reverse=True,
+ key=lambda x: x[-1]):
+ print(symId, symName, invokerFIleId, invokerFileName.split('/')[-1], realFileId, realFileName.split('/')[-1],
+ count, sep='\t')
+
+
+def printPerLibInfoAndCnts(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+ print(scalerDataFolder.split('/')[-3])
+ recInfo = readSymbolFiles(scalerDataFolder)
+
+ totalCountArr = None
+ totalVarianceArry = None
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)
+
+ counts = np.array([rec.count for rec in curThreadRecArray])
+ if totalCountArr is None:
+ totalCountArr = counts
+ else:
+ totalCountArr = np.vstack([totalCountArr, counts])
+ if len(totalCountArr.shape)==2:
+ # if totalCountArr.shape
+ totalCountArr = np.array(totalCountArr).sum(axis=0)
+
+ sortedNameCntTuple = [
+ (i, recInfo.symbolNameList[i], recInfo.symbolFileIdList[i], recInfo.fileNameList[recInfo.symbolFileIdList[i]],
+ recInfo.realFileIdList[i], recInfo.fileNameList[recInfo.realFileIdList[i]], totalCountArr[i]) for i in
+ range(totalCountArr.shape[0]) if
+ totalCountArr[i] > 0]
+
+ libFileDict = defaultdict(int)
+
+ for symId, symName, invokerFIleId, invokerFileName, realFileId, realFileName, count in sorted(sortedNameCntTuple,
+ reverse=True,
+ key=lambda x: x[-1]):
+ libFileDict[realFileName] += count
+
+ countList = list(libFileDict.items())
+ countList = sorted(countList, key=lambda x: x[0])
+ for name, count in countList:
+ print(name, count, sep='\t')
+
+
+# steven@masslabserv1:~/Downloads/DistributionAnalysis$ find . -name "scalerdata*"
+scalerDataFolders = [
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.redis.redis_7_0_4_0/Scaler-DETAIL-Artifects/scalerdata_1100850174945384',
+ '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.sqlite.sqlite_3_39_4_0/Scaler-DETAIL-Artifects/scalerdata_1101150204449494'
+]
+
+print('Thread inovked API #')
+for scalerDataFolder in scalerDataFolders:
+ printInvocNumberByEachThread(scalerDataFolder)
+
+print('Thread inovked API CNT')
+for scalerDataFolder in scalerDataFolders:
+ printInvocCntByEachThread(scalerDataFolder)
+
+print('API invocation CNT Analysis')
+for scalerDataFolder in scalerDataFolders:
+ printInvocCnt(scalerDataFolder)
+
+print('Per-API infos')
+for scalerDataFolder in scalerDataFolders:
+ printPerAPIInfoAndCnts(scalerDataFolder)
+
+print('Per-Lib infos')
+for scalerDataFolder in scalerDataFolders:
+ printPerLibInfoAndCnts(scalerDataFolder)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py b/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py
new file mode 100644
index 00000000..4e4a6661
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeOutlierRemovalTechnique.py
@@ -0,0 +1,180 @@
+import math
+import os
+import traceback
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+import math
+
+from Analyzer.PyVisualizer.src.V3.datastructure.TimingStruct import ArrayDescriptor
+from Analyzer.PyVisualizer.src.V3.util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from Analyzer.PyVisualizer.src.V3.util.Parser.TimeOutputPrarser import readSymbolFiles, readTimingStruct
+from Analyzer.PyVisualizer.src.V3.util.Quantile import calcQuantile
+
+
+class APIMetric:
+ def __init__(self):
+ self.estimated = False
+ self.estimatedSum = 0
+ self.realSum = 0
+ self.invocationCnt = 0
+
+ def getErrorRate(self):
+ assert (self.realSum > 0)
+ return abs(self.realSum - self.estimatedSum) / self.realSum
+
+
+def methodBoundAndNaiveClipping(symId, recInfo, timingArr):
+ def shouldSkip(timingArr):
+ if timingArr.shape[0] <= 500:
+ return False
+ mean = np.average(timingArr[0:500])
+ meanUpperbound = mean * (1 + 0.50)
+ meanLowerbound = mean * (1 - 0.50)
+
+ c1 = np.where(timingArr[500:1000] > meanUpperbound)[0].shape[0]
+ c2 = np.where(timingArr[500:1000] < meanLowerbound)[0].shape[0]
+ return c1 == 0 and c2 == 0
+
+ def removeOutliersByPercentage(x, lowerPerc, upperPerc):
+ a = np.array(x)
+ upper_quartile = np.percentile(a, upperPerc)
+ lower_quartile = np.percentile(a, lowerPerc)
+ return x[np.logical_and((lower_quartile <= x), (x <= upper_quartile))]
+
+ skipped = shouldSkip(timingArr)
+ estimatedSum = np.sum(timingArr)
+
+ if skipped:
+ outlierRemovedTimArray = removeOutliersByPercentage(timingArr[500:1000], 5, 95)
+ estimatedSum = np.average(outlierRemovedTimArray) * timingArr.shape[0]
+
+ return skipped, estimatedSum
+
+
+def methodPreEstimation(symId, recInfo, timingArr):
+ shouldSkip = False
+
+ if timingArr.shape[0] <= 500:
+ shouldSkip = False
+ else:
+ shouldSkip = True
+
+ def removeOutliers(timingArr):
+ rlt = []
+ prevVal = timingArr[0]
+ threshold = 100
+ for i in range(1, timingArr.shape[0]):
+ if abs(timingArr[i] - prevVal) < threshold:
+ rlt.append(timingArr[i])
+ else:
+ rlt.append(prevVal)
+ prevVal = timingArr[i]
+ return np.array(rlt)
+
+ realSum = np.sum(timingArr)
+
+ if not shouldSkip:
+ return shouldSkip, realSum
+
+ # Test Prediction
+ outlierRemovedFirst500 = removeOutliers(timingArr[0:500])
+ estimatedSum = np.mean(outlierRemovedFirst500) * timingArr.shape[0]
+
+ if abs(estimatedSum-realSum)/realSum < 0.02:
+ shouldSkip=True
+ return shouldSkip, estimatedSum
+ else:
+ shouldSkip=False
+ return shouldSkip, realSum
+
+def methodEstimation(symId, recInfo, timingArr):
+ pass
+
+
def analyzeOutlierRemovalTechnique(ROOT_PATH, methodFunction):
    """Evaluate one estimation strategy over every thread-timing file under ROOT_PATH.

    methodFunction has signature (symId, recInfo, timingArr) -> (estimated,
    estimatedSum). Per-API error rates are aggregated across all threads and a
    single tab-separated summary row is printed per application.
    """
    if ROOT_PATH is None:
        print()  # prints a blank line for missing inputs
        return
    allFiles = os.listdir(ROOT_PATH)
    symbolNum = 0

    allInvocationRelationCnt = 0
    skippedApiCnt = 0
    allNonZeroApiCnt = 0

    recInfo = readSymbolFiles(ROOT_PATH)
    threadSymInfo = dict({})  # Threadid : symbol size

    # print('=====> ', ROOT_PATH)
    apiMetricsPerApp = []
    # NOTE(review): totalAPICount is reassigned for every thread below, so the
    # printed value reflects only the last thread processed -- confirm intent.
    totalAPICount = 0

    totalInvocationCnt = 0
    for fileName in allFiles:
        if fileName.startswith('threadDetailedTiming') and fileName.endswith('.bin'):
            # Read symbol number in threads
            _, threadId = fileName.replace('.bin', '').split('_')

            detailedTimingArr = parseSingleSymDetailedTiming(ROOT_PATH, threadId, None)
            recArrForThisThread = readTimingStruct(ROOT_PATH, threadId)
            totalAPICount = len(detailedTimingArr)
            apiMetricsPerThread = []

            for symId, timingArr in detailedTimingArr.items():
                curMetric = APIMetric()
                curMetric.estimated, curMetric.estimatedSum = methodFunction(symId, recInfo, timingArr)
                curMetric.realSum = np.sum(timingArr)
                curMetric.invocationCnt = recArrForThisThread[symId].count
                totalInvocationCnt += recArrForThisThread[symId].count
                # Only APIs with real time and a non-zero error rate contribute.
                if curMetric.realSum > 0 and curMetric.getErrorRate() > 0:
                    apiMetricsPerThread.append(curMetric)
                    apiMetricsPerApp.append(curMetric)
            # if len(apiMetricsPerThread) > 0:
            #     # Print table for estimated value per API
            #     minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(
            #         [apiMetric.getErrorRate() for apiMetric in apiMetricsPerThread])
            #     estimatedNum = np.sum([1 for apiMetric in apiMetricsPerThread if apiMetric.estimated])
            #     totalAPICount = len(detailedTimingArr)
            #     print(fileName, minimum, q1, q2, q3, q4, iqr, qLower, qUpper, estimatedNum, totalAPICount, sep='\t')

    if len(apiMetricsPerApp) > 0:
        # Quantiles of per-API error rates plus skip statistics, one row per app.
        minimum, q1, q2, q3, q4, iqr, qLower, qUpper = calcQuantile(
            [apiMetric.getErrorRate() for apiMetric in apiMetricsPerApp])
        estimatedNum = np.sum([1 for apiMetric in apiMetricsPerApp if apiMetric.estimated])
        skippedApiCnt = np.sum([apiMetric.invocationCnt for apiMetric in apiMetricsPerApp if apiMetric.estimated])
        # NOTE(review): uses path component [-3] here but [-2] in the else
        # branch -- confirm which one identifies the application.
        print(ROOT_PATH.split('/')[-3], minimum, q1, q2, q3, q4, iqr, qLower, qUpper, estimatedNum, totalAPICount,
              skippedApiCnt, skippedApiCnt / totalInvocationCnt, totalInvocationCnt,
              sep='\t')
    else:
        print(ROOT_PATH.split('/')[-2])
+
+
# Evaluate the pre-estimation strategy on every collected scalerdata folder
# (one entry per benchmark application); each call prints one summary row.
for ROOT_PATH in [
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.redis.redis_7_0_4_0/Scaler-DETAIL-Artifects/scalerdata_1100850174945384',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.sqlite.sqlite_3_39_4_0/Scaler-DETAIL-Artifects/scalerdata_1101150204449494'
]:
    analyzeOutlierRemovalTechnique(ROOT_PATH, methodPreEstimation)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeParsecOutput.py b/Analyzer/PyVisualizer/src/V3/AnalyzeParsecOutput.py
new file mode 100644
index 00000000..c8c15acd
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeParsecOutput.py
@@ -0,0 +1,152 @@
+import os
+import numpy as np
+import tempfile
+import subprocess
+
+'''
+This script parses the output of parsecmgmt
+'''
+
+
def findAppStart(lines):
    """Locate every benchmark run inside parsecmgmt output.

    Returns (benchmarkName, outputTuple): benchmarkName[i] is the i-th run's
    benchmark name; outputTuple[i] is [startLine, endLine] bounding its output.
    """
    benchmarkName = []
    outputTuple = []

    # Pass 1: start markers carry the benchmark name.
    for lineNo, text in enumerate(lines):
        if text.startswith('[PARSEC] [========== Running '):
            cleaned = text.replace('[PARSEC] [========== Running benchmark', '')
            cleaned = cleaned.replace(' [1] ==========]', '')
            benchmarkName.append(cleaned.strip())
            outputTuple.append([lineNo, 0])

    # Pass 2: end markers are matched to starts in order of appearance.
    endIdx = 0
    for lineNo, text in enumerate(lines):
        if text.startswith('[PARSEC] [---------- End of output ----------]'):
            outputTuple[endIdx][1] = lineNo
            endIdx += 1
    return benchmarkName, outputTuple
+
+
def extractRealTimeAndMemory(lines, outputTuple):
    """Pull /usr/bin/time statistics for each benchmark run.

    The stats line is expected one line above each end marker, formatted as
    'real:<sec>, user:<sec>, sys:<sec>, memKB:<kb>'. Returns
    (realSeconds, memKB) as numpy arrays, one entry per run.
    """
    realTimeList = []
    memList = []
    for _, endI in outputTuple:
        fields = lines[endI - 1].split(',')
        # First field is wall-clock seconds, last is peak memory in KB.
        realTimeList.append(float(fields[0].split(':')[1].strip()))
        memList.append(float(fields[-1].split(':')[1].strip()))
    return np.array(realTimeList), np.array(memList)
+
+
def parseOutputFile(PARSEC_OUTPUT_FOLDER):
    """Parse every run's captured output in PARSEC_OUTPUT_FOLDER.

    Each file is named '<runId>.txt'. Returns (benchmarkName, memArray,
    timeArray), where the arrays are (numRuns, numBenchmarks) matrices and
    benchmarkName comes from the last file parsed (assumed identical across
    runs -- TODO confirm).

    Bug fix: the original assigned the first file's rows and then vstacked the
    same rows again, so the first run was counted twice in both matrices,
    skewing downstream averages and standard deviations.
    """
    benchmarkName = None
    timeRows = []
    memRows = []

    for textFileName in os.listdir(PARSEC_OUTPUT_FOLDER):
        # Make sure the file is named '<runId>.txt'
        assert (str.isdigit(textFileName[:-4]))
        with open(os.path.join(PARSEC_OUTPUT_FOLDER, textFileName)) as f:
            lines = f.readlines()
        benchmarkName, outputTuple = findAppStart(lines)
        realTimeList, memList = extractRealTimeAndMemory(lines, outputTuple)
        timeRows.append(realTimeList)
        memRows.append(memList)

    # vstack keeps a single run as a (1, numBenchmarks) matrix.
    timeArray = np.vstack(timeRows) if timeRows else None
    memArray = np.vstack(memRows) if memRows else None
    return benchmarkName, memArray, timeArray
+
+
def printMatrix(matrixName, preloadList, benchmarkNameList, matrix):
    """Print `matrix` as a tab-separated table.

    Benchmark names form the column header; each row is labelled with the
    corresponding preload-configuration name from preloadList.
    """
    print(matrixName)
    # Header row: blank corner cell, then one benchmark name per column.
    print(' \t', end='')
    for header in benchmarkNameList:
        print(header, end='\t')
    print()

    # Body: row label from preloadList, then the row's values.
    for rowIdx in range(matrix.shape[0]):
        print(preloadList[rowIdx][0], end='\t')
        for colIdx in range(matrix.shape[1]):
            print(matrix[rowIdx][colIdx], end='\t')
        print()
    print()
+
+
def runBenchmark(runTimes, configNames, preloadList, inputType, threadNum, outputFolder):
    """Run parsecmgmt `runTimes` times for every preload configuration.

    Each run's combined stdout/stderr is captured to
    <outputFolder>/<preloadName>/<runId>.txt.

    :param runTimes: repetitions per configuration
    :param configNames: parsec package names (passed via -p)
    :param preloadList: (name, shell-prefix) tuples; a non-empty prefix is
                        forwarded to parsecmgmt via -s
    :param inputType: parsec input size for -i, e.g. 'simsmall'
    :param threadNum: thread count for -n
    :param outputFolder: directory receiving one subdirectory per configuration
    """
    for (preloadName, preloadCmd) in preloadList:
        curOutputDir = os.path.join(outputFolder, preloadName)
        os.mkdir(curOutputDir)
        for curRunId in range(runTimes):
            curOutputFile = os.path.join(curOutputDir, '%d.txt' % curRunId)

            cmd = 'parsecmgmt -a run'
            cmd += ''.join([''.join([' -p ', config]) for config in configNames])
            if preloadCmd.strip() != '':
                cmd += ''.join([' -s "', preloadCmd, '"'])
            cmd += ''.join([' -n ', str(threadNum)])
            cmd += ''.join([' -i ', inputType])
            cmd += ''.join([' > ', curOutputFile])
            # Bug fix: the original appended a second independent redirection
            # ('2> <same file>'), which opens the file twice with truncation so
            # the two streams clobber each other. '2>&1' duplicates stdout's
            # descriptor instead, interleaving both streams correctly.
            cmd += ' 2>&1'

            print('Running:', cmd)
            if os.system(cmd) != 0:
                print(cmd, 'failed')
+
+
def packOutput(preloadList, outputFolder):
    """Aggregate per-configuration run results into summary matrices.

    For every (preloadName, _) in preloadList, parses the run outputs under
    <outputFolder>/<preloadName> and stacks per-configuration averages and
    standard deviations row by row.

    Returns (avgRealTimes, stdRealTimes, avgMem, stdMem, benchmarkNames).
    """
    def _stack(acc, row):
        # First row starts the matrix; later rows are appended beneath it.
        return row if acc is None else np.vstack([acc, row])

    stdRealTimes = None
    avgRealTimes = None
    avgMem = None
    stdMem = None
    outputBenchNameList = None
    for (preloadName, preloadCmd) in preloadList:
        curOutputDir = os.path.join(outputFolder, preloadName)
        '''
        Calculate mean and average of the results
        '''
        outputBenchNameList, memArray, timeArray = parseOutputFile(curOutputDir)
        avgRealTimes = _stack(avgRealTimes, np.average(timeArray, axis=0))
        stdRealTimes = _stack(stdRealTimes, np.std(timeArray, axis=0))
        avgMem = _stack(avgMem, np.average(memArray, axis=0))
        stdMem = _stack(stdMem, np.std(memArray, axis=0))
    return avgRealTimes, stdRealTimes, avgMem, stdMem, outputBenchNameList
+
+
'''
Find max output id
'''

# Wraps each benchmark with /usr/bin/time so wall clock and peak memory (KB)
# appear on a single parseable line (see extractRealTimeAndMemory).
TIME_COMMAND = "/usr/bin/time -f 'real:%e, user:%U, sys:%S, memKB:%M' "
RUNTIMES = 1
# Parsec packages to benchmark.
CONFIG_NAMES = ['blackscholes', 'bodytrack', 'facesim', 'ferret', 'fluidanimate', 'freqmine', 'raytrace', 'swaptions',
                'vips', 'x264']
# PRELOAD_LIST = [('Default', TIME_COMMAND), ('Perf', TIME_COMMAND + 'perf record -g -o perf.data')]
# (name, shell-prefix) pairs; this prefix injects the Scaler hook via LD_PRELOAD.
PRELOAD_LIST = [('Scaler', 'export LD_PRELOAD=/media/umass/datasystem/steven/Scaler/cmake-build-release/libHook-c/libScalerHook-HookAutoAsm-C.so')]

# Run everything into a throw-away directory, then summarize and print.
outputFolder = tempfile.mkdtemp()
runBenchmark(RUNTIMES, CONFIG_NAMES, PRELOAD_LIST, 'simsmall', 64, outputFolder)

avgRealTimes, stdRealTimes, avgMem, stdMem, outputBenchNameList = packOutput(PRELOAD_LIST, outputFolder)
printMatrix('Table of average runtime', PRELOAD_LIST, outputBenchNameList, avgRealTimes)
printMatrix('Table of std runtime', PRELOAD_LIST, outputBenchNameList, stdRealTimes)
printMatrix('Table of average memory', PRELOAD_LIST, outputBenchNameList, avgMem)
printMatrix('Table of std memory', PRELOAD_LIST, outputBenchNameList, stdMem)
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py b/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py
new file mode 100644
index 00000000..d25df8a0
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzePerfCounts.py
@@ -0,0 +1,108 @@
+import os
+import traceback
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+
class StackElem:
    """One frame of a perf call stack: the library and function it refers to."""

    def __init__(self, libName, funcName):
        self.libName = libName
        self.funcName = funcName

    def __str__(self):
        return f"{self.libName} {self.funcName}"
+
+
def analyzePerfScript(rootPath):
    """Count, per library, how many call-stack frames retire between samples.

    Parses `perf script` textual output: samples are separated by blank lines,
    each starting with a header line followed by one frame per line. Each
    sample's stack is compared with the previous one; every frame of the
    previous stack past the first divergence increments its library's counter.
    Samples taken inside perf itself are skipped.

    :param rootPath: path to the perf script output text file
    :return: defaultdict mapping library name -> retirement count
    """
    callCountDict = defaultdict(int)

    lastStack = []
    curStack = []
    with open(rootPath, 'r') as f:

        firstLine = True
        # NOTE(review): 'Stak' is a typo for 'Stack'; kept to avoid code changes.
        skipThisStak = False
        while True:

            if not firstLine:
                line = f.readline()
            else:
                # Pretend the file starts with a blank separator so the first
                # sample header is consumed by the branch below.
                line = '\n'
            if line == '\n':
                # Blank line: boundary between two samples.
                skipThisStak = False
                firstLine = False
                # print('New call stack')
                tmp = f.readline()

                if not tmp:
                    break

                # First token of the header is the sampled command name.
                if tmp.split()[0].strip() == 'perf':
                    skipThisStak = True
                # Calculate count based on call stack
                for i in range(len(lastStack)):
                    if i >= len(curStack) or lastStack[i].funcName != curStack[i].funcName:
                        # Everything from the first divergence upward retired.
                        for j in range(i, len(lastStack)):
                            callCountDict[lastStack[j].libName] += 1
                        break

                lastStack = curStack
                curStack = []
                if not tmp:
                    break
                continue
            else:
                if not skipThisStak:
                    # Frame line format: '<addr> <funcName>+<offset> (<libPath>)'
                    line = line.strip().strip('\t').strip('\n')
                    addrEndI = line.find(' ')
                    addr = line[0:addrEndI]
                    libStartI = line.rfind(' ') + 1
                    libName = line[libStartI:]
                    funcNameAddr = line[addrEndI:libStartI].strip()
                    # Kernel frames and unresolved symbols are ignored.
                    if funcNameAddr != '[unknown]' and libName != '([kernel.kallsyms])':
                        # print(libName)
                        plutInd = funcNameAddr.rfind('+')
                        funcName = funcNameAddr[0:plutInd]
                        addr = funcNameAddr[plutInd + 1:]
                        # Frames arrive leaf-first; insert(0, ...) stores root-first.
                        curStack.insert(0, StackElem(libName[1:-1], funcName))
    return callCountDict
+
+
# Print per-library frame-retirement counts for each application's perf script
# output. Entries are commented in/out depending on which run is analyzed.
for ROOT_PATH in [
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.vips_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.benchmarksuite.parsec.parsec3_0.x264_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.memcached.memcached_1_6_17_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.mysql.mysql_8_0_31_0/Perf-Artifects/script.txt',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-27_19-31-45-Perf-4000/Application.db.redis.redis_7_0_4_0/Perf-Artifects/script.txt',
    '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.db.sqlite.sqlite_3_39_4_0/Perf-Artifects/script.txt',
    '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.webserver.httpd.httpd_2_4_54_0/Perf-Artifects/script.txt',
    '/media/umass/datasystem/steven/Downloads/Perf-Sampling-At4000/Application.webserver.nginx.nginx_1_23_2_0/Perf-Artifects/script.txt',
    # None,
    # None,
    # None,
    # None,
]:
    callDict = analyzePerfScript(ROOT_PATH)
    print(ROOT_PATH)
    # Print counts sorted alphabetically by library name.
    itemList = list(callDict.items())
    itemList = sorted(itemList, key=lambda x: x[0])
    for libName, counts in itemList:
        print(libName, counts, sep='\t')
diff --git a/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py b/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py
new file mode 100644
index 00000000..edf258d8
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/AnalyzeTiming.py
@@ -0,0 +1,103 @@
+import os
+import traceback
+from collections import defaultdict
+
+from matplotlib import cm
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+from Analyzer.PyVisualizer.src.V3.datastructure.Metric import Metric
+from Analyzer.PyVisualizer.src.V3.util.Quantile import calcQuantile
+from datastructure.TimingStruct import ArrayDescriptor
+from util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from util.Parser.TimeOutputPrarser import readSymbolFiles, aggregatePerThreadArray, readTimingStruct
+import numpy as np
+
+
def parsePerLibSelfTime(scalerDataFolder):
    """Sum recorded clock cycles per real library file for one scaler run.

    Reads every thread's timing array under scalerDataFolder and returns a
    defaultdict mapping real file name -> total clock cycles, or None when the
    folder is missing.

    Fixes: removed a duplicated `libFileDict = defaultdict(int)` statement and
    the unused `totalVarianceArry` local from the original.
    """
    if scalerDataFolder is None:
        print()
        return
    recInfo = readSymbolFiles(scalerDataFolder)

    # Stack per-thread cycle counts into a (numThreads, numSymbols) matrix.
    totalTimingArr = None
    for threadId in recInfo.threadIdList:
        curThreadRecArray = readTimingStruct(scalerDataFolder, threadId)

        cycles = np.array([rec.totalClockCycles for rec in curThreadRecArray])
        if totalTimingArr is None:
            totalTimingArr = cycles
        else:
            totalTimingArr = np.vstack([totalTimingArr, cycles])

    # A single thread yields a 1-D array; normalize to one row.
    if len(totalTimingArr.shape) == 1:
        totalTimingArr = np.reshape(totalTimingArr, (1, -1))

    # Collect every symbol with any recorded time.
    # NOTE(review): the last column is deliberately skipped (shape[1] - 1);
    # presumably a sentinel/self slot -- confirm against the recorder layout.
    sortedNameCntTuple = []
    for i in range(totalTimingArr.shape[1] - 1):
        if np.sum(totalTimingArr[:, i] > 0):
            sortedNameCntTuple.append((i, recInfo.symbolNameList[i], recInfo.symbolFileIdList[i],
                                       recInfo.fileNameList[recInfo.symbolFileIdList[i]],
                                       recInfo.realFileIdList[i], recInfo.fileNameList[recInfo.realFileIdList[i]],
                                       np.sum(totalTimingArr[:, i])))

    # Aggregate per real library file (iteration order does not affect sums).
    libFileDict = defaultdict(int)
    for symId, symName, invokerFIleId, invokerFileName, realFileId, realFileName, time in sorted(sortedNameCntTuple,
                                                                                                 reverse=True,
                                                                                                 key=lambda x: x[-1]):
        libFileDict[realFileName] += time

    # timeList = list(libFileDict.items())
    # timeList = sorted(timeList, key=lambda x: x[0])
    #
    # for name, time in timeList:
    #     print(name, time, sep='\t')
    return libFileDict
+
+
def findScalerDataFolder(rootPath):
    """Walk rootPath top-down and return the first directory whose name starts
    with 'scalerdata', or None when no such directory exists."""
    for parent, childDirs, _ in os.walk(rootPath):
        for childDir in childDirs:
            if childDir.startswith('scalerdata'):
                return os.path.join(parent, childDir)
    return None
+
+
# Root folder containing one subfolder per (application, run) pair.
scalerDataFolders = '/media/umass/datasystem/steven/Downloads/accuracyTest/2022-12-08_06-26-18-Sampling-0B111'
print('Per-Lib infos')

# appName -> list of scalerdata folders (one per run of that application).
pathDict = defaultdict(list)

for folderName in os.listdir(scalerDataFolders):
    # Folder names end with '<appName>_<runId>'; split the name from the run id.
    cache= folderName.split('.')[-1]
    appName=cache[0:-2]
    runTime=cache[-1:]
    scalerDataFolder = findScalerDataFolder(os.path.join(scalerDataFolders, folderName))
    pathDict[appName].append(scalerDataFolder)

# appName -> {libName -> Metric accumulating per-run total cycles}
rlt = {}
for appName, pathList in pathDict.items():
    rlt[appName] = defaultdict(Metric)
    for path in pathList:
        libFileDict = parsePerLibSelfTime(path)
        for libName, time in libFileDict.items():
            rlt[appName][libName].append(time)

# Sort Results
rlt = list(rlt.items())
rlt = sorted(rlt, key=lambda x: x[0])
for appName, libraryList in rlt:
    libraryList = list(libraryList.items())
    libraryList = sorted(libraryList, key=lambda x: x[0])
    print(appName)
    # Per library: mean, std, and coefficient of variation across runs.
    for libraryName, metric in libraryList:
        print(libraryName, metric.mean(), metric.std(), metric.std() / metric.mean(), sep='\t')
diff --git a/Analyzer/PyVisualizer/src/V3/PlotInvocation.py b/Analyzer/PyVisualizer/src/V3/PlotInvocation.py
new file mode 100644
index 00000000..cac813ce
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/PlotInvocation.py
@@ -0,0 +1,120 @@
+import os
+import shutil
+import traceback
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+from Analyzer.PyVisualizer.src.V3.datastructure.TimingStruct import ArrayDescriptor
+from Analyzer.PyVisualizer.src.V3.util.Parser.DetailedTimeOutputPrarser import parseSingleSymDetailedTiming
+from Analyzer.PyVisualizer.src.V3.util.Parser.TimeOutputPrarser import readSymbolFiles
+
+
def shouldPlt(timingArr):
    """Only plot symbols with more than 500 recorded invocations."""
    return len(timingArr) > 500
+
+
def saveSinglePlot(ROOT_PATH, symbolNameList, threadId, tgtSymId, graphType='hist'):
    """Render one symbol's detailed timing for one thread to a PNG file.

    Output path: ROOT_PATH/DetailedTime/<graphType>/<symbolName>/
    threadDetailedTiming_<symId>_<symbolName>_<threadId>.png. Symbols with 500
    or fewer samples are skipped (see shouldPlt).

    :param ROOT_PATH: scalerdata folder containing the timing binaries
    :param symbolNameList: symbol id -> human-readable symbol name
    :param threadId: thread whose timings are plotted
    :param tgtSymId: the single symbol id to plot
    :param graphType: 'hist' or 'scatter'
    :return: 0 (makes Pool.apply_async results readiness-checkable)
    """
    # hasPoints = False
    # ROOT_PATH: str, symbolNameList: list, threadId: str, tgtSymIds: list):
    detailedTimingDict = parseSingleSymDetailedTiming(ROOT_PATH, threadId, [tgtSymId])

    for symId, detailedTimingArr in detailedTimingDict.items():
        # Two axes are allocated but only ax1 is drawn on.
        fig, axes = plt.subplots(nrows=1,ncols=2)
        ax1, ax2 = axes

        if not shouldPlt(detailedTimingArr):
            continue

        if graphType == 'hist':
            ax1.hist(detailedTimingArr, range=(1, detailedTimingArr.max()), edgecolor="black", bins=50)
            # print(os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
            # if hasPoints:
        elif graphType == 'scatter':
            ax1.scatter(np.arange(detailedTimingArr.shape[0]), detailedTimingArr, s=10)
        else:
            assert (False)

        if not os.path.exists(os.path.join(ROOT_PATH, 'DetailedTime', graphType, symbolNameList[symId])):
            os.makedirs(os.path.join(ROOT_PATH, 'DetailedTime', graphType, symbolNameList[symId]), exist_ok=True)
        fig.savefig(
            os.path.join(ROOT_PATH, 'DetailedTime', graphType, symbolNameList[symId],
                         'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
        # print(os.path.join(ROOT_PATH, 'threadDetailedTiming_%d_%s_%s.png' % (symId, symbolNameList[symId], threadId)))
        plt.close(fig)
    return 0
+
+
def error_callback(e):
    # Invoked by Pool.apply_async when a worker raises; dumps the exception's
    # attribute list and its chained cause for debugging.
    print('error')
    print(dir(e), "\n")
    print("-->{}<--".format(e.__cause__))
+
+
def doIt(ROOT_PATH, pool, rltList):
    """Queue one scatter-plot job per (thread, symbol) found under ROOT_PATH.

    Scans ROOT_PATH for per-thread detailed-timing binaries, reads each file's
    ArrayDescriptor header to learn its symbol count, and submits saveSinglePlot
    jobs to `pool`. Each AsyncResult is appended to rltList, which is returned.
    """
    print('========================', ROOT_PATH)
    allFiles = os.listdir(ROOT_PATH)
    symbolNum = 0

    recInfo = readSymbolFiles(ROOT_PATH)
    threadSymInfo = dict({})  # Threadid : symbol size
    for fileName in allFiles:
        if fileName.startswith('threadDetailedTiming') and fileName.endswith('.bin'):
            _, threadId = fileName.replace('.bin', '').split('_')
            with open(os.path.join(ROOT_PATH, fileName), 'rb') as f:
                # The file begins with an ArrayDescriptor header.
                symDetailedTimingDesc = ArrayDescriptor()
                f.readinto(symDetailedTimingDesc)
                # elemSize 0 plus magic 167 mark a valid detailed-timing file.
                assert (symDetailedTimingDesc.arrayElemSize == 0)
                assert (symDetailedTimingDesc._magicNum == 167)
                symbolNum = symDetailedTimingDesc.arraySize
                threadSymInfo[threadId] = symbolNum

                for symId in range(symbolNum):
                    res = pool.apply_async(saveSinglePlot,
                                           args=[ROOT_PATH, recInfo.symbolNameList, threadId, symId, 'scatter'],
                                           error_callback=error_callback)
                    rltList.append(res)

    return rltList
+
+
# Fan plotting jobs out over 64 worker processes.
pool = Pool(64)
rltList = []
scalerDataFolders = [
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DETAIL-Artifects/scalerdata_1098304170115468',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.bodytrack_0/Scaler-DETAIL-Artifects/scalerdata_1098358945736648',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.canneal_0/Scaler-DETAIL-Artifects/scalerdata_1098456760642602',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.dedup_0/Scaler-DETAIL-Artifects/scalerdata_1098609218045480',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.facesim_0/Scaler-DETAIL-Artifects/scalerdata_1098633517549600',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.ferret_0/Scaler-DETAIL-Artifects/scalerdata_1098896081465298',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.fluidanimate_0/Scaler-DETAIL-Artifects/scalerdata_1107220283374448',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.freqmine_0/Scaler-DETAIL-Artifects/scalerdata_1099112583217984',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.raytrace_0/Scaler-DETAIL-Artifects/scalerdata_1099182362951380',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.streamcluster_0/Scaler-DETAIL-Artifects/scalerdata_1099698097420154',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.swaptions_0/Scaler-DETAIL-Artifects/scalerdata_1099956187630596',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.vips_0/Scaler-DETAIL-Artifects/scalerdata_1100002948879490',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.benchmarksuite.parsec.parsec3_0.x264_0/Scaler-DETAIL-Artifects/scalerdata_1100026471754668',
    # '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.httpd.httpd_2_4_54_0/Scaler-DETAIL-Artifects/scalerdata_1100053088973938',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.memcached.memcached_1_6_17_0/Scaler-DETAIL-Artifects/scalerdata_1100215829624386',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.mysql.mysql_8_0_31_0/Scaler-DETAIL-Artifects/scalerdata_1100366950684482',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.webserver.nginx.nginx_1_23_2_0/Scaler-DETAIL-Artifects/scalerdata_1100701228337962',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.redis.redis_7_0_4_0/Scaler-DETAIL-Artifects/scalerdata_1100850174945384',
    '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-11-30_19-51-43-DetailedTiming/Application.db.sqlite.sqlite_3_39_4_0/Scaler-DETAIL-Artifects/scalerdata_1101150204449494'
]

for ROOT_PATH in scalerDataFolders:
    if ROOT_PATH is not None:
        recInfo = readSymbolFiles(ROOT_PATH)
        doIt(ROOT_PATH, pool, rltList)

# Drain the pool, reporting remaining job count every two seconds.
pool.close()
while len(rltList) > 0:
    time.sleep(2)
    rltList = [rlt for rlt in rltList if not rlt.ready()]
    print("%d jobs left" % len(rltList))
pool.join()
diff --git a/Analyzer/PyVisualizer/src/V3/main.py b/Analyzer/PyVisualizer/src/V3/XFAVisualization.py
similarity index 56%
rename from Analyzer/PyVisualizer/src/V3/main.py
rename to Analyzer/PyVisualizer/src/V3/XFAVisualization.py
index 39ac125a..536019d3 100644
--- a/Analyzer/PyVisualizer/src/V3/main.py
+++ b/Analyzer/PyVisualizer/src/V3/XFAVisualization.py
@@ -3,23 +3,36 @@
import pandas as pd
import struct
import re
+
+from util.Analyzer.XFA import generateXFAStruct
from datastructure.TimingStruct import FileRecord, RecTuple
-from preProcessing import aggregatePerThreadArray, generateTimingStruct, calcPercentage, readSymbolFiles
+from util.Parser.TimeOutputPrarser import aggregatePerThreadArray, readSymbolFiles
# scalerDataFolder = '/media/umass/datasystem/steven/benchmark/parsec/tests/dedup/scalerdata_30414326191467414'
-scalerDataFolder = '/media/umass/datasystem/steven/intel/Perf_Scaler-Parsec-Callgraph-Sig2022Fall/x264/scalerdata_12852017355851478_FGDS'
+scalerDataFolder = '/media/umass/datasystem/steven/Downloads/performancetest20221124/2022-12-07_20-11-36-EffImp/Application.benchmarksuite.parsec.parsec3_0.blackscholes_0/Scaler-DBG-Artifects/scalerdata_1120018768482198'
recInfo = readSymbolFiles(scalerDataFolder)
realFileId = None
aggregatedTimeArray, aggregatedStartingTime = aggregatePerThreadArray(scalerDataFolder, recInfo)
+
+for i, v in enumerate(aggregatedTimeArray):
+ if v.count > 0:
+ curRealFileId=recInfo.realFileIdList[i]
+ if curRealFileId==len(recInfo.fileNameList):
+ curRealFileId=len(recInfo.fileNameList)-1
+ print(recInfo.symbolNameList[i], recInfo.fileNameList[curRealFileId], v.count, sep='\t')
+
# Generate graph
-timingRecord = generateTimingStruct(list(aggregatedTimeArray), aggregatedStartingTime, recInfo)
+timingRecord = generateXFAStruct(list(aggregatedTimeArray), aggregatedStartingTime, recInfo)
print(timingRecord)
+for time in timingRecord:
+ print(time.fileName,time.selfClockCycles.value,sep='\t')
+
# totalSelfTime = 0
# for fileRec in timingRecord:
# if fileRec.selfClockCycles.value<0:
diff --git a/Analyzer/PyVisualizer/src/V3/datastructure/Metric.py b/Analyzer/PyVisualizer/src/V3/datastructure/Metric.py
new file mode 100644
index 00000000..29bba7a7
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/datastructure/Metric.py
@@ -0,0 +1,16 @@
+import numpy as np
+
+
class Metric:
    """Accumulates scalar observations and reports summary statistics."""

    def __init__(self):
        self.metricList = []

    def append(self, *args, **kwargs):
        """Record one observation (delegates to list.append)."""
        self.metricList.append(*args, **kwargs)

    def std(self):
        """Population standard deviation of all recorded observations."""
        return np.std(np.asarray(self.metricList))

    def mean(self):
        """Arithmetic mean of all recorded observations."""
        return np.mean(np.asarray(self.metricList))
+
diff --git a/Analyzer/PyVisualizer/src/V3/datastructure/TimingStruct.py b/Analyzer/PyVisualizer/src/V3/datastructure/TimingStruct.py
index 16b91306..e909c723 100644
--- a/Analyzer/PyVisualizer/src/V3/datastructure/TimingStruct.py
+++ b/Analyzer/PyVisualizer/src/V3/datastructure/TimingStruct.py
@@ -1,6 +1,10 @@
from collections import defaultdict
from ctypes import *
+'''
+Display related parameter:
+'''
+
class VPG:
def __init__(self):
@@ -63,16 +67,6 @@ def __str__(self):
return str(self.fileName)
-class RecTuple(Structure):
- _fields_ = [
- ('totalClockCycles', c_uint64),
- ('count', c_int64),
- ('_gap', c_uint32),
- ('_meanClockTick', c_float),
- ('_durThreshold', c_uint32),
- ('_flags', c_uint32)]
-
-
class RecordingInfo:
'''
Used to share common information needed in processing between different functions
@@ -86,3 +80,39 @@ def __init__(self):
self.symbolNameList = []
self.symbolFileIdList = []
self.symIdInFileList = []
+
+
+'''
+C data structures
+
+These structs should be consistent with https://github.com/UTSASRG/Scaler/blob/feature-tuneParm/libHook-c/src/include/type/RecTuple.h
+'''
+
class RecTuple(Structure):
    # Per-symbol timing record; the layout must match RecTuple.h in the Scaler
    # repository (see the module comment). Fields prefixed '_' are internal to
    # the recorder.
    _fields_ = [
        ('totalClockCycles', c_uint64),
        ('count', c_int64),
        ('_prevCount', c_int64),
        ('_gap', c_uint32),
        ('_meanClockTick', c_float),
        ('_durThreshold', c_uint32),
        ('_flags', c_uint32)]
+
+
class ArrayDescriptor(Structure):
    # Header preceding a serialized array: element size, element count, and a
    # magic byte used by readers as a sanity check.
    _fields_ = [
        ('arrayElemSize', c_uint64),
        ('arraySize', c_uint64),
        ('_magicNum', c_uint8)]
+
+
class DetailedTimingDescriptor(Structure):
    # Number of detailed-timing entries that follow in the stream.
    _fields_ = [
        ('timingSize', c_int64)]
+
+
class ThreadCreatorInfo(Structure):
    # Which file created a thread plus the thread's total execution cycles,
    # terminated by a magic byte for validation.
    _fields_ = [
        ('threadCreatorFileId', c_int64),
        ('threadExecutionCycles', c_int64),
        ('_magicNum', c_uint8)]
diff --git a/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/TimingStruct.cpython-36.pyc b/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/TimingStruct.cpython-36.pyc
deleted file mode 100644
index d4d54b81..00000000
Binary files a/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/TimingStruct.cpython-36.pyc and /dev/null differ
diff --git a/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/__init__.cpython-36.pyc b/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 05c6d129..00000000
Binary files a/Analyzer/PyVisualizer/src/V3/datastructure/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/Analyzer/PyVisualizer/src/V3/pythonmp.py b/Analyzer/PyVisualizer/src/V3/pythonmp.py
deleted file mode 100644
index c10fc4f9..00000000
--- a/Analyzer/PyVisualizer/src/V3/pythonmp.py
+++ /dev/null
@@ -1,18 +0,0 @@
-import multiprocessing
-
-
-def do():
- print("Foobar", flush=True)
- raise Exception()
-def asdf():
-
- with multiprocessing.Pool(1) as pool:
- for i in range(5):
- result = pool.apply_async(do)
-
- result.get()
-
- pool.close()
- pool.join()
-
-asdf()
\ No newline at end of file
diff --git a/Analyzer/PyVisualizer/src/V3/testBinSearch.py b/Analyzer/PyVisualizer/src/V3/testBinSearch.py
new file mode 100644
index 00000000..b4d9f80b
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/testBinSearch.py
@@ -0,0 +1,30 @@
+A = [1, 2, 3, 4, 4, 5]
+
+
+def binSearch(A, tgt):
+ """
+    Return the index of the last element in A that is strictly smaller than tgt, or -1 if no such element exists.
+    :param A: Sorted array of segment boundaries
+    :param tgt: Target value to locate
+    :return: Index of the segment containing tgt, or -1 if tgt precedes all segments
+ """
+ lo = 0
+ hi = len(A)
+ md = 0
+ while lo < hi:
+ md = lo + (hi - lo) // 2
+ if A[md] < tgt:
+ lo = md + 1
+ elif A[md] > tgt:
+ hi = md
+ elif A[md] == tgt:
+ hi = md
+
+ return lo-1
+
+
+testList = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]
+expectedVal = [-1, 0, 1, 2, 4, 5]
+for i in range(len(testList)):
+ lo = binSearch(A, testList[i])
+ print(testList[i], lo)
diff --git a/Analyzer/PyVisualizer/src/V3/preProcessing.py b/Analyzer/PyVisualizer/src/V3/util/Analyzer/XFA.py
similarity index 57%
rename from Analyzer/PyVisualizer/src/V3/preProcessing.py
rename to Analyzer/PyVisualizer/src/V3/util/Analyzer/XFA.py
index 5ad6144d..fd550886 100644
--- a/Analyzer/PyVisualizer/src/V3/preProcessing.py
+++ b/Analyzer/PyVisualizer/src/V3/util/Analyzer/XFA.py
@@ -1,110 +1,7 @@
-import math
-import os
-import re
-import struct
-from datastructure.TimingStruct import FileRecord, RecTuple, RecordingInfo, RecordingInfo
-import pandas as pd
-from collections import defaultdict
-
-
-def readSymbolFiles(scalerDataFolder):
- rlt = RecordingInfo()
-
- df = pd.read_csv(os.path.join(scalerDataFolder, 'fileName.txt'))
- rlt.fileNameList = df['pathName'].to_list()
- rlt.pthreadFileId = parsePthreadId(rlt.fileNameList)
-
- for fileName in os.listdir(scalerDataFolder):
- if fileName.startswith('threadTiming_'):
- rlt.threadIdList.append(fileName[len('threadTiming_'): -4])
-
- with open(os.path.join(scalerDataFolder, 'realFileId.bin'), 'rb') as f:
- byteArr1 = f.read()
- arraySize = struct.unpack_from('Q', byteArr1, 0) # The first element is the array size
- rlt.realFileIdList = list(struct.unpack_from('<%dQ' % (arraySize), byteArr1,
- 8)) # The last id marks the creator thread
- df = pd.read_csv(os.path.join(scalerDataFolder, 'symbolInfo.txt'))
- rlt.symbolNameList = df['funcName'].to_list()
- rlt.symbolFileIdList = df['fileId'].to_list()
- rlt.symIdInFileList = df['symIdInFile'].to_list()
- return rlt
-
-
-def readTimingStruct(threadFileFullPath):
- recDataArr = []
- recTupleSize = 8 + 8 + 4 + 4 + 4 + 4
- with open(threadFileFullPath, 'rb') as f:
- byteArr = f.read()
- mainFileId, recArrSize = struct.unpack_from('qq', byteArr, 0) # 16 bytes
- f.seek(16)
-
- for i in range(recArrSize):
- curRecFormat = RecTuple()
- f.readinto(curRecFormat)
- recDataArr.append(curRecFormat)
- # assert (len(symbolNameList) == recArrSize - 1)
- assert (len(recDataArr) == recArrSize)
- return recDataArr
-
-
-def aggregatePerThreadArray(scalerDataFolder, recInfo: RecordingInfo):
- """
- - Aggregate per-thread timing data into one using simple addition and return as the first return value
- - The last element in each RecTuple records how much time the thread takes to execute so we should not aggregate them
- together. Instead, we collect them into one list and return as the second parameter
-
- :param scalerDataFolder: Scaler output data folder
- :param threadIdList: A list of thread ids
- :return aggregatedTimeArray: Aggregated counting and timing information
- :return startingInfoArray: Information about thread creator. This value is used in time aggregation steps
- """
- api = 0
- fgdsApi = 0
-
- aggregatedTimeArray = []
- aggregatedStartingTime = defaultdict(
- lambda: 0) # Map fileId and starting time. Thread may created by modules other than the main application
- for threadId in recInfo.threadIdList:
- curThreadRecArray = readTimingStruct(os.path.join(scalerDataFolder, 'threadTiming_%s.bin' % threadId))
- aggregatedStartingTime[curThreadRecArray[-1]._flags] += curThreadRecArray[-1].totalClockCycles
- # print(curThreadRecArray[-1].totalClockCycles)
-
- for i, curRec in enumerate(curThreadRecArray[:-1]):
- if curRec._flags & (1 << 0):
- fgdsApi += 1
- api += 1
- # if curRec.count>0:
- # print('totalCount',totalCount,curRec.count)
- if len(curThreadRecArray) != len(aggregatedTimeArray) + 1:
- # First time
- aggregatedTimeArray = curThreadRecArray[:-1].copy()
- else:
- for i, curRec in enumerate(curThreadRecArray[:-1]):
- aggregatedTimeArray[i].count += curRec.count
- # if recInfo.symbolNameList[i] == 'pthread_join':
- # print('Skip pthread_join')
- # continue
-
- if aggregatedTimeArray[i]._flags & (1 << 0):
- # Use mean and count to estimate total clock cycles
- aggregatedTimeArray[i].totalClockCycles += int(curRec.count * curRec._meanClockTick)
- else:
- aggregatedTimeArray[i].totalClockCycles += curRec.totalClockCycles
- print('fgdsapi/api=', round(fgdsApi / api*100,2), 'fgdsCount/TotalCount=', round(fgdsCount / totalCount*100,2), sep='\t')
- return aggregatedTimeArray, aggregatedStartingTime
-
-
-pthreadFileRegex = re.compile(r'libpthread-.*\.so$')
-
-
-def parsePthreadId(fileNameList):
- for i, fileName in enumerate(fileNameList):
- if len(pthreadFileRegex.findall(fileName)) != 0:
- return i
- raise Exception('Cannot find pthread library in fileList')
+from datastructure.TimingStruct import RecordingInfo, FileRecord
-def generateTimingStruct(aggregatedTimeEntries, aggregatedStartingTime, recInfo: RecordingInfo):
+def generateXFAStruct(aggregatedTimeEntries, aggregatedStartingTime, recInfo: RecordingInfo):
timingRecord = [] # Map file name to FileRecord struct
mainFileId = None
@@ -195,7 +92,7 @@ def calcPercentage(timingRecord, programRuntime, totalApiCallCount):
else:
curExtFileRecord.counts.globalPercent = 0.0
- if curFileRecord.childrenClockCycles.value > 0:
+ if curFileRecord.selfClockCycles.value + curFileRecord.childrenClockCycles.value > 0:
curExtFileRecord.totalClockCycles.parentPercent = curExtFileRecord.totalClockCycles.value / (
curFileRecord.selfClockCycles.value + curFileRecord.childrenClockCycles.value)
else:
diff --git a/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py b/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py
new file mode 100644
index 00000000..a87ef2af
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/util/Parser/DetailedTimeOutputPrarser.py
@@ -0,0 +1,55 @@
+import os
+import traceback
+
+import matplotlib.pyplot as plt
+import pandas as pd
+import struct
+import numpy as np
+from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
+import time
+
+from datastructure.TimingStruct import ArrayDescriptor
+
+
+def parseSingleSymDetailedTiming(ROOT_PATH: str, threadId: str, tgtSymIds: list):
+ # hasPoints = False
+ if tgtSymIds:
+ tgtSymIds = sorted(tgtSymIds)
+
+ rlt = {} # SymId, timingArray
+ with open(os.path.join(ROOT_PATH, 'threadDetailedTiming_%s.bin' % (threadId)), 'rb') as f:
+ # Get the amount of symbols in this file
+ arrayDesc = ArrayDescriptor()
+ f.readinto(arrayDesc)
+ assert (arrayDesc.arrayElemSize == 0)
+ assert (arrayDesc._magicNum == 167)
+
+ if tgtSymIds is None:
+ tgtSymIds = range(arrayDesc.arraySize)
+ assert (tgtSymIds[-1] < arrayDesc.arraySize)
+
+ detailedTimingForCurSym = None
+ for curSymId in range(arrayDesc.arraySize):
+ symDetailedTimingDesc = ArrayDescriptor()
+ f.readinto(symDetailedTimingDesc)
+
+ assert (symDetailedTimingDesc.arrayElemSize == 8)
+ assert (symDetailedTimingDesc._magicNum == 167)
+ if curSymId < tgtSymIds[0]:
+ # Only read specified symbol
+ f.seek(symDetailedTimingDesc.arraySize * symDetailedTimingDesc.arrayElemSize, os.SEEK_CUR)
+ continue
+ elif curSymId == tgtSymIds[0]:
+ detailedTimingForCurSym = np.array(struct.unpack_from('<%dQ' % (symDetailedTimingDesc.arraySize),
+ f.read(symDetailedTimingDesc.arraySize *
+ symDetailedTimingDesc.arrayElemSize)))
+ rlt[curSymId] = detailedTimingForCurSym
+ tgtSymIds = tgtSymIds[1:]
+ if len(tgtSymIds) == 0:
+ break
+ else:
+ # There are duplicate or negative values inside tgtSymIds
+ assert (False)
+ # assert (len(tgtSymIds) == 0)
+ return rlt
diff --git a/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py b/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py
new file mode 100644
index 00000000..eb8d1bb6
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/util/Parser/TimeOutputPrarser.py
@@ -0,0 +1,120 @@
+import math
+import os
+import re
+import struct
+from datastructure.TimingStruct import FileRecord, RecTuple, RecordingInfo, RecordingInfo, ArrayDescriptor, \
+ ThreadCreatorInfo
+import pandas as pd
+from collections import defaultdict
+
+
+def readSymbolFiles(scalerDataFolder):
+ if scalerDataFolder is None:
+ print()
+ return
+ rlt = RecordingInfo()
+
+ df = pd.read_csv(os.path.join(scalerDataFolder, 'fileName.txt'))
+ rlt.fileNameList = df['pathName'].to_list()
+ rlt.pthreadFileId = parsePthreadId(rlt.fileNameList)
+
+ for fileName in os.listdir(scalerDataFolder):
+ if fileName.startswith('threadTiming_'):
+ rlt.threadIdList.append(fileName[len('threadTiming_'): -4])
+
+ with open(os.path.join(scalerDataFolder, 'realFileId.bin'), 'rb') as f:
+ arrDesc = ArrayDescriptor()
+ f.readinto(arrDesc)
+ assert (arrDesc._magicNum == 167)
+ assert (arrDesc.arrayElemSize == 8)
+ rlt.realFileIdList = list(
+ struct.unpack_from('<%dQ' % (arrDesc.arraySize), f.read(arrDesc.arrayElemSize * arrDesc.arraySize)))
+ assert (f.read() == b'') # Make sure this is the end
+ df = pd.read_csv(os.path.join(scalerDataFolder, 'symbolInfo.txt'))
+ rlt.symbolNameList = df['funcName'].to_list()
+ rlt.symbolFileIdList = df['fileId'].to_list()
+ rlt.symIdInFileList = df['symIdInFile'].to_list()
+ return rlt
+
+
+def readTimingStruct(ROOT_PATH,threadId):
+ recDataArr = []
+
+ with open(os.path.join(ROOT_PATH, 'threadTiming_%s.bin' % threadId), 'rb') as f:
+ threadCreatorInfo = ThreadCreatorInfo()
+ f.readinto(threadCreatorInfo)
+ assert (threadCreatorInfo._magicNum == 167)
+
+ arrayDescriptor = ArrayDescriptor()
+ f.readinto(arrayDescriptor)
+ assert (arrayDescriptor._magicNum == 167)
+
+ for i in range(arrayDescriptor.arraySize):
+ curRecFormat = RecTuple()
+ f.readinto(curRecFormat)
+ recDataArr.append(curRecFormat)
+
+ # assert (len(symbolNameList) == recArrSize - 1)
+ return recDataArr
+
+
+def aggregatePerThreadArray(scalerDataFolder, recInfo: RecordingInfo):
+ """
+ - Aggregate per-thread timing data into one using simple addition and return as the first return value
+ - The last element in each RecTuple records how much time the thread takes to execute so we should not aggregate them
+ together. Instead, we collect them into one list and return as the second parameter
+
+ :param scalerDataFolder: Scaler output data folder
+ :param threadIdList: A list of thread ids
+ :return aggregatedTimeArray: Aggregated counting and timing information
+ :return startingInfoArray: Information about thread creator. This value is used in time aggregation steps
+ """
+ api = 0
+ fgdsApi = 0
+ fgdsCount = 0
+ totalCount = 0
+
+ aggregatedTimeArray = []
+ aggregatedStartingTime = defaultdict(
+        lambda: 0)  # Maps creator fileId to accumulated starting time. Threads may be created by modules other than the main application
+ for threadId in recInfo.threadIdList:
+ curThreadRecArray = readTimingStruct(scalerDataFolder,threadId)
+ aggregatedStartingTime[curThreadRecArray[-1]._flags] += curThreadRecArray[-1].totalClockCycles
+ # print(curThreadRecArray[-1].totalClockCycles)
+
+ for i, curRec in enumerate(curThreadRecArray[:-1]):
+ if curRec._flags & (1 << 0):
+ fgdsApi += 1
+ fgdsCount += curRec.count
+ api += 1
+ totalCount += curRec.count
+ # if curRec.count>0:
+ # print('totalCount',totalCount,curRec.count)
+ if len(curThreadRecArray) != len(aggregatedTimeArray) + 1:
+ # First time
+ aggregatedTimeArray = curThreadRecArray[:-1].copy()
+ else:
+ for i, curRec in enumerate(curThreadRecArray[:-1]):
+ aggregatedTimeArray[i].count += curRec.count
+ # if recInfo.symbolNameList[i] == 'pthread_join':
+ # print('Skip pthread_join')
+ # continue
+
+ if aggregatedTimeArray[i]._flags & (1 << 0):
+ # Use mean and count to estimate total clock cycles
+ aggregatedTimeArray[i].totalClockCycles += int(curRec.count * curRec._meanClockTick)
+ else:
+ aggregatedTimeArray[i].totalClockCycles += curRec.totalClockCycles
+ print('fgdsapi/api=', round(fgdsApi / api * 100, 2), 'fgdsCount/TotalCount=',
+ round(fgdsCount / totalCount * 100, 2), sep='\t')
+ return aggregatedTimeArray, aggregatedStartingTime
+
+
+pthreadFileRegex = re.compile(r'libpthread-.*\.so$')
+
+
+def parsePthreadId(fileNameList):
+ for i, fileName in enumerate(fileNameList):
+ if len(pthreadFileRegex.findall(fileName)) != 0:
+ return i
+ raise Exception('Cannot find pthread library in fileList')
diff --git a/Analyzer/PyVisualizer/src/V3/util/Quantile.py b/Analyzer/PyVisualizer/src/V3/util/Quantile.py
new file mode 100644
index 00000000..0c0d11a6
--- /dev/null
+++ b/Analyzer/PyVisualizer/src/V3/util/Quantile.py
@@ -0,0 +1,13 @@
+import numpy as np
+
+
+def calcQuantile(array):
+ min = np.min(array)
+ q1 = np.quantile(array, 0.25)
+ q2 = np.quantile(array, 0.5)
+ q3 = np.quantile(array, 0.75)
+ q4 = np.quantile(array, 1)
+ iqr = q3 - q1
+ qLower = q1 - 1.5 * iqr
+ qUpper = q3 + 1.5 * iqr
+ return min, q1, q2, q3, q4, iqr, qLower, qUpper
diff --git a/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl b/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl
index f6427d07..336e6c38 100644
--- a/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl
+++ b/Analyzer/PyVisualizer/tests/PerfTests/stackcollapse-perf.pl
@@ -62,7 +62,6 @@
#
# 02-Mar-2012 Brendan Gregg Created this.
# 02-Jul-2014 " " Added process name to stacks.
-# 08-Jun-2021 John Diep edited script to force shared object names to be reported always
use strict;
use Getopt::Long;
@@ -89,46 +88,83 @@ sub remember_stack {
my $show_inline = 0;
my $show_context = 0;
+
+my $srcline_in_input = 0; # if there are extra lines with source location (perf script -F+srcline)
GetOptions('inline' => \$show_inline,
- 'context' => \$show_context,
- 'pid' => \$include_pid,
- 'kernel' => \$annotate_kernel,
- 'jit' => \$annotate_jit,
- 'all' => \$annotate_all,
- 'tid' => \$include_tid,
- 'addrs' => \$include_addrs,
- 'event-filter=s' => \$event_filter)
+'context' => \$show_context,
+'srcline' => \$srcline_in_input,
+'pid' => \$include_pid,
+'kernel' => \$annotate_kernel,
+'jit' => \$annotate_jit,
+'all' => \$annotate_all,
+'tid' => \$include_tid,
+'addrs' => \$include_addrs,
+'event-filter=s' => \$event_filter)
or die < outfile\n
- --pid # include PID with process names [1]
- --tid # include TID and PID with process names [1]
- --inline # un-inline using addr2line
- --all # all annotations (--kernel --jit)
- --kernel # annotate kernel functions with a _[k]
- --jit # annotate jit functions with a _[j]
- --context # adds source context to --inline
- --addrs # include raw addresses where symbols can't be found
- --event-filter=EVENT # event name filter\n
+--pid # include PID with process names [1]
+--tid # include TID and PID with process names [1]
+--inline # un-inline using addr2line
+--all # all annotations (--kernel --jit)
+--kernel # annotate kernel functions with a _[k]
+--jit # annotate jit functions with a _[j]
+--context # adds source context to --inline
+--srcline # parses output of 'perf script -F+srcline' and adds source context
+--addrs # include raw addresses where symbols can't be found
+--event-filter=EVENT # event name filter\n
[1] perf script must emit both PID and TIDs for these to work; eg, Linux < 4.1:
- perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace
- for Linux >= 4.1:
- perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace
- If you save this output add --header on Linux >= 3.14 to include perf info.
+perf script -f comm,pid,tid,cpu,time,event,ip,sym,dso,trace
+for Linux >= 4.1:
+perf script -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace
+If you save this output add --header on Linux >= 3.14 to include perf info.
USAGE_END
if ($annotate_all) {
- $annotate_kernel = $annotate_jit = 1;
+$annotate_kernel = $annotate_jit = 1;
+}
+
+my %inlineCache;
+
+my %nmCache;
+
+sub inlineCacheAdd {
+ my ($pc, $mod, $result) = @_;
+ if (defined($inlineCache{$pc})) {
+ $inlineCache{$pc}{$mod} = $result;
+ } else {
+ $inlineCache{$pc} = {$mod => $result};
+ }
}
# for the --inline option
sub inline {
- my ($pc, $mod) = @_;
+my ($pc, $rawfunc, $mod) = @_;
- # capture addr2line output
- my $a2l_output = `addr2line -a $pc -e $mod -i -f -s -C`;
+return $inlineCache{$pc}{$mod} if defined($inlineCache{$pc}{$mod});
- # remove first line
- $a2l_output =~ s/^(.*\n){1}//;
+# capture addr2line output
+my $a2l_output = `addr2line -a $pc -e $mod -i -f -s -C`;
+
+# remove first line
+$a2l_output =~ s/^(.*\n){1}//;
+
+if ($a2l_output =~ /\?\?\n\?\?:0/) {
+# if addr2line fails and rawfunc is func+offset, then fall back to it
+if ($rawfunc =~ /^(.+)\+0x([0-9a-f]+)$/) {
+ my $func = $1;
+ my $addr = hex $2;
+
+ $nmCache{$mod}=`nm $mod` unless defined $nmCache{$mod};
+
+ if ($nmCache{$mod} =~ /^([0-9a-f]+) . \Q$func\E$/m) {
+ my $base = hex $1;
+ my $newPc = sprintf "0x%x", $base+$addr;
+ my $result = inline($newPc, '', $mod);
+ inlineCacheAdd($pc, $mod, $result);
+ return $result;
+ }
+ }
+ }
my @fullfunc;
my $one_item = "";
@@ -150,13 +186,18 @@ sub inline {
}
}
- return join(";", @fullfunc);
+ my $result = join ";" , @fullfunc;
+
+ inlineCacheAdd($pc, $mod, $result);
+
+ return $result;
}
my @stack;
my $pname;
my $m_pid;
my $m_tid;
+my $m_period;
#
# Main loop
@@ -192,7 +233,7 @@ sub inline {
unshift @stack, "";
}
}
- remember_stack(join(";", @stack), 1) if @stack;
+ remember_stack(join(";", @stack), $m_period) if @stack;
undef @stack;
undef $pname;
next;
@@ -203,21 +244,22 @@ sub inline {
#
if (/^(\S.+?)\s+(\d+)\/*(\d+)*\s+/) {
# default "perf script" output has TID but not PID
- # eg, "java 25607 4794564.109216: cycles:"
- # eg, "java 12688 [002] 6544038.708352: cpu-clock:"
- # eg, "V8 WorkerThread 25607 4794564.109216: cycles:"
- # eg, "java 24636/25607 [000] 4794564.109216: cycles:"
- # eg, "java 12688/12764 6544038.708352: cpu-clock:"
- # eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: cycles:"
+ # eg, "java 25607 4794564.109216: 1 cycles:"
+ # eg, "java 12688 [002] 6544038.708352: 235 cpu-clock:"
+ # eg, "V8 WorkerThread 25607 4794564.109216: 104345 cycles:"
+ # eg, "java 24636/25607 [000] 4794564.109216: 1 cycles:"
+ # eg, "java 12688/12764 6544038.708352: 10309278 cpu-clock:"
+ # eg, "V8 WorkerThread 24636/25607 [000] 94564.109216: 100 cycles:"
# other combinations possible
- my ($comm, $pid, $tid) = ($1, $2, $3);
+ my ($comm, $pid, $tid, $period) = ($1, $2, $3, "");
if (not $tid) {
$tid = $pid;
$pid = "?";
}
- if (/(\S+):\s*$/) {
- my $event = $1;
+ if (/:\s*(\d+)*\s+(\S+):\s*$/) {
+ $period = $1;
+ my $event = $2;
if ($event_filter eq "") {
# By default only show events of the first encountered
@@ -237,7 +279,10 @@ sub inline {
}
}
- ($m_pid, $m_tid) = ($pid, $tid);
+ if (not $period) {
+ $period = 1
+ }
+ ($m_pid, $m_tid, $m_period) = ($pid, $tid, $period);
if ($include_tid) {
$pname = "$comm-$m_pid/$m_tid";
@@ -257,18 +302,25 @@ sub inline {
my ($pc, $rawfunc, $mod) = ($1, $2, $3);
+ if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) {
+ my $inlineRes = inline($pc, $rawfunc, $mod);
+	# - an empty result happens e.g. when $mod does not exist or is a path to a compressed kernel module;
+	#   if this happens, the user will see the error message from addr2line written to stderr
+	# - if addr2line yields "??", it is much saner to fall back than to produce a '??' frame in the graph
+ if($inlineRes ne "" and $inlineRes ne "??" and $inlineRes ne "??:??:0" ) {
+ unshift @stack, $inlineRes;
+ next;
+ }
+ }
+
# Linux 4.8 included symbol offsets in perf script output by default, eg:
# 7fffb84c9afc cpu_startup_entry+0x800047c022ec ([kernel.kallsyms])
# strip these off:
$rawfunc =~ s/\+0x[\da-f]+$//;
- if ($show_inline == 1 && $mod !~ m/(perf-\d+.map|kernel\.|\[[^\]]+\])/) {
- unshift @stack, inline($pc, $mod);
- next;
- }
-
next if $rawfunc =~ /^\(/; # skip process names
+ my $is_unknown=0;
my @inline;
for (split /\->/, $rawfunc) {
my $func = $_;
@@ -279,6 +331,7 @@ sub inline {
$func =~ s/.*\///;
} else {
$func = "unknown";
+ $is_unknown=1;
}
if ($include_addrs) {
@@ -320,7 +373,7 @@ sub inline {
#
# detect inlined from the @inline array
# detect kernel from the module name; eg, frames to parse include:
- # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
+ # ffffffff8103ce3b native_safe_halt ([kernel.kallsyms])
# 8c3453 tcp_sendmsg (/lib/modules/4.3.0-rc1-virtual/build/vmlinux)
# 7d8 ipv4_conntrack_local+0x7f8f80b8 ([nf_conntrack_ipv4])
# detect jit from the module name; eg:
@@ -332,15 +385,42 @@ sub inline {
} elsif ($annotate_jit == 1 && $mod =~ m:/tmp/perf-\d+\.map:) {
$func .= "_[j]"; # jitted
}
- else {
- if ($mod ne "[unknown]") {
- $mod =~ s/.*\///;
- $mod = "\[$mod\]";
- }
- if ($func ne $mod) {
- $func .= " " . $mod;
- }
+
+ #
+ # Source lines
+ #
+ #
+ # Sample outputs:
+ # | a.out 35081 252436.005167: 667783 cycles:
+ # | 408ebb some_method_name+0x8b (/full/path/to/a.out)
+ # | uniform_int_dist.h:300
+ # | 4069f5 main+0x935 (/full/path/to/a.out)
+ # | file.cpp:137
+ # | 7f6d2148eb25 __libc_start_main+0xd5 (/lib64/libc-2.33.so)
+ # | libc-2.33.so[27b25]
+ #
+ # | a.out 35081 252435.738165: 306459 cycles:
+ # | 7f6d213c2750 [unknown] (/usr/lib64/libkmod.so.2.3.6)
+ # | libkmod.so.2.3.6[6750]
+ #
+ # | a.out 35081 252435.738373: 315813 cycles:
+ # | 7f6d215ca51b __strlen_avx2+0x4b (/lib64/libc-2.33.so)
+ # | libc-2.33.so[16351b]
+ # | 7ffc71ee9580 [unknown] ([unknown])
+ # |
+ #
+ # | a.out 35081 252435.718940: 247984 cycles:
+ # | ffffffff814f9302 up_write+0x32 ([kernel.kallsyms])
+ # | [kernel.kallsyms][ffffffff814f9302]
+ if($srcline_in_input and not $is_unknown){
+ $_ = <>;
+ chomp;
+ s/\[.*?\]//g;
+ s/^\s*//g;
+ s/\s*$//g;
+ $func.=':'.$_ unless $_ eq "";
}
+
push @inline, $func;
}
diff --git a/benchmarktookit b/benchmarktookit
new file mode 160000
index 00000000..c1ff8fb5
--- /dev/null
+++ b/benchmarktookit
@@ -0,0 +1 @@
+Subproject commit c1ff8fb5dcc7e62628a9c5d4fd8e8e858ae8ab00
diff --git a/libHook-c/src/ExtFuncCallHook.cpp b/libHook-c/src/ExtFuncCallHook.cpp
index abd46bbb..c545bb27 100644
--- a/libHook-c/src/ExtFuncCallHook.cpp
+++ b/libHook-c/src/ExtFuncCallHook.cpp
@@ -169,7 +169,8 @@ namespace scaler {
Elf64_Word type;
Elf64_Word bind;
parser.getExtSymbolInfo(i, funcName, bind, type);
- if (!shouldHookThisSymbol(funcName, bind, type, allExtSymbol.getSize())) {
+ ssize_t initialGap = 0;
+ if (!shouldHookThisSymbol(funcName, bind, type, allExtSymbol.getSize(), initialGap)) {
continue;
}
//Get function id from plt entry
@@ -198,7 +199,7 @@ namespace scaler {
newSym->pltEntryAddr = pltEntry;
newSym->pltSecEntryAddr = pltSecEntry;
newSym->pltStubId = pltStubId;
-
+ newSym->initialGap = initialGap;
fprintf(symInfoFile, "%s,%ld,%ld\n", funcName, newSym->fileId, newSym->symIdInFile);
DBG_LOGS(
@@ -212,8 +213,12 @@ namespace scaler {
}
- bool
- ExtFuncCallHook::shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId) {
+ const int SAMPLING_GAP = 0b0;
+
+ bool ExtFuncCallHook::shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId,
+ ssize_t &initialGap) {
+
+ initialGap = 0;
if (bind != STB_GLOBAL || type != STT_FUNC) {
return false;
}
@@ -230,13 +235,33 @@ namespace scaler {
}
if (funcNameLen == 3) {
- if (strncmp(funcName, "oom", 3) == 0) {
+ if (strncmp(funcName, "cos", 3) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "exp", 3) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "log", 3) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "sin", 3) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "oom", 3) == 0) {
return false;
} else if (strncmp(funcName, "err", 3) == 0) {
return false;
}
} else if (funcNameLen == 4) {
- if (strncmp(funcName, "jump", 4) == 0) {
+ if (strncmp(funcName, "cosf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "expf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "logf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "powf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "sinf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "sqrtf", 4) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "jump", 4) == 0) {
return false;
} else if (strncmp(funcName, "exit", 4) == 0) {
return false;
@@ -248,7 +273,11 @@ namespace scaler {
return false;
}
} else if (funcNameLen == 5) {
- if (strncmp(funcName, "_exit", 5) == 0) {
+ if (strncmp(funcName, "atan2", 5) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "sqrtf", 5) == 0) {
+ initialGap = SAMPLING_GAP;
+ } else if (strncmp(funcName, "_exit", 5) == 0) {
return false;
} else if (strncmp(funcName, "abort", 5) == 0) {
return false;
@@ -629,7 +658,7 @@ namespace scaler {
uint8_t *tlsOffset = nullptr;
__asm__ __volatile__ (
- "movq 0x2F4CC0(%%rip),%0\n\t"
+ "movq 0x2F5B60(%%rip),%0\n\t"
:"=r" (tlsOffset)
:
:
diff --git a/libHook-c/src/HookContext.cpp b/libHook-c/src/HookContext.cpp
index 41d768fb..0e309e79 100644
--- a/libHook-c/src/HookContext.cpp
+++ b/libHook-c/src/HookContext.cpp
@@ -2,11 +2,13 @@
#include
#include
#include
+#include
extern "C" {
static thread_local DataSaver saverElem;
-HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) {
+HookContext *
+constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize, scaler::Array &allExtSymbol) {
uint8_t *contextHeap = static_cast(mmap(NULL, sizeof(HookContext) +
sizeof(scaler::Array) +
@@ -21,7 +23,15 @@ HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) {
rlt->recArr = new(contextHeap + sizeof(HookContext)) scaler::Array(hookedSymbolSize);
rlt->threadDataSavingLock = reinterpret_cast(contextHeap + sizeof(HookContext) +
sizeof(scaler::Array));
-
+#ifdef INSTR_TIMING
+ detailedTimingVectors = new TIMING_TYPE *[hookedSymbolSize];
+ detailedTimingVectorSize = new TIMING_TYPE[hookedSymbolSize];
+ memset(detailedTimingVectorSize, 0, sizeof(TIMING_TYPE) * hookedSymbolSize);
+ for (ssize_t i = 0; i < hookedSymbolSize; ++i) {
+ detailedTimingVectors[i] = new TIMING_TYPE[TIMING_REC_COUNT];
+ memset(detailedTimingVectors[i], 0, sizeof(TIMING_TYPE) * TIMING_REC_COUNT);
+ }
+#endif
pthread_mutexattr_t Attr;
pthread_mutexattr_init(&Attr);
@@ -31,11 +41,9 @@ HookContext *constructContext(ssize_t libFileSize, ssize_t hookedSymbolSize) {
//Initialize gap to one
for (int i = 0; i < rlt->recArr->getSize(); ++i) {
//number mod 2^n is equivalent to stripping off all but the n lowest-order
- rlt->recArr->internalArr[i].gap = 0; //0b11 if %4, because 4=2^2 Initially time everything
+ rlt->recArr->internalArr[i].gap = allExtSymbol[i].initialGap; //0b11 if %4, because 4=2^2 Initially time everything
rlt->recArr->internalArr[i].count = 0;
}
-
-
// memArrayHeap(1), timingArr(hookedSymbolSize),
// indexPosi(0)
@@ -86,9 +94,9 @@ void __attribute__((used, noinline, optimize(3))) printRecOffset() {
auto m __attribute__((used)) = (uint8_t *) &curContext->recArr->internalArr[0].gap;
printf("\nTLS offset: Check assembly\n"
- "RecArr Offset: 0x%lx\n"
- "Counting Entry Offset: 0x%lx\n"
- "Gap Entry Offset: 0x%lx\n", j - i, l - k, m - k);
+ "RecArr Offset: 0x%lx\n"
+ "Counting Entry Offset: 0x%lx\n"
+ "Gap Entry Offset: 0x%lx\n", j - i, l - k, m - k);
}
@@ -106,19 +114,17 @@ bool initTLS() {
//Put a dummy variable to avoid null checking
//Initialize saving data structure
- curContext = constructContext(
- scaler::ExtFuncCallHook::instance->elfImgInfoMap.getSize(),
- scaler::ExtFuncCallHook::instance->allExtSymbol.getSize() + 1);
+ curContext = constructContext(scaler::ExtFuncCallHook::instance->elfImgInfoMap.getSize(),
+ scaler::ExtFuncCallHook::instance->allExtSymbol.getSize() + 1,
+ scaler::ExtFuncCallHook::instance->allExtSymbol);
//#ifdef PRINT_DBG_LOG
// printRecOffset();
//#endif
- if (!curContext) { fatalError("Failed to allocate memory for Context");
+ if (!curContext) {
+ fatalError("Failed to allocate memory for Context");
return false;
}
-
-
//RuntimeInfo newInfo;
-
return true;
}
@@ -126,10 +132,168 @@ __thread HookContext *curContext __attribute((tls_model("initial-exec")));
__thread uint8_t bypassCHooks __attribute((tls_model("initial-exec"))) = SCALER_FALSE; //Anything that is not SCALER_FALSE should be treated as SCALER_FALSE
+#ifdef INSTR_TIMING
+const int TIMING_REC_COUNT = 20000;
+typedef int64_t TIMING_TYPE;
+__thread TIMING_TYPE **detailedTimingVectors;
+__thread TIMING_TYPE *detailedTimingVectorSize;
+#endif
+
DataSaver::~DataSaver() {
saveData(curContext);
}
+#ifdef INSTR_TIMING
+inline void saveThreadDetailedTiming(std::stringstream &ss, HookContext *curContextPtr) {
+ ss.str("");
+ ss << scaler::ExtFuncCallHook::instance->folderName << "/threadDetailedTiming_" << curContextPtr->threadId
+ << ".bin";
+
+ //Calculate file total size
+
+ ssize_t recordedInvocationCnt = 0;
+
+ for (ssize_t i = 0; i < scaler::ExtFuncCallHook::instance->allExtSymbol.getSize(); ++i) {
+ recordedInvocationCnt += detailedTimingVectorSize[i];
+ }
+
+ int fd;
+ size_t realFileIdSizeInBytes = sizeof(ArrayDescriptor) +
+ sizeof(ArrayDescriptor) * scaler::ExtFuncCallHook::instance->allExtSymbol.getSize()
+ + recordedInvocationCnt * sizeof(TIMING_TYPE);
+
+ uint8_t *fileContentInMem = nullptr;
+ if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) {
+ fatalErrorS("Cannot open %s because:%s", ss.str().c_str(), strerror(errno))
+ }
+ uint8_t *_fileContentInMem = fileContentInMem;
+
+ /*Write whole symbol info*/
+ ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem);
+ arrayDescriptor->arrayElemSize = 0;
+ arrayDescriptor->arraySize = scaler::ExtFuncCallHook::instance->allExtSymbol.getSize();
+ arrayDescriptor->magicNum = 167;
+ fileContentInMem += sizeof(ArrayDescriptor);
+
+
+ for (ssize_t i = 0; i < scaler::ExtFuncCallHook::instance->allExtSymbol.getSize(); ++i) {
+ /**
+ * Write array descriptor first
+ */
+ ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem);
+ arrayDescriptor->arrayElemSize = sizeof(TIMING_TYPE);
+ arrayDescriptor->arraySize = detailedTimingVectorSize[i];
+ arrayDescriptor->magicNum = 167;
+ fileContentInMem += sizeof(ArrayDescriptor);
+
+ /**
+ * Then write detailed timing array
+ */
+ memcpy(fileContentInMem, detailedTimingVectors[i], arrayDescriptor->arraySize * arrayDescriptor->arrayElemSize);
+ fileContentInMem += arrayDescriptor->arraySize * arrayDescriptor->arrayElemSize;
+ }
+ if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) {
+ fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno));
+ }
+}
+#endif
+
+
+inline void savePerThreadTimingData(std::stringstream &ss, HookContext *curContextPtr) {
+ ss.str("");
+ ss << scaler::ExtFuncCallHook::instance->folderName << "/threadTiming_" << curContextPtr->threadId << ".bin";
+ //INFO_LOGS("Saving timing data to %s", ss.str().c_str());
+
+ int fd;
+ size_t realFileIdSizeInBytes =
+ sizeof(ThreadCreatorInfo) + sizeof(ArrayDescriptor) + curContextPtr->recArr->getSize() * sizeof(RecTuple);
+ uint8_t *fileContentInMem = nullptr;
+ if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) {
+ fatalErrorS("Cannot fopen %s because:%s", ss.str().c_str(), strerror(errno));
+ }
+ uint8_t *_fileContentInMem = fileContentInMem;
+ /**
+ * Record who created the thread
+ */
+ ThreadCreatorInfo *threadCreatorInfo = reinterpret_cast(fileContentInMem);
+ threadCreatorInfo->threadExecutionCycles = curContextPtr->endTImestamp - curContextPtr->startTImestamp;
+ threadCreatorInfo->threadCreatorFileId = curContextPtr->threadCreatorFileId;
+ threadCreatorInfo->magicNum = 167;
+ fileContentInMem += sizeof(ThreadCreatorInfo);
+
+ /**
+ * Record size information about the recorded array
+ */
+ ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem);
+ arrayDescriptor->arrayElemSize = sizeof(RecTuple);
+ arrayDescriptor->arraySize = curContextPtr->recArr->getSize();
+ arrayDescriptor->magicNum = 167;
+ fileContentInMem += sizeof(ArrayDescriptor);
+
+
+ /**
+ * Write recording tuple onto the disk
+ */
+ memcpy(fileContentInMem, curContextPtr->recArr->data(),
+ curContextPtr->recArr->getTypeSizeInBytes() * curContextPtr->recArr->getSize());
+
+ if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) {
+ fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno));
+ }
+
+ INFO_LOGS("Saving data to %s, %lu", scaler::ExtFuncCallHook::instance->folderName.c_str(), pthread_self());
+}
+
+inline void saveRealFileId(std::stringstream &ss, HookContext *curContextPtr) {
+ ss.str("");
+ ss << scaler::ExtFuncCallHook::instance->folderName << "/realFileId.bin";
+    //The real file id of each function is resolved in the after-hook, so it can only be saved here in DataSaver
+
+ int fd;
+ ssize_t realFileIdSizeInBytes = sizeof(ArrayDescriptor) +
+ (curContextPtr->_this->allExtSymbol.getSize()) * sizeof(uint64_t);
+ uint8_t *fileContentInMem = nullptr;
+ if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, fileContentInMem)) {
+ fatalErrorS(
+ "Cannot open %s because:%s", ss.str().c_str(), strerror(errno))
+ }
+ uint8_t *_fileContentInMem = fileContentInMem;
+
+ /**
+ * Write array descriptor first
+ */
+ ArrayDescriptor *arrayDescriptor = reinterpret_cast(fileContentInMem);
+ arrayDescriptor->arrayElemSize = sizeof(uint64_t);
+ arrayDescriptor->arraySize = curContextPtr->_this->allExtSymbol.getSize();
+ arrayDescriptor->magicNum = 167;
+ fileContentInMem += sizeof(ArrayDescriptor);
+
+ uint64_t *realFileIdMem = reinterpret_cast(fileContentInMem);
+ for (int i = 0; i < curContextPtr->_this->allExtSymbol.getSize(); ++i) {
+ realFileIdMem[i] = curContextPtr->_this->pmParser.findExecNameByAddr(
+ *(curContextPtr->_this->allExtSymbol[i].gotEntryAddr));
+ }
+
+ if (!scaler::fClose(fd, realFileIdSizeInBytes, _fileContentInMem)) {
+ fatalErrorS("Cannot close file %s, because %s", ss.str().c_str(), strerror(errno));
+ }
+}
+
+inline void saveDataForAllOtherThread(std::stringstream &ss, HookContext *curContextPtr) {
+ INFO_LOG("Save data of all existing threads");
+ for (int i = 0; i < threadContextMap.getSize(); ++i) {
+ HookContext *threadContext = threadContextMap[i];
+ if (!threadContext->dataSaved) {
+ pthread_mutex_lock(threadContext->threadDataSavingLock);
+ INFO_LOGS("Thread data not saved, save it %d/%zd", i, threadContextMap.getSize());
+ saveData(threadContext);
+ pthread_mutex_unlock(threadContext->threadDataSavingLock);
+ } else {
+ INFO_LOGS("Thread data already saved, skip %d/%zd", i, threadContextMap.getSize());
+ }
+ }
+}
+
void saveData(HookContext *curContextPtr, bool finalize) {
bypassCHooks = SCALER_TRUE;
if (!curContextPtr) {
@@ -146,83 +310,28 @@ void saveData(HookContext *curContextPtr, bool finalize) {
curContextPtr->dataSaved = true;
//Resolve real address
-
if (!curContextPtr->endTImestamp) {
//Did not finish successfully
curContextPtr->endTImestamp = getunixtimestampms();
}
- if (!curContext) { fatalError("curContext is not initialized, won't save anything");
+ if (!curContext) {
+ fatalError("curContext is not initialized, won't save anything");
return;
}
std::stringstream ss;
- ss << scaler::ExtFuncCallHook::instance->folderName << "/threadTiming_" << curContextPtr->threadId << ".bin";
- //INFO_LOGS("Saving timing data to %s", ss.str().c_str());
- FILE *threadDataSaver = fopen(ss.str().c_str(), "wb");
- if (!threadDataSaver) { fatalErrorS("Cannot fopen %s because:%s", ss.str().c_str(),
- strerror(errno));
- }
- //Main application at the end
- curContextPtr->recArr->internalArr[curContextPtr->recArr->getSize() - 1].totalClockCycles =
- curContextPtr->endTImestamp - curContextPtr->startTImestamp;
+#ifdef INSTR_TIMING
+ saveThreadDetailedTiming(ss, curContextPtr);
+#endif
-
- if (fwrite(&curContextPtr->curFileId, sizeof(HookContext::curFileId), 1, threadDataSaver) != 1) { fatalErrorS(
- "Cannot curFileId of %s because:%s", ss.str().c_str(),
- strerror(errno));
- }
-
- int64_t timeEntrySize = curContextPtr->recArr->getSize();
- if (fwrite(&timeEntrySize, sizeof(int64_t), 1, threadDataSaver) != 1) { fatalErrorS(
- "Cannot write timeEntrySize of %s because:%s", ss.str().c_str(),
- strerror(errno));
- }
- if (fwrite(curContextPtr->recArr->data(), curContextPtr->recArr->getTypeSizeInBytes(),
- curContextPtr->recArr->getSize(), threadDataSaver) !=
- curContextPtr->recArr->getSize()) { fatalErrorS("Cannot write timingArr of %s because:%s", ss.str().c_str(),
- strerror(errno));
- }
-
-
- INFO_LOGS("Saving data to %s, %lu", scaler::ExtFuncCallHook::instance->folderName.c_str(), pthread_self());
+ savePerThreadTimingData(ss, curContextPtr);
if (curContextPtr->isMainThread || finalize) {
-// printf("Main thread id is: %lu", curContextPtr->threadId);
- ss.str("");
- ss << scaler::ExtFuncCallHook::instance->folderName << "/realFileId.bin";
- //The real id of each function is resolved in after hook, so I can only save it in datasaver
-
- int fd;
-
- size_t realFileIdSizeInBytes = (curContextPtr->_this->allExtSymbol.getSize() + 1) * sizeof(ssize_t);
- size_t *realFileIdMem = nullptr;
- if (!scaler::fOpen4Write(ss.str().c_str(), fd, realFileIdSizeInBytes, realFileIdMem)) { fatalErrorS(
- "Cannot open %s because:%s", ss.str().c_str(), strerror(errno))
- }
- realFileIdMem[0] = curContextPtr->_this->allExtSymbol.getSize();
- for (int i = 0; i < curContextPtr->_this->allExtSymbol.getSize(); ++i) {
- realFileIdMem[i + 1] = curContextPtr->_this->pmParser.findExecNameByAddr(
- *(curContextPtr->_this->allExtSymbol[i].gotEntryAddr));
- }
- if (!scaler::fClose(fd, realFileIdSizeInBytes, realFileIdMem)) { fatalError("Cannot close file");
- }
-
- INFO_LOG("Save data of all existing threads");
- for (int i = 0; i < threadContextMap.getSize(); ++i) {
- HookContext *threadContext = threadContextMap[i];
- if (!threadContext->dataSaved) {
- pthread_mutex_lock(threadContext->threadDataSavingLock);
- INFO_LOGS("Thread data not saved, save it %d/%zd", i, threadContextMap.getSize());
- saveData(threadContext);
- pthread_mutex_unlock(threadContext->threadDataSavingLock);
- } else {
- INFO_LOGS("Thread data already saved, skip %d/%zd", i, threadContextMap.getSize());
- }
- }
+ saveRealFileId(ss, curContextPtr);
+ saveDataForAllOtherThread(ss, curContextPtr);
}
- fclose(threadDataSaver);
pthread_mutex_unlock(curContextPtr->threadDataSavingLock);
}
diff --git a/libHook-c/src/HookHandlers.cpp b/libHook-c/src/HookHandlers.cpp
index a7006083..431b796c 100644
--- a/libHook-c/src/HookHandlers.cpp
+++ b/libHook-c/src/HookHandlers.cpp
@@ -385,15 +385,9 @@ void *afterHookHandler() {
// int64_t prevClockTick = curContextPtr->hookTuple[curContextPtr->indexPosi].clockTicks;
uint64_t preClockCycle = curContextPtr->hookTuple[curContextPtr->indexPosi].clockCycles;
-// int64_t curClockTick = 0;
- //(((int64_t) hi << 32) | lo) ;
+
int64_t &c = curContextPtr->recArr->internalArr[symbolId].count;
-// if (c < (1 << 10)) {
-// struct tms curTime;
-// clock_t rlt = times(&curTime);
-// curClockTick = curTime.tms_utime + curTime.tms_stime - prevClockTick;
-// printf("Clock Ticks in posthook=%ld\n", curTime.tms_utime + curTime.tms_stime);
-// }
+
--curContextPtr->indexPosi;
assert(curContextPtr->indexPosi >= 1);
@@ -406,47 +400,19 @@ void *afterHookHandler() {
int32_t &clockCycleThreshold = curContextPtr->recArr->internalArr[symbolId].durThreshold;
int64_t clockCyclesDuration = (int64_t) (postHookClockCycles - preClockCycle);
- if (c < (1 << 10)) {
-
- if (c > (1 << 9)) {
- //Calculation phase
- int64_t clockTickDiff = clockCyclesDuration - meanClockCycle;
-
- if (-clockCycleThreshold <= clockTickDiff && clockTickDiff <= clockCycleThreshold) {
-// printf("Skipped\n");
- //Skip this
- setbit(curContextPtr->recArr->internalArr[symbolId].flags, 0);
- }
-// printf("Threshold=%d clockDiff=%ld shouldSkip?=%s\n", clockTickThreshold, clockTickDiff,
-// -clockTickThreshold <= clockTickDiff && clockTickDiff < = clockTickThreshold ? "True" : "False");
-
- } else if (c < (1 << 9)) {
- //Counting only, no modifying gap. Here the gap should be zero. Meaning every invocation counts
- //https://blog.csdn.net/u014485485/article/details/77679669
- meanClockCycle += (clockCyclesDuration - meanClockCycle) / (float) c; //c<100, safe conversion
-// printf("meanClockTick += (%ld - %f) / (float) %ld\n", clockCyclesDuration, meanClockCycle, c);
- } else if (c == (1 << 9)) {
- //Mean calculation has finished, calculate a threshold based on that
- clockCycleThreshold = meanClockCycle * 0.1;
-// printf("MeanClockTick=%f MeanClockTick*0.1=%f\n", meanClockCycle, meanClockCycle * 0.1);
- }
- } else if (c == (1 << 10)) {
- if (chkbit(curContextPtr->recArr->internalArr[symbolId].flags, 0)) {
- //Skip this symbol
- //printf("Skipped\n");
- curContextPtr->recArr->internalArr[symbolId].gap = 0b11111111111111111111;
- }
- }
- //RDTSCTiming if not skipped
- if (!chkbit(curContextPtr->recArr->internalArr[symbolId].flags, 0)) {
- curContextPtr->recArr->internalArr[symbolId].totalClockCycles += clockCyclesDuration;
- }
- //c = 1 << 10;
+#ifdef INSTR_TIMING
+ TIMING_TYPE &curSize = detailedTimingVectorSize[symbolId];
+ if (curSize < TIMING_REC_COUNT) {
+ ++curSize;
+ detailedTimingVectors[symbolId][curSize] = clockCyclesDuration;
+ }
+#endif
+ //RDTSCTiming if not skipped
+ curContextPtr->recArr->internalArr[symbolId].totalClockCycles += clockCyclesDuration * (c - curContextPtr->recArr->internalArr[symbolId].prevCount + 1);
-// INFO_LOGS("[Post Hook] Thread ID:%lu Func(%ld) CalleeFileId(%ld) Timestamp: %lu\n",
-// pthread_self(), symbolId, curElfSymInfo.libFileId, getunixtimestampms());
+ curContextPtr->recArr->internalArr[symbolId].prevCount = c;
bypassCHooks = SCALER_FALSE;
return callerAddr;
diff --git a/libHook-c/src/ProcInfoParser.cpp b/libHook-c/src/ProcInfoParser.cpp
index 1d8f8ec7..2ab15a58 100644
--- a/libHook-c/src/ProcInfoParser.cpp
+++ b/libHook-c/src/ProcInfoParser.cpp
@@ -182,12 +182,9 @@ namespace scaler {
//We could use binary search to lookup addr in this array.
//Binary search impl segAddrFileMap
- ssize_t lo = 0;
- ssize_t hi = pmEntryArray.getSize();
- ssize_t md;
- bool found = false;
- while (lo != hi) {
- md = (lo + hi) / 2;
+ ssize_t lo = 0, md = 0, hi = pmEntryArray.getSize() - 1;
+ while (lo < hi) {
+ md = lo + (hi - lo) / 2;
if (pmEntryArray[md].addrStart < addr) {
//printf("hi(%d) = md(%d) - 1=(%d)\n", hi, md, md - 1);
lo = md + 1;
@@ -195,35 +192,11 @@ namespace scaler {
//printf("lo(%d) = md(%d) + 1=(%d)\n", lo, md, md + 1);
hi = md;
} else {
- //printf("lo = md =%d\n", md);
- lo = md;
- found = true;
- break;
+ //Equal case: shrink hi to md to locate the left bound (equality should be impossible for segment start addresses)
+ hi = md;
}
}
- if (!found && lo == 0) {
- lo = -1;
- }
-
-
- //It is possible that the address falls within the range of last entry. We need to check this scenario
-
- if (lo == -1) { fatalErrorS(
- "Cannot find addr %p in pmMap. The address is lower than the lowest address if /proc/{pid}/maps.",
- addr);
- exit(-1);
- } else if (lo == pmEntryArray.getSize()) {
- //Address is within range
- lo = pmEntryArray.getSize() - 1;
- }
-
- //Check if it's end address is indeed in this entry. If not, it is because the caller is not in procinfomapper
- // (Maybe skipped, in this case return an id that is larger than the largest function addr)
- if (addr > pmEntryArray[lo].addrEnd) {
- return fileNameArr.size();
- }
-
- return pmEntryArray[lo].fileId;
+ return pmEntryArray[lo - 1].fileId;
}
diff --git a/libHook-c/src/include/type/ExtSymInfo.h b/libHook-c/src/include/type/ExtSymInfo.h
index ff3b001a..5966685a 100644
--- a/libHook-c/src/include/type/ExtSymInfo.h
+++ b/libHook-c/src/include/type/ExtSymInfo.h
@@ -21,14 +21,7 @@ namespace scaler {
uint8_t *pltSecEntryAddr = nullptr; //(8 bytes)
uint64_t pltStubId = 0; //(8 bytes)
FileID libFileId = -1; //(8 bytes) Deprecated, move to a dedicated array
- char padding0;
- char padding1;
- char padding2;
- char padding3;
- char padding4;
- char padding5;
- char padding6;
- char padding7;
+ ssize_t initialGap = 0;//8 Bytes. Initial gap value
};
}
#endif
\ No newline at end of file
diff --git a/libHook-c/src/include/type/RecTuple.h b/libHook-c/src/include/type/RecTuple.h
new file mode 100644
index 00000000..e0c26c15
--- /dev/null
+++ b/libHook-c/src/include/type/RecTuple.h
@@ -0,0 +1,44 @@
+#ifndef SCALER_RECTUPLE_H
+#define SCALER_RECTUPLE_H
+
+/**
+ * This struct is the format that we record time and save to disk.
+ */
+struct RecTuple {
+ uint64_t totalClockCycles; //8
+ int64_t count; //8
+ int64_t prevCount; //8 Used to perform sampling
+ int32_t gap; //4
+ float meanClockTick; //4
+ int32_t durThreshold; //4
+ uint32_t flags; //4
+};
+
+
+/**
+ * This struct stores the total size and element size of an array.
+ * On disk, this struct is followed by array elements
+ */
+struct ArrayDescriptor {
+ uint64_t arrayElemSize;
+ uint64_t arraySize;
+ uint8_t magicNum = 167; //1 Used to ensure the collected data format is recognized in python scripts.
+};
+
+/**
+ * This struct is the format that we record detailed timing and save to disk.
+ */
+typedef int64_t TIMING_TYPE;
+
+struct DetailedTimingDescriptor {
+ TIMING_TYPE timingSize;
+};
+
+struct ThreadCreatorInfo {
+ uint64_t threadCreatorFileId;
+ uint64_t threadExecutionCycles;
+ uint8_t magicNum = 167; //1 Used to ensure the collected data format is recognized in python scripts.
+};
+
+
+#endif //SCALER_RECTUPLE_H
diff --git a/libHook-c/src/include/util/hook/ExtFuncCallHook.h b/libHook-c/src/include/util/hook/ExtFuncCallHook.h
index 0affb25a..5a0fe6f9 100644
--- a/libHook-c/src/include/util/hook/ExtFuncCallHook.h
+++ b/libHook-c/src/include/util/hook/ExtFuncCallHook.h
@@ -64,7 +64,7 @@ namespace scaler {
protected:
- inline bool shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId);
+ inline bool shouldHookThisSymbol(const char *funcName, Elf64_Word &bind, Elf64_Word &type, SymID curSymId, ssize_t& initialGap);
inline bool
diff --git a/libHook-c/src/include/util/hook/HookContext.h b/libHook-c/src/include/util/hook/HookContext.h
index 3a9144a7..26e4ca66 100644
--- a/libHook-c/src/include/util/hook/HookContext.h
+++ b/libHook-c/src/include/util/hook/HookContext.h
@@ -1,10 +1,11 @@
-#ifndef THREAD_LOCAL_H
-#define THREAD_LOCAL_H
+#ifndef HOOK_CONTEXT_H
+#define HOOK_CONTEXT_H
#include
#include
#include
#include
+#include
#include "ExtFuncCallHook.h"
extern "C" {
@@ -20,22 +21,13 @@ struct HookTuple {
uint32_t clockTicks; //8
};
-struct RecTuple {
- uint64_t totalClockCycles; //8
- int64_t count; //8
- int32_t gap; //4
- float meanClockTick; //4
- int32_t durThreshold; //4
- uint32_t flags; //4
-};
-
struct HookContext {
//todo: Initialize using maximum stack size
int64_t indexPosi;//8bytes
scaler::Array *recArr; //8bytes
//Records which function calls which function for how long, the index is scalerid (Only contains hooked function)
//todo: Replace timingMatrix to a class
- int64_t curFileId = 1; //Which library created the current thread? The default one is main thread
+ int64_t threadCreatorFileId = 1; //Which library created the current thread? The default one is main thread
scaler::ExtFuncCallHook *_this = nullptr; //8bytes
//Records which symbol is called for how many times, the index is scalerid (Only contains hooked function)
uint64_t startTImestamp;
@@ -49,10 +41,12 @@ struct HookContext {
uint8_t isMainThread = false;
uint8_t initialized = 0;
};
+
const uint8_t SCALER_TRUE = 145;
const uint8_t SCALER_FALSE = 167;
extern uint32_t *countingArr;
+
class DataSaver {
public:
char initializeMe = 0;
@@ -77,5 +71,13 @@ extern pthread_mutex_t threadDataSavingLock;
bool initTLS();
+//#define INSTR_TIMING
+#ifdef INSTR_TIMING
+extern const int TIMING_REC_COUNT;
+extern __thread TIMING_TYPE **detailedTimingVectors;
+extern __thread TIMING_TYPE *detailedTimingVectorSize;
+#endif
+
+
}
#endif
\ No newline at end of file
diff --git a/libHook-c/src/include/util/tool/Logging.h b/libHook-c/src/include/util/tool/Logging.h
index e5871921..eca0dfb8 100644
--- a/libHook-c/src/include/util/tool/Logging.h
+++ b/libHook-c/src/include/util/tool/Logging.h
@@ -5,7 +5,7 @@
#define PRINT_INFO_LOG true
#define PRINT_DBG_LOG false
-#define PRINT_ERR_LOG false
+#define PRINT_ERR_LOG true
#if PRINT_DBG_LOG
// Print a single log string
diff --git a/libHook-c/src/libcProxy.cpp b/libHook-c/src/libcProxy.cpp
index 7bf3525a..3badec11 100644
--- a/libHook-c/src/libcProxy.cpp
+++ b/libHook-c/src/libcProxy.cpp
@@ -15,7 +15,7 @@
main_fn_t real_main;
-bool installed=false;
+bool installed = false;
extern "C" {
scaler::Vector threadContextMap;
@@ -51,7 +51,7 @@ int doubletake_main(int argc, char **argv, char **envp) {
HookContext *curContextPtr = curContext;
- curContextPtr->curFileId = 0;
+ curContextPtr->threadCreatorFileId = 0;
curContextPtr->endTImestamp = 0;
curContextPtr->startTImestamp = getunixtimestampms();
curContextPtr->isMainThread = true;
@@ -86,7 +86,7 @@ int doubletake_libc_start_main(main_fn_t main_fn, int argc, char **argv, void (*
void exit(int __status) {
auto realExit = (exit_origt) dlsym(RTLD_NEXT, "exit");
- if(!installed){
+ if (!installed) {
realExit(__status);
return;
}
diff --git a/libHook-c/src/pthreadProxy.cpp b/libHook-c/src/pthreadProxy.cpp
index 104932ed..aeed7996 100644
--- a/libHook-c/src/pthreadProxy.cpp
+++ b/libHook-c/src/pthreadProxy.cpp
@@ -41,7 +41,7 @@ void *dummy_thread_function(void *data) {
HookContext *curContextPtr = curContext;
assert(curContextPtr != NULL);
- curContextPtr->curFileId = curContextPtr->_this->pmParser.findExecNameByAddr(
+ curContextPtr->threadCreatorFileId = curContextPtr->_this->pmParser.findExecNameByAddr(
(void *) actualFuncPtr);
/**
diff --git a/libHook-c/tests/CMakeLists.txt b/libHook-c/tests/CMakeLists.txt
index 3203faa0..52a72f86 100644
--- a/libHook-c/tests/CMakeLists.txt
+++ b/libHook-c/tests/CMakeLists.txt
@@ -79,9 +79,9 @@ target_compile_options(ScalerHook-demoapps-FuncCall PRIVATE ${TEST_FLAGS} -Werro
add_executable(ScalerHook-demoapps-TimingAccuracy src/demoapps/TestTimingAccuracy.cpp)
-target_link_libraries(ScalerHook-demoapps-TimingAccuracy PUBLIC Testlib-FuncCall Testlib-CallFuncCall KuBoPltHook dl)
+target_link_libraries(ScalerHook-demoapps-TimingAccuracy PUBLIC Testlib-FuncCall Testlib-CallFuncCall KuBoPltHook dl)
target_compile_options(ScalerHook-demoapps-TimingAccuracy PRIVATE ${TEST_FLAGS} -Werror)
-target_link_options(ScalerHook-demoapps-TimingAccuracy PRIVATE "-z" "lazy")
+target_link_options(ScalerHook-demoapps-TimingAccuracy PRIVATE "-z" "lazy")
add_executable(ScalerHook-demoapps-HookEverything src/demoapps/TestHookEverything.cpp)
target_include_directories(ScalerHook-demoapps-HookEverything PUBLIC libtest/header)
@@ -207,7 +207,7 @@ add_executable(ScalerHook-parsecapps-swaptions
src/parsecapps/swaptions/MaxFunction.cpp
src/parsecapps/swaptions/nr_routines.cpp
src/parsecapps/swaptions/RanUnif.cpp)
-target_link_libraries(ScalerHook-parsecapps-swaptions pthread ScalerHook-HookAutoAsm-C)
+target_link_libraries(ScalerHook-parsecapps-swaptions pthread ScalerHook-HookAutoAsm-C)
target_compile_options(ScalerHook-parsecapps-swaptions PRIVATE ${TEST_FLAGS} "-DENABLE_THREADS" "-fstrict-aliasing" "-fkeep-inline-functions")
target_include_directories(ScalerHook-parsecapps-swaptions PRIVATE src/parsecapps/swaptions/include)
target_compile_definitions(ScalerHook-parsecapps-swaptions PRIVATE ENABLE_THREADS)
@@ -279,6 +279,9 @@ target_link_libraries(ScalerHook-proof-SaveDataUponExit pthread)
add_executable(ScalerHook-proof-threadlocalasmarray src/proofconcept/TestAccessThreadLocalArrayInAsm.cpp)
target_link_libraries(ScalerHook-proof-threadlocalasmarray pthread)
+add_executable(GetUserSysRealTime src/proofconcept/getUserTime.cpp)
+target_link_libraries(GetUserSysRealTime)
+
#add_executable(ScalerHook-demoapps src/proofconcept/testprog.cpp)
#target_link_libraries(DemoProg libTest PltHookLib dl)
diff --git a/libHook-c/tests/src/proofconcept/binarySegmentSearch.py b/libHook-c/tests/src/proofconcept/binarySegmentSearch.py
new file mode 100644
index 00000000..f289d75d
--- /dev/null
+++ b/libHook-c/tests/src/proofconcept/binarySegmentSearch.py
@@ -0,0 +1,32 @@
+# The following algorithm is used in ProcInfoParser to find, for a target value, the index of the segment whose starting element is the greatest one not exceeding the target (its left bound).
+A = [1, 2, 3, 4, 4, 5]
+
+
+def binSearch(A, tgt):
+ """
+ Return the index of the segment of A that contains tgt
+ :param A: Sorted array of segment starting values
+ :param tgt: Target value to locate
+ :return: Index of the containing segment, or -1 if tgt is below every segment start
+ """
+ lo = 0
+ hi = len(A)
+ md = 0
+ while lo < hi:
+ md = lo + (hi - lo) // 2
+ if A[md] < tgt:
+ lo = md + 1
+ elif A[md] > tgt:
+ hi = md
+ elif A[md] == tgt:
+ hi = md
+
+ return lo-1
+
+
+testList = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5]
+expectedVal = [-1, 0, 1, 2, 4, 5]
+for i in range(len(testList)):
+ lo = binSearch(A, testList[i])
+ print(testList[i], lo)
+ assert (lo == expectedVal[i])