Skip to content

Commit

Permalink
Update for metric calculator to support event options. Consequently, …
Browse files Browse the repository at this point in the history
…updated perf. groups with event options. Options are not needed anymore in formulas, counter name is sufficient
  • Loading branch information
TomTheBear committed Nov 30, 2017
1 parent 89bdb44 commit 88dedc3
Show file tree
Hide file tree
Showing 29 changed files with 143 additions and 112 deletions.
2 changes: 1 addition & 1 deletion groups/haswell/UOPS_EXEC.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/haswell/UOPS_ISSUE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/haswell/UOPS_RETIRE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/haswellEP/CBOX.txt
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
LLC misses per instruction (CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX15C0+CBOX16C0+CBOX17C0)/FIXC0
LL2 data written to MEM [MBytes] 1E-6*(CBOX0C1:STATE=0x1+CBOX1C1:STATE=0x1+CBOX2C1:STATE=0x1+CBOX3C1:STATE=0x1+CBOX4C1:STATE=0x1+CBOX5C1:STATE=0x1+CBOX6C1:STATE=0x1+CBOX7C1:STATE=0x1+CBOX8C1:STATE=0x1+CBOX9C1:STATE=0x1+CBOX10C1:STATE=0x1+CBOX11C1:STATE=0x1+CBOX12C1:STATE=0x1+CBOX13C1:STATE=0x1+CBOX14C1:STATE=0x1+CBOX15C1:STATE=0x1+CBOX16C1:STATE=0x1+CBOX17C1:STATE=0x1)*64
LL2 data written to MEM [MBytes] 1E-6*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1+CBOX15C1+CBOX16C1+CBOX17C1)*64


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/haswellEP/UOPS_EXEC.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/haswellEP/UOPS_ISSUE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/haswellEP/UOPS_RETIRE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/ivybridge/UOPS_EXEC.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/ivybridge/UOPS_ISSUE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/ivybridge/UOPS_RETIRE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
8 changes: 4 additions & 4 deletions groups/ivybridgeEP/CACHES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ L2 to L3 evict bandwidth [MBytes/s] 1.0E-06*PMC3*64.0/time
L2 to L3 evict data volume [GBytes] 1.0E-09*PMC3*64.0
L2 to/from L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*64.0/time
L2 to/from L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*64.0
System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0:STATE=0x3F+CBOX1C0:STATE=0x3F+CBOX2C0:STATE=0x3F+CBOX3C0:STATE=0x3F+CBOX4C0:STATE=0x3F+CBOX5C0:STATE=0x3F+CBOX6C0:STATE=0x3F+CBOX7C0:STATE=0x3F+CBOX8C0:STATE=0x3F+CBOX9C0:STATE=0x3F+CBOX10C0:STATE=0x3F+CBOX11C0:STATE=0x3F+CBOX12C0:STATE=0x3F+CBOX13C0:STATE=0x3F+CBOX14C0:STATE=0x3F)*64.0/time
System to L3 data volume [GBytes] 1.0E-09*(CBOX0C0:STATE=0x3F+CBOX1C0:STATE=0x3F+CBOX2C0:STATE=0x3F+CBOX3C0:STATE=0x3F+CBOX4C0:STATE=0x3F+CBOX5C0:STATE=0x3F+CBOX6C0:STATE=0x3F+CBOX7C0:STATE=0x3F+CBOX8C0:STATE=0x3F+CBOX9C0:STATE=0x3F+CBOX10C0:STATE=0x3F+CBOX11C0:STATE=0x3F+CBOX12C0:STATE=0x3F+CBOX13C0:STATE=0x3F+CBOX14C0:STATE=0x3F)*64.0
System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0)*64.0/time
System to L3 data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0)*64.0
L3 to memory bandwidth [MBytes/s] 1.0E-06*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1)*64/time
L3 to memory data volume [GBytes] 1.0E-09*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1)*64
L3 to/from system bandwidth [MBytes/s] 1.0E-06*(CBOX0C0:STATE=0x3F+CBOX1C0:STATE=0x3F+CBOX2C0:STATE=0x3F+CBOX3C0:STATE=0x3F+CBOX4C0:STATE=0x3F+CBOX5C0:STATE=0x3F+CBOX6C0:STATE=0x3F+CBOX7C0:STATE=0x3F+CBOX8C0:STATE=0x3F+CBOX9C0:STATE=0x3F+CBOX10C0:STATE=0x3F+CBOX11C0:STATE=0x3F+CBOX12C0:STATE=0x3F+CBOX13C0:STATE=0x3F+CBOX14C0:STATE=0x3F+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1)*64.0/time
L3 to/from system data volume [GBytes] 1.0E-09*(CBOX0C0:STATE=0x3F+CBOX1C0:STATE=0x3F+CBOX2C0:STATE=0x3F+CBOX3C0:STATE=0x3F+CBOX4C0:STATE=0x3F+CBOX5C0:STATE=0x3F+CBOX6C0:STATE=0x3F+CBOX7C0:STATE=0x3F+CBOX8C0:STATE=0x3F+CBOX9C0:STATE=0x3F+CBOX10C0:STATE=0x3F+CBOX11C0:STATE=0x3F+CBOX12C0:STATE=0x3F+CBOX13C0:STATE=0x3F+CBOX14C0:STATE=0x3F+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1)*64.0
L3 to/from system bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1)*64.0/time
L3 to/from system data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1)*64.0
Memory read bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0)*64.0/time
Memory read data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0+MBOX4C0+MBOX5C0+MBOX6C0+MBOX7C0)*64.0
Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1+MBOX4C1+MBOX5C1+MBOX6C1+MBOX7C1)*64.0/time
Expand Down
2 changes: 1 addition & 1 deletion groups/ivybridgeEP/CBOX.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
LLC misses per instruction (CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX8C0+CBOX9C0+CBOX10C0+CBOX11C0+CBOX12C0+CBOX13C0+CBOX14C0)/FIXC0
LLC data written to MEM [MBytes] 1E-6*(CBOX0C1:STATE=0x1+CBOX1C1:STATE=0x1+CBOX2C1:STATE=0x1+CBOX3C1:STATE=0x1+CBOX4C1:STATE=0x1+CBOX5C1:STATE=0x1+CBOX6C1:STATE=0x1+CBOX7C1:STATE=0x1+CBOX8C1:STATE=0x1+CBOX9C1:STATE=0x1+CBOX10C1:STATE=0x1+CBOX11C1:STATE=0x1+CBOX12C1:STATE=0x1+CBOX13C1:STATE=0x1+CBOX14C1:STATE=0x1)*64
LLC data written to MEM [MBytes] 1E-6*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1+CBOX8C1+CBOX9C1+CBOX10C1+CBOX11C1+CBOX12C1+CBOX13C1+CBOX14C1)*64


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/ivybridgeEP/UOPS_EXEC.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/ivybridgeEP/UOPS_ISSUE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/ivybridgeEP/UOPS_RETIRE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
26 changes: 13 additions & 13 deletions groups/knl/HBM_OFFCORE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@ Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Memory read bandwidth [MBytes/s] 1.0E-06*(PMC1:MATCH0=0x32F7:MATCH1=0x3F8060)*64.0/time
Memory read data volume [GBytes] 1.0E-09*(PMC1:MATCH0=0x32F7:MATCH1=0x3F8060)*64.0
Memory writeback bandwidth [MBytes/s] 1.0E-06*(PMC0:MATCH0=0x4908:MATCH1=0x3F8060)*64.0/time
Memory writeback data volume [GBytes] 1.0E-09*(PMC0:MATCH0=0x4908:MATCH1=0x3F8060)*64.0
Memory bandwidth [MBytes/s] 1.0E-06*(PMC0:MATCH0=0x4908:MATCH1=0x3F8060+PMC1:MATCH0=0x32F7:MATCH1=0x3F8060)*64.0/time
Memory data volume [GBytes] 1.0E-09*(PMC0:MATCH0=0x4908:MATCH1=0x3F8060+PMC1:MATCH0=0x32F7:MATCH1=0x3F8060)*64.0
Memory read bandwidth [MBytes/s] 1.0E-06*(PMC1)*64.0/time
Memory read data volume [GBytes] 1.0E-09*(PMC1)*64.0
Memory writeback bandwidth [MBytes/s] 1.0E-06*(PMC0)*64.0/time
Memory writeback data volume [GBytes] 1.0E-09*(PMC0)*64.0
Memory bandwidth [MBytes/s] 1.0E-06*(PMC0+PMC1)*64.0/time
Memory data volume [GBytes] 1.0E-09*(PMC0+PMC1)*64.0

LONG
Formulas:
Memory read bandwidth [MBytes/s] = 1.0E-06*(sum(EDC_RPQ_INSERTS))*64/time
Memory read data volume [GBytes] = 1.0E-09*(sum(EDC_RPQ_INSERTS))*64
Memory writeback bandwidth [MBytes/s] = 1.0E-06*(sum(EDC_WPQ_INSERTS))*64/time
Memory writeback data volume [GBytes] = 1.0E-09*(sum(EDC_WPQ_INSERTS))*64
Memory bandwidth [MBytes/s] = 1.0E-06*(sum(EDC_RPQ_INSERTS)+sum(EDC_WPQ_INSERTS))*64/time
Memory data volume [GBytes] = 1.0E-09*(sum(EDC_RPQ_INSERTS)+sum(EDC_WPQ_INSERTS))*64
Memory read bandwidth [MBytes/s] = 1.0E-06*(sum(OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x32F7:MATCH1=0x3F8060))*64/time
Memory read data volume [GBytes] = 1.0E-09*(sum(OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x32F7:MATCH1=0x3F8060))*64
Memory writeback bandwidth [MBytes/s] = 1.0E-06*(sum(OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x4908:MATCH1=0x3F8060))*64/time
Memory writeback data volume [GBytes] = 1.0E-09*(sum(OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x4908:MATCH1=0x3F8060))*64
Memory bandwidth [MBytes/s] = 1.0E-06*(sum(OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x32F7:MATCH1=0x3F8060)+sum(OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x4908:MATCH1=0x3F8060))*64/time
Memory data volume [GBytes] = 1.0E-09*(sum(OFFCORE_RESPONSE_1_OPTIONS:MATCH0=0x32F7:MATCH1=0x3F8060)+sum(OFFCORE_RESPONSE_0_OPTIONS:MATCH0=0x4908:MATCH1=0x3F8060))*64
-
Profiling group to measure data transfers from and to the high bandwidth memory (HBM).

If possible, use the HBM or HBM_CACHE group because they provide more accurate counts.
8 changes: 4 additions & 4 deletions groups/knl/L2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
L2 non-RFO bandwidth [MBytes/s] 1.E-06*(PMC0)*64.0/time
L2 non-RFO data volume [GByte] 1.E-09*PMC0*64.0
L2 RFO bandwidth [MBytes/s] 1.E-06*(PMC1:MATCH0=0x0002:MATCH1=0x1)*64.0/time
L2 RFO data volume [GByte] 1.E-09*(PMC1:MATCH0=0x0002:MATCH1=0x1)*64.0
L2 bandwidth [MBytes/s] 1.E-06*(PMC0+PMC1:MATCH0=0x0002:MATCH1=0x1)*64.0/time
L2 data volume [GByte] 1.E-06*(PMC0+PMC1:MATCH0=0x0002:MATCH1=0x1)*64.0
L2 RFO bandwidth [MBytes/s] 1.E-06*(PMC1)*64.0/time
L2 RFO data volume [GByte] 1.E-09*(PMC1)*64.0
L2 bandwidth [MBytes/s] 1.E-06*(PMC0+PMC1)*64.0/time
L2 data volume [GByte] 1.E-09*(PMC0+PMC1)*64.0

LONG
Formula:
Expand Down
6 changes: 3 additions & 3 deletions groups/sandybridge/L3CACHE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
L3 request rate PMC1:MATCH0=0x0081:MATCH1=0x1/FIXC0
L3 miss rate PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/FIXC0
L3 miss ratio PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/PMC1:MATCH0=0x0081:MATCH1=0x1
L3 request rate PMC1/FIXC0
L3 miss rate PMC0/FIXC0
L3 miss ratio PMC0/PMC1

LONG
Formulas:
Expand Down
2 changes: 1 addition & 1 deletion groups/sandybridge/UOPS_EXEC.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/sandybridge/UOPS_ISSUE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/sandybridge/UOPS_RETIRE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
8 changes: 4 additions & 4 deletions groups/sandybridgeEP/CACHES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,12 @@ L2 to L3 evict bandwidth [MBytes/s] 1.0E-06*PMC3*64.0/time
L2 to L3 evict data volume [GBytes] 1.0E-09*PMC3*64.0
L2 to/from L3 bandwidth [MBytes/s] 1.0E-06*(PMC2+PMC3)*64.0/time
L2 to/from L3 data volume [GBytes] 1.0E-09*(PMC2+PMC3)*64.0
System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0:STATE=0x3F+CBOX1C0:STATE=0x3F+CBOX2C0:STATE=0x3F+CBOX3C0:STATE=0x3F+CBOX4C0:STATE=0x3F+CBOX5C0:STATE=0x3F+CBOX6C0:STATE=0x3F+CBOX7C0:STATE=0x3F)*64.0/time
System to L3 data volume [GBytes] 1.0E-09*(CBOX0C0:STATE=0x3F+CBOX1C0:STATE=0x3F+CBOX2C0:STATE=0x3F+CBOX3C0:STATE=0x3F+CBOX4C0:STATE=0x3F+CBOX5C0:STATE=0x3F+CBOX6C0:STATE=0x3F+CBOX7C0:STATE=0x3F)*64.0
System to L3 bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0)*64.0/time
System to L3 data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0)*64.0
L3 to system bandwidth [MBytes/s] 1.0E-06*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1)*64.0/time
L3 to system data volume [GBytes] 1.0E-09*(CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1)*64.0
L3 to/from system bandwidth [MBytes/s] 1.0E-06*(CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1)*64.0/time
L3 to/from system data volume [GBytes] 1.0E-09*(CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C0:STATE=0x3F+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1)*64.0
L3 to/from system bandwidth [MBytes/s] 1.0E-06*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1)*64.0/time
L3 to/from system data volume [GBytes] 1.0E-09*(CBOX0C0+CBOX1C0+CBOX2C0+CBOX3C0+CBOX4C0+CBOX5C0+CBOX6C0+CBOX7C0+CBOX0C1+CBOX1C1+CBOX2C1+CBOX3C1+CBOX4C1+CBOX5C1+CBOX6C1+CBOX7C1)*64.0
Memory read bandwidth [MBytes/s] 1.0E-06*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0)*64.0/time
Memory read data volume [GBytes] 1.0E-09*(MBOX0C0+MBOX1C0+MBOX2C0+MBOX3C0)*64.0
Memory write bandwidth [MBytes/s] 1.0E-06*(MBOX0C1+MBOX1C1+MBOX2C1+MBOX3C1)*64.0/time
Expand Down
6 changes: 3 additions & 3 deletions groups/sandybridgeEP/L3CACHE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ Runtime (RDTSC) [s] time
Runtime unhalted [s] FIXC1*inverseClock
Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
L3 request rate PMC1:MATCH0=0x0081:MATCH1=0x1/FIXC0
L3 miss rate PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/FIXC0
L3 miss ratio PMC0:MATCH0=0x0081:MATCH1=0x3fffc0/PMC1:MATCH0=0x0081:MATCH1=0x1
L3 request rate PMC1/FIXC0
L3 miss rate PMC0/FIXC0
L3 miss ratio PMC0/PMC1

LONG
Formulas:
Expand Down
2 changes: 1 addition & 1 deletion groups/skylake/UOPS_EXEC.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/skylake/UOPS_ISSUE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
2 changes: 1 addition & 1 deletion groups/skylake/UOPS_RETIRE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Clock [MHz] 1.E-06*(FIXC1/FIXC2)/inverseClock
CPI FIXC1/FIXC0
Used cycles ratio [%] 100*PMC0/PMC2
Unused cycles ratio [%] 100*PMC1/PMC2
Avg stall duration [cycles] PMC1/PMC3:EDGEDETECT
Avg stall duration [cycles] PMC1/PMC3


LONG
Expand Down
4 changes: 3 additions & 1 deletion src/includes/perfgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ extern int add_to_clist(CounterList* clist, char* counter, double result);
extern int update_clist(CounterList* clist, char* counter, double result);
extern void destroy_clist(CounterList* clist);

extern int calc_metric(char* formula, CounterList* clist, double *result);
int add_var(char* name, char* value, char** varstr, char** varlist);
int add_dbl_var(char* name, double value, char** varstr, char** varlist);
extern int calc_metric(int cpu, char* formula, char* varstr, char* varlist, double *result);

#endif /* PERFGROUP_H */
39 changes: 20 additions & 19 deletions src/perfgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -1491,6 +1491,23 @@ int add_var(char* name, char* value, char** varstr, char** varlist)
return 0;
}

/*
 * Append an assignment line "<name> = <value>\n" for a double value to the
 * variable string *varstr and register the name via add_to_varlist().
 *
 * name    - NUL-terminated variable name
 * value   - value to store; formatted with "%20.20f"
 * varstr  - in/out: pointer to the assignment buffer; *varstr may be NULL,
 *           in which case the buffer starts empty. The buffer is resized
 *           with realloc_buffer().
 * varlist - in/out: handed unchanged to add_to_varlist()
 *
 * Returns 0 on success, -EINVAL if name or varstr is NULL or the value
 * cannot be formatted, -ENOMEM if the buffer cannot be resized, or the
 * negative result of add_to_varlist().
 */
int add_dbl_var(char* name, double value, char** varstr, char** varlist)
{
    int slen = 0;
    int need = 0;
    int ret = 0;

    if ((name == NULL) || (varstr == NULL))
    {
        return -EINVAL;
    }
    if (*varstr)
        slen = strlen(*varstr);

    /*
     * "%20.20f" only sets a minimum width: a large magnitude prints all of
     * its integer digits. Measure the formatted length first instead of
     * guessing a fixed budget, otherwise the line would be cut off and lose
     * its terminating newline.
     */
    need = snprintf(NULL, 0, "%s = %20.20f\n", name, value);
    if (need < 0)
    {
        return -EINVAL;
    }

    /* +1 for the terminating NUL written by snprintf below. */
    *varstr = realloc_buffer(*varstr, slen + need + 1);
    if (!*varstr)
    {
        return -ENOMEM;
    }
    snprintf(&((*varstr)[slen]), need + 1, "%s = %20.20f\n", name, value);

    /* Only negative results are treated as errors; anything else is success. */
    ret = add_to_varlist(name, varlist);
    if (ret < 0)
    {
        return ret;
    }
    return 0;
}

int add_def(char* name, char* value, int cpu, char** varlist)
{
int slen = 0;
Expand Down Expand Up @@ -1545,7 +1562,7 @@ double do_calc(int cpu, char* s, char* vars, char* varlist)
{
return res;
}

printf("%s\n", t);
ret = luaL_dostring (L, t);
if (!ret)
{
Expand Down Expand Up @@ -1586,39 +1603,23 @@ char* do_expand(int cpu, char* s, char *varlist)


/*
 * NOTE(review): this span is a diff rendering without +/- markers, so lines
 * of the previous and the new calc_metric() are interleaved. Two signatures
 * and two argument checks appear back to back; the first of each pair takes
 * a CounterList, the second takes a cpu id plus prebuilt vars/varlist
 * strings. Presumably the CounterList variants, the local cpu/vars/varlist
 * declarations, the clist loop and the trailing free() calls belong to the
 * removed version -- confirm against the repository before relying on it.
 *
 * Common to both variants as shown: returns -EINVAL when a required pointer
 * is NULL, resets *result to 0.0, expands the formula with do_expand() and,
 * if that yields a non-NULL string, stores the value of do_calc() in *result.
 */
int
calc_metric(char* formula, CounterList* clist, double *result)
calc_metric(int cpu, char* formula, char* vars, char* varlist, double *result)
{
int i=0;
char* f;
/* maxstrlen/minstrlen are not referenced in the lines visible here. */
int maxstrlen = 0, minstrlen = 10000;
int cpu = 0;
char buf[128];

if ((formula == NULL) || (clist == NULL) || (result == NULL))
if ((formula == NULL) || (vars == NULL) || (varlist == NULL) || (result == NULL))
return -EINVAL;
*result = 0.0;

char* vars = NULL;
char* varlist = NULL;

/* Turn every counter of clist into a variable; a counter named "CPU"
   additionally selects the cpu id used for expansion and evaluation. */
for(i=0;i<clist->counters;i++)
{
snprintf(buf, 127, "%.25f", clist->cvalues[i]);
add_var(clist->cnames[i], buf, &vars, &varlist);
if (strcmp(clist->cnames[i], "CPU") == 0)
cpu = (int)clist->cvalues[i];
memset(buf, 0, 128 * sizeof(char));
}
f = do_expand(cpu, formula, varlist);
if (f)
{
*result = do_calc(cpu, f, vars, varlist);
}

if (vars)
free(vars);
if (varlist)
free(varlist);
/* With the loop above, i equals clist->counters here.
   NOTE(review): without that loop i keeps its initial value 0 -- confirm
   what callers expect as the return value. */
return i;
}

Expand Down
Loading

0 comments on commit 88dedc3

Please sign in to comment.