Add functionality to compare performance reports
This commit adds `Comparison` classes that capture the differences between
performance measurements taken at two different points in time, and uses
them in a new script that can be called to generate a markdown report of
the performance changes.
serramatutu committed Nov 5, 2024
1 parent 26b39a8 commit c7eb66b
Showing 6 changed files with 267 additions and 6 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -137,3 +137,7 @@ dmypy.json

# Performance profiler results.
**/cprofile_output.bin

# Performance reports
performance-report.json
performance-comparison.md
11 changes: 9 additions & 2 deletions Makefile
@@ -7,10 +7,13 @@ PARALLELISM = auto
# Additional command line options to pass to pytest.
ADDITIONAL_PYTEST_OPTIONS =

PERFORMANCE_OUTPUT_FILE = performance-report.json
PERFORMANCE_COMPARISON_OUTPUT_FILE = performance-comparison.md
TESTS_PERFORMANCE = tests_metricflow/performance

# Pytest that can populate the persistent source schema
USE_PERSISTENT_SOURCE_SCHEMA = --use-persistent-source-schema
TESTS_METRICFLOW = tests_metricflow
TESTS_PERFORMANCE = tests_metricflow/performance
TESTS_METRICFLOW_SEMANTICS = tests_metricflow_semantics
POPULATE_PERSISTENT_SOURCE_SCHEMA = $(TESTS_METRICFLOW)/source_schema_tools.py::populate_source_schema

@@ -21,7 +24,11 @@ install-hatch:

.PHONY: perf
perf:
hatch -v run dev-env:pytest -vv -n 1 $(ADDITIONAL_PYTEST_OPTIONS) $(TESTS_PERFORMANCE)/
hatch -v run dev-env:pytest -vv -n 1 $(ADDITIONAL_PYTEST_OPTIONS) --output-json $(PERFORMANCE_OUTPUT_FILE) $(TESTS_PERFORMANCE)/

.PHONY: perf-compare
perf-compare:
hatch -v run dev-env:python $(TESTS_PERFORMANCE)/compare_reports.py $A $B $(PERFORMANCE_COMPARISON_OUTPUT_FILE)

# Testing and linting
.PHONY: test
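Taken together, the updated `perf` target and the new `perf-compare` target imply a two-step workflow: run the performance suite once to produce a JSON report (performance-report.json by default, configurable through `PERFORMANCE_OUTPUT_FILE`), keep that file as a baseline, run the suite again on the code under test, and hand both files to the comparison script via the `A` and `B` variables. A minimal sketch of that workflow driven from Python; the baseline file name and the branch switch are illustrative, not something this commit prescribes:

import shutil
import subprocess

# First run, e.g. on the base branch; writes performance-report.json by default.
subprocess.run(["make", "perf"], check=True)
shutil.move("performance-report.json", "performance-baseline.json")

# ...check out the change under test, then run the suite again...
subprocess.run(["make", "perf"], check=True)

# Compare the two reports; writes performance-comparison.md by default.
subprocess.run(
    ["make", "perf-compare", "A=performance-baseline.json", "B=performance-report.json"],
    check=True,
)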
138 changes: 138 additions & 0 deletions metricflow-semantics/metricflow_semantics/test_helpers/performance_helpers.py
@@ -61,13 +61,114 @@ def from_calls(cls, context_id: str, calls: List[Call]) -> ContextReport:
wall_ns_max=int(max(c.total_wall_ns for c in calls)),
)

def compare(self, other: ContextReport) -> ContextReportComparison:
"""Compare this report with other."""
assert self.context_id == other.context_id, "Cannot compare unrelated contexts."

calculated_keys = (
"cpu_ns_average",
"cpu_ns_median",
"cpu_ns_max",
"wall_ns_average",
"wall_ns_median",
"wall_ns_max",
)

kwargs = {}
max_pct_change = float("-inf")
for key in calculated_keys:
self_val = getattr(self, key)
other_val = getattr(other, key)

diff = self_val - other_val
kwargs[f"{key}_abs"] = diff

pct = diff / self_val
kwargs[f"{key}_pct"] = pct
if pct > max_pct_change:
max_pct_change = pct

return ContextReportComparison(
context_id=self.context_id,
a=self,
b=other,
max_pct_change=max_pct_change,
**kwargs,
)


class ContextReportComparison(FrozenBaseModel):
"""A comparison between two context reports."""

context_id: str

a: ContextReport
b: ContextReport

max_pct_change: float

cpu_ns_average_abs: int
cpu_ns_average_pct: float
cpu_ns_median_abs: int
cpu_ns_median_pct: float
cpu_ns_max_abs: int
cpu_ns_max_pct: float

wall_ns_average_abs: int
wall_ns_average_pct: float
wall_ns_median_abs: int
wall_ns_median_pct: float
wall_ns_max_abs: int
wall_ns_max_pct: float


class SessionReport(FrozenBaseModel):
"""A performance report containing aggregated runtime statistics from a session."""

session_id: str
contexts: Dict[str, ContextReport]

def compare(self, other: SessionReport) -> SessionReportComparison:
"""Compare this report with other."""
assert self.session_id == other.session_id, "Cannot compare unrelated sessions."

self_contexts = set(self.contexts.keys())
other_contexts = set(other.contexts.keys())
all_contexts = self_contexts.union(other_contexts)

comparisons: Dict[str, Optional[ContextReportComparison]] = {}
max_pct_change = float("-inf")
for context in all_contexts:
if context not in self.contexts or context not in other.contexts:
comparisons[context] = None
else:
comp = self.contexts[context].compare(other.contexts[context])
comparisons[context] = comp
if comp.max_pct_change > max_pct_change:
max_pct_change = comp.max_pct_change

return SessionReportComparison(
session_id=self.session_id,
a=self.contexts,
b=other.contexts,
contexts=comparisons,
max_pct_change=max_pct_change,
)


class SessionReportComparison(FrozenBaseModel):
"""A comparison between two session reports.
If a context is not present in A or B, the absolute and pct values will be None for
that entry.
"""

session_id: str
a: Dict[str, ContextReport]
b: Dict[str, ContextReport]
contexts: Dict[str, Optional[ContextReportComparison]]
max_pct_change: float


class SessionReportSet(FrozenBaseModel):
"""A set of session reports."""
@@ -78,6 +179,43 @@ def add_report(self, report: SessionReport) -> None:
"""Add a report and associate it with the session ID."""
self.sessions[report.session_id] = report

def compare(self, other: SessionReportSet) -> SessionReportSetComparison:
"""Compare this report set with other."""
self_sessions = set(self.sessions.keys())
other_sessions = set(other.sessions.keys())
all_sessions = self_sessions.union(other_sessions)

comparison: Dict[str, Optional[SessionReportComparison]] = {}
max_pct_change = float("-inf")
max_pct_change_session = ""
for session in all_sessions:
if session not in self.sessions or session not in other.sessions:
comparison[session] = None
else:
comp = self.sessions[session].compare(other.sessions[session])
comparison[session] = comp
if comp.max_pct_change > max_pct_change:
max_pct_change = comp.max_pct_change
max_pct_change_session = session

return SessionReportSetComparison(
sessions=comparison,
max_pct_change=max_pct_change,
max_pct_change_session=max_pct_change_session,
)


class SessionReportSetComparison(FrozenBaseModel):
"""A comparison between two session report sets.
If a session ID is not present in A or B, the comparison is None
"""

sessions: Dict[str, Optional[SessionReportComparison]]

max_pct_change: float
max_pct_change_session: str


class PerformanceTracker:
"""Track performance metrics across different contexts.
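One detail worth spelling out in `ContextReport.compare` above: each `_abs` delta is the value from the report `compare` is called on minus the value from the report passed in as `other`, and each `_pct` value is that delta divided by the former. A small worked example with made-up numbers, in plain Python rather than the classes themselves:

# Average CPU time per call in two reports, in nanoseconds (made-up values).
cpu_ns_average_a = 4_000_000  # report that compare() is called on
cpu_ns_average_b = 5_000_000  # report passed in as `other`

diff = cpu_ns_average_a - cpu_ns_average_b  # -1_000_000 ns
pct = diff / cpu_ns_average_a               # -0.25, i.e. -25% relative to the first report
print(f"{diff / 1e6:.4f}ms ({pct * 100:+.2f}%)")  # -1.0000ms (-25.00%)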
94 changes: 94 additions & 0 deletions tests_metricflow/performance/compare_reports.py
@@ -0,0 +1,94 @@
from __future__ import annotations

import argparse
import json
from io import StringIO

from metricflow_semantics.test_helpers.performance_helpers import (
SessionReportSet,
SessionReportSetComparison,
)

MAX_PCT_CHANGE_WARNING_THRESHOLD = 0.15


def _load_report_file(filename: str) -> SessionReportSet:
with open(filename, "r") as f:
raw = f.read()
return SessionReportSet.parse_obj(json.loads(raw))


# I hate this code but there's no really elegant way of creating a markdown file
def _report_comparison_markdown(base_name: str, other_name: str, comp: SessionReportSetComparison) -> str:
buf = StringIO()

buf.write("# Performance comparison\n")
buf.write(f"Comparing `{base_name}` against `{other_name}`\n\n")
buf.write(f"**Worst performance hit:** {comp.max_pct_change * 100:.2f}% in `{comp.max_pct_change_session}`\n\n")

for session, session_comp in comp.sessions.items():
emoji = (
":question:"
if session_comp is None
else (":bangbang:" if session_comp.max_pct_change > MAX_PCT_CHANGE_WARNING_THRESHOLD else ":rocket:")
)

buf.write(f"## `{session}` {emoji}\n\n")
if session_comp is None:
buf.write("Comparison not available since there's no data for this session in one of the reports.\n\n")
continue

buf.write("| context | CPU avg | CPU median | CPU max | Wall avg | Wall median | Wall max |\n")
buf.write("| ------- | ------- | ---------- | ------- | -------- | ----------- | -------- |\n")
for ctx, ctx_comp in session_comp.contexts.items():
buf.write(f"| `{ctx}` ")

if ctx_comp is None:
buf.write(" | n/a" * 6)
else:
buf.write("| ")
buf.write(
" | ".join(
f"{int(abs)/10e6:.4f}ms ({pct * 100:+.2f}%)"
for abs, pct in (
(ctx_comp.cpu_ns_average_abs, ctx_comp.cpu_ns_average_pct),
(ctx_comp.cpu_ns_median_abs, ctx_comp.cpu_ns_median_pct),
(ctx_comp.cpu_ns_max_abs, ctx_comp.cpu_ns_max_pct),
(ctx_comp.wall_ns_average_abs, ctx_comp.wall_ns_average_pct),
(ctx_comp.wall_ns_median_abs, ctx_comp.wall_ns_median_pct),
(ctx_comp.wall_ns_max_abs, ctx_comp.wall_ns_max_pct),
)
)
)

buf.write(" |\n")
buf.write("\n\n")

return buf.getvalue()


def main() -> None: # noqa: D103
parser = argparse.ArgumentParser()
parser.add_argument("a", help="The base report for the comparison")
parser.add_argument("b", help="The other report for the comparison")
parser.add_argument("output", help="The output file for the comparison")

args = parser.parse_args()

a = _load_report_file(args.a)
b = _load_report_file(args.b)

comparison = a.compare(b)
md = _report_comparison_markdown(
base_name=args.a,
other_name=args.b,
comp=comparison,
)
with open(args.output, "w") as f:
f.write(md)

print(args.output)


if __name__ == "__main__":
main()
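Since the script only relies on the pydantic models shown earlier, the same comparison can also be driven from a Python shell without going through the CLI. A short sketch; the report file names are illustrative:

import json

from metricflow_semantics.test_helpers.performance_helpers import SessionReportSet

# Load two report sets produced by separate `make perf` runs.
with open("performance-baseline.json") as f:
    a = SessionReportSet.parse_obj(json.load(f))
with open("performance-report.json") as f:
    b = SessionReportSet.parse_obj(json.load(f))

comparison = a.compare(b)
print(f"worst change: {comparison.max_pct_change * 100:.2f}% in `{comparison.max_pct_change_session}`")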
24 changes: 22 additions & 2 deletions tests_metricflow/performance/conftest.py
@@ -25,6 +25,22 @@
GLOBAL_TRACKING_CONTEXT = "global"


def pytest_addoption(parser: pytest.Parser) -> None:
"""Add option for performance report file."""
parser.addoption(
"--output-json",
action="store",
default="performance-report.json",
help="where to store performance results as JSON",
)


@pytest.fixture(scope="session")
def perf_output_json(request: pytest.FixtureRequest) -> str:
"""The output json for the performance test."""
return request.config.getoption("--output-json") # type: ignore


@pytest.fixture(scope="session")
def perf_tracker() -> PerformanceTracker:
"""Instrument MetricFlow with performance tracking utilities."""
@@ -44,7 +60,10 @@ def __call__( # noqa: D102


@pytest.fixture(scope="session")
def measure_compilation_performance(perf_tracker: PerformanceTracker) -> Iterator[MeasureFixture]:
def measure_compilation_performance(
perf_tracker: PerformanceTracker,
perf_output_json: str,
) -> Iterator[MeasureFixture]:
"""Fixture that returns a function which measures compilation performance for a given query."""

def _measure(
@@ -83,4 +102,5 @@ def _measure(

report_set = perf_tracker.get_report_set()

print(report_set.to_pretty_json())
with open(perf_output_json, "w") as f:
f.write(report_set.to_pretty_json())
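With the new `--output-json` option, the report location can also be chosen directly when invoking pytest rather than going through the Makefile. A sketch mirroring the `perf` target's command, with an illustrative output path:

import subprocess

# Same command the Makefile's `perf` target runs, but with a custom report path.
subprocess.run(
    [
        "hatch", "-v", "run", "dev-env:pytest",
        "-vv", "-n", "1",
        "--output-json", "/tmp/my-performance-report.json",
        "tests_metricflow/performance/",
    ],
    check=True,
)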
2 changes: 0 additions & 2 deletions tests_metricflow/performance/test_simple_manifest.py
@@ -36,7 +36,6 @@ def test_simple_query(
dataflow_to_sql_converter=dataflow_to_sql_converter,
sql_client=sql_client,
)
assert False


def test_simple_query_2(
@@ -63,4 +62,3 @@ def test_simple_query_2(
dataflow_to_sql_converter=dataflow_to_sql_converter,
sql_client=sql_client,
)
assert False
