Add functionality to compare performance reports
This commit adds `Comparison` classes that capture the differences between
performance measurements taken at two different points in time, and uses
them in a new script that can be called to generate a markdown report of
the performance changes.
serramatutu committed Nov 5, 2024
1 parent 26b39a8 commit c7eb66b
Showing 6 changed files with 267 additions and 6 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -137,3 +137,7 @@ dmypy.json

# Performance profiler results.
**/cprofile_output.bin

# Performance reports
performance-report.json
performance-comparison.md
11 changes: 9 additions & 2 deletions Makefile
@@ -7,10 +7,13 @@ PARALLELISM = auto
# Additional command line options to pass to pytest.
ADDITIONAL_PYTEST_OPTIONS =

PERFORMANCE_OUTPUT_FILE = performance-report.json
PERFORMANCE_COMPARISON_OUTPUT_FILE = performance-comparison.md
TESTS_PERFORMANCE = tests_metricflow/performance

# Pytest that can populate the persistent source schema
USE_PERSISTENT_SOURCE_SCHEMA = --use-persistent-source-schema
TESTS_METRICFLOW = tests_metricflow
TESTS_PERFORMANCE = tests_metricflow/performance
TESTS_METRICFLOW_SEMANTICS = tests_metricflow_semantics
POPULATE_PERSISTENT_SOURCE_SCHEMA = $(TESTS_METRICFLOW)/source_schema_tools.py::populate_source_schema

@@ -21,7 +24,11 @@ install-hatch:

.PHONY: perf
perf:
hatch -v run dev-env:pytest -vv -n 1 $(ADDITIONAL_PYTEST_OPTIONS) $(TESTS_PERFORMANCE)/
hatch -v run dev-env:pytest -vv -n 1 $(ADDITIONAL_PYTEST_OPTIONS) --output-json $(PERFORMANCE_OUTPUT_FILE) $(TESTS_PERFORMANCE)/

.PHONY: perf-compare
perf-compare:
hatch -v run dev-env:python $(TESTS_PERFORMANCE)/compare_reports.py $A $B $(PERFORMANCE_COMPARISON_OUTPUT_FILE)

# Testing and linting
.PHONY: test
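Taken together, the updated `perf` target and the new `perf-compare` target imply a two-step workflow: run the performance suite once to produce a JSON report (performance-report.json by default, configurable through `PERFORMANCE_OUTPUT_FILE`), keep that file as a baseline, run the suite again on the code under test, and hand both files to the comparison script via the `A` and `B` variables. A minimal sketch of that workflow driven from Python; the baseline file name and the branch switch are illustrative, not something this commit prescribes:

import shutil
import subprocess

# First run, e.g. on the base branch; writes performance-report.json by default.
subprocess.run(["make", "perf"], check=True)
shutil.move("performance-report.json", "performance-baseline.json")

# ...check out the change under test, then run the suite again...
subprocess.run(["make", "perf"], check=True)

# Compare the two reports; writes performance-comparison.md by default.
subprocess.run(
    ["make", "perf-compare", "A=performance-baseline.json", "B=performance-report.json"],
    check=True,
)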
138 changes: 138 additions & 0 deletions metricflow-semantics/metricflow_semantics/test_helpers/performance_helpers.py
@@ -61,13 +61,114 @@ def from_calls(cls, context_id: str, calls: List[Call]) -> ContextReport:
wall_ns_max=int(max(c.total_wall_ns for c in calls)),
)

def compare(self, other: ContextReport) -> ContextReportComparison:
"""Compare this report with other."""
assert self.context_id == other.context_id, "Cannot compare unrelated contexts."

calculated_keys = (
"cpu_ns_average",
"cpu_ns_median",
"cpu_ns_max",
"wall_ns_average",
"wall_ns_median",
"wall_ns_max",
)

kwargs = {}
max_pct_change = float("-inf")
for key in calculated_keys:
self_val = getattr(self, key)
other_val = getattr(other, key)

diff = self_val - other_val
kwargs[f"{key}_abs"] = diff

pct = diff / self_val
kwargs[f"{key}_pct"] = pct
if pct > max_pct_change:
max_pct_change = pct

return ContextReportComparison(
context_id=self.context_id,
a=self,
b=other,
max_pct_change=max_pct_change,
**kwargs,
)


class ContextReportComparison(FrozenBaseModel):
"""A comparison between two context reports."""

context_id: str

a: ContextReport
b: ContextReport

max_pct_change: float

cpu_ns_average_abs: int
cpu_ns_average_pct: float
cpu_ns_median_abs: int
cpu_ns_median_pct: float
cpu_ns_max_abs: int
cpu_ns_max_pct: float

wall_ns_average_abs: int
wall_ns_average_pct: float
wall_ns_median_abs: int
wall_ns_median_pct: float
wall_ns_max_abs: int
wall_ns_max_pct: float


class SessionReport(FrozenBaseModel):
"""A performance report containing aggregated runtime statistics from a session."""

session_id: str
contexts: Dict[str, ContextReport]

def compare(self, other: SessionReport) -> SessionReportComparison:
"""Compare this report with other."""
assert self.session_id == other.session_id, "Cannot compare unrelated sessions."

self_contexts = set(self.contexts.keys())
other_contexts = set(other.contexts.keys())
all_contexts = self_contexts.union(other_contexts)

comparisons: Dict[str, Optional[ContextReportComparison]] = {}
max_pct_change = float("-inf")
for context in all_contexts:
if context not in self.contexts or context not in other.contexts:
comparisons[context] = None
else:
comp = self.contexts[context].compare(other.contexts[context])
comparisons[context] = comp
if comp.max_pct_change > max_pct_change:
max_pct_change = comp.max_pct_change

return SessionReportComparison(
session_id=self.session_id,
a=self.contexts,
b=other.contexts,
contexts=comparisons,
max_pct_change=max_pct_change,
)


class SessionReportComparison(FrozenBaseModel):
"""A comparison between two session reports.
If a context is not present in A or B, the absolute and pct values will be None for
that entry.
"""

session_id: str
a: Dict[str, ContextReport]
b: Dict[str, ContextReport]
contexts: Dict[str, Optional[ContextReportComparison]]
max_pct_change: float


class SessionReportSet(FrozenBaseModel):
"""A set of session reports."""
@@ -78,6 +179,43 @@ def add_report(self, report: SessionReport) -> None:
"""Add a report and associate it with the session ID."""
self.sessions[report.session_id] = report

def compare(self, other: SessionReportSet) -> SessionReportSetComparison:
"""Compare this report set with other."""
self_sessions = set(self.sessions.keys())
other_sessions = set(other.sessions.keys())
all_sessions = self_sessions.union(other_sessions)

comparison: Dict[str, Optional[SessionReportComparison]] = {}
max_pct_change = float("-inf")
max_pct_change_session = ""
for session in all_sessions:
if session not in self.sessions or session not in other.sessions:
comparison[session] = None
else:
comp = self.sessions[session].compare(other.sessions[session])
comparison[session] = comp
if comp.max_pct_change > max_pct_change:
max_pct_change = comp.max_pct_change
max_pct_change_session = session

return SessionReportSetComparison(
sessions=comparison,
max_pct_change=max_pct_change,
max_pct_change_session=max_pct_change_session,
)


class SessionReportSetComparison(FrozenBaseModel):
"""A comparison between two session report sets.
If a session ID is not present in A or B, the comparison is None
"""

sessions: Dict[str, Optional[SessionReportComparison]]

max_pct_change: float
max_pct_change_session: str


class PerformanceTracker:
"""Track performance metrics across different contexts.
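One detail worth spelling out in `ContextReport.compare` above: each `_abs` delta is the value from the report `compare` is called on minus the value from the report passed in as `other`, and each `_pct` value is that delta divided by the former. A small worked example with made-up numbers, in plain Python rather than the classes themselves:

# Average CPU time per call in two reports, in nanoseconds (made-up values).
cpu_ns_average_a = 4_000_000  # report that compare() is called on
cpu_ns_average_b = 5_000_000  # report passed in as `other`

diff = cpu_ns_average_a - cpu_ns_average_b  # -1_000_000 ns
pct = diff / cpu_ns_average_a               # -0.25, i.e. -25% relative to the first report
print(f"{diff / 1e6:.4f}ms ({pct * 100:+.2f}%)")  # -1.0000ms (-25.00%)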
94 changes: 94 additions & 0 deletions tests_metricflow/performance/compare_reports.py
@@ -0,0 +1,94 @@
from __future__ import annotations

import argparse
import json
from io import StringIO

from metricflow_semantics.test_helpers.performance_helpers import (
SessionReportSet,
SessionReportSetComparison,
)

MAX_PCT_CHANGE_WARNING_THRESHOLD = 0.15


def _load_report_file(filename: str) -> SessionReportSet:
with open(filename, "r") as f:
raw = f.read()
return SessionReportSet.parse_obj(json.loads(raw))


# I hate this code but there's no really elegant way of creating a markdown file
def _report_comparison_markdown(base_name: str, other_name: str, comp: SessionReportSetComparison) -> str:
buf = StringIO()

buf.write("# Performance comparison\n")
buf.write(f"Comparing `{base_name}` against `{other_name}`\n\n")
buf.write(f"**Worst performance hit:** {comp.max_pct_change * 100:.2f}% in `{comp.max_pct_change_session}`\n\n")

for session, session_comp in comp.sessions.items():
emoji = (
":question:"
if session_comp is None
else (":bangbang:" if session_comp.max_pct_change > MAX_PCT_CHANGE_WARNING_THRESHOLD else ":rocket:")
)

buf.write(f"## `{session}` {emoji}\n\n")
if session_comp is None:
buf.write("Comparison not available since there's no data for this session in one of the reports.\n\n")
continue

buf.write("| context | CPU avg | CPU median | CPU max | Wall avg | Wall median | Wall max |\n")
buf.write("| ------- | ------- | ---------- | ------- | -------- | ----------- | -------- |\n")
for ctx, ctx_comp in session_comp.contexts.items():
buf.write(f"| `{ctx}` ")

if ctx_comp is None:
buf.write(" | n/a" * 6)
else:
buf.write("| ")
buf.write(
" | ".join(
f"{int(abs)/10e6:.4f}ms ({pct * 100:+.2f}%)"
for abs, pct in (
(ctx_comp.cpu_ns_average_abs, ctx_comp.cpu_ns_average_pct),
(ctx_comp.cpu_ns_median_abs, ctx_comp.cpu_ns_median_pct),
(ctx_comp.cpu_ns_max_abs, ctx_comp.cpu_ns_max_pct),
(ctx_comp.wall_ns_average_abs, ctx_comp.wall_ns_average_pct),
(ctx_comp.wall_ns_median_abs, ctx_comp.wall_ns_median_pct),
(ctx_comp.wall_ns_max_abs, ctx_comp.wall_ns_max_pct),
)
)
)

buf.write(" |\n")
buf.write("\n\n")

return buf.getvalue()


def main() -> None: # noqa: D103
parser = argparse.ArgumentParser()
parser.add_argument("a", help="The base report for the comparison")
parser.add_argument("b", help="The other report for the comparison")
parser.add_argument("output", help="The output file for the comparison")

args = parser.parse_args()

a = _load_report_file(args.a)
b = _load_report_file(args.b)

comparison = a.compare(b)
md = _report_comparison_markdown(
base_name=args.a,
other_name=args.b,
comp=comparison,
)
with open(args.output, "w") as f:
f.write(md)

print(args.output)


if __name__ == "__main__":
main()
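Since the script only relies on the pydantic models shown earlier, the same comparison can also be driven from a Python shell without going through the CLI. A short sketch; the report file names are illustrative:

import json

from metricflow_semantics.test_helpers.performance_helpers import SessionReportSet

# Load two report sets produced by separate `make perf` runs.
with open("performance-baseline.json") as f:
    a = SessionReportSet.parse_obj(json.load(f))
with open("performance-report.json") as f:
    b = SessionReportSet.parse_obj(json.load(f))

comparison = a.compare(b)
print(f"worst change: {comparison.max_pct_change * 100:.2f}% in `{comparison.max_pct_change_session}`")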
24 changes: 22 additions & 2 deletions tests_metricflow/performance/conftest.py
@@ -25,6 +25,22 @@
GLOBAL_TRACKING_CONTEXT = "global"


def pytest_addoption(parser: pytest.Parser) -> None:
"""Add option for performance report file."""
parser.addoption(
"--output-json",
action="store",
default="performance-report.json",
help="where to store performance results as JSON",
)


@pytest.fixture(scope="session")
def perf_output_json(request: pytest.FixtureRequest) -> str:
"""The output json for the performance test."""
return request.config.getoption("--output-json") # type: ignore


@pytest.fixture(scope="session")
def perf_tracker() -> PerformanceTracker:
"""Instrument MetricFlow with performance tracking utilities."""
@@ -44,7 +60,10 @@ def __call__( # noqa: D102


@pytest.fixture(scope="session")
def measure_compilation_performance(perf_tracker: PerformanceTracker) -> Iterator[MeasureFixture]:
def measure_compilation_performance(
perf_tracker: PerformanceTracker,
perf_output_json: str,
) -> Iterator[MeasureFixture]:
"""Fixture that returns a function which measures compilation performance for a given query."""

def _measure(
@@ -83,4 +102,5 @@ def _measure(

report_set = perf_tracker.get_report_set()

print(report_set.to_pretty_json())
with open(perf_output_json, "w") as f:
f.write(report_set.to_pretty_json())
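With the new `--output-json` option, the report location can also be chosen directly when invoking pytest rather than going through the Makefile. A sketch mirroring the `perf` target's command, with an illustrative output path:

import subprocess

# Same command the Makefile's `perf` target runs, but with a custom report path.
subprocess.run(
    [
        "hatch", "-v", "run", "dev-env:pytest",
        "-vv", "-n", "1",
        "--output-json", "/tmp/my-performance-report.json",
        "tests_metricflow/performance/",
    ],
    check=True,
)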
2 changes: 0 additions & 2 deletions tests_metricflow/performance/test_simple_manifest.py
@@ -36,7 +36,6 @@ def test_simple_query(
dataflow_to_sql_converter=dataflow_to_sql_converter,
sql_client=sql_client,
)
assert False


def test_simple_query_2(
@@ -63,4 +62,3 @@ def test_simple_query_2(
dataflow_to_sql_converter=dataflow_to_sql_converter,
sql_client=sql_client,
)
assert False
