Skip to content

Commit

Permalink
Merge pull request #670 from sfu-db/benchmark_action
Browse files Browse the repository at this point in the history
test(eda): add performance test
  • Loading branch information
jinglinpeng authored Jul 13, 2021
2 parents 8d63e7a + d8ec5a1 commit caac298
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 1 deletion.
64 changes: 64 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Run the DataPrep.EDA benchmark suite with pytest-benchmark and track the
# results via rhysd/github-action-benchmark:
#   * on pull requests the result is only posted as a comment;
#   * on pushes to develop it is saved and auto-pushed (to gh-pages) so the
#     next run can compare against it.
name: Performance Benchmarks

on:
  push:
    branches:
      - develop
  pull_request:
    branches:
      - develop

jobs:
  benchmark:
    name: ${{ matrix.os }} x ${{ matrix.python }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python: ["3.7"]
    steps:
      # Full history (fetch-depth: 0) so github-action-benchmark can compare
      # against results from previous commits.
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      # v2 for consistency with checkout@v2 (v1 is deprecated).
      - uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python }}

      - name: Install dependencies
        run: |
          pip install poetry
          poetry config virtualenvs.in-project true
          poetry install
          poetry run pip install pytest-benchmark

      - name: Run benchmark
        run: poetry run pytest dataprep/tests/benchmarks/eda.py --benchmark-json benchmark.json

      - name: Show benchmark result for pull request
        if: ${{ github.event_name == 'pull_request' }}
        uses: rhysd/github-action-benchmark@v1
        with:
          name: DataPrep.EDA Benchmarks
          tool: "pytest"
          output-file-path: benchmark.json
          github-token: ${{ secrets.GITHUB_TOKEN }}
          # PR runs must not write to the stored data; only comment.
          auto-push: false
          save-data-file: false
          fail-threshold: "200%"
          comment-always: true
          fail-on-alert: true

      - name: Store benchmark result for push operator
        if: ${{ github.event_name == 'push' }}
        uses: rhysd/github-action-benchmark@v1
        with:
          name: DataPrep.EDA Benchmarks
          tool: "pytest"
          output-file-path: benchmark.json
          github-token: ${{ secrets.GITHUB_TOKEN }}
          # Persist this run's result as the new baseline.
          auto-push: true
          fail-threshold: "200%"
          comment-always: true
          fail-on-alert: true
2 changes: 1 addition & 1 deletion dataprep/connector/schema/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def build(
req_data["headers"]["Authorization"] = f"Bearer {storage['access_token']}"

def _auth(self, client_id: str, port: int = 9999) -> str:
# pylint: disable=protected-access
# pylint: disable=protected-access, no-member

state = get_random_string(23)
scope = ",".join(self.scopes)
Expand Down
1 change: 1 addition & 0 deletions dataprep/eda/create_report/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def _format_correlation(data: Dict[str, Any], cfg: Config) -> Dict[str, Any]:
rndrd = render_correlation(itmdt, cfg)
res["correlation_names"] = []
figs_corr: List[Figure] = []
# pylint: disable = not-an-iterable
for tab in rndrd.tabs:
fig = tab.child
fig.sizing_mode = "stretch_width"
Expand Down
3 changes: 3 additions & 0 deletions dataprep/tests/benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
This module is used for performance testing and generating plot in github action.
"""
23 changes: 23 additions & 0 deletions dataprep/tests/benchmarks/eda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
This module is for performance testing of EDA module in github action.
"""
from functools import partial
import pandas as pd
from typing import Any
from ...datasets import load_dataset
from ...eda import create_report


def report_func(df: pd.DataFrame, **kwargs: Any) -> None:
    """Benchmark target: build an EDA report for ``df``.

    A thin, timeable wrapper around :func:`create_report` so that
    pytest-benchmark can measure the full report-generation path.
    Extra keyword arguments are forwarded unchanged.
    """
    create_report(df, **kwargs)


def test_create_report(benchmark: Any) -> None:
    """Performance test of create_report on the titanic dataset.

    ``benchmark`` is the pytest-benchmark fixture: it invokes the target
    callable repeatedly and records timing statistics.
    """
    df = load_dataset("titanic")
    # Pass the function and its argument directly; the original
    # ``partial(report_func)`` wrapped the callable in a no-op partial
    # with no bound arguments, adding only call overhead.
    benchmark(report_func, df)

0 comments on commit caac298

Please sign in to comment.