Skip to content

Commit

Permalink
Merge pull request #670 from sfu-db/benchmark_action
Browse files Browse the repository at this point in the history
test(eda): add performance test
  • Loading branch information
jinglinpeng authored Jul 13, 2021
2 parents 8d63e7a + d8ec5a1 commit caac298
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 1 deletion.
64 changes: 64 additions & 0 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Run the DataPrep.EDA benchmark suite with pytest-benchmark and track the
# results via rhysd/github-action-benchmark:
#   * on pull requests the result is only posted as a comment;
#   * on pushes to develop it is saved and auto-pushed (to gh-pages) so the
#     next run can compare against it.
name: Performance Benchmarks

on:
  push:
    branches:
      - develop
  pull_request:
    branches:
      - develop

jobs:
  benchmark:
    name: ${{ matrix.os }} x ${{ matrix.python }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        python: ["3.7"]
    steps:
      # Full history (fetch-depth: 0) so github-action-benchmark can compare
      # against results from previous commits.
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0

      # v2 for consistency with checkout@v2 (v1 is deprecated).
      - uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python }}

      - name: Install dependencies
        run: |
          pip install poetry
          poetry config virtualenvs.in-project true
          poetry install
          poetry run pip install pytest-benchmark

      - name: Run benchmark
        run: poetry run pytest dataprep/tests/benchmarks/eda.py --benchmark-json benchmark.json

      - name: Show benchmark result for pull request
        if: ${{ github.event_name == 'pull_request' }}
        uses: rhysd/github-action-benchmark@v1
        with:
          name: DataPrep.EDA Benchmarks
          tool: "pytest"
          output-file-path: benchmark.json
          github-token: ${{ secrets.GITHUB_TOKEN }}
          # PR runs must not write to the stored data; only comment.
          auto-push: false
          save-data-file: false
          fail-threshold: "200%"
          comment-always: true
          fail-on-alert: true

      - name: Store benchmark result for push operator
        if: ${{ github.event_name == 'push' }}
        uses: rhysd/github-action-benchmark@v1
        with:
          name: DataPrep.EDA Benchmarks
          tool: "pytest"
          output-file-path: benchmark.json
          github-token: ${{ secrets.GITHUB_TOKEN }}
          # Persist this run's result as the new baseline.
          auto-push: true
          fail-threshold: "200%"
          comment-always: true
          fail-on-alert: true
2 changes: 1 addition & 1 deletion dataprep/connector/schema/defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def build(
req_data["headers"]["Authorization"] = f"Bearer {storage['access_token']}"

def _auth(self, client_id: str, port: int = 9999) -> str:
# pylint: disable=protected-access
# pylint: disable=protected-access, no-member

state = get_random_string(23)
scope = ",".join(self.scopes)
Expand Down
1 change: 1 addition & 0 deletions dataprep/eda/create_report/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def _format_correlation(data: Dict[str, Any], cfg: Config) -> Dict[str, Any]:
rndrd = render_correlation(itmdt, cfg)
res["correlation_names"] = []
figs_corr: List[Figure] = []
# pylint: disable = not-an-iterable
for tab in rndrd.tabs:
fig = tab.child
fig.sizing_mode = "stretch_width"
Expand Down
3 changes: 3 additions & 0 deletions dataprep/tests/benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""
This module is used for performance testing and generating plot in github action.
"""
23 changes: 23 additions & 0 deletions dataprep/tests/benchmarks/eda.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
This module is for performance testing of EDA module in github action.
"""
from functools import partial
import pandas as pd
from typing import Any
from ...datasets import load_dataset
from ...eda import create_report


def report_func(df: pd.DataFrame, **kwargs: Any) -> None:
    """Benchmark target: build an EDA report for ``df``.

    A thin, timeable wrapper around :func:`create_report` so that
    pytest-benchmark can measure the full report-generation path.
    Extra keyword arguments are forwarded unchanged.
    """
    create_report(df, **kwargs)


def test_create_report(benchmark: Any) -> None:
    """Performance test of create_report on the titanic dataset.

    ``benchmark`` is the pytest-benchmark fixture: it invokes the target
    callable repeatedly and records timing statistics.
    """
    df = load_dataset("titanic")
    # Pass the function and its argument directly; the original
    # ``partial(report_func)`` wrapped the callable in a no-op partial
    # with no bound arguments, adding only call overhead.
    benchmark(report_func, df)

0 comments on commit caac298

Please sign in to comment.